Browse Source

Implement reading, merging and writing of Packages files

debianarchive-update
Merlijn Wajer 6 years ago
parent
commit
742e8d0e7f
  1. 42
      amprolla
  2. 130
      lib/config.py
  3. 98
      lib/package.py
  4. 63
      lib/parse.py

42
amprolla

@ -0,0 +1,42 @@
#!/usr/bin/env python3
from os.path import join
from time import time
from lib.package import (write_packages, load_packages_file,
merge_packages, merge_packages_many)
from lib.parse import parse_release
from lib.config import banpkgs
# Location of each repository's jessie tree inside the local spool.
# NOTE(review): unlike the other two entries, the 'debian-sec' path has no
# per-repository directory below spool/ -- confirm this is intended.
roots = {
    'devuan': 'spool/devuan/dists/jessie',
    'debian': 'spool/debian/dists/jessie',
    'debian-sec': 'spool/dists/jessie/updates/',
}

# Disabled: discovering the armhf Packages.gz files through the Release files.
#devuan_release_contents = open(join(roots['devuan'], 'Release')).read()
#debian_release_contents = open(join(roots['debian'], 'Release')).read()
#devuan_release = parse_release(devuan_release_contents)
#debian_release = parse_release(debian_release_contents)
#devuan_files = list(filter(lambda x: x.endswith('Packages.gz') and 'armhf' in x, devuan_release.keys()))
#debian_files = list(filter(lambda x: x.endswith('Packages.gz') and 'armhf' in x, debian_release.keys()))

# The single Packages index (relative to each root) that gets merged.
packages_file = 'main/binary-armhf/Packages.gz'

t1 = time()

print('Loading packages')
# Loaded in the same order as before: devuan, debian, debian-sec.
devuan, debian, debian_sec = (
    load_packages_file(join(roots[repo], packages_file))
    for repo in ('devuan', 'debian', 'debian-sec')
)

# List order is the merge priority handed to merge_packages_many.
all_repos = [devuan, debian_sec, debian]

print('Merging packages')
new_pkgs = merge_packages_many(all_repos, banned_packages=banpkgs)

print('Writing packages')
write_packages(new_pkgs, 'Packages.merged')

t2 = time()
print('time:', t2-t1)

130
lib/config.py

@ -2,14 +2,12 @@
# copyright (c) 2017 - Ivan J. <parazyd@dyne.org>
# see LICENSE file for copyright and license details
# Settings grouped in a single dict.
# NOTE(review): every key here is repeated by a module-level name below;
# presumably this dict is the older form of the same settings -- confirm
# whether anything still reads it.
amprolla = {
"spooldir": "./spool",
"sign_key": "fa1b0274",
"mergedir": "./merged",
"mergedsubdirs": ["dists", "pool"],
"banpkgs": ['systemd', 'systemd-sysv']
#"checksums": [ 'md5sum', 'sha1', 'sha256', 'sha512' ]
}
# The same settings as module-level names.
# presumably the directory holding the downloaded repositories -- TODO confirm
spooldir = "./spool"
# presumably the id of the key used for signing -- TODO confirm
sign_key = "fa1b0274"
# presumably the output directory for merged data -- TODO confirm
mergedir = "./merged"
mergedsubdirs = ["dists", "pool"]
# Package names to ban; the amprolla script imports this and passes it to
# merge_packages_many as banned_packages. A set here, a list in the dict above.
banpkgs = {'systemd', 'systemd-sysv'}
#checksums = [ 'md5sum', 'sha1', 'sha256', 'sha512' ]
repos = {
# key name is priority, first is 0
@ -136,63 +134,63 @@ mainrepofiles = [
"Release.gpg"
]
# Field names with a trailing colon.
# NOTE(review): presumably the older form of packages_keys, kept verbatim so
# that any remaining user keeps working. It still lists 'Origin:' twice and
# 'Build-Ids' without a colon -- confirm whether it can be removed.
pkgfmt = [
    'Package:',
    'Version:',
    'Essential:',
    'Installed-Size:',
    'Maintainer:',
    'Architecture:',
    'Replaces:',
    'Provides:',
    'Depends:',
    'Conflicts:',
    'Pre-Depends:',
    'Breaks:',
    'Homepage:',
    'Apport:',
    'Auto-Built-Package:',
    'Build-Ids',
    'Origin:',
    'Bugs:',
    'Built-Using:',
    'Enhances:',
    'Recommends:',
    'Description:',
    'Description-md5:',
    'Ghc-Package:',
    'Gstreamer-Decoders:',
    'Gstreamer-Elements:',
    'Gstreamer-Encoders:',
    'Gstreamer-Uri-Sinks:',
    'Gstreamer-Uri-Sources:',
    'Gstreamer-Version:',
    'Lua-Versions:',
    'Modaliases:',
    'Npp-Applications:',
    'Npp-Description:',
    'Npp-File:',
    'Npp-Mimetype:',
    'Npp-Name:',
    'Origin:',
    'Original-Maintainer:',
    'Original-Source-Maintainer:',
    'Package-Type:',
    'Postgresql-Version:',
    'Python-Version:',
    'Python-Versions:',
    'Ruby-Versions:',
    'Source:',
    'Suggests:',
    'Xul-Appid:',
    'Multi-Arch:',
    'Build-Essential:',
    'Tag:',
    'Section:',
    'Priority:',
    'Filename:',
    'Size:',
    'MD5sum:',
    'SHA1:',
    'SHA256:'
]

# Field names of a Packages entry, in the order write_packages emits them.
# Each name must appear only once: write_packages walks this list and writes
# one line per listed name, so a repeated name would be written twice.
packages_keys = [
    'Package',
    'Version',
    'Essential',
    'Installed-Size',
    'Maintainer',
    'Architecture',
    'Replaces',
    'Provides',
    'Depends',
    'Conflicts',
    'Pre-Depends',
    'Breaks',
    'Homepage',
    'Apport',
    'Auto-Built-Package',
    'Build-Ids',
    'Origin',
    'Bugs',
    'Built-Using',
    'Enhances',
    'Recommends',
    'Description',
    'Description-md5',
    'Ghc-Package',
    'Gstreamer-Decoders',
    'Gstreamer-Elements',
    'Gstreamer-Encoders',
    'Gstreamer-Uri-Sinks',
    'Gstreamer-Uri-Sources',
    'Gstreamer-Version',
    'Lua-Versions',
    'Modaliases',
    'Npp-Applications',
    'Npp-Description',
    'Npp-File',
    'Npp-Mimetype',
    'Npp-Name',
    'Original-Maintainer',
    'Original-Source-Maintainer',
    'Package-Type',
    'Postgresql-Version',
    'Python-Version',
    'Python-Versions',
    'Ruby-Versions',
    'Source',
    'Suggests',
    'Xul-Appid',
    'Multi-Arch',
    'Build-Essential',
    'Tag',
    'Section',
    'Priority',
    'Filename',
    'Size',
    'MD5sum',
    'SHA1',
    'SHA256',
]

98
lib/package.py

@ -0,0 +1,98 @@
from gzip import open as gzip_open
from lib.parse import (parse_packages, parse_dependencies)
from lib.config import packages_keys
def write_packages(packages, filename, sort=False, keys=None):
    """
    Writes `packages` to a file (per debian Packages format)

    `packages` maps a package name to a dict of field name -> value. Each
    package becomes one paragraph of 'Field: value' lines followed by an
    empty line. Fields not listed in `keys` are not written.

    If sort=True, the packages are sorted by name.

    `keys` is the ordered list of field names to emit; it defaults to
    `packages_keys`. A name listed more than once is written only once, at
    its first position.

    The file is written as UTF-8, the encoding load_packages_file decodes.
    """
    if keys is None:
        keys = packages_keys

    # Drop repeated field names but keep the first-seen order, so a duplicate
    # in the key list cannot produce the same field twice in a paragraph.
    fields = []
    seen = set()
    for key in keys:
        if key not in seen:
            seen.add(key)
            fields.append(key)

    # Names are the dict keys, so sorting them equals sorting the items by name.
    names = sorted(packages) if sort else packages

    # The context manager closes the file even if a write fails.
    with open(filename, 'w', encoding='utf-8') as f:
        for pkg_name in names:
            pkg_contents = packages[pkg_name]
            for key in fields:
                if key in pkg_contents:
                    f.write('%s: %s\n' % (key, pkg_contents[key]))
            f.write('\n')
def load_packages_file(filename):
    """ Load a gzip'd packages file.

    The file is decompressed, decoded as UTF-8 and handed to parse_packages.

    Returns a dictionary of package name and package key-values.
    """
    # Use a context manager so the gzip handle is closed even if reading
    # fails; previously it was never closed explicitly.
    with gzip_open(filename) as f:
        packages_contents = f.read()
    packages_contents = packages_contents.decode('utf-8')
    return parse_packages(packages_contents)
def package_banned(pkg, banned_pkgs):
    """
    Returns True if the package is banned or contains a banned dependency.

    `pkg` is a parsed package (dict of field name -> value) and `banned_pkgs`
    a collection of package names. The package is banned when its own
    'Package' name is in `banned_pkgs`, or when a name parsed from its
    'Depends' or 'Pre-Depends' field is.
    """
    if pkg.get('Package') in banned_pkgs:
        return True

    for field in ('Depends', 'Pre-Depends'):
        # parse_dependencies returns a dict keyed by package name, so
        # iterating it yields the names themselves. Indexing each key with
        # [0] (as was done before) only produced its first character, which
        # meant a banned dependency was never detected.
        for name in parse_dependencies(pkg.get(field, '')):
            if name in banned_pkgs:
                return True

    return False
def merge_packages(pkg1, pkg2, banned_packages=None):
    """
    Merges two previously loaded/parsed (using load_packages_file) packages
    dictionaries, preferring `pkg1` over `pkg2`, and optionally discarding any
    banned packages.

    `banned_packages` is a collection of banned package names; None (the
    default) means nothing is banned.

    Returns a new dict of package name -> package. Entries keep the order of
    `pkg1`, followed by the packages that only exist in `pkg2`, so the result
    does not vary between runs. The input dictionaries are not modified.
    """
    banned = set() if banned_packages is None else banned_packages

    new_pkgs = {}

    for name, entry in pkg1.items():
        # NOTE(review): a package present in both inputs is taken from pkg1
        # without a ban check, exactly as before -- confirm this is intended.
        if name in pkg2 or not package_banned(entry, banned):
            new_pkgs[name] = entry

    for name, entry in pkg2.items():
        if name not in pkg1 and not package_banned(entry, banned):
            new_pkgs[name] = entry

    return new_pkgs
def merge_packages_many(packages, banned_packages=None):
    """
    Merges any number of previously loaded/parsed (using load_packages_file)
    packages dictionaries, priority is defined by the order of the `packages`
    list, optionally discarding any banned packages.

    `banned_packages` is a collection of banned package names; None (the
    default) means nothing is banned.

    An empty `packages` list gives an empty dict. A single dictionary is
    merged with an empty one, so it goes through the same ban check as
    packages that exist in only one repository.
    """
    # Normalise here so merge_packages always receives a real collection.
    banned = set() if banned_packages is None else banned_packages

    if not packages:
        return {}

    second = packages[1] if len(packages) > 1 else {}
    new_pkgs = merge_packages(packages[0], second, banned_packages=banned)

    # Fold the remaining dictionaries in; the running result has priority.
    for pkg in packages[2:]:
        new_pkgs = merge_packages(new_pkgs, pkg, banned_packages=banned)

    return new_pkgs

63
lib/delta.py → lib/parse.py

@ -5,11 +5,11 @@
import ast
import gzip
import re
import requests
#import requests
import time
import config
from log import notice
from . import config
from .log import notice
def get_time(date):
@ -25,24 +25,26 @@ def get_date(relfile):
def parse_release(reltext):
    """
    Parses the SHA256 section of a Release file.

    Every entry line of the section has three whitespace-separated columns;
    the result maps the third column to the first one (file path -> SHA256
    digest, going by the Release file format). Returns an empty dict when
    `reltext` has no 'SHA256:' line.
    """
    _hash = {}

    match = re.search(r'^SHA256:', reltext, re.MULTILINE)
    if not match:
        return _hash

    # Element 0 is whatever is left of the 'SHA256:' line itself; skip it.
    for line in reltext[match.end():].split('\n')[1:]:
        if not line:
            continue
        if not line[0].isspace():
            # A line that is not indented starts the next field (or the
            # signature), so the SHA256 section is over.
            break
        columns = line.split()
        if len(columns) != 3:
            continue
        _hash[columns[2]] = columns[0]

    return _hash
# A field name followed by ': ', matched only at the start of a line. Anchoring
# keeps 'word: ' sequences inside a value (for instance in a description or
# on an indented continuation line) from being taken for a new field.
PACKAGES_REGEX = re.compile(r'^([A-Za-z0-9\-]+): ', re.MULTILINE)


def parse_package(entry):
    """ Parses a single Packages entry

    `entry` is the text of one paragraph. Returns a dict of field name ->
    value, with surrounding whitespace stripped from each value. Indented
    continuation lines stay part of the value of the field they follow.
    """
    # split() with a capturing group yields [text before the first field,
    # name, value, name, value, ...]; the leading element is thrown away.
    contents = PACKAGES_REGEX.split(entry)[1:]

    keys = contents[::2]
    vals = (val.strip() for val in contents[1::2])

    return dict(zip(keys, vals))
def parse_packages(pkgtext):
@ -50,11 +52,6 @@ def parse_packages(pkgtext):
# key: package name, value: entire package paragraph as a hashmap
map = {}
# TODO: consider also this approach
# def parse_packages(pkgfilepath):
# with gzip.open(pkgfilepath, "rb") as f:
# pkgs = f.read().split("\n\n")
pkgs = pkgtext.split("\n\n")
for pkg in pkgs:
m = re.match('Package: .+', pkg)
@ -62,8 +59,40 @@ def parse_packages(pkgtext):
line = pkg[m.start():m.end()]
key = line.split(': ')[1]
map[key] = parse_package(pkg)
return map
def parse_dependencies(dependencies):
    """
    Parses a dependency line from a debian Packages file.

    The line is split on commas; in each part the first word is the package
    name and the remainder, if any, is kept unparsed as its version
    constraint. Empty parts (empty input, a trailing comma) are skipped.

    Example line::

        'lib6 (>= 2.4), libdbus-1-3 (>= 1.0.2), foo'

    Output::

        {'lib6': '(>= 2.4)', 'libdbus-1-3': '(>= 1.0.2)', 'foo': None}
    """
    r = {}

    for pkg_plus_version in dependencies.split(','):
        v = pkg_plus_version.strip().split(None, 1)

        # Skip an empty part instead of returning {}; returning here used to
        # throw away every dependency that had already been parsed.
        if not v:
            continue

        r[v[0]] = v[1] if len(v) == 2 else None

    return r
def print_package(map, pkgname):
try:
Loading…
Cancel
Save