amprolla is an apt repository merger originally intended for use with the Devuan infrastructure. This version is the third iteration of the software.

#!/usr/bin/env python3
# See LICENSE file for copyright and license details.
"""
Amprolla main module
"""
from os.path import basename, join, isfile
from multiprocessing import Pool
Fix locking by using flock instead of tombstone files. The necessary changes to orchestrate.sh follow from the way the three processes (A, B, C) currently interact:

A. amprolla_update is run by orchestrate.sh (very often!):
   1. there is always a consistent working set in merged, which usually points to merged-production
   2. before amprolla_update, merged switches to merged-staging (why here and not after amprolla_update?)
   3. amprolla_update works against -volatile; during this step that directory is not necessarily fully consistent
   4. after amprolla_update, merged-volatile is synchronised to merged-production
   5. merged switches to merged-production
   6. merged-volatile is synchronised to merged-staging
   7. merged-production is synchronised to pkgmaster

B. Sometimes amprolla_merge is run.

C. Sometimes amprolla_merge_contents + amprolla_merge are run.

The *intent* of the implemented locks appears to be that only one of A, B, C is active at a time, but in reality this is not formally guaranteed; the timing just happens to work out and should not be relied upon. An active lock does prevent A, B or C from starting, but in A the lock is only held while step A.3 runs; everything else runs without it. So if, for example, A.4 to A.7 take long enough that the next A.4 starts while A.7 is still running, bogus data will be synchronised to pkgmaster. Since A.7 is a network operation and A.5 and A.6 are disk operations, such delays *could* happen and could lead to exactly this scenario (maybe they already have at some point).

That is why I added support for the --no-lock-I-am-sure argument to amprolla_update and instead obtain the lock as a step A.0 in orchestrate.sh that stays valid throughout all of process A. With that, only two directories are needed, -volatile and -production, and A is redefined as:

A. amprolla_update is run by orchestrate.sh (very often!):
   0. obtain the amprolla lock and exit if it cannot be obtained
   1. there is always a consistent working set in merged, which usually points to merged-production
   2. amprolla_update --no-lock-I-am-sure works against -volatile; during this step that directory is not necessarily fully consistent
   3. merged switches to merged-volatile
   4. merged-volatile is synchronised to merged-production
   5. merged switches to merged-production
   6. merged-production is synchronised to pkgmaster

I have adapted orchestrate.sh accordingly, so merged-staging is no longer used with these patches and could in theory be removed.
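lib.lock itself is not shown on this page, so the following is only a minimal sketch of what a flock-based wrapper in the spirit of run_with_args_locking could look like. The function name run_with_flock, the lock file path and the exact handling of --no-lock-I-am-sure are assumptions made for this illustration, not amprolla's actual implementation.

def run_with_flock(func, description, lockpath='/tmp/amprolla.lock'):
    """Illustrative only: run func() under an exclusive, non-blocking
    flock(2) lock, unless the caller asked to skip locking because it
    already holds the lock for the whole run."""
    import fcntl
    import sys
    if '--no-lock-I-am-sure' in sys.argv:
        # orchestrate.sh obtains the lock as step A.0 and keeps it for the
        # whole run, so the individual tool skips its own locking here.
        return func()
    with open(lockpath, 'w') as lockfile:
        try:
            fcntl.flock(lockfile, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except BlockingIOError:
            print('%s: another amprolla process holds the lock' % description)
            raise SystemExit(1)
        return func()
# end of illustrative sketch; the real module continues below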
import sys
import lib.globalvars as globalvars
from lib.config import (aliases, arches, banpkgs, categories, cpunm, i18n,
mergedir, mergesubdir, pkgfiles, repos, repo_order,
srcfiles, spooldir, suites, skips)
from lib.lock import run_with_args_locking
from lib.package import (load_packages_file, merge_packages_many,
write_packages)
from lib.release import write_release
from lib.log import info, die, warn


def prepare_merge_dict():
"""
This function will prepare a dict of lists that contain the repos
that need to be merged in an ordered fashion. Orders them using the
    repo_order list found in lib.config.
    Example output:
        {'ascii': ['ascii', None, 'stretch']}
"""
merge_dict = {}
for suite in suites:
for i in suites[suite]:
merge_dict[i] = []
for suite in merge_dict:
for repo in repo_order:
tmpsuite = suite
if repos[repo]['aliases'] is True:
if tmpsuite in aliases[repos[repo]['name']]:
tmpsuite = aliases[repos[repo]['name']][suite]
elif repos[repo]['skipmissing'] is True:
tmpsuite = None
if repo == 'debian' and suite in skips:
tmpsuite = None
if tmpsuite: # make it a proper path
tmpsuite = join(spooldir, repos[repo]['dists'], tmpsuite)
merge_dict[suite].append(tmpsuite)
return merge_dict
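# Illustration of prepare_merge_dict() above. The repo names, alias and dists
# layout here are assumptions for the example, not necessarily the shipped
# lib.config: with repo_order = ['devuan', 'debian-security', 'debian'] and an
# alias mapping the suite 'ascii' to Debian's 'stretch', one entry of the
# returned dict would look like
#   'ascii': ['<spooldir>/devuan/dists/ascii',
#             None,                               # no matching suite, skipped
#             '<spooldir>/debian/dists/stretch']
# i.e. one dists path (or None) per repository, in repo_order priority.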


def devuan_rewrite(pkg, repo_name):
"""
Function to be called when including a certain package. Allows for changing
any attributes.
"""
if 'Filename' in pkg and repos[repo_name]['name'] not in pkg['Filename']:
pkg['Filename'] = pkg['Filename'].replace('pool/', 'pool/%s/' %
repos[repo_name]['name'], 1)
if 'Directory' in pkg and repos[repo_name]['name'] not in pkg['Directory']:
pkg['Directory'] = pkg['Directory'].replace('pool/', 'pool/%s/' %
repos[repo_name]['name'], 1)
return pkg
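# Illustration of devuan_rewrite() above; the repository name 'DEBIAN' is an
# assumed example value, not necessarily what lib.config uses. A package taken
# from the Debian repo with
#   Filename: pool/main/h/hello/hello_2.10-2_amd64.deb
# is rewritten to
#   Filename: pool/DEBIAN/main/h/hello/hello_2.10-2_amd64.deb
# so the merged index points into the per-origin pool of the merged tree,
# while packages whose paths already contain the repository name are left
# untouched.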


def merge(packages_list):
"""
Merges the Packages/Sources files given in the package list
['path/to/devuan/Packages.gz', None, 'path/to/debian/Packages.gz']
"""
all_repos = []
print('Loading packages: %s' % packages_list)
for i in range(len(repo_order)):
try:
pkgs = load_packages_file(packages_list[i])
except FileNotFoundError:
pkgs = None
warn("Skipping missing file: {}".format(packages_list[i]))
if pkgs:
all_repos.append({'name': repo_order[i], 'packages': pkgs})
for i in range(len(repo_order)):
if packages_list[i]:
if basename(packages_list[i]) == 'Packages.gz':
print('Merging packages')
src = False
new_pkgs = merge_packages_many(all_repos,
banned_packages=banpkgs,
rewriter=devuan_rewrite)
elif basename(packages_list[i]) == 'Sources.gz':
print('Merging sources')
src = True
new_pkgs = merge_packages_many(all_repos,
rewriter=devuan_rewrite)
break
print('Writing packages')
for i in range(len(repo_order)):
if packages_list[i]:
new_out = packages_list[i].replace(join(spooldir,
repos[repo_order[i]]['dists']),
join(mergedir, mergesubdir))
break
if src:
write_packages(new_pkgs, new_out, sources=True)
else:
write_packages(new_pkgs, new_out)
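# Illustration of the output path rewrite in merge() above (directory names
# are placeholders, not necessarily the configured values): an input index
# such as
#   <spooldir>/devuan/dists/ascii/main/binary-amd64/Packages.gz
# is written out as
#   <mergedir>/<mergesubdir>/ascii/main/binary-amd64/Packages.gz
# i.e. the per-repository spool prefix is swapped for the merged tree prefix
# while the suite/component/architecture part of the path stays the same.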


def gen_release(suite):
"""
Generates a Release file for a given suite (jessie/ascii/unstable)
"""
filelist = []
print('\nCrawling %s' % suite)
rootdir = join(mergedir, mergesubdir, suite)
for cat in categories:
for arch in arches:
if arch == 'source':
flist = srcfiles
else:
flist = pkgfiles
cont = arch.replace('binary', 'Contents')
cont_udeb = arch.replace('binary', 'Contents-udeb')
filelist.append(join(rootdir, cat, cont+'.gz'))
filelist.append(join(rootdir, cat, cont))
filelist.append(join(rootdir, cat, cont_udeb+'.gz'))
filelist.append(join(rootdir, cat, cont_udeb))
for i in flist:
filelist.append(join(rootdir, cat, arch, i))
if arch != 'source':
filelist.append(join(rootdir, cat,
'debian-installer', arch, i))
filelist.extend((join(rootdir, cat, 'i18n',
'Translation-{}.bz2'.format(lang))
for lang in i18n))
newrfl = join(rootdir, 'Release')
oldrfl = newrfl.replace(join(mergedir, mergesubdir),
join(spooldir, repos['devuan']['dists']))
print('Writing Release')
write_release(oldrfl, newrfl, filelist, rootdir)
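# For reference, an apt Release file records each index file from a filelist
# like the one built above with its checksum and size relative to rootdir.
# The helper below is only a sketch of what such a checksum block looks like;
# the hashes and header fields amprolla actually writes are decided by
# lib.release.write_release, which is not shown on this page.
def sha256_block(filelist, rootdir):
    """Illustrative only: build 'SHA256:' lines for the files that exist."""
    from hashlib import sha256
    from os.path import getsize, relpath
    lines = ['SHA256:']
    for path in filelist:
        if isfile(path):
            with open(path, 'rb') as fil:
                digest = sha256(fil.read()).hexdigest()
            lines.append(' %s %s %s' % (digest, getsize(path),
                                        relpath(path, rootdir)))
    return lines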


def main_merge(packages_file):
"""
Main function that calls the actual merge
"""
to_merge = prepare_merge_dict()
for suite in to_merge:
globalvars.suite = suite
pkg_list = []
for rep in to_merge[suite]:
if rep:
pkg_list.append(join(rep, packages_file))
else:
pkg_list.append(None)
merge(pkg_list)


def main():
"""
Crawls the entire directory structure and orchestrates the merge
in a queue using multiprocessing
"""
pkg = []
for i in arches:
for j in categories:
if i == 'source':
mrgfile = 'Sources.gz'
else:
mrgfile = 'Packages.gz'
pkg.append(join(j, 'debian-installer', i, mrgfile))
pkg.append(join(j, i, mrgfile))
mrgpool = Pool(cpunm)
mrgpool.map(main_merge, pkg)
mrgpool.close()
rel_list = []
for i in suites:
for j in suites[i]:
rel_list.append(j)
relpool = Pool(cpunm)
relpool.map(gen_release, rel_list)
relpool.close()


if __name__ == '__main__':
run_with_args_locking(main, "full merge")