amprolla is an apt repository merger originally intended for use with the Devuan infrastructure. This version is the third iteration of the software.

#!/usr/bin/env python3
# see LICENSE file for copyright and license details
"""
Perform incremental updates
"""
from os.path import join
from multiprocessing import Pool
import requests

# Locking design note: use flock instead of tombstone files (see the
# accompanying changes to orchestrate.sh).
#
# As I understand it, there are three processes (A, B, C):
#
#   A. amprolla_update is run by orchestrate.sh (very often!):
#        1. there is always a consistent working set in merged, which usually
#           points to merged-production
#        2. before amprolla_update, merged switches to merged-staging
#           (why here and not after amprolla_update?)
#        3. amprolla_update works against -volatile; during this process that
#           directory is not necessarily fully consistent
#        4. after amprolla_update, merged-volatile is synchronised to
#           merged-production
#        5. merged switches to merged-production
#        6. merged-volatile is synchronised to merged-staging
#        7. merged-production is synchronised to pkgmaster
#   B. sometimes amprolla_merge is run
#   C. sometimes amprolla_merge_contents + amprolla_merge are run
#
# The *intent* of the previous locks appears to be that only one of A, B and C
# is active at a time, but that was never formally guaranteed; the timing just
# happened to match and should not be relied upon. An active lock does prevent
# A, B or C from starting, but in A the lock was only held while step A.3 ran;
# everything else ran without it. So if steps A.4 to A.7 took long enough that
# A.4 started again while A.7 was still running, bogus data would be
# synchronised to pkgmaster. Since A.7 is a network operation and A.5 and A.6
# are disk operations, such delays could happen, and may already have happened
# at some point.
#
# That is why amprolla_update now supports the --no-lock-I-am-sure argument:
# orchestrate.sh instead obtains the lock as a step A.0 that stays valid
# throughout the whole of process A. With that, only two directories are
# needed, -volatile and -production, and A is redefined as:
#
#   A. amprolla_update is run by orchestrate.sh (very often!):
#        0. obtain the amprolla lock and exit if it cannot be obtained
#        1. there is always a consistent working set in merged, which usually
#           points to merged-production
#        2. amprolla_update --no-lock-I-am-sure works against -volatile;
#           during this process that directory is not necessarily fully
#           consistent
#        3. merged switches to merged-volatile
#        4. merged-volatile is synchronised to merged-production
#        5. merged switches to merged-production
#        6. merged-production is synchronised to pkgmaster
#
# orchestrate.sh has been adapted accordingly, so merged-staging is no longer
# used with these patches and could in theory be removed. (A rough sketch of
# this kind of flock-based guard appears at the end of this file.)

import sys
import lib.globalvars as globalvars
from lib.config import aliases, cpunm, repos, repo_order, spooldir, skips
from lib.lock import run_with_args_locking
from lib.log import info, warn, die
from lib.parse import compare_dict, get_date, get_time, parse_release
from lib.net import download
from amprolla_merge import gen_release, merge, prepare_merge_dict


def remote_is_newer(remote, local):
    """
    Checks if a remote Release file holds a newer date, and returns True if so
    """
    rem_date = get_date(remote)
    loc_date = get_date(local)

    # print('Remote date: %s' % rem_date)
    # print('Local date: %s' % loc_date)

    if get_time(rem_date) > get_time(loc_date):
        info('Remote Release is newer!')
        return True

    return False


def perform_update(suite, paths):
    """
    Performs an incremental update and merge of a given suite
    """
    info('Checking for updates in %s' % suite)
    # print(paths)
    globalvars.suite = suite
    globalvars.rehash = False

    needsmerge = {}
    needsmerge['downloads'] = []  # all files that have to be downloaded
    regenrelease = False

    cnt = 0
    for i in repo_order:
        # i = repository name
        needsmerge[i] = {}
        needsmerge[i]['mergelist'] = []

        if paths[cnt]:
            info('Working on %s repo' % i)
            remote_path = paths[cnt].replace(spooldir, repos[i]['host'])

            try:
                remote_rel = requests.get(join(remote_path, 'Release'))
            except requests.exceptions.ConnectionError as err:
                warn('Caught exception: "%s". Retrying...' % err)
                return perform_update(suite, paths)

            local_rel_text = open(join(paths[cnt], 'Release')).read()

            diffs = {}
            if remote_is_newer(remote_rel.text, local_rel_text):
                download((join(remote_path, 'Release'),
                          join(paths[cnt], 'Release')))
                regenrelease = True

                diffs = compare_dict(parse_release(remote_rel.text),
                                     parse_release(local_rel_text))
            if diffs:
                globalvars.rehash = True
                for k in diffs:
                    if k.endswith('Packages.gz') or k.endswith('Sources.gz'):
                        needsmerge[i]['mergelist'].append(k)
                    rmt = join(paths[cnt].replace(spooldir, repos[i]['host']), k)
                    loc = join(paths[cnt], k)
                    dlf = (rmt, loc)
                    needsmerge['downloads'].append(dlf)

        cnt += 1
        # break

    # download what needs to be downloaded
    if needsmerge['downloads']:
        info('Downloading updates...')
        dlpool = Pool(cpunm)
        dlpool.map(download, needsmerge['downloads'])

    # create union of our Packages.gz and Sources.gz files we will merge
    uni = []
    for i in repo_order:
        uni.append(needsmerge[i]['mergelist'])
    updpkg_list = set().union(*uni)

    # make a list of package lists to feed into merge()
    merge_list = []
    for i in updpkg_list:
        pkgs = []
        for j in repo_order:
            sui = suite

            # append valid aliases
            if repos[j]['aliases']:
                if suite in aliases[repos[j]['name']]:
                    sui = aliases[repos[j]['name']][suite]
                elif repos[j]['skipmissing']:
                    sui = None
                if j == 'debian' and suite in skips:
                    sui = None

            if sui:
                pkgs.append(join(spooldir, repos[j]['dists'], sui, i))
            else:
                pkgs.append(None)

        merge_list.append(pkgs)

    # perform the actual merge
    if merge_list:
        info('Merging files...')
        mrgpool = Pool(cpunm)
        mrgpool.map(merge, merge_list)

    # generate Release files if we got any new files
    if needsmerge['downloads'] or regenrelease:
        info('Generating Release...')
        gen_release(suite)


def main():
    """
    Do the update for all repos
    """
    roots = prepare_merge_dict()

    for suite, paths in roots.items():
        perform_update(suite, paths)
        # break


if __name__ == '__main__':
    run_with_args_locking(main, "incremental update")
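
For reference, here is a minimal, self-contained sketch of the kind of flock-based guard described in the locking note above. The script itself relies on lib.lock.run_with_args_locking, whose implementation is not part of this file, so the lock file path, function name and signature below are illustrative assumptions rather than the actual amprolla API:

#!/usr/bin/env python3
# Illustrative sketch only: run a callable under an exclusive, non-blocking
# flock. The real helper is lib.lock.run_with_args_locking and may differ.
import fcntl
import sys

LOCKFILE = '/run/lock/amprolla.lock'  # assumed path, not taken from the source


def run_locked(func, description, lockfile=LOCKFILE):
    """
    Run func() while holding an exclusive flock on lockfile. If another
    process already holds the lock, report it and exit without running,
    so at most one amprolla process is active at any time.
    """
    with open(lockfile, 'w') as lockfd:
        try:
            # LOCK_NB makes flock fail immediately instead of blocking
            fcntl.flock(lockfd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except OSError:
            print('another %s is already running, exiting' % description)
            sys.exit(1)
        try:
            func()
        finally:
            # the lock is also dropped when the file is closed; unlock anyway
            fcntl.flock(lockfd, fcntl.LOCK_UN)


if __name__ == '__main__':
    run_locked(lambda: print('doing work'), 'example task')

Because the lock is held around the whole callable rather than only around the merge step, this is the same property the note above relies on when orchestrate.sh takes the lock once as step A.0 and keeps it valid for the entire run.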