From ef724c4960027f7758f15952e8f5a85a1dba1673 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89rico=20Nogueira?= Date: Wed, 23 Jun 2021 13:39:19 -0300 Subject: [PATCH 1/6] common/scripts: import xbps-cycles. From https://github.com/ahesford/xbps-cycles, that is based on https://gist.github.com/Chocimier/de76441493ec7775c201dac0bb03ced5 . License is compatible with void-packages. Will be run in CI, so it should live in the same repository. --- common/scripts/README.xbps-cycles.md | 18 +++++ common/scripts/xbps-cycles.py | 102 +++++++++++++++++++++++++++ 2 files changed, 120 insertions(+) create mode 100644 common/scripts/README.xbps-cycles.md create mode 100755 common/scripts/xbps-cycles.py diff --git a/common/scripts/README.xbps-cycles.md b/common/scripts/README.xbps-cycles.md new file mode 100644 index 000000000000..075484829716 --- /dev/null +++ b/common/scripts/README.xbps-cycles.md @@ -0,0 +1,18 @@ +# Cycle detector for void-packages + +This script enumerates dependencies for packages in a +[void-packages repository](https://github.com/void-linux/void-packages) +and identifies build-time dependency cycles. + +For command syntax, run `xbps-cycles.py -h`. Often, it may be sufficient to run +`xbps-cycles.py` with no arguments. By default, the script will look for a +repository at `$XBPS_DISTDIR`; if that variable is not defined, the current +directory is used instead. To override this behavior, use the `-d` option to +provide the path to your desired void-packages clone. + +The standard behavior will be to spawn multiple processes, one per CPU, to +enumerate package dependencies. This is by far the most time-consuming part of +the execution. To override the degree of parallelism, use the `-j` option. + +Failures should be harmless but, at this early stage, unlikely to be pretty or +even helpful. 
diff --git a/common/scripts/xbps-cycles.py b/common/scripts/xbps-cycles.py new file mode 100755 index 000000000000..24ef17156336 --- /dev/null +++ b/common/scripts/xbps-cycles.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python3 + +import os +import sys +import glob +import subprocess +import multiprocessing + +from argparse import ArgumentParser + +import networkx as nx + + +def enum_depends(pkg, xbpsdir): + ''' + Return a pair (pkg, [dependencies]), where [dependencies] is the list + of dependencies for the given package pkg. The argument xbpsdir should + be a path to a void-packages repository. Dependencies will be + determined by invoking + + <xbpsdir>/xbps-src show-build-deps <pkg> + + If the return code of this call nonzero, a message will be printed but + the package will treated as if it has no dependencies. + ''' + cmd = [os.path.join(xbpsdir, 'xbps-src'), 'show-build-deps', pkg] + + try: + deps = subprocess.check_output(cmd) + except subprocess.CalledProcessError as err: + print('xbps-src failed to find dependencies for package', pkg) + deps = [ ] + else: + deps = [d for d in deps.decode('utf-8').split('\n') if d] + + return pkg, deps + + +def find_cycles(depmap, xbpsdir): + ''' + For a map depmap: package -> [dependencies], construct a directed graph + and identify any cycles therein. + + The argument xbpsdir should be a path to the root of a void-packages + repository. All package names in depmap will be appended to the path + <xbpsdir>/srcpkgs and reduced with os.path.realpath to coalesce + subpackages.
+ ''' + G = nx.DiGraph() + + for i, deps in depmap.items(): + path = os.path.join(xbpsdir, 'srcpkgs', i) + i = os.path.basename(os.path.realpath(path)) + + for j in deps: + path = os.path.join(xbpsdir, 'srcpkgs', j.strip()) + j = os.path.basename(os.path.realpath(path)) + G.add_edge(i, j) + + for c in nx.strongly_connected_components(G): + if len(c) < 2: continue + pkgs = nx.to_dict_of_lists(G, c) + + p = next(iter(pkgs.keys())) + cycles = [ ] + while True: + cycles.append(p) + + # Cycle is complete when package is not in map + try: deps = pkgs.pop(p) + except KeyError: break + + # Any of the dependencies here contributes to a cycle + p = deps[0] + if len(deps) > 1: + print('Mulitpath: {} -> {}, choosing first'.format(p, deps)) + + if cycles: + print('Cycle: ' + ' -> '.join(cycles) + '\n') + + +if __name__ == '__main__': + parser = ArgumentParser(description='Cycle detector for xbps-src') + parser.add_argument('-j', '--jobs', default=None, + type=int, help='Number of parallel jobs') + parser.add_argument('-d', '--directory', + default=None, help='Path to void-packages repo') + + args = parser.parse_args() + + if not args.directory: + try: args.directory = os.environ['XBPS_DISTDIR'] + except KeyError: args.directory = '.' + + pool = multiprocessing.Pool(processes = args.jobs) + + pattern = os.path.join(args.directory, 'srcpkgs', '*') + depmap = dict(pool.starmap(enum_depends, + ((os.path.basename(g), args.directory) + for g in glob.iglob(pattern)))) + + find_cycles(depmap, args.directory) From 8807abcead4be8c36e61a5ece6ab8508f304914f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89rico=20Nogueira?= Date: Wed, 23 Jun 2021 13:45:23 -0300 Subject: [PATCH 2/6] .github/workflows: run xbps-cycles daily. Should help in catching cyclic dependencies early. Rename lockthreads.yml to include all scheduled CI tasks. 
--- .github/workflows/{lockthreads.yml => cron.yml} | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) rename .github/workflows/{lockthreads.yml => cron.yml} (61%) diff --git a/.github/workflows/lockthreads.yml b/.github/workflows/cron.yml similarity index 61% rename from .github/workflows/lockthreads.yml rename to .github/workflows/cron.yml index f3ec106a6e6c..c87446743fe5 100644 --- a/.github/workflows/lockthreads.yml +++ b/.github/workflows/cron.yml @@ -1,4 +1,4 @@ -name: 'Lock threads' +name: 'Scheduled tasks' on: schedule: @@ -13,3 +13,8 @@ jobs: github-token: ${{ github.token }} pr-lock-inactive-days: '90' process-only: 'prs' + cycles: + runs-on: ubuntu-latest + steps: + - run: apt-get install -y python3-networkx + - run: common/scripts/xbps-cycles.py From b3c34c8791feabd0ac57100a13fc263211cd6ed2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20W=C3=B3jcik?= Date: Wed, 23 Jun 2021 23:11:13 +0200 Subject: [PATCH 3/6] .github/workflows: prepare container for xbps-cycles --- .github/workflows/cron.yml | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cron.yml b/.github/workflows/cron.yml index c87446743fe5..c284857efcb6 100644 --- a/.github/workflows/cron.yml +++ b/.github/workflows/cron.yml @@ -15,6 +15,23 @@ jobs: process-only: 'prs' cycles: runs-on: ubuntu-latest + container: + image: 'ghcr.io/void-linux/xbps-src-masterdir:20210313rc01-x86_64-musl' steps: - - run: apt-get install -y python3-networkx + - name: Prepare container + run: | + # Sync and upgrade once, assume error comes from xbps update + xbps-install -Syu || xbps-install -yu xbps + # Upgrade again (in case there was a xbps update) + xbps-install -yu + # Install script dependencies + xbps-install -y python3-networkx + - uses: actions/checkout@v1 + with: + fetch-depth: 1 + - name: Create hostrepo and prepare masterdir + run: | + ln -s "$(pwd)" /hostrepo && + common/travis/set_mirror.sh && + common/travis/prepare.sh - run: 
common/scripts/xbps-cycles.py From fcc3d671f0a84a51f1dc4ca600445d53f271e0ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20W=C3=B3jcik?= Date: Tue, 29 Jun 2021 19:58:45 +0200 Subject: [PATCH 4/6] .github/workflows: open issue when cycle is detected --- .github/workflows/cron.yml | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cron.yml b/.github/workflows/cron.yml index c284857efcb6..1b53808a7588 100644 --- a/.github/workflows/cron.yml +++ b/.github/workflows/cron.yml @@ -25,7 +25,7 @@ jobs: # Upgrade again (in case there was a xbps update) xbps-install -yu # Install script dependencies - xbps-install -y python3-networkx + xbps-install -y python3-networkx github-cli - uses: actions/checkout@v1 with: fetch-depth: 1 @@ -34,4 +34,15 @@ jobs: ln -s "$(pwd)" /hostrepo && common/travis/set_mirror.sh && common/travis/prepare.sh - - run: common/scripts/xbps-cycles.py + - name: Find cycles and open issues + run: | + common/scripts/xbps-cycles.py | tee cycles + grep 'Cycle:' cycles | while read -r line; do + if gh issue list -R "$GITHUB_REPOSITORY" -S "$line" | grep .; then + printf "Issue on '%s' already exists.\n" "$line" + else + gh issue create -R "$GITHUB_REPOSITORY" -b '' -t "$line" + fi + done + env: + GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN }} From ec550b63b1d9b99ee028306efcaf045a5a439372 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20W=C3=B3jcik?= Date: Wed, 30 Jun 2021 20:38:44 +0200 Subject: [PATCH 5/6] common/xbps-cycles.py: Add cache option allows to modify deps and see if cycle is resolved --- common/scripts/README.xbps-cycles.md | 4 ++++ common/scripts/xbps-cycles.py | 26 ++++++++++++++++++++++---- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/common/scripts/README.xbps-cycles.md b/common/scripts/README.xbps-cycles.md index 075484829716..12774e52c707 100644 --- a/common/scripts/README.xbps-cycles.md +++ b/common/scripts/README.xbps-cycles.md @@ -14,5 +14,9 @@ The standard behavior will be 
to spawn multiple processes, one per CPU, to enumerate package dependencies. This is by far the most time-consuming part of the execution. To override the degree of parallelism, use the `-j` option. +Dependencies can be cached on disk, one file per package, in the directory +passed with the `-c` option. On the next execution with the same option, +dependencies are read from the cached files rather than computed. + Failures should be harmless but, at this early stage, unlikely to be pretty or even helpful. diff --git a/common/scripts/xbps-cycles.py b/common/scripts/xbps-cycles.py index 24ef17156336..769e941ad89e 100755 --- a/common/scripts/xbps-cycles.py +++ b/common/scripts/xbps-cycles.py @@ -11,7 +11,7 @@ import networkx as nx -def enum_depends(pkg, xbpsdir): +def enum_depends(pkg, xbpsdir, cachedir): ''' Return a pair (pkg, [dependencies]), where [dependencies] is the list of dependencies for the given package pkg. The argument xbpsdir should @@ -20,18 +20,32 @@ def enum_depends(pkg, xbpsdir): <xbpsdir>/xbps-src show-build-deps <pkg> + unless the <cachedir>/deps-<pkg> file exists, in which case it is read. + If the return code of this call nonzero, a message will be printed but the package will treated as if it has no dependencies.
''' + if cachedir: + cachepath = os.path.join(cachedir, 'deps-' + pkg) + try: + with open(cachepath) as f: + return pkg, [l.strip() for l in f] + except FileNotFoundError: + pass + cmd = [os.path.join(xbpsdir, 'xbps-src'), 'show-build-deps', pkg] try: deps = subprocess.check_output(cmd) except subprocess.CalledProcessError as err: - print('xbps-src failed to find dependencies for package', pkg) + print('xbps-src failed to find dependencies for package', pkg) deps = [ ] else: deps = [d for d in deps.decode('utf-8').split('\n') if d] + if cachedir: + with open(cachepath, 'w') as f: + for d in deps: + print(d, file=f) return pkg, deps @@ -83,6 +97,8 @@ def find_cycles(depmap, xbpsdir): parser = ArgumentParser(description='Cycle detector for xbps-src') parser.add_argument('-j', '--jobs', default=None, type=int, help='Number of parallel jobs') + parser.add_argument('-c', '--cachedir', + default=None, help='''Directory to use as cache for xbps-src show-build-deps. Directory must exist already.''') parser.add_argument('-d', '--directory', default=None, help='Path to void-packages repo') @@ -92,11 +108,13 @@ def find_cycles(depmap, xbpsdir): try: args.directory = os.environ['XBPS_DISTDIR'] except KeyError: args.directory = '.' 
+ cachedir = args.cachedir + pool = multiprocessing.Pool(processes = args.jobs) pattern = os.path.join(args.directory, 'srcpkgs', '*') - depmap = dict(pool.starmap(enum_depends, - ((os.path.basename(g), args.directory) + depmap = dict(pool.starmap(enum_depends, + ((os.path.basename(g), args.directory, cachedir) for g in glob.iglob(pattern)))) find_cycles(depmap, args.directory) From acc1111c84064fa9a88cd343f15d1bf98f9466f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20W=C3=B3jcik?= Date: Wed, 30 Jun 2021 20:39:34 +0200 Subject: [PATCH 6/6] common/xbps-cycles.py: deterministic cycle path --- common/scripts/xbps-cycles.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/scripts/xbps-cycles.py b/common/scripts/xbps-cycles.py index 769e941ad89e..dbfd538235ca 100755 --- a/common/scripts/xbps-cycles.py +++ b/common/scripts/xbps-cycles.py @@ -75,7 +75,7 @@ def find_cycles(depmap, xbpsdir): if len(c) < 2: continue pkgs = nx.to_dict_of_lists(G, c) - p = next(iter(pkgs.keys())) + p = min(pkgs.keys()) cycles = [ ] while True: cycles.append(p) @@ -85,7 +85,7 @@ def find_cycles(depmap, xbpsdir): except KeyError: break # Any of the dependencies here contributes to a cycle - p = deps[0] + p = min(deps) if len(deps) > 1: print('Mulitpath: {} -> {}, choosing first'.format(p, deps))