94427deb9d
GitOrigin-RevId: f91ee3065de91a3531329a674a45ddcb3467a650
550 lines
17 KiB
Python
Executable file
550 lines
17 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
|
|
"""
|
|
Update a Python package expression by passing in the `.nix` file, or the directory containing it.
|
|
You can pass in multiple files or paths.
|
|
|
|
You'll likely want to use
|
|
``
|
|
$ ./update-python-libraries ../../pkgs/development/python-modules/**/default.nix
|
|
``
|
|
to update all non-pinned libraries in that folder.
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import logging
|
|
import os
|
|
import re
|
|
import requests
|
|
from concurrent.futures import ThreadPoolExecutor as Pool
|
|
from packaging.version import Version as _Version
|
|
from packaging.version import InvalidVersion
|
|
from packaging.specifiers import SpecifierSet
|
|
from typing import Optional, Any
|
|
import collections
|
|
import subprocess
|
|
|
|
INDEX = "https://pypi.io/pypi"
|
|
"""url of PyPI"""
|
|
|
|
EXTENSIONS = ['tar.gz', 'tar.bz2', 'tar', 'zip', '.whl']
|
|
"""Permitted file extensions. These are evaluated from left to right and the first occurance is returned."""
|
|
|
|
PRERELEASES = False
|
|
|
|
BULK_UPDATE = False
|
|
|
|
GIT = "git"
|
|
|
|
NIXPKGS_ROOT = subprocess.check_output(["git", "rev-parse", "--show-toplevel"]).decode('utf-8').strip()
|
|
|
|
logging.basicConfig(level=logging.INFO)
|
|
|
|
|
|
class Version(_Version, collections.abc.Sequence):
|
|
|
|
def __init__(self, version):
|
|
super().__init__(version)
|
|
# We cannot use `str(Version(0.04.21))` because that becomes `0.4.21`
|
|
# https://github.com/avian2/unidecode/issues/13#issuecomment-354538882
|
|
self.raw_version = version
|
|
|
|
def __getitem__(self, i):
|
|
return self._version.release[i]
|
|
|
|
def __len__(self):
|
|
return len(self._version.release)
|
|
|
|
def __iter__(self):
|
|
yield from self._version.release
|
|
|
|
|
|
def _get_values(attribute, text):
|
|
"""Match attribute in text and return all matches.
|
|
|
|
:returns: List of matches.
|
|
"""
|
|
regex = fr'{re.escape(attribute)}\s+=\s+"(.*)";'
|
|
regex = re.compile(regex)
|
|
values = regex.findall(text)
|
|
return values
|
|
|
|
|
|
def _get_attr_value(attr_path: str) -> Optional[Any]:
|
|
try:
|
|
response = subprocess.check_output([
|
|
"nix",
|
|
"--extra-experimental-features", "nix-command",
|
|
"eval",
|
|
"-f", f"{NIXPKGS_ROOT}/default.nix",
|
|
"--json",
|
|
f"{attr_path}"
|
|
])
|
|
return json.loads(response.decode())
|
|
except (subprocess.CalledProcessError, ValueError):
|
|
return None
|
|
|
|
|
|
def _get_unique_value(attribute, text):
|
|
"""Match attribute in text and return unique match.
|
|
|
|
:returns: Single match.
|
|
"""
|
|
values = _get_values(attribute, text)
|
|
n = len(values)
|
|
if n > 1:
|
|
raise ValueError("found too many values for {}".format(attribute))
|
|
elif n == 1:
|
|
return values[0]
|
|
else:
|
|
raise ValueError("no value found for {}".format(attribute))
|
|
|
|
def _get_line_and_value(attribute, text, value=None):
|
|
"""Match attribute in text. Return the line and the value of the attribute."""
|
|
if value is None:
|
|
regex = rf'({re.escape(attribute)}\s+=\s+\"(.*)\";)'
|
|
else:
|
|
regex = rf'({re.escape(attribute)}\s+=\s+\"({re.escape(value)})\";)'
|
|
regex = re.compile(regex)
|
|
results = regex.findall(text)
|
|
n = len(results)
|
|
if n > 1:
|
|
raise ValueError("found too many values for {}".format(attribute))
|
|
elif n == 1:
|
|
return results[0]
|
|
else:
|
|
raise ValueError("no value found for {}".format(attribute))
|
|
|
|
|
|
def _replace_value(attribute, value, text, oldvalue=None):
|
|
"""Search and replace value of attribute in text."""
|
|
if oldvalue is None:
|
|
old_line, old_value = _get_line_and_value(attribute, text)
|
|
else:
|
|
old_line, old_value = _get_line_and_value(attribute, text, oldvalue)
|
|
new_line = old_line.replace(old_value, value)
|
|
new_text = text.replace(old_line, new_line)
|
|
return new_text
|
|
|
|
|
|
def _fetch_page(url):
|
|
r = requests.get(url)
|
|
if r.status_code == requests.codes.ok:
|
|
return r.json()
|
|
else:
|
|
raise ValueError("request for {} failed".format(url))
|
|
|
|
|
|
def _fetch_github(url):
|
|
headers = {}
|
|
token = os.environ.get('GITHUB_API_TOKEN')
|
|
if token:
|
|
headers["Authorization"] = f"token {token}"
|
|
r = requests.get(url, headers=headers)
|
|
|
|
if r.status_code == requests.codes.ok:
|
|
return r.json()
|
|
else:
|
|
raise ValueError("request for {} failed".format(url))
|
|
|
|
|
|
def _hash_to_sri(algorithm, value):
|
|
"""Convert a hash to its SRI representation"""
|
|
return subprocess.check_output([
|
|
"nix",
|
|
"hash",
|
|
"to-sri",
|
|
"--type", algorithm,
|
|
value
|
|
]).decode().strip()
|
|
|
|
|
|
def _skip_bulk_update(attr_name: str) -> bool:
|
|
return bool(_get_attr_value(
|
|
f"{attr_name}.skipBulkUpdate"
|
|
))
|
|
|
|
|
|
SEMVER = {
|
|
'major' : 0,
|
|
'minor' : 1,
|
|
'patch' : 2,
|
|
}
|
|
|
|
|
|
def _determine_latest_version(current_version, target, versions):
|
|
"""Determine latest version, given `target`.
|
|
"""
|
|
current_version = Version(current_version)
|
|
|
|
def _parse_versions(versions):
|
|
for v in versions:
|
|
try:
|
|
yield Version(v)
|
|
except InvalidVersion:
|
|
pass
|
|
|
|
versions = _parse_versions(versions)
|
|
|
|
index = SEMVER[target]
|
|
|
|
ceiling = list(current_version[0:index])
|
|
if len(ceiling) == 0:
|
|
ceiling = None
|
|
else:
|
|
ceiling[-1]+=1
|
|
ceiling = Version(".".join(map(str, ceiling)))
|
|
|
|
# We do not want prereleases
|
|
versions = SpecifierSet(prereleases=PRERELEASES).filter(versions)
|
|
|
|
if ceiling is not None:
|
|
versions = SpecifierSet(f"<{ceiling}").filter(versions)
|
|
|
|
return (max(sorted(versions))).raw_version
|
|
|
|
|
|
def _get_latest_version_pypi(package, extension, current_version, target):
|
|
"""Get latest version and hash from PyPI."""
|
|
url = "{}/{}/json".format(INDEX, package)
|
|
json = _fetch_page(url)
|
|
|
|
versions = json['releases'].keys()
|
|
version = _determine_latest_version(current_version, target, versions)
|
|
|
|
try:
|
|
releases = json['releases'][version]
|
|
except KeyError as e:
|
|
raise KeyError('Could not find version {} for {}'.format(version, package)) from e
|
|
for release in releases:
|
|
if release['filename'].endswith(extension):
|
|
# TODO: In case of wheel we need to do further checks!
|
|
sha256 = release['digests']['sha256']
|
|
break
|
|
else:
|
|
sha256 = None
|
|
return version, sha256, None
|
|
|
|
|
|
def _get_latest_version_github(package, extension, current_version, target):
|
|
def strip_prefix(tag):
|
|
return re.sub("^[^0-9]*", "", tag)
|
|
|
|
def get_prefix(string):
|
|
matches = re.findall(r"^([^0-9]*)", string)
|
|
return next(iter(matches), "")
|
|
|
|
# when invoked as an updateScript, UPDATE_NIX_ATTR_PATH will be set
|
|
# this allows us to work with packages which live outside of python-modules
|
|
attr_path = os.environ.get("UPDATE_NIX_ATTR_PATH", f"python3Packages.{package}")
|
|
try:
|
|
homepage = subprocess.check_output(
|
|
["nix", "eval", "-f", f"{NIXPKGS_ROOT}/default.nix", "--raw", f"{attr_path}.src.meta.homepage"])\
|
|
.decode('utf-8')
|
|
except Exception as e:
|
|
raise ValueError(f"Unable to determine homepage: {e}")
|
|
owner_repo = homepage[len("https://github.com/"):] # remove prefix
|
|
owner, repo = owner_repo.split("/")
|
|
|
|
url = f"https://api.github.com/repos/{owner}/{repo}/releases"
|
|
all_releases = _fetch_github(url)
|
|
releases = list(filter(lambda x: not x['prerelease'], all_releases))
|
|
|
|
if len(releases) == 0:
|
|
raise ValueError(f"{homepage} does not contain any stable releases")
|
|
|
|
versions = map(lambda x: strip_prefix(x['tag_name']), releases)
|
|
version = _determine_latest_version(current_version, target, versions)
|
|
|
|
release = next(filter(lambda x: strip_prefix(x['tag_name']) == version, releases))
|
|
prefix = get_prefix(release['tag_name'])
|
|
|
|
# some attributes require using the fetchgit
|
|
git_fetcher_args = []
|
|
if (_get_attr_value(f"{attr_path}.src.fetchSubmodules")):
|
|
git_fetcher_args.append("--fetch-submodules")
|
|
if (_get_attr_value(f"{attr_path}.src.fetchLFS")):
|
|
git_fetcher_args.append("--fetch-lfs")
|
|
if (_get_attr_value(f"{attr_path}.src.leaveDotGit")):
|
|
git_fetcher_args.append("--leave-dotGit")
|
|
|
|
if git_fetcher_args:
|
|
algorithm = "sha256"
|
|
cmd = [
|
|
"nix-prefetch-git",
|
|
f"https://github.com/{owner}/{repo}.git",
|
|
"--hash", algorithm,
|
|
"--rev", f"refs/tags/{release['tag_name']}"
|
|
]
|
|
cmd.extend(git_fetcher_args)
|
|
response = subprocess.check_output(cmd)
|
|
document = json.loads(response.decode())
|
|
hash = _hash_to_sri(algorithm, document[algorithm])
|
|
else:
|
|
try:
|
|
hash = subprocess.check_output([
|
|
"nix-prefetch-url",
|
|
"--type", "sha256",
|
|
"--unpack",
|
|
f"{release['tarball_url']}"
|
|
], stderr=subprocess.DEVNULL).decode('utf-8').strip()
|
|
except (subprocess.CalledProcessError, UnicodeError):
|
|
# this may fail if they have both a branch and a tag of the same name, attempt tag name
|
|
tag_url = str(release['tarball_url']).replace("tarball","tarball/refs/tags")
|
|
hash = subprocess.check_output([
|
|
"nix-prefetch-url",
|
|
"--type", "sha256",
|
|
"--unpack",
|
|
tag_url
|
|
], stderr=subprocess.DEVNULL).decode('utf-8').strip()
|
|
|
|
return version, hash, prefix
|
|
|
|
|
|
FETCHERS = {
|
|
'fetchFromGitHub' : _get_latest_version_github,
|
|
'fetchPypi' : _get_latest_version_pypi,
|
|
'fetchurl' : _get_latest_version_pypi,
|
|
}
|
|
|
|
|
|
DEFAULT_SETUPTOOLS_EXTENSION = 'tar.gz'
|
|
|
|
|
|
FORMATS = {
|
|
'setuptools' : DEFAULT_SETUPTOOLS_EXTENSION,
|
|
'wheel' : 'whl',
|
|
'pyproject' : 'tar.gz',
|
|
'flit' : 'tar.gz'
|
|
}
|
|
|
|
def _determine_fetcher(text):
|
|
# Count occurrences of fetchers.
|
|
nfetchers = sum(text.count('src = {}'.format(fetcher)) for fetcher in FETCHERS.keys())
|
|
if nfetchers == 0:
|
|
raise ValueError("no fetcher.")
|
|
elif nfetchers > 1:
|
|
raise ValueError("multiple fetchers.")
|
|
else:
|
|
# Then we check which fetcher to use.
|
|
for fetcher in FETCHERS.keys():
|
|
if 'src = {}'.format(fetcher) in text:
|
|
return fetcher
|
|
|
|
|
|
def _determine_extension(text, fetcher):
|
|
"""Determine what extension is used in the expression.
|
|
|
|
If we use:
|
|
- fetchPypi, we check if format is specified.
|
|
- fetchurl, we determine the extension from the url.
|
|
- fetchFromGitHub we simply use `.tar.gz`.
|
|
"""
|
|
if fetcher == 'fetchPypi':
|
|
try:
|
|
src_format = _get_unique_value('format', text)
|
|
except ValueError:
|
|
src_format = None # format was not given
|
|
|
|
try:
|
|
extension = _get_unique_value('extension', text)
|
|
except ValueError:
|
|
extension = None # extension was not given
|
|
|
|
if extension is None:
|
|
if src_format is None:
|
|
src_format = 'setuptools'
|
|
elif src_format == 'other':
|
|
raise ValueError("Don't know how to update a format='other' package.")
|
|
extension = FORMATS[src_format]
|
|
|
|
elif fetcher == 'fetchurl':
|
|
url = _get_unique_value('url', text)
|
|
extension = os.path.splitext(url)[1]
|
|
if 'pypi' not in url:
|
|
raise ValueError('url does not point to PyPI.')
|
|
|
|
elif fetcher == 'fetchFromGitHub':
|
|
extension = "tar.gz"
|
|
|
|
return extension
|
|
|
|
|
|
def _update_package(path, target):
|
|
|
|
# Read the expression
|
|
with open(path, 'r') as f:
|
|
text = f.read()
|
|
|
|
# Determine pname. Many files have more than one pname
|
|
pnames = _get_values('pname', text)
|
|
|
|
# Determine version.
|
|
version = _get_unique_value('version', text)
|
|
|
|
# First we check how many fetchers are mentioned.
|
|
fetcher = _determine_fetcher(text)
|
|
|
|
extension = _determine_extension(text, fetcher)
|
|
|
|
# Attempt a fetch using each pname, e.g. backports-zoneinfo vs backports.zoneinfo
|
|
successful_fetch = False
|
|
for pname in pnames:
|
|
if BULK_UPDATE and _skip_bulk_update(f"python3Packages.{pname}"):
|
|
raise ValueError(f"Bulk update skipped for {pname}")
|
|
try:
|
|
new_version, new_sha256, prefix = FETCHERS[fetcher](pname, extension, version, target)
|
|
successful_fetch = True
|
|
break
|
|
except ValueError:
|
|
continue
|
|
|
|
if not successful_fetch:
|
|
raise ValueError(f"Unable to find correct package using these pnames: {pnames}")
|
|
|
|
if new_version == version:
|
|
logging.info("Path {}: no update available for {}.".format(path, pname))
|
|
return False
|
|
elif Version(new_version) <= Version(version):
|
|
raise ValueError("downgrade for {}.".format(pname))
|
|
if not new_sha256:
|
|
raise ValueError("no file available for {}.".format(pname))
|
|
|
|
text = _replace_value('version', new_version, text)
|
|
|
|
# hashes from pypi are 16-bit encoded sha256's, normalize it to sri to avoid merge conflicts
|
|
# sri hashes have been the default format since nix 2.4+
|
|
sri_hash = _hash_to_sri("sha256", new_sha256)
|
|
|
|
# retrieve the old output hash for a more precise match
|
|
if old_hash := _get_attr_value(f"python3Packages.{pname}.src.outputHash"):
|
|
# fetchers can specify a sha256, or a sri hash
|
|
try:
|
|
text = _replace_value('hash', sri_hash, text, old_hash)
|
|
except ValueError:
|
|
text = _replace_value('sha256', sri_hash, text, old_hash)
|
|
else:
|
|
raise ValueError(f"Unable to retrieve old hash for {pname}")
|
|
|
|
if fetcher == 'fetchFromGitHub':
|
|
# in the case of fetchFromGitHub, it's common to see `rev = version;` or `rev = "v${version}";`
|
|
# in which no string value is meant to be substituted. However, we can just overwrite the previous value.
|
|
regex = r'(rev\s+=\s+[^;]*;)'
|
|
regex = re.compile(regex)
|
|
matches = regex.findall(text)
|
|
n = len(matches)
|
|
|
|
if n == 0:
|
|
raise ValueError("Unable to find rev value for {}.".format(pname))
|
|
else:
|
|
# forcefully rewrite rev, incase tagging conventions changed for a release
|
|
match = matches[0]
|
|
text = text.replace(match, f'rev = "refs/tags/{prefix}${{version}}";')
|
|
# incase there's no prefix, just rewrite without interpolation
|
|
text = text.replace('"${version}";', 'version;')
|
|
|
|
with open(path, 'w') as f:
|
|
f.write(text)
|
|
|
|
logging.info("Path {}: updated {} from {} to {}".format(path, pname, version, new_version))
|
|
|
|
result = {
|
|
'path' : path,
|
|
'target': target,
|
|
'pname': pname,
|
|
'old_version' : version,
|
|
'new_version' : new_version,
|
|
#'fetcher' : fetcher,
|
|
}
|
|
|
|
return result
|
|
|
|
|
|
def _update(path, target):
|
|
|
|
# We need to read and modify a Nix expression.
|
|
if os.path.isdir(path):
|
|
path = os.path.join(path, 'default.nix')
|
|
|
|
# If a default.nix does not exist, we quit.
|
|
if not os.path.isfile(path):
|
|
logging.info("Path {}: does not exist.".format(path))
|
|
return False
|
|
|
|
# If file is not a Nix expression, we quit.
|
|
if not path.endswith(".nix"):
|
|
logging.info("Path {}: does not end with `.nix`.".format(path))
|
|
return False
|
|
|
|
try:
|
|
return _update_package(path, target)
|
|
except ValueError as e:
|
|
logging.warning("Path {}: {}".format(path, e))
|
|
return False
|
|
|
|
|
|
def _commit(path, pname, old_version, new_version, pkgs_prefix="python: ", **kwargs):
|
|
"""Commit result.
|
|
"""
|
|
|
|
msg = f'{pkgs_prefix}{pname}: {old_version} -> {new_version}'
|
|
|
|
try:
|
|
subprocess.check_call([GIT, 'add', path])
|
|
subprocess.check_call([GIT, 'commit', '-m', msg])
|
|
except subprocess.CalledProcessError as e:
|
|
subprocess.check_call([GIT, 'checkout', path])
|
|
raise subprocess.CalledProcessError(f'Could not commit {path}') from e
|
|
|
|
return True
|
|
|
|
|
|
def main():
|
|
|
|
epilog = """
|
|
environment variables:
|
|
GITHUB_API_TOKEN\tGitHub API token used when updating github packages
|
|
"""
|
|
parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter, epilog=epilog)
|
|
parser.add_argument('package', type=str, nargs='+')
|
|
parser.add_argument('--target', type=str, choices=SEMVER.keys(), default='major')
|
|
parser.add_argument('--commit', action='store_true', help='Create a commit for each package update')
|
|
parser.add_argument('--use-pkgs-prefix', action='store_true', help='Use python3Packages.${pname}: instead of python: ${pname}: when making commits')
|
|
|
|
args = parser.parse_args()
|
|
target = args.target
|
|
|
|
packages = list(map(os.path.abspath, args.package))
|
|
|
|
if len(packages) > 1:
|
|
global BULK_UPDATE
|
|
BULK_UPDATE = True
|
|
|
|
logging.info("Updating packages...")
|
|
|
|
# Use threads to update packages concurrently
|
|
with Pool() as p:
|
|
results = list(filter(bool, p.map(lambda pkg: _update(pkg, target), packages)))
|
|
|
|
logging.info("Finished updating packages.")
|
|
|
|
commit_options = {}
|
|
if args.use_pkgs_prefix:
|
|
logging.info("Using python3Packages. prefix for commits")
|
|
commit_options["pkgs_prefix"] = "python3Packages."
|
|
|
|
# Commits are created sequentially.
|
|
if args.commit:
|
|
logging.info("Committing updates...")
|
|
# list forces evaluation
|
|
list(map(lambda x: _commit(**x, **commit_options), results))
|
|
logging.info("Finished committing updates")
|
|
|
|
count = len(results)
|
|
logging.info("{} package(s) updated".format(count))
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
main()
|