diff options
Diffstat (limited to 'releng')
-rw-r--r-- | releng/__init__.py | 18 | ||||
-rw-r--r-- | releng/create_release.xsh | 12 | ||||
-rw-r--r-- | releng/docker.xsh | 69 | ||||
-rw-r--r-- | releng/docker_assemble.py | 399 | ||||
-rw-r--r-- | releng/environment.py | 26 |
5 files changed, 506 insertions, 18 deletions
diff --git a/releng/__init__.py b/releng/__init__.py index 39d2beb51..fc23c52f3 100644 --- a/releng/__init__.py +++ b/releng/__init__.py @@ -2,12 +2,29 @@ from xonsh.main import setup setup() del setup +import logging + from . import environment from . import create_release from . import keys from . import version from . import cli from . import docker +from . import docker_assemble + +rootLogger = logging.getLogger() +rootLogger.setLevel(logging.DEBUG) +log = logging.getLogger(__name__) +log.setLevel(logging.DEBUG) + +fmt = logging.Formatter('{asctime} {levelname} {name}: {message}', + datefmt='%b %d %H:%M:%S', + style='{') + +if not any(isinstance(h, logging.StreamHandler) for h in rootLogger.handlers): + hand = logging.StreamHandler() + hand.setFormatter(fmt) + rootLogger.addHandler(hand) def reload(): import importlib @@ -17,3 +34,4 @@ def reload(): importlib.reload(version) importlib.reload(cli) importlib.reload(docker) + importlib.reload(docker_assemble) diff --git a/releng/create_release.xsh b/releng/create_release.xsh index 128edb63f..6f4df2142 100644 --- a/releng/create_release.xsh +++ b/releng/create_release.xsh @@ -258,15 +258,16 @@ def upload_artifacts(env: RelengEnvironment, noconfirm=False, no_check_git=False 'I want to release this' ) + docker_images = list((ARTIFACTS / f'lix/lix-{VERSION}').glob(f'lix-{VERSION}-docker-image-*.tar.gz')) + assert docker_images + print('[+] Upload to cache') with open(DRVS_TXT) as fh: upload_drv_paths_and_outputs(env, [x.strip() for x in fh.readlines() if x]) - docker_images = (ARTIFACTS / f'lix/lix-{VERSION}').glob(f'lix-{VERSION}-docker-image-*.tar.gz') print('[+] Upload docker images') - for image in docker_images: - for target in env.docker_targets: - docker.upload_docker_image(target, image) + for target in env.docker_targets: + docker.upload_docker_images(target, docker_images) print('[+] Upload to release bucket') aws s3 cp --recursive @(ARTIFACTS)/ @(env.releases_bucket)/ @@ -321,7 +322,8 @@ def 
def upload_docker_images(target: DockerTarget, paths: list[Path]):
    """Push per-architecture images and tag one multiarch index over them.

    Each archive in ``paths`` is inspected with ``skopeo inspect`` and pushed
    to ``target.registry_path`` under the throwaway ``temp`` tag. An OCI index
    referencing every pushed manifest is then uploaded once per resolved tag
    of ``target`` (plus ``latest`` when HEAD is not a maintenance branch).

    :param target: registry path and tag templates to publish under.
    :param paths: docker-archive tarballs, one per architecture. An empty
        list is a no-op.
    """
    if not paths: return

    sess = requests.Session()
    sess.headers['User-Agent'] = 'lix-releng'

    tag_names = [DockerTarget.resolve(tag, version=VERSION, major=MAJOR) for tag in target.tags]

    # latest only gets tagged for the current release branch of Lix
    if not gitutils.is_maintenance_branch('HEAD'):
        tag_names.append('latest')

    # Annotations for the index; the labels of the last inspected image win.
    meta = {}

    reg = docker_assemble.Registry(sess)
    manifests = []

    with tempfile.TemporaryDirectory() as tmp:
        tmp = Path(tmp)

        for path in paths:
            digest_file = tmp / (path.name + '.digest')
            inspection = json.loads($(skopeo inspect docker-archive:@(path)))

            docker_arch = inspection['Architecture']
            docker_os = inspection['Os']
            # An image without labels is reported as a null/missing "Labels";
            # never hand None to the index as its annotations.
            meta = inspection.get('Labels') or {}

            log.info('Pushing image %s for %s', path, docker_arch)

            # insecure-policy: we don't have any signature policy, we are just uploading an image
            # We upload to a junk tag, because otherwise it will upload to `latest`, which is undesirable
            skopeo --insecure-policy copy --format oci --digestfile @(digest_file) docker-archive:@(path) docker://@(target.registry_path):temp

            digest = digest_file.read_text().strip()

            # skopeo doesn't give us the manifest size directly, so we just ask the registry
            metadata = reg.image_info(target.registry_path, digest)

            manifests.append(OCIIndexItem(metadata=metadata, architecture=docker_arch, os=docker_os))
    # delete the temp tag, which we only have to create because of skopeo
    # limitations anyhow (it seems to not have a way to say "don't tag it, find
    # your checksum and put it there")
    # FIXME: this is not possible because GitHub only has a proprietary API for it. amazing. 11/10.
    # reg.delete_tag(target.registry_path, 'temp')

    log.info('Pushed images, building a bigger and more menacing manifest from %r with metadata %r', manifests, meta)
    # send the multiarch manifest to each tag
    index = OCIIndex(manifests=manifests, annotations=meta)
    for tag in tag_names:
        reg.upload_index(target.registry_path, tag, index)
+# +# See the API spec here: https://github.com/opencontainers/distribution-spec/blob/v1.0.1/spec.md#definitions +# And the Index spec here: https://github.com/opencontainers/image-spec/blob/v1.0.1/image-index.md +# +# skopeo doesn't *know* how to make multiarch *manifests*: +# https://github.com/containers/skopeo/issues/1136 +# +# There is a tool called manifest-tool that is supposed to do this +# (https://github.com/estesp/manifest-tool) but it doesn't support putting in +# annotations on the outer image, and I *really* didn't want to write golang to +# fix that. Thus, a little bit of homebrew containers code. +# +# Essentially what we are doing in here is splatting a bunch of images into the +# registry without tagging them (except as "temp", due to podman issues), then +# simply sending a new composite manifest ourselves. + +DockerArchitecture = Literal['amd64'] | Literal['arm64'] +MANIFEST_MIME = 'application/vnd.oci.image.manifest.v1+json' +INDEX_MIME = 'application/vnd.oci.image.index.v1+json' + + +@dataclasses.dataclass(frozen=True, order=True) +class ImageMetadata: + size: int + digest: str + """sha256:SOMEHEX""" + + +@dataclasses.dataclass(frozen=True, order=True) +class OCIIndexItem: + """Information about an untagged uploaded image.""" + + metadata: ImageMetadata + + architecture: DockerArchitecture + + os: str = 'linux' + + def serialize(self): + return { + 'mediaType': MANIFEST_MIME, + 'size': self.metadata.size, + 'digest': self.metadata.digest, + 'platform': { + 'architecture': self.architecture, + 'os': self.os, + } + } + + +@dataclasses.dataclass(frozen=True) +class OCIIndex: + manifests: list[OCIIndexItem] + + annotations: dict[str, str] + + def serialize(self): + return { + 'schemaVersion': 2, + 'manifests': [item.serialize() for item in sorted(self.manifests)], + 'annotations': self.annotations + } + + +def docker_architecture_from_nix_system(system: str) -> DockerArchitecture: + MAP = { + 'x86_64-linux': 'amd64', + 'aarch64-linux': 'arm64', + 
def parse_list_header(value: str) -> list[str]:
    """Split a comma separated header value into its items.

    Splitting is delegated to :func:`urllib.request.parse_http_list`; on top
    of that, an item that is entirely wrapped in double quotes has that one
    pair of quotes removed.

    .. code-block:: python

        parse_list_header('token, "quoted value"')
        ['token', 'quoted value']

    :param value: The header value to parse.
    """

    def strip_quotes(token: str) -> str:
        # Only a token that both starts and ends with '"' counts as quoted;
        # a lone '"' is left alone.
        is_quoted = len(token) >= 2 and token[0] == token[-1] == '"'
        return token[1:-1] if is_quoted else token

    return [
        strip_quotes(token)
        for token in urllib.request.parse_http_list(value)
    ]
code-block:: python + + parse_dict_header('a=b, c="d, e", f') + {"a": "b", "c": "d, e", "f": None} + + This is the reverse of :func:`dump_header`. + + If a key does not have a value, it is ``None``. + + This handles charsets for values as described in + `RFC 2231 <https://www.rfc-editor.org/rfc/rfc2231#section-3>`__. Only ASCII, UTF-8, + and ISO-8859-1 charsets are accepted, otherwise the value remains quoted. + + :param value: The header value to parse. + + .. versionchanged:: 3.0 + Passing bytes is not supported. + + .. versionchanged:: 3.0 + The ``cls`` argument is removed. + + .. versionchanged:: 2.3 + Added support for ``key*=charset''value`` encoded items. + + .. versionchanged:: 0.9 + The ``cls`` argument was added. + """ + result: dict[str, str | None] = {} + + for item in parse_list_header(value): + key, has_value, value = item.partition("=") + key = key.strip() + + if not has_value: + result[key] = None + continue + + value = value.strip() + encoding: str | None = None + + if key[-1] == "*": + # key*=charset''value becomes key=value, where value is percent encoded + # adapted from parse_options_header, without the continuation handling + key = key[:-1] + match = _charset_value_re.match(value) + + if match: + # If there is a charset marker in the value, split it off. + encoding, value = match.groups() + assert encoding + encoding = encoding.lower() + + # A safe list of encodings. Modern clients should only send ASCII or UTF-8. + # This list will not be extended further. An invalid encoding will leave the + # value quoted. 
class AuthState:
    """Registry credentials plus a cache of bearer tokens obtained with them.

    ``auth_map`` maps a registry path prefix (a key under ``auths`` in an auth
    file) to that entry's ``auth`` value, which is sent verbatim as HTTP Basic
    credentials. ``token_cache`` maps a registry hostname to the bearer token
    last obtained for it.
    """

    def __init__(self, auth_files: 'Optional[list[Path]]' = None):
        """Load credentials from ``auth_files``; later files override earlier ones.

        :param auth_files: auth files to read. ``None`` (the default) means
            the module-level ``AUTH_FILES`` list.
        """
        if auth_files is None:
            auth_files = AUTH_FILES

        self.auth_map: dict[str, str] = {}
        for f in auth_files:
            self.auth_map.update(AuthState.load_auth_file(f))
        self.token_cache: dict[str, str] = {}

    @staticmethod
    def load_auth_file(path: Path) -> dict[str, str]:
        """Return ``{registry prefix: auth value}`` read from one auth file.

        A file that does not exist or is not valid JSON yields ``{}`` (the
        latter is logged). Entries without an ``auth`` value are skipped
        individually instead of discarding every credential in the file.
        """
        if not path.exists():
            return {}

        with path.open() as fh:
            try:
                json_obj = json.load(fh)
            except json.JSONDecodeError as e:
                log.exception('JSON decode error in %s', path, exc_info=e)
                return {}

        auths = json_obj.get('auths') if isinstance(json_obj, dict) else None
        if not isinstance(auths, dict):
            return {}

        return {
            registry: entry['auth']
            for registry, entry in auths.items()
            if isinstance(entry, dict) and 'auth' in entry
        }

    def get_token(self, hostname: str) -> Optional[str]:
        """Return the cached bearer token for ``hostname``, if there is one."""
        return self.token_cache.get(hostname)

    def obtain_token(self, session: 'requests.Session', token_endpoint: str,
                     scope: str, service: str, image_path: str) -> str:
        """Return a bearer token for ``image_path``, fetching one if needed.

        :param session: session used for the request to ``token_endpoint``.
        :param token_endpoint: URL of the token service.
        :param scope: value sent as the ``scope`` query parameter.
        :param service: value sent as the ``service`` query parameter.
        :param image_path: ``host/path`` of the image; the host part is the
            cache key and the whole path is used to look up credentials.
        :raises ValueError: if no credential matches ``image_path``.
        :raises KeyError: if the response carries no token.
        """
        authority, _, _ = image_path.partition('/')
        if tok := self.get_token(authority):
            return tok

        creds = self.find_credential_for(image_path)
        if not creds:
            raise ValueError('No credentials available for ' + image_path)

        resp = session.get(token_endpoint,
                           params={
                               'client_id': 'lix-releng',
                               'scope': scope,
                               'service': service,
                           },
                           headers={
                               'Authorization': 'Basic ' + creds
                           }).json()
        # Token services may name the field either "token" or "access_token".
        token = resp.get('token') or resp.get('access_token')
        if not token:
            raise KeyError('token')

        # Cache under the key get_token() is queried with (the registry
        # hostname). The service name can differ from the hostname, in which
        # case a token stored under it would never be found again.
        self.token_cache[authority] = token
        return token

    def find_credential_for(self, image_path: str) -> Optional[str]:
        """Return the credential of the longest matching prefix of ``image_path``.

        Prefixes are tried from the full path down to the bare hostname, one
        ``/``-separated component at a time. Returns ``None`` if none matches.
        """
        trails = image_path.split('/')
        for end in range(len(trails), 0, -1):
            prefix = '/'.join(trails[:end])
            if prefix in self.auth_map:
                return self.auth_map[prefix]

        return None
INDEX_MIME}, + auth=RegistryAuthenticator(self.auth_state, self.session, + image_path)) + resp.raise_for_status() + + def _upload_index(self, image_path: str, tag: str, index: OCIIndex): + authority, _, path = image_path.partition('/') + body = json.dumps(index.serialize(), + separators=(',', ':'), + sort_keys=True) + + resp = self.session.put( + f'https://{authority}/v2/{path}/manifests/{tag}', + data=body, + headers={'Content-Type': INDEX_MIME}, + auth=RegistryAuthenticator(self.auth_state, self.session, + image_path)) + resp.raise_for_status() + + return resp.headers['Location'] + + def upload_index(self, + image_path: str, + tag: str, + index: OCIIndex, + retries=20, + retry_delay=1): + # eventual consistency lmao + for _ in range(retries): + try: + return self._upload_index(image_path, tag, index) + except requests.HTTPError as e: + if e.response.status_code != 404: + raise + + time.sleep(retry_delay) diff --git a/releng/environment.py b/releng/environment.py index e8e7e771e..3d65b2799 100644 --- a/releng/environment.py +++ b/releng/environment.py @@ -1,5 +1,5 @@ -import dataclasses import urllib.parse +import dataclasses S3_HOST = 's3.lix.systems' S3_ENDPOINT = 'https://s3.lix.systems' @@ -19,12 +19,19 @@ DEFAULT_STORE_URI_BITS = { @dataclasses.dataclass class DockerTarget: registry_path: str + """Registry path without the tag, e.g. ghcr.io/lix-project/lix""" - def resolve(self, version: str) -> str: - """Applies templates: - - version: the Lix version + tags: list[str] + """List of tags this image should take. There must be at least one.""" + + @staticmethod + def resolve(item: str, version: str, major: str) -> str: + """ + Applies templates: + - version: the Lix version e.g. 2.90.0 + - major: the major Lix version e.g. 
2.90 """ - return self.registry_path.format(version=version) + return item.format(version=version, major=major) def registry_name(self) -> str: [a, _, _] = self.registry_path.partition('/') @@ -57,10 +64,11 @@ STAGING = RelengEnvironment( releases_bucket='s3://staging-releases', git_repo='ssh://git@git.lix.systems/lix-project/lix-releng-staging', docker_targets=[ - DockerTarget( - 'git.lix.systems/lix-project/lix-releng-staging:{version}'), - DockerTarget( - 'ghcr.io/lix-project/lix-releng-staging:{version}'), + # FIXME: how do we make sure that latest gets the latest of the *most recent* branch? + DockerTarget('git.lix.systems/lix-project/lix-releng-staging', + tags=['{version}', '{major}']), + DockerTarget('ghcr.io/lix-project/lix-releng-staging', + tags=['{version}', '{major}']), ], ) |