diff options
author | jade <lix@jade.fyi> | 2024-06-11 04:45:12 +0000 |
---|---|---|
committer | Gerrit Code Review <gerrit@localhost> | 2024-06-11 04:45:12 +0000 |
commit | 8a3d063a494c4b8c767190a5ce3e4075a75f9d07 (patch) | |
tree | 31ed3c6cdb149c56951658de2680a48fecfc57b0 /releng/docker_assemble.py | |
parent | f432e464dd1a11ef47b1487f1913cd9b5256c189 (diff) | |
parent | 82dc712d9312f06e653213d030b6db21529a422f (diff) |
Merge changes from topic "releng" into main
* changes:
releng: add prod environment, ready for release
releng: automatically figure out if we should tag latest for docker
releng: support multiarch docker images
manual: rewrite the docker guide now that we have images
Rewrite docker to be sensible and smaller
Implement docker upload in the releng tools
Diffstat (limited to 'releng/docker_assemble.py')
-rw-r--r-- | releng/docker_assemble.py | 399 |
1 files changed, 399 insertions, 0 deletions
diff --git a/releng/docker_assemble.py b/releng/docker_assemble.py new file mode 100644 index 000000000..ef1d8c4e6 --- /dev/null +++ b/releng/docker_assemble.py @@ -0,0 +1,399 @@ +from typing import Any, Literal, Optional +import re +from pathlib import Path +import json +import dataclasses +import time +from urllib.parse import unquote +import urllib.request +import logging + +import requests.auth +import requests +import xdg_base_dirs + +log = logging.getLogger(__name__) +log.setLevel(logging.INFO) + +DEBUG_REQUESTS = False +if DEBUG_REQUESTS: + urllib3_logger = logging.getLogger('requests.packages.urllib3') + urllib3_logger.setLevel(logging.DEBUG) + urllib3_logger.propagate = True + +# So, there is a bunch of confusing stuff happening in this file. The gist of why it's Like This is: +# +# nix2container does not concern itself with tags (reasonably enough): +# https://github.com/nlewo/nix2container/issues/59 +# +# This is fine. But then we noticed: docker images don't play nice if you have +# multiple architectures you want to abstract over if you don't do special +# things. Those special things are images with manifests containing multiple +# images. +# +# Docker has a data model vaguely analogous to git: you have higher level +# objects referring to a bunch of content-addressed blobs. +# +# A multiarch image is more or less just a manifest that refers to more +# manifests; in OCI it is an Index. +# +# See the API spec here: https://github.com/opencontainers/distribution-spec/blob/v1.0.1/spec.md#definitions +# And the Index spec here: https://github.com/opencontainers/image-spec/blob/v1.0.1/image-index.md +# +# skopeo doesn't *know* how to make multiarch *manifests*: +# https://github.com/containers/skopeo/issues/1136 +# +# There is a tool called manifest-tool that is supposed to do this +# (https://github.com/estesp/manifest-tool) but it doesn't support putting in +# annotations on the outer image, and I *really* didn't want to write golang to +# fix that. 
# Thus, a little bit of homebrew containers code.
#
# Essentially what we are doing in here is splatting a bunch of images into the
# registry without tagging them (except as "temp", due to podman issues), then
# simply sending a new composite manifest ourselves.

DockerArchitecture = Literal['amd64'] | Literal['arm64']
MANIFEST_MIME = 'application/vnd.oci.image.manifest.v1+json'
INDEX_MIME = 'application/vnd.oci.image.index.v1+json'


@dataclasses.dataclass(frozen=True, order=True)
class ImageMetadata:
    """Size and digest identifying one manifest."""

    size: int
    digest: str
    """sha256:SOMEHEX"""


@dataclasses.dataclass(frozen=True, order=True)
class OCIIndexItem:
    """Information about an untagged uploaded image."""

    metadata: ImageMetadata

    architecture: DockerArchitecture

    os: str = 'linux'

    def serialize(self):
        """Return this item as a JSON-ready manifest descriptor dict."""
        platform = {
            'architecture': self.architecture,
            'os': self.os,
        }
        return {
            'mediaType': MANIFEST_MIME,
            'size': self.metadata.size,
            'digest': self.metadata.digest,
            'platform': platform,
        }


@dataclasses.dataclass(frozen=True)
class OCIIndex:
    """A set of per-platform manifests plus annotations for the whole index."""

    manifests: list[OCIIndexItem]

    annotations: dict[str, str]

    def serialize(self):
        """Return the index as a JSON-ready dict.

        The manifests are emitted in sorted order, so the output does not
        depend on the order in which they were added.
        """
        ordered = sorted(self.manifests)
        return {
            'schemaVersion': 2,
            'manifests': [entry.serialize() for entry in ordered],
            'annotations': self.annotations,
        }


def docker_architecture_from_nix_system(system: str) -> DockerArchitecture:
    """Translate a Nix system string into the matching Docker architecture.

    Raises ``KeyError`` for systems that have no known mapping.
    """
    nix_to_docker: dict[str, DockerArchitecture] = {
        'x86_64-linux': 'amd64',
        'aarch64-linux': 'arm64',
    }
    return nix_to_docker[system]


@dataclasses.dataclass
class TaggingOperation:
    """An index together with the tags it is published under."""

    manifest: OCIIndex
    tags: list[str]
    """Tags this image is uploaded under"""


runtime_dir = xdg_base_dirs.xdg_runtime_dir()
config_dir = xdg_base_dirs.xdg_config_home()

# Candidate credential files; the runtime-dir entry is only present when a
# runtime dir exists.
AUTH_FILES = [
    *([runtime_dir / 'containers/auth.json'] if runtime_dir else []),
    config_dir / 'containers/auth.json',
    Path.home() / '.docker/config.json',
]


# Copied from Werkzeug https://github.com/pallets/werkzeug/blob/62e3ea45846d06576199a2f8470be7fe44c867c1/src/werkzeug/http.py#L300-L325
def _strip_outer_quotes(text: str) -> str:
    """Remove one pair of surrounding double quotes from *text*, if present."""
    if len(text) >= 2 and text[0] == text[-1] == '"':
        return text[1:-1]
    return text


def parse_list_header(value: str) -> list[str]:
    """Parse a header value that consists of a list of comma separated items according
    to `RFC 9110 <https://httpwg.org/specs/rfc9110.html#abnf.extension>`__.

    This extends :func:`urllib.request.parse_http_list` to remove surrounding quotes
    from values.

    .. code-block:: python

        parse_list_header('token, "quoted value"')
        ['token', 'quoted value']

    :param value: The header value to parse.
    :return: The items in order of appearance, with outer quotes removed.
    """
    return [
        _strip_outer_quotes(item)
        for item in urllib.request.parse_http_list(value)
    ]


# https://www.rfc-editor.org/rfc/rfc2231#section-4
_charset_value_re = re.compile(
    r"""
    ([\w!#$%&*+\-.^`|~]*)'  # charset part, could be empty
    [\w!#$%&*+\-.^`|~]*'  # don't care about language part, usually empty
    ([\w!#$%&'*+\-.^`|~]+)  # one or more token chars with percent encoding
    """,
    re.ASCII | re.VERBOSE,
)

# A safe list of encodings. Modern clients should only send ASCII or UTF-8.
_SAFE_CHARSETS = frozenset({"ascii", "us-ascii", "utf-8", "iso-8859-1"})


# Copied from: https://github.com/pallets/werkzeug/blob/62e3ea45846d06576199a2f8470be7fe44c867c1/src/werkzeug/http.py#L327-L394
def parse_dict_header(value: str) -> dict[str, str | None]:
    """Parse a list header using :func:`parse_list_header`, then parse each item as a
    ``key=value`` pair.

    .. code-block:: python

        parse_dict_header('a=b, c="d, e", f')
        {"a": "b", "c": "d, e", "f": None}

    If a key does not have a value, it is ``None``.

    This handles charsets for values as described in
    `RFC 2231 <https://www.rfc-editor.org/rfc/rfc2231#section-3>`__. Only ASCII, UTF-8,
    and ISO-8859-1 charsets are accepted, otherwise the value remains quoted.

    :param value: The header value to parse.
    :return: Mapping of parameter name to its value, or ``None`` for bare keys.
    """
    result: dict[str, str | None] = {}

    for item in parse_list_header(value):
        key, has_value, item_value = item.partition("=")
        key = key.strip()

        if not has_value:
            result[key] = None
            continue

        item_value = item_value.strip()

        # endswith() instead of key[-1] so an empty key ("=foo") does not raise.
        if key.endswith("*"):
            # key*=charset''value becomes key=value, where value is percent encoded
            # adapted from parse_options_header, without the continuation handling
            key = key[:-1]
            match = _charset_value_re.match(item_value)

            if match:
                # If there is a charset marker in the value, split it off.
                encoding, item_value = match.groups()

                # An empty or unknown charset leaves the value percent-encoded;
                # invalid bytes are replaced during unquoting.
                if encoding.lower() in _SAFE_CHARSETS:
                    item_value = unquote(item_value, encoding=encoding.lower())

        result[key] = _strip_outer_quotes(item_value)

    return result


def parse_www_authenticate(www_authenticate: str) -> dict[str, str | None]:
    """Parse the parameters of a ``WWW-Authenticate`` challenge.

    The leading scheme word is dropped and the remainder is parsed with
    :func:`parse_dict_header`. Parameter names are lowercased so callers can
    look them up regardless of how the server spelled them; parameter values
    are returned unchanged.

    :param www_authenticate: Raw header value, e.g. ``Bearer realm="...",service="..."``.
    :return: Mapping of lowercased parameter name to value.
    """
    _scheme, _, rest = www_authenticate.partition(' ')
    params = parse_dict_header(rest.strip().rstrip('='))
    return {name.lower(): val for name, val in params.items()}


class AuthState:
    """Registry credentials loaded from auth files, plus a bearer token cache.

    ``auth_map`` maps an auth-file key (a registry host or a longer image path
    prefix) to the stored ``auth`` string. ``token_cache`` maps a registry
    authority (the part of an image path before the first ``/``) to a bearer
    token.
    """

    def __init__(self, auth_files: list[Path] = AUTH_FILES):
        """Load every file in *auth_files*; later files override earlier ones."""
        self.auth_map: dict[str, str] = {}
        for f in auth_files:
            self.auth_map.update(AuthState.load_auth_file(f))
        self.token_cache: dict[str, str] = {}

    @staticmethod
    def load_auth_file(path: Path) -> dict[str, str]:
        """Read the ``auths`` table of one auth file.

        Returns an empty dict when the file is missing, is not valid JSON, or
        has no ``auths`` object. Entries without an ``auth`` field are skipped
        individually rather than invalidating the whole file.
        """
        if not path.exists():
            return {}

        with path.open() as fh:
            try:
                json_obj = json.load(fh)
            except json.JSONDecodeError as e:
                log.exception('JSON decode error in %s', path, exc_info=e)
                return {}

        auths = json_obj.get('auths') if isinstance(json_obj, dict) else None
        if not isinstance(auths, dict):
            log.debug('No "auths" object in %s', path)
            return {}

        return {
            key: entry['auth']
            for key, entry in auths.items()
            if isinstance(entry, dict) and 'auth' in entry
        }

    def get_token(self, hostname: str) -> Optional[str]:
        """Return the cached bearer token for *hostname*, if any."""
        return self.token_cache.get(hostname)

    def forget_token(self, hostname: str) -> None:
        """Drop the cached bearer token for *hostname* (no-op if absent)."""
        self.token_cache.pop(hostname, None)

    def obtain_token(self, session: requests.Session, token_endpoint: str,
                     scope: str, service: str, image_path: str) -> str:
        """Return a bearer token for *image_path*, fetching one if not cached.

        :param session: Session used to contact the token endpoint.
        :param token_endpoint: URL taken from the challenge's ``realm``.
        :param scope: ``scope`` parameter of the challenge.
        :param service: ``service`` parameter of the challenge.
        :param image_path: ``authority/path`` of the image being accessed.
        :raises ValueError: if no stored credential matches *image_path*.
        :raises requests.HTTPError: if the token endpoint answers with an error.
        :raises KeyError: if the response carries neither ``token`` nor
            ``access_token``.
        """
        authority, _, _ = image_path.partition('/')
        if tok := self.get_token(authority):
            return tok

        creds = self.find_credential_for(image_path)
        if not creds:
            raise ValueError('No credentials available for ' + image_path)

        resp = session.get(token_endpoint,
                           params={
                               'client_id': 'lix-releng',
                               'scope': scope,
                               'service': service,
                           },
                           headers={'Authorization': 'Basic ' + creds})
        # Fail with the real HTTP error instead of a KeyError on the error body.
        resp.raise_for_status()
        payload = resp.json()
        # The token spec allows the token under either name.
        token = payload.get('token') or payload['access_token']
        # Store under the same key get_token() is queried with; the original
        # stored under `service`, which misses whenever the two differ.
        self.token_cache[authority] = token
        return token

    def find_credential_for(self, image_path: str) -> Optional[str]:
        """Find the stored credential with the longest prefix of *image_path*.

        For ``host/org/image`` this tries ``host/org/image``, ``host/org`` and
        then ``host``; returns ``None`` when nothing matches.
        """
        trails = image_path.split('/')
        for end in range(len(trails), 0, -1):
            prefix = '/'.join(trails[:end])
            if prefix in self.auth_map:
                return self.auth_map[prefix]

        return None


# Attribute set on a request that response_hook has already retried, so a
# second 401 is handed back to the caller instead of being retried forever.
_AUTH_RETRIED_ATTR = '_registry_auth_retried'


class RegistryAuthenticator(requests.auth.AuthBase):
    """Authenticates to an OCI compliant registry"""

    def __init__(self, auth_state: AuthState, session: requests.Session,
                 image: str):
        # Not read anywhere in this class; kept so the attribute set is unchanged.
        self.auth_map: dict[str, str] = {}
        self.image = image
        self.session = session
        self.auth_state = auth_state

    def response_hook(self, r: requests.Response,
                      **kwargs: Any) -> requests.Response:
        """Answer a 401 Bearer challenge by fetching a token and retrying once.

        Any other response, a 401 for an already retried request, or a 401
        whose challenge is unusable is returned unchanged so the caller's
        ``raise_for_status()`` reports it.
        """
        if r.status_code != 401:
            return r

        if getattr(r.request, _AUTH_RETRIED_ATTR, False):
            return r

        # Do not lowercase the header: realm URL, scope and service are
        # case-sensitive. parse_www_authenticate normalizes the names only.
        www_authenticate = r.headers.get('www-authenticate', '')
        scheme = www_authenticate.partition(' ')[0].lower()
        parsed = parse_www_authenticate(www_authenticate)
        realm = parsed.get('realm')
        scope = parsed.get('scope')
        service = parsed.get('service')
        if scheme != 'bearer' or not (realm and scope and service):
            log.warning('Unusable WWW-Authenticate challenge for %s: %r',
                        self.image, www_authenticate)
            return r

        authority, _, _ = self.image.partition('/')
        if r.request.headers.get('Authorization', '').startswith('Bearer '):
            # The cached token we sent was rejected; make sure a new one is
            # fetched rather than the same one being handed back.
            self.auth_state.forget_token(authority)

        tok = self.auth_state.obtain_token(self.session, realm, scope,
                                           service, self.image)

        # Drain and release the 401 response so its connection can be reused.
        r.content
        r.close()

        new_req = r.request.copy()
        new_req.headers['Authorization'] = 'Bearer ' + tok
        setattr(new_req, _AUTH_RETRIED_ATTR, True)

        # Reuse the original send options (timeout, verify, proxies, ...).
        return self.session.send(new_req, **kwargs)

    def __call__(self,
                 r: requests.PreparedRequest) -> requests.PreparedRequest:
        """Attach a cached bearer token, if any, and install the 401 handler."""
        authority, _, _ = self.image.partition('/')
        auth_may = self.auth_state.get_token(authority)

        if auth_may:
            r.headers['Authorization'] = 'Bearer ' + auth_may

        r.register_hook('response', self.response_hook)
        return r


class Registry:
    """Minimal client for the manifest endpoints of an OCI registry.

    Image paths are given as ``authority/repository``; requests go to
    ``https://authority/v2/repository/manifests/<reference>``.
    """

    def __init__(self, session: requests.Session):
        self.auth_state = AuthState()
        self.session = session

    @staticmethod
    def _manifest_url(image_path: str, reference: str) -> str:
        """Build the manifests URL for *reference* (a tag or digest)."""
        authority, _, path = image_path.partition('/')
        return f'https://{authority}/v2/{path}/manifests/{reference}'

    def _auth(self, image_path: str) -> RegistryAuthenticator:
        """Create the per-request authenticator for *image_path*."""
        return RegistryAuthenticator(self.auth_state, self.session, image_path)

    def image_info(self, image_path: str, manifest_id: str) -> ImageMetadata:
        """HEAD a manifest and return its size and digest from the headers.

        :raises requests.HTTPError: if the registry answers with an error.
        """
        resp = self.session.head(self._manifest_url(image_path, manifest_id),
                                 headers={'Accept': MANIFEST_MIME},
                                 auth=self._auth(image_path))
        resp.raise_for_status()
        return ImageMetadata(int(resp.headers['content-length']),
                             resp.headers['docker-content-digest'])

    def delete_tag(self, image_path: str, tag: str):
        """Send a DELETE for the manifest reference *tag*.

        :raises requests.HTTPError: if the registry answers with an error.
        """
        resp = self.session.delete(self._manifest_url(image_path, tag),
                                   headers={'Content-Type': INDEX_MIME},
                                   auth=self._auth(image_path))
        resp.raise_for_status()

    def _upload_index(self, image_path: str, tag: str, index: OCIIndex):
        """PUT *index* under *tag* once and return the ``Location`` header.

        The body is serialized compactly with sorted keys.

        :raises requests.HTTPError: if the registry answers with an error.
        """
        body = json.dumps(index.serialize(),
                          separators=(',', ':'),
                          sort_keys=True)

        resp = self.session.put(self._manifest_url(image_path, tag),
                                data=body,
                                headers={'Content-Type': INDEX_MIME},
                                auth=self._auth(image_path))
        resp.raise_for_status()

        return resp.headers['Location']

    def upload_index(self,
                     image_path: str,
                     tag: str,
                     index: OCIIndex,
                     retries=20,
                     retry_delay=1):
        """Upload *index* under *tag*, retrying while the registry answers 404.

        The registry is eventually consistent, so an upload can be answered
        with 404 at first; such attempts are repeated up to *retries* times,
        *retry_delay* seconds apart.

        :return: The ``Location`` header of the successful upload.
        :raises requests.HTTPError: immediately for any non-404 error, or the
            last 404 once every attempt has failed (the original silently
            returned ``None`` in that case).
        """
        last_error: Optional[requests.HTTPError] = None

        # eventual consistency lmao
        for attempt in range(retries):
            try:
                return self._upload_index(image_path, tag, index)
            except requests.HTTPError as e:
                if e.response is None or e.response.status_code != 404:
                    raise
                last_error = e

            # No point in waiting after the final attempt.
            if attempt + 1 < retries:
                time.sleep(retry_delay)

        if last_error is not None:
            raise last_error
        return None