aboutsummaryrefslogtreecommitdiff
path: root/releng/docker_assemble.py
diff options
context:
space:
mode:
Diffstat (limited to 'releng/docker_assemble.py')
-rw-r--r--releng/docker_assemble.py399
1 files changed, 399 insertions, 0 deletions
diff --git a/releng/docker_assemble.py b/releng/docker_assemble.py
new file mode 100644
index 000000000..ef1d8c4e6
--- /dev/null
+++ b/releng/docker_assemble.py
@@ -0,0 +1,399 @@
+from typing import Any, Literal, Optional
+import re
+from pathlib import Path
+import json
+import dataclasses
+import time
+from urllib.parse import unquote
+import urllib.request
+import logging
+
+import requests.auth
+import requests
+import xdg_base_dirs
+
# Module logger; INFO and above are emitted by default.
log = logging.getLogger(__name__)
log.setLevel(logging.INFO)

# Flip to True to get urllib3's connection-level debug output for every HTTP
# request issued through `requests`.
DEBUG_REQUESTS = False
if DEBUG_REQUESTS:
    # requests no longer vendors urllib3, so urllib3's loggers are named
    # 'urllib3.*'. The historical 'requests.packages.urllib3' logger name does
    # not receive any records any more.
    urllib3_logger = logging.getLogger('urllib3')
    urllib3_logger.setLevel(logging.DEBUG)
    urllib3_logger.propagate = True
+
+# So, there is a bunch of confusing stuff happening in this file. The gist of why it's Like This is:
+#
+# nix2container does not concern itself with tags (reasonably enough):
+# https://github.com/nlewo/nix2container/issues/59
+#
+# This is fine. But then we noticed: docker images don't play nice if you have
+# multiple architectures you want to abstract over if you don't do special
+# things. Those special things are images with manifests containing multiple
+# images.
+#
+# Docker has a data model vaguely analogous to git: you have higher level
+# objects referring to a bunch of content-addressed blobs.
+#
+# A multiarch image is more or less just a manifest that refers to more
+# manifests; in OCI it is an Index.
+#
+# See the API spec here: https://github.com/opencontainers/distribution-spec/blob/v1.0.1/spec.md#definitions
+# And the Index spec here: https://github.com/opencontainers/image-spec/blob/v1.0.1/image-index.md
+#
+# skopeo doesn't *know* how to make multiarch *manifests*:
+# https://github.com/containers/skopeo/issues/1136
+#
+# There is a tool called manifest-tool that is supposed to do this
+# (https://github.com/estesp/manifest-tool) but it doesn't support putting in
+# annotations on the outer image, and I *really* didn't want to write golang to
+# fix that. Thus, a little bit of homebrew containers code.
+#
+# Essentially what we are doing in here is splatting a bunch of images into the
+# registry without tagging them (except as "temp", due to podman issues), then
+# simply sending a new composite manifest ourselves.
+
# Architecture names as they appear in the 'platform' object of an index entry.
DockerArchitecture = Literal['amd64', 'arm64']

# Media type of a single-image manifest and of a multi-image index.
MANIFEST_MIME = 'application/vnd.oci.image.manifest.v1+json'
INDEX_MIME = 'application/vnd.oci.image.index.v1+json'
+
+
@dataclasses.dataclass(order=True, frozen=True)
class ImageMetadata:
    """Size and content digest of a manifest.

    Instances are immutable and order by ``(size, digest)``.
    """

    # Size of the manifest.
    size: int
    # Content digest, in the form "sha256:SOMEHEX".
    digest: str
+
+
@dataclasses.dataclass(order=True, frozen=True)
class OCIIndexItem:
    """Information about an untagged uploaded image.

    One instance becomes one entry of the ``manifests`` list of an index.
    """

    # Size and digest of the image's manifest.
    metadata: ImageMetadata

    # Architecture written into the entry's platform object.
    architecture: DockerArchitecture

    # Operating system written into the entry's platform object.
    os: str = 'linux'

    def serialize(self):
        """Return this entry as a JSON-serializable descriptor dict."""
        platform = {
            'architecture': self.architecture,
            'os': self.os,
        }
        descriptor = {
            'mediaType': MANIFEST_MIME,
            'size': self.metadata.size,
            'digest': self.metadata.digest,
            'platform': platform,
        }
        return descriptor
+
+
@dataclasses.dataclass(frozen=True)
class OCIIndex:
    """A multi-image index: per-platform entries plus top-level annotations."""

    # The per-platform images the index refers to.
    manifests: list[OCIIndexItem]

    # Annotations placed on the index itself.
    annotations: dict[str, str]

    def serialize(self):
        """Return the index as a JSON-serializable dict.

        Entries are emitted in sorted order so the output does not depend on
        the order in which ``manifests`` was assembled.
        """
        ordered_entries = sorted(self.manifests)
        descriptors = [entry.serialize() for entry in ordered_entries]
        return {
            'schemaVersion': 2,
            'manifests': descriptors,
            'annotations': self.annotations,
        }
+
+
def docker_architecture_from_nix_system(system: str) -> DockerArchitecture:
    """Translate a Nix system string into the matching Docker architecture.

    :param system: A Nix system such as ``x86_64-linux``.
    :raises KeyError: If ``system`` is not one of the known systems.
    """
    nix_to_docker: dict[str, DockerArchitecture] = {
        'x86_64-linux': 'amd64',
        'aarch64-linux': 'arm64',
    }
    return nix_to_docker[system]
+
+
@dataclasses.dataclass
class TaggingOperation:
    """An index together with the tags it is to be published under."""

    # The composite index to upload.
    manifest: OCIIndex
    # Tags this image is uploaded under.
    tags: list[str]
+
+
runtime_dir = xdg_base_dirs.xdg_runtime_dir()
config_dir = xdg_base_dirs.xdg_config_home()

# Credential files to consult, in this order. The runtime-dir entry is only
# present when a runtime directory is available.
AUTH_FILES = [
    *([runtime_dir / 'containers/auth.json'] if runtime_dir else []),
    config_dir / 'containers/auth.json',
    Path.home() / '.docker/config.json',
]
+
+
# Copied from Werkzeug https://github.com/pallets/werkzeug/blob/62e3ea45846d06576199a2f8470be7fe44c867c1/src/werkzeug/http.py#L300-L325
def parse_list_header(value: str) -> list[str]:
    """Split a comma separated header value into its items, per
    `RFC 9110 <https://httpwg.org/specs/rfc9110.html#abnf.extension>`__.

    This builds on :func:`urllib.request.parse_http_list` and additionally
    drops the double quotes around items that are entirely quoted.

    .. code-block:: python

        parse_list_header('token, "quoted value"')
        ['token', 'quoted value']

    :param value: The header value to parse.
    """

    def strip_quotes(item: str) -> str:
        # Only an item that both starts and ends with '"' is unwrapped.
        is_quoted = len(item) >= 2 and item[0] == item[-1] == '"'
        return item[1:-1] if is_quoted else item

    return [
        strip_quotes(item) for item in urllib.request.parse_http_list(value)
    ]
+
+
+# https://www.rfc-editor.org/rfc/rfc2231#section-4
+_charset_value_re = re.compile(
+ r"""
+ ([\w!#$%&*+\-.^`|~]*)' # charset part, could be empty
+ [\w!#$%&*+\-.^`|~]*' # don't care about language part, usually empty
+ ([\w!#$%&'*+\-.^`|~]+) # one or more token chars with percent encoding
+ """,
+ re.ASCII | re.VERBOSE,
+)
+
+
# Adapted from: https://github.com/pallets/werkzeug/blob/62e3ea45846d06576199a2f8470be7fe44c867c1/src/werkzeug/http.py#L327-L394
def parse_dict_header(value: str) -> dict[str, str | None]:
    """Parse a list header using :func:`parse_list_header`, then parse each item as a
    ``key=value`` pair.

    .. code-block:: python

        parse_dict_header('a=b, c="d, e", f')
        {"a": "b", "c": "d, e", "f": None}

    If a key does not have a value, it is ``None``.

    This handles charsets for values as described in
    `RFC 2231 <https://www.rfc-editor.org/rfc/rfc2231#section-3>`__: an item of
    the form ``key*=charset'language'value`` is stored under ``key``. Only
    ASCII, UTF-8, and ISO-8859-1 charsets are percent-decoded; with any other
    (or an empty) charset the value is left percent-encoded.

    :param value: The header value to parse.
    :return: A dict of the parsed items; a later duplicate key overwrites an
        earlier one.
    """
    result: dict[str, str | None] = {}

    for item in parse_list_header(value):
        key, has_value, item_value = item.partition("=")
        key = key.strip()

        if not has_value:
            result[key] = None
            continue

        item_value = item_value.strip()

        # endswith() rather than key[-1]: an item such as "=x" has an empty
        # key, and indexing it would raise IndexError.
        if key.endswith("*"):
            # key*=charset''value becomes key=value, where value is percent encoded
            # adapted from parse_options_header, without the continuation handling
            key = key[:-1]
            match = _charset_value_re.match(item_value)

            if match:
                # If there is a charset marker in the value, split it off.
                # The charset may legitimately be empty; it then simply fails
                # the safe-list check below instead of aborting the parse.
                encoding, item_value = match.groups()
                encoding = encoding.lower()

                # A safe list of encodings. Modern clients should only send ASCII or UTF-8.
                # This list will not be extended further. An invalid encoding will leave the
                # value quoted.
                if encoding in {"ascii", "us-ascii", "utf-8", "iso-8859-1"}:
                    # invalid bytes are replaced during unquoting
                    item_value = unquote(item_value, encoding=encoding)

        if len(item_value) >= 2 and item_value[0] == item_value[-1] == '"':
            item_value = item_value[1:-1]

        result[key] = item_value

    return result
+
+
def parse_www_authenticate(www_authenticate):
    """Parse the parameters of a ``WWW-Authenticate`` header value.

    Everything up to the first space (the authentication scheme) is discarded.
    The remainder is stripped of surrounding whitespace and of trailing ``=``
    characters, then parsed with :func:`parse_dict_header`.

    :param www_authenticate: The raw header value.
    :return: The parameters as a dict, e.g. ``realm``, ``service``, ``scope``.
    """
    # NOTE(review): why trailing '=' is stripped is not evident from this
    # file; kept as-is.
    _scheme, _sep, params = www_authenticate.partition(' ')
    return parse_dict_header(params.strip().rstrip('='))
+
+
class AuthState:
    """Registry credentials read from auth files, plus a bearer-token cache.

    ``auth_map`` maps the keys of the ``auths`` object in the auth files to
    their ``auth`` values. ``token_cache`` maps a registry authority (the part
    of an image path before the first ``/``) to the bearer token most recently
    obtained for it.
    """

    def __init__(self, auth_files: list[Path] = AUTH_FILES):
        """Load credentials from ``auth_files``.

        Files are merged in order, so for a key present in several files the
        last file wins.
        """
        self.auth_map: dict[str, str] = {}
        for f in auth_files:
            self.auth_map.update(AuthState.load_auth_file(f))
        self.token_cache: dict[str, str] = {}
        # authority -> (scope the cached token was issued for, time.monotonic()
        # deadline after which it is considered expired)
        self._token_meta: dict[str, tuple[str, float]] = {}

    @staticmethod
    def load_auth_file(path: Path) -> dict[str, str]:
        """Return the ``{name: auth}`` pairs found in the auth file at ``path``.

        A missing file, a file that is not valid JSON, or a file without an
        ``auths`` object yields an empty dict. Entries that carry no ``auth``
        value (as written when a credential helper is in use) are skipped
        instead of invalidating the whole file.
        """
        try:
            with path.open() as fh:
                json_obj = json.load(fh)
        except FileNotFoundError:
            return {}
        except json.JSONDecodeError:
            log.exception('JSON decode error in %s', path)
            return {}

        auths = json_obj.get('auths') if isinstance(json_obj, dict) else None
        if not isinstance(auths, dict):
            log.debug('No "auths" object in %s', path)
            return {}

        return {
            name: entry['auth']
            for name, entry in auths.items()
            if isinstance(entry, dict) and 'auth' in entry
        }

    def get_token(self, hostname: str) -> Optional[str]:
        """Return the cached, unexpired bearer token for ``hostname``, if any."""
        token = self.token_cache.get(hostname)
        if token is None:
            return None

        meta = self._token_meta.get(hostname)
        if meta is not None and time.monotonic() >= meta[1]:
            # Expired: forget it so that a fresh token gets requested.
            self.token_cache.pop(hostname, None)
            self._token_meta.pop(hostname, None)
            return None

        return token

    def obtain_token(self, session: requests.Session, token_endpoint: str,
                     scope: str, service: str, image_path: str) -> str:
        """Return a bearer token for ``image_path``, requesting one if needed.

        A cached token is reused only if it was issued for the same ``scope``.
        Otherwise a token is requested from ``token_endpoint`` using the
        credential found for ``image_path`` and cached under the registry
        authority, which is the key :meth:`get_token` is queried with.

        :raises ValueError: If no credential is known for ``image_path``.
        :raises requests.HTTPError: If the token endpoint reports an error.
        :raises KeyError: If the response contains no token.
        """
        authority, _, _ = image_path.partition('/')
        cached = self.get_token(authority)
        meta = self._token_meta.get(authority)
        if cached and (meta is None or meta[0] == scope):
            return cached

        creds = self.find_credential_for(image_path)
        if not creds:
            raise ValueError('No credentials available for ' + image_path)

        resp = session.get(token_endpoint,
                           params={
                               'client_id': 'lix-releng',
                               'scope': scope,
                               'service': service,
                           },
                           headers={
                               'Authorization': 'Basic ' + creds
                           })
        # Surface a rejected login as an HTTP error rather than as a missing
        # key in whatever error body came back.
        resp.raise_for_status()
        body = resp.json()

        # The token spec allows the token under either name.
        token = body.get('token') or body['access_token']
        # Per the token spec, a missing expires_in means 60 seconds.
        lifetime = body.get('expires_in') or 60

        # Cache under the authority, not under `service`: the two can differ,
        # and lookups are always made by authority.
        self.token_cache[authority] = token
        self._token_meta[authority] = (scope,
                                       time.monotonic() + float(lifetime))
        return token

    def find_credential_for(self, image_path: str) -> Optional[str]:
        """Return the credential for the longest known prefix of ``image_path``.

        Prefixes are tried from the full path down to the bare authority,
        dropping one trailing ``/`` component at a time. Returns ``None`` when
        nothing matches.
        """
        trails = image_path.split('/')
        for end in range(len(trails), 0, -1):
            prefix = '/'.join(trails[:end])
            if prefix in self.auth_map:
                return self.auth_map[prefix]

        return None
+
+
class RegistryAuthenticator(requests.auth.AuthBase):
    """Authenticates to an OCI compliant registry

    Attaches a cached bearer token to outgoing requests. When the registry
    answers 401, obtains a token as directed by the ``WWW-Authenticate``
    challenge and retries the request once.
    """

    def __init__(self, auth_state: AuthState, session: requests.Session,
                 image: str):
        self.auth_map: dict[str, str] = {}
        self.image = image
        self.session = session
        self.auth_state = auth_state

    def response_hook(self, r: requests.Response,
                      **kwargs: Any) -> requests.Response:
        """Handle a 401 by obtaining a token and resending the request once.

        Any other response is returned unchanged. A 401 whose challenge lacks
        ``realm``, ``scope`` or ``service`` is also returned unchanged, so the
        caller sees the original 401.
        """
        if r.status_code != 401:
            return r

        # Parameter names are case-insensitive but their values are not (the
        # realm is a URL), so normalize only the keys rather than lowercasing
        # the whole header.
        challenge = parse_www_authenticate(
            r.headers.get('www-authenticate', ''))
        params = {name.lower(): val for name, val in challenge.items()}
        realm = params.get('realm')
        scope = params.get('scope')
        service = params.get('service')
        if not (realm and scope and service):
            return r

        # If the request already carried the cached token, that token was just
        # rejected: evict it so a fresh one is requested instead of the same
        # one being handed back.
        authority, _, _ = self.image.partition('/')
        cached = self.auth_state.get_token(authority)
        if cached and r.request.headers.get(
                'Authorization') == 'Bearer ' + cached:
            self.auth_state.token_cache.pop(authority, None)

        tok = self.auth_state.obtain_token(self.session, realm, scope,
                                           service, self.image)

        # Consume the body and release the connection so that the retry can
        # reuse it.
        r.content
        r.close()

        new_req = r.request.copy()
        new_req.headers['Authorization'] = 'Bearer ' + tok
        # Retry once only: without this, a registry that keeps answering 401
        # would send this hook into unbounded recursion.
        new_req.deregister_hook('response', self.response_hook)

        # Forward the send options (timeout, verify, proxies, ...) of the
        # original request.
        return self.session.send(new_req, **kwargs)

    def __call__(self,
                 r: requests.PreparedRequest) -> requests.PreparedRequest:
        """Attach a cached token, if any, and install the 401 handler."""
        authority, _, _ = self.image.partition('/')
        auth_may = self.auth_state.get_token(authority)

        if auth_may:
            r.headers['Authorization'] = 'Bearer ' + auth_may

        r.register_hook('response', self.response_hook)
        return r
+
+
class Registry:
    """Small client for the manifest endpoints of a registry.

    Image paths have the form ``authority/repository/path``; requests go to
    ``https://{authority}/v2/{repository/path}/manifests/{reference}``.
    """

    def __init__(self, session: requests.Session):
        self.auth_state = AuthState()
        self.session = session

    @staticmethod
    def _manifest_url(image_path: str, reference: str) -> str:
        """Return the manifest URL for ``reference`` (a tag or a digest)."""
        authority, _, path = image_path.partition('/')
        return f'https://{authority}/v2/{path}/manifests/{reference}'

    def _auth_for(self, image_path: str) -> RegistryAuthenticator:
        """Return an authenticator bound to this registry's shared auth state."""
        return RegistryAuthenticator(self.auth_state, self.session,
                                     image_path)

    def image_info(self, image_path: str, manifest_id: str) -> ImageMetadata:
        """Fetch size and digest of a manifest with a HEAD request.

        :raises requests.HTTPError: If the registry reports an error.
        """
        resp = self.session.head(self._manifest_url(image_path, manifest_id),
                                 headers={'Accept': MANIFEST_MIME},
                                 auth=self._auth_for(image_path))
        resp.raise_for_status()
        return ImageMetadata(int(resp.headers['content-length']),
                             resp.headers['docker-content-digest'])

    def delete_tag(self, image_path: str, tag: str):
        """Delete the manifest reference ``tag`` of ``image_path``.

        :raises requests.HTTPError: If the registry reports an error.
        """
        resp = self.session.delete(self._manifest_url(image_path, tag),
                                   headers={'Content-Type': INDEX_MIME},
                                   auth=self._auth_for(image_path))
        resp.raise_for_status()

    def _upload_index(self, image_path: str, tag: str, index: OCIIndex):
        """PUT ``index`` as the manifest for ``tag``; return the Location header.

        The body is serialized compactly with sorted keys, so equal indexes
        produce byte-identical uploads.

        :raises requests.HTTPError: If the registry reports an error.
        """
        body = json.dumps(index.serialize(),
                          separators=(',', ':'),
                          sort_keys=True)

        resp = self.session.put(self._manifest_url(image_path, tag),
                                data=body,
                                headers={'Content-Type': INDEX_MIME},
                                auth=self._auth_for(image_path))
        resp.raise_for_status()

        return resp.headers['Location']

    def upload_index(self,
                     image_path: str,
                     tag: str,
                     index: OCIIndex,
                     retries=20,
                     retry_delay=1):
        """Upload ``index`` under ``tag``, retrying while the registry says 404.

        :param retries: Maximum number of attempts.
        :param retry_delay: Seconds to wait between attempts.
        :return: The Location header of the successful upload.
        :raises requests.HTTPError: Immediately for any non-404 error, or the
            last 404 once all attempts are used up.
        """
        # eventual consistency lmao
        # NOTE(review): presumably the 404 means the registry cannot yet see
        # the manifests the index refers to - confirm.
        last_error: Optional[requests.HTTPError] = None
        for attempt in range(retries):
            if attempt:
                # Wait between attempts only, not after the final one.
                time.sleep(retry_delay)

            try:
                return self._upload_index(image_path, tag, index)
            except requests.HTTPError as e:
                if e.response is None or e.response.status_code != 404:
                    raise
                last_error = e

        # Do not report success when every attempt failed.
        if last_error is not None:
            raise last_error