releng/docker_assemble.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391

from typing import Any, Literal, Optional
import re
from pathlib import Path
import json
import dataclasses
import time
from urllib.parse import unquote
import urllib.request
import logging

import requests.auth
import requests
import xdg_base_dirs

# Module logger; INFO and above are emitted for this module.
log = logging.getLogger(__name__)
log.setLevel(logging.INFO)

# Flip to True to turn on debug logging for the HTTP layer used by requests.
DEBUG_REQUESTS = False
if DEBUG_REQUESTS:
    # requests no longer bundles its own copy of urllib3, and urllib3's
    # loggers are named after its modules (e.g. 'urllib3.connectionpool'),
    # so the parent logger to configure is 'urllib3' rather than
    # 'requests.packages.urllib3'.
    urllib3_logger = logging.getLogger('urllib3')
    urllib3_logger.setLevel(logging.DEBUG)
    urllib3_logger.propagate = True

# So, there is a bunch of confusing stuff happening in this file. The gist of why it's Like This is:
#
# nix2container does not concern itself with tags (reasonably enough):
# https://github.com/nlewo/nix2container/issues/59
#
# This is fine. But then we noticed: docker images don't play nice when you
# want to abstract over multiple architectures, unless you do special things.
# Those special things are images whose manifests contain multiple images.
#
# Docker has a data model vaguely analogous to git: you have higher level
# objects referring to a bunch of content-addressed blobs.
#
# A multiarch image is more or less just a manifest that refers to more
# manifests; in OCI it is an Index.
#
# See the API spec here: https://github.com/opencontainers/distribution-spec/blob/v1.0.1/spec.md#definitions
# And the Index spec here: https://github.com/opencontainers/image-spec/blob/v1.0.1/image-index.md
#
# skopeo doesn't *know* how to make multiarch *manifests*:
# https://github.com/containers/skopeo/issues/1136
#
# There is a tool called manifest-tool that is supposed to do this
# (https://github.com/estesp/manifest-tool) but it doesn't support putting in
# annotations on the outer image, and I *really* didn't want to write golang to
# fix that. Thus, a little bit of homebrew containers code.
#
# Essentially what we are doing in here is splatting a bunch of images into the
# registry without tagging them (except as "temp", due to podman issues), then
# simply sending a new composite manifest ourselves.

# Architecture names accepted for the 'platform.architecture' field of an
# index entry.
DockerArchitecture = Literal['amd64'] | Literal['arm64']
# Media type of a single-image OCI manifest.
MANIFEST_MIME = 'application/vnd.oci.image.manifest.v1+json'
# Media type of an OCI image index (the manifest that lists other manifests).
INDEX_MIME = 'application/vnd.oci.image.index.v1+json'


@dataclasses.dataclass(order=True, frozen=True)
class ImageMetadata:
    """Size and content digest describing one uploaded image manifest.

    Instances are immutable and compare/sort by ``(size, digest)``.
    """

    # Manifest size, emitted as the 'size' field of an index entry.
    size: int
    # Content digest, formatted as sha256:SOMEHEX.
    digest: str


@dataclasses.dataclass(order=True, frozen=True)
class OCIIndexItem:
    """One untagged, already-uploaded image as listed inside an index."""

    # Size and digest of the image's manifest.
    metadata: ImageMetadata

    # CPU architecture reported in the entry's platform section.
    architecture: DockerArchitecture

    # Operating system reported in the entry's platform section.
    os: str = 'linux'

    def serialize(self):
        """Return this entry as a JSON-ready descriptor dict."""
        platform = {
            'architecture': self.architecture,
            'os': self.os,
        }
        descriptor = {
            'mediaType': MANIFEST_MIME,
            'size': self.metadata.size,
            'digest': self.metadata.digest,
            'platform': platform,
        }
        return descriptor


@dataclasses.dataclass(frozen=True)
class OCIIndex:
    """A multi-image index: a list of per-platform entries plus annotations."""

    # Entries of the index; they are sorted when serialized.
    manifests: list[OCIIndexItem]

    # Annotations placed on the index itself.
    annotations: dict[str, str]

    def serialize(self):
        """Return the index as a JSON-ready dict with its entries sorted."""
        entries = []
        for entry in sorted(self.manifests):
            entries.append(entry.serialize())

        document = {'schemaVersion': 2}
        document['manifests'] = entries
        document['annotations'] = self.annotations
        return document


@dataclasses.dataclass
class TaggingOperation:
    """An index manifest together with the tags it is uploaded under."""

    # The index manifest for this operation.
    manifest: OCIIndex
    # Tags this image is uploaded under.
    tags: list[str]


runtime_dir = xdg_base_dirs.xdg_runtime_dir()
config_dir = xdg_base_dirs.xdg_config_home()

# Candidate credential files: containers/auth.json under the XDG runtime
# directory (only when a runtime directory is available), containers/auth.json
# under the XDG config home, and .docker/config.json in the home directory.
AUTH_FILES = [
    *([runtime_dir / 'containers/auth.json'] if runtime_dir else []),
    config_dir / 'containers/auth.json',
    Path.home() / '.docker/config.json',
]


# Copied from Werkzeug https://github.com/pallets/werkzeug/blob/62e3ea45846d06576199a2f8470be7fe44c867c1/src/werkzeug/http.py#L300-L325
def parse_list_header(value: str) -> list[str]:
    """Split a comma separated header value into its items.

    Splitting is delegated to :func:`urllib.request.parse_http_list`, so commas
    inside a quoted string do not split an item. On top of that, an item that
    is entirely wrapped in double quotes has that one pair of quotes removed.

    .. code-block:: python

        parse_list_header('token, "quoted value"')
        ['token', 'quoted value']

    :param value: The header value to parse.
    """

    def _strip_quotes(token: str) -> str:
        # Only a token of at least two characters that both starts and ends
        # with a double quote counts as quoted.
        is_quoted = len(token) >= 2 and token[0] == token[-1] == '"'
        return token[1:-1] if is_quoted else token

    return [
        _strip_quotes(token)
        for token in urllib.request.parse_http_list(value)
    ]


# https://www.rfc-editor.org/rfc/rfc2231#section-4
# Matches an extended parameter value of the form
# charset'language'percent-encoded-value. Group 1 is the charset (which may be
# empty) and group 2 is the value, still percent encoded; the language part is
# matched but not captured.
_charset_value_re = re.compile(
    r"""
    ([\w!#$%&*+\-.^`|~]*)'  # charset part, could be empty
    [\w!#$%&*+\-.^`|~]*'  # don't care about language part, usually empty
    ([\w!#$%&'*+\-.^`|~]+)  # one or more token chars with percent encoding
    """,
    re.ASCII | re.VERBOSE,
)


# Copied from: https://github.com/pallets/werkzeug/blob/62e3ea45846d06576199a2f8470be7fe44c867c1/src/werkzeug/http.py#L327-L394
def parse_dict_header(value: str) -> dict[str, str | None]:
    """Parse a list header using :func:`parse_list_header`, then parse each item as a
    ``key=value`` pair.

    .. code-block:: python

        parse_dict_header('a=b, c="d, e", f')
        {"a": "b", "c": "d, e", "f": None}

    If a key does not have a value, it is ``None``.

    This handles charsets for values as described in
    `RFC 2231 <https://www.rfc-editor.org/rfc/rfc2231#section-3>`__. Only ASCII, UTF-8,
    and ISO-8859-1 charsets are accepted, otherwise the value remains quoted.
    An item with an empty key (``"=x"``) or an empty charset
    (``key*=''value``) is accepted rather than raising.

    :param value: The header value to parse.
    :returns: Mapping of each key to its value; a later duplicate key
        overwrites an earlier one.
    """
    result: dict[str, str | None] = {}

    for item in parse_list_header(value):
        key, has_value, item_value = item.partition("=")
        key = key.strip()

        if not has_value:
            result[key] = None
            continue

        item_value = item_value.strip()
        encoding: str | None = None

        # endswith() instead of key[-1]: the key is empty for an item such as
        # "=x", and indexing it would raise IndexError.
        if key.endswith("*"):
            # key*=charset''value becomes key=value, where value is percent encoded
            # adapted from parse_options_header, without the continuation handling
            key = key[:-1]
            match = _charset_value_re.match(item_value)

            if match:
                # If there is a charset marker in the value, split it off.
                # The charset may legitimately be empty; it then simply fails
                # the safe-list check below and the value stays encoded.
                encoding, item_value = match.groups()
                encoding = encoding.lower()

            # A safe list of encodings. Modern clients should only send ASCII or UTF-8.
            # This list will not be extended further. An invalid encoding will leave the
            # value quoted.
            if encoding in {"ascii", "us-ascii", "utf-8", "iso-8859-1"}:
                # invalid bytes are replaced during unquoting
                item_value = unquote(item_value, encoding=encoding)

        # Drop one pair of surrounding double quotes, if present.
        if len(item_value) >= 2 and item_value[0] == item_value[-1] == '"':
            item_value = item_value[1:-1]

        result[key] = item_value

    return result


def parse_www_authenticate(www_authenticate):
    """Parse the parameters of a ``WWW-Authenticate`` header value into a dict.

    Everything up to the first space (the auth scheme) is discarded. The
    remainder is stripped of surrounding whitespace and of trailing ``=``
    characters, then handed to :func:`parse_dict_header`.
    """
    _scheme, _sep, params = www_authenticate.partition(' ')
    return parse_dict_header(params.strip().rstrip('='))


class AuthState:
    """Registry credentials loaded from auth files, plus cached bearer tokens.

    ``auth_map`` maps a registry (or registry/path prefix) to the credential
    string stored under its ``auth`` key in the auth files. ``token_cache``
    maps a registry authority (the part of an image path before the first
    ``/``) to the bearer token last obtained for it.
    """

    def __init__(self, auth_files: list[Path] = AUTH_FILES):
        """Load credentials from every file in ``auth_files``.

        Files are read in order, so an entry in a later file overrides the
        same entry from an earlier one.
        """
        self.auth_map: dict[str, str] = {}
        for f in auth_files:
            self.auth_map.update(AuthState.load_auth_file(f))
        self.token_cache: dict[str, str] = {}

    @staticmethod
    def load_auth_file(path: Path) -> dict[str, str]:
        """Return the ``{registry: auth}`` entries found in ``path``.

        A missing file, a file that is not valid JSON, or a file without an
        ``auths`` section yields an empty dict. Entries under ``auths`` that
        have no ``auth`` key are skipped, so that one such entry does not
        discard the usable credentials in the same file.
        """
        if not path.exists():
            return {}

        with path.open() as fh:
            try:
                json_obj = json.load(fh)
            except json.JSONDecodeError as e:
                log.exception('JSON decode error in %s', path, exc_info=e)
                return {}

        auths = json_obj.get('auths', {})
        return {
            registry: entry['auth']
            for registry, entry in auths.items() if 'auth' in entry
        }

    def get_token(self, hostname: str) -> Optional[str]:
        """Return the cached bearer token for ``hostname``, if there is one."""
        return self.token_cache.get(hostname)

    def obtain_token(self, session: requests.Session, token_endpoint: str,
                     scope: str, service: str, image_path: str) -> str:
        """Return a bearer token for ``image_path``, requesting one if needed.

        A token already cached for the registry authority is returned as is.
        Otherwise the stored credential is sent as HTTP Basic auth to
        ``token_endpoint`` and the returned token is cached.

        :param session: Session used to perform the token request.
        :param token_endpoint: URL of the token service.
        :param scope: Scope to request, passed through unchanged.
        :param service: Service name to request, passed through unchanged.
        :param image_path: ``authority/path`` of the image being accessed.
        :raises ValueError: if no credential matches ``image_path``.
        :raises requests.HTTPError: if the token endpoint reports an error.
        :raises KeyError: if the response carries no token.
        """
        authority, _, _ = image_path.partition('/')
        if tok := self.get_token(authority):
            return tok

        creds = self.find_credential_for(image_path)
        if not creds:
            raise ValueError('No credentials available for ' + image_path)

        resp = session.get(token_endpoint,
                           params={
                               'client_id': 'lix-releng',
                               'scope': scope,
                               'service': service,
                           },
                           headers={'Authorization': 'Basic ' + creds})
        # Fail with the HTTP error itself rather than a KeyError on the body.
        resp.raise_for_status()
        body = resp.json()

        # Token services may return the token as 'token', or as
        # 'access_token' for OAuth 2.0 compatibility.
        token = body.get('token') or body.get('access_token')
        if not token:
            raise KeyError('token')

        # Store under the same key get_token() is queried with (the registry
        # authority); the service name need not be equal to it.
        # NOTE(review): the cache key ignores the scope, so a token is reused
        # for every repository on the same registry.
        self.token_cache[authority] = token
        return token

    def find_credential_for(self, image_path: str) -> Optional[str]:
        """Return the credential for the longest matching prefix of ``image_path``.

        The full path is tried first, then the path with trailing ``/``
        separated components removed one at a time. Returns ``None`` when no
        prefix has an entry in ``auth_map``.
        """
        parts = image_path.split('/')
        for end in range(len(parts), 0, -1):
            prefix = '/'.join(parts[:end])
            if prefix in self.auth_map:
                return self.auth_map[prefix]

        return None


class RegistryAuthenticator(requests.auth.AuthBase):
    """Authenticates to an OCI compliant registry

    Attaches a cached bearer token (if any) to outgoing requests and, when the
    registry answers 401, obtains a token from the endpoint named in the
    ``WWW-Authenticate`` challenge and repeats the request once.
    """

    def __init__(self, auth_state: AuthState, session: requests.Session,
                 image: str):
        """
        :param auth_state: Shared credentials and token cache.
        :param session: Session used for the token request and the retry.
        :param image: ``authority/path`` of the image being accessed.
        """
        # Not read by this class; kept so the attribute stays available.
        self.auth_map: dict[str, str] = {}
        self.image = image
        self.session = session
        self.auth_state = auth_state

    def response_hook(self, r: requests.Response,
                      **kwargs: Any) -> requests.Response:
        """Answer a 401 by fetching a token and re-sending the request once.

        Any other response is returned unchanged. The re-sent request does not
        carry this hook, so a second 401 is returned to the caller instead of
        being retried again.

        :raises ValueError: if the 401 carries no usable challenge.
        """
        if r.status_code != 401:
            return r

        challenge = parse_www_authenticate(
            r.headers.get('www-authenticate', ''))
        # Parameter names are matched case-insensitively, but the values
        # (realm URL, scope, service) are passed on exactly as received.
        parsed = {name.lower(): val for name, val in challenge.items()}
        if not parsed:
            raise ValueError('401 without a WWW-Authenticate challenge from ' +
                             str(r.request.url))

        if 'Authorization' in r.request.headers:
            # The token we sent was rejected; drop it from the cache so a
            # fresh one is requested instead of the same one being reused.
            authority, _, _ = self.image.partition('/')
            self.auth_state.token_cache.pop(authority, None)

        # Read the body and close the 401 response before issuing further
        # requests on the same session.
        r.content
        r.close()

        tok = self.auth_state.obtain_token(
            self.session,
            parsed['realm'],  # type: ignore
            parsed['scope'],  # type: ignore
            parsed['service'],  # type: ignore
            self.image)

        new_req = r.request.copy()
        new_req.headers['Authorization'] = 'Bearer ' + tok
        # The copy keeps the original's hooks; remove ours so that a second
        # 401 cannot trigger another retry from inside the retry.
        new_req.deregister_hook('response', self.response_hook)

        return self.session.send(new_req)

    def __call__(self,
                 r: requests.PreparedRequest) -> requests.PreparedRequest:
        """Attach the cached token, if any, and install the 401 handler."""
        authority, _, _ = self.image.partition('/')
        auth_may = self.auth_state.get_token(authority)

        if auth_may:
            r.headers['Authorization'] = 'Bearer ' + auth_may

        r.register_hook('response', self.response_hook)
        return r


class Registry:
    """Small client for the manifest endpoints of an OCI registry.

    Every request goes through the given session and authenticates with a
    :class:`RegistryAuthenticator` sharing this object's :class:`AuthState`.
    """

    def __init__(self, session: requests.Session):
        self.auth_state = AuthState()
        self.session = session

    @staticmethod
    def _manifest_url(image_path: str, reference: str) -> str:
        """Build the manifests URL for ``reference`` within ``image_path``."""
        authority, _, path = image_path.partition('/')
        return f'https://{authority}/v2/{path}/manifests/{reference}'

    def _authenticator(self, image_path: str) -> RegistryAuthenticator:
        """Create the per-request authenticator for ``image_path``."""
        return RegistryAuthenticator(self.auth_state, self.session, image_path)

    def image_info(self, image_path: str, manifest_id: str) -> ImageMetadata:
        """Return the size and digest of a manifest, from a HEAD request.

        :raises requests.HTTPError: if the registry reports an error.
        """
        resp = self.session.head(self._manifest_url(image_path, manifest_id),
                                 headers={'Accept': MANIFEST_MIME},
                                 auth=self._authenticator(image_path))
        resp.raise_for_status()
        return ImageMetadata(int(resp.headers['content-length']),
                             resp.headers['docker-content-digest'])

    def delete_tag(self, image_path: str, tag: str):
        """Issue a DELETE for the manifest at ``tag``.

        :raises requests.HTTPError: if the registry reports an error.
        """
        resp = self.session.delete(self._manifest_url(image_path, tag),
                                   headers={'Content-Type': INDEX_MIME},
                                   auth=self._authenticator(image_path))
        resp.raise_for_status()

    def _upload_index(self, image_path: str, tag: str, index: OCIIndex):
        """PUT ``index`` under ``tag`` and return the ``Location`` header.

        :raises requests.HTTPError: if the registry reports an error.
        """
        # Compact separators and sorted keys make the serialized body
        # deterministic for a given index.
        body = json.dumps(index.serialize(),
                          separators=(',', ':'),
                          sort_keys=True)

        resp = self.session.put(self._manifest_url(image_path, tag),
                                data=body,
                                headers={'Content-Type': INDEX_MIME},
                                auth=self._authenticator(image_path))
        resp.raise_for_status()

        return resp.headers['Location']

    def upload_index(self,
                     image_path: str,
                     tag: str,
                     index: OCIIndex,
                     retries=20,
                     retry_delay=1):
        """Upload ``index`` under ``tag``, retrying while the registry says 404.

        :param retries: Maximum number of upload attempts.
        :param retry_delay: Seconds to wait between two attempts.
        :returns: The ``Location`` header of the successful upload, or
            ``None`` if ``retries`` allows no attempt at all.
        :raises requests.HTTPError: immediately for any status other than
            404, or with the last 404 once every attempt has failed.
        """
        last_error = None

        # eventual consistency lmao
        for attempt in range(retries):
            if attempt:
                # Wait only between attempts, not after the final failure.
                time.sleep(retry_delay)

            try:
                return self._upload_index(image_path, tag, index)
            except requests.HTTPError as e:
                if e.response.status_code != 404:
                    raise
                last_error = e

        if last_error is not None:
            # Every attempt failed; report that instead of returning None as
            # if the upload had gone through.
            raise last_error

        return None