aboutsummaryrefslogtreecommitdiff
path: root/systems/parse.nix
diff options
context:
space:
mode:
authorRobert Hensing <robert@roberthensing.nl>2023-08-16 15:46:37 +0200
committerRobert Hensing <robert@roberthensing.nl>2023-08-16 15:46:37 +0200
commit63e0b5d081fed582ac6a0a66f402dc525953524b (patch)
tree1bdcd7fc8a6a40b2e805bad759b36e64e911036b /systems/parse.nix
GC root for fetched nixpkgs/lib content
Diffstat (limited to 'systems/parse.nix')
-rw-r--r--systems/parse.nix503
1 files changed, 503 insertions, 0 deletions
diff --git a/systems/parse.nix b/systems/parse.nix
new file mode 100644
index 000000000..6eb4f27cc
--- /dev/null
+++ b/systems/parse.nix
@@ -0,0 +1,503 @@
+# Define the list of system with their properties.
+#
+# See https://clang.llvm.org/docs/CrossCompilation.html and
+# http://llvm.org/docs/doxygen/html/Triple_8cpp_source.html especially
+# Triple::normalize. Parsing should essentially act as a more conservative
+# version of that last function.
+#
+# Most of the types below come in "open" and "closed" pairs. The open ones
+# specify what information we need to know about systems in general, and the
+# closed ones are sub-types representing the whitelist of systems we support in
+# practice.
+#
+# Code in the remainder of nixpkgs shouldn't rely on the closed ones in
+# e.g. exhaustive cases. Its more a sanity check to make sure nobody defines
+# systems that overlap with existing ones and won't notice something amiss.
+#
+{ lib }:
+with lib.lists;
+with lib.types;
+with lib.attrsets;
+with lib.strings;
+with (import ./inspect.nix { inherit lib; }).predicates;
+
+let
+ inherit (lib.options) mergeOneOption;
+
+ setTypes = type:
+ mapAttrs (name: value:
+ assert type.check value;
+ setType type.name ({ inherit name; } // value));
+
+in
+
+rec {
+
+ ################################################################################
+
+ types.openSignificantByte = mkOptionType {
+ name = "significant-byte";
+ description = "Endianness";
+ merge = mergeOneOption;
+ };
+
+ types.significantByte = enum (attrValues significantBytes);
+
+ significantBytes = setTypes types.openSignificantByte {
+ bigEndian = {};
+ littleEndian = {};
+ };
+
+ ################################################################################
+
+ # Reasonable power of 2
+ types.bitWidth = enum [ 8 16 32 64 128 ];
+
+ ################################################################################
+
+ types.openCpuType = mkOptionType {
+ name = "cpu-type";
+ description = "instruction set architecture name and information";
+ merge = mergeOneOption;
+ check = x: types.bitWidth.check x.bits
+ && (if 8 < x.bits
+ then types.significantByte.check x.significantByte
+ else !(x ? significantByte));
+ };
+
+ types.cpuType = enum (attrValues cpuTypes);
+
+ cpuTypes = with significantBytes; setTypes types.openCpuType {
+ arm = { bits = 32; significantByte = littleEndian; family = "arm"; };
+ armv5tel = { bits = 32; significantByte = littleEndian; family = "arm"; version = "5"; arch = "armv5t"; };
+ armv6m = { bits = 32; significantByte = littleEndian; family = "arm"; version = "6"; arch = "armv6-m"; };
+ armv6l = { bits = 32; significantByte = littleEndian; family = "arm"; version = "6"; arch = "armv6"; };
+ armv7a = { bits = 32; significantByte = littleEndian; family = "arm"; version = "7"; arch = "armv7-a"; };
+ armv7r = { bits = 32; significantByte = littleEndian; family = "arm"; version = "7"; arch = "armv7-r"; };
+ armv7m = { bits = 32; significantByte = littleEndian; family = "arm"; version = "7"; arch = "armv7-m"; };
+ armv7l = { bits = 32; significantByte = littleEndian; family = "arm"; version = "7"; arch = "armv7"; };
+ armv8a = { bits = 32; significantByte = littleEndian; family = "arm"; version = "8"; arch = "armv8-a"; };
+ armv8r = { bits = 32; significantByte = littleEndian; family = "arm"; version = "8"; arch = "armv8-a"; };
+ armv8m = { bits = 32; significantByte = littleEndian; family = "arm"; version = "8"; arch = "armv8-m"; };
+ aarch64 = { bits = 64; significantByte = littleEndian; family = "arm"; version = "8"; arch = "armv8-a"; };
+ aarch64_be = { bits = 64; significantByte = bigEndian; family = "arm"; version = "8"; arch = "armv8-a"; };
+
+ i386 = { bits = 32; significantByte = littleEndian; family = "x86"; arch = "i386"; };
+ i486 = { bits = 32; significantByte = littleEndian; family = "x86"; arch = "i486"; };
+ i586 = { bits = 32; significantByte = littleEndian; family = "x86"; arch = "i586"; };
+ i686 = { bits = 32; significantByte = littleEndian; family = "x86"; arch = "i686"; };
+ x86_64 = { bits = 64; significantByte = littleEndian; family = "x86"; arch = "x86-64"; };
+
+ microblaze = { bits = 32; significantByte = bigEndian; family = "microblaze"; };
+ microblazeel = { bits = 32; significantByte = littleEndian; family = "microblaze"; };
+
+ mips = { bits = 32; significantByte = bigEndian; family = "mips"; };
+ mipsel = { bits = 32; significantByte = littleEndian; family = "mips"; };
+ mips64 = { bits = 64; significantByte = bigEndian; family = "mips"; };
+ mips64el = { bits = 64; significantByte = littleEndian; family = "mips"; };
+
+ mmix = { bits = 64; significantByte = bigEndian; family = "mmix"; };
+
+ m68k = { bits = 32; significantByte = bigEndian; family = "m68k"; };
+
+ powerpc = { bits = 32; significantByte = bigEndian; family = "power"; };
+ powerpc64 = { bits = 64; significantByte = bigEndian; family = "power"; };
+ powerpc64le = { bits = 64; significantByte = littleEndian; family = "power"; };
+ powerpcle = { bits = 32; significantByte = littleEndian; family = "power"; };
+
+ riscv32 = { bits = 32; significantByte = littleEndian; family = "riscv"; };
+ riscv64 = { bits = 64; significantByte = littleEndian; family = "riscv"; };
+
+ s390 = { bits = 32; significantByte = bigEndian; family = "s390"; };
+ s390x = { bits = 64; significantByte = bigEndian; family = "s390"; };
+
+ sparc = { bits = 32; significantByte = bigEndian; family = "sparc"; };
+ sparc64 = { bits = 64; significantByte = bigEndian; family = "sparc"; };
+
+ wasm32 = { bits = 32; significantByte = littleEndian; family = "wasm"; };
+ wasm64 = { bits = 64; significantByte = littleEndian; family = "wasm"; };
+
+ alpha = { bits = 64; significantByte = littleEndian; family = "alpha"; };
+
+ rx = { bits = 32; significantByte = littleEndian; family = "rx"; };
+ msp430 = { bits = 16; significantByte = littleEndian; family = "msp430"; };
+ avr = { bits = 8; family = "avr"; };
+
+ vc4 = { bits = 32; significantByte = littleEndian; family = "vc4"; };
+
+ or1k = { bits = 32; significantByte = bigEndian; family = "or1k"; };
+
+ loongarch64 = { bits = 64; significantByte = littleEndian; family = "loongarch"; };
+
+ javascript = { bits = 32; significantByte = littleEndian; family = "javascript"; };
+ };
+
+ # GNU build systems assume that older NetBSD architectures are using a.out.
+ gnuNetBSDDefaultExecFormat = cpu:
+ if (cpu.family == "arm" && cpu.bits == 32) ||
+ (cpu.family == "sparc" && cpu.bits == 32) ||
+ (cpu.family == "m68k" && cpu.bits == 32) ||
+ (cpu.family == "x86" && cpu.bits == 32)
+ then execFormats.aout
+ else execFormats.elf;
+
+ # Determine when two CPUs are compatible with each other. That is,
+ # can code built for system B run on system A? For that to happen,
+ # the programs that system B accepts must be a subset of the
+ # programs that system A accepts.
+ #
+ # We have the following properties of the compatibility relation,
+ # which must be preserved when adding compatibility information for
+ # additional CPUs.
+ # - (reflexivity)
+ # Every CPU is compatible with itself.
+ # - (transitivity)
+ # If A is compatible with B and B is compatible with C then A is compatible with C.
+ #
+ # Note: Since 22.11 the archs of a mode switching CPU are no longer considered
+ # pairwise compatible. Mode switching implies that binaries built for A
+ # and B respectively can't be executed at the same time.
+ isCompatible = a: b: with cpuTypes; lib.any lib.id [
+ # x86
+ (b == i386 && isCompatible a i486)
+ (b == i486 && isCompatible a i586)
+ (b == i586 && isCompatible a i686)
+
+ # XXX: Not true in some cases. Like in WSL mode.
+ (b == i686 && isCompatible a x86_64)
+
+ # ARMv4
+ (b == arm && isCompatible a armv5tel)
+
+ # ARMv5
+ (b == armv5tel && isCompatible a armv6l)
+
+ # ARMv6
+ (b == armv6l && isCompatible a armv6m)
+ (b == armv6m && isCompatible a armv7l)
+
+ # ARMv7
+ (b == armv7l && isCompatible a armv7a)
+ (b == armv7l && isCompatible a armv7r)
+ (b == armv7l && isCompatible a armv7m)
+
+ # ARMv8
+ (b == aarch64 && a == armv8a)
+ (b == armv8a && isCompatible a aarch64)
+ (b == armv8r && isCompatible a armv8a)
+ (b == armv8m && isCompatible a armv8a)
+
+ # PowerPC
+ (b == powerpc && isCompatible a powerpc64)
+ (b == powerpcle && isCompatible a powerpc64le)
+
+ # MIPS
+ (b == mips && isCompatible a mips64)
+ (b == mipsel && isCompatible a mips64el)
+
+ # RISCV
+ (b == riscv32 && isCompatible a riscv64)
+
+ # SPARC
+ (b == sparc && isCompatible a sparc64)
+
+ # WASM
+ (b == wasm32 && isCompatible a wasm64)
+
+ # identity
+ (b == a)
+ ];
+
+ ################################################################################
+
+ types.openVendor = mkOptionType {
+ name = "vendor";
+ description = "vendor for the platform";
+ merge = mergeOneOption;
+ };
+
+ types.vendor = enum (attrValues vendors);
+
+ vendors = setTypes types.openVendor {
+ apple = {};
+ pc = {};
+ # Actually matters, unlocking some MinGW-w64-specific options in GCC. See
+ # bottom of https://sourceforge.net/p/mingw-w64/wiki2/Unicode%20apps/
+ w64 = {};
+
+ none = {};
+ unknown = {};
+ };
+
+ ################################################################################
+
+ types.openExecFormat = mkOptionType {
+ name = "exec-format";
+ description = "executable container used by the kernel";
+ merge = mergeOneOption;
+ };
+
+ types.execFormat = enum (attrValues execFormats);
+
+ execFormats = setTypes types.openExecFormat {
+ aout = {}; # a.out
+ elf = {};
+ macho = {};
+ pe = {};
+ wasm = {};
+
+ unknown = {};
+ };
+
+ ################################################################################
+
+ types.openKernelFamily = mkOptionType {
+ name = "exec-format";
+ description = "executable container used by the kernel";
+ merge = mergeOneOption;
+ };
+
+ types.kernelFamily = enum (attrValues kernelFamilies);
+
+ kernelFamilies = setTypes types.openKernelFamily {
+ bsd = {};
+ darwin = {};
+ };
+
+ ################################################################################
+
+ types.openKernel = mkOptionType {
+ name = "kernel";
+ description = "kernel name and information";
+ merge = mergeOneOption;
+ check = x: types.execFormat.check x.execFormat
+ && all types.kernelFamily.check (attrValues x.families);
+ };
+
+ types.kernel = enum (attrValues kernels);
+
+ kernels = with execFormats; with kernelFamilies; setTypes types.openKernel {
+ # TODO(@Ericson2314): Don't want to mass-rebuild yet to keeping 'darwin' as
+ # the normalized name for macOS.
+ macos = { execFormat = macho; families = { inherit darwin; }; name = "darwin"; };
+ ios = { execFormat = macho; families = { inherit darwin; }; };
+ # A tricky thing about FreeBSD is that there is no stable ABI across
+ # versions. That means that putting in the version as part of the
+ # config string is paramount.
+ freebsd12 = { execFormat = elf; families = { inherit bsd; }; name = "freebsd"; version = 12; };
+ freebsd13 = { execFormat = elf; families = { inherit bsd; }; name = "freebsd"; version = 13; };
+ linux = { execFormat = elf; families = { }; };
+ netbsd = { execFormat = elf; families = { inherit bsd; }; };
+ none = { execFormat = unknown; families = { }; };
+ openbsd = { execFormat = elf; families = { inherit bsd; }; };
+ solaris = { execFormat = elf; families = { }; };
+ wasi = { execFormat = wasm; families = { }; };
+ redox = { execFormat = elf; families = { }; };
+ windows = { execFormat = pe; families = { }; };
+ ghcjs = { execFormat = unknown; families = { }; };
+ genode = { execFormat = elf; families = { }; };
+ mmixware = { execFormat = unknown; families = { }; };
+ } // { # aliases
+ # 'darwin' is the kernel for all of them. We choose macOS by default.
+ darwin = kernels.macos;
+ watchos = kernels.ios;
+ tvos = kernels.ios;
+ win32 = kernels.windows;
+ };
+
+ ################################################################################
+
+ types.openAbi = mkOptionType {
+ name = "abi";
+ description = "binary interface for compiled code and syscalls";
+ merge = mergeOneOption;
+ };
+
+ types.abi = enum (attrValues abis);
+
+ abis = setTypes types.openAbi {
+ cygnus = {};
+ msvc = {};
+
+ # Note: eabi is specific to ARM and PowerPC.
+ # On PowerPC, this corresponds to PPCEABI.
+ # On ARM, this corresponds to ARMEABI.
+ eabi = { float = "soft"; };
+ eabihf = { float = "hard"; };
+
+ # Other architectures should use ELF in embedded situations.
+ elf = {};
+
+ androideabi = {};
+ android = {
+ assertions = [
+ { assertion = platform: !platform.isAarch32;
+ message = ''
+ The "android" ABI is not for 32-bit ARM. Use "androideabi" instead.
+ '';
+ }
+ ];
+ };
+
+ gnueabi = { float = "soft"; };
+ gnueabihf = { float = "hard"; };
+ gnu = {
+ assertions = [
+ { assertion = platform: !platform.isAarch32;
+ message = ''
+ The "gnu" ABI is ambiguous on 32-bit ARM. Use "gnueabi" or "gnueabihf" instead.
+ '';
+ }
+ { assertion = platform: with platform; !(isPower64 && isBigEndian);
+ message = ''
+ The "gnu" ABI is ambiguous on big-endian 64-bit PowerPC. Use "gnuabielfv2" or "gnuabielfv1" instead.
+ '';
+ }
+ ];
+ };
+ gnuabi64 = { abi = "64"; };
+ muslabi64 = { abi = "64"; };
+
+ # NOTE: abi=n32 requires a 64-bit MIPS chip! That is not a typo.
+ # It is basically the 64-bit abi with 32-bit pointers. Details:
+ # https://www.linux-mips.org/pub/linux/mips/doc/ABI/MIPS-N32-ABI-Handbook.pdf
+ gnuabin32 = { abi = "n32"; };
+ muslabin32 = { abi = "n32"; };
+
+ gnuabielfv2 = { abi = "elfv2"; };
+ gnuabielfv1 = { abi = "elfv1"; };
+
+ musleabi = { float = "soft"; };
+ musleabihf = { float = "hard"; };
+ musl = {};
+
+ uclibceabi = { float = "soft"; };
+ uclibceabihf = { float = "hard"; };
+ uclibc = {};
+
+ unknown = {};
+ };
+
+ ################################################################################
+
+ types.parsedPlatform = mkOptionType {
+ name = "system";
+ description = "fully parsed representation of llvm- or nix-style platform tuple";
+ merge = mergeOneOption;
+ check = { cpu, vendor, kernel, abi }:
+ types.cpuType.check cpu
+ && types.vendor.check vendor
+ && types.kernel.check kernel
+ && types.abi.check abi;
+ };
+
+ isSystem = isType "system";
+
+ mkSystem = components:
+ assert types.parsedPlatform.check components;
+ setType "system" components;
+
+ mkSkeletonFromList = l: {
+ "1" = if elemAt l 0 == "avr"
+ then { cpu = elemAt l 0; kernel = "none"; abi = "unknown"; }
+ else throw "Target specification with 1 components is ambiguous";
+ "2" = # We only do 2-part hacks for things Nix already supports
+ if elemAt l 1 == "cygwin"
+ then { cpu = elemAt l 0; kernel = "windows"; abi = "cygnus"; }
+ # MSVC ought to be the default ABI so this case isn't needed. But then it
+ # becomes difficult to handle the gnu* variants for Aarch32 correctly for
+ # minGW. So it's easier to make gnu* the default for the MinGW, but
+ # hack-in MSVC for the non-MinGW case right here.
+ else if elemAt l 1 == "windows"
+ then { cpu = elemAt l 0; kernel = "windows"; abi = "msvc"; }
+ else if (elemAt l 1) == "elf"
+ then { cpu = elemAt l 0; vendor = "unknown"; kernel = "none"; abi = elemAt l 1; }
+ else { cpu = elemAt l 0; kernel = elemAt l 1; };
+ "3" =
+ # cpu-kernel-environment
+ if elemAt l 1 == "linux" ||
+ elem (elemAt l 2) ["eabi" "eabihf" "elf" "gnu"]
+ then {
+ cpu = elemAt l 0;
+ kernel = elemAt l 1;
+ abi = elemAt l 2;
+ vendor = "unknown";
+ }
+ # cpu-vendor-os
+ else if elemAt l 1 == "apple" ||
+ elem (elemAt l 2) [ "wasi" "redox" "mmixware" "ghcjs" "mingw32" ] ||
+ hasPrefix "freebsd" (elemAt l 2) ||
+ hasPrefix "netbsd" (elemAt l 2) ||
+ hasPrefix "genode" (elemAt l 2)
+ then {
+ cpu = elemAt l 0;
+ vendor = elemAt l 1;
+ kernel = if elemAt l 2 == "mingw32"
+ then "windows" # autotools breaks on -gnu for window
+ else elemAt l 2;
+ }
+ else throw "Target specification with 3 components is ambiguous";
+ "4" = { cpu = elemAt l 0; vendor = elemAt l 1; kernel = elemAt l 2; abi = elemAt l 3; };
+ }.${toString (length l)}
+ or (throw "system string has invalid number of hyphen-separated components");
+
+ # This should revert the job done by config.guess from the gcc compiler.
+ mkSystemFromSkeleton = { cpu
+ , # Optional, but fallback too complex for here.
+ # Inferred below instead.
+ vendor ? assert false; null
+ , kernel
+ , # Also inferred below
+ abi ? assert false; null
+ } @ args: let
+ getCpu = name: cpuTypes.${name} or (throw "Unknown CPU type: ${name}");
+ getVendor = name: vendors.${name} or (throw "Unknown vendor: ${name}");
+ getKernel = name: kernels.${name} or (throw "Unknown kernel: ${name}");
+ getAbi = name: abis.${name} or (throw "Unknown ABI: ${name}");
+
+ parsed = {
+ cpu = getCpu args.cpu;
+ vendor =
+ /**/ if args ? vendor then getVendor args.vendor
+ else if isDarwin parsed then vendors.apple
+ else if isWindows parsed then vendors.pc
+ else vendors.unknown;
+ kernel = if hasPrefix "darwin" args.kernel then getKernel "darwin"
+ else if hasPrefix "netbsd" args.kernel then getKernel "netbsd"
+ else getKernel args.kernel;
+ abi =
+ /**/ if args ? abi then getAbi args.abi
+ else if isLinux parsed || isWindows parsed then
+ if isAarch32 parsed then
+ if lib.versionAtLeast (parsed.cpu.version or "0") "6"
+ then abis.gnueabihf
+ else abis.gnueabi
+ # Default ppc64 BE to ELFv2
+ else if isPower64 parsed && isBigEndian parsed then abis.gnuabielfv2
+ else abis.gnu
+ else abis.unknown;
+ };
+
+ in mkSystem parsed;
+
+ mkSystemFromString = s: mkSystemFromSkeleton (mkSkeletonFromList (lib.splitString "-" s));
+
+ kernelName = kernel:
+ kernel.name + toString (kernel.version or "");
+
+ doubleFromSystem = { cpu, kernel, abi, ... }:
+ /**/ if abi == abis.cygnus then "${cpu.name}-cygwin"
+ else if kernel.families ? darwin then "${cpu.name}-darwin"
+ else "${cpu.name}-${kernelName kernel}";
+
+ tripleFromSystem = { cpu, vendor, kernel, abi, ... } @ sys: assert isSystem sys; let
+ optExecFormat =
+ lib.optionalString (kernel.name == "netbsd" &&
+ gnuNetBSDDefaultExecFormat cpu != kernel.execFormat)
+ kernel.execFormat.name;
+ optAbi = lib.optionalString (abi != abis.unknown) "-${abi.name}";
+ in "${cpu.name}-${vendor.name}-${kernelName kernel}${optExecFormat}${optAbi}";
+
+ ################################################################################
+
+}