9 files changed, 181 insertions, 62 deletions
diff --git a/src/libutil/archive.cc b/src/libutil/archive.cc
index 51c88537e..ce7cf9754 100644
--- a/src/libutil/archive.cc
+++ b/src/libutil/archive.cc
@@ -150,17 +150,17 @@ static void skipGeneric(Source & source)
 
 static void parseContents(ParseSink & sink, Source & source, const Path & path)
 {
-    unsigned long long size = readLongLong(source);
+    uint64_t size = readLongLong(source);
 
     sink.preallocateContents(size);
 
-    unsigned long long left = size;
+    uint64_t left = size;
     std::vector<unsigned char> buf(65536);
 
     while (left) {
         checkInterrupt();
         auto n = buf.size();
-        if ((unsigned long long)n > left) n = left;
+        if ((uint64_t)n > left) n = left;
         source(buf.data(), n);
         sink.receiveContents(buf.data(), n);
         left -= n;
@@ -323,7 +323,7 @@ struct RestoreSink : ParseSink
             throw SysError("fchmod");
     }
 
-    void preallocateContents(unsigned long long len)
+    void preallocateContents(uint64_t len)
     {
 #if HAVE_POSIX_FALLOCATE
         if (len) {
@@ -338,7 +338,7 @@ struct RestoreSink : ParseSink
 #endif
     }
 
-    void receiveContents(unsigned char * data, unsigned int len)
+    void receiveContents(unsigned char * data, size_t len)
     {
         writeFull(fd.get(), data, len);
     }
diff --git a/src/libutil/archive.hh b/src/libutil/archive.hh
index 57780d16a..5665732d2 100644
--- a/src/libutil/archive.hh
+++ b/src/libutil/archive.hh
@@ -57,8 +57,8 @@ struct ParseSink
 
     virtual void createRegularFile(const Path & path) { };
     virtual void isExecutable() { };
-    virtual void preallocateContents(unsigned long long size) { };
-    virtual void receiveContents(unsigned char * data, unsigned int len) { };
+    virtual void preallocateContents(uint64_t size) { };
+    virtual void receiveContents(unsigned char * data, size_t len) { };
 
     virtual void createSymlink(const Path & path, const string & target) { };
 };
@@ -77,7 +77,7 @@ struct RetrieveRegularNARSink : ParseSink
         regular = false;
     }
 
-    void receiveContents(unsigned char * data, unsigned int len)
+    void receiveContents(unsigned char * data, size_t len)
     {
         sink(data, len);
     }
diff --git a/src/libutil/hash.cc b/src/libutil/hash.cc
index 2b0390da4..dfb3668f1 100644
--- a/src/libutil/hash.cc
+++ b/src/libutil/hash.cc
@@ -7,6 +7,7 @@
 #include "args.hh"
 #include "hash.hh"
 #include "archive.hh"
+#include "split.hh"
 #include "util.hh"
 
 #include <sys/types.h>
@@ -15,6 +16,7 @@
 
 namespace nix {
 
+
 static size_t regularHashSize(HashType type) {
     switch (type) {
     case htMD5: return md5HashSize;
@@ -25,10 +27,11 @@ static size_t regularHashSize(HashType type) {
     abort();
 }
 
+
 std::set<std::string> hashTypes = { "md5", "sha1", "sha256", "sha512" };
 
 
-void Hash::init()
+Hash::Hash(HashType type) : type(type)
 {
     hashSize = regularHashSize(type);
     assert(hashSize <= maxHashSize);
@@ -133,57 +136,89 @@ std::string Hash::to_string(Base base, bool includeType) const
     return s;
 }
 
-Hash::Hash(std::string_view s, HashType type) : Hash(s, std::optional { type }) { }
-Hash::Hash(std::string_view s) : Hash(s, std::optional<HashType>{}) { }
-
-Hash::Hash(std::string_view original, std::optional<HashType> optType)
-{
+Hash Hash::parseSRI(std::string_view original) {
     auto rest = original;
 
-    size_t pos = 0;
+    // Parse the hash type before the separator; an SRI hash must have one.
+    auto hashRaw = splitPrefixTo(rest, '-');
+    if (!hashRaw)
+        throw BadHash("hash '%s' is not SRI", original);
+    HashType parsedType = parseHashType(*hashRaw);
+
+    return Hash(rest, parsedType, true);
+}
+
+// Strips a leading `<type>:` or `<type>-` prefix from `rest` when one is found.
+static std::pair<std::optional<HashType>, bool> getParsedTypeAndSRI(std::string_view & rest) {
     bool isSRI = false;
 
     // Parse the has type before the separater, if there was one.
     std::optional<HashType> optParsedType;
     {
-        auto sep = rest.find(':');
-        if (sep == std::string_view::npos) {
-            sep = rest.find('-');
-            if (sep != std::string_view::npos)
+        auto hashRaw = splitPrefixTo(rest, ':');
+
+        if (!hashRaw) {
+            hashRaw = splitPrefixTo(rest, '-');
+            if (hashRaw)
                 isSRI = true;
         }
-        if (sep != std::string_view::npos) {
-            auto hashRaw = rest.substr(0, sep);
-            optParsedType = parseHashType(hashRaw);
-            rest = rest.substr(sep + 1);
-        }
+        if (hashRaw)
+            optParsedType = parseHashType(*hashRaw);
     }
 
+    return {optParsedType, isSRI};
+}
+
+Hash Hash::parseAnyPrefixed(std::string_view original)
+{
+    auto rest = original;
+    auto [optParsedType, isSRI] = getParsedTypeAndSRI(rest);
+
     // Either the string or user must provide the type, if they both do they
     // must agree.
-    if (!optParsedType && !optType) {
+    if (!optParsedType)
+        throw BadHash("hash '%s' does not include a type", rest);
+
+    return Hash(rest, *optParsedType, isSRI);
+}
+
+Hash Hash::parseAny(std::string_view original, std::optional<HashType> optType)
+{
+    auto rest = original;
+    auto [optParsedType, isSRI] = getParsedTypeAndSRI(rest);
+
+    // Either the string or user must provide the type; if they both do, they
+    // must agree.
+    if (!optParsedType && !optType)
         throw BadHash("hash '%s' does not include a type, nor is the type otherwise known from context.", rest);
-    } else {
-        this->type = optParsedType ? *optParsedType : *optType;
-        if (optParsedType && optType && *optParsedType != *optType)
-            throw BadHash("hash '%s' should have type '%s'", original, printHashType(*optType));
-    }
+    else if (optParsedType && optType && *optParsedType != *optType)
+        throw BadHash("hash '%s' should have type '%s'", original, printHashType(*optType));
 
-    init();
+    HashType hashType = optParsedType ? *optParsedType : *optType;
+    return Hash(rest, hashType, isSRI);
+}
 
+Hash Hash::parseNonSRIUnprefixed(std::string_view s, HashType type)
+{
+    return Hash(s, type, false);
+}
+
+Hash::Hash(std::string_view rest, HashType type, bool isSRI)
+    : Hash(type)
+{
     if (!isSRI && rest.size() == base16Len()) {
 
         auto parseHexDigit = [&](char c) {
             if (c >= '0' && c <= '9') return c - '0';
             if (c >= 'A' && c <= 'F') return c - 'A' + 10;
             if (c >= 'a' && c <= 'f') return c - 'a' + 10;
-            throw BadHash("invalid base-16 hash '%s'", original);
+            throw BadHash("invalid base-16 hash '%s'", rest);
         };
 
         for (unsigned int i = 0; i < hashSize; i++) {
             hash[i] =
-                parseHexDigit(rest[pos + i * 2]) << 4
-                | parseHexDigit(rest[pos + i * 2 + 1]);
+                parseHexDigit(rest[i * 2]) << 4
+                | parseHexDigit(rest[i * 2 + 1]);
         }
     }
 
@@ -195,7 +230,7 @@ Hash::Hash(std::string_view original, std::optional<HashType> optType)
             for (digit = 0; digit < base32Chars.size(); ++digit) /* !!! slow */
                 if (base32Chars[digit] == c) break;
             if (digit >= 32)
-                throw BadHash("invalid base-32 hash '%s'", original);
+                throw BadHash("invalid base-32 hash '%s'", rest);
             unsigned int b = n * 5;
             unsigned int i = b / 8;
             unsigned int j = b % 8;
@@ -205,7 +240,7 @@ Hash::Hash(std::string_view original, std::optional<HashType> optType)
                 hash[i + 1] |= digit >> (8 - j);
             } else {
                 if (digit >> (8 - j))
-                    throw BadHash("invalid base-32 hash '%s'", original);
+                    throw BadHash("invalid base-32 hash '%s'", rest);
             }
         }
     }
@@ -213,7 +248,7 @@ Hash::Hash(std::string_view original, std::optional<HashType> optType)
     else if (isSRI || rest.size() == base64Len()) {
         auto d = base64Decode(rest);
         if (d.size() != hashSize)
-            throw BadHash("invalid %s hash '%s'", isSRI ? "SRI" : "base-64", original);
+            throw BadHash("invalid %s hash '%s'", isSRI ? "SRI" : "base-64", rest);
         assert(hashSize);
         memcpy(hash, d.data(), hashSize);
     }
@@ -231,7 +266,7 @@ Hash newHashAllowEmpty(std::string hashStr, std::optional<HashType> ht)
         warn("found empty hash, assuming '%s'", h.to_string(SRI, true));
         return h;
     } else
-        return Hash(hashStr, ht);
+        return Hash::parseAny(hashStr, ht);
 }
 
 
diff --git a/src/libutil/hash.hh b/src/libutil/hash.hh
index 98ee1bed0..00ce7bb6f 100644
--- a/src/libutil/hash.hh
+++ b/src/libutil/hash.hh
@@ -34,21 +34,31 @@ struct Hash
     HashType type;
 
     /* Create a zero-filled hash object. */
-    Hash(HashType type) : type(type) { init(); };
+    Hash(HashType type);
 
-    /* Initialize the hash from a string representation, in the format
+    /* Parse the hash from a string representation in the format
        "[<type>:]<base16|base32|base64>" or "<type>-<base64>" (a
        Subresource Integrity hash expression). If the 'type' argument
        is not present, then the hash type must be specified in the
        string. */
-    Hash(std::string_view s, std::optional<HashType> type);
-    // type must be provided
-    Hash(std::string_view s, HashType type);
-    // hash type must be part of string
-    Hash(std::string_view s);
+    static Hash parseAny(std::string_view s, std::optional<HashType> type);
 
-    void init();
+    /* Parse a hash from a string representation like the above, except the
+       type prefix is mandatory since there is no separate argument. */
+    static Hash parseAnyPrefixed(std::string_view s);
 
+    /* Parse a plain hash that must not have any prefix indicating the type.
+       The type is passed in to disambiguate. */
+    static Hash parseNonSRIUnprefixed(std::string_view s, HashType type);
+
+    static Hash parseSRI(std::string_view original);
+
+private:
+    /* The type must be provided; the string view must not include the <type>
+       prefix. `isSRI` helps disambiguate the various base-* encodings. */
+    Hash(std::string_view s, HashType type, bool isSRI);
+
+public:
     /* Check whether a hash is set. */
     operator bool () const { return (bool) type; }
 
@@ -111,7 +121,7 @@ Hash hashFile(HashType ht, const Path & path);
 
 /* Compute the hash of the given path.  The hash is defined as
    (essentially) hashString(ht, dumpPath(path)). */
-typedef std::pair<Hash, unsigned long long> HashResult;
+typedef std::pair<Hash, uint64_t> HashResult;
 HashResult hashPath(HashType ht, const Path & path,
     PathFilter & filter = defaultPathFilter);
 
@@ -141,7 +151,7 @@ class HashSink : public BufferedSink, public AbstractHashSink
 private:
     HashType ht;
     Ctx * ctx;
-    unsigned long long bytes;
+    uint64_t bytes;
 
 public:
     HashSink(HashType ht);
diff --git a/src/libutil/serialise.hh b/src/libutil/serialise.hh
index 5d9acf887..c29c6b29b 100644
--- a/src/libutil/serialise.hh
+++ b/src/libutil/serialise.hh
@@ -312,14 +312,14 @@ T readNum(Source & source)
     source(buf, sizeof(buf));
 
     uint64_t n =
-        ((unsigned long long) buf[0]) |
-        ((unsigned long long) buf[1] << 8) |
-        ((unsigned long long) buf[2] << 16) |
-        ((unsigned long long) buf[3] << 24) |
-        ((unsigned long long) buf[4] << 32) |
-        ((unsigned long long) buf[5] << 40) |
-        ((unsigned long long) buf[6] << 48) |
-        ((unsigned long long) buf[7] << 56);
+        ((uint64_t) buf[0]) |
+        ((uint64_t) buf[1] << 8) |
+        ((uint64_t) buf[2] << 16) |
+        ((uint64_t) buf[3] << 24) |
+        ((uint64_t) buf[4] << 32) |
+        ((uint64_t) buf[5] << 40) |
+        ((uint64_t) buf[6] << 48) |
+        ((uint64_t) buf[7] << 56);
 
     if (n > std::numeric_limits<T>::max())
         throw SerialisationError("serialised integer %d is too large for type '%s'", n, typeid(T).name());
diff --git a/src/libutil/split.hh b/src/libutil/split.hh
new file mode 100644
index 000000000..d19d7d8ed
--- /dev/null
+++ b/src/libutil/split.hh
@@ -0,0 +1,33 @@
+#pragma once
+
+#include <optional>
+#include <string_view>
+
+#include "util.hh"
+
+namespace nix {
+
+// If `separator` is found, we return the portion of the string before the
+// separator, and modify the string argument to contain only the part after the
+// separator. Otherwise, we return `std::nullopt`, and we leave the argument
+// string alone.
+static inline std::optional<std::string_view> splitPrefixTo(std::string_view & string, char separator) {
+    auto sepInstance = string.find(separator);
+
+    if (sepInstance != std::string_view::npos) {
+        auto prefix = string.substr(0, sepInstance);
+        string.remove_prefix(sepInstance+1);
+        return prefix;
+    }
+
+    return std::nullopt;
+}
+
+static inline bool splitPrefix(std::string_view & string, std::string_view prefix) {
+    bool res = hasPrefix(string, prefix);
+    if (res)
+        string.remove_prefix(prefix.length());
+    return res;
+}
+
+}
diff --git a/src/libutil/topo-sort.hh b/src/libutil/topo-sort.hh
new file mode 100644
index 000000000..7a68ff169
--- /dev/null
+++ b/src/libutil/topo-sort.hh
@@ -0,0 +1,40 @@
+#include "error.hh"
+
+namespace nix {
+
+template<typename T>
+std::vector<T> topoSort(std::set<T> items,
+        std::function<std::set<T>(const T &)> getChildren,
+        std::function<Error(const T &, const T &)> makeCycleError)
+{
+    std::vector<T> sorted;
+    std::set<T> visited, parents;
+
+    std::function<void(const T & path, const T * parent)> dfsVisit;
+
+    dfsVisit = [&](const T & path, const T * parent) {
+        if (parents.count(path)) throw makeCycleError(path, *parent);
+
+        if (!visited.insert(path).second) return;
+        parents.insert(path);
+
+        std::set<T> references = getChildren(path);
+
+        for (auto & i : references)
+            /* Don't traverse into items that don't exist in our starting set. */
+            if (i != path && items.count(i))
+                dfsVisit(i, &path);
+
+        sorted.push_back(path);
+        parents.erase(path);
+    };
+
+    for (auto & i : items)
+        dfsVisit(i, nullptr);
+
+    std::reverse(sorted.begin(), sorted.end());
+
+    return sorted;
+}
+
+}
diff --git a/src/libutil/util.cc b/src/libutil/util.cc
index a0a8ff4d3..c0b9698ee 100644
--- a/src/libutil/util.cc
+++ b/src/libutil/util.cc
@@ -374,7 +374,7 @@ void writeLine(int fd, string s)
 }
 
 
-static void _deletePath(int parentfd, const Path & path, unsigned long long & bytesFreed)
+static void _deletePath(int parentfd, const Path & path, uint64_t & bytesFreed)
 {
     checkInterrupt();
 
@@ -414,7 +414,7 @@ static void _deletePath(int parentfd, const Path & path, unsigned long long & by
     }
 }
 
-static void _deletePath(const Path & path, unsigned long long & bytesFreed)
+static void _deletePath(const Path & path, uint64_t & bytesFreed)
 {
     Path dir = dirOf(path);
     if (dir == "")
@@ -435,12 +435,12 @@ static void _deletePath(const Path & path, unsigned long long & bytesFreed)
 
 void deletePath(const Path & path)
 {
-    unsigned long long dummy;
+    uint64_t dummy;
     deletePath(path, dummy);
 }
 
 
-void deletePath(const Path & path, unsigned long long & bytesFreed)
+void deletePath(const Path & path, uint64_t & bytesFreed)
 {
     //Activity act(*logger, lvlDebug, format("recursively deleting path '%1%'") % path);
     bytesFreed = 0;
@@ -494,6 +494,7 @@ std::pair<AutoCloseFD, Path> createTempFile(const Path & prefix)
 {
     Path tmpl(getEnv("TMPDIR").value_or("/tmp") + "/" + prefix + ".XXXXXX");
     // Strictly speaking, this is UB, but who cares...
+    // FIXME: use O_TMPFILE.
     AutoCloseFD fd(mkstemp((char *) tmpl.c_str()));
     if (!fd)
         throw SysError("creating temporary file '%s'", tmpl);
@@ -1449,7 +1450,7 @@ string base64Decode(std::string_view s)
 
         char digit = decode[(unsigned char) c];
         if (digit == -1)
-            throw Error("invalid character in Base64 string");
+            throw Error("invalid character in Base64 string: '%c'", c);
 
         bits += 6;
         d = d << 6 | digit;
diff --git a/src/libutil/util.hh b/src/libutil/util.hh
index 630303a5d..3a20679a8 100644
--- a/src/libutil/util.hh
+++ b/src/libutil/util.hh
@@ -125,7 +125,7 @@ void writeLine(int fd, string s);
    second variant returns the number of bytes and blocks freed. */
 void deletePath(const Path & path);
 
-void deletePath(const Path & path, unsigned long long & bytesFreed);
+void deletePath(const Path & path, uint64_t & bytesFreed);
 
 std::string getUserName();