diff options
author | Eelco Dolstra <eelco.dolstra@logicblox.com> | 2012-07-23 12:08:34 -0400 |
---|---|---|
committer | Eelco Dolstra <eelco.dolstra@logicblox.com> | 2012-07-23 17:14:15 -0400 |
commit | 564fb7d9fa80d06397a88d69f26439727cb922c5 (patch) | |
tree | b6944bacbc3f4d8cbac9d886686572aa6fdbbe8c | |
parent | ed59bf7a181bb382dea7dd72da52bf91f60deb8d (diff) |
optimiseStore(): Use a content-addressed file store in /nix/store/.links
optimiseStore() now creates persistent, content-addressed hard links
in /nix/store/.links. For instance, if it encounters a file P with
hash H, it will create a hard link
P' = /nix/store/.link/<H>
to P if P' doesn't already exist; if P' exist, then P is replaced by a
hard link to P'. This is better than the previous in-memory map,
because it had the tendency to unnecessarily replace hard links with a
hard link to whatever happened to be the first file with a given hash
it encountered. It also allows on-the-fly, incremental optimisation.
-rw-r--r-- | src/libstore/local-store.hh | 2 | ||||
-rw-r--r-- | src/libstore/optimise-store.cc | 213 | ||||
-rw-r--r-- | src/nix-store/nix-store.cc | 10 |
3 files changed, 110 insertions, 115 deletions
diff --git a/src/libstore/local-store.hh b/src/libstore/local-store.hh index eb7705219..1bb47fb3b 100644 --- a/src/libstore/local-store.hh +++ b/src/libstore/local-store.hh @@ -167,7 +167,7 @@ public: /* Optimise the disk space usage of the Nix store by hard-linking files with the same contents. */ - void optimiseStore(bool dryRun, OptimiseStats & stats); + void optimiseStore(OptimiseStats & stats); /* Check the integrity of the Nix store. */ void verifyStore(bool checkContents); diff --git a/src/libstore/optimise-store.cc b/src/libstore/optimise-store.cc index 2ca98f46d..0893db9d3 100644 --- a/src/libstore/optimise-store.cc +++ b/src/libstore/optimise-store.cc @@ -1,6 +1,7 @@ #include "util.hh" #include "local-store.hh" #include "immutable.hh" +#include "globals.hh" #include <sys/types.h> #include <sys/stat.h> @@ -12,9 +13,6 @@ namespace nix { -typedef std::map<Hash, std::pair<Path, ino_t> > HashToPath; - - static void makeWritable(const Path & path) { struct stat st; @@ -51,132 +49,135 @@ struct MakeImmutable }; -static void hashAndLink(bool dryRun, HashToPath & hashToPath, - OptimiseStats & stats, const Path & path) +const string linksDir = ".links"; + + +static void hashAndLink(OptimiseStats & stats, const Path & path) { struct stat st; if (lstat(path.c_str(), &st)) throw SysError(format("getting attributes of path `%1%'") % path); + if (S_ISDIR(st.st_mode)) { + Strings names = readDirectory(path); + foreach (Strings::iterator, i, names) + hashAndLink(stats, path + "/" + *i); + return; + } + + /* We can hard link regular files and symlinks. */ + if (!S_ISREG(st.st_mode) && !S_ISLNK(st.st_mode)) return; + /* Sometimes SNAFUs can cause files in the Nix store to be modified, in particular when running programs as root under NixOS (example: $fontconfig/var/cache being modified). Skip - those files. */ + those files. FIXME: check the modification time. */ if (S_ISREG(st.st_mode) && (st.st_mode & S_IWUSR)) { printMsg(lvlError, format("skipping suspicious writable file `%1%'") % path); return; } - /* We can hard link regular files and symlinks. */ - if (S_ISREG(st.st_mode) || S_ISLNK(st.st_mode)) { - - /* Hash the file. Note that hashPath() returns the hash over - the NAR serialisation, which includes the execute bit on - the file. Thus, executable and non-executable files with - the same contents *won't* be linked (which is good because - otherwise the permissions would be screwed up). - - Also note that if `path' is a symlink, then we're hashing - the contents of the symlink (i.e. the result of - readlink()), not the contents of the target (which may not - even exist). */ - Hash hash = hashPath(htSHA256, path).first; - stats.totalFiles++; - printMsg(lvlDebug, format("`%1%' has hash `%2%'") % path % printHash(hash)); - - std::pair<Path, ino_t> prevPath = hashToPath[hash]; - - if (prevPath.first == "") { - hashToPath[hash] = std::pair<Path, ino_t>(path, st.st_ino); - return; - } - - /* Yes! We've seen a file with the same contents. Replace - the current file with a hard link to that file. */ - stats.sameContents++; - if (prevPath.second == st.st_ino) { - printMsg(lvlDebug, format("`%1%' is already linked to `%2%'") % path % prevPath.first); - return; - } + /* Hash the file. Note that hashPath() returns the hash over the + NAR serialisation, which includes the execute bit on the file. + Thus, executable and non-executable files with the same + contents *won't* be linked (which is good because otherwise the + permissions would be screwed up). + + Also note that if `path' is a symlink, then we're hashing the + contents of the symlink (i.e. the result of readlink()), not + the contents of the target (which may not even exist). */ + Hash hash = hashPath(htSHA256, path).first; + stats.totalFiles++; + printMsg(lvlDebug, format("`%1%' has hash `%2%'") % path % printHash(hash)); + + /* Check if this is a known hash. */ + Path linkPath = nixStore + "/" + linksDir + "/" + printHash32(hash); + + if (!pathExists(linkPath)) { + /* Nope, create a hard link in the links directory. */ + makeMutable(path); + MakeImmutable mk1(path); + + if (link(path.c_str(), linkPath.c_str()) == -1) + throw SysError(format("cannot link `%1%' to `%2%'") % linkPath % path); + + return; + } + + /* Yes! We've seen a file with the same contents. Replace the + current file with a hard link to that file. */ + struct stat stLink; + if (lstat(linkPath.c_str(), &stLink)) + throw SysError(format("getting attributes of path `%1%'") % linkPath); + + stats.sameContents++; + if (st.st_ino == stLink.st_ino) { + printMsg(lvlDebug, format("`%1%' is already linked to `%2%'") % path % linkPath); + return; + } - if (!dryRun) { - - printMsg(lvlTalkative, format("linking `%1%' to `%2%'") % path % prevPath.first); + printMsg(lvlTalkative, format("linking `%1%' to `%2%'") % path % linkPath); - Path tempLink = (format("%1%.tmp-%2%-%3%") - % path % getpid() % rand()).str(); + Path tempLink = (format("%1%/.tmp-link-%2%-%3%") + % nixStore % getpid() % rand()).str(); - /* Make the containing directory writable, but only if - it's not the store itself (we don't want or need to - mess with its permissions). */ - bool mustToggle = !isStorePath(path); - if (mustToggle) makeWritable(dirOf(path)); - - /* When we're done, make the directory read-only again and - reset its timestamp back to 0. */ - MakeReadOnly makeReadOnly(mustToggle ? dirOf(path) : ""); - - /* If ‘prevPath’ is immutable, we can't create hard links - to it, so make it mutable first (and make it immutable - again when we're done). We also have to make ‘path’ - mutable, otherwise rename() will fail to delete it. */ - makeMutable(prevPath.first); - MakeImmutable mk1(prevPath.first); + /* Make the containing directory writable, but only if it's not + the store itself (we don't want or need to mess with its + permissions). */ + bool mustToggle = !isStorePath(path); + if (mustToggle) makeWritable(dirOf(path)); - makeMutable(path); - MakeImmutable mk2(path); - - if (link(prevPath.first.c_str(), tempLink.c_str()) == -1) { - if (errno == EMLINK) { - /* Too many links to the same file (>= 32000 on - most file systems). This is likely to happen - with empty files. Just start over, creating - links to the current file. */ - printMsg(lvlInfo, format("`%1%' has maximum number of links") % prevPath.first); - hashToPath[hash] = std::pair<Path, ino_t>(path, st.st_ino); - return; - } - throw SysError(format("cannot link `%1%' to `%2%'") - % tempLink % prevPath.first); - } - - /* Atomically replace the old file with the new hard link. */ - if (rename(tempLink.c_str(), path.c_str()) == -1) { - if (errno == EMLINK) { - /* Some filesystems generate too many links on the - rename, rather than on the original link. - (Probably it temporarily increases the st_nlink - field before decreasing it again.) */ - printMsg(lvlInfo, format("`%1%' has maximum number of links") % prevPath.first); - hashToPath[hash] = std::pair<Path, ino_t>(path, st.st_ino); - - /* Unlink the temp link. */ - if (unlink(tempLink.c_str()) == -1) - printMsg(lvlError, format("unable to unlink `%1%'") % tempLink); - return; - } - throw SysError(format("cannot rename `%1%' to `%2%'") - % tempLink % path); - } - } else - printMsg(lvlTalkative, format("would link `%1%' to `%2%'") % path % prevPath.first); - - stats.filesLinked++; - stats.bytesFreed += st.st_size; - stats.blocksFreed += st.st_blocks; + /* When we're done, make the directory read-only again and reset + its timestamp back to 0. */ + MakeReadOnly makeReadOnly(mustToggle ? dirOf(path) : ""); + + /* If ‘linkPath’ is immutable, we can't create hard links to it, + so make it mutable first (and make it immutable again when + we're done). We also have to make ‘path’ mutable, otherwise + rename() will fail to delete it. */ + makeMutable(linkPath); + MakeImmutable mk1(linkPath); + + makeMutable(path); + MakeImmutable mk2(path); + + if (link(linkPath.c_str(), tempLink.c_str()) == -1) { + if (errno == EMLINK) { + /* Too many links to the same file (>= 32000 on most file + systems). This is likely to happen with empty files. + Just shrug and ignore. */ + printMsg(lvlInfo, format("`%1%' has maximum number of links") % linkPath); + return; + } + throw SysError(format("cannot link `%1%' to `%2%'") % tempLink % linkPath); } - if (S_ISDIR(st.st_mode)) { - Strings names = readDirectory(path); - foreach (Strings::iterator, i, names) - hashAndLink(dryRun, hashToPath, stats, path + "/" + *i); + /* Atomically replace the old file with the new hard link. */ + if (rename(tempLink.c_str(), path.c_str()) == -1) { + if (errno == EMLINK) { + /* Some filesystems generate too many links on the rename, + rather than on the original link. (Probably it + temporarily increases the st_nlink field before + decreasing it again.) */ + printMsg(lvlInfo, format("`%1%' has maximum number of links") % linkPath); + + /* Unlink the temp link. */ + if (unlink(linkPath.c_str()) == -1) + printMsg(lvlError, format("unable to unlink `%1%'") % linkPath); + return; + } + throw SysError(format("cannot rename `%1%' to `%2%'") % tempLink % path); } + + stats.filesLinked++; + stats.bytesFreed += st.st_size; + stats.blocksFreed += st.st_blocks; } -void LocalStore::optimiseStore(bool dryRun, OptimiseStats & stats) +void LocalStore::optimiseStore(OptimiseStats & stats) { - HashToPath hashToPath; + createDirs(nixStore + "/" + linksDir); PathSet paths = queryValidPaths(); @@ -184,7 +185,7 @@ void LocalStore::optimiseStore(bool dryRun, OptimiseStats & stats) addTempRoot(*i); if (!isValidPath(*i)) continue; /* path was GC'ed, probably */ startNest(nest, lvlChatty, format("hashing files in `%1%'") % *i); - hashAndLink(dryRun, hashToPath, stats, *i); + hashAndLink(stats, *i); } } diff --git a/src/nix-store/nix-store.cc b/src/nix-store/nix-store.cc index 23863525f..82e08fecf 100644 --- a/src/nix-store/nix-store.cc +++ b/src/nix-store/nix-store.cc @@ -746,18 +746,12 @@ static void showOptimiseStats(OptimiseStats & stats) files with the same contents. */ static void opOptimise(Strings opFlags, Strings opArgs) { - if (!opArgs.empty()) + if (!opArgs.empty() || !opFlags.empty()) throw UsageError("no arguments expected"); - bool dryRun = false; - - foreach (Strings::iterator, i, opFlags) - if (*i == "--dry-run") dryRun = true; - else throw UsageError(format("unknown flag `%1%'") % *i); - OptimiseStats stats; try { - ensureLocalStore().optimiseStore(dryRun, stats); + ensureLocalStore().optimiseStore(stats); } catch (...) { showOptimiseStats(stats); throw; |