aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Eelco Dolstra <e.dolstra@tudelft.nl> 2007-10-09 22:14:27 +0000
committer Eelco Dolstra <e.dolstra@tudelft.nl> 2007-10-09 22:14:27 +0000
commit a8629de827e4d5a67372614727ce6fcc26423f8c (patch)
tree bf1cffcf63a74e41ec48fb7e12918d57979fc763 /src
parent 27a0662828cb5ac9da198f35754750f12628d546 (diff)
* New command `nix-store --optimise' to reduce Nix store disk space
usage by finding identical files in the store and hard-linking them to each other. It typically reduces the size of the store by something like 25-35%. This is what the optimise-store.pl script did, but the new command is faster and more correct (it's safe wrt garbage collection and concurrent builds).
Diffstat (limited to 'src')
-rw-r--r-- src/libstore/local-store.cc 125
-rw-r--r-- src/libstore/local-store.hh 18
-rw-r--r-- src/nix-store/help.txt 1
-rw-r--r-- src/nix-store/nix-store.cc 52
4 files changed, 188 insertions, 8 deletions
diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc
index 4378f0ba6..c77ab3c6c 100644
--- a/src/libstore/local-store.cc
+++ b/src/libstore/local-store.cc
@@ -174,7 +174,7 @@ void copyPath(const Path & src, const Path & dst, PathFilter & filter)
}
-static void _canonicalisePathMetaData(const Path & path)
+static void _canonicalisePathMetaData(const Path & path, bool recurse)
{
checkInterrupt();
@@ -223,17 +223,17 @@ static void _canonicalisePathMetaData(const Path & path)
}
- if (S_ISDIR(st.st_mode)) {
+ if (recurse && S_ISDIR(st.st_mode)) {
Strings names = readDirectory(path);
for (Strings::iterator i = names.begin(); i != names.end(); ++i)
- _canonicalisePathMetaData(path + "/" + *i);
+ _canonicalisePathMetaData(path + "/" + *i, true);
}
}
void canonicalisePathMetaData(const Path & path)
{
- _canonicalisePathMetaData(path);
+ _canonicalisePathMetaData(path, true);
/* On platforms that don't have lchown(), the top-level path can't
be a symlink, since we can't change its ownership. */
@@ -625,7 +625,7 @@ void LocalStore::exportPath(const Path & path, bool sign,
consistent metadata. */
Transaction txn(nixDB);
addTempRoot(path);
- if (!isValidPath(path))
+ if (!isValidPathTxn(txn, path))
throw Error(format("path `%1%' is not valid") % path);
HashAndWriteSink hashAndWriteSink(sink);
@@ -950,6 +950,121 @@ void verifyStore(bool checkContents)
}
+/* Maps the hash of a file's contents to the first path seen with
+   that hash, together with that path's inode number. */
+typedef std::map<Hash, std::pair<Path, ino_t> > HashToPath;
+
+
+/* Add or remove write permission on `path'.  When `writable' is
+   true the owner's write bit is set; otherwise the write bits for
+   owner, group and others are all cleared.  Throws SysError if the
+   path cannot be stat'ed or its mode cannot be changed. */
+static void toggleWritable(const Path & path, bool writable)
+{
+    struct stat info;
+    if (lstat(path.c_str(), &info) != 0)
+        throw SysError(format("getting attributes of path `%1%'") % path);
+
+    const mode_t allWriteBits = S_IWUSR | S_IWGRP | S_IWOTH;
+    mode_t newMode = writable
+        ? (info.st_mode | S_IWUSR)
+        : (info.st_mode & ~allWriteBits);
+
+    if (chmod(path.c_str(), newMode) == -1)
+        throw SysError(format("changing writability of `%1%'") % path);
+}
+
+
+/* Recursively walk `path', hashing every regular file and symlink
+   and replacing each one whose contents were seen before by a hard
+   link to the path recorded for that hash in `hashToPath'.  The
+   counters in `stats' are updated along the way.
+
+   NOTE: `dryRun' is only passed through to the recursive calls; it
+   is not consulted before linking, so files are always linked.
+
+   Throws SysError if a path cannot be stat'ed, linked or renamed. */
+static void hashAndLink(bool dryRun, HashToPath & hashToPath,
+    OptimiseStats & stats, const Path & path)
+{
+    struct stat st;
+    if (lstat(path.c_str(), &st))
+        throw SysError(format("getting attributes of path `%1%'") % path);
+
+    /* Sometimes SNAFUs can cause files in the Nix store to be
+       modified, in particular when running programs as root under
+       NixOS (example: $fontconfig/var/cache being modified).  Skip
+       those files. */
+    if (S_ISREG(st.st_mode) && (st.st_mode & S_IWUSR)) {
+        printMsg(lvlError, format("skipping suspicious writable file `%1%'") % path);
+        return;
+    }
+
+    /* We can hard link regular files and symlinks. */
+    if (S_ISREG(st.st_mode) || S_ISLNK(st.st_mode)) {
+
+        /* Hash the file.  Note that hashPath() returns the hash over
+           the NAR serialisation, which includes the execute bit on
+           the file.  Thus, executable and non-executable files with
+           the same contents *won't* be linked (which is good because
+           otherwise the permissions would be screwed up).
+
+           Also note that if `path' is a symlink, then we're hashing
+           the contents of the symlink (i.e. the result of
+           readlink()), not the contents of the target (which may not
+           even exist). */
+        Hash hash = hashPath(htSHA256, path);
+        stats.totalFiles++;
+        printMsg(lvlDebug, format("`%1%' has hash `%2%'") % path % printHash(hash));
+
+        /* Look the hash up once, without default-inserting an entry
+           or copying the stored pair. */
+        HashToPath::iterator seen = hashToPath.find(hash);
+        if (seen == hashToPath.end() || seen->second.first == "") {
+            hashToPath[hash] = std::pair<Path, ino_t>(path, st.st_ino);
+            return;
+        }
+        const std::pair<Path, ino_t> & prevPath = seen->second;
+
+        /* Yes!  We've seen a file with the same contents.  Replace
+           the current file with a hard link to that file. */
+        stats.sameContents++;
+        if (prevPath.second == st.st_ino) {
+            printMsg(lvlDebug, format("`%1%' is already linked to `%2%'") % path % prevPath.first);
+            return;
+        }
+
+        printMsg(lvlTalkative, format("linking `%1%' to `%2%'") % path % prevPath.first);
+
+        Path tempLink = (format("%1%.tmp-%2%-%3%")
+            % path % getpid() % rand()).str();
+
+        const Path dir = dirOf(path);
+        toggleWritable(dir, true);
+
+        if (link(prevPath.first.c_str(), tempLink.c_str()) == -1) {
+            /* Build the error before cleaning up, since the cleanup
+               calls may clobber errno (assumes SysError reads errno
+               when constructed -- TODO confirm). */
+            SysError e(format("cannot link `%1%' to `%2%'")
+                % tempLink % prevPath.first);
+            /* Don't leave the directory writable.  A failure here is
+               deliberately ignored so the original error is the one
+               reported. */
+            try { _canonicalisePathMetaData(dir, false); } catch (...) { }
+            throw e;
+        }
+
+        /* Atomically replace the old file with the new hard link. */
+        if (rename(tempLink.c_str(), path.c_str()) == -1) {
+            SysError e(format("cannot rename `%1%' to `%2%'")
+                % tempLink % path);
+            /* Best-effort cleanup: drop the stray temporary link and
+               make the directory read-only again. */
+            unlink(tempLink.c_str());
+            try { _canonicalisePathMetaData(dir, false); } catch (...) { }
+            throw e;
+        }
+
+        /* Make the directory read-only again and reset its timestamp
+           back to 0. */
+        _canonicalisePathMetaData(dir, false);
+
+        stats.filesLinked++;
+        stats.bytesFreed += st.st_size;
+    }
+
+    if (S_ISDIR(st.st_mode)) {
+        Strings names = readDirectory(path);
+        for (Strings::iterator i = names.begin(); i != names.end(); ++i)
+            hashAndLink(dryRun, hashToPath, stats, path + "/" + *i);
+    }
+}
+
+
+/* Hard-link identical files across all paths listed in the
+   dbValidPaths table.  `dryRun' and `stats' are forwarded to
+   hashAndLink(), which does the per-path work and updates `stats'. */
+void LocalStore::optimiseStore(bool dryRun, OptimiseStats & stats)
+{
+    HashToPath hashToPath;
+
+    Paths paths;
+    nixDB.enumTable(noTxn, dbValidPaths, paths);
+
+    for (Paths::iterator i = paths.begin(); i != paths.end(); ++i) {
+        /* Register a temp root first, then re-check validity: the
+           path may have disappeared since the table was read. */
+        addTempRoot(*i);
+        if (!isValidPath(*i)) continue; /* path was GC'ed, probably */
+        startNest(nest, lvlChatty, format("hashing files in `%1%'") % *i);
+        hashAndLink(dryRun, hashToPath, stats, *i);
+    }
+}
+
+
/* Upgrade from schema 1 (Nix <= 0.7) to schema 2 (Nix >= 0.8). */
static void upgradeStore07()
{
diff --git a/src/libstore/local-store.hh b/src/libstore/local-store.hh
index 8bd37bc0a..6c366167f 100644
--- a/src/libstore/local-store.hh
+++ b/src/libstore/local-store.hh
@@ -21,6 +21,20 @@ const int nixSchemaVersion = 4;
extern string drvsLogDir;
+/* Counters reported by LocalStore::optimiseStore().  All of them
+   start at zero. */
+struct OptimiseStats
+{
+    unsigned long totalFiles;       /* files that were hashed */
+    unsigned long sameContents;     /* files whose hash was seen before */
+    unsigned long filesLinked;      /* files replaced by a hard link */
+    unsigned long long bytesFreed;  /* summed sizes of the replaced files */
+
+    OptimiseStats()
+        : totalFiles(0)
+        , sameContents(0)
+        , filesLinked(0)
+        , bytesFreed(0)
+    {
+    }
+};
+
+
class LocalStore : public StoreAPI
{
private:
@@ -83,6 +97,10 @@ public:
void collectGarbage(GCAction action, const PathSet & pathsToDelete,
bool ignoreLiveness, PathSet & result, unsigned long long & bytesFreed);
+
+ /* Optimise the disk space usage of the Nix store by hard-linking
+ files with the same contents. */
+ void optimiseStore(bool dryRun, OptimiseStats & stats);
};
diff --git a/src/nix-store/help.txt b/src/nix-store/help.txt
index 0662f6796..14b83a06c 100644
--- a/src/nix-store/help.txt
+++ b/src/nix-store/help.txt
@@ -21,6 +21,7 @@ Operations:
--init: initialise the Nix database
--verify: verify Nix structures
+ --optimise: optimise the Nix store by hard-linking identical files
--version: output version information
--help: display help
diff --git a/src/nix-store/nix-store.cc b/src/nix-store/nix-store.cc
index 176dc39f9..678ce2ae9 100644
--- a/src/nix-store/nix-store.cc
+++ b/src/nix-store/nix-store.cc
@@ -466,6 +466,13 @@ static void opCheckValidity(Strings opFlags, Strings opArgs)
}
+/* Render a byte count as "<n> bytes (<m> MiB)", with the MiB figure
+   printed to two decimal places. */
+static string showBytes(unsigned long long bytes)
+{
+    const double mebibytes = bytes / (1024.0 * 1024.0);
+    format rendered("%d bytes (%.2f MiB)");
+    rendered % bytes % mebibytes;
+    return rendered.str();
+}
+
+
struct PrintFreed
{
bool show, dryRun;
@@ -477,9 +484,9 @@ struct PrintFreed
if (show)
cout << format(
(dryRun
- ? "%d bytes would be freed (%.2f MiB)\n"
- : "%d bytes freed (%.2f MiB)\n"))
- % bytesFreed % (bytesFreed / (1024.0 * 1024.0));
+ /* showBytes() already renders the MiB figure, so both
+    messages take exactly one argument. */
+ ? "%1% would be freed\n"
+ : "%1% freed\n"))
+ % showBytes(bytesFreed);
}
};
@@ -614,6 +621,43 @@ static void opVerify(Strings opFlags, Strings opArgs)
}
+
+/* Print a one-line summary of `stats' at level lvlError: the bytes
+   freed, the number of files linked, the number of files with equal
+   contents and the total number of files. */
+static void showOptimiseStats(OptimiseStats & stats)
+{
+    format summary("%1% freed by hard-linking %2% files; there are %3% files with equal contents out of %4% files in total");
+    summary % showBytes(stats.bytesFreed);
+    summary % stats.filesLinked;
+    summary % stats.sameContents;
+    summary % stats.totalFiles;
+    printMsg(lvlError, summary);
+}
+
+
+/* Optimise the disk space usage of the Nix store by hard-linking
+   files with the same contents.  Takes no flags and no arguments.
+   The statistics are printed whether or not the run succeeds. */
+static void opOptimise(Strings opFlags, Strings opArgs)
+{
+    if (!opArgs.empty())
+        throw UsageError("no arguments expected");
+
+    /* No flags are supported, so the first one given is reported. */
+    if (!opFlags.empty())
+        throw UsageError(format("unknown flag `%1%'") % opFlags.front());
+
+    LocalStore * store2(dynamic_cast<LocalStore *>(store.get()));
+    if (!store2) throw Error("you don't have sufficient rights to use --optimise");
+
+    OptimiseStats stats;
+    try {
+        /* This operation really modifies the store, so don't
+           describe it to the store as a dry run. */
+        store2->optimiseStore(false, stats);
+    } catch (...) {
+        /* Report what was done so far before propagating the error. */
+        showOptimiseStats(stats);
+        throw;
+    }
+    showOptimiseStats(stats);
+}
+
+
/* Scan the arguments; find the operation, set global flags, put all
other flags in a list, and put all other arguments in another
list. */
@@ -659,6 +703,8 @@ void run(Strings args)
op = opInit;
else if (arg == "--verify")
op = opVerify;
+ else if (arg == "--optimise")
+ op = opOptimise;
else if (arg == "--add-root") {
if (i == args.end())
throw UsageError("`--add-root requires an argument");