aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorEelco Dolstra <eelco.dolstra@logicblox.com>2012-07-23 12:08:34 -0400
committerEelco Dolstra <eelco.dolstra@logicblox.com>2012-07-23 17:14:15 -0400
commit564fb7d9fa80d06397a88d69f26439727cb922c5 (patch)
treeb6944bacbc3f4d8cbac9d886686572aa6fdbbe8c /src
parented59bf7a181bb382dea7dd72da52bf91f60deb8d (diff)
optimiseStore(): Use a content-addressed file store in /nix/store/.links
optimiseStore() now creates persistent, content-addressed hard links in /nix/store/.links. For instance, if it encounters a file P with hash H, it will create a hard link P' = /nix/store/.links/<H> to P if P' doesn't already exist; if P' exists, then P is replaced by a hard link to P'. This is better than the previous in-memory map, because that map had the tendency to unnecessarily replace hard links with a hard link to whatever happened to be the first file with a given hash it encountered. It also allows on-the-fly, incremental optimisation.
Diffstat (limited to 'src')
-rw-r--r--src/libstore/local-store.hh2
-rw-r--r--src/libstore/optimise-store.cc213
-rw-r--r--src/nix-store/nix-store.cc10
3 files changed, 110 insertions, 115 deletions
diff --git a/src/libstore/local-store.hh b/src/libstore/local-store.hh
index eb7705219..1bb47fb3b 100644
--- a/src/libstore/local-store.hh
+++ b/src/libstore/local-store.hh
@@ -167,7 +167,7 @@ public:
/* Optimise the disk space usage of the Nix store by hard-linking
files with the same contents. */
- void optimiseStore(bool dryRun, OptimiseStats & stats);
+ void optimiseStore(OptimiseStats & stats);
/* Check the integrity of the Nix store. */
void verifyStore(bool checkContents);
diff --git a/src/libstore/optimise-store.cc b/src/libstore/optimise-store.cc
index 2ca98f46d..0893db9d3 100644
--- a/src/libstore/optimise-store.cc
+++ b/src/libstore/optimise-store.cc
@@ -1,6 +1,7 @@
#include "util.hh"
#include "local-store.hh"
#include "immutable.hh"
+#include "globals.hh"
#include <sys/types.h>
#include <sys/stat.h>
@@ -12,9 +13,6 @@
namespace nix {
-typedef std::map<Hash, std::pair<Path, ino_t> > HashToPath;
-
-
static void makeWritable(const Path & path)
{
struct stat st;
@@ -51,132 +49,135 @@ struct MakeImmutable
};
-static void hashAndLink(bool dryRun, HashToPath & hashToPath,
- OptimiseStats & stats, const Path & path)
+const string linksDir = ".links";
+
+
+static void hashAndLink(OptimiseStats & stats, const Path & path)
{
struct stat st;
if (lstat(path.c_str(), &st))
throw SysError(format("getting attributes of path `%1%'") % path);
+ if (S_ISDIR(st.st_mode)) {
+ Strings names = readDirectory(path);
+ foreach (Strings::iterator, i, names)
+ hashAndLink(stats, path + "/" + *i);
+ return;
+ }
+
+ /* We can hard link regular files and symlinks. */
+ if (!S_ISREG(st.st_mode) && !S_ISLNK(st.st_mode)) return;
+
/* Sometimes SNAFUs can cause files in the Nix store to be
modified, in particular when running programs as root under
NixOS (example: $fontconfig/var/cache being modified). Skip
- those files. */
+ those files. FIXME: check the modification time. */
if (S_ISREG(st.st_mode) && (st.st_mode & S_IWUSR)) {
printMsg(lvlError, format("skipping suspicious writable file `%1%'") % path);
return;
}
- /* We can hard link regular files and symlinks. */
- if (S_ISREG(st.st_mode) || S_ISLNK(st.st_mode)) {
-
- /* Hash the file. Note that hashPath() returns the hash over
- the NAR serialisation, which includes the execute bit on
- the file. Thus, executable and non-executable files with
- the same contents *won't* be linked (which is good because
- otherwise the permissions would be screwed up).
-
- Also note that if `path' is a symlink, then we're hashing
- the contents of the symlink (i.e. the result of
- readlink()), not the contents of the target (which may not
- even exist). */
- Hash hash = hashPath(htSHA256, path).first;
- stats.totalFiles++;
- printMsg(lvlDebug, format("`%1%' has hash `%2%'") % path % printHash(hash));
-
- std::pair<Path, ino_t> prevPath = hashToPath[hash];
-
- if (prevPath.first == "") {
- hashToPath[hash] = std::pair<Path, ino_t>(path, st.st_ino);
- return;
- }
-
- /* Yes! We've seen a file with the same contents. Replace
- the current file with a hard link to that file. */
- stats.sameContents++;
- if (prevPath.second == st.st_ino) {
- printMsg(lvlDebug, format("`%1%' is already linked to `%2%'") % path % prevPath.first);
- return;
- }
+ /* Hash the file. Note that hashPath() returns the hash over the
+ NAR serialisation, which includes the execute bit on the file.
+ Thus, executable and non-executable files with the same
+ contents *won't* be linked (which is good because otherwise the
+ permissions would be screwed up).
+
+ Also note that if `path' is a symlink, then we're hashing the
+ contents of the symlink (i.e. the result of readlink()), not
+ the contents of the target (which may not even exist). */
+ Hash hash = hashPath(htSHA256, path).first;
+ stats.totalFiles++;
+ printMsg(lvlDebug, format("`%1%' has hash `%2%'") % path % printHash(hash));
+
+ /* Check if this is a known hash. */
+ Path linkPath = nixStore + "/" + linksDir + "/" + printHash32(hash);
+
+ if (!pathExists(linkPath)) {
+ /* Nope, create a hard link in the links directory. */
+ makeMutable(path);
+ MakeImmutable mk1(path);
+
+ if (link(path.c_str(), linkPath.c_str()) == -1)
+ throw SysError(format("cannot link `%1%' to `%2%'") % linkPath % path);
+
+ return;
+ }
+
+ /* Yes! We've seen a file with the same contents. Replace the
+ current file with a hard link to that file. */
+ struct stat stLink;
+ if (lstat(linkPath.c_str(), &stLink))
+ throw SysError(format("getting attributes of path `%1%'") % linkPath);
+
+ stats.sameContents++;
+ if (st.st_ino == stLink.st_ino) {
+ printMsg(lvlDebug, format("`%1%' is already linked to `%2%'") % path % linkPath);
+ return;
+ }
- if (!dryRun) {
-
- printMsg(lvlTalkative, format("linking `%1%' to `%2%'") % path % prevPath.first);
+ printMsg(lvlTalkative, format("linking `%1%' to `%2%'") % path % linkPath);
- Path tempLink = (format("%1%.tmp-%2%-%3%")
- % path % getpid() % rand()).str();
+ Path tempLink = (format("%1%/.tmp-link-%2%-%3%")
+ % nixStore % getpid() % rand()).str();
- /* Make the containing directory writable, but only if
- it's not the store itself (we don't want or need to
- mess with its permissions). */
- bool mustToggle = !isStorePath(path);
- if (mustToggle) makeWritable(dirOf(path));
-
- /* When we're done, make the directory read-only again and
- reset its timestamp back to 0. */
- MakeReadOnly makeReadOnly(mustToggle ? dirOf(path) : "");
-
- /* If ‘prevPath’ is immutable, we can't create hard links
- to it, so make it mutable first (and make it immutable
- again when we're done). We also have to make ‘path’
- mutable, otherwise rename() will fail to delete it. */
- makeMutable(prevPath.first);
- MakeImmutable mk1(prevPath.first);
+ /* Make the containing directory writable, but only if it's not
+ the store itself (we don't want or need to mess with its
+ permissions). */
+ bool mustToggle = !isStorePath(path);
+ if (mustToggle) makeWritable(dirOf(path));
- makeMutable(path);
- MakeImmutable mk2(path);
-
- if (link(prevPath.first.c_str(), tempLink.c_str()) == -1) {
- if (errno == EMLINK) {
- /* Too many links to the same file (>= 32000 on
- most file systems). This is likely to happen
- with empty files. Just start over, creating
- links to the current file. */
- printMsg(lvlInfo, format("`%1%' has maximum number of links") % prevPath.first);
- hashToPath[hash] = std::pair<Path, ino_t>(path, st.st_ino);
- return;
- }
- throw SysError(format("cannot link `%1%' to `%2%'")
- % tempLink % prevPath.first);
- }
-
- /* Atomically replace the old file with the new hard link. */
- if (rename(tempLink.c_str(), path.c_str()) == -1) {
- if (errno == EMLINK) {
- /* Some filesystems generate too many links on the
- rename, rather than on the original link.
- (Probably it temporarily increases the st_nlink
- field before decreasing it again.) */
- printMsg(lvlInfo, format("`%1%' has maximum number of links") % prevPath.first);
- hashToPath[hash] = std::pair<Path, ino_t>(path, st.st_ino);
-
- /* Unlink the temp link. */
- if (unlink(tempLink.c_str()) == -1)
- printMsg(lvlError, format("unable to unlink `%1%'") % tempLink);
- return;
- }
- throw SysError(format("cannot rename `%1%' to `%2%'")
- % tempLink % path);
- }
- } else
- printMsg(lvlTalkative, format("would link `%1%' to `%2%'") % path % prevPath.first);
-
- stats.filesLinked++;
- stats.bytesFreed += st.st_size;
- stats.blocksFreed += st.st_blocks;
+ /* When we're done, make the directory read-only again and reset
+ its timestamp back to 0. */
+ MakeReadOnly makeReadOnly(mustToggle ? dirOf(path) : "");
+
+ /* If ‘linkPath’ is immutable, we can't create hard links to it,
+ so make it mutable first (and make it immutable again when
+ we're done). We also have to make ‘path’ mutable, otherwise
+ rename() will fail to delete it. */
+ makeMutable(linkPath);
+ MakeImmutable mk1(linkPath);
+
+ makeMutable(path);
+ MakeImmutable mk2(path);
+
+ if (link(linkPath.c_str(), tempLink.c_str()) == -1) {
+ if (errno == EMLINK) {
+ /* Too many links to the same file (>= 32000 on most file
+ systems). This is likely to happen with empty files.
+ Just shrug and ignore. */
+ printMsg(lvlInfo, format("`%1%' has maximum number of links") % linkPath);
+ return;
+ }
+ throw SysError(format("cannot link `%1%' to `%2%'") % tempLink % linkPath);
}
- if (S_ISDIR(st.st_mode)) {
- Strings names = readDirectory(path);
- foreach (Strings::iterator, i, names)
- hashAndLink(dryRun, hashToPath, stats, path + "/" + *i);
+ /* Atomically replace the old file with the new hard link. */
+ if (rename(tempLink.c_str(), path.c_str()) == -1) {
+ if (errno == EMLINK) {
+ /* Some filesystems generate too many links on the rename,
+ rather than on the original link. (Probably it
+ temporarily increases the st_nlink field before
+ decreasing it again.) */
+ printMsg(lvlInfo, format("`%1%' has maximum number of links") % linkPath);
+
+ /* Unlink the temp link; `linkPath' is the shared entry in .links and must stay. */
+ if (unlink(tempLink.c_str()) == -1)
+ printMsg(lvlError, format("unable to unlink `%1%'") % tempLink);
+ return;
+ }
+ throw SysError(format("cannot rename `%1%' to `%2%'") % tempLink % path);
+ }
+
+ stats.filesLinked++;
+ stats.bytesFreed += st.st_size;
+ stats.blocksFreed += st.st_blocks;
}
-void LocalStore::optimiseStore(bool dryRun, OptimiseStats & stats)
+void LocalStore::optimiseStore(OptimiseStats & stats)
{
- HashToPath hashToPath;
+ createDirs(nixStore + "/" + linksDir);
PathSet paths = queryValidPaths();
@@ -184,7 +185,7 @@ void LocalStore::optimiseStore(bool dryRun, OptimiseStats & stats)
addTempRoot(*i);
if (!isValidPath(*i)) continue; /* path was GC'ed, probably */
startNest(nest, lvlChatty, format("hashing files in `%1%'") % *i);
- hashAndLink(dryRun, hashToPath, stats, *i);
+ hashAndLink(stats, *i);
}
}
diff --git a/src/nix-store/nix-store.cc b/src/nix-store/nix-store.cc
index 23863525f..82e08fecf 100644
--- a/src/nix-store/nix-store.cc
+++ b/src/nix-store/nix-store.cc
@@ -746,18 +746,12 @@ static void showOptimiseStats(OptimiseStats & stats)
files with the same contents. */
static void opOptimise(Strings opFlags, Strings opArgs)
{
- if (!opArgs.empty())
+ if (!opArgs.empty() || !opFlags.empty())
throw UsageError("no arguments expected");
- bool dryRun = false;
-
- foreach (Strings::iterator, i, opFlags)
- if (*i == "--dry-run") dryRun = true;
- else throw UsageError(format("unknown flag `%1%'") % *i);
-
OptimiseStats stats;
try {
- ensureLocalStore().optimiseStore(dryRun, stats);
+ ensureLocalStore().optimiseStore(stats);
} catch (...) {
showOptimiseStats(stats);
throw;