From 6dbed1bf38cd881ebf13aae39a4d988e4bd57f79 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Tue, 19 Sep 2006 14:58:35 +0000 Subject: * `optimise-store.pl' reduces disk space consumption by hard-linking all identical files in the Nix store to each other. (Previously it only computed the size that would be saved by doing so.) --- scripts/optimise-store.pl | 56 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 40 insertions(+), 16 deletions(-) diff --git a/scripts/optimise-store.pl b/scripts/optimise-store.pl index 7bad1d5e5..1ed89272d 100755 --- a/scripts/optimise-store.pl +++ b/scripts/optimise-store.pl @@ -1,61 +1,85 @@ #! /usr/bin/perl -w use strict; +use File::Basename; -#{ my $ofh = select STDOUT; -# $| = 1; -# select $ofh; -#} -#my @paths = ("/nix/store/d49mc94xwwd7wf1xzfh4ch4cypn0ajjr-glibc-2.3.6", "/nix/store/1mgfgy3ga4m9z60747s0yzxl0g6w5kxz-glibc-2.3.6"); my @paths = ("/nix/store"); + +print "hashing...\n"; + my $hashList = "/tmp/nix-optimise-hash-list"; system("find @paths -type f -print0 | xargs -0 md5sum -- > $hashList") == 0 or die "cannot hash store files"; + +print "sorting by hash...\n"; + system("sort $hashList > $hashList.sorted") == 0 or die "cannot sort list"; + +sub atomicLink { + my $target = shift; + my $new = shift; + my $tmpNew = "${new}_optimise.$$"; + + # Make the directory writable temporarily. 
+ my $dir = dirname $new; + my @st = stat $dir or die; + + chmod ($st[2] | 0200, $dir) or die "cannot make `$dir' writable: $!"; + + link $target, $tmpNew or die "cannot create hard link `$tmpNew': $!"; + + rename $tmpNew, $new or die "cannot rename `$tmpNew' to `$new': $!"; + + chmod ($st[2], $dir) or die "cannot restore permission on `$dir': $!"; +} + + +print "hard-linking...\n"; + open LIST, "<$hashList.sorted" or die; my $prevFile; my $prevHash; +my $prevInode; my $totalSpace = 0; my $savedSpace = 0; -my $files = 0; - while (<LIST>) { /^([0-9a-f]*)\s+(.*)$/ or die; my $curFile = $2; my $curHash = $1; - my $fileSize = (stat $curFile)[7]; + my @st = stat $curFile or die; + next if ($st[2] & 0222) != 0; # skip writable files + + my $fileSize = $st[7]; $totalSpace += $fileSize; if (defined $prevHash && $curHash eq $prevHash) { - print "$curFile = $prevFile\n"; - - $savedSpace += $fileSize; + if ($st[1] != $prevInode) { + print "$curFile = $prevFile\n"; + atomicLink $prevFile, $curFile; + $savedSpace += $fileSize; + } } else { $prevFile = $curFile; $prevHash = $curHash; + $prevInode = $st[1]; } - -# print "." if ($files++ % 100 == 0); } -#print "\n"; - print "total space = $totalSpace\n"; print "saved space = $savedSpace\n"; my $savings = ($savedSpace / $totalSpace) * 100.0; print "savings = $savings %\n"; - close LIST; -- cgit v1.2.3