From 9488ae7357b718e09362c22f075cc5553c758214 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Tue, 19 Sep 2006 13:53:35 +0000 Subject: * `show-duplication.pl', a small utility that shows the amount of package duplication present in (e.g.) a profile. It shows the number of instances of each package in a closure, along with the size in bytes of each instance as well as the "waste" (the difference between the sum of the sizes of all instances and the average size). $ ./show-duplication.pl /nix/var/nix/profiles/default gcc 11 3.3.6 19293318 3.4.4 21425257 ... average 14942970, waste 149429707 coreutils 6 ... average package duplication 1.87628865979381, total size 3486330471, total waste 1335324237, 38.3017114443825% wasted This utility is useful for measuring the cost in terms of disk space of the Nix approach. --- scripts/show-duplication.pl | 73 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100755 scripts/show-duplication.pl (limited to 'scripts/show-duplication.pl') diff --git a/scripts/show-duplication.pl b/scripts/show-duplication.pl new file mode 100755 index 000000000..0604c6696 --- /dev/null +++ b/scripts/show-duplication.pl @@ -0,0 +1,73 @@ +#! 
#! /usr/bin/perl -w

# show-duplication.pl - report package duplication within a Nix closure.
#
# Usage: show-duplication.pl PATH
#
# Runs `nix-store -qR PATH' to list the closure of PATH, groups the
# resulting store paths by package name, and prints for every package
# the number of instances, the size in bytes of each instance, the
# average instance size and the "waste" (the summed size of all
# instances minus the average size).  A final line summarises the
# average duplication, the total size and the total waste.

use strict;
use warnings;

if (scalar @ARGV != 1) {
    print STDERR "syntax: show-duplication.pl PATH\n";
    exit 1;
}

my $root = $ARGV[0];


# A package name is a run of alphanumerics, `+' and `_'; a dash belongs
# to the name only when the character after it is not a digit, so the
# first dash followed by a digit is where the version begins.
my $nameRE    = qr/(?:(?:[A-Za-z0-9+_]|(?:-[^0-9]))+)/;
my $versionRE = qr/(?:[A-Za-z0-9.\-]+)/;


# Package name => list of { version => ..., path => ... } records.
my %pkgInstances;


# List-form pipe open: no shell is involved, and a failure to start the
# command is reported here instead of falling through to an exec in the
# parent process.
open(my $paths, "-|", "nix-store", "-qR", $root)
    or die "cannot run nix-store: $!\n";

while (my $path = <$paths>) {
    chomp $path;

    # Drop the directory part and the leading hash, leaving NAME[-VERSION].
    my ($nameVersion) = $path =~ /^.*\/[0-9a-z]*-(.*)$/;
    unless (defined $nameVersion) {
        warn "skipping unrecognised store path `$path'\n";
        next;
    }

    my ($name, $version);
    if ($nameVersion =~ /^($nameRE)(?:-($versionRE))?$/) {
        ($name, $version) = ($1, $2);
    }
    else {
        # Not of the form NAME-VERSION: use the whole string as the name.
        $name = $nameVersion;
    }
    $version = "(unnumbered)" unless defined $version;

    push @{$pkgInstances{$name}}, {version => $version, path => $path};
}

# close() on a pipe returns false when the command exited unsuccessfully.
unless (close $paths) {
    warn "nix-store -qR $root failed\n";
    exit 1;
}


# Return the size in bytes of $path: the lstat size of the entry itself
# plus, when it is a directory, the sizes of everything below it.
# Symbolic links are counted as links and are never followed.
# Dies if an entry cannot be lstat'ed or a directory cannot be opened.
sub pathSize {
    my $path = shift;

    my @st = lstat($path);
    die "cannot lstat `$path': $!\n" unless @st;

    my $size = $st[7];

    # `_' reuses the lstat result from above, so a symlink pointing at a
    # directory is not treated as a directory (a plain `-d $path' would
    # follow the link and descend into its target).
    if (-d _) {
        opendir(my $dir, $path)
            or die "cannot open directory `$path': $!\n";
        my @entries = grep { $_ ne "." && $_ ne ".." } readdir $dir;
        closedir $dir;

        foreach my $entry (@entries) {
            $size += pathSize("$path/$entry");
        }
    }

    return $size;
}


my $totalPaths = 0;
my $totalSize  = 0;
my $totalWaste = 0;

# Most duplicated packages first.
my @names = sort { scalar @{$pkgInstances{$b}} <=> scalar @{$pkgInstances{$a}} }
    keys %pkgInstances;

foreach my $name (@names) {
    my @instances = @{$pkgInstances{$name}};
    print "$name ", scalar @instances, "\n";

    my $allSize = 0;
    foreach my $x (sort { $a->{version} cmp $b->{version} } @instances) {
        $totalPaths++;
        my $size = pathSize($x->{path});
        $allSize += $size;
        print " $x->{version} $size\n";
    }

    # Waste is everything beyond one average-sized instance.
    my $avgSize = int($allSize / scalar @instances);
    my $waste   = $allSize - $avgSize;
    $totalSize  += $allSize;
    $totalWaste += $waste;
    print " average $avgSize, waste $waste\n";
}


# Guard the two divisions below against an empty closure.
my $numPackages = scalar keys %pkgInstances;
if ($numPackages == 0 || $totalSize == 0) {
    warn "no store paths found for `$root'\n";
    exit 1;
}

my $avgDupl     = $totalPaths / $numPackages;
my $wasteFactor = ($totalWaste / $totalSize) * 100;
print "average package duplication $avgDupl, total size $totalSize, total waste $totalWaste, $wasteFactor% wasted\n";

# -- cgit v1.2.3