use File::Compare;
use File::Path;
use File::Temp;
+use File::stat;
# Copyright (C) 2010 Zygo Blaxell <dm5@mailtoo.hungrycats.org>
sub link_files {
my ($from, $to) = (@_);
+ print STDERR "link '$from' '$to' ...";
my $inode_dir = $to;
my $inode_base = $to;
$inode_dir =~ s:[^/]*$::o;
unlink($tmp_to) or warn "unlink: $tmp_to: $!"; # Try, possibly in vain, to clean up
die "rename: $tmp_to -> $from: $saved_bang";
}
+ print STDERR "\n";
}
# The first remaining command-line argument names the link directory.  If it
# is not an existing directory, usage is invoked (usage is defined outside
# this excerpt; presumably it prints help and exits -- confirm).
my $link_dir = shift @ARGV;
(-d $link_dir) or usage;
+# Split $file into a sharding prefix and the remainder of the name.
+#
+# The first three characters (fewer if $file is shorter) are joined with "/",
+# so "abcdef" yields ("a/b/c", "def").  The remainder is the empty string
+# when $file has three characters or fewer.
+#
+# Returns the two-element list ($prefix, $suffix).
+sub slash_prefix {
+    my ($file) = @_;
+    my $prefix = substr($file, 0, 3);
+    # substr() with an offset beyond the end of the string returns undef and
+    # warns, so only take the tail when there actually is one.
+    my $suffix = length($file) > 3 ? substr($file, 3) : '';
+    return (join('/', split(//, $prefix)), $suffix);
+}
+
+# Map $file (a name such as an inode number or digest string) to its sharded
+# path beneath $link_dir, creating the shard directories as needed.
+#
+# The name is split by slash_prefix() into a "a/b/c" directory prefix and a
+# remainder; the result is "$link_dir/a/b/c/remainder".
+#
+# Dies if the name is too short to leave a remainder, or if the shard
+# directory does not exist after mkpath().
+sub prepare_parents {
+    my ($link_dir, $file) = @_;
+    my ($prefix, $suffix) = slash_prefix($file);
+
+    # With no remainder the returned path would name the shard directory
+    # itself, which can never be used as a link name.
+    die "prepare_parents: '$file' is too short to shard"
+        unless defined $suffix && length $suffix;
+
+    my $parent = "$link_dir/$prefix";
+    mkpath($parent, { verbose => 1 });
+    die "mkpath: $parent: $!" unless -d $parent;
+
+    # $parent already ends in $prefix; appending it again would point into a
+    # directory that mkpath() never created.
+    return "$parent/$suffix";
+}
+
+# ext3 cannot handle more than 32000 links to a file. Leave some headroom.
+my $link_count_max = 31990;
+
+# Main loop.  File names arrive on STDIN as NUL-terminated records.  For each
+# regular file:
+#   1. look up a link named after its inode under "$link_dir/inode"; if that
+#      link exists and refers to the same dev/inode, the file is skipped;
+#   2. otherwise compute its digest and look up "$link_dir/digest/...";
+#   3. if a digest link exists, verify the contents match and replace the
+#      file with a link to it; if not, the file becomes the digest link;
+#   4. finally record the inode link.
+# Errors for one file are caught by the eval and reported with warn, and the
+# loop continues with the next file.
$/ = "\0";
while (<STDIN>) {
my $file = $_;
eval {
- chomp $file;
- print STDERR "digest($file) = ";
- my $digest = digest($file);
- $digest =~ y:/:_:;
- print STDERR "$digest\n";
- $digest =~ s:^(.)(.)(.):$1/$2/$3:osg;
- my ($parent) = ($digest =~ m:^(.*/):osg);
- $parent = "$link_dir/$parent";
- mkpath($parent, { verbose => 1 });
- die "mkpath: $parent: $!" unless -d $parent;
- link_files($file, "$link_dir/$digest");
+        # Single-iteration loop: lets `next` act as an early exit from the
+        # per-file work while staying inside the eval.
+        for (1) {
+            chomp $file;
+
+            # Get file stat data
+            my $st = lstat($file);
+            die "lstat: $file: $!" unless $st;
+
+            # Only regular files are processed.
+            # NOTE(review): this relies on the `_` stat buffer left by the
+            # lstat just above; File::stat documents caveats about `_` --
+            # confirm this is reliable on the Perl versions in use.
+            next unless -f _;
+
+            # Skip the file if it has far too many links already
+            next if ($st->nlink > $link_count_max);
+
+            # Check link to inode
+            my $inode_link = prepare_parents("$link_dir/inode", $st->ino);
+            my $inode_st = lstat($inode_link);
+            my $update_links;
+            if ($inode_st) {
+                my $inode_dev = $inode_st->dev;
+                my $inode_ino = $inode_st->ino;
+                my $file_dev = $st->dev;
+                my $file_ino = $st->ino;
+                if ($inode_ino != $file_ino || $inode_dev != $file_dev) {
+                    warn "inode link '$inode_link' is wrong (inode $inode_ino should be $file_ino)" if $inode_ino != $file_ino;
+                    warn "inode link '$inode_link' is wrong (dev $inode_dev should be $file_dev)" if $inode_dev != $file_dev;
+                    $update_links = 1;
+                }
+            } else {
+                # No inode link yet, so this inode has not been recorded
+                $update_links = 1;
+            }
+
+            # If neither criteria for updating link is met, leave it as-is
+            next unless $update_links;
+
+            # Compute digest
+            print STDERR "digest($file) = ";
+            my $digest = digest($file);
+
+            # Base64 uses /, we prefer _
+            $digest =~ y:/:_:;
+
+            print STDERR "$digest\n";
+
+            # Check link to digest
+            my $digest_link = prepare_parents("$link_dir/digest", $digest);
+            my $digest_st = lstat($digest_link);
+            if ($digest_st) {
+                my $digest_nlink = $digest_st->nlink;
+                # Same limit as the per-file check above: a digest link that
+                # is nearly out of link slots is dropped so that this file
+                # can start a fresh one.
+                if ($digest_nlink > $link_count_max) {
+                    print STDERR "Removing '$digest_link' with $digest_nlink links\n";
+                    unlink($digest_link) or die "unlink: $digest_link: $!";
+                    undef $digest_st;
+                }
+            }
+
+            # If digest link exists, link it to file
+            if ($digest_st) {
+                print STDERR "cmp '$digest_link' '$file' ...";
+                # compare() returns 0 when equal, 1 when different and -1
+                # on error; report an I/O error as such rather than as a
+                # content mismatch.
+                my $cmp = compare($digest_link, $file);
+                die "compare: $digest_link <-> $file: $!" if $cmp < 0;
+                die "NOT identical!" if $cmp;
+                print STDERR "\n";
+                link_files($digest_link, $file);
+            } else {
+                link_files($file, $digest_link);
+            }
+
+            # A link to the inode indicates we are done, so do it last
+            link_files($file, $inode_link);
+
+        }
};
warn "$file: $@" if $@;
}