X-Git-Url: http://git.hungrycats.org/cgi-bin/gitweb.cgi?p=dupemerge;a=blobdiff_plain;f=dm6;h=34bb654f41f9bf77c33e933557414e51e99c2a68;hp=8b45f507058e652a4093d6c51fe8fb7c57ae0091;hb=941242a8d5dfcd2fbbbd306d00ef38115f841600;hpb=b2ef2da7bbf765cff8910935cfbdedca3f7296cf diff --git a/dm6 b/dm6 index 8b45f50..34bb654 100755 --- a/dm6 +++ b/dm6 @@ -7,6 +7,7 @@ use File::Compare; use File::Path; use File::Temp; use File::stat; +use MIME::Base64; # Copyright (C) 2010 Zygo Blaxell @@ -67,7 +68,7 @@ sub link_files { } my $link_dir = shift @ARGV; -(-d $link_dir) or usage; +usage unless $link_dir; my $prefix_length = 3; @@ -103,7 +104,25 @@ sub prepare_parents { return "$parent/$suffix"; } +sub name_quad { + my ($int64) = @_; + my $packed = pack('Q', $int64); + $packed =~ s/^\0+//os; + my $base64_packed = encode_base64($packed, ''); + # Don't strip off the trailing padding since it makes the string + # so short we end up just putting it back on again. + # $base64_packed =~ s/=+$//os; + return $base64_packed; +} + +sub name_ino { + my ($ino) = @_; + return name_quad($ino) . 'I'; +} + # ext3 cannot handle more than 32000 links to a file. Leave some headroom. +# Arguably this should be configurable, but the losses are miniscule and +# the coding for option support is not. my $link_count_max = 31990; $/ = "\0"; @@ -125,7 +144,7 @@ while () { next if ($st->nlink > $link_count_max); # Check link to inode - my $inode_link = prepare_parents("$link_dir/inode", $st->ino); + my $inode_link = prepare_parents($link_dir, name_ino($st->ino)); print STDERR 'I'; my $inode_st = lstat($inode_link); my $update_links; @@ -156,7 +175,7 @@ while () { $digest =~ y:/:_:; # Check link to digest - my $digest_link = prepare_parents("$link_dir/digest", $digest); + my $digest_link = prepare_parents($link_dir, "${digest}D"); print STDERR 'D'; my $digest_st = lstat($digest_link); if ($digest_st) { @@ -170,7 +189,7 @@ while () { print STDERR "\b"; # Which file are we keeping? - my $keep_file; + my $keep_ino; # If digest link exists, link it to file if ($digest_st) { @@ -180,17 +199,18 @@ while () { # Old, replace input with old file print STDERR '-'; link_files($digest_link, $file); - $keep_file = $digest_link; + $keep_ino = $digest_st->ino; } else { # New, add input to digest print STDERR '+'; link_files($file, $digest_link); - $keep_file = $file; + $keep_ino = $st->ino; } # A link to the inode indicates we are done, so do it last - print STDERR '_'; - link_files($keep_file, $inode_link); + $inode_link = prepare_parents($link_dir, name_ino($keep_ino)); + print STDERR ' '; + link_files($digest_link, $inode_link); } }; @@ -201,9 +221,9 @@ while () { print STDERR "\nGarbage collection in '$link_dir'..."; chdir($link_dir) || die "chdir: $link_dir: $!"; print STDERR "\nRemoving files with link count < 3..."; -system("find digest inode -type f -links -3 -print0 | xargs -0 rm -f") and die "system: exit status $?"; +system("find . -type f -links -3 -print0 | xargs -0rt rm -f") and die "system: exit status $?"; print STDERR "\nRemoving empty directories..."; -system("find digest inode -type d -empty -print0 | xargs -0r rmdir -p --ignore-fail-on-non-empty") and die "system: exit status $?"; +system("find . -type d -empty -print0 | xargs -0rt rmdir -p --ignore-fail-on-non-empty") and die "system: exit status $?"; print STDERR "\nDone.\n"; exit(0);