X-Git-Url: http://git.hungrycats.org/cgi-bin/gitweb.cgi?p=dupemerge;a=blobdiff_plain;f=dm6;h=c824c0e517832fe418d3fb8fc7cec2278d1102c4;hp=0dcfab5778d56e17f6be1980d73fef2dc358c3da;hb=3a2190b200435c28a3099695ef65d0d983dcf56e;hpb=f34e832fb0a9b7bd96cfc05a14edd072bf4b32e0 diff --git a/dm6 b/dm6 index 0dcfab5..c824c0e 100755 --- a/dm6 +++ b/dm6 @@ -7,8 +7,9 @@ use File::Compare; use File::Path; use File::Temp; use File::stat; +use MIME::Base64; -# Copyright (C) 2010 Zygo Blaxell +# Copyright (C) 2010 Zygo Blaxell # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -40,7 +41,7 @@ sub usage { Usage: $0 link-dir Hashes a NUL-separated list of files on stdin into link-dir. -Version: 20100513.0 +Version: 0.20100518 USAGE } @@ -57,12 +58,15 @@ sub link_files { print STDERR "\bL"; link($from, $tmp_to) or die "link: $from -> $tmp_to: $!"; print STDERR "\bR"; - unless (rename($tmp_to, $to)) { - my $saved_bang = $!; - print STDERR "\bU"; - unlink($tmp_to) or warn "unlink: $tmp_to: $!"; # Try, possibly in vain, to clean up - die "rename: $tmp_to -> $from: $saved_bang"; - } + my $saved_bang; + $saved_bang = $! unless rename($tmp_to, $to); + + # If $to exists and is a hardlink to $tmp_to (or $from), + # rename returns success but $tmp_to still exists. + print STDERR "\bU"; + unlink($tmp_to) or warn "unlink: $tmp_to: $!" if -e $tmp_to; + + die "rename: $tmp_to -> $from: $saved_bang" if $saved_bang; print STDERR "\b"; } @@ -103,7 +107,21 @@ sub prepare_parents { return "$parent/$suffix"; } +sub name_ino { + my ($int64) = @_; + my $packed = pack('Q>', $int64); + $packed =~ s/^\0+//os; + my $base64_packed = encode_base64($packed, ''); + $base64_packed =~ y:/:_:; + # Don't strip off the trailing padding since it makes the string + # so short we end up just putting it back on again. + # $base64_packed =~ s/=+$//os; + return $base64_packed; +} + # ext3 cannot handle more than 32000 links to a file. Leave some headroom. +# Arguably this should be configurable, but the losses are miniscule and +# the coding for option support is not. my $link_count_max = 31990; $/ = "\0"; @@ -125,7 +143,7 @@ while () { next if ($st->nlink > $link_count_max); # Check link to inode - my $inode_link = prepare_parents($link_dir, $st->ino . 'I'); + my $inode_link = prepare_parents($link_dir, name_ino($st->ino)); print STDERR 'I'; my $inode_st = lstat($inode_link); my $update_links; @@ -156,7 +174,7 @@ while () { $digest =~ y:/:_:; # Check link to digest - my $digest_link = prepare_parents($link_dir, "${digest}D"); + my $digest_link = prepare_parents($link_dir, $digest); print STDERR 'D'; my $digest_st = lstat($digest_link); if ($digest_st) { @@ -189,7 +207,7 @@ while () { } # A link to the inode indicates we are done, so do it last - $inode_link = prepare_parents($link_dir, "${keep_ino}I"); + $inode_link = prepare_parents($link_dir, name_ino($keep_ino)); print STDERR ' '; link_files($digest_link, $inode_link); @@ -201,10 +219,10 @@ while () { # Garbage collection print STDERR "\nGarbage collection in '$link_dir'..."; chdir($link_dir) || die "chdir: $link_dir: $!"; -print STDERR "\nRemoving files with link count < 3..."; -system("find . -type f -links -3 -print0 | xargs -0 rm -f") and die "system: exit status $?"; +print STDERR "\nRemoving files with link count < 3 and temporary links..."; +system('find . -type f \( -links -3 -o -name ".*" \) -print0 | xargs -0rt rm -f') and die "system: exit status $?"; print STDERR "\nRemoving empty directories..."; -system("find . -type d -empty -print0 | xargs -0r rmdir -p --ignore-fail-on-non-empty") and die "system: exit status $?"; +system("find . -type d -empty -print0 | xargs -0rt rmdir -p --ignore-fail-on-non-empty") and die "system: exit status $?"; print STDERR "\nDone.\n"; exit(0);