Implement --dry-run and --humane options
authorcvs <cvs@a5e33b96-951a-0410-ae88-c0fe16d076bb>
Sat, 7 Jan 2006 08:44:02 +0000 (08:44 +0000)
committerZygo Blaxell <zblaxell@waya.furryterror.org>
Sat, 9 Jan 2010 02:17:28 +0000 (21:17 -0500)
git-svn-id: svn+ssh://svn.furryterror.org/r/trunk/mokona/zblaxell@4518 a5e33b96-951a-0410-ae88-c0fe16d076bb

faster-dupemerge

index b7554dee59e78b32f89b88772db070ea58e2321f..18d88cd00385815cd376e9da0c53436a0d2072f3 100755 (executable)
@@ -79,6 +79,8 @@ my $skip_compares = 0;
 my $skip_hashes = 0;
 my $verbose = 0;
 my $debug = 0;
+my $dry_run = 0;
+my $humane = 0;
 my @extra_find_opts = ();
 my @extra_sort_opts = ();
 my $lock_file;
@@ -120,8 +122,12 @@ hard links).
         --debug         show all steps in duplication discovery process
                         (implies --verbose)
 
+       --dry-run       do not lock files or make changes to filesystem
+
         --find          pass next options (up to --) to find command
 
+       --humane        human-readable statistics (e.g. 1 048 576)
+
         --lock FILE     exit immediately (status 10) if unable to obtain a 
                         flock(LOCK_EX|LOCK_NB) on FILE
 
@@ -168,6 +174,10 @@ while ($#ARGV >= 0) {
                }
        } elsif ($arg eq '--debug') {
                $debug = $verbose = 1;
+       } elsif ($arg eq '--dry-run') {
+               $dry_run = 1;
+       } elsif ($arg eq '--humane') {
+               $humane = 1;
        } elsif ($arg eq '--find') {
                while ($#ARGV >= 0) {
                        my $extra_arg = shift(@ARGV);
@@ -192,7 +202,9 @@ if ($skip_hashes && $skip_compares) {
        die "Cannot skip both hashes and compares.\n";
 }
 
-if (defined($lock_file)) {
+@directories or usage;
+
+if (defined($lock_file) && !$dry_run) {
        sysopen(LOCK_FILE, $lock_file, O_CREAT|O_RDONLY, 0666) or die "open: $lock_file: $!";
        flock(LOCK_FILE, LOCK_EX|LOCK_NB) or die "flock: $lock_file: LOCK_EX|LOCK_NB: $!";
        print STDERR "Locked '$lock_file' in LOCK_EX mode.\n" if $verbose;
@@ -200,7 +212,7 @@ if (defined($lock_file)) {
 }
 
 END {
-       if ($lock_obtained) {
+       if ($lock_obtained && !$dry_run) {
                print STDERR "Removing '$lock_file'.\n" if $verbose;
                unlink($lock_file) or warn "unlink: $lock_file: $!";
        }
@@ -244,14 +256,18 @@ my %inode_to_file_name = ();
 # Link files
 sub link_files {
        my ($from, $to) = (@_);
+
+       my $quoted_from = tick_quote($from);
+       my $quoted_to = tick_quote($to);
+       print STDERR "ln -f $quoted_from $quoted_to\n";
+
+       return if $dry_run;
+
        my $inode_dir = $to;
        my $inode_base = $to;
        $inode_dir =~ s:[^/]*$::o;
        $inode_base =~ s:^.*/::os;
        my $tmp_to = File::Temp::tempnam($inode_dir, ".$inode_base.");
-       my $quoted_from = tick_quote($from);
-       my $quoted_to = tick_quote($to);
-       print STDERR "ln -f $quoted_from $quoted_to\n";
        print STDERR "\tlink: $from -> $tmp_to\n" if $debug;
        link($from, $tmp_to) or die "link: $from -> $tmp_to: $!";
        print STDERR "\trename: $tmp_to -> $to\n" if $debug;
@@ -461,9 +477,13 @@ candidate_file:
                                                                        # My random number generator chooses the incumbent's size.
 
                                                                        if ($link_done) {
+                                                                               # Since we're in a dry run, the filesystem doesn't change.
+                                                                               # Our notion of what the filesystem should look like should not change either.
                                                                                delete $inode_to_file_name{$to_inode}->{$to_file};
-                                                                               $inode_to_file_name{$from_inode}->{$to_file} = undef;
-                                                                               $hash_to_inode{$digest} = $from_inode;
+                                                                               unless ($dry_run) {
+                                                                                       $inode_to_file_name{$from_inode}->{$to_file} = undef;
+                                                                                       $hash_to_inode{$digest} = $from_inode;
+                                                                               }
 
                                                                                $hard_links++;
                                                                                if ($to_nlink == 1) {
@@ -553,6 +573,28 @@ surprises               $surprises
 trivially_unique        $trivially_unique
 STATS
 
+if ($humane) {
+       my $max_num_len = 0;    # widest number seen, measured after digit grouping
+       # Pass 1 helper: record the width a number will have once its digits are grouped in threes.
+       sub measure_numbers {
+               my ($num) = @_;
+               my $len = length($num);
+               $len += int( (length($num) - 1) / 3);   # one separator between each pair of groups
+               $max_num_len = $len if $len > $max_num_len;
+       }
+       # Run the measuring pass over a throwaway copy; only the side effect on $max_num_len matters.
+       (my $dummy = $stats_blob) =~ s/\d+/measure_numbers($&)/geos;
+       # Pass 2 helper: return $num as space-separated groups of three, left-padded to $max_num_len.
+       sub space_numbers {
+               my ($num) = @_;
+               1 while $num =~ s/^(\d+)(\d\d\d)/$1 $2/os;      # peel 3 digits off the leading run; a repeated capture here would keep only its last group
+               $num = ' ' x ($max_num_len - length($num)) . $num;
+               return $num;
+       }
+       # Rewrite every number in the statistics text in place.
+       $stats_blob =~ s/\d+/space_numbers($&)/geos;
+}
+
 $stats_blob =~ s/([^\n]*\n[^\n]*? )(\s+)( [^\n]*\n)/$1 . ('.' x length($2)) . $3/oemg;
 
 print STDERR $stats_blob;