dm6: bump version to 0.20101024, more comprehensive garbage collector
[dupemerge] / faster-dupemerge
index 683ed32917c6a72d8e15b52fc1e5aadc518b2b95..4cdef9716fd5e5de600b9810a198936ca9d46301 100755 (executable)
@@ -155,6 +155,7 @@ hard links).
                         S, s - lstat(2) (see source for details)
                         U - unlink(2)
                         . - all inodes with similar attributes done
+                        (123456) - current file size in bytes
 
         --sort          pass next options (up to --) to sort command
 
@@ -185,7 +186,7 @@ while ($#ARGV >= 0) {
        } elsif ($arg eq '--zeros') {
                $collapse_zero = 1;
        } elsif ($arg eq '--trust' || $arg eq '--skip-compare') {
-               $skip_compare_preference = 1;
+               $skip_compares = $skip_compare_preference = 1;
        } elsif ($arg =~ /^--skip-hash(?:=(\d+)([KkMmGgTt]?))?$/os) {
                my ($quantity, $unit) = ($1, $2);
                $unit ||= '_';
@@ -241,7 +242,7 @@ while ($#ARGV >= 0) {
        }
 }
 
-@directories or usage;
+@directories or usage($0);
 
 if (defined($lock_file) && !$dry_run) {
        sysopen(LOCK_FILE, $lock_file, O_CREAT|O_RDONLY, 0666) or die "open: $lock_file: $!";
@@ -355,6 +356,7 @@ hash_file:
                foreach my $filename (sort keys(%{$inode_to_file_name{$candidate}})) {
                        print STDERR "\t\tDigesting file $filename\n" if $debug;
                        if ((-l $filename) || ! -f _) {
+                               print STDERR "\n" if $progress;
                                warn "Bogon file " . tick_quote($filename);
                                $input_bogons++;
                                delete $inode_to_file_name{$candidate}->{$filename};
@@ -403,7 +405,7 @@ incumbent_file:
                                                }
 
                                                if (format_inode($incumbent_dev, $incumbent_ino) ne $incumbent) {
-                                                       warn "$incumbent_file: expected inode $incumbent, found $incumbent_dev:$incumbent_ino";
+                                                       warn "$incumbent_file: expected inode $incumbent, found ".format_inode($incumbent_dev, $incumbent_ino);
                                                        $surprises++;
                                                        next incumbent_file;
                                                }
@@ -424,7 +426,7 @@ candidate_file:
                                                        }
 
                                                        if (format_inode($candidate_dev, $candidate_ino) ne $candidate) {
-                                                               warn "$candidate_file: expected inode $candidate, found $candidate_dev:$candidate_ino";
+                                                               warn "$candidate_file: expected inode $candidate, found ".format_inode($candidate_dev, $candidate_ino);
                                                                $surprises++;
                                                                next candidate_file;
                                                        }
@@ -540,7 +542,7 @@ candidate_file:
 
                                                                        if ($link_done) {
                                                                                delete $inode_to_file_name{$to_inode}->{$to_file};
-                                                                               $inode_to_file_name{$from_inode}->{$to_file} = undef;
+                                                                               $inode_to_file_name{$from_inode}->{$to_file} = undef unless ($dry_run);
                                                                                $hash_to_inode{$digest} = [ $from_inode ];
 
                                                                                $hard_links++;
@@ -588,6 +590,7 @@ end_merge:
 }
 
 my $last_time = 0;
+my $last_size = 0;
 
 while (<FIND>) {
        my ($weak_key, $size, $dev, $ino, $name) = m/^((\d+) \d+ \d+ \d+ -?[\d.]+) (\d+) (\d+) (.+)\0$/so;
@@ -597,7 +600,7 @@ while (<FIND>) {
 
        print STDERR "weak_key=$weak_key inode=$inode name=$name\n" if $debug;
 
-       if ($size >= $skip_hashes_threshold) {
+       if ($skip_hashes_threshold && $size >= $skip_hashes_threshold) {
                $skip_hashes = 1;
                $skip_compares = 0;
        } else {
@@ -607,8 +610,9 @@ while (<FIND>) {
 
        if ($progress) {
                my $this_time = time();
-               if ($this_time != $last_time) {
+               if ($this_time != $last_time && $size != $last_size) {
                        $last_time = $this_time;
+                       $last_size = $size;
                        print STDERR "($size)";
                }
        }
@@ -661,7 +665,7 @@ if ($humane) {
 
        sub space_numbers {
                my ($num) = @_;
-               1 while $num =~ s/(\d)(\d\d\d)( \d\d\d)*$/$1 $2$3/os;
+               1 while $num =~ s/(\d)(\d\d\d)((?: \d\d\d)*)$/$1 $2$3/os;
                $num = ' ' x ($max_num_len - length($num)) . $num;
                return $num;
        }