dupemerge: don't stat during the file collection loop
[dupemerge] / faster-dupemerge
index f1413b47e10a7d60364c14d532efdcdb049a76f0..aa8b142ceeab5bc2390aa7fcc4abef9a10719cc5 100755 (executable)
@@ -134,24 +134,33 @@ hard links).
         --debug         show all steps in duplication discovery process
                         (implies --verbose)
 
-       --dry-run       do not lock files or make changes to filesystem
+        --dry-run       do not lock files or make changes to filesystem
 
         --find          pass next options (up to --) to find command
 
-       --humane        human-readable statistics (e.g. 1 048 576)
+        --humane        human-readable statistics (e.g. 1 048 576)
 
         --lock FILE     exit immediately (status 10) if unable to obtain a 
                         flock(LOCK_EX|LOCK_NB) on FILE
 
-       --lock-rm       remove lock file at exit
+        --lock-rm       remove lock file at exit
+
+        --progress      output single-character progress indicators:
+                        C - compare
+                        H - hash
+                        L - link(2)
+                        R - rename(2)
+                        S, s - lstat(2) (see source for details)
+                        U - unlink(2)
+                        . - all inodes with similar attributes done
 
         --sort          pass next options (up to --) to sort command
 
         --timestamps    mtime may be different for identical files
 
-       --skip-compare  skip byte-by-byte file comparisons
+        --skip-compare  skip byte-by-byte file comparisons
 
-       --skip-hash     skip calculation of hash function on files
+        --skip-hash     skip calculation of hash function on files
 
         --trust         old name for --skip-compare
                         (trust the hash function)
@@ -331,10 +340,11 @@ hash_file:
                        print STDERR "\t\tDigesting file $filename\n" if $debug;
                        if ((-l $filename) || ! -f _) {
                                warn "Bogon file " . tick_quote($filename);
-                               $surprises++;
+                               $input_bogons++;
+                               delete $inode_to_file_name{$candidate}->{$filename};
                                next;
                        }
-                       eval { 
+                       eval {
                                $digest = digest($filename); 
                        };
                        if ($@) {
@@ -553,10 +563,10 @@ candidate_file:
                        delete $inode_to_file_name{$candidate};
                }
        }
+       print STDERR '.' if $progress;
 
 end_merge:
 
-       print STDERR '.' if $progress;
        print STDERR "Merge done.\n" if $debug;
        undef %inode_to_file_name;
 }
@@ -569,12 +579,6 @@ while (<FIND>) {
 
        print STDERR "weak_key=$weak_key inode=$inode name=$name\n" if $debug;
 
-       unless (! (-l $name) && (-f _)) {
-               warn "Bogon file " . tick_quote($name);
-               $input_bogons++;
-               next;
-       }
-
        $input_links++;
        merge_files if $weak_key ne $current_key;
        $current_key = $weak_key;