dupemerge: don't stat during the file collection loop
author Zygo Blaxell <zblaxell@waya.furryterror.org>
Fri, 8 Jan 2010 14:16:02 +0000 (09:16 -0500)
committer Zygo Blaxell <zblaxell@waya.furryterror.org>
Sat, 9 Jan 2010 02:25:45 +0000 (21:25 -0500)
Remove the lstat from the find output reading loop.  It's a redundant
copy of the same code in merge_files.

Adjust merge_files to filter out possible non-files that will now leak
through from the find output.

faster-dupemerge

index 91138c8713d8d79c4f730a743eec303ce07097c3..aa8b142ceeab5bc2390aa7fcc4abef9a10719cc5 100755 (executable)
@@ -340,10 +340,11 @@ hash_file:
                        print STDERR "\t\tDigesting file $filename\n" if $debug;
                        if ((-l $filename) || ! -f _) {
                                warn "Bogon file " . tick_quote($filename);
-                               $surprises++;
+                               $input_bogons++;
+                               delete $inode_to_file_name{$candidate}->{$filename};
                                next;
                        }
-                       eval { 
+                       eval {
                                $digest = digest($filename); 
                        };
                        if ($@) {
@@ -578,12 +579,6 @@ while (<FIND>) {
 
        print STDERR "weak_key=$weak_key inode=$inode name=$name\n" if $debug;
 
-       unless (! (-l $name) && (-f _)) {
-               warn "Bogon file " . tick_quote($name);
-               $input_bogons++;
-               next;
-       }
-
        $input_links++;
        merge_files if $weak_key ne $current_key;
        $current_key = $weak_key;