X-Git-Url: http://git.hungrycats.org/cgi-bin/gitweb.cgi?p=dupemerge;a=blobdiff_plain;f=faster-dupemerge;h=44d0e91a929c361059b4e5a7df6f0ed6252d4699;hp=14360dc8c802d83c89132fe5780b7070fd0896f0;hb=85b26d64c5fe49e33e7946498d5dd6c756c415ce;hpb=a433cb10cd4e633289a448649f9509e1b24d9c7e

diff --git a/faster-dupemerge b/faster-dupemerge
index 14360dc..44d0e91 100755
--- a/faster-dupemerge
+++ b/faster-dupemerge
@@ -86,6 +86,7 @@ my $collapse_access = 0;
 my $collapse_timestamp = 0;
 my $collapse_zero = 0;
 my $skip_compares = 0;
+my $skip_compare_preference = 0;
 my $skip_hashes = 0;
 my $skip_hashes_threshold = 0;
 my $progress = 0;
@@ -154,6 +155,7 @@ hard links).
 		S, s - lstat(2) (see source for details)
 		U - unlink(2)
 		. - all inodes with similar attributes done
+		(123456) - current file size in bytes
 
 	--sort		pass next options (up to --) to sort command
 
@@ -184,7 +186,7 @@ while ($#ARGV >= 0) {
 	} elsif ($arg eq '--zeros') {
 		$collapse_zero = 1;
 	} elsif ($arg eq '--trust' || $arg eq '--skip-compare') {
-		$skip_compares = 1;
+		$skip_compares = $skip_compare_preference = 1;
 	} elsif ($arg =~ /^--skip-hash(?:=(\d+)([KkMmGgTt]?))?$/os) {
 		my ($quantity, $unit) = ($1, $2);
 		$unit ||= '_';
@@ -200,7 +202,8 @@ while ($#ARGV >= 0) {
 			t => 1000*1000*1000*1000,
 			T => 1024*1024*1024*1024,
 		);
-		$skip_hashes = $skip_hashes_threshold = $quantity * $scale{$unit};
+		$skip_hashes = 0;
+		$skip_hashes_threshold = $quantity * $scale{$unit};
 	} elsif ($arg eq '--progress') {
 		$progress = 1;
 	} elsif ($arg eq '--verbose') {
@@ -239,10 +242,6 @@ while ($#ARGV >= 0) {
 	}
 }
 
-if ($skip_hashes && $skip_compares) {
-	die "Cannot skip both hashes and compares.\n";
-}
-
 @directories or usage;
 
 if (defined($lock_file) && !$dry_run) {
@@ -357,6 +356,7 @@ hash_file:
 	foreach my $filename (sort keys(%{$inode_to_file_name{$candidate}})) {
 		print STDERR "\t\tDigesting file $filename\n" if $debug;
 		if ((-l $filename) || ! -f _) {
+			print STDERR "\n" if $progress;
 			warn "Bogon file " . tick_quote($filename);
 			$input_bogons++;
 			delete $inode_to_file_name{$candidate}->{$filename};
@@ -542,7 +542,7 @@ candidate_file:
 
 		if ($link_done) {
 			delete $inode_to_file_name{$to_inode}->{$to_file};
-			$inode_to_file_name{$from_inode}->{$to_file} = undef;
+			$inode_to_file_name{$from_inode}->{$to_file} = undef unless ($dry_run);
 			$hash_to_inode{$digest} = [ $from_inode ];
 
 			$hard_links++;
@@ -589,6 +589,9 @@ end_merge:
 	undef %inode_to_file_name;
 }
 
+my $last_time = 0;
+my $last_size = 0;
+
 while () {
 	my ($weak_key, $size, $dev, $ino, $name) = m/^((\d+) \d+ \d+ \d+ -?[\d.]+) (\d+) (\d+) (.+)\0$/so;
 	die "read error: $!\nLast input line was '$_'" unless defined($name);
@@ -597,7 +600,22 @@ while () {
 
 	print STDERR "weak_key=$weak_key inode=$inode name=$name\n" if $debug;
 
-	$skip_hashes = $size >= $skip_hashes_threshold;
+	if ($skip_hashes_threshold && $size >= $skip_hashes_threshold) {
+		$skip_hashes = 1;
+		$skip_compares = 0;
+	} else {
+		$skip_hashes = 0;
+		$skip_compares = $skip_compare_preference;
+	}
+
+	if ($progress) {
+		my $this_time = time();
+		if ($this_time != $last_time && $size != $last_size) {
+			$last_time = $this_time;
+			$last_size = $size;
+			print STDERR "($size)";
+		}
+	}
 
 	$input_links++;
 
 	merge_files if $weak_key ne $current_key;
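
The net effect, distilled as a standalone Perl sketch (variable names mirror the patch; choose_strategy() and the sample sizes are illustrative only and not part of faster-dupemerge): --trust/--skip-compare now records the user's wish in $skip_compare_preference, --skip-hash=SIZE only sets $skip_hashes_threshold, and the choice is made per input record. At or above the threshold the digest is skipped and a byte-for-byte compare is forced; below it, or when no threshold was given, the file is hashed and the compare preference is honoured, so at least one verification method always runs and the old up-front "Cannot skip both hashes and compares" check could be dropped. With --progress, the current file size is also printed as "(bytes)" whenever both the second and the size differ from the previous marker.

#!/usr/bin/perl
# Sketch only: distills the per-file decision introduced by this patch.
# choose_strategy() is a hypothetical helper, not a function in faster-dupemerge.
use strict;
use warnings;

# Settings as the patched option parser would leave them, e.g. for
# "--skip-compare --skip-hash=1G":
my $skip_compare_preference = 1;
my $skip_hashes_threshold   = 1024 ** 3;

sub choose_strategy {
	my ($size) = @_;
	if ($skip_hashes_threshold && $size >= $skip_hashes_threshold) {
		# Large file: skip the digest, force a byte-for-byte compare.
		return (skip_hashes => 1, skip_compares => 0);
	}
	# Small file (or no threshold given): hash it, honour the compare preference.
	return (skip_hashes => 0, skip_compares => $skip_compare_preference);
}

for my $size (64 * 1024, 4 * 1024 ** 3) {
	my %choice = choose_strategy($size);
	printf "size=%d skip_hashes=%d skip_compares=%d\n",
	       $size, $choice{skip_hashes}, $choice{skip_compares};
}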