X-Git-Url: http://git.hungrycats.org/cgi-bin/gitweb.cgi?p=dupemerge;a=blobdiff_plain;f=faster-dupemerge;h=2c85685b7056ba3c55abb05df99d84faf7aa34bb;hp=fe23f8e70619443a8cc6fb3637dc6c07354b6ba5;hb=5d6afd72794a9be8dcf2040fd4cad7fb1ad75cd6;hpb=f8527d273c71e1dc1302cfe9e854d6a57cf9b48b diff --git a/faster-dupemerge b/faster-dupemerge index fe23f8e..2c85685 100755 --- a/faster-dupemerge +++ b/faster-dupemerge @@ -4,7 +4,7 @@ use Fcntl qw(:DEFAULT :flock); use File::Compare; use File::Temp; -# Copyright (C) 2002-2010 Zygo Blaxell +# Copyright (C) 2002-2012 Zygo Blaxell # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -155,12 +155,14 @@ hard links). S, s - lstat(2) (see source for details) U - unlink(2) . - all inodes with similar attributes done + (123456) - current file size in bytes --sort pass next options (up to --) to sort command --timestamps mtime may be different for identical files - --skip-compare skip byte-by-byte file comparisons + --skip-compare skip byte-by-byte file comparisons, + compare only file hashes --skip-hash[=N] skip calculation of hash function on files larger than N bytes (default 1M). @@ -173,6 +175,12 @@ hard links). --verbose report files as they are considered --zeros hard-link zero-length files too + +--skip-compare and --skip-hash can be combined, in which case a file is +either hashed (if it is below the --skip-hash size threshold) or compared +(if it is above), but never both. + +Version: 0.20120103 USAGE } @@ -185,7 +193,7 @@ while ($#ARGV >= 0) { } elsif ($arg eq '--zeros') { $collapse_zero = 1; } elsif ($arg eq '--trust' || $arg eq '--skip-compare') { - $skip_compare_preference = 1; + $skip_compares = $skip_compare_preference = 1; } elsif ($arg =~ /^--skip-hash(?:=(\d+)([KkMmGgTt]?))?$/os) { my ($quantity, $unit) = ($1, $2); $unit ||= '_'; @@ -241,7 +249,7 @@ while ($#ARGV >= 0) { } } -@directories or usage; +@directories or usage($0); if (defined($lock_file) && !$dry_run) { sysopen(LOCK_FILE, $lock_file, O_CREAT|O_RDONLY, 0666) or die "open: $lock_file: $!"; @@ -404,7 +412,7 @@ incumbent_file: } if (format_inode($incumbent_dev, $incumbent_ino) ne $incumbent) { - warn "$incumbent_file: expected inode $incumbent, found $incumbent_dev:$incumbent_ino"; + warn "$incumbent_file: expected inode $incumbent, found ".format_inode($incumbent_dev, $incumbent_ino); $surprises++; next incumbent_file; } @@ -425,7 +433,7 @@ candidate_file: } if (format_inode($candidate_dev, $candidate_ino) ne $candidate) { - warn "$candidate_file: expected inode $candidate, found $candidate_dev:$candidate_ino"; + warn "$candidate_file: expected inode $candidate, found ".format_inode($candidate_dev, $candidate_ino); $surprises++; next candidate_file; } @@ -589,6 +597,7 @@ end_merge: } my $last_time = 0; +my $last_size = 0; while () { my ($weak_key, $size, $dev, $ino, $name) = m/^((\d+) \d+ \d+ \d+ -?[\d.]+) (\d+) (\d+) (.+)\0$/so; @@ -598,7 +607,7 @@ while () { print STDERR "weak_key=$weak_key inode=$inode name=$name\n" if $debug; - if ($size >= $skip_hashes_threshold) { + if ($skip_hashes_threshold && $size >= $skip_hashes_threshold) { $skip_hashes = 1; $skip_compares = 0; } else { @@ -608,8 +617,9 @@ while () { if ($progress) { my $this_time = time(); - if ($this_time != $last_time) { + if ($this_time != $last_time && $size != $last_size) { $last_time = $this_time; + $last_size = $size; print STDERR "($size)"; } } @@ -662,7 +672,7 @@ if ($humane) { sub space_numbers { my ($num) = @_; - 1 while $num =~ s/(\d)(\d\d\d)( \d\d\d)*$/$1 $2$3/os; + 1 while $num =~ s/(\d)(\d\d\d)((?: \d\d\d)*)$/$1 $2$3/os; $num = ' ' x ($max_num_len - length($num)) . $num; return $num; }