my $collapse_timestamp = 0;
my $collapse_zero = 0;
my $skip_compares = 0;
+my $skip_compare_preference = 0;
my $skip_hashes = 0;
+my $skip_hashes_threshold = 0;
my $progress = 0;
my $verbose = 0;
my $debug = 0;
--skip-compare skip byte-by-byte file comparisons
- --skip-hash skip calculation of hash function on files
+ --skip-hash[=N] skip calculation of hash function on files
+ of N bytes or larger (default 1M).
+ Suffixes KMGT specify KiB, MiB, GiB, and TiB.
+ Suffixes kmgt specify KB, MB, GB, and TB.
--trust old name for --skip-compare
(trust the hash function)
} elsif ($arg eq '--zeros') {
$collapse_zero = 1;
} elsif ($arg eq '--trust' || $arg eq '--skip-compare') {
- $skip_compares = 1;
- } elsif ($arg eq '--skip-hash') {
- $skip_hashes = 1;
+ $skip_compare_preference = 1;
+ } elsif ($arg =~ /^--skip-hash(?:=(\d+)([KkMmGgTt]?))?$/os) {
+ # --skip-hash[=N[unit]]: record the file size, in bytes, at which
+ # hashing is skipped.  $1 is the optional quantity and $2 the
+ # optional one-letter multiplier.
+ my ($quantity, $unit) = ($1, $2);
+ # Test with defined(), not truth: an explicit "--skip-hash=0" must
+ # not be silently replaced by the 1 MiB default.
+ $quantity = 1048576 unless defined($quantity);
+ # $unit is undef (no "=N" given) or '' (bare number); both mean bytes.
+ $unit = '_' unless defined($unit) && length($unit);
+ # Upper-case suffixes are binary multiples, lower-case are decimal.
+ my %scale = (
+ _ => 1,
+ k => 1000,
+ K => 1024,
+ m => 1000*1000,
+ M => 1024*1024,
+ g => 1000*1000*1000,
+ G => 1024*1024*1024,
+ t => 1000*1000*1000*1000,
+ T => 1024*1024*1024*1024,
+ );
+ $skip_hashes = 0;
+ $skip_hashes_threshold = $quantity * $scale{$unit};
+ # Keep an explicit threshold non-zero: 0 is the initial value of
+ # $skip_hashes_threshold, so N=0 is stored as 1, which the ">="
+ # size test treats as "every non-empty file".
+ $skip_hashes_threshold = 1 if $skip_hashes_threshold < 1;
} elsif ($arg eq '--progress') {
$progress = 1;
} elsif ($arg eq '--verbose') {
}
}
-if ($skip_hashes && $skip_compares) {
- die "Cannot skip both hashes and compares.\n";
-}
-
@directories or usage;
if (defined($lock_file) && !$dry_run) {
my ($incumbent_dev,$incumbent_ino,$incumbent_mode,$incumbent_nlink,$incumbent_uid,$incumbent_gid,$incumbent_rdev,$incumbent_size,$incumbent_atime,$incumbent_mtime,$incumbent_ctime,$incumbent_blksize,$incumbent_blocks) = lstat($incumbent_file);
print STDERR "\t\tINCUMBENT dev=$incumbent_dev ino=$incumbent_ino mode=$incumbent_mode nlink=$incumbent_nlink uid=$incumbent_uid gid=$incumbent_gid rdev=$incumbent_rdev size=$incumbent_size atime=$incumbent_atime mtime=$incumbent_mtime ctime=$incumbent_ctime blksize=$incumbent_blksize blocks=$incumbent_blocks _=$incumbent_file\n" if $debug;
- if (!defined($incumbent_blocks)) {
+ if (!defined($incumbent_blocks) || ! (-f _)) {
warn "lstat: $incumbent_file: $!";
$surprises++;
next incumbent_file;
my ($candidate_dev,$candidate_ino,$candidate_mode,$candidate_nlink,$candidate_uid,$candidate_gid,$candidate_rdev,$candidate_size,$candidate_atime,$candidate_mtime,$candidate_ctime,$candidate_blksize,$candidate_blocks) = lstat($candidate_file);
print STDERR "\t\t\tCANDIDATE dev=$candidate_dev ino=$candidate_ino mode=$candidate_mode nlink=$candidate_nlink uid=$candidate_uid gid=$candidate_gid rdev=$candidate_rdev size=$candidate_size atime=$candidate_atime mtime=$candidate_mtime ctime=$candidate_ctime blksize=$candidate_blksize blocks=$candidate_blocks _=$candidate_file\n" if $debug;
- if (!defined($candidate_blocks)) {
+ if (!defined($candidate_blocks) || ! (-f _)) {
warn "lstat: $candidate_file: $!";
$surprises++;
next candidate_file;
}
while (<FIND>) {
- my ($weak_key, $dev, $ino, $name) = m/^(\d+ \d+ \d+ \d+ -?[\d.]+) (\d+) (\d+) (.+)\0$/so;
+ my ($weak_key, $size, $dev, $ino, $name) = m/^((\d+) \d+ \d+ \d+ -?[\d.]+) (\d+) (\d+) (.+)\0$/so;
die "read error: $!\nLast input line was '$_'" unless defined($name);
my $inode = format_inode($dev, $ino);
print STDERR "weak_key=$weak_key inode=$inode name=$name\n" if $debug;
$input_links++;
merge_files if $weak_key ne $current_key;
+
+ # Choose the hash/compare strategy for this file's size class.  This is
+ # done after merge_files has been called for the previous key, so that
+ # the previous group is not processed with this file's settings
+ # (assumes merge_files consults $skip_hashes/$skip_compares -- confirm).
+ # A threshold of 0 means --skip-hash was not given: never skip hashing
+ # and honour --skip-compare, instead of matching every size (>= 0).
+ if ($skip_hashes_threshold > 0 && $size >= $skip_hashes_threshold) {
+ # At or above the threshold: the hash is skipped, so the
+ # byte-by-byte compare is forced on.
+ $skip_hashes = 1;
+ $skip_compares = 0;
+ } else {
+ $skip_hashes = 0;
+ $skip_compares = $skip_compare_preference;
+ }
+
$current_key = $weak_key;