my $skip_hashes = 0;
my $verbose = 0;
my $debug = 0;
+my $dry_run = 0;
+my $humane = 0;
my @extra_find_opts = ();
my @extra_sort_opts = ();
my $lock_file;
if ($skip_hashes) {
return "SKIPPING HASHES";
} else {
- &really_digest($filename);
+ my $digest = &really_digest($filename);
$hash_bytes += -s $filename;
$hash_files++;
+ return $digest
}
}
--debug show all steps in duplication discovery process
(implies --verbose)
+ --dry-run do not lock files or make changes to filesystem
+
--find pass next options (up to --) to find command
+ --humane human-readable statistics (e.g. 1 048 576)
+
--lock FILE exit immediately (status 10) if unable to obtain a
flock(LOCK_EX|LOCK_NB) on FILE
}
} elsif ($arg eq '--debug') {
$debug = $verbose = 1;
+ } elsif ($arg eq '--dry-run') {
+ $dry_run = 1;
+ } elsif ($arg eq '--humane') {
+ $humane = 1;
} elsif ($arg eq '--find') {
while ($#ARGV >= 0) {
my $extra_arg = shift(@ARGV);
die "Cannot skip both hashes and compares.\n";
}
-if (defined($lock_file)) {
+@directories or usage;
+
+if (defined($lock_file) && !$dry_run) {
sysopen(LOCK_FILE, $lock_file, O_CREAT|O_RDONLY, 0666) or die "open: $lock_file: $!";
flock(LOCK_FILE, LOCK_EX|LOCK_NB) or die "flock: $lock_file: LOCK_EX|LOCK_NB: $!";
print STDERR "Locked '$lock_file' in LOCK_EX mode.\n" if $verbose;
}
END {
- if ($lock_obtained) {
+ if ($lock_obtained && !$dry_run) {
print STDERR "Removing '$lock_file'.\n" if $verbose;
unlink($lock_file) or warn "unlink: $lock_file: $!";
}
# Link files
sub link_files {
my ($from, $to) = (@_);
+
+ my $quoted_from = tick_quote($from);
+ my $quoted_to = tick_quote($to);
+ print STDERR "ln -f $quoted_from $quoted_to\n";
+
+ return if $dry_run;
+
my $inode_dir = $to;
my $inode_base = $to;
$inode_dir =~ s:[^/]*$::o;
$inode_base =~ s:^.*/::os;
my $tmp_to = File::Temp::tempnam($inode_dir, ".$inode_base.");
- my $quoted_from = tick_quote($from);
- my $quoted_to = tick_quote($to);
- print STDERR "ln -f $quoted_from $quoted_to\n";
print STDERR "\tlink: $from -> $tmp_to\n" if $debug;
link($from, $tmp_to) or die "link: $from -> $tmp_to: $!";
print STDERR "\trename: $tmp_to -> $to\n" if $debug;
# My random number generator chooses the incumbent's size.
if ($link_done) {
+ # In a dry run the filesystem doesn't change, so the link bookkeeping
+ # guarded by $dry_run below must not be updated either.
delete $inode_to_file_name{$to_inode}->{$to_file};
- $inode_to_file_name{$from_inode}->{$to_file} = undef;
- $hash_to_inode{$digest} = $from_inode;
+ unless ($dry_run) {
+ $inode_to_file_name{$from_inode}->{$to_file} = undef;
+ $hash_to_inode{$digest} = $from_inode;
+ }
$hard_links++;
if ($to_nlink == 1) {
trivially_unique $trivially_unique
STATS
+# --humane: rewrite every number in the statistics report with its digits
+# grouped in threes (e.g. 1 048 576) and left-padded to a common width.
+if ($humane) {
+    # Width of the widest number once its group separators are inserted.
+    my $max_num_len = 0;
+
+    # Record the width $num will occupy after grouping: its digit count
+    # plus one separator for every complete group of three before the last.
+    sub measure_numbers {
+        my ($num) = @_;
+        my $len = length($num);
+        $len += int( ($len - 1) / 3);
+        $max_num_len = $len if $len > $max_num_len;
+    }
+
+    # First pass: measure every number without modifying the report.
+    measure_numbers($1) while $stats_blob =~ /(\d+)/g;
+
+    # Return $num with a space between each group of three digits,
+    # left-padded with spaces to $max_num_len characters.
+    sub space_numbers {
+        my ($num) = @_;
+        # Peel three digits at a time off the leading, still-ungrouped run.
+        # Anchoring at the start keeps every group already split off; a
+        # quantified capture such as "( \d\d\d)*" would retain only its
+        # last repetition and drop groups from numbers of 10+ digits.
+        1 while $num =~ s/^(\d+)(\d\d\d)/$1 $2/;
+        $num = ' ' x ($max_num_len - length($num)) . $num;
+        return $num;
+    }
+
+    # Second pass: substitute the grouped, padded form for each number.
+    $stats_blob =~ s/(\d+)/space_numbers($1)/ge;
+}
+
+
$stats_blob =~ s/([^\n]*\n[^\n]*? )(\s+)( [^\n]*\n)/$1 . ('.' x length($2)) . $3/oemg;
print STDERR $stats_blob;