From f6ae3a6bb2cad24d00acb5dfb789a28ad693becf Mon Sep 17 00:00:00 2001
From: cvs
Date: Sat, 7 Jan 2006 08:44:02 +0000
Subject: [PATCH] Implement --dry-run and --humane options

git-svn-id: svn+ssh://svn.furryterror.org/r/trunk/mokona/zblaxell@4518 a5e33b96-951a-0410-ae88-c0fe16d076bb
---
Review note: space_numbers() now wraps the repeated " ddd" group in a single
capture, ((?: \d\d\d)*).  The earlier form, ( \d\d\d)*, left only the last
repetition in $3, so numbers of ten or more digits lost a group
(1234567890 was printed as "1 234 890").  Hunk line counts are unchanged.

 faster-dupemerge | 56 ++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 49 insertions(+), 7 deletions(-)

diff --git a/faster-dupemerge b/faster-dupemerge
index b7554de..18d88cd 100755
--- a/faster-dupemerge
+++ b/faster-dupemerge
@@ -79,6 +79,8 @@ my $skip_compares = 0;
 my $skip_hashes = 0;
 my $verbose = 0;
 my $debug = 0;
+my $dry_run = 0;
+my $humane = 0;
 my @extra_find_opts = ();
 my @extra_sort_opts = ();
 my $lock_file;
@@ -120,8 +122,12 @@ hard links).
 	--debug		show all steps in duplication discovery process
 			(implies --verbose)
 
+	--dry-run	do not lock files or make changes to filesystem
+
 	--find		pass next options (up to --) to find command
 
+	--humane	human-readable statistics (e.g. 1 048 576)
+
 	--lock FILE	exit immediately (status 10) if unable to obtain a
 			flock(LOCK_EX|LOCK_NB) on FILE
 
@@ -168,6 +174,10 @@ while ($#ARGV >= 0) {
 		}
 	} elsif ($arg eq '--debug') {
 		$debug = $verbose = 1;
+	} elsif ($arg eq '--dry-run') {
+		$dry_run = 1;
+	} elsif ($arg eq '--humane') {
+		$humane = 1;
 	} elsif ($arg eq '--find') {
 		while ($#ARGV >= 0) {
 			my $extra_arg = shift(@ARGV);
@@ -192,7 +202,9 @@ if ($skip_hashes && $skip_compares) {
 	die "Cannot skip both hashes and compares.\n";
 }
 
-if (defined($lock_file)) {
+@directories or usage;
+
+if (defined($lock_file) && !$dry_run) {
 	sysopen(LOCK_FILE, $lock_file, O_CREAT|O_RDONLY, 0666) or die "open: $lock_file: $!";
 	flock(LOCK_FILE, LOCK_EX|LOCK_NB) or die "flock: $lock_file: LOCK_EX|LOCK_NB: $!";
 	print STDERR "Locked '$lock_file' in LOCK_EX mode.\n" if $verbose;
@@ -200,7 +212,7 @@ if (defined($lock_file)) {
 }
 
 END {
-	if ($lock_obtained) {
+	if ($lock_obtained && !$dry_run) {
 		print STDERR "Removing '$lock_file'.\n" if $verbose;
 		unlink($lock_file) or warn "unlink: $lock_file: $!";
 	}
@@ -244,14 +256,18 @@ my %inode_to_file_name = ();
 # Link files
 sub link_files {
 	my ($from, $to) = (@_);
+
+	my $quoted_from = tick_quote($from);
+	my $quoted_to = tick_quote($to);
+	print STDERR "ln -f $quoted_from $quoted_to\n";
+
+	return if $dry_run;
+
 	my $inode_dir = $to;
 	my $inode_base = $to;
 	$inode_dir =~ s:[^/]*$::o;
 	$inode_base =~ s:^.*/::os;
 	my $tmp_to = File::Temp::tempnam($inode_dir, ".$inode_base.");
-	my $quoted_from = tick_quote($from);
-	my $quoted_to = tick_quote($to);
-	print STDERR "ln -f $quoted_from $quoted_to\n";
 	print STDERR "\tlink: $from -> $tmp_to\n" if $debug;
 	link($from, $tmp_to) or die "link: $from -> $tmp_to: $!";
 	print STDERR "\trename: $tmp_to -> $to\n" if $debug;
@@ -461,9 +477,13 @@ candidate_file:
 				# My random number generator chooses the incumbent's size.
 
 				if ($link_done) {
+					# Since we're in a dry run, the filesystem doesn't change.
+					# Our notion of what the filesystem should look like should not change either.
 					delete $inode_to_file_name{$to_inode}->{$to_file};
-					$inode_to_file_name{$from_inode}->{$to_file} = undef;
-					$hash_to_inode{$digest} = $from_inode;
+					unless ($dry_run) {
+						$inode_to_file_name{$from_inode}->{$to_file} = undef;
+						$hash_to_inode{$digest} = $from_inode;
+					}
 
 					$hard_links++;
 					if ($to_nlink == 1) {
@@ -553,6 +573,28 @@ surprises $surprises
 trivially_unique $trivially_unique
 STATS
 
+if ($humane) {
+	my $max_num_len = 0;
+
+	sub measure_numbers {
+		my ($num) = @_;
+		my $len = length($num);
+		$len += int( (length($num) - 1) / 3);
+		$max_num_len = $len if $len > $max_num_len;
+	}
+
+	(my $dummy = $stats_blob) =~ s/\d+/measure_numbers($&)/geos;
+
+	sub space_numbers {
+		my ($num) = @_;
+		1 while $num =~ s/(\d)(\d\d\d)((?: \d\d\d)*)$/$1 $2$3/os;	# single capture around the repeated group keeps every " ddd" tail, not only the last one
+		$num = ' ' x ($max_num_len - length($num)) . $num;
+		return $num;
+	}
+
+	$stats_blob =~ s/\d+/space_numbers($&)/geos;
+}
+
 $stats_blob =~ s/([^\n]*\n[^\n]*? )(\s+)( [^\n]*\n)/$1 . ('.' x length($2)) . $3/oemg;
 
 print STDERR $stats_blob;
-- 
2.30.2