From: Zygo Blaxell Date: Sun, 24 Oct 2010 21:12:22 +0000 (-0400) Subject: dm6: bump version to 0.20101024, more comprehensive garbage collector X-Git-Url: http://git.hungrycats.org/cgi-bin/gitweb.cgi?p=dupemerge;a=commitdiff_plain;h=0c34a3856f509e8b615afc0fed52b7399fee21df;hp=c75590d6bcfd8cb83a348a3b066a1b26c16fd316 dm6: bump version to 0.20101024, more comprehensive garbage collector The garbage collector now counts all links to files in the link directory, and removes files where all links are accounted for within the link directory. This enables possible future enhancements, like having something analogous to --timestamps and --access in faster-dupemerge, without requiring a special case for each one. --- diff --git a/dm6 b/dm6 index 8c1e3e9..ff3a9eb 100755 --- a/dm6 +++ b/dm6 @@ -41,7 +41,7 @@ sub usage { Usage: $0 link-dir Hashes a NUL-separated list of files on stdin into link-dir. -Version: 0.20100522 +Version: 0.20101024 USAGE } @@ -216,13 +216,44 @@ while () { } # Garbage collection -print STDERR "\nGarbage collection in '$link_dir'..."; +print STDERR "\nGarbage collection in '$link_dir'...\n"; chdir($link_dir) || die "chdir: $link_dir: $!"; -print STDERR "\nRemoving files with link count < 3 and temporary links..."; -system('find . 
-type f \( -links -3 -o -name ".*" \) -print0 | xargs -0rt rm -f') and die "system: exit status $?"; -print STDERR "\nRemoving empty directories..."; + +my ($last_inode) = ''; +my @last_links; + +sub handle_gc_file { + my ($line) = @_; + my ($inode, $link) = ($line =~ /^(\S+) (.+)\0$/os); + $inode ||= ''; + if ($inode ne $last_inode) { + my ($dev, $ino, $links) = ($last_inode =~ /^(\d+):(\d+):(\d+)$/os); + if (defined($links)) { + if ($links && $links == @last_links) { + print STDERR "rm -f @last_links\n"; + for my $unlink (@last_links) { + unlink($unlink) or warn "unlink: $unlink: $!"; + } + } + } else { + warn "Could not parse '$last_inode' in '$line'" unless $last_inode eq ''; + } + @last_links = (); + } + $last_inode = $inode; + push(@last_links, $link); +} + +print STDERR "Removing files contained entirely in '$link_dir'...\n"; +open(FIND, "find . -type f -printf '%D:%i:%n %p\\0' | sort -z --compress-program=gzip |") or die "open: find: $!"; +while (<FIND>) { + handle_gc_file($_); +} +handle_gc_file(''); + +print STDERR "Removing empty directories...\n"; system("find . -type d -empty -print0 | xargs -0rt rmdir -p --ignore-fail-on-non-empty") and die "system: exit status $?"; -print STDERR "\nDone.\n"; +print STDERR "Done.\n"; exit(0);