+ my $n = $#lj_cache+1;
+ my $i = int(rand($n));
+ my ($img, $page) = @{$lj_cache[$i]};
+
+ # delete this one from @lj_cache and from %lj_cache.
+ #
+ @lj_cache = ( @lj_cache[0 .. $i-1],
+ @lj_cache[$i+1 .. $#lj_cache] );
+ delete $lj_cache{$img};
+
+ # Keep the size of the cache under the limit by nuking older entries
+ #
+ while ($#lj_cache >= $lj_cache_size) {
+ my $pairP = shift @lj_cache;
+ my $img = $pairP->[0];
+ delete $lj_cache{$img};
+ }
+
+ LOG ($verbose_load, "picked image " .($i+1) . "/$n: $img");
+
+ return ($page, $img);
+}
+
+\f
+############################################################################
+#
+# Pick images from ircimages.com (images that have been in the /topic of
+# various IRC channels.)
+#
+############################################################################
+
# Root of the ircimages.com archive; "page-N" is appended below.
my $ircimages_url = "http://ircimages.com/";
+
+# ircimages
# ircimages: pick a random image linked from a random ircimages.com
# archive page.  Returns ($page_url, $image_url), or () on failure.
sub pick_from_ircimages($) {
  my ($timeout) = @_;

  $last_search = $ircimages_url; # for warnings

  # The archive is paginated; pick a random page.
  my $n = int(rand(2900));
  my $search_url = $ircimages_url . "page-$n";

  my ( $base, $body ) = get_document ($search_url, undef, $timeout);
  return () unless $body;

  my @candidates = ();

  # Put each <A> tag at the start of its own line so we can scan them
  # one at a time.
  $body =~ s/\n/ /gs;
  $body =~ s/(<A)\b/\n$1/gsi;

  foreach (split (/\n/, $body)) {

    my ($u) = m@<A\s.*\bHREF\s*=\s*([^>]+)>@i;
    next unless $u;

    if ($u =~ m/^\"([^\"]*)\"/) { $u = $1; } # quoted string
    elsif ($u =~ m/^([^\s]*)\s/) { $u = $1; } # or token

    next unless ($u =~ m/^https?:/i);
    # Skip the site's own navigation links.
    # (Bug fix: this used "\|", a literal pipe character, so the
    # alternation never matched and these links were never skipped.)
    next if ($u =~ m@^https?://(searchirc\.com|ircimages\.com)@i);
    next unless ($u =~ m@[.](gif|jpg|jpeg|pjpg|pjpeg|png)$@i);

    LOG ($verbose_http, " HREF: $u");
    push @candidates, $u;
  }

  LOG ($verbose_filter, scalar(@candidates) . " links on $search_url");

  return () if ($#candidates == -1);

  my $i = int(rand(scalar @candidates));
  my $img = $candidates[$i];

  LOG ($verbose_load, "picked image " .($i+1) . "/" . scalar(@candidates) .
       ": $img");

  $search_url = $img; # hmm...
  return ($search_url, $img);
}
+
+\f
+############################################################################
+#
+# Pick images from Twitpic's list of recently-posted images.
+#
+############################################################################
+
my $twitpic_img_url = "http://twitpic.com/public_timeline/feed.rss";
# NOTE(review): Twitpic shut down around 2014; presumably this feed no
# longer resolves -- confirm before relying on this source.

# With most of our image sources, we get a random page and then select
# from the images on it.  However, in the case of Twitpic, the page
# of images tends to update slowly; so we'll remember the last N entries
# on it and randomly select from those, to get a wider variety each time.

my $twitpic_cache_size = 1000;
my @twitpic_cache = (); # fifo, for ordering by age
my %twitpic_cache = (); # hash, for detecting dups
+
+# twitpic
# twitpic: pick a random image from Twitpic's public-timeline RSS feed,
# via the bounded FIFO cache above.  Returns ($page_url, $image_url),
# or () if nothing usable was found.
sub pick_from_twitpic_images($) {
  my ($timeout) = @_;

  $last_search = $twitpic_img_url; # for warnings

  my ( $base, $body ) = get_document ($twitpic_img_url, undef, $timeout);

  # Update the cache.

  if ($body) {
    $body =~ s/\n/ /gs;
    $body =~ s/(<item)\b/\n$1/gsi;      # one RSS <item> per line

    my @items = split (/\n/, $body);
    shift @items;                       # discard preamble before first <item>
    foreach (@items) {
      next unless (m@<link>([^<>]*)</link>@si);
      my $page = html_unquote ($1);

      $page =~ s@/$@@s;                 # strip trailing slash,
      $page .= '/full';                 # and link to the full-size page.

      next if ($twitpic_cache{$page}); # already have it

      LOG ($verbose_filter, " candidate: $page");
      push @twitpic_cache, $page;
      $twitpic_cache{$page} = $page;
    }
  }

  # Pull from the cache.

  return () if ($#twitpic_cache == -1);  # cache empty: nothing to pick

  my $n = $#twitpic_cache+1;
  my $i = int(rand($n));
  my $page = $twitpic_cache[$i];

  # delete this one from @twitpic_cache and from %twitpic_cache.
  #
  @twitpic_cache = ( @twitpic_cache[0 .. $i-1],
                     @twitpic_cache[$i+1 .. $#twitpic_cache] );
  delete $twitpic_cache{$page};

  # Keep the size of the cache under the limit by nuking older entries
  #
  while ($#twitpic_cache >= $twitpic_cache_size) {
    my $page = shift @twitpic_cache;
    delete $twitpic_cache{$page};
  }

  # Fetch the chosen page and scrape the first plausible <img src> off it.
  ( $base, $body ) = get_document ($page, undef, $timeout);
  my $img = undef;
  $body = '' unless defined($body);

  foreach (split (/<img\s+/, $body)) {
    my ($src) = m/\bsrc=[\"\'](.*?)[\"\']/si;
    next unless $src;
    next if m@/js/@s;        # skip site chrome: scripts,
    next if m@/images/@s;    # and stock page furniture.

    $img = $src;

    $img = "http:$img" if ($img =~ m@^//@s); # Oh come on

    # Sometimes these images are hosted on twitpic, sometimes on Amazon.
    if ($img =~ m@^/@) {
      $base =~ s@^(https?://[^/]+)/.*@$1@s;  # reduce $base to scheme://host
      $img = $base . $img;
    }
    last;                    # first match wins
  }

  if (!$img) {
    LOG ($verbose_load, "no matching images on $page\n");
    return ();
  }

  LOG ($verbose_load, "picked image " .($i+1) . "/$n: $img");

  return ($page, $img);
}
+
+\f
+############################################################################
+#
+# Pick images from Twitter's list of recently-posted updates.
+#
+############################################################################
+
+# With most of our image sources, we get a random page and then select
+# from the images on it. However, in the case of Twitter, the page
+# of images only updates once a minute; so we'll remember the last N entries
+# on it and randomly select from those, to get a wider variety each time.
+
# NOTE(review): the v1.0 public_timeline endpoint was retired when Twitter
# moved to v1.1 (2013); this URL presumably returns an error now -- confirm.
my $twitter_img_url = "http://api.twitter.com/1/statuses/" .
  "public_timeline.json" .
  "?include_entities=true" .
  "&include_rts=true" .
  "&count=200";

my $twitter_cache_size = 1000;

my @twitter_cache = (); # fifo, for ordering by age
my %twitter_cache = (); # hash, for detecting dups (page => image URL)
+
+
+# twitter
# twitter: pick a random image from Twitter's public timeline, via the
# bounded FIFO cache above.  Returns ($page_url, $image_url), or ().
sub pick_from_twitter_images($) {
  my ($timeout) = @_;

  $last_search = $twitter_img_url; # for warnings

  my ( $base, $body ) = get_document ($twitter_img_url, undef, $timeout);
  # Update the cache.

  if ($body) {
    $body =~ s/[\r\n]+/ /gs;

    # Parsing JSON is a pain in the ass.  So we halfass it as usual.
    $body =~ s/^\[|\]$//s;              # strip the outer [ ] of the array
    # Inside every nested [...] (e.g. "entities"), rewrite "}," as "} "
    # so the split below only breaks between top-level tweet objects.
    $body =~ s/(\[.*?\])/{ $_ = $1; s@\},@\} @gs; $_; }/gsexi;
    my @items = split (/\},\{/, $body);
    foreach (@items) {
      my ($name) = m@"screen_name":"([^\"]+)"@si;
      my ($img) = m@"media_url":"([^\"]+)"@si;
      my ($page) = m@"display_url":"([^\"]+)"@si;
      next unless ($name && $img && $page);
      foreach ($img, $page) {
        s/\\//gs;                           # un-escape JSON "\/"
        $_ = "http://$_" unless (m/^http/si);
      }

      next if ($twitter_cache{$page}); # already have it

      LOG ($verbose_filter, " candidate: $page - $img");
      push @twitter_cache, $page;
      $twitter_cache{$page} = $img;         # page => image URL
    }
  }

  # Pull from the cache.

  return () if ($#twitter_cache == -1);

  my $n = $#twitter_cache+1;
  my $i = int(rand($n));
  my $page = $twitter_cache[$i];
  my $url = $twitter_cache{$page};

  # delete this one from @twitter_cache and from %twitter_cache.
  #
  @twitter_cache = ( @twitter_cache[0 .. $i-1],
                     @twitter_cache[$i+1 .. $#twitter_cache] );
  delete $twitter_cache{$page};

  # Keep the size of the cache under the limit by nuking older entries
  #
  while ($#twitter_cache >= $twitter_cache_size) {
    my $page = shift @twitter_cache;
    delete $twitter_cache{$page};
  }

  LOG ($verbose_load, "picked page $url");

  $suppress_audit = 1;

  return ($page, $url);
}
+
+\f
+############################################################################
+#
+# Pick images from Flickr's page of recently-posted photos.
+#
+############################################################################
+
my $flickr_img_url = "http://www.flickr.com/explore/";

# Like LiveJournal, the Flickr page of images tends to update slowly,
# so remember the last N entries on it and randomly select from those.

# I know that Flickr has an API (http://www.flickr.com/services/api/)
# but it was easy enough to scrape the HTML, so I didn't bother exploring.

my $flickr_cache_size = 1000;
my @flickr_cache = (); # fifo, for ordering by age; holds [img, page, start]
my %flickr_cache = (); # hash keyed by image URL, for detecting dups
+
+
+# flickr_recent
# flickr_recent: pick a random image from Flickr's "explore" page, via
# the bounded FIFO cache above.  Returns ($page_url, $image_url), or ().
sub pick_from_flickr_recent($) {
  my ($timeout) = @_;

  my $start = 16 * int(rand(100));   # the page shows 16 photos per "start"

  $last_search = $flickr_img_url; # for warnings
  $last_search .= "?start=$start" if ($start > 0);

  my ( $base, $body ) = get_document ($last_search, undef, $timeout);

  # If the document comes back empty. just use the cache.
  # return () unless $body;
  $body = '' unless defined($body);

  my $count = 0;     # photos seen on this page
  my $count2 = 0;    # photos newly added to the cache

  # The photo data is embedded in the page as a JSON-ish "_data" array.
  if ($body =~ m@{ *"_data": \[ ( .*? \} ) \]@six) {
    $body = $1;
  } else {
    LOG ($verbose_load, "flickr unparsable: $last_search");
    return ();
  }

  $body =~ s/[\r\n]/ /gs;
  $body =~ s/(\},) *(\{)/$1\n$2/gs; # "_flickrModelRegistry"

  foreach my $chunk (split (/\n/, $body)) {
    my ($img) = ($chunk =~ m@"displayUrl": *"(.*?)"@six);
    next unless defined ($img);
    $img =~ s/\\//gs;                                # un-escape JSON "\/"
    # Bug fix: this used to read ($img = "//"), which threw the URL away.
    $img = "//$img" unless ($img =~ m@^/@s);         # make protocol-relative,
    $img = "http:$img" unless ($img =~ m/^http/s);   # then absolute.

    my ($user) = ($chunk =~ m/"pathAlias": *"(.*?)"/si);
    next unless defined ($user);

    my ($id) = ($img =~ m@/\d+/(\d+)_([\da-f]+)_@si);
    my ($page) = "https://www.flickr.com/photos/$user/$id/";

    # $img =~ s/_[a-z](\.[a-z\d]+)$/$1/si;  # take off "thumb" suffix

    $count++;
    next if ($flickr_cache{$img}); # already have it

    my @pair = ($img, $page, $start);
    LOG ($verbose_filter, " candidate: $img");
    push @flickr_cache, \@pair;
    $flickr_cache{$img} = \@pair;
    $count2++;
  }

  return () if ($#flickr_cache == -1);

  my $n = $#flickr_cache+1;
  my $i = int(rand($n));
  my ($img, $page) = @{$flickr_cache[$i]};

  # delete this one from @flickr_cache and from %flickr_cache.
  #
  @flickr_cache = ( @flickr_cache[0 .. $i-1],
                    @flickr_cache[$i+1 .. $#flickr_cache] );
  delete $flickr_cache{$img};

  # Keep the size of the cache under the limit by nuking older entries
  #
  while ($#flickr_cache >= $flickr_cache_size) {
    my $pairP = shift @flickr_cache;
    my $img = $pairP->[0];
    delete $flickr_cache{$img};
  }

  LOG ($verbose_load, "picked image " .($i+1) . "/$n: $img");

  return ($page, $img);
}
+
+\f
+############################################################################
+#
+# Pick images from a random RSS feed on Flickr.
+#
+############################################################################
+
# Base URL for Flickr's public-photos RSS feed; random comma-separated
# tags are appended to it.
my $flickr_rss_base = ("http://www.flickr.com/services/feeds/photos_public.gne".
                       "?format=rss_200_enc&tagmode=any&tags=");
+
+# Picks a random RSS feed; picks a random image from that feed;
+# returns 2 URLs: the page containing the image, and the image.
+# Mostly by Joe Mcmahon <mcmahon@yahoo-inc.com>
+#
+# flickr_random
sub pick_from_flickr_random($) {
  my ($timeout) = @_;

  # Build a feed URL from a handful of random comma-separated tags.
  my $tags = random_words(',');
  $last_search = $flickr_rss_base . $tags;

  (my $pretty = $tags) =~ s/,/ /g;

  print STDERR "\n\n" if ($verbose_load);
  LOG ($verbose_load, "words: $pretty");
  LOG ($verbose_load, "URL: $last_search");

  $suppress_audit = 1;

  my ($base, $body) = get_document ($last_search, undef, $timeout);
  return unless ($base && $body);

  # Let pick_image_from_rss do the heavy lifting on the feed body.
  my ($target, $img) = pick_image_from_rss ($base, $body);
  return () unless defined ($img);

  LOG ($verbose_load, "redirected to: $target");
  return ($target, $img);
}
+
+\f
+############################################################################
+#
+# Pick random images from Instagram.
+#
+############################################################################
+
# Instagram "popular media" API endpoint.
# NOTE(review): this v1 endpoint (and the client_id scheme) was shut down
# when Instagram retired its legacy API -- confirm this source still works.
my $instagram_url_base = "https://api.instagram.com/v1/media/popular";
+
+# instagram_random
# instagram_random: pick a random image from Instagram's "popular" feed,
# using a scavenged client_id.  Returns ($page_url, $image_url), or ().
sub pick_from_instagram($) {
  my $timeout = shift;

  # Liberated access tokens.
  # jsdo.it search for: instagram client_id
  # Google search for: instagram "&client_id=" site:jsfiddle.net
  my @tokens = (#'b59fbe4563944b6c88cced13495c0f49', # gramfeed.com
                #'fa26679250df49c48a33fbcf30aae989', # instac.at
                #'d9494686198d4dfeb954979a3e270e5e', # iconosquare.com
                #'793ef48bb18e4197b61afce2d799b81c', # jsdo.it
                #'67b8a3e0073449bba70600d0fc68e6cb', # jsdo.it
                #'26a098e0df4d4b9ea8b4ce6c505b7742', # jsdo.it
                #'2437cbcd906a4c10940f990d283d3cd5', # jsdo.it
                #'191c7d7d5312464cbd92134f36ffdab5', # jsdo.it
                #'acfec809437b4340b2c38f66503af774', # jsdo.it
                #'e9f77604a3a24beba949c12d18130988', # jsdo.it
                #'2cd7bcf68ae346529770073d311575b3', # jsdo.it
                #'830c600fe8d742e2ab3f3b94f9bb22b7', # jsdo.it
                #'55865a0397ad41e5997dd95ef4df8da1', # jsdo.it
                #'192a5742f3644ea8bed1d25e439286a8', # jsdo.it
                #'38ed1477e7a44595861b8842cdb8ba23', # jsdo.it
                #'e52f79f645f54488ad0cc47f6f55ade6', # jsfiddle.net
               );

  # Robustness fix: every token above is commented out, so @tokens may be
  # empty; bail instead of building a URL with an undefined client_id.
  return () unless @tokens;

  my $tok = $tokens[int(rand($#tokens+1))];
  $last_search = $instagram_url_base . "?client_id=" . $tok;

  print STDERR "\n\n" if ($verbose_load);
  LOG ($verbose_load, "URL: $last_search");

  my ( $base, $body ) = get_document ($last_search, undef, $timeout);
  if (!$base || !$body) {
    $body = undef;
    return;
  }

  # Split the JSON into one chunk per "link" key.
  $body =~ s/("link")/\001$1/gs;
  my @chunks = split(/\001/, $body);
  shift @chunks;
  my @urls = ();
  foreach (@chunks) {
    s/\\//gs;                                 # un-escape JSON "\/"
    my ($url) = m/"link":\s*"(.*?)"/s;
    my ($img) = m/"standard_resolution":\{"url":\s*"(.*?)"/s;
    # Bug fix: the fallback pattern was guarded on $url rather than $img,
    # so it never ran when the standard_resolution match had failed.
    ($img) = m/"url":\s*"(.*?)"/s unless $img;
    next unless ($url && $img);
    push @urls, [ $url, $img ];
  }

  if ($#urls < 0) {
    LOG ($verbose_load, "no images on $last_search");
    return ();
  }

  my $i = int(rand($#urls+1));
  my ($url, $img) = @{$urls[$i]};

  LOG ($verbose_load, "picked image " .($i+1) . "/" . ($#urls+1) . ": $url");
  return ($url, $img);
}
+
+\f
+############################################################################
+#
+# Pick images by waiting for driftnet to populate a temp dir with files.
+# Requires driftnet version 0.1.5 or later.
+# (Driftnet is a program by Chris Lightfoot that sniffs your local ethernet
+# for images being downloaded by others.)
+# Driftnet/webcollage integration by jwz.
+#
+############################################################################
+
+# driftnet
# driftnet: poll the driftnet spool directory until a sniffed image file
# appears or $timeout elapses.  Returns ($driftnet_magic, $filename), or ().
sub pick_from_driftnet($) {
  my ($timeout) = @_;

  my $id = $driftnet_magic;
  my $dir = $driftnet_dir;
  my $start = time;
  my $now;

  error ("\$driftnet_dir unset?") unless ($dir);
  $dir =~ s@/+$@@;                   # strip trailing slashes

  error ("$dir unreadable") unless (-d "$dir/.");

  $timeout = $http_timeout unless ($timeout);
  $last_search = $id;

  while ($now = time, $now < $start + $timeout) {
    # Bug fix: the directory handle was also named $dir, shadowing the
    # path string, so "$dir/$file" below interpolated the glob (yielding
    # "GLOB(0x...)/file") instead of the directory path.
    opendir (my $dh, $dir) || error ("$dir: $!");
    while (my $file = readdir($dh)) {
      next if ($file =~ m/^\./);     # skip ".", "..", dotfiles
      $file = "$dir/$file";
      closedir ($dh);
      LOG ($verbose_load, "picked file $file ($id)");
      return ($id, $file);
    }
    closedir ($dh);
  }
  LOG (($verbose_net || $verbose_load), "timed out for $id");
  return ();
}
+
+
# Slurp a file that pick_from_driftnet() selected, then delete it from
# the spool.  Returns ($driftnet_magic, $bytes).
sub get_driftnet_file($) {
  my ($file) = @_;

  error ("\$driftnet_dir unset?") unless ($driftnet_dir);

  my $id = $driftnet_magic;
  # Sanity-check that the file really lives inside our spool directory.
  error ("$id: $file not in $driftnet_dir?")
    unless ($file =~ m@^\Q$driftnet_dir@o);

  open (my $in, '<', $file) || error ("$id: $file: $!");
  my $body = do { local $/; <$in> };   # slurp the whole file
  close ($in) || error ("$id: $file: $!");
  unlink ($file) || error ("$id: $file: rm: $!");
  return ($id, $body);
}
+
+
# Fork off a driftnet process pointed at a freshly-created private spool
# directory (stored in the global $driftnet_dir).  Dies via error() if
# the child fails to start.
sub spawn_driftnet($) {
  my ($cmd) = @_;

  # make a directory to use.
  while (1) {
    my $tmp = $ENV{TEMPDIR} || "/tmp";
    $driftnet_dir = sprintf ("$tmp/driftcollage-%08x", rand(0xffffffff));
    LOG ($verbose_exec, "mkdir $driftnet_dir");
    last if mkdir ($driftnet_dir, 0700);
  }

  if (! ($cmd =~ m/\s/)) {
    # if the command didn't have any arguments in it, then it must be just
    # a pointer to the executable.  Append the default args to it.
    my $dargs = $default_driftnet_cmd;
    $dargs =~ s/^[^\s]+//;
    $cmd .= $dargs;
  }

  # point the driftnet command at our newly-minted private directory.
  #
  $cmd .= " -d $driftnet_dir";
  $cmd .= ">/dev/null" unless ($verbose_exec);

  my $pid = fork();
  # Bug fix: fork() returns undef on failure, never a negative number,
  # so the old ($pid < 0) test could not detect a failed fork.
  if (!defined ($pid)) { error ("fork: $!\n"); }
  if ($pid) {
    # parent fork
    push @pids_to_kill, $pid;
    LOG ($verbose_exec, "forked for \"$cmd\"");
  } else {
    # child fork
    nontrapping_system ($cmd) || error ("exec: $!");
  }

  # wait a bit, then make sure the process actually started up.
  #
  sleep (1);
  error ("pid $pid failed to start \"$cmd\"")
    unless (1 == kill (0, $pid));
}
+
+# local-directory
# local-directory: pick a random file from $local_dir by shelling out to
# xscreensaver-getimage-file.  Returns ($local_magic, $path).
sub pick_from_local_dir($) {
  my ($timeout) = @_;   # unused: selection is local and fast

  my $id = $local_magic;
  $last_search = $id;

  my $dir = $local_dir;
  error ("\$local_dir unset?") unless ($dir);
  $dir =~ s@/+$@@;      # strip trailing slashes

  error ("$dir unreadable") unless (-d "$dir/.");

  my $v = ($verbose_exec ? "-v" : "");
  # NOTE(review): $dir is interpolated into a shell command line; a
  # directory name containing '"' or '$' would break this -- confirm that
  # $local_dir is trusted configuration.
  my $pick = `xscreensaver-getimage-file $v "$dir"`;
  $pick =~ s/\s+$//s;                             # chomp trailing newline
  $pick = "$dir/$pick" unless ($pick =~ m@^/@s); # relative path

  LOG ($verbose_load, "picked file $pick ($id)");
  return ($id, $pick);
}
+
+
# Slurp a file that pick_from_local_dir() selected, as raw bytes.
# Returns ($local_magic, $bytes).
sub get_local_file($) {
  my ($file) = @_;

  error ("\$local_dir unset?") unless ($local_dir);

  my $id = $local_magic;
  # Sanity-check that the file really lives inside $local_dir.
  error ("$id: $file not in $local_dir?")
    unless ($file =~ m@^\Q$local_dir@o);

  open (my $in, '<:raw', $file) || error ("$id: $file: $!");
  my $body = do { local $/; <$in> };   # slurp entire file
  close ($in) || error ("$id: $file: $!");
  return ($id, $body);
}
+
+
+\f
+############################################################################
+#
+# Pick a random image in a random way
+#
+############################################################################
+
+
+# Picks a random image on a random page, and returns two URLs:
+# the page containing the image, and the image.
+# Returns () if nothing found this time.
+#
+
# Picks a random image source according to the percentage weights in
# @search_methods, then delegates to that source's picker.
# Returns ($page_url, $image_url), or () if nothing was found this time.
sub pick_image(;$) {
  my ($timeout) = @_;

  $current_state = "select";
  $load_method = "none";

  my $roll = int(rand(100));
  my $fn = undef;
  my $total = 0;
  my @methods = @search_methods;

  # @search_methods is a flat list of (percentage, name, coderef) triples.
  # Walk the entire list even after a match, so that $total can be
  # sanity-checked below.
  while (@methods) {
    my ($pct, $name, $method) = splice (@methods, 0, 3);
    $total += $pct;
    if (!defined($fn) && $total > $roll) {
      $fn = $method;
      $current_state = $name;
      $load_method = $name;
    }
  }

  error ("internal error: \@search_methods totals to $total%!")
    if ($total != 100);

  record_attempt ($current_state);
  return $fn->($timeout);
}
+
+
+\f
+############################################################################
+#
+# Statistics and logging
+#
+############################################################################
+
# Current time of day formatted as "HH:MM:SS: ", for log prefixes.
sub timestr() {
  my @now = localtime;
  return strftime ("%H:%M:%S: ", @now);
}
+
# Standard log-line prefix: program name, timestamp, current state.
sub blurb() {
  return join ('', "$progname: ", timestr(), "$current_state: ");
}
+
# Print a fatal message to stderr (with the standard prefix) and exit.
sub error($) {
  my ($err) = @_;
  print STDERR blurb(), $err, "\n";
  exit 1;
}
+
# Dump the current Perl call stack to stderr, one frame per line.
sub stacktrace() {
  print STDERR "$progname: stack trace:\n";
  for (my $depth = 1; ; $depth++) {
    my ($package, $filename, $line, $subroutine) = caller($depth);
    last unless defined($package);
    $filename =~ s@^.*/@@;    # basename only
    print STDERR " $filename#$line, $subroutine\n";
  }
}
+
+
+my $lastlog = "";
+
# Discard the pending audit log accumulated by LOG().
sub clearlog() {
  $lastlog = "";
}
+
# Print the accumulated audit log to stderr, one prefixed line at a
# time, then discard it.
sub showlog() {
  my $head = "$progname: DEBUG: ";
  print STDERR "$head$_\n" foreach (split (/\n/, $lastlog));
  $lastlog = "";
}
+
# Append $msg to the audit log; also print it to stderr when $print is
# true (callers pass one of the $verbose_* flags).
sub LOG($$) {
  my ($print, $msg) = @_;
  my $line = timestr() . "$current_state: $msg";
  $lastlog .= "$line\n";
  print STDERR "$progname: $line\n" if $print;
}
+
+
# Per-source counters, keyed by search-method name.
my %stats_attempts;    # number of attempts
my %stats_successes;   # number of successful loads
my %stats_elapsed;     # total seconds spent

my $last_state = undef;  # name of the source tried most recently
# Note the start of an attempt on source $name: close out the previous
# attempt (recording a failure if it never succeeded), reset the audit
# log, maybe print periodic stats, and start this source's timer.
sub record_attempt($) {
  my ($name) = @_;

  if ($last_state) {
    record_failure($last_state) unless ($image_succeeded > 0);
  }
  $last_state = $name;

  clearlog();
  report_performance();

  start_timer($name);
  $image_succeeded = 0;
  $suppress_audit = 0;
}
+
# Note a successful image load from source $name: bump its success
# counter, stop its timer, and remember the URL for duplicate warnings.
sub record_success($$$) {
  my ($name, $url, $base) = @_;
  $stats_successes{$name}++;     # autovivifies to 1 on the first success

  stop_timer ($name, 1);
  # save_recent_url() logs under the source's name, so swap
  # $current_state around the call.
  my $saved_state = $current_state;
  $current_state = $name;
  save_recent_url ($url, $base);
  $current_state = $saved_state;
  $image_succeeded = 1;
  clearlog();
}
+
+
# Note a failed attempt on source $name: stop its timer and, when
# loading is verbose (but exec is not), dump the audit log to stderr.
sub record_failure($) {
  my ($name) = @_;

  return if $image_succeeded;

  stop_timer ($name, 0);
  if ($verbose_load && !$verbose_exec) {

    if ($suppress_audit) {
      print STDERR "$progname: " . timestr() . "(audit log suppressed)\n";
      return;
    }

    my $o = $current_state;
    $current_state = "DEBUG";

    # (Removed an unused local: a "#" x 78 string was assigned to a
    # variable that was never referenced.)
    print STDERR "\n\n\n";
    print STDERR ("#" x 78) . "\n";
    print STDERR blurb() . "failed to get an image.  Full audit log:\n";
    print STDERR "\n";
    showlog();
    print STDERR ("-" x 78) . "\n";
    print STDERR "\n\n";

    $current_state = $o;
  }
  $image_succeeded = 0;
}
+
+
+
# Success rate of source $name as a percentage string, e.g. "42%".
sub stats_of($) {
  my ($name) = @_;
  my $wins  = $stats_successes{$name} || 0;
  my $tries = $stats_attempts{$name}  || 0;
  return ($tries ? int ($wins * 100 / $tries) : "0") . "%";
}
+
+
+my $current_start_time = 0;
+
# Begin timing an attempt on source $name and bump its attempt counter.
sub start_timer($) {
  my ($name) = @_;
  $current_start_time = time;

  $stats_attempts{$name}++;      # autovivifies to 1 on the first attempt
  $stats_elapsed{$name} = 0 unless defined ($stats_elapsed{$name});
}
+
# Accumulate the elapsed time for source $name.  $success (1 on success,
# 0 on failure) is currently unused but supplied by all callers.
sub stop_timer($$) {
  my ($name, $success) = @_;
  $stats_elapsed{$name} += time - $current_start_time;
}
+
+
+my $last_report_time = 0;
# Every $report_performance_interval seconds (and only when warnings are
# on), print per-source success rates and average seconds per link.
sub report_performance() {

  return unless $verbose_warnings;

  my $now = time;
  return if ($now < $last_report_time + $report_performance_interval);
  my $prev = $last_report_time;
  $last_report_time = $now;

  return if ($prev == 0);   # first call: just start the clock

  my $blurb = "$progname: " . timestr();

  print STDERR "\n";
  print STDERR "${blurb}Current standings:\n";

  foreach my $name (sort keys (%stats_attempts)) {
    my $tries = $stats_attempts{$name};
    my $wins  = $stats_successes{$name} || 0;
    my $pct   = int ($wins * 100 / $tries);
    my $avg   = $stats_elapsed{$name} / $tries;
    print STDERR sprintf ("$blurb %-14s %4s (%d/%d);" .
                          " \t %.1f secs/link\n",
                          "$name:", "$pct%", $wins, $tries, $avg);
  }
}
+
+
+
# Ring buffers of recently-served image URLs and their sites, used by
# save_recent_url() to warn about near-duplicates.
my $max_recent_images = 400;
my $max_recent_sites = 20;
my @recent_images = ();
my @recent_sites = ();
+
# Remember $url and its site, and (when $verbose_warnings) warn if the
# exact image, or its site, has been served recently.
sub save_recent_url($$) {
  my ($url, $base) = @_;

  return unless ($verbose_warnings);

  $_ = $url;
  my ($site) = m@^https?://([^ \t\n\r/:]+)@;
  return unless defined ($site);   # not an http(s) URL: nothing to track

  # Driftnet/local files have no meaningful site: use the magic token,
  # and don't bother tracking duplicate images for them.
  if ($base eq $driftnet_magic || $base eq $local_magic) {
    $site = $base;
    @recent_images = ();
  }

  my $done = 0;
  foreach (@recent_images) {
    if ($_ eq $url) {
      print STDERR blurb() . "WARNING: recently-duplicated image: $url" .
        " (on $base via $last_search)\n";
      $done = 1;
      last;
    }
  }

  # suppress "duplicate site" warning via %warningless_sites.
  #
  if ($warningless_sites{$site}) {
    $done = 1;
  } elsif ($site =~ m@([^.]+\.[^.]+\.[^.]+)$@ &&   # also try matching the
           $warningless_sites{$1}) {               # last three components,
    $done = 1;
  } elsif ($site =~ m@([^.]+\.[^.]+)$@ &&          # or the last two.
           $warningless_sites{$1}) {
    $done = 1;
  }

  if (!$done) {
    foreach (@recent_sites) {
      if ($_ eq $site) {
        print STDERR blurb() . "WARNING: recently-duplicated site: $site" .
          " ($url on $base via $last_search)\n";
        last;
      }
    }
  }

  # Append to the ring buffers and trim them to their size limits.
  push @recent_images, $url;
  push @recent_sites, $site;
  shift @recent_images if ($#recent_images >= $max_recent_images);
  shift @recent_sites if ($#recent_sites >= $max_recent_sites);
}
+
+
+\f
+##############################################################################
+#
+# other utilities
+#
+##############################################################################
+
+# Does %-decoding.
+#
# Does %-decoding: "+" becomes a space, "%XX" becomes the byte 0xXX.
# (Bug fix: this used to assign the argument to the global $_ without
# local(), clobbering the caller's $_.)
sub url_decode($) {
  my ($s) = @_;
  $s =~ tr/+/ /;
  $s =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
  return $s;
}
+
+
+# Given the raw body of a GIF document, returns the dimensions of the image.
+#
# Given the raw body of a GIF document, returns (width, height) from the
# logical screen descriptor (bytes 6-9, little-endian 16-bit each), or
# () if the body is not a GIF or is truncated.
sub gif_size($) {
  my ($body) = @_;
  my $type = substr($body, 0, 6);
  # Bug fix: the class was [7,9], which also accepted a literal comma
  # ("GIF8,a"); only versions 87a and 89a are real.
  return () unless ($type =~ /GIF8[79]a/);
  my $s = substr ($body, 6, 10);
  my ($a,$b,$c,$d) = unpack ("C"x4, $s);
  return () unless defined ($d);      # truncated header
  return (($b<<8|$a), ($d<<8|$c));    # little-endian width, height
}
+
+# Given the raw body of a JPEG document, returns the dimensions of the image.
+#
+sub jpeg_size($) {
+ my ($body) = @_;
+ my $i = 0;
+ my $L = length($body);
+
+ my $c1 = substr($body, $i, 1); $i++;
+ my $c2 = substr($body, $i, 1); $i++;
+ return () unless (ord($c1) == 0xFF && ord($c2) == 0xD8);
+
+ my $ch = "0";
+ while (ord($ch) != 0xDA && $i < $L) {
+ # Find next marker, beginning with 0xFF.
+ while (ord($ch) != 0xFF) {
+ return () if (length($body) <= $i);
+ $ch = substr($body, $i, 1); $i++;
+ }
+ # markers can be padded with any number of 0xFF.
+ while (ord($ch) == 0xFF) {
+ return () if (length($body) <= $i);
+ $ch = substr($body, $i, 1); $i++;
+ }
+
+ # $ch contains the value of the marker.
+ my $marker = ord($ch);
+
+ if (($marker >= 0xC0) &&
+ ($marker <= 0xCF) &&
+ ($marker != 0xC4) &&
+ ($marker != 0xCC)) { # it's a SOFn marker
+ $i += 3;
+ return () if (length($body) <= $i);
+ my $s = substr($body, $i, 4); $i += 4;
+ my ($a,$b,$c,$d) = unpack("C"x4, $s);
+ return (($c<<8|$d), ($a<<8|$b));