+\f
+############################################################################
+#
+# Pick images from Twitter's list of recently-posted updates.
+#
+############################################################################
+
+# With most of our image sources, we get a random page and then select
+# from the images on it. However, in the case of Twitter, the page
+# of images only updates once a minute; so we'll remember the last N entries
+# on it and randomly select from those, to get a wider variety each time.
+
# URL of Twitter's public-timeline API (v1, JSON).  "include_entities"
# makes attached images come back inline as "media_url" fields.
# NOTE(review): this is the unauthenticated v1 endpoint — confirm it is
# still served before relying on this source.
my $twitter_img_url = "http://api.twitter.com/1/statuses/" .
                      "public_timeline.json" .
                      "?include_entities=true" .
                      "&include_rts=true" .
                      "&count=200";

# Upper bound on how many (page => image) entries we remember.
my $twitter_cache_size = 1000;

my @twitter_cache = (); # fifo, for ordering by age
my %twitter_cache = (); # hash, for detecting dups; maps page URL => image URL
+
+
+# twitter
# Fetch Twitter's public timeline, fold any newly-seen tweets with
# attached images into the cache, then pick (and remove) one random
# cached entry.  Returns ($page_url, $image_url), or () if the cache
# is empty.
sub pick_from_twitter_images($) {
  my ($timeout) = @_;

  $last_search = $twitter_img_url; # for warnings

  my ( $base, $body ) = get_document ($twitter_img_url, undef, $timeout);

  # Update the cache.

  if ($body) {
    $body =~ s/[\r\n]+/ /gs;

    # Parsing JSON is a pain in the ass. So we halfass it as usual:
    # strip the outer brackets of the tweet array...
    # NOTE(review): no /g on this substitution, so only the leading "["
    # (the first alternation match) is removed; a trailing "]" may
    # survive on the last item, but the field regexes below tolerate it.
    $body =~ s/^\[|\]$//s;
    # ...then rewrite "}," inside nested [...] arrays (the entities) to
    # "} " so that the split below only fires between top-level tweets.
    $body =~ s/(\[.*?\])/{ $_ = $1; s@\},@\} @gs; $_; }/gsexi;
    my @items = split (/},{/, $body);
    foreach (@items) {
      # Poster's name, attached image URL, and short display URL;
      # skip any tweet missing one of the three.
      my ($name) = m@"screen_name":"([^\"]+)"@si;
      my ($img) = m@"media_url":"([^\"]+)"@si;
      my ($page) = m@"display_url":"([^\"]+)"@si;
      next unless ($name && $img && $page);
      foreach ($img, $page) {
        s/\\//gs;                               # strip JSON backslash escapes
        $_ = "http://$_" unless (m/^http/si);   # display_url has no scheme
      }

      next if ($twitter_cache{$page}); # already have it

      LOG ($verbose_filter, " candidate: $page - $img");
      push @twitter_cache, $page;
      $twitter_cache{$page} = $img;
    }
  }

  # Pull from the cache.

  return () if ($#twitter_cache == -1);   # nothing cached yet

  my $n = $#twitter_cache+1;
  my $i = int(rand($n));
  my $page = $twitter_cache[$i];
  my $url = $twitter_cache{$page};

  # delete this one from @twitter_cache and from %twitter_cache.
  #
  @twitter_cache = ( @twitter_cache[0 .. $i-1],
                     @twitter_cache[$i+1 .. $#twitter_cache] );
  delete $twitter_cache{$page};

  # Keep the size of the cache under the limit by nuking older entries
  #
  while ($#twitter_cache >= $twitter_cache_size) {
    my $page = shift @twitter_cache;
    delete $twitter_cache{$page};
  }

  # NOTE(review): $url is actually the image URL, not a page URL.
  LOG ($verbose_load, "picked page $url");

  $suppress_audit = 1;

  return ($page, $url);
}
+
+\f
+############################################################################
+#
+# Pick images from Flickr's page of recently-posted photos.
+#
+############################################################################
+
# Landing page listing recently-posted Flickr photos.
my $flickr_img_url = "http://www.flickr.com/photos/";

# Like LiveJournal, the Flickr page of images tends to update slowly,
# so remember the last N entries on it and randomly select from those.

# I know that Flickr has an API (http://www.flickr.com/services/api/)
# but it was easy enough to scrape the HTML, so I didn't bother exploring.

# Upper bound on how many [img, page, start] triples we remember.
my $flickr_cache_size = 1000;
my @flickr_cache = (); # fifo, for ordering by age
my %flickr_cache = (); # hash, for detecting dups; maps image URL => triple
+
+
+# flickr_recent
# Scrape a random page of Flickr's recent-photos listing, fold any
# newly-seen thumbnails into the cache, then pick (and remove) one
# random cached entry.  Returns ($page_url, $image_url), or () if
# the cache is empty.
sub pick_from_flickr_recent($) {
  my ($timeout) = @_;

  # Random multiple-of-16 offset into the listing, so we don't always
  # scrape the same first page.
  my $start = 16 * int(rand(100));

  $last_search = $flickr_img_url; # for warnings
  $last_search .= "?start=$start" if ($start > 0);

  my ( $base, $body ) = get_document ($last_search, undef, $timeout);
  return () unless $body;

  # Flatten the page, then break it so each <A ...> starts a new line;
  # the regex below then matches one link per line.
  $body =~ s/[\r\n]/ /gs;
  $body =~ s/(<a)\b/\n$1/gsi;

  my $count = 0;    # links that looked like photo thumbnails
  my $count2 = 0;   # of those, the ones not already cached
  foreach (split (/\n/, $body)) {
    # An <A HREF="page"> immediately followed by an <IMG SRC="thumb">.
    my ($page, $thumb) = m@<A \s [^<>]* \b HREF=\"([^<>\"]+)\" [^<>]* > \s*
                           <IMG \s [^<>]* \b SRC=\"([^<>\"]+)\" @xsi;
    next unless defined ($thumb);
    $page = html_unquote ($page);
    $thumb = html_unquote ($thumb);

    # Only thumbnails hosted on Flickr's static image farm count.
    next unless ($thumb =~ m@^http://farm\d*\.static\.?flickr\.com/@);

    my $base = "http://www.flickr.com/";
    $page =~ s@^/@$base@;     # make relative URLs absolute
    $thumb =~ s@^/@$base@;

    my $img = $thumb;
    $img =~ s/_[a-z](\.[a-z\d]+)$/$1/si; # take off "thumb" suffix

    $count++;
    next if ($flickr_cache{$img}); # already have it

    my @pair = ($img, $page, $start);
    LOG ($verbose_filter, " candidate: $img");
    push @flickr_cache, \@pair;
    $flickr_cache{$img} = \@pair;
    $count2++;
  }

  return () if ($#flickr_cache == -1);   # nothing cached yet

  my $n = $#flickr_cache+1;
  my $i = int(rand($n));
  my ($img, $page) = @{$flickr_cache[$i]};

  # delete this one from @flickr_cache and from %flickr_cache.
  #
  @flickr_cache = ( @flickr_cache[0 .. $i-1],
                    @flickr_cache[$i+1 .. $#flickr_cache] );
  delete $flickr_cache{$img};

  # Keep the size of the cache under the limit by nuking older entries
  #
  while ($#flickr_cache >= $flickr_cache_size) {
    my $pairP = shift @flickr_cache;
    my $img = $pairP->[0];
    delete $flickr_cache{$img};
  }

  LOG ($verbose_load, "picked image " .($i+1) . "/$n: $img");

  return ($page, $img);
}
+
+\f
+############################################################################
+#
+# Pick images from a random RSS feed on Flickr.
+#
+############################################################################
+
# Base URL of Flickr's public-photos RSS feed; random comma-separated
# tag words get appended to it.
my $flickr_rss_base = ("http://www.flickr.com/services/feeds/photos_public.gne".
                       "?format=rss_200_enc&tagmode=any&tags=");
+
+# Picks a random RSS feed; picks a random image from that feed;
+# returns 2 URLs: the page containing the image, and the image.
+# Mostly by Joe Mcmahon <mcmahon@yahoo-inc.com>
+#
+# flickr_random
# Pick a random image from a Flickr RSS feed built from random tag words.
# Returns ($page_url, $image_url), or ()/nothing if the fetch or parse
# fails.  Sets $last_search and $suppress_audit as side effects.
sub pick_from_flickr_random($) {
  my $timeout = shift;

  my $words = random_words(',');
  my $rss = $flickr_rss_base . $words;
  $last_search = $rss;

  # Log a space-separated copy of the words.  (The previous version
  # assigned to $_ without localizing it, clobbering the caller's $_.)
  (my $pretty = $words) =~ s/,/ /g;

  print STDERR "\n\n" if ($verbose_load);
  LOG ($verbose_load, "words: $pretty");
  LOG ($verbose_load, "URL: $last_search");

  $suppress_audit = 1;

  my ( $base, $body ) = get_document ($last_search, undef, $timeout);
  if (!$base || !$body) {
    $body = undef;
    return;
  }

  # Let the generic RSS parser pick an image out of the feed.
  my $img;
  ($base, $img) = pick_image_from_rss ($base, $body);
  $body = undef;
  return () unless defined ($img);

  LOG ($verbose_load, "redirected to: $base");
  return ($base, $img);
}
+
+\f
+############################################################################
+#
+# Pick images by waiting for driftnet to populate a temp dir with files.
+# Requires driftnet version 0.1.5 or later.
+# (Driftnet is a program by Chris Lightfoot that sniffs your local ethernet
+# for images being downloaded by others.)
+# Driftnet/webcollage integration by jwz.
+#
+############################################################################
+
+# driftnet
# Poll the driftnet spool directory until a file shows up or $timeout
# expires.  Returns ($driftnet_magic, $file_path), or () on timeout.
#
# BUG FIX: the old code did "opendir (my $dir, $dir)", shadowing the
# directory *path* with the directory *handle*; the later "$dir/$file"
# then interpolated a glob (e.g. "GLOB(0x...)/foo.jpg"), producing a
# bogus path.  The handle now has its own name.
sub pick_from_driftnet($) {
  my ($timeout) = @_;

  my $id = $driftnet_magic;
  my $dir = $driftnet_dir;
  my $start = time;
  my $now;

  error ("\$driftnet_dir unset?") unless ($dir);
  $dir =~ s@/+$@@;                 # strip trailing slashes

  error ("$dir unreadable") unless (-d "$dir/.");

  $timeout = $http_timeout unless ($timeout);
  $last_search = $id;

  while ($now = time, $now < $start + $timeout) {
    opendir (my $dh, $dir) || error ("$dir: $!");
    while (my $file = readdir($dh)) {
      next if ($file =~ m/^\./);   # skip dotfiles
      $file = "$dir/$file";
      closedir ($dh);
      LOG ($verbose_load, "picked file $file ($id)");
      return ($id, $file);
    }
    closedir ($dh);
  }
  LOG (($verbose_net || $verbose_load), "timed out for $id");
  return ();
}
+
+
# Slurp a driftnet-captured file, delete it from the spool, and return
# ($driftnet_magic, $contents).  Refuses paths outside $driftnet_dir.
sub get_driftnet_file($) {
  my $file = shift;

  error ("\$driftnet_dir unset?") unless ($driftnet_dir);

  my $id = $driftnet_magic;
  error ("$id: $file not in $driftnet_dir?")
    unless ($file =~ m@^\Q$driftnet_dir@o);

  open (my $in, '<', $file) || error ("$id: $file: $!");
  my $body = do { local $/; <$in> };   # slurp the whole file at once
  close ($in) || error ("$id: $file: $!");
  unlink ($file) || error ("$id: $file: rm: $!");
  return ($id, $body);
}
+
+
# Fork a driftnet process pointed at a freshly-created private spool
# directory (stored in the global $driftnet_dir).  $cmd may be a bare
# executable name, in which case the default arguments are appended.
# Dies via error() if the fork fails or the child dies immediately.
#
# BUG FIX: perl's fork() returns undef on failure, never a negative
# number, so the old "$pid < 0" test could never fire; the failure is
# now detected with defined().
sub spawn_driftnet($) {
  my ($cmd) = @_;

  # make a directory to use.
  while (1) {
    my $tmp = $ENV{TEMPDIR} || "/tmp";
    $driftnet_dir = sprintf ("$tmp/driftcollage-%08x", rand(0xffffffff));
    LOG ($verbose_exec, "mkdir $driftnet_dir");
    last if mkdir ($driftnet_dir, 0700);
  }

  if (! ($cmd =~ m/\s/)) {
    # if the command didn't have any arguments in it, then it must be just
    # a pointer to the executable. Append the default args to it.
    my $dargs = $default_driftnet_cmd;
    $dargs =~ s/^[^\s]+//;    # keep only the args, drop the program name
    $cmd .= $dargs;
  }

  # point the driftnet command at our newly-minted private directory.
  #
  $cmd .= " -d $driftnet_dir";
  $cmd .= ">/dev/null" unless ($verbose_exec);

  my $pid = fork();
  if (!defined ($pid)) { error ("fork: $!\n"); }
  if ($pid) {
    # parent fork: remember the child so it gets killed on exit.
    push @pids_to_kill, $pid;
    LOG ($verbose_exec, "forked for \"$cmd\"");
  } else {
    # child fork
    nontrapping_system ($cmd) || error ("exec: $!");
  }

  # wait a bit, then make sure the process actually started up.
  #
  sleep (1);
  error ("pid $pid failed to start \"$cmd\"")
    unless (1 == kill (0, $pid));
}
+
+# local-directory
# Pick a random image file from $local_dir by shelling out to
# xscreensaver-getimage-file, which does the actual random selection.
# Returns ($local_magic, $absolute_file_path).
sub pick_from_local_dir($) {
  my ($timeout) = @_;    # unused: selection is local and fast

  my $id = $local_magic;
  $last_search = $id;

  my $dir = $local_dir;
  error ("\$local_dir unset?") unless ($dir);
  $dir =~ s@/+$@@;    # strip trailing slashes

  error ("$dir unreadable") unless (-d "$dir/.");

  my $v = ($verbose_exec ? "-v" : "");
  # NOTE(review): $dir is interpolated into a shell command line; a
  # directory name containing a double-quote would break this.
  my $pick = `xscreensaver-getimage-file $v "$dir"`;
  $pick =~ s/\s+$//s;                              # chomp trailing newline
  $pick = "$dir/$pick" unless ($pick =~ m@^/@s); # relative path

  LOG ($verbose_load, "picked file $pick ($id)");
  return ($id, $pick);
}
+
+
# Slurp a file previously picked from $local_dir and return
# ($local_magic, $contents).  Refuses paths outside $local_dir.
sub get_local_file($) {
  my $file = shift;

  error ("\$local_dir unset?") unless ($local_dir);

  my $id = $local_magic;
  error ("$id: $file not in $local_dir?")
    unless ($file =~ m@^\Q$local_dir@o);

  open (my $in, '<', $file) || error ("$id: $file: $!");
  my $body = do { local $/; <$in> };   # slurp the whole file at once
  close ($in) || error ("$id: $file: $!");
  return ($id, $body);
}
+
+
+\f
+############################################################################
+#
+# Pick a random image in a random way
+#
+############################################################################
+
+
+# Picks a random image on a random page, and returns two URLs:
+# the page containing the image, and the image.
+# Returns () if nothing found this time.
+#
+
# Choose a search method by weighted random selection and invoke it.
# @search_methods is a flat list of (percent, name, coderef) triples
# whose percentages must total exactly 100.  Returns whatever the
# chosen method returns: ($page_url, $image_url) or () on failure.
sub pick_image(;$) {
  my ($timeout) = @_;

  $current_state = "select";
  $load_method = "none";

  my $roll = int(rand(100));
  my $chosen = undef;
  my $sum = 0;

  # Walk the triples without consuming a copy of the list; the first
  # method whose cumulative percentage exceeds the roll wins, but we
  # keep summing so the 100% sanity check below still runs.
  for (my $i = 0; $i < @search_methods; $i += 3) {
    my ($pct, $name, $method) = @search_methods[$i .. $i+2];
    $sum += $pct;
    if (!defined ($chosen) && $sum > $roll) {
      $chosen = $method;
      $current_state = $name;
      $load_method = $name;
    }
  }

  error ("internal error: \@search_methods totals to $sum%!")
    if ($sum != 100);

  record_attempt ($current_state);
  return $chosen->($timeout);
}
+
+
+\f
+############################################################################
+#
+# Statistics and logging
+#
+############################################################################
+
# Current local time as "HH:MM:SS: " (note trailing space), used as a
# log-line prefix.
sub timestr() {
  my ($sec, $min, $hour) = localtime;
  return sprintf ("%02d:%02d:%02d: ", $hour, $min, $sec);
}
+
# Standard log-line prefix: program name, timestamp, current state.
sub blurb() {
  return join ('', "$progname: ", timestr(), "$current_state: ");
}
+
# Print a fatal message to stderr (with the standard prefix) and exit
# with a failure status.  Never returns.
sub error($) {
  my $err = shift;
  print STDERR blurb() . "$err\n";
  exit 1;
}
+
# Dump the current call stack to stderr, one "file#line, sub" per frame,
# starting with our caller.
sub stacktrace() {
  print STDERR "$progname: stack trace:\n";
  for (my $depth = 1; ; $depth++) {
    my ($pkg, $file, $line, $sub) = caller ($depth);
    last unless defined ($pkg);   # ran off the top of the stack
    $file =~ s@^.*/@@;            # basename only
    print STDERR " $file#$line, $sub\n";
  }
}
+
+
# Audit log accumulated since the last clearlog(); LOG() appends to it
# and showlog() dumps it.
my $lastlog = "";

# Discard the accumulated audit log.
sub clearlog() {
  $lastlog = "";
}
+
# Dump the accumulated audit log to stderr, one prefixed line at a
# time, then clear it.
sub showlog() {
  my $prefix = "$progname: DEBUG: ";
  print STDERR "$prefix$_\n" foreach (split (/\n/, $lastlog));
  $lastlog = "";
}
+
# Append a timestamped message to the audit log; also echo it to stderr
# when $print_it is true.
sub LOG($$) {
  my ($print_it, $msg) = @_;
  my $stamp = timestr() . "$current_state: ";
  $lastlog .= "$stamp$msg\n";
  print STDERR "$progname: $stamp$msg\n" if $print_it;
}
+
+
my %stats_attempts;    # per-method attempt counts
my %stats_successes;   # per-method success counts
my %stats_elapsed;     # per-method total elapsed seconds
+
# Name of the most recently attempted search method, so the next attempt
# can charge it as a failure if it never succeeded.
my $last_state = undef;

# Note that we are about to try search method $name: record the previous
# attempt as a failure if it didn't succeed, reset the audit log and the
# per-attempt flags, and start this attempt's timer.
sub record_attempt($) {
  my ($name) = @_;

  if ($last_state) {
    record_failure($last_state) unless ($image_succeeded > 0);
  }
  $last_state = $name;

  clearlog();
  report_performance();

  start_timer($name);
  $image_succeeded = 0;
  $suppress_audit = 0;
}
+
# Record that search method $name successfully produced $url (found on
# page $base): bump its success counter, stop its timer, remember the
# URL for duplicate warnings, and discard the audit log.
sub record_success($$$) {
  my ($name, $url, $base) = @_;

  $stats_successes{$name} = ($stats_successes{$name} || 0) + 1;

  stop_timer ($name, 1);

  # save_recent_url() logs under the method's name, so swap
  # $current_state around the call.
  my $saved_state = $current_state;
  $current_state = $name;
  save_recent_url ($url, $base);
  $current_state = $saved_state;

  $image_succeeded = 1;
  clearlog();
}
+
+
# Record that search method $name failed: stop its timer and, when
# running with -verbose (but not -debug exec), dump the audit log so the
# failure can be diagnosed.
#
# FIX: "my $line" was computed but never used; it is now used for the
# banner (output is unchanged: 78 "#" characters).
sub record_failure($) {
  my ($name) = @_;

  return if $image_succeeded;

  stop_timer ($name, 0);
  if ($verbose_load && !$verbose_exec) {

    if ($suppress_audit) {
      print STDERR "$progname: " . timestr() . "(audit log suppressed)\n";
      return;
    }

    my $o = $current_state;
    $current_state = "DEBUG";

    my $line = "#" x 78;
    print STDERR "\n\n\n";
    print STDERR "$line\n";
    print STDERR blurb() . "failed to get an image. Full audit log:\n";
    print STDERR "\n";
    showlog();
    print STDERR ("-" x 78) . "\n";
    print STDERR "\n\n";

    $current_state = $o;
  }
  $image_succeeded = 0;
}
+
+
+
# Success rate of search method $name as a percentage string, e.g. "42%".
# Methods never attempted report "0%".
sub stats_of($) {
  my ($name) = @_;
  my $wins  = $stats_successes{$name} || 0;
  my $tries = $stats_attempts{$name}  || 0;
  return ($tries ? int ($wins * 100 / $tries) : "0") . "%";
}
+
+
# Wall-clock time at which the current attempt began; read by stop_timer.
my $current_start_time = 0;

# Begin timing an attempt of search method $name and bump its attempt
# counter.
sub start_timer($) {
  my $name = shift;
  $current_start_time = time;

  $stats_attempts{$name}++;   # autovivifies to 1 on the first attempt
  $stats_elapsed{$name} = 0 unless defined ($stats_elapsed{$name});
}
+
# Stop timing the current attempt of $name and charge the elapsed wall
# time to its total.  ($success is accepted for symmetry but unused.)
sub stop_timer($$) {
  my ($name, $success) = @_;
  my $elapsed = time - $current_start_time;
  $stats_elapsed{$name} += $elapsed;
}
+
+
# Time of the last standings report; 0 means we have not yet armed the
# reporting interval.
my $last_report_time = 0;

# Every $report_performance_interval seconds (and only when
# $verbose_warnings is on) print a table of per-method success rates
# and average seconds per link.  The very first call only arms the
# timer and prints nothing.
sub report_performance() {

  return unless $verbose_warnings;

  my $now = time;
  return unless ($now >= $last_report_time + $report_performance_interval);
  my $ot = $last_report_time;
  $last_report_time = $now;

  return if ($ot == 0);    # first call: arm the interval, don't report

  my $blurb = "$progname: " . timestr();

  print STDERR "\n";
  print STDERR "${blurb}Current standings:\n";

  foreach my $name (sort keys (%stats_attempts)) {
    my $try = $stats_attempts{$name};
    my $suc = $stats_successes{$name} || 0;
    my $pct = int($suc * 100 / $try);   # $try >= 1 since the key exists
    my $secs = $stats_elapsed{$name};
    my $secs_link = $secs / $try;
    print STDERR sprintf ("$blurb %-14s %4s (%d/%d);" .
                          " \t %.1f secs/link\n",
                          "$name:", "$pct%", $suc, $try, $secs_link);
  }
}
+
+
+
my $max_recent_images = 400;   # remember this many recent image URLs
my $max_recent_sites = 20;     # and this many recent site hostnames
my @recent_images = ();        # fifo of recently-selected image URLs
my @recent_sites = ();         # fifo of recently-selected hostnames
+
# Remember $url (found on $base) in the recent-image and recent-site
# fifos, warning on stderr when an image or a site repeats too soon.
# Only active when $verbose_warnings is set.
#
# BUG FIX: the old code did "$_ = $url" without localizing $_, silently
# clobbering the caller's $_; the hostname is now extracted with a
# lexical match instead.
sub save_recent_url($$) {
  my ($url, $base) = @_;

  return unless ($verbose_warnings);

  my ($site) = ($url =~ m@^http://([^ \t\n\r/:]+)@);
  return unless defined ($site);

  # Driftnet/local files have no meaningful site; use the magic token
  # and don't track image duplicates for them.
  if ($base eq $driftnet_magic || $base eq $local_magic) {
    $site = $base;
    @recent_images = ();
  }

  my $done = 0;
  foreach (@recent_images) {
    if ($_ eq $url) {
      print STDERR blurb() . "WARNING: recently-duplicated image: $url" .
        " (on $base via $last_search)\n";
      $done = 1;
      last;
    }
  }

  # suppress "duplicate site" warning via %warningless_sites.
  # (Also matches the last two or three dotted components of the host,
  # so "www.foo.com" is covered by a "foo.com" entry.)
  if ($warningless_sites{$site}) {
    $done = 1;
  } elsif ($site =~ m@([^.]+\.[^.]+\.[^.]+)$@ &&
           $warningless_sites{$1}) {
    $done = 1;
  } elsif ($site =~ m@([^.]+\.[^.]+)$@ &&
           $warningless_sites{$1}) {
    $done = 1;
  }

  if (!$done) {
    foreach (@recent_sites) {
      if ($_ eq $site) {
        print STDERR blurb() . "WARNING: recently-duplicated site: $site" .
          " ($url on $base via $last_search)\n";
        last;
      }
    }
  }

  push @recent_images, $url;
  push @recent_sites, $site;
  shift @recent_images if ($#recent_images >= $max_recent_images);
  shift @recent_sites if ($#recent_sites >= $max_recent_sites);
}
+
+
+\f