+\f
+############################################################################
+#
+# Pick images from Twitpic's public-timeline feed of recently-posted images.
+#
+############################################################################
+
+# RSS feed that pick_from_twitter_images() fetches and scans for
+# <item>/<link> entries.
+my $twitter_img_url = "http://twitpic.com/public_timeline/feed.rss";
+
+# With most of our image sources, we get a random page and then select
+# from the images on it. However, in the case of Twitter, the page
+# of images tends to update slowly; so we'll remember the last N entries
+# on it and randomly select from those, to get a wider variety each time.
+#
+# N is $twit_cache_size.  The array and the hash below hold the same set
+# of page URLs: the array keeps them in arrival order so the oldest can be
+# dropped first, and the hash gives a quick "have we seen this?" lookup.
+
+my $twit_cache_size = 1000;
+my @twit_cache = (); # fifo, for ordering by age
+my %twit_cache = (); # hash, for detecting dups
+
+# twitter
+#
+# Pick one image by way of the feed at $twitter_img_url.
+#
+# Each call fetches the feed, adds any page URLs it has not already
+# remembered to the cache, then removes one cached page at random, fetches
+# that page, and returns the first acceptable <img> found on it.
+#
+# Arguments:
+#   $timeout - handed unchanged to get_document() for both fetches.
+#
+# Returns:
+#   ($page_url, $image_url) on success; the empty list if the cache is
+#   empty or no usable image was found on the chosen page.
+#
+# Side effects:
+#   Sets $last_search; adds to and removes from @twit_cache / %twit_cache.
+#
+sub pick_from_twitter_images($) {
+  my ($timeout) = @_;
+
+  $last_search = $twitter_img_url; # for warnings
+
+  my ( $base, $body ) = get_document ($twitter_img_url, undef, $timeout);
+
+  # Update the cache.
+
+  if ($body) {
+    # Reflow the feed so that every <item> begins its own line, which lets
+    # us handle the items one line at a time.
+    $body =~ s/\n/ /gs;
+    $body =~ s/(<item)\b/\n$1/gsi;
+
+    my @items = split (/\n/, $body);
+    shift @items;    # drop whatever preceded the first <item>
+
+    foreach my $item (@items) {
+      next unless ($item =~ m@<link>([^<>]*)</link>@si);
+      my $page = html_unquote ($1);
+
+      # Strip any trailing slash, then ask for the "/full" variant.
+      $page =~ s@/$@@s;
+      $page .= '/full';
+
+      next if ($twit_cache{$page}); # already have it
+
+      LOG ($verbose_filter, " candidate: $page");
+      push @twit_cache, $page;
+      $twit_cache{$page} = $page;
+    }
+  }
+
+  # Pull from the cache.
+
+  return () if ($#twit_cache == -1);
+
+  my $n = $#twit_cache+1;
+  my $i = int(rand($n));
+
+  # Take the chosen entry out of @twit_cache and out of %twit_cache.
+  # Because it is forgotten here, it can be re-added by a later call if
+  # the feed still lists it.
+  #
+  my ($page) = splice (@twit_cache, $i, 1);
+  delete $twit_cache{$page};
+
+  # Keep the size of the cache under the limit by nuking older entries
+  #
+  while ($#twit_cache >= $twit_cache_size) {
+    my $oldest = shift @twit_cache;
+    delete $twit_cache{$oldest};
+  }
+
+  ( $base, $body ) = get_document ($page, undef, $timeout);
+
+  # If the fetch gave us no body, scan an empty string instead of undef so
+  # that we reach the "no matching images" path without an
+  # uninitialized-value warning from split.
+  $body = '' unless defined ($body);
+
+  my $img = undef;
+
+  foreach my $tag (split (/<img\s+/, $body)) {
+    my ($src) = ($tag =~ m/\bsrc=[\"\'](.*?)[\"\']/si);
+    next unless $src;
+    # Skip chunks that mention these paths.
+    # NOTE(review): presumably site scripts and decorations -- confirm.
+    next if ($tag =~ m@/js/@s);
+    next if ($tag =~ m@/images/@s);
+
+    $img = $src;
+
+    # Sometimes these images are hosted on twitpic, sometimes on Amazon.
+    # A src beginning with "/" is relative to the site root, so prefix it
+    # with the scheme and host of the page we just fetched.
+    if ($img =~ m@^/@) {
+      $base =~ s@^(https?://[^/]+)/.*@$1@s;
+      $img = $base . $img;
+    }
+    last;
+  }
+
+  if (!$img) {
+    LOG ($verbose_load, "no matching images on $page\n");
+    return ();
+  }
+
+  LOG ($verbose_load, "picked image " .($i+1) . "/$n: $img");
+
+  return ($page, $img);
+}
+