X-Git-Url: http://git.hungrycats.org/cgi-bin/gitweb.cgi?p=xscreensaver;a=blobdiff_plain;f=hacks%2Fwebcollage;h=815d2909aa20e40d102797c8cb930a0fdad0928c;hp=eda9cb759203d43c022368a7158058749159564c;hb=447db08c956099b3b183886729108bf5b364c4b8;hpb=2d04c4f22466851aedb6ed0f2919d148f726b889

Summary of this patch to hacks/webcollage ($Revision: 1.123 $ -> 1.125):
  * registers a new "flickr" search method (weight 5) and lowers the
    altavista/livejournal/yahoorand/googlephotos weights by 2/1/1/1;
  * adds "flickr.com" to %warningless_sites;
  * adds pick_from_flickr(), which scrapes the Flickr recent-photos page,
    keeps a de-duplicated FIFO cache of (image, page, start) entries, and
    returns one randomly chosen (page URL, image URL) pair per call;
  * in image_to_pnm, changes the "looks like HTML" test so a tag name only
    has to end at a word boundary instead of being followed by ">".
NOTE(review): line structure was restored from a whitespace-collapsed copy;
hunk line counts match the @@ headers, but exact indentation/alignment is
approximate -- confirm against the repository before applying with patch(1).

diff --git a/hacks/webcollage b/hacks/webcollage
index eda9cb75..815d2909 100755
--- a/hacks/webcollage
+++ b/hacks/webcollage
@@ -60,19 +60,20 @@ use bytes;  # Larry can take Unicode and shove it up his ass sideways.
 
 
 my $progname = $0; $progname =~ s@.*/@@g;
-my $version = q{ $Revision: 1.123 $ }; $version =~ s/^[^0-9]+([0-9.]+).*$/$1/;
+my $version = q{ $Revision: 1.125 $ }; $version =~ s/^[^0-9]+([0-9.]+).*$/$1/;
 my $copyright = "WebCollage $version, Copyright (c) 1999-2005" .
                 " Jamie Zawinski <jwz\@jwz.org>\n" .
                 " http://www.jwz.org/webcollage/\n";
 
 
 
-my @search_methods = (  60, "altavista",    \&pick_from_alta_vista_random_link,
-                        12, "livejournal",  \&pick_from_livejournal_images,
-                         8, "yahoorand",    \&pick_from_yahoo_random_link,
-                        11, "googlephotos", \&pick_from_google_image_photos,
+my @search_methods = (  58, "altavista",    \&pick_from_alta_vista_random_link,
+                        11, "livejournal",  \&pick_from_livejournal_images,
+                         7, "yahoorand",    \&pick_from_yahoo_random_link,
+                        10, "googlephotos", \&pick_from_google_image_photos,
                          6, "googleimgs",   \&pick_from_google_images,
                          3, "googlenums",   \&pick_from_google_image_numbers,
+                         5, "flickr",       \&pick_from_flickr,
 
                      # In Apr 2002, Google asked me to stop searching them.
                      # I asked them to add a "random link" url.  They said
@@ -196,6 +197,7 @@ my %warningless_sites = (
   "img.photobucket.com"     => 1,
   "pics.livejournal.com"    => 1,
   "tinypic.com"             => 1,
+  "flickr.com"              => 1,
 
   "yimg.com"                => 1,   # This is where dailynews.yahoo.com stores
   "eimg.com"                => 1,   # its images, so pick_from_yahoo_news_text()
@@ -1710,6 +1712,94 @@ sub pick_from_ircimages {
 
   return ($search_url, $img);
 }
+
+############################################################################
+#
+# Pick images from Flickr's page of recently-posted photos.
+#
+############################################################################
+
+my $flickr_img_url = "http://www.flickr.com/photos/";
+
+# Like LiveJournal, the Flickr page of images tends to update slowly,
+# so remember the last N entries on it and randomly select from those.
+
+# I know that Flickr has an API (http://www.flickr.com/services/api/)
+# but it was easy enough to scrape the HTML, so I didn't bother exploring.
+
+my $flickr_cache_size = 1000;
+my @flickr_cache = ();  # fifo, for ordering by age
+my %flickr_cache = ();  # hash, for detecting dups
+
+
+# flickr
+sub pick_from_flickr {
+  my ( $timeout ) = @_;
+
+  my $start = 16 * int(rand(100));
+
+  $last_search = $flickr_img_url;		# for warnings
+  $last_search .= "?start=$start" if ($start > 0);
+
+  my ( $base, $body ) = get_document ($last_search, undef, $timeout);
+  return () unless $body;
+
+  $body =~ s/[\r\n]/ /gs;
+  $body =~ s/(<a)\b/\n$1/gsi;
+
+  my $count = 0;
+  my $count2 = 0;
+  foreach (split (/\n/, $body)) {
+    my ($page, $thumb) = m@<A \b [^<>]* \b HREF=\"([^<>\"]+)\" [^<>]* > \s*
+                           <IMG \b [^<>]* \b SRC=\"([^<>\"]+)\" @xsi;
+    next unless defined ($thumb);
+    $page = html_unquote ($page);
+    $thumb = html_unquote ($thumb);
+
+    next unless ($thumb =~ m@^http://photos\d*\.flickr\.com/@);
+
+    my $base = "http://www.flickr.com/";
+    $page =~ s@^/@$base@;
+    $thumb =~ s@^/@$base@;
+
+    my $img = $thumb;
+    $img =~ s/_[a-z](\.[a-z\d]+)$/$1/si;     # take off "thumb" suffix
+
+    $count++;
+    next if ($flickr_cache{$img});  # already have it
+
+    my @pair = ($img, $page, $start);
+    LOG ($verbose_filter, " candidate: $img");
+    push @flickr_cache, \@pair;
+    $flickr_cache{$img} = \@pair;
+    $count2++;
+  }
+
+  return () if ($#flickr_cache == -1);
+
+  my $n = $#flickr_cache+1;
+  my $i = int(rand($n));
+  my ($img, $page) = @{$flickr_cache[$i]};
+
+  # delete this one from @flickr_cache and from %flickr_cache.
+  #
+  @flickr_cache = ( @flickr_cache[0 .. $i-1],
+                    @flickr_cache[$i+1 .. $#flickr_cache] );
+  delete $flickr_cache{$img};
+
+  # Keep the size of the cache under the limit by nuking older entries
+  #
+  while ($#flickr_cache >= $flickr_cache_size) {
+    my $pairP = shift @flickr_cache;
+    my $img = $pairP->[0];
+    delete $flickr_cache{$img};
+  }
+
+  LOG ($verbose_load, "picked image " .($i+1) . "/$n: $img");
+
+  return ($page, $img);
+}
+
 
 ############################################################################
 #
@@ -2363,7 +2453,7 @@ sub image_to_pnm {
   } else {
     LOG (($verbose_pbm || $verbose_load),
          "not a GIF, JPG, or PNG" .
-         (($body =~ m@<(base|html|head|body|script|table|a href)>@i)
+         (($body =~ m@<(base|html|head|body|script|table|a href)\b@i)
           ? " (looks like HTML)" : "") .
          ": $url");
     $suppress_audit = 1;