+ return () if ($#pages < 0);
+
+ my $i = int(rand($#pages+1));
+ my $page = $pages[$i];
+
+ LOG ($verbose_load, "picked page $page");
+
+ $suppress_audit = 1;
+
+ my ( $base2, $body2 ) = get_document ($page, $base, $timeout);
+
+ if (!$base2 || !$body2) {
+ $body2 = undef;
+ return ();
+ }
+
+ my $img = pick_image_from_body ($base2, $body2);
+ $body2 = undef;
+
+ if ($img) {
+ return ($base2, $img);
+ } else {
+ return ();
+ }
+}
+
+\f
+############################################################################
+#
+# Pick images from random pages returned by the Yahoo Random Link
+#
+############################################################################
+
+# yahoorand
+# URL of the Yahoo "random link" service.  The base URL that get_document()
+# hands back for it is logged below as the redirect target.
+ my $yahoo_random_link = "http://random.yahoo.com/fast/ryl";
+
+
+# Fetches one document through the Yahoo random link and selects an image
+# from it with pick_image_from_body().
+#
+#   $timeout - passed through unchanged to get_document().
+#
+# Returns two URLs as a list: the page containing the image, and the image.
+# Returns an empty list if the fetch produced no base URL or no body, or if
+# no image was selected.
+#
+# Side effects: records the URL in $last_search and sets $suppress_audit
+# (both are file-level variables whose consumers are outside this block).
+#
+ sub pick_from_yahoo_random_link($) {
+   my ($timeout) = @_;
+
+   if ($verbose_load) {
+     print STDERR "\n\n";
+   }
+   LOG ($verbose_load, "URL: $yahoo_random_link");
+
+   $last_search    = $yahoo_random_link;   # for warnings
+   $suppress_audit = 1;
+
+   my ($page_url, $html) =
+     get_document ($yahoo_random_link, undef, $timeout);
+
+   # Nothing usable came back: give up for this round.
+   unless ($page_url && $html) {
+     undef $html;
+     return;
+   }
+
+   LOG ($verbose_load, "redirected to: $page_url");
+
+   my $image_url = pick_image_from_body ($page_url, $html);
+   undef $html;                            # page text is no longer needed
+
+   return $image_url ? ($page_url, $image_url) : ();
+}
+
+\f
+############################################################################
+#
+# Pick images from random pages returned by the Alta Vista Random Link
+# Note: this seems to have gotten a *lot* less random lately (2007).
+#
+############################################################################
+
+# altavista
+# URL of the Alta Vista "random link" service.
+ my $alta_vista_random_link = "http://www.altavista.com/image/randomlink";
+
+
+# Fetches one document through the Alta Vista random link and selects an
+# image from it with pick_image_from_body().
+#
+#   $timeout - passed through unchanged to get_document().
+#
+# Returns two URLs as a list: the page containing the image, and the image.
+# Returns an empty list if nothing was found this time.
+#
+# Side effects: records the URL in $last_search and sets $suppress_audit
+# (both are file-level variables whose consumers are outside this block).
+#
+ sub pick_from_alta_vista_random_link($) {
+   my ($timeout) = @_;
+
+   if ($verbose_load) {
+     print STDERR "\n\n";
+   }
+   LOG ($verbose_load, "URL: $alta_vista_random_link");
+
+   $last_search    = $alta_vista_random_link;   # for warnings
+   $suppress_audit = 1;
+
+   my ($page_url, $html) =
+     get_document ($alta_vista_random_link, undef, $timeout);
+
+   # Either half missing means there is nothing to scan.
+   unless ($page_url && $html) {
+     undef $html;
+     return;
+   }
+
+   LOG ($verbose_load, "redirected to: $page_url");
+
+   my $image_url = pick_image_from_body ($page_url, $html);
+   undef $html;                                 # page text is no longer needed
+
+   return $image_url ? ($page_url, $image_url) : ();
+}
+
+\f
+############################################################################
+#
+# Pick images by feeding random words into Alta Vista Image Search
+#
+############################################################################
+
+
+ my $alta_vista_images_url = join ('',
+   "http://www.altavista.com/image/results",
+   "?ipht=1",     # photos
+   "&igrph=1",    # graphics
+   "&iclr=1",     # color
+   "&ibw=1",      # b&w
+   "&micat=1",    # no partner sites
+   "&sc=on",      # "site collapse"
+   "&q=");
+
+# avimages
+# Runs an Alta Vista image search for one random word on a randomly chosen
+# result page (1 through 9), filters the returned links, and hands the
+# survivors to pick_image_from_pages().
+#
+#   $timeout - passed through to pick_from_search_engine() and
+#              pick_image_from_pages().
+#
+# Returns whatever pick_image_from_pages() returns.
+#
+ sub pick_from_alta_vista_images($) {
+   my ($timeout) = @_;
+
+   my $words      = random_word();
+   my $page       = 1 + int(rand(9));
+   my $search_url = $alta_vista_images_url . $words;
+
+   if ($page > 1) {
+     my $first_hit = ($page - 1) * 12;       # first hit result on page
+     $search_url .= "&pgno=$page";           # page number
+     $search_url .= "&stq=$first_hit";
+   }
+
+   my ($search_hit_count, @subpages) =
+     pick_from_search_engine ($timeout, $search_url, $words);
+
+   # Links pointing at any of these hosts are never used as candidates.
+   my @unwanted_hosts = (
+     qr@[/.]altavista\.com\b@i,       # skip altavista builtins
+     qr@[/.]yahoo\.com\b@i,           # yahoo and av in cahoots?
+     qr@[/.]doubleclick\.net\b@i,     # you cretins
+     qr@[/.]clicktomarket\.com\b@i,   # more cretins
+     qr@[/.]viewimages\.com\b@i,      # stacked deck
+     qr@[/.]gettyimages\.com\b@i,
+   );
+
+   my @candidates = ();
+   LINK: foreach my $link (@subpages) {
+
+     # avimages is encoding their URLs now: keep only the part that follows
+     # the "**" marker, and drop links that do not have one.
+     next LINK unless ($link =~ s/^.*\*\*(http%3a.*$)/$1/gsi);
+     $link = url_unquote($link);
+
+     next LINK unless ($link =~ m@^http://@i);  # skip non-HTTP or relative URLs
+     next LINK if (grep { $link =~ $_ } @unwanted_hosts);
+
+     LOG ($verbose_filter, " candidate: $link");
+     push @candidates, $link;
+   }
+
+   return pick_image_from_pages ($search_url, $search_hit_count,
+                                 scalar(@subpages),
+                                 $timeout, @candidates);
+}
+
+
+\f
+############################################################################
+#
+# Pick images from Aptix security cameras
+# Cribbed liberally from google image search code.
+# By Jason Sullivan <jasonsul@us.ibm.com>
+#
+############################################################################
+
+ my $aptix_images_url = ("http://www.google.com/search" .
+                         "?q=inurl:%22jpg/image.jpg%3Fr%3D%22");
+
+# securitycam
+# Runs the fixed Google query above on a randomly chosen result page
+# (1 through 9), keeps the non-Google links that contain "jpg/image.jpg?r=",
+# and picks one of them at random.
+#
+#   $timeout - passed through to pick_from_search_engine().
+#
+# Returns a two-element list (referring URL, image URL); for this source
+# the referring URL recorded for each candidate is the image URL itself.
+# Returns an empty list when no candidate survives filtering.
+#
+ sub pick_from_security_camera($) {
+   my ($timeout) = @_;
+
+   my $page = (int(rand(9)) + 1);
+   my $num = 20;                      # 20 images per page
+   my $search_url = $aptix_images_url;
+
+   if ($page > 1) {
+     # "start" is the offset of the first result, so page N begins at
+     # (N-1)*$num; using N*$num would never sample results 20..39.
+     $search_url .= "&start=" . (($page-1) * $num);
+     $search_url .= "&num="   . $num;            # images per page
+   }
+
+   # The hit count returned first is not needed here.
+   my ($search_hit_count, @subpages) =
+     pick_from_search_engine ($timeout, $search_url, '');
+
+   my @candidates = ();
+   my %referers;
+   foreach my $u (@subpages) {
+     next if ($u =~ m@[/.]google\.com\b@i);     # skip google builtins (most links)
+     next unless ($u =~ m@jpg/image\.jpg\?r=@i); # All pics contain this
+
+     LOG ($verbose_filter, " candidate: $u");
+     push @candidates, $u;
+     $referers{$u} = $u;
+   }
+
+   @candidates = depoison (@candidates);
+   return () if ($#candidates < 0);
+   my $i = int(rand($#candidates+1));
+   my $img = $candidates[$i];
+   my $ref = $referers{$img};
+
+   LOG ($verbose_load, "picked image " . ($i+1) . ": $img (on $ref)");
+   return ($ref, $img);
+}
+
+\f
+############################################################################
+#
+# Pick images by feeding random words into Google Image Search.
+# By Charles Gales <gales@us.ibm.com>
+#
+############################################################################
+
+
+ my $google_images_url = "http://images.google.com/images" .
+   "?site=images" .
+   "&btnG=Search" .
+   "&safe=off" .          # no screening
+   "&imgsafe=off" .
+   "&q=";
+
+# googleimgs
+# Runs a Google Image Search on a randomly chosen result page (1 through 9)
+# and picks one of the returned images at random.
+#
+#   $timeout  - passed through to pick_from_search_engine().
+#   $words    - optional query text, appended to the search URL as-is (so
+#               callers supply it already URL-escaped).  When undefined, a
+#               single random_word() is used.
+#   $max_page - optional; accepted for the benefit of callers but not
+#               consulted anywhere in this sub.
+#
+# Returns a two-element list (referring page URL, image URL), or an empty
+# list when no candidate survives filtering.
+#
+ sub pick_from_google_images($;$$) {
+   my ($timeout, $words, $max_page) = @_;
+
+   if (!defined($words)) {
+     $words = random_word();   # only one word for Google
+   }
+
+   my $page = (int(rand(9)) + 1);
+   my $num = 20;               # 20 images per page
+   my $search_url = $google_images_url . $words;
+
+   if ($page > 1) {
+     # "start" is the offset of the first result, so page N begins at
+     # (N-1)*$num; using N*$num would never sample results 20..39.
+     $search_url .= "&start=" . (($page-1) * $num);
+     $search_url .= "&num="   . $num;            # images per page
+   }
+
+   # The hit count returned first is not needed here.
+   my ($search_hit_count, @subpages) =
+     pick_from_search_engine ($timeout, $search_url, $words);
+
+   my @candidates = ();
+   my %referers;
+   foreach my $u (@subpages) {
+     next unless ($u =~ m@imgres\?imgurl@i);    # All pics start with this
+     next if ($u =~ m@[/.]google\.com\b@i);     # skip google builtins
+
+     $u = html_unquote($u);
+     if ($u =~ m@^/imgres\?imgurl=(.*?)&imgrefurl=(.*?)\&@) {
+       my ($img, $ref) = ($1, $2);
+
+       $ref = url_decode($ref);
+       $img = url_decode($img);
+
+       # Check the scheme only after decoding, and accept https too;
+       # otherwise an encoded or https image URL would get a second
+       # "http://" glued onto the front.
+       $img = "http://$img" unless ($img =~ m@^https?:@i);
+
+       LOG ($verbose_filter, " candidate: $ref");
+       push @candidates, $img;
+       $referers{$img} = $ref;
+     }
+   }
+
+   @candidates = depoison (@candidates);
+   return () if ($#candidates < 0);
+   my $i = int(rand($#candidates+1));
+   my $img = $candidates[$i];
+   my $ref = $referers{$img};
+
+   LOG ($verbose_load, "picked image " . ($i+1) . ": $img (on $ref)");
+   return ($ref, $img);
+}
+
+
+\f
+############################################################################
+#
+# Pick images by feeding random numbers into Google Image Search.
+# By jwz, suggested by Ian O'Donnell.
+#
+############################################################################
+
+
+# googlenums
+# Searches Google Images for a random integer below 9999.  Roughly 30% of
+# the time the number is zero-padded to four digits before being used as
+# the query.
+#
+#   $timeout - passed through to pick_from_google_images().
+#
+# Returns whatever pick_from_google_images() returns.
+#
+ sub pick_from_google_image_numbers($) {
+   my ($timeout) = @_;
+
+   my $limit = 9999;
+   my $query = int(rand($limit));
+
+   if (rand() < 0.3) {
+     $query = sprintf("%04d", $query);
+   }
+
+   return pick_from_google_images ($timeout, "$query");
+}
+
+
+\f
+############################################################################
+#
+# Pick images by feeding random digital camera file names into
+# Google Image Search.
+# By jwz, inspired by the excellent Random Personal Picture Finder
+# at http://www.diddly.com/random/
+#
+############################################################################
+
+ my @photomakers = (
+   #
+   # Common digital camera file name formats, as described at
+   # http://www.diddly.com/random/about.html
+   #
+   # Each entry is a code ref that takes no arguments and returns one
+   # randomly numbered file name in that camera's naming scheme.
+   #
+   sub { sprintf ("dcp%05d.jpg",  int(rand(4000))); },   # Kodak
+   sub { sprintf ("dsc%05d.jpg",  int(rand(4000))); },   # Nikon
+   sub { sprintf ("dscn%04d.jpg", int(rand(4000))); },   # Nikon
+   sub { sprintf ("mvc-%03d.jpg", int(rand(999)));  },   # Sony Mavica
+   sub { sprintf ("mvc%05d.jpg",  int(rand(9999))); },   # Sony Mavica
+   sub { sprintf ("P101%04d.jpg", int(rand(9999))); },   # Olympus w/ date=101
+   sub { sprintf ("P%x%02d%04d.jpg",                     # Olympus
+                  int(rand(12))+1,    # month as one hex digit: 1..c
+                  int(rand(31))+1,    # day of month: 01..31
+                  int(rand(9999))); },
+   sub { sprintf ("IMG_%03d.jpg", int(rand(999)));  },   # ?
+   sub { sprintf ("IMAG%04d.jpg", int(rand(9999))); },   # RCA and Samsung
+   sub { my $n = int(rand(9999));                        # Canon
+         sprintf ("1%02d-%04d.jpg", int($n/100), $n); },
+   sub { my $n = int(rand(9999));                        # Canon
+         sprintf ("1%02d-%04d_IMG.jpg",
+                  int($n/100), $n); },
+   sub { sprintf ("IMG_%04d.jpg", int(rand(9999))); },   # Canon
+   sub { sprintf ("dscf%04d.jpg", int(rand(9999))); },   # Fuji Finepix
+   sub { sprintf ("pdrm%04d.jpg", int(rand(9999))); },   # Toshiba PDR
+   sub { sprintf ("IM%06d.jpg",   int(rand(9999))); },   # HP Photosmart
+   sub { sprintf ("EX%06d.jpg",   int(rand(9999))); },   # HP Photosmart
+# sub { my $n = int(rand(3)); # Kodak DC-40,50,120
+# sprintf ("DC%04d%s.jpg", int(rand(9999)),
+# $n == 0 ? 'S' : $n == 1 ? 'M' : 'L'); },
+   sub { sprintf ("pict%04d.jpg", int(rand(9999))); },   # Minolta Dimage
+   sub { sprintf ("P%07d.jpg",    int(rand(9999))); },   # Kodak DC290
+# sub { sprintf ("%02d%02d%04d.jpg", # Casio QV3000, QV4000
+# int(rand(12))+1, int(rand(31))+1,
+# int(rand(999))); },
+# sub { sprintf ("%02d%x%02d%04d.jpg", # Casio QV7000
+# int(rand(6)), # year
+# int(rand(12))+1, int(rand(31))+1,
+# int(rand(999))); },
+   sub { sprintf ("IMGP%04d.jpg", int(rand(9999))); },   # Pentax Optio S
+   sub { sprintf ("PANA%04d.jpg", int(rand(9999))); },   # Panasonic vid still
+   sub { sprintf ("HPIM%04d.jpg", int(rand(9999))); },   # HP Photosmart
+   sub { sprintf ("PCDV%04d.jpg", int(rand(9999))); });  # ?
+
+
+# googlephotos
+# Picks one of the @photomakers generators at random, uses the file name
+# it produces (plus "filetype:jpg") as the query, and delegates to
+# pick_from_google_images().
+#
+#   $timeout - passed through to pick_from_google_images().
+#
+# Returns whatever pick_from_google_images() returns.
+#
+ sub pick_from_google_image_photos($) {
+   my ($timeout) = @_;
+
+   my $i = int(rand($#photomakers + 1));
+   my $fn = $photomakers[$i];
+
+   # Call with an explicit empty argument list; the bare "&$fn;" form would
+   # silently forward this sub's own @_ to the generator.
+   my $file = $fn->();
+   my $words = $file . "%20filetype:jpg";
+
+   return pick_from_google_images ($timeout, $words);
+}
+
+
+\f
+############################################################################
+#
+# Pick images by feeding random words into Alta Vista Text Search
+#
+############################################################################
+
+
+ # Base URL for the Alta Vista text search; the query words are appended
+ # directly after the trailing "aqo=".
+ my $alta_vista_url = join ('',
+   "http://www.altavista.com/web/results",
+   "?pg=aq",
+   "&aqmode=s",
+   "&filetype=html",
+   "&sc=on",          # "site collapse"
+   "&nbq=50",
+   "&aqo=");
+
+# avtext
+sub pick_from_alta_vista_text($) {
+ my ($timeout) = @_;
+
+ my $words = random_words('%20');
+ my $page = (int(rand(9)) + 1);
+ my $search_url = $alta_vista_url . $words;
+
+ if ($page > 1) {
+ $search_url .= "&pgno=" . $page;
+ $search_url .= "&stq=" . (($page-1) * 10);
+ }
+
+ my ($search_hit_count, @subpages) =
+ pick_from_search_engine ($timeout, $search_url, $words);
+
+ my @candidates = ();
+ foreach my $u (@subpages) {
+
+ # Those altavista fuckers are playing really nasty redirection games
+ # these days: they filter your clicks through their site, but use
+ # onMouseOver to make it look like they're not! Well, it makes it
+ # easier for us to identify search results...