+ push @urls2, $_;
+ }
+ return @urls2;
+}
+
+
+# Given a list of candidate page URLs: filters them through depoison(),
+# chooses one of the survivors at random, loads it with get_document(),
+# and hands the loaded body to pick_image_from_body().
+#
+#   $base                  - passed to get_document() as its second argument
+#   $total_hit_count       - only used in the log line; shown as "?" if undef
+#   $unfiltered_link_count - only used in the log line
+#   $timeout               - passed through to get_document()
+#   @pages                 - the candidate page URLs
+#
+# Returns a two-element list: the base URL reported by get_document() for
+# the loaded page, and the image URL chosen from it.  Returns () when no
+# candidates survive, when get_document() yields no base or no body, or
+# when pick_image_from_body() returns nothing true.
+#
+# Side effect: sets $suppress_audit (declared outside this sub) to 1
+# before the page is loaded.
+#
+sub pick_image_from_pages {
+  my ($base, $total_hit_count, $unfiltered_link_count, $timeout, @pages) = @_;
+
+  $total_hit_count = "?" if (!defined($total_hit_count));
+
+  @pages = depoison (@pages);
+  my $n_candidates = scalar(@pages);
+
+  LOG ($verbose_load,
+       $n_candidates . " candidates of $unfiltered_link_count links" .
+       " ($total_hit_count total)");
+
+  # Nothing left to choose from.
+  return () unless ($n_candidates > 0);
+
+  my $page = $pages[int(rand($n_candidates))];
+  LOG ($verbose_load, "picked page $page");
+
+  $suppress_audit = 1;
+
+  my ($page_base, $page_body) = get_document ($page, $base, $timeout);
+  return () unless ($page_base && $page_body);
+
+  my $img = pick_image_from_body ($page_base, $page_body);
+  $page_body = undef;    # the document text is not needed past this point
+
+  return ($img ? ($page_base, $img) : ());
+}
+
+\f
+############################################################################
+#
+# Pick images from random pages returned by the Yahoo Random Link
+#
+############################################################################
+
+# yahoorand
+# The fixed URL that pick_from_yahoo_random_link requests.  That sub logs
+# the base URL it gets back as "redirected to", so presumably this address
+# redirects to some other page (not verifiable from here).
+my $yahoo_random_link = 'http://random.yahoo.com/bin/ryl';
+
+
+# Requests $yahoo_random_link via get_document() and hands the document
+# that comes back to pick_image_from_body().
+#
+#   $timeout - passed through to get_document()
+#
+# Returns two URLs: the base URL reported by get_document(), and the image
+# chosen from that document.  Returns the empty list if the document could
+# not be fetched or no image was picked from it.
+#
+# Side effects: writes two newlines to STDERR when $verbose_load is set;
+# records the URL in $last_search; sets $suppress_audit to 1.
+#
+sub pick_from_yahoo_random_link {
+  my ($timeout) = @_;
+
+  if ($verbose_load) {
+    print STDERR "\n\n";
+  }
+  LOG ($verbose_load, "URL: $yahoo_random_link");
+
+  $last_search    = $yahoo_random_link;    # for warnings
+  $suppress_audit = 1;
+
+  my ($base, $html) = get_document ($yahoo_random_link, undef, $timeout);
+  return () unless ($base && $html);
+
+  LOG ($verbose_load, "redirected to: $base");
+
+  my $img = pick_image_from_body ($base, $html);
+  $html = undef;    # the document text is not needed past this point
+
+  return () unless ($img);
+  return ($base, $img);
+}
+
+\f
+############################################################################
+#
+# Pick images by feeding random words into Alta Vista Image Search
+#
+############################################################################
+
+
+# Prefix of every Alta Vista image-search request.  It ends in "&q=" so
+# that pick_from_alta_vista_images can append the query words directly.
+my $alta_vista_images_url =
+  join ("",
+        "http://www.altavista.com/cgi-bin/query",
+        "?ipht=1",           # photos
+        "&igrph=1",          # graphics
+        "&iclr=1",           # color
+        "&ibw=1",            # b&w
+        "&micat=1",          # no partner sites
+        "&imgset=1",         # no partner sites
+        "&stype=simage",     # do image search
+        "&mmW=1",            # unknown, but required
+        "&q=");
+
+# imagevista
+#
+# Builds an Alta Vista image-search URL from the result of random_words(1)
+# and a randomly chosen result page (1 through 9), fetches the result links
+# with pick_from_search_engine(), decodes and filters them, and passes the
+# surviving page URLs to pick_image_from_pages().
+#
+#   $timeout - passed through to pick_from_search_engine() and
+#              pick_image_from_pages()
+#
+# Returns whatever pick_image_from_pages() returns.
+#
+sub pick_from_alta_vista_images {
+  my ($timeout) = @_;
+
+  my $words = random_words(1);
+  my $page  = 1 + int(rand(9));
+
+  my $search_url = $alta_vista_images_url . $words;
+  if ($page > 1) {
+    # Page number, followed by the index of the first hit on that page
+    # (the arithmetic steps 12 hits per page).
+    $search_url .= "&pgno=" . $page . "&stq=" . (($page-1) * 12);
+  }
+
+  # The first value returned is the hit count; the rest are the links.
+  my ($search_hit_count, @subpages) =
+    pick_from_search_engine ($timeout, $search_url, $words);
+  my $link_count = scalar(@subpages);
+
+  # Pages on these hosts are never used as candidates.
+  my @unwanted_hosts = (
+    qr@[/.]altavista\.com\b@i,        # altavista builtins
+    qr@[/.]doubleclick\.net\b@i,      # presumably ad / click-tracking hosts
+    qr@[/.]clicktomarket\.com\b@i,
+    qr@[/.]viewimages\.com\b@i,       # stacked deck
+    qr@[/.]gettyimages\.com\b@i,
+  );
+
+  my @candidates = ();
+  foreach my $u (@subpages) {
+
+    # Result links look like "/r...&r=<encoded target>"; anything else is
+    # ignored.  $u aliases the element of @subpages, so the assignment
+    # below overwrites that element in place; only the length of @subpages
+    # is used after this loop.
+    next unless ($u =~ m@^/r.*\&r=([^&]+).*@);
+    $u = url_unquote($1);
+
+    next unless ($u =~ m@^http://@i);    # skip non-HTTP or relative URLs
+    next if (grep { $u =~ $_ } @unwanted_hosts);
+
+    LOG ($verbose_filter, " candidate: $u");
+    push @candidates, $u;
+  }
+
+  return pick_image_from_pages ($search_url, $search_hit_count, $link_count,
+                                $timeout, @candidates);
+}
+
+
+\f
+############################################################################
+#
+# Pick images by feeding random words into Google Image Search.
+# By Charles Gales <gales@us.ibm.com>
+#
+############################################################################
+
+
+# Prefix of every Google image-search request.  It ends in "&q=" so that
+# the query word can be appended directly.
+my $google_images_url =
+  join ("",
+        "http://images.google.com/images",
+        "?site=images",      # presumably selects image results -- unconfirmed
+        "&btnG=Search",      # presumably the search-button field -- unconfirmed
+        "&safe=off",         # no screening
+        "&imgsafe=off",      # presumably the same switch for images
+        "&q=");
+
+# googleimgs
+sub pick_from_google_images {
+ my ( $timeout ) = @_;
+
+ my $words = random_word; # only one word for Google
+ my $page = (int(rand(9)) + 1);
+ my $num = 20; # 20 images per page
+ my $search_url = $google_images_url . $words;
+
+ if ($page > 1) {
+ $search_url .= "&start=" . $page*$num; # page number
+ $search_url .= "&num=" . $num; #images per page
+ }
+
+ my ($search_hit_count, @subpages) =
+ pick_from_search_engine ($timeout, $search_url, $words);
+
+ my @candidates = ();
+ foreach my $u (@subpages) {
+ next unless ($u =~ m@imgres\?imgurl@i); # All pics start with this
+ next if ($u =~ m@[/.]google\.com\b@i); # skip google builtins
+
+ if ($u =~ m@^/imgres\?imgurl=(.*?)\&imgrefurl=(.*?)\&@) {
+ my $urlf = $2;
+ LOG ($verbose_filter, " candidate: $urlf");
+ push @candidates, $urlf;