+ }
+
+ return pick_image_from_pages ($search_url, $search_hit_count, $#subpages+1,
+ $timeout, @candidates);
+}
+
+
+\f
+############################################################################
+#
+# Pick images by feeding random words into Alta Vista Text Search
+#
+############################################################################
+
+
+# Fixed front part of the AltaVista text-search query URL.  It ends in "q="
+# so that the search words can be appended directly.
+my $alta_vista_url = join ('',
+  "http://www.altavista.com/cgi-bin/query?pg=q",
+  "&text=yes&kl=XX&stype=stext&q=");
+
+# altavista
+# Runs an AltaVista text search for some random words, keeps the links on
+# the result page that are real search hits, and hands those to
+# pick_image_from_pages.
+#
+#   $timeout - passed through unchanged to pick_from_search_engine and
+#              pick_image_from_pages.
+#
+# Returns whatever pick_image_from_pages returns.
+#
+sub pick_from_alta_vista_text {
+  my ( $timeout ) = @_;
+
+  my $words = random_words();
+
+  # Choose one of result pages 1 through 9.  Only pages past the first
+  # need the paging arguments.
+  my $page_number = 1 + int (rand (9));
+  my $search_url = $alta_vista_url . $words;
+  $search_url .= "&pgno=" . $page_number . "&stq=" . (($page_number - 1) * 10)
+    if ($page_number > 1);
+
+  my ($search_hit_count, @subpages) =
+    pick_from_search_engine ($timeout, $search_url, $words);
+
+  # AltaVista routes result clicks through a redirector on its own site
+  # (and uses onMouseOver to hide that fact).  That makes the search hits
+  # easy to recognize: they are the links of the form
+  # /r?ck_sm=...&ref=...&r=TARGET, and TARGET is the page we want.
+  #
+  # NOTE(review): the Hotbot and Lycos pickers pass their captured target
+  # through url_decode; this one uses it as-is.  Confirm that the r= value
+  # is not URL-encoded.
+  #
+  my @candidates = ();
+  foreach my $link (@subpages) {
+    if ($link =~ m@^/r\?ck_sm=[a-zA-Z0-9]+\&ref=[a-zA-Z0-9]+\&r=(.*)@) {
+      my $u = $1;
+      LOG ($verbose_filter, " candidate: $u");
+      push @candidates, $u;
+    }
+  }
+
+  my $subpage_count = scalar (@subpages);
+  return pick_image_from_pages ($search_url, $search_hit_count,
+                                $subpage_count, $timeout, @candidates);
+}
+
+
+\f
+############################################################################
+#
+# Pick images by feeding random words into Hotbot
+#
+############################################################################
+
+# Fixed front part of the Hotbot query URL.  It ends in "MT=" so that the
+# search words can be appended directly.
+my $hotbot_search_url = join ('',
+  "http://hotbot.lycos.com/",
+  "?SM=SC",
+  "&DV=0",
+  "&LG=any",
+  "&FVI=1",
+  "&DC=100",
+  "&DE=0",
+  "&SQ=1",
+  "&TR=13",
+  "&AM1=MC",
+  "&MT=");
+
+# Runs a Hotbot search for some random words, keeps the links on the result
+# page that are real search hits, and hands those to pick_image_from_pages.
+#
+#   $timeout - passed through unchanged to pick_from_search_engine and
+#              pick_image_from_pages.
+#
+# Returns whatever pick_image_from_pages returns.
+#
+sub pick_from_hotbot_text {
+  my ( $timeout ) = @_;
+
+  my $words = random_words();
+  my $search_url = $hotbot_search_url . $words;
+
+  my ($search_hit_count, @subpages) =
+    pick_from_search_engine ($timeout, $search_url, $words);
+
+  my @candidates = ();
+  foreach my $link (@subpages) {
+
+    # Hotbot plays redirection games too: search hits are links of the form
+    # /director.asp?target=URL-ENCODED-TARGET.  The dot is escaped so that
+    # only the literal "director.asp" path is accepted.
+    next unless ($link =~ m@^/director\.asp\?target=([^&]+)@);
+
+    # Use a fresh variable rather than assigning to the loop variable,
+    # which is an alias for the element of @subpages.
+    my $u = url_decode($1);
+
+    LOG ($verbose_filter, " candidate: $u");
+    push @candidates, $u;
+  }
+
+  return pick_image_from_pages ($search_url, $search_hit_count,
+                                scalar (@subpages), $timeout, @candidates);
+}
+
+
+\f
+############################################################################
+#
+# Pick images by feeding random words into Lycos
+#
+############################################################################
+
+# Fixed front part of the Lycos query URL.  It ends in "query=" so that the
+# search words can be appended directly.
+my $lycos_search_url = join ('',
+  "http://lycospro.lycos.com/srchpro/",
+  "?lpv=1",
+  "&t=any",
+  "&query=");
+
+# Runs a Lycos search for some random words, keeps the links on the result
+# page that are real search hits, and hands those to pick_image_from_pages.
+#
+#   $timeout - passed through unchanged to pick_from_search_engine and
+#              pick_image_from_pages.
+#
+# Returns whatever pick_image_from_pages returns.
+#
+sub pick_from_lycos_text {
+  my ( $timeout ) = @_;
+
+  my $words = random_words();
+
+  # Start offset is one of 1, 11, 21, ... 71.
+  my $start = int(rand(8)) * 10 + 1;
+  my $search_url = $lycos_search_url . $words . "&start=$start";
+
+  my ($search_hit_count, @subpages) =
+    pick_from_search_engine ($timeout, $search_url, $words);
+
+  my @candidates = ();
+  foreach my $link (@subpages) {
+
+    # Lycos plays exactly the same redirection game as Hotbot.
+    # Note that "id=0" is used for internal advertising links,
+    # and 1+ are used for search results.
+    # The dots are escaped so that only the literal redirector host and
+    # path are accepted, not look-alike strings.
+    next unless ($link =~ m@^http://click\.hotbot\.com/director\.asp\?id=[1-9]\d*&target=([^&]+)@);
+
+    # Use a fresh variable rather than assigning to the loop variable,
+    # which is an alias for the element of @subpages.
+    my $u = url_decode($1);
+
+    LOG ($verbose_filter, " candidate: $u");
+    push @candidates, $u;
+  }
+
+  return pick_image_from_pages ($search_url, $search_hit_count,
+                                scalar (@subpages), $timeout, @candidates);
+}
+
+
+\f
+############################################################################
+#
+# Pick images by feeding random words into news.yahoo.com
+#
+############################################################################
+
+# Fixed front part of the Yahoo news-photo search URL.  It ends in "p=" so
+# that the search words can be appended directly.
+my $yahoo_news_url = join ('',
+  "http://search.news.yahoo.com/search/news_photos?",
+  "&z=&n=100&o=o&2=&3=&p=");
+
+# yahoonews
+sub pick_from_yahoo_news_text {
+ my ( $timeout ) = @_;
+
+ my $words = random_words;
+ my $search_url = $yahoo_news_url . $words;
+
+ my ($search_hit_count, @subpages) =
+ pick_from_search_engine ($timeout, $search_url, $words);
+
+ my @candidates = ();
+ foreach my $u (@subpages) {
+ # only accept URLs on Yahoo's news site
+ next unless ($u =~ m@^http://dailynews.yahoo.com/@i);
+
+ LOG ($verbose_filter, " candidate: $u");
+ push @candidates, $u;
+ }
+
+ return pick_image_from_pages ($search_url, $search_hit_count, $#subpages+1,
+ $timeout, @candidates);