+ }
+
+ @candidates = depoison (@candidates);
+ return () if ($#candidates < 0);
+ my $i = int(rand($#candidates+1));
+ my $img = $candidates[$i];
+ my $ref = $referers{$img};
+
+ LOG ($verbose_load, "picked image " . ($i+1) . ": $img (on $ref)");
+ return ($ref, $img);
+}
+
+
+\f
+############################################################################
+#
+# Pick images by feeding random words into Alta Vista Text Search
+#
+############################################################################
+
+
+my $alta_vista_url_1 = "http://www.altavista.com/cgi-bin/query?pg=q" .
+ "&text=yes&kl=XX&stype=stext&q=";
+my $alta_vista_url_2 = "http://www.altavista.com/sites/search/web?pg=q" .
+ "&kl=XX&search=Search&q=";
+
+my $alta_vista_url = $alta_vista_url_2;
+
+# altavista
+sub pick_from_alta_vista_text {
+ my ( $timeout ) = @_;
+
+ my $words = random_words(1);
+ my $page = (int(rand(9)) + 1);
+ my $search_url = $alta_vista_url . $words;
+
+ if ($page > 1) {
+ $search_url .= "&pgno=" . $page;
+ $search_url .= "&stq=" . (($page-1) * 10);
+ }
+
+ my ($search_hit_count, @subpages) =
+ pick_from_search_engine ($timeout, $search_url, $words);
+
+ my @candidates = ();
+ foreach my $u (@subpages) {
+
+ # Those altavista fuckers are playing really nasty redirection games
+ # these days: the filter your clicks through their site, but use
+ # onMouseOver to make it look like they're not! Well, it makes it
+ # easier for us to identify search results...
+ #
+ next unless ($u =~ m@^/r.*\&r=([^&]+).*@);
+ $u = url_unquote($1);
+
+ LOG ($verbose_filter, " candidate: $u");
+ push @candidates, $u;
+ }
+
+ return pick_image_from_pages ($search_url, $search_hit_count, $#subpages+1,
+ $timeout, @candidates);
+}
+
+
+\f
+############################################################################
+#
+# Pick images by feeding random words into Hotbot
+#
+############################################################################
+
+my $hotbot_search_url = "http://hotbot.lycos.com/" .
+ "?SM=SC" .
+ "&DV=0" .
+ "&LG=any" .
+ "&FVI=1" .
+ "&DC=100" .
+ "&DE=0" .
+ "&SQ=1" .
+ "&TR=13" .
+ "&AM1=MC" .
+ "&MT=";
+
+sub pick_from_hotbot_text {
+ my ( $timeout ) = @_;
+
+ my $words = random_words(0);
+ my $search_url = $hotbot_search_url . $words;
+
+ my ($search_hit_count, @subpages) =
+ pick_from_search_engine ($timeout, $search_url, $words);
+
+ my @candidates = ();
+ foreach my $u (@subpages) {
+
+ # Hotbot plays redirection games too
+ next unless ($u =~ m@^/director.asp\?target=([^&]+)@);
+ $u = url_decode($1);
+
+ LOG ($verbose_filter, " candidate: $u");
+ push @candidates, $u;
+ }
+
+ return pick_image_from_pages ($search_url, $search_hit_count, $#subpages+1,
+ $timeout, @candidates);
+}
+
+
+\f
+############################################################################
+#
+# Pick images by feeding random words into Lycos
+#
+############################################################################
+
+my $lycos_search_url = "http://lycospro.lycos.com/srchpro/" .
+ "?lpv=1" .
+ "&t=any" .
+ "&query=";
+
+sub pick_from_lycos_text {
+ my ( $timeout ) = @_;
+
+ my $words = random_words(0);
+ my $start = int(rand(8)) * 10 + 1;
+ my $search_url = $lycos_search_url . $words . "&start=$start";
+
+ my ($search_hit_count, @subpages) =
+ pick_from_search_engine ($timeout, $search_url, $words);
+
+ my @candidates = ();
+ foreach my $u (@subpages) {
+
+ # Lycos plays exact the same redirection game as hotbot.
+ # Note that "id=0" is used for internal advertising links,
+ # and 1+ are used for search results.
+ next unless ($u =~ m@^http://click.hotbot.com/director.asp
+ .*
+ [?&]id=[1-9]\d*
+ .*
+ \&target=([^&]+)
+ .*
+ @x);
+ $u = url_decode($1);
+
+ LOG ($verbose_filter, " candidate: $u");
+ push @candidates, $u;
+ }
+
+ return pick_image_from_pages ($search_url, $search_hit_count, $#subpages+1,
+ $timeout, @candidates);
+}
+
+
+\f
+############################################################################
+#
+# Pick images by feeding random words into news.yahoo.com
+#
+############################################################################
+
+my $yahoo_news_url = "http://search.news.yahoo.com/search/news_photos?" .
+ "&z=&n=100&o=o&2=&3=&p=";
+
+# yahoonews
+sub pick_from_yahoo_news_text {
+ my ( $timeout ) = @_;
+
+ my $words = random_words(1);
+ my $search_url = $yahoo_news_url . $words;
+
+ my ($search_hit_count, @subpages) =
+ pick_from_search_engine ($timeout, $search_url, $words);
+
+ my @candidates = ();
+ foreach my $u (@subpages) {
+ # only accept URLs on Yahoo's news site
+ next unless ($u =~ m@^http://dailynews\.yahoo\.com/@i ||
+ $u =~ m@^http://story\.news\.yahoo\.com/@i);
+
+ LOG ($verbose_filter, " candidate: $u");
+ push @candidates, $u;
+ }
+
+ return pick_image_from_pages ($search_url, $search_hit_count, $#subpages+1,
+ $timeout, @candidates);
+}
+
+
+
+\f
+############################################################################
+#
+# Pick images by waiting for driftnet to populate a temp dir with files.
+# Requires driftnet version 0.1.5 or later.
+# (Driftnet is a program by Chris Lightfoot that sniffs your local ethernet
+# for images being downloaded by others.)
+# Driftnet/webcollage integration by jwz.
+#
+############################################################################
+
+# driftnet
+sub pick_from_driftnet {
+ my ( $timeout ) = @_;
+
+ my $id = $driftnet_magic;
+ my $dir = $driftnet_dir;
+ my $start = time;
+ my $now;
+
+ error ("\$driftnet_dir unset?") unless ($dir);
+ $dir =~ s@/+$@@;
+
+ error ("$dir unreadable") unless (-d "$dir/.");
+
+ $timeout = $http_timeout unless ($timeout);
+ $last_search = $id;
+
+ while ($now = time, $now < $start + $timeout) {
+ local *DIR;
+ opendir (DIR, $dir) || error ("$dir: $!");
+ while (my $file = readdir(DIR)) {
+ next if ($file =~ m/^\./);
+ $file = "$dir/$file";
+ closedir DIR;
+ LOG ($verbose_load, "picked file $file ($id)");
+ return ($id, $file);
+ }
+ closedir DIR;
+ }
+ LOG (($verbose_net || $verbose_load), "timed out for $id");
+ return ();
+}
+
+
+sub get_driftnet_file {
+ my ($file) = @_;
+
+ error ("\$driftnet_dir unset?") unless ($driftnet_dir);
+
+ my $id = $driftnet_magic;
+ my $re = qr/$driftnet_dir/;
+ error ("$id: $file not in $driftnet_dir?")
+ unless ($file =~ m@^$re@o);
+
+ local *IN;
+ open (IN, $file) || error ("$id: $file: $!");
+ my $body = '';
+ while (<IN>) { $body .= $_; }
+ close IN;
+ unlink ($file);
+ return ($id, $body);
+}
+
+
+sub spawn_driftnet {
+ my ($cmd) = @_;
+
+ # make a directory to use.
+ while (1) {
+ my $tmp = $ENV{TEMPDIR} || "/tmp";
+ $driftnet_dir = sprintf ("$tmp/driftcollage-%08x", rand(0xffffffff));
+ LOG ($verbose_exec, "mkdir $driftnet_dir");
+ last if mkdir ($driftnet_dir, 0700);
+ }
+
+ if (! ($cmd =~ m/\s/)) {
+ # if the command didn't have any arguments in it, then it must be just
+ # a pointer to the executable. Append the default args to it.
+ my $dargs = $default_driftnet_cmd;
+ $dargs =~ s/^[^\s]+//;
+ $cmd .= $dargs;
+ }
+
+ # point the driftnet command at our newly-minted private directory.
+ #
+ $cmd .= " -d $driftnet_dir";
+ $cmd .= ">/dev/null" unless ($verbose_exec);
+
+ my $pid = fork();
+ if ($pid < 0) { error ("fork: $!\n"); }
+ if ($pid) {
+ # parent fork
+ push @pids_to_kill, $pid;
+ LOG ($verbose_exec, "forked for \"$cmd\"");
+ } else {
+ # child fork
+ nontrapping_system ($cmd) || error ("exec: $!");
+ }
+
+ # wait a bit, then make sure the process actually started up.
+ #
+ sleep (1);
+ error ("pid $pid failed to start \"$cmd\"")
+ unless (1 == kill (0, $pid));
+}
+
+\f
+############################################################################
+#
+# Pick a random image in a random way
+#
+############################################################################
+
+
+# Picks a random image on a random page, and returns two URLs:
+# the page containing the image, and the image.
+# Returns () if nothing found this time.
+#
+
+sub pick_image {
+ my ( $timeout ) = @_;
+
+ $current_state = "select";
+ $load_method = "none";
+
+ my $n = int(rand(100));
+ my $fn = undef;
+ my $total = 0;
+ my @rest = @search_methods;
+
+ while (@rest) {
+ my $pct = shift @rest;
+ my $name = shift @rest;
+ my $tfn = shift @rest;
+ $total += $pct;
+ if ($total > $n && !defined($fn)) {
+ $fn = $tfn;
+ $current_state = $name;
+ $load_method = $current_state;
+ }
+ }
+
+ if ($total != 100) {
+ error ("internal error: \@search_methods totals to $total%!");
+ }
+
+ record_attempt ($current_state);
+ return $fn->($timeout);
+}