X-Git-Url: http://git.hungrycats.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=hacks%2Fwebcollage;h=289cd5be48579ca1acfb80bb809545985ecdbe46;hb=93f25dc6827112d98b8b855ea85c8f5eb8123086;hp=0092da3be667ac34ce09b0502d8fc84f7e1eb032;hpb=0ed85ca0e4b0eae40a4f50a51d63f2f41e45373a;p=xscreensaver diff --git a/hacks/webcollage b/hacks/webcollage index 0092da3b..289cd5be 100755 --- a/hacks/webcollage +++ b/hacks/webcollage @@ -27,7 +27,7 @@ require POSIX; use Fcntl ':flock'; # import LOCK_* constants -my $version = q{ $Revision: 1.42 $ }; $version =~ s/^[^0-9]+([0-9.]+).*$/$1/; +my $version = q{ $Revision: 1.54 $ }; $version =~ s/^[^0-9]+([0-9.]+).*$/$1/; my $copyright = "WebCollage $version, Copyright (c) 1999" . " Jamie Zawinski \n" . " http://www.jwz.org/xscreensaver/\n"; @@ -40,9 +40,9 @@ my $image_randomizer_1 = "http://www.altavista.com/query" . "?mmdo=3" . "&nbq=12" . "&stype=simage" . - "&oart=1" . - "&obw=1" . - "&oshape=0" . + "&iclr=1" . + "&ibw=1" . + "&iexc=1" . "&what=web" . "&q="; my $image_randomizer_2 = "http://www.hotbot.com/?clickSrc=search" . @@ -56,6 +56,8 @@ my $image_randomizer_2 = "http://www.hotbot.com/?clickSrc=search" . "&MT="; my $image_randomizer_3 = "http://www.altavista.com/cgi-bin/query?pg=q" . "&text=yes&kl=XX&stype=stext&q="; +my $image_randomizer_4 = "http://search.news.yahoo.com/search/news_photos?" . + "&z=&n=100&o=o&2=&3=&p="; # I guess Photopoint got wise to me, because now they are doing error # checking on the user ("u=") and album ("a=") parameters. Oh well. @@ -195,12 +197,12 @@ sub get_document_1 { $cookie = "AV_ALL=1"; } - print S ("GET " . ($http_proxy ? $url : "/$path") . " HTTP/1.0\n" . - "Host: $them\n" . - "User-Agent: $progname/$version\n" . - ($referer ? "Referer: $referer\n" : "") . - ($cookie ? "Cookie: $cookie\n" : "") . - "\n"); + print S ("GET " . ($http_proxy ? $url : "/$path") . " HTTP/1.0\r\n" . + "Host: $them\r\n" . + "User-Agent: $progname/$version\r\n" . + ($referer ? "Referer: $referer\r\n" : "") . + ($cookie ? "Cookie: $cookie\r\n" : "") . + "\r\n"); my $http = ; while () { @@ -565,6 +567,10 @@ sub random_word { $word = ; # toss partial line $word = ; # keep next line } + if (!$word) { + seek( IN, 0, 0 ); + $word = ; + } close (IN); } @@ -580,6 +586,10 @@ sub random_word { $word =~ s/izes$/ize/; $word =~ tr/A-Z/a-z/; + if ( $word =~ s/[ \t\n\r]/\+/g ) { # convert intra-word spaces to "+". + $word = "\%22$word\%22"; # And put quotes (%22) around it. + } + return $word; } @@ -600,7 +610,8 @@ sub pick_from_image_randomizer { my $search_url = ($which == 0 ? $image_randomizer_1 : $which == 1 ? $image_randomizer_2 : - $image_randomizer_3) . + $which == 2 ? $image_randomizer_3 : + $image_randomizer_4) . $words; # Pick a random search-result page instead of always taking the first. @@ -651,8 +662,8 @@ sub pick_from_image_randomizer { $_ = $body; - s/Result [Pp]ages:.*$//s; # trim off page footer - s/^.*?IMAGE RESULTS//s; # trim off page header +# s/Result [Pp]ages:.*$//s; # trim off page footer +# s/^.*?IMAGE RESULTS//s; # trim off page header s/[\r\n\t ]+/ /g; @@ -683,6 +694,8 @@ sub pick_from_image_randomizer { next if ($u =~ m@[/.]wildweb\.com@i); next if ($u =~ m@[/.]digital\.com@i); next if ($u =~ m@[/.]doubleclick\.net@i); + next if ($u =~ m@[/.]freeim\.org@i); + next if ($u =~ m@[/.]clicktomarket\.com@i); # you cretins if ($which == 0 && $u =~ m@[/.]corbis\.com@) { $skipped = 1; @@ -691,6 +704,16 @@ sub pick_from_image_randomizer { } next; + } elsif ($which == 3 && + ($u =~ m@^http://[^/]+$@ || # no slashes + $u =~ m@/$@ || # ends in / + ! ($u =~ m@dailynews\.yahoo\.com@))) { # not dailynews +# $skipped = 1; + if ( $verbose > 3 ) { + print STDERR "$progname: skipping non-AP URL: $u\n"; + } + next; + } elsif ( $rejected_urls{$u} ) { if ( $verbose > 3 ) { my $L = $rejected_urls{$u}; @@ -740,7 +763,9 @@ sub pick_from_image_randomizer { if ($img) { return ($base2, $img, ($which == 0 ? "imagevista" : - $which == 1 ? "hotbot" : "altavista") . + $which == 1 ? "hotbot" : + $which == 2 ? "altavista" : + "ap") . "/$search_count"); } else { return (); @@ -781,6 +806,7 @@ sub pick_image { my ( $timeout ) = @_; my $r = int(rand(100)); + my ($base, $img, $source, $total, $count); if ($r < 20) { @@ -793,6 +819,11 @@ sub pick_image { $total = ++$total_1; $count = ++$count_1 if $img; + } elsif ($r < 70) { + ($base, $img, $source) = pick_from_image_randomizer ($timeout, 3); + $total = ++$total_4; + $count = ++$count_4 if $img; + # } elsif ($r < 70) { # ($base, $img, $source) = pick_from_photo_randomizer ($timeout); # $total = ++$total_4; @@ -1015,7 +1046,14 @@ sub image_to_pnm { $cmd2 = "exec $cmd"; # yes, this really is necessary. if we don't # do this, the process doesn't die properly. - if ($verbose == 0) { + if ($verbose <= 1) { + # + # We get a "giftopnm: got a 'Application Extension' extension" + # warning any time it's an animgif. + # + # Note that "giftopnm: EOF / read error on image data" is not + # always a fatal error -- sometimes the image looks fine anyway. + # $cmd2 .= " 2>/dev/null"; }