X-Git-Url: http://git.hungrycats.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=hacks%2Fwebcollage;h=a630e9938663fa6b20d0ab04b649cc79d768beea;hb=14627f4038ada5d11456f3770090f3c39740d70f;hp=587bb41615233e1fe29f05b4657cd9ea2122f087;hpb=72c1f4c1dc6ab07fe121a327ff1c30bf51ef74c1;p=xscreensaver diff --git a/hacks/webcollage b/hacks/webcollage index 587bb416..a630e993 100755 --- a/hacks/webcollage +++ b/hacks/webcollage @@ -1,6 +1,6 @@ #!/usr/local/bin/perl5 -w # -# webcollage, Copyright (c) 1999 by Jamie Zawinski +# webcollage, Copyright (c) 1999, 2000 by Jamie Zawinski # This program decorates the screen with random images from the web. # One satisfied customer described it as "a nonstop pop culture brainbath." # @@ -27,7 +27,7 @@ require POSIX; use Fcntl ':flock'; # import LOCK_* constants -my $version = q{ $Revision: 1.41 $ }; $version =~ s/^[^0-9]+([0-9.]+).*$/$1/; +my $version = q{ $Revision: 1.60 $ }; $version =~ s/^[^0-9]+([0-9.]+).*$/$1/; my $copyright = "WebCollage $version, Copyright (c) 1999" . " Jamie Zawinski \n" . " http://www.jwz.org/xscreensaver/\n"; @@ -40,9 +40,9 @@ my $image_randomizer_1 = "http://www.altavista.com/query" . "?mmdo=3" . "&nbq=12" . "&stype=simage" . - "&oart=1" . - "&obw=1" . - "&oshape=0" . + "&iclr=1" . + "&ibw=1" . + "&iexc=1" . "&what=web" . "&q="; my $image_randomizer_2 = "http://www.hotbot.com/?clickSrc=search" . @@ -56,6 +56,8 @@ my $image_randomizer_2 = "http://www.hotbot.com/?clickSrc=search" . "&MT="; my $image_randomizer_3 = "http://www.altavista.com/cgi-bin/query?pg=q" . "&text=yes&kl=XX&stype=stext&q="; +my $image_randomizer_4 = "http://search.news.yahoo.com/search/news_photos?" . + "&z=&n=100&o=o&2=&3=&p="; # I guess Photopoint got wise to me, because now they are doing error # checking on the user ("u=") and album ("a=") parameters. Oh well. @@ -94,7 +96,10 @@ my $delay = 0; my $wordlist = "/usr/dict/words"; if (!-r $wordlist) { - $wordlist = "/usr/share/lib/dict/words"; # irix + $wordlist = "/usr/share/dict/words"; # BSD +} +if (!-r $wordlist) { + $wordlist = "/usr/share/lib/dict/words"; # Irix } die "$wordlist doesn't exist!\n" unless (-r $wordlist); @@ -163,6 +168,9 @@ sub get_document_1 { $paddr = sockaddr_in($port2, $iaddr); + my $head = ""; + my $body = ""; + @_ = eval { local $SIG{ALRM} = sub { @@ -192,16 +200,14 @@ sub get_document_1 { $cookie = "AV_ALL=1"; } - print S ("GET " . ($http_proxy ? $url : "/$path") . " HTTP/1.0\n" . - "Host: $them\n" . - "User-Agent: $progname/$version\n" . - ($referer ? "Referer: $referer\n" : "") . - ($cookie ? "Cookie: $cookie\n" : "") . - "\n"); + print S ("GET " . ($http_proxy ? $url : "/$path") . " HTTP/1.0\r\n" . + "Host: $them\r\n" . + "User-Agent: $progname/$version\r\n" . + ($referer ? "Referer: $referer\r\n" : "") . + ($cookie ? "Cookie: $cookie\r\n" : "") . + "\r\n"); my $http = ; - my $head = ""; - my $body = ""; while () { $head .= $_; last if m@^[\r\n]@; @@ -221,6 +227,8 @@ sub get_document_1 { die if ($@ && $@ ne "alarm\n"); # propagate errors if ($@) { # timed out + $head = undef; + $body = undef; return (); } else { # didn't @@ -287,11 +295,13 @@ sub get_document { print STDERR "$progname: too many redirects " . "($max_loop_count) from $orig_url\n"; } + $body = undef; return (); } } elsif ( $http =~ m@HTTP/[0-9.]+ [4-9][0-9][0-9]@ ) { # http errors -- return nothing. + $body = undef; return (); } else { @@ -374,6 +384,8 @@ sub pick_image_from_body { " \"$url\": rejecting.\n"; } $rejected_urls{$url} = -1; + $body = undef; + $_ = undef; return (); } @@ -396,6 +408,8 @@ sub pick_image_from_body { " length $L in $url: rejecting.\n"; } $rejected_urls{$url} = $L; + $body = undef; + $_ = undef; return (); } elsif ( $verbose > 2 ) { print STDERR "$progname: keywords of length $L" . @@ -494,6 +508,9 @@ sub pick_image_from_body { } } + $_ = undef; + $body = undef; + if ( $#urls == 0 ) { if ( $verbose > 2 ) { print STDERR "$progname: no images on $base\n"; @@ -528,8 +545,12 @@ sub pick_from_url_randomizer { my ( $base, $body ) = get_document ($random_redirector, undef, $timeout); - return if (!$base || !$body); + if (!$base || !$body) { + $body = undef; + return; + } my $img = pick_image_from_body ($base, $body); + $body = undef; if ($img) { return ($base, $img, "yahoo"); @@ -549,6 +570,10 @@ sub random_word { $word = ; # toss partial line $word = ; # keep next line } + if (!$word) { + seek( IN, 0, 0 ); + $word = ; + } close (IN); } @@ -564,6 +589,10 @@ sub random_word { $word =~ s/izes$/ize/; $word =~ tr/A-Z/a-z/; + if ( $word =~ s/[ \t\n\r]/\+/g ) { # convert intra-word spaces to "+". + $word = "\%22$word\%22"; # And put quotes (%22) around it. + } + return $word; } @@ -584,7 +613,8 @@ sub pick_from_image_randomizer { my $search_url = ($which == 0 ? $image_randomizer_1 : $which == 1 ? $image_randomizer_2 : - $image_randomizer_3) . + $which == 2 ? $image_randomizer_3 : + $image_randomizer_4) . $words; # Pick a random search-result page instead of always taking the first. @@ -607,7 +637,10 @@ sub pick_from_image_randomizer { my ( $base, $body ) = get_document ($search_url, undef, $timeout); if (defined ($timeout)) { $timeout -= (time - $start); - return () if ($timeout <= 0); + if ($timeout <= 0) { + $body = undef; + return (); + } } return () if (! $body); @@ -632,8 +665,10 @@ sub pick_from_image_randomizer { $_ = $body; - s/Result [Pp]ages:.*$//s; # trim off page footer - s/^.*?IMAGE RESULTS//s; # trim off page header +# s/Result [Pp]ages:.*$//s; # trim off page footer +# s/^.*?IMAGE RESULTS//s; # trim off page header + + s/Have you tried these resources.*//s; # let's try it again s/[\r\n\t ]+/ /g; @@ -653,25 +688,44 @@ sub pick_from_image_randomizer { next unless ($u =~ m@^http://@i); # skip non-http and relative urls. - next if ($u =~ m@[/.]altavista\.com@i); # skip altavista builtins - next if ($u =~ m@[/.]av\.com@i); - next if ($u =~ m@[/.]virage\.com@i); - next if ($u =~ m@[/.]photoloft\.com@i); - next if ($u =~ m@[/.]shopping\.com@i); - next if ($u =~ m@[/.]thetrip\.com@i); - next if ($u =~ m@[/.]cmgi\.com@i); - next if ($u =~ m@[/.]intelihealth\.com@i); - next if ($u =~ m@[/.]wildweb\.com@i); - next if ($u =~ m@[/.]digital\.com@i); - next if ($u =~ m@[/.]doubleclick\.net@i); + next if ($u =~ m@[/.]altavista\.com\b@i); # skip altavista builtins + next if ($u =~ m@[/.]altavista\.[a-z]{2}\b@i); # altavista.fr, etc + next if ($u =~ m@[/.]av\.com\b@i); + next if ($u =~ m@[/.]virage\.com\b@i); + next if ($u =~ m@[/.]photoloft\.com\b@i); + next if ($u =~ m@[/.]shopping\.com\b@i); + next if ($u =~ m@[/.]thetrip\.com\b@i); + next if ($u =~ m@[/.]cmgi\.com\b@i); + next if ($u =~ m@[/.]intelihealth\.com\b@i); + next if ($u =~ m@[/.]wildweb\.com\b@i); + next if ($u =~ m@[/.]digital\.com\b@i); + next if ($u =~ m@[/.]doubleclick\.net\b@i); + next if ($u =~ m@[/.]freeim\.org\b@i); + next if ($u =~ m@[/.]clicktomarket\.com\b@i); # you cretins + next if ($u =~ m@[/.]teragram\.com\b@i); + + # must lose this one for altavista, even though it loses images of + # every single customer of akamai. Oh well, those people have lots + # of money, and so their images are probably boring anyway. + next if ($u =~ m@[/.]akamai\.net@i); if ($which == 0 && $u =~ m@[/.]corbis\.com@) { - $skipped = 1; + $skipped = 1; if ( $verbose > 3 ) { print STDERR "$progname: skipping corbis URL: $u\n"; } next; + } elsif ($which == 3 && + ($u =~ m@^http://[^/]+$@ || # no slashes + $u =~ m@/$@ || # ends in / + ! ($u =~ m@dailynews\.yahoo\.com@))) { # not dailynews +# $skipped = 1; + if ( $verbose > 3 ) { + print STDERR "$progname: skipping non-AP URL: $u\n"; + } + next; + } elsif ( $rejected_urls{$u} ) { if ( $verbose > 3 ) { my $L = $rejected_urls{$u}; @@ -691,6 +745,8 @@ sub pick_from_image_randomizer { print STDERR "$progname: found nothing on $base " . "($length bytes, $href_count links).\n"; } + $body = undef; + $_ = undef; return (); } @@ -703,17 +759,25 @@ sub pick_from_image_randomizer { } + $body = undef; + $_ = undef; my ( $base2, $body2 ) = get_document ($subpage, $base, $timeout); - return () if (!$base2 || !$body2); + if (!$base2 || !$body2) { + $body2 = undef; + return (); + } my $img = pick_image_from_body ($base2, $body2); + $body2 = undef; if ($img) { return ($base2, $img, ($which == 0 ? "imagevista" : - $which == 1 ? "hotbot" : "altavista") . + $which == 1 ? "hotbot" : + $which == 2 ? "altavista" : + "ap") . "/$search_count"); } else { return (); @@ -752,8 +816,8 @@ my $count_4 = 0; sub pick_image { my ( $timeout ) = @_; - my $r = int(rand(100)); + my ($base, $img, $source, $total, $count); if ($r < 20) { @@ -766,6 +830,11 @@ sub pick_image { $total = ++$total_1; $count = ++$count_1 if $img; + } elsif ($r < 65) { + ($base, $img, $source) = pick_from_image_randomizer ($timeout, 3); + $total = ++$total_4; + $count = ++$count_4 if $img; + # } elsif ($r < 70) { # ($base, $img, $source) = pick_from_photo_randomizer ($timeout); # $total = ++$total_4; @@ -988,7 +1057,14 @@ sub image_to_pnm { $cmd2 = "exec $cmd"; # yes, this really is necessary. if we don't # do this, the process doesn't die properly. - if ($verbose == 0) { + if ($verbose <= 1) { + # + # We get a "giftopnm: got a 'Application Extension' extension" + # warning any time it's an animgif. + # + # Note that "giftopnm: EOF / read error on image data" is not + # always a fatal error -- sometimes the image looks fine anyway. + # $cmd2 .= " 2>/dev/null"; } @@ -1007,12 +1083,14 @@ sub image_to_pnm { } kill ('TERM', $pid) if ($pid); $timed_out = 1; + $body = undef; }; if (($pid = open(PIPE, "| $cmd2 > $output"))) { $timed_out = 0; alarm $cvt_timeout; print PIPE $body; + $body = undef; close PIPE; if ($verbose > 3) { print STDERR "$progname: awaiting $pid\n"; } @@ -1041,10 +1119,12 @@ sub image_to_pnm { die if ($@ && $@ ne "alarm\n"); # propagate errors if ($@) { # timed out + $body = undef; return (); } else { # didn't alarm 0; + $body = undef; return @_; } } @@ -1138,7 +1218,7 @@ sub x_output { } elsif ((@_ = jpeg_size ($body))) { ($iw, $ih) = @_; $cmd = "djpeg |"; - } elsif ($body =~ "^P\d\n(\d+) (\d+)\n") { + } elsif ($body =~ m/^P\d\n(\d+) (\d+)\n/) { $iw = $1; $ih = $2; $cmd = ""; @@ -1156,6 +1236,7 @@ sub x_output { $cmd .= "pnmpaste - $x $y $image_ppm > $image_tmp1"; open (IMG, "| $cmd") || die ("running $cmd: $!\n"); print IMG $body; + $body = undef; close (IMG); if ($verbose > 1) { print STDERR "$progname: subproc exited normally.\n"; @@ -1170,6 +1251,7 @@ sub x_output { my ($headers, $body) = get_document ($img, $base); if ($body) { handle_image ($base, $img, $body, $source); + $body = undef; } } unlink $image_tmp1, $image_tmp2; @@ -1185,6 +1267,7 @@ sub handle_image { } my ($iw, $ih) = image_to_pnm ($img, $body, $image_tmp1); + $body = undef; return 0 unless ($iw && $ih); my $ow = $iw; # used only for error messages @@ -1493,7 +1576,7 @@ sub main { if (!$root_p && !$no_output_p) { die "$copyright" . - "$progname: the -root argument is manditory (for now.)\n"; + "$progname: the -root argument is mandatory (for now.)\n"; } if (!$no_output_p && !$ENV{DISPLAY}) {