X-Git-Url: http://git.hungrycats.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=hacks%2Fwebcollage;h=289cd5be48579ca1acfb80bb809545985ecdbe46;hb=93f25dc6827112d98b8b855ea85c8f5eb8123086;hp=d5c4ff1354ae0209a6cc62106aa248533ae2f1c2;hpb=c596d1306f5f92f7eab76224d3b049980a14adcb;p=xscreensaver diff --git a/hacks/webcollage b/hacks/webcollage index d5c4ff13..289cd5be 100755 --- a/hacks/webcollage +++ b/hacks/webcollage @@ -27,7 +27,7 @@ require POSIX; use Fcntl ':flock'; # import LOCK_* constants -my $version = q{ $Revision: 1.40 $ }; $version =~ s/^[^0-9]+([0-9.]+).*$/$1/; +my $version = q{ $Revision: 1.54 $ }; $version =~ s/^[^0-9]+([0-9.]+).*$/$1/; my $copyright = "WebCollage $version, Copyright (c) 1999" . " Jamie Zawinski \n" . " http://www.jwz.org/xscreensaver/\n"; @@ -40,9 +40,9 @@ my $image_randomizer_1 = "http://www.altavista.com/query" . "?mmdo=3" . "&nbq=12" . "&stype=simage" . - "&oart=1" . - "&obw=1" . - "&oshape=0" . + "&iclr=1" . + "&ibw=1" . + "&iexc=1" . "&what=web" . "&q="; my $image_randomizer_2 = "http://www.hotbot.com/?clickSrc=search" . @@ -56,6 +56,8 @@ my $image_randomizer_2 = "http://www.hotbot.com/?clickSrc=search" . "&MT="; my $image_randomizer_3 = "http://www.altavista.com/cgi-bin/query?pg=q" . "&text=yes&kl=XX&stype=stext&q="; +my $image_randomizer_4 = "http://search.news.yahoo.com/search/news_photos?" . + "&z=&n=100&o=o&2=&3=&p="; # I guess Photopoint got wise to me, because now they are doing error # checking on the user ("u=") and album ("a=") parameters. Oh well. @@ -75,10 +77,11 @@ my $http_proxy = undef; my $http_timeout = 30; my $cvt_timeout = 10; -# if we have xli, use it to write to the root window. else use xv. -my $ppm_to_root_window_cmd_1 = "xli -quiet -onroot -center" . +# programs we can use to write to the root window (tried in ascending order.) +my $ppm_to_root_window_cmd_1 = "xloadimage -onroot -quiet %%PPM%%"; +my $ppm_to_root_window_cmd_2 = "xli -quiet -onroot -center" . " -border black %%PPM%%"; -my $ppm_to_root_window_cmd_2 = "xv -root -rmode 5 -viewonly" . +my $ppm_to_root_window_cmd_3 = "xv -root -rmode 5 -viewonly" . " +noresetroot %%PPM%% -quit"; my $ppm_to_root_window_cmd = undef; # initialized by x_output() @@ -162,6 +165,9 @@ sub get_document_1 { $paddr = sockaddr_in($port2, $iaddr); + my $head = ""; + my $body = ""; + @_ = eval { local $SIG{ALRM} = sub { @@ -191,16 +197,14 @@ sub get_document_1 { $cookie = "AV_ALL=1"; } - print S ("GET " . ($http_proxy ? $url : "/$path") . " HTTP/1.0\n" . - "Host: $them\n" . - "User-Agent: $progname/$version\n" . - ($referer ? "Referer: $referer\n" : "") . - ($cookie ? "Cookie: $cookie\n" : "") . - "\n"); + print S ("GET " . ($http_proxy ? $url : "/$path") . " HTTP/1.0\r\n" . + "Host: $them\r\n" . + "User-Agent: $progname/$version\r\n" . + ($referer ? "Referer: $referer\r\n" : "") . + ($cookie ? "Cookie: $cookie\r\n" : "") . + "\r\n"); my $http = ; - my $head = ""; - my $body = ""; while () { $head .= $_; last if m@^[\r\n]@; @@ -220,6 +224,8 @@ sub get_document_1 { die if ($@ && $@ ne "alarm\n"); # propagate errors if ($@) { # timed out + $head = undef; + $body = undef; return (); } else { # didn't @@ -286,11 +292,13 @@ sub get_document { print STDERR "$progname: too many redirects " . "($max_loop_count) from $orig_url\n"; } + $body = undef; return (); } } elsif ( $http =~ m@HTTP/[0-9.]+ [4-9][0-9][0-9]@ ) { # http errors -- return nothing. + $body = undef; return (); } else { @@ -373,6 +381,8 @@ sub pick_image_from_body { " \"$url\": rejecting.\n"; } $rejected_urls{$url} = -1; + $body = undef; + $_ = undef; return (); } @@ -395,6 +405,8 @@ sub pick_image_from_body { " length $L in $url: rejecting.\n"; } $rejected_urls{$url} = $L; + $body = undef; + $_ = undef; return (); } elsif ( $verbose > 2 ) { print STDERR "$progname: keywords of length $L" . @@ -493,6 +505,9 @@ sub pick_image_from_body { } } + $_ = undef; + $body = undef; + if ( $#urls == 0 ) { if ( $verbose > 2 ) { print STDERR "$progname: no images on $base\n"; @@ -527,8 +542,12 @@ sub pick_from_url_randomizer { my ( $base, $body ) = get_document ($random_redirector, undef, $timeout); - return if (!$base || !$body); + if (!$base || !$body) { + $body = undef; + return; + } my $img = pick_image_from_body ($base, $body); + $body = undef; if ($img) { return ($base, $img, "yahoo"); @@ -548,6 +567,10 @@ sub random_word { $word = ; # toss partial line $word = ; # keep next line } + if (!$word) { + seek( IN, 0, 0 ); + $word = ; + } close (IN); } @@ -563,6 +586,10 @@ sub random_word { $word =~ s/izes$/ize/; $word =~ tr/A-Z/a-z/; + if ( $word =~ s/[ \t\n\r]/\+/g ) { # convert intra-word spaces to "+". + $word = "\%22$word\%22"; # And put quotes (%22) around it. + } + return $word; } @@ -583,7 +610,8 @@ sub pick_from_image_randomizer { my $search_url = ($which == 0 ? $image_randomizer_1 : $which == 1 ? $image_randomizer_2 : - $image_randomizer_3) . + $which == 2 ? $image_randomizer_3 : + $image_randomizer_4) . $words; # Pick a random search-result page instead of always taking the first. @@ -606,7 +634,10 @@ sub pick_from_image_randomizer { my ( $base, $body ) = get_document ($search_url, undef, $timeout); if (defined ($timeout)) { $timeout -= (time - $start); - return () if ($timeout <= 0); + if ($timeout <= 0) { + $body = undef; + return (); + } } return () if (! $body); @@ -631,8 +662,8 @@ sub pick_from_image_randomizer { $_ = $body; - s/Result [Pp]ages:.*$//s; # trim off page footer - s/^.*?IMAGE RESULTS//s; # trim off page header +# s/Result [Pp]ages:.*$//s; # trim off page footer +# s/^.*?IMAGE RESULTS//s; # trim off page header s/[\r\n\t ]+/ /g; @@ -663,6 +694,8 @@ sub pick_from_image_randomizer { next if ($u =~ m@[/.]wildweb\.com@i); next if ($u =~ m@[/.]digital\.com@i); next if ($u =~ m@[/.]doubleclick\.net@i); + next if ($u =~ m@[/.]freeim\.org@i); + next if ($u =~ m@[/.]clicktomarket\.com@i); # you cretins if ($which == 0 && $u =~ m@[/.]corbis\.com@) { $skipped = 1; @@ -671,6 +704,16 @@ sub pick_from_image_randomizer { } next; + } elsif ($which == 3 && + ($u =~ m@^http://[^/]+$@ || # no slashes + $u =~ m@/$@ || # ends in / + ! ($u =~ m@dailynews\.yahoo\.com@))) { # not dailynews +# $skipped = 1; + if ( $verbose > 3 ) { + print STDERR "$progname: skipping non-AP URL: $u\n"; + } + next; + } elsif ( $rejected_urls{$u} ) { if ( $verbose > 3 ) { my $L = $rejected_urls{$u}; @@ -690,6 +733,8 @@ sub pick_from_image_randomizer { print STDERR "$progname: found nothing on $base " . "($length bytes, $href_count links).\n"; } + $body = undef; + $_ = undef; return (); } @@ -702,17 +747,25 @@ sub pick_from_image_randomizer { } + $body = undef; + $_ = undef; my ( $base2, $body2 ) = get_document ($subpage, $base, $timeout); - return () if (!$base2 || !$body2); + if (!$base2 || !$body2) { + $body2 = undef; + return (); + } my $img = pick_image_from_body ($base2, $body2); + $body2 = undef; if ($img) { return ($base2, $img, ($which == 0 ? "imagevista" : - $which == 1 ? "hotbot" : "altavista") . + $which == 1 ? "hotbot" : + $which == 2 ? "altavista" : + "ap") . "/$search_count"); } else { return (); @@ -753,6 +806,7 @@ sub pick_image { my ( $timeout ) = @_; my $r = int(rand(100)); + my ($base, $img, $source, $total, $count); if ($r < 20) { @@ -765,6 +819,11 @@ sub pick_image { $total = ++$total_1; $count = ++$count_1 if $img; + } elsif ($r < 70) { + ($base, $img, $source) = pick_from_image_randomizer ($timeout, 3); + $total = ++$total_4; + $count = ++$count_4 if $img; + # } elsif ($r < 70) { # ($base, $img, $source) = pick_from_photo_randomizer ($timeout); # $total = ++$total_4; @@ -987,7 +1046,14 @@ sub image_to_pnm { $cmd2 = "exec $cmd"; # yes, this really is necessary. if we don't # do this, the process doesn't die properly. - if ($verbose == 0) { + if ($verbose <= 1) { + # + # We get a "giftopnm: got a 'Application Extension' extension" + # warning any time it's an animgif. + # + # Note that "giftopnm: EOF / read error on image data" is not + # always a fatal error -- sometimes the image looks fine anyway. + # $cmd2 .= " 2>/dev/null"; } @@ -1006,12 +1072,14 @@ sub image_to_pnm { } kill ('TERM', $pid) if ($pid); $timed_out = 1; + $body = undef; }; if (($pid = open(PIPE, "| $cmd2 > $output"))) { $timed_out = 0; alarm $cvt_timeout; print PIPE $body; + $body = undef; close PIPE; if ($verbose > 3) { print STDERR "$progname: awaiting $pid\n"; } @@ -1040,10 +1108,12 @@ sub image_to_pnm { die if ($@ && $@ ne "alarm\n"); # propagate errors if ($@) { # timed out + $body = undef; return (); } else { # didn't alarm 0; + $body = undef; return @_; } } @@ -1052,8 +1122,10 @@ sub x_output { my $win_cmd_1 = $ppm_to_root_window_cmd_1; my $win_cmd_2 = $ppm_to_root_window_cmd_2; + my $win_cmd_3 = $ppm_to_root_window_cmd_3; $win_cmd_1 =~ s/^([^ \t\r\n]+).*$/$1/; $win_cmd_2 =~ s/^([^ \t\r\n]+).*$/$1/; + $win_cmd_3 =~ s/^([^ \t\r\n]+).*$/$1/; # make sure the various programs we execute exist, right up front. foreach ("ppmmake", "giftopnm", "djpeg", "pnmpaste", "pnmscale", @@ -1065,8 +1137,10 @@ sub x_output { $ppm_to_root_window_cmd = $ppm_to_root_window_cmd_1; } elsif (which($win_cmd_2)) { $ppm_to_root_window_cmd = $ppm_to_root_window_cmd_2; - } else { - die "$progname: neither $win_cmd_1 nor $win_cmd_2 found on \$PATH.\n"; + } elsif (which($win_cmd_3)) { + $ppm_to_root_window_cmd = $ppm_to_root_window_cmd_3; + } else { + die "$progname: didn't find $win_cmd_1, $win_cmd_2, or $win_cmd_3 on \$PATH.\n"; } $SIG{HUP} = \&x_cleanup; @@ -1151,6 +1225,7 @@ sub x_output { $cmd .= "pnmpaste - $x $y $image_ppm > $image_tmp1"; open (IMG, "| $cmd") || die ("running $cmd: $!\n"); print IMG $body; + $body = undef; close (IMG); if ($verbose > 1) { print STDERR "$progname: subproc exited normally.\n"; @@ -1165,6 +1240,7 @@ sub x_output { my ($headers, $body) = get_document ($img, $base); if ($body) { handle_image ($base, $img, $body, $source); + $body = undef; } } unlink $image_tmp1, $image_tmp2; @@ -1180,6 +1256,7 @@ sub handle_image { } my ($iw, $ih) = image_to_pnm ($img, $body, $image_tmp1); + $body = undef; return 0 unless ($iw && $ih); my $ow = $iw; # used only for error messages @@ -1488,7 +1565,7 @@ sub main { if (!$root_p && !$no_output_p) { die "$copyright" . - "$progname: the -root argument is manditory (for now.)\n"; + "$progname: the -root argument is mandatory (for now.)\n"; } if (!$no_output_p && !$ENV{DISPLAY}) {