X-Git-Url: http://git.hungrycats.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=hacks%2Fwebcollage;h=289cd5be48579ca1acfb80bb809545985ecdbe46;hb=93f25dc6827112d98b8b855ea85c8f5eb8123086;hp=9f767edaabb6abb61ea93d1e6b27db06312d6740;hpb=af290bcdf2d1c61efc8aaaff653745c900cbe98c;p=xscreensaver diff --git a/hacks/webcollage b/hacks/webcollage index 9f767eda..289cd5be 100755 --- a/hacks/webcollage +++ b/hacks/webcollage @@ -27,7 +27,7 @@ require POSIX; use Fcntl ':flock'; # import LOCK_* constants -my $version = q{ $Revision: 1.32 $ }; $version =~ s/^[^0-9]+([0-9.]+).*$/$1/; +my $version = q{ $Revision: 1.54 $ }; $version =~ s/^[^0-9]+([0-9.]+).*$/$1/; my $copyright = "WebCollage $version, Copyright (c) 1999" . " Jamie Zawinski \n" . " http://www.jwz.org/xscreensaver/\n"; @@ -36,15 +36,15 @@ my $argv0 = $0; my $progname = $argv0; $progname =~ s@.*/@@g; my $random_redirector = "http://random.yahoo.com/bin/ryl"; -my $image_randomizer_1 = "http://image.altavista.com/cgi-bin/avncgi" . - "?do=3" . - "&verb=n" . - "&oshape=n" . - "&oorder=" . - "&ophoto=1&oart=1&ocolor=1&obw=1" . +my $image_randomizer_1 = "http://www.altavista.com/query" . + "?mmdo=3" . + "&nbq=12" . "&stype=simage" . - "&oprem=0" . - "&query="; + "&iclr=1" . + "&ibw=1" . + "&iexc=1" . + "&what=web" . + "&q="; my $image_randomizer_2 = "http://www.hotbot.com/?clickSrc=search" . "&submit=SEARCH&SM=SC&LG=any" . "&AM0=MC&AT0=words&AW0=" . @@ -56,6 +56,15 @@ my $image_randomizer_2 = "http://www.hotbot.com/?clickSrc=search" . "&MT="; my $image_randomizer_3 = "http://www.altavista.com/cgi-bin/query?pg=q" . "&text=yes&kl=XX&stype=stext&q="; +my $image_randomizer_4 = "http://search.news.yahoo.com/search/news_photos?" . + "&z=&n=100&o=o&2=&3=&p="; + +# I guess Photopoint got wise to me, because now they are doing error +# checking on the user ("u=") and album ("a=") parameters. Oh well. +# +#my $photo_randomizer = "http://albums.photopoint.com/j/View?u=1&a=1&p="; +#my $photo_randomizer_lo = 10000001; +#my $photo_randomizer_hi = 12400000; my $image_ppm = ($ENV{TMPDIR} ? $ENV{TMPDIR} : "/tmp") . "/webcollage." . $$; my $image_tmp1 = $image_ppm . "-1"; @@ -67,8 +76,16 @@ my $img_height; my $http_proxy = undef; my $http_timeout = 30; my $cvt_timeout = 10; -my $ppm_to_root_window_cmd = "xv -root -rmode 5 -viewonly" . - " +noresetroot %%PPM%% -quit"; + +# programs we can use to write to the root window (tried in ascending order.) +my $ppm_to_root_window_cmd_1 = "xloadimage -onroot -quiet %%PPM%%"; +my $ppm_to_root_window_cmd_2 = "xli -quiet -onroot -center" . + " -border black %%PPM%%"; +my $ppm_to_root_window_cmd_3 = "xv -root -rmode 5 -viewonly" . + " +noresetroot %%PPM%% -quit"; + +my $ppm_to_root_window_cmd = undef; # initialized by x_output() + my $filter_cmd = undef; my $post_filter_cmd = undef; my $background = undef; @@ -148,6 +165,9 @@ sub get_document_1 { $paddr = sockaddr_in($port2, $iaddr); + my $head = ""; + my $body = ""; + @_ = eval { local $SIG{ALRM} = sub { @@ -177,16 +197,14 @@ sub get_document_1 { $cookie = "AV_ALL=1"; } - print S ("GET " . ($http_proxy ? $url : "/$path") . " HTTP/1.0\n" . - "Host: $them\n" . - "User-Agent: $progname/$version\n" . - ($referer ? "Referer: $referer\n" : "") . - ($cookie ? "Cookie: $cookie\n" : "") . - "\n"); + print S ("GET " . ($http_proxy ? $url : "/$path") . " HTTP/1.0\r\n" . + "Host: $them\r\n" . + "User-Agent: $progname/$version\r\n" . + ($referer ? "Referer: $referer\r\n" : "") . + ($cookie ? "Cookie: $cookie\r\n" : "") . + "\r\n"); my $http = ; - my $head = ""; - my $body = ""; while () { $head .= $_; last if m@^[\r\n]@; @@ -206,6 +224,8 @@ sub get_document_1 { die if ($@ && $@ ne "alarm\n"); # propagate errors if ($@) { # timed out + $head = undef; + $body = undef; return (); } else { # didn't @@ -272,11 +292,13 @@ sub get_document { print STDERR "$progname: too many redirects " . "($max_loop_count) from $orig_url\n"; } + $body = undef; return (); } } elsif ( $http =~ m@HTTP/[0-9.]+ [4-9][0-9][0-9]@ ) { # http errors -- return nothing. + $body = undef; return (); } else { @@ -359,6 +381,8 @@ sub pick_image_from_body { " \"$url\": rejecting.\n"; } $rejected_urls{$url} = -1; + $body = undef; + $_ = undef; return (); } @@ -381,6 +405,8 @@ sub pick_image_from_body { " length $L in $url: rejecting.\n"; } $rejected_urls{$url} = $L; + $body = undef; + $_ = undef; return (); } elsif ( $verbose > 2 ) { print STDERR "$progname: keywords of length $L" . @@ -479,6 +505,9 @@ sub pick_image_from_body { } } + $_ = undef; + $body = undef; + if ( $#urls == 0 ) { if ( $verbose > 2 ) { print STDERR "$progname: no images on $base\n"; @@ -513,8 +542,12 @@ sub pick_from_url_randomizer { my ( $base, $body ) = get_document ($random_redirector, undef, $timeout); - return if (!$base || !$body); + if (!$base || !$body) { + $body = undef; + return; + } my $img = pick_image_from_body ($base, $body); + $body = undef; if ($img) { return ($base, $img, "yahoo"); @@ -534,6 +567,10 @@ sub random_word { $word = ; # toss partial line $word = ; # keep next line } + if (!$word) { + seek( IN, 0, 0 ); + $word = ; + } close (IN); } @@ -549,6 +586,10 @@ sub random_word { $word =~ s/izes$/ize/; $word =~ tr/A-Z/a-z/; + if ( $word =~ s/[ \t\n\r]/\+/g ) { # convert intra-word spaces to "+". + $word = "\%22$word\%22"; # And put quotes (%22) around it. + } + return $word; } @@ -569,7 +610,8 @@ sub pick_from_image_randomizer { my $search_url = ($which == 0 ? $image_randomizer_1 : $which == 1 ? $image_randomizer_2 : - $image_randomizer_3) . + $which == 2 ? $image_randomizer_3 : + $image_randomizer_4) . $words; # Pick a random search-result page instead of always taking the first. @@ -592,7 +634,10 @@ sub pick_from_image_randomizer { my ( $base, $body ) = get_document ($search_url, undef, $timeout); if (defined ($timeout)) { $timeout -= (time - $start); - return () if ($timeout <= 0); + if ($timeout <= 0) { + $body = undef; + return (); + } } return () if (! $body); @@ -616,9 +661,11 @@ sub pick_from_image_randomizer { my $href_count = 0; $_ = $body; - s/[\r\n\t ]+/ /g; - s/Result Pages:.*$//; # trim off page footer +# s/Result [Pp]ages:.*$//s; # trim off page footer +# s/^.*?IMAGE RESULTS//s; # trim off page header + + s/[\r\n\t ]+/ /g; s/( 3 ) { print STDERR "$progname: skipping corbis URL: $u\n"; } next; + } elsif ($which == 3 && + ($u =~ m@^http://[^/]+$@ || # no slashes + $u =~ m@/$@ || # ends in / + ! ($u =~ m@dailynews\.yahoo\.com@))) { # not dailynews +# $skipped = 1; + if ( $verbose > 3 ) { + print STDERR "$progname: skipping non-AP URL: $u\n"; + } + next; + } elsif ( $rejected_urls{$u} ) { if ( $verbose > 3 ) { my $L = $rejected_urls{$u}; @@ -666,6 +733,8 @@ sub pick_from_image_randomizer { print STDERR "$progname: found nothing on $base " . "($length bytes, $href_count links).\n"; } + $body = undef; + $_ = undef; return (); } @@ -678,17 +747,25 @@ sub pick_from_image_randomizer { } + $body = undef; + $_ = undef; my ( $base2, $body2 ) = get_document ($subpage, $base, $timeout); - return () if (!$base2 || !$body2); + if (!$base2 || !$body2) { + $body2 = undef; + return (); + } my $img = pick_image_from_body ($base2, $body2); + $body2 = undef; if ($img) { return ($base2, $img, ($which == 0 ? "imagevista" : - $which == 1 ? "hotbot" : "altavista") . + $which == 1 ? "hotbot" : + $which == 2 ? "altavista" : + "ap") . "/$search_count"); } else { return (); @@ -696,6 +773,19 @@ sub pick_from_image_randomizer { } +# Using the photo site, generate a random URL that will hopefully point +# to an image. Returns two URLs, both of which are the URL of the image. +# Returns () if nothing found this time. +# +#sub pick_from_photo_randomizer { +# my ( $timeout ) = @_; +# my $n = ($photo_randomizer_lo + +# int(rand() * ($photo_randomizer_hi - $photo_randomizer_lo))); +# my $url = $photo_randomizer . $n; +# return ( $url, $url, "photopoint" ); +#} + + # Picks a random image on a random page, and returns two URLs: # the page containing the image, and the image. # Returns () if nothing found this time. @@ -705,15 +795,18 @@ my $total_0 = 0; my $total_1 = 0; my $total_2 = 0; my $total_3 = 0; +my $total_4 = 0; my $count_0 = 0; my $count_1 = 0; my $count_2 = 0; my $count_3 = 0; +my $count_4 = 0; sub pick_image { my ( $timeout ) = @_; my $r = int(rand(100)); + my ($base, $img, $source, $total, $count); if ($r < 20) { @@ -726,6 +819,16 @@ sub pick_image { $total = ++$total_1; $count = ++$count_1 if $img; + } elsif ($r < 70) { + ($base, $img, $source) = pick_from_image_randomizer ($timeout, 3); + $total = ++$total_4; + $count = ++$count_4 if $img; + +# } elsif ($r < 70) { +# ($base, $img, $source) = pick_from_photo_randomizer ($timeout); +# $total = ++$total_4; +# $count = ++$count_4 if $img; + # } elsif ($r < 80) { # # HotBot sucks: 98% of the time, it says "no pages match your # # search", and then if I load the URL again by hand, it works. @@ -943,7 +1046,14 @@ sub image_to_pnm { $cmd2 = "exec $cmd"; # yes, this really is necessary. if we don't # do this, the process doesn't die properly. - if ($verbose == 0) { + if ($verbose <= 1) { + # + # We get a "giftopnm: got a 'Application Extension' extension" + # warning any time it's an animgif. + # + # Note that "giftopnm: EOF / read error on image data" is not + # always a fatal error -- sometimes the image looks fine anyway. + # $cmd2 .= " 2>/dev/null"; } @@ -962,12 +1072,14 @@ sub image_to_pnm { } kill ('TERM', $pid) if ($pid); $timed_out = 1; + $body = undef; }; if (($pid = open(PIPE, "| $cmd2 > $output"))) { $timed_out = 0; alarm $cvt_timeout; print PIPE $body; + $body = undef; close PIPE; if ($verbose > 3) { print STDERR "$progname: awaiting $pid\n"; } @@ -996,25 +1108,41 @@ sub image_to_pnm { die if ($@ && $@ ne "alarm\n"); # propagate errors if ($@) { # timed out + $body = undef; return (); } else { # didn't alarm 0; + $body = undef; return @_; } } sub x_output { - my $win_cmd = $ppm_to_root_window_cmd; - $win_cmd =~ s/^([^ \t\r\n]+).*$/$1/; + my $win_cmd_1 = $ppm_to_root_window_cmd_1; + my $win_cmd_2 = $ppm_to_root_window_cmd_2; + my $win_cmd_3 = $ppm_to_root_window_cmd_3; + $win_cmd_1 =~ s/^([^ \t\r\n]+).*$/$1/; + $win_cmd_2 =~ s/^([^ \t\r\n]+).*$/$1/; + $win_cmd_3 =~ s/^([^ \t\r\n]+).*$/$1/; # make sure the various programs we execute exist, right up front. foreach ("ppmmake", "giftopnm", "djpeg", "pnmpaste", "pnmscale", - "pnmcut", $win_cmd) { + "pnmcut") { which ($_) || die "$progname: $_ not found on \$PATH.\n"; } + if (which($win_cmd_1)) { + $ppm_to_root_window_cmd = $ppm_to_root_window_cmd_1; + } elsif (which($win_cmd_2)) { + $ppm_to_root_window_cmd = $ppm_to_root_window_cmd_2; + } elsif (which($win_cmd_3)) { + $ppm_to_root_window_cmd = $ppm_to_root_window_cmd_3; + } else { + die "$progname: didn't find $win_cmd_1, $win_cmd_2, or $win_cmd_3 on \$PATH.\n"; + } + $SIG{HUP} = \&x_cleanup; $SIG{INT} = \&x_cleanup; $SIG{QUIT} = \&x_cleanup; @@ -1030,6 +1158,9 @@ sub x_output { which ($_) || die "$progname: $_ not found on \$PATH.\n"; $_ = `$_`; ($img_width, $img_height) = m/dimensions: *(\d+)x(\d+) /; + if (!defined($img_height)) { + die "$progname: xdpyinfo failed.\n"; + } } my $bgcolor = "#000000"; @@ -1094,6 +1225,7 @@ sub x_output { $cmd .= "pnmpaste - $x $y $image_ppm > $image_tmp1"; open (IMG, "| $cmd") || die ("running $cmd: $!\n"); print IMG $body; + $body = undef; close (IMG); if ($verbose > 1) { print STDERR "$progname: subproc exited normally.\n"; @@ -1108,6 +1240,7 @@ sub x_output { my ($headers, $body) = get_document ($img, $base); if ($body) { handle_image ($base, $img, $body, $source); + $body = undef; } } unlink $image_tmp1, $image_tmp2; @@ -1123,6 +1256,7 @@ sub handle_image { } my ($iw, $ih) = image_to_pnm ($img, $body, $image_tmp1); + $body = undef; return 0 unless ($iw && $ih); my $ow = $iw; # used only for error messages @@ -1431,7 +1565,7 @@ sub main { if (!$root_p && !$no_output_p) { die "$copyright" . - "$progname: the -root argument is manditory (for now.)\n"; + "$progname: the -root argument is mandatory (for now.)\n"; } if (!$no_output_p && !$ENV{DISPLAY}) {