use Fcntl ':flock'; # import LOCK_* constants
-my $version = q{ $Revision: 1.32 $ }; $version =~ s/^[^0-9]+([0-9.]+).*$/$1/;
+my $version = q{ $Revision: 1.54 $ }; $version =~ s/^[^0-9]+([0-9.]+).*$/$1/;
my $copyright = "WebCollage $version, Copyright (c) 1999" .
" Jamie Zawinski <jwz\@jwz.org>\n" .
" http://www.jwz.org/xscreensaver/\n";
my $progname = $argv0; $progname =~ s@.*/@@g;
my $random_redirector = "http://random.yahoo.com/bin/ryl";
-my $image_randomizer_1 = "http://image.altavista.com/cgi-bin/avncgi" .
- "?do=3" .
- "&verb=n" .
- "&oshape=n" .
- "&oorder=" .
- "&ophoto=1&oart=1&ocolor=1&obw=1" .
+my $image_randomizer_1 = "http://www.altavista.com/query" .
+ "?mmdo=3" .
+ "&nbq=12" .
"&stype=simage" .
- "&oprem=0" .
- "&query=";
+ "&iclr=1" .
+ "&ibw=1" .
+ "&iexc=1" .
+ "&what=web" .
+ "&q=";
my $image_randomizer_2 = "http://www.hotbot.com/?clickSrc=search" .
"&submit=SEARCH&SM=SC&LG=any" .
"&AM0=MC&AT0=words&AW0=" .
"&MT=";
my $image_randomizer_3 = "http://www.altavista.com/cgi-bin/query?pg=q" .
"&text=yes&kl=XX&stype=stext&q=";
+my $image_randomizer_4 = "http://search.news.yahoo.com/search/news_photos?" .
+ "&z=&n=100&o=o&2=&3=&p=";
+
+# I guess Photopoint got wise to me, because now they are doing error
+# checking on the user ("u=") and album ("a=") parameters. Oh well.
+#
+#my $photo_randomizer = "http://albums.photopoint.com/j/View?u=1&a=1&p=";
+#my $photo_randomizer_lo = 10000001;
+#my $photo_randomizer_hi = 12400000;
my $image_ppm = ($ENV{TMPDIR} ? $ENV{TMPDIR} : "/tmp") . "/webcollage." . $$;
my $image_tmp1 = $image_ppm . "-1";
my $http_proxy = undef;
my $http_timeout = 30;
my $cvt_timeout = 10;
-my $ppm_to_root_window_cmd = "xv -root -rmode 5 -viewonly" .
- " +noresetroot %%PPM%% -quit";
+
+# Programs we can use to write to the root window.  x_output() uses the
+# first one found on $PATH, trying _1, then _2, then _3.
+my $ppm_to_root_window_cmd_1 = "xloadimage -onroot -quiet %%PPM%%";
+my $ppm_to_root_window_cmd_2 = "xli -quiet -onroot -center" .
+ " -border black %%PPM%%";
+my $ppm_to_root_window_cmd_3 = "xv -root -rmode 5 -viewonly" .
+ " +noresetroot %%PPM%% -quit";
+
+my $ppm_to_root_window_cmd = undef; # initialized by x_output()
+
my $filter_cmd = undef;
my $post_filter_cmd = undef;
my $background = undef;
$paddr = sockaddr_in($port2, $iaddr);
+ my $head = "";
+ my $body = "";
+
@_ =
eval {
local $SIG{ALRM} = sub {
$cookie = "AV_ALL=1";
}
- print S ("GET " . ($http_proxy ? $url : "/$path") . " HTTP/1.0\n" .
- "Host: $them\n" .
- "User-Agent: $progname/$version\n" .
- ($referer ? "Referer: $referer\n" : "") .
- ($cookie ? "Cookie: $cookie\n" : "") .
- "\n");
+ print S ("GET " . ($http_proxy ? $url : "/$path") . " HTTP/1.0\r\n" .
+ "Host: $them\r\n" .
+ "User-Agent: $progname/$version\r\n" .
+ ($referer ? "Referer: $referer\r\n" : "") .
+ ($cookie ? "Cookie: $cookie\r\n" : "") .
+ "\r\n");
my $http = <S>;
- my $head = "";
- my $body = "";
while (<S>) {
$head .= $_;
last if m@^[\r\n]@;
die if ($@ && $@ ne "alarm\n"); # propagate errors
if ($@) {
# timed out
+ $head = undef;
+ $body = undef;
return ();
} else {
# didn't
print STDERR "$progname: too many redirects " .
"($max_loop_count) from $orig_url\n";
}
+ $body = undef;
return ();
}
} elsif ( $http =~ m@HTTP/[0-9.]+ [4-9][0-9][0-9]@ ) {
# http errors -- return nothing.
+ $body = undef;
return ();
} else {
" \"$url\": rejecting.\n";
}
$rejected_urls{$url} = -1;
+ $body = undef;
+ $_ = undef;
return ();
}
" length $L in $url: rejecting.\n";
}
$rejected_urls{$url} = $L;
+ $body = undef;
+ $_ = undef;
return ();
} elsif ( $verbose > 2 ) {
print STDERR "$progname: keywords of length $L" .
}
}
+ $_ = undef;
+ $body = undef;
+
if ( $#urls == 0 ) {
if ( $verbose > 2 ) {
print STDERR "$progname: no images on $base\n";
my ( $base, $body ) = get_document ($random_redirector, undef, $timeout);
- return if (!$base || !$body);
+ if (!$base || !$body) {
+ $body = undef;
+ return;
+ }
my $img = pick_image_from_body ($base, $body);
+ $body = undef;
if ($img) {
return ($base, $img, "yahoo");
$word = <IN>; # toss partial line
$word = <IN>; # keep next line
}
+ if (!$word) {
+ seek( IN, 0, 0 );
+ $word = <IN>;
+ }
close (IN);
}
$word =~ s/izes$/ize/;
$word =~ tr/A-Z/a-z/;
+ if ( $word =~ s/[ \t\n\r]/\+/g ) { # convert intra-word spaces to "+".
+ $word = "\%22$word\%22"; # And put quotes (%22) around it.
+ }
+
return $word;
}
my $search_url = ($which == 0 ? $image_randomizer_1 :
$which == 1 ? $image_randomizer_2 :
- $image_randomizer_3) .
+ $which == 2 ? $image_randomizer_3 :
+ $image_randomizer_4) .
$words;
# Pick a random search-result page instead of always taking the first.
my ( $base, $body ) = get_document ($search_url, undef, $timeout);
if (defined ($timeout)) {
$timeout -= (time - $start);
- return () if ($timeout <= 0);
+ if ($timeout <= 0) {
+ $body = undef;
+ return ();
+ }
}
return () if (! $body);
my $href_count = 0;
$_ = $body;
- s/[\r\n\t ]+/ /g;
- s/Result Pages:.*$//; # trim off page footer
+# s/Result [Pp]ages:.*$//s; # trim off page footer
+# s/^.*?IMAGE RESULTS//s; # trim off page header
+
+ s/[\r\n\t ]+/ /g;
s/(<A )/\n$1/gi;
foreach (split(/\n/)) {
next unless ($u =~ m@^http://@i); # skip non-http and relative urls.
next if ($u =~ m@[/.]altavista\.com@i); # skip altavista builtins
+ next if ($u =~ m@[/.]av\.com@i);
+ next if ($u =~ m@[/.]virage\.com@i);
+ next if ($u =~ m@[/.]photoloft\.com@i);
+ next if ($u =~ m@[/.]shopping\.com@i);
+ next if ($u =~ m@[/.]thetrip\.com@i);
+ next if ($u =~ m@[/.]cmgi\.com@i);
+ next if ($u =~ m@[/.]intelihealth\.com@i);
+ next if ($u =~ m@[/.]wildweb\.com@i);
next if ($u =~ m@[/.]digital\.com@i);
next if ($u =~ m@[/.]doubleclick\.net@i);
+ next if ($u =~ m@[/.]freeim\.org@i);
+ next if ($u =~ m@[/.]clicktomarket\.com@i); # you cretins
- if ($which == 0 && $u =~ m@[/.]corbis\.com/@) {
+ if ($which == 0 && $u =~ m@[/.]corbis\.com@) {
$skipped = 1;
if ( $verbose > 3 ) {
print STDERR "$progname: skipping corbis URL: $u\n";
}
next;
+ } elsif ($which == 3 &&
+ ($u =~ m@^http://[^/]+$@ || # no slashes
+ $u =~ m@/$@ || # ends in /
+ ! ($u =~ m@dailynews\.yahoo\.com@))) { # not dailynews
+# $skipped = 1;
+ if ( $verbose > 3 ) {
+ print STDERR "$progname: skipping non-AP URL: $u\n";
+ }
+ next;
+
} elsif ( $rejected_urls{$u} ) {
if ( $verbose > 3 ) {
my $L = $rejected_urls{$u};
print STDERR "$progname: found nothing on $base " .
"($length bytes, $href_count links).\n";
}
+ $body = undef;
+ $_ = undef;
return ();
}
}
+ $body = undef;
+ $_ = undef;
my ( $base2, $body2 ) = get_document ($subpage, $base, $timeout);
- return () if (!$base2 || !$body2);
+ if (!$base2 || !$body2) {
+ $body2 = undef;
+ return ();
+ }
my $img = pick_image_from_body ($base2, $body2);
+ $body2 = undef;
if ($img) {
return ($base2, $img,
($which == 0 ? "imagevista" :
- $which == 1 ? "hotbot" : "altavista") .
+ $which == 1 ? "hotbot" :
+ $which == 2 ? "altavista" :
+ "ap") .
"/$search_count");
} else {
return ();
}
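+# Using the photo site, generate a random URL that will hopefully point
+# to an image.  Returns three values: the image URL twice (once as the
+# page, once as the image) and the source tag "photopoint".  As written
+# it never returns (): the URL is generated without being fetched here.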
+#
+#sub pick_from_photo_randomizer {
+# my ( $timeout ) = @_;
+# my $n = ($photo_randomizer_lo +
+# int(rand() * ($photo_randomizer_hi - $photo_randomizer_lo)));
+# my $url = $photo_randomizer . $n;
+# return ( $url, $url, "photopoint" );
+#}
+
+
# Picks a random image on a random page, and returns two URLs:
# the page containing the image, and the image.
# Returns () if nothing found this time.
my $total_1 = 0;
my $total_2 = 0;
my $total_3 = 0;
+my $total_4 = 0;
my $count_0 = 0;
my $count_1 = 0;
my $count_2 = 0;
my $count_3 = 0;
+my $count_4 = 0;
sub pick_image {
my ( $timeout ) = @_;
my $r = int(rand(100));
+
my ($base, $img, $source, $total, $count);
if ($r < 20) {
$total = ++$total_1;
$count = ++$count_1 if $img;
+ } elsif ($r < 70) {
+ ($base, $img, $source) = pick_from_image_randomizer ($timeout, 3);
+ $total = ++$total_4;
+ $count = ++$count_4 if $img;
+
+# } elsif ($r < 70) {
+# ($base, $img, $source) = pick_from_photo_randomizer ($timeout);
+# $total = ++$total_4;
+# $count = ++$count_4 if $img;
+
# } elsif ($r < 80) {
# # HotBot sucks: 98% of the time, it says "no pages match your
# # search", and then if I load the URL again by hand, it works.
$cmd2 = "exec $cmd"; # yes, this really is necessary. if we don't
# do this, the process doesn't die properly.
- if ($verbose == 0) {
+ if ($verbose <= 1) {
+ #
+ # We get a "giftopnm: got a 'Application Extension' extension"
+ # warning any time it's an animgif.
+ #
+ # Note that "giftopnm: EOF / read error on image data" is not
+ # always a fatal error -- sometimes the image looks fine anyway.
+ #
$cmd2 .= " 2>/dev/null";
}
}
kill ('TERM', $pid) if ($pid);
$timed_out = 1;
+ $body = undef;
};
if (($pid = open(PIPE, "| $cmd2 > $output"))) {
$timed_out = 0;
alarm $cvt_timeout;
print PIPE $body;
+ $body = undef;
close PIPE;
if ($verbose > 3) { print STDERR "$progname: awaiting $pid\n"; }
die if ($@ && $@ ne "alarm\n"); # propagate errors
if ($@) {
# timed out
+ $body = undef;
return ();
} else {
# didn't
alarm 0;
+ $body = undef;
return @_;
}
}
sub x_output {
- my $win_cmd = $ppm_to_root_window_cmd;
- $win_cmd =~ s/^([^ \t\r\n]+).*$/$1/;
+ my $win_cmd_1 = $ppm_to_root_window_cmd_1;
+ my $win_cmd_2 = $ppm_to_root_window_cmd_2;
+ my $win_cmd_3 = $ppm_to_root_window_cmd_3;
+ $win_cmd_1 =~ s/^([^ \t\r\n]+).*$/$1/;
+ $win_cmd_2 =~ s/^([^ \t\r\n]+).*$/$1/;
+ $win_cmd_3 =~ s/^([^ \t\r\n]+).*$/$1/;
# make sure the various programs we execute exist, right up front.
foreach ("ppmmake", "giftopnm", "djpeg", "pnmpaste", "pnmscale",
- "pnmcut", $win_cmd) {
+ "pnmcut") {
which ($_) || die "$progname: $_ not found on \$PATH.\n";
}
+ if (which($win_cmd_1)) {
+ $ppm_to_root_window_cmd = $ppm_to_root_window_cmd_1;
+ } elsif (which($win_cmd_2)) {
+ $ppm_to_root_window_cmd = $ppm_to_root_window_cmd_2;
+ } elsif (which($win_cmd_3)) {
+ $ppm_to_root_window_cmd = $ppm_to_root_window_cmd_3;
+ } else {
+ die "$progname: didn't find $win_cmd_1, $win_cmd_2, or $win_cmd_3 on \$PATH.\n";
+ }
+
$SIG{HUP} = \&x_cleanup;
$SIG{INT} = \&x_cleanup;
$SIG{QUIT} = \&x_cleanup;
which ($_) || die "$progname: $_ not found on \$PATH.\n";
$_ = `$_`;
($img_width, $img_height) = m/dimensions: *(\d+)x(\d+) /;
+ if (!defined($img_height)) {
+ die "$progname: xdpyinfo failed.\n";
+ }
}
my $bgcolor = "#000000";
$cmd .= "pnmpaste - $x $y $image_ppm > $image_tmp1";
open (IMG, "| $cmd") || die ("running $cmd: $!\n");
print IMG $body;
+ $body = undef;
close (IMG);
if ($verbose > 1) {
print STDERR "$progname: subproc exited normally.\n";
my ($headers, $body) = get_document ($img, $base);
if ($body) {
handle_image ($base, $img, $body, $source);
+ $body = undef;
}
}
unlink $image_tmp1, $image_tmp2;
}
my ($iw, $ih) = image_to_pnm ($img, $body, $image_tmp1);
+ $body = undef;
return 0 unless ($iw && $ih);
my $ow = $iw; # used only for error messages
if (!$root_p && !$no_output_p) {
die "$copyright" .
- "$progname: the -root argument is manditory (for now.)\n";
+ "$progname: the -root argument is mandatory (for now.)\n";
}
if (!$no_output_p && !$ENV{DISPLAY}) {