X-Git-Url: http://git.hungrycats.org/cgi-bin/gitweb.cgi?p=xscreensaver;a=blobdiff_plain;f=hacks%2Fwebcollage;h=4ed61c7b434dfc7866d323bb397d3d421f74b131;hp=815d2909aa20e40d102797c8cb930a0fdad0928c;hb=3f438031d610c7e15fd33876a879b97e290e05fb;hpb=447db08c956099b3b183886729108bf5b364c4b8 diff --git a/hacks/webcollage b/hacks/webcollage index 815d2909..4ed61c7b 100755 --- a/hacks/webcollage +++ b/hacks/webcollage @@ -60,7 +60,7 @@ use bytes; # Larry can take Unicode and shove it up his ass sideways. my $progname = $0; $progname =~ s@.*/@@g; -my $version = q{ $Revision: 1.125 $ }; $version =~ s/^[^0-9]+([0-9.]+).*$/$1/; +my $version = q{ $Revision: 1.127 $ }; $version =~ s/^[^0-9]+([0-9.]+).*$/$1/; my $copyright = "WebCollage $version, Copyright (c) 1999-2005" . " Jamie Zawinski \n" . " http://www.jwz.org/webcollage/\n"; @@ -211,6 +211,39 @@ my %warningless_sites = ( ); +# For decoding HTML-encoded character entities to URLs. +# +my %entity_table = ( + "apos" => '\'', + "quot" => '"', "amp" => '&', "lt" => '<', "gt" => '>', + "nbsp" => ' ', "iexcl" => '¡', "cent" => '¢', "pound" => '£', + "curren" => '¤', "yen" => '¥', "brvbar" => '¦', "sect" => '§', + "uml" => '¨', "copy" => '©', "ordf" => 'ª', "laquo" => '«', + "not" => '¬', "shy" => '­', "reg" => '®', "macr" => '¯', + "deg" => '°', "plusmn" => '±', "sup2" => '²', "sup3" => '³', + "acute" => '´', "micro" => 'µ', "para" => '¶', "middot" => '·', + "cedil" => '¸', "sup1" => '¹', "ordm" => 'º', "raquo" => '»', + "frac14" => '¼', "frac12" => '½', "frac34" => '¾', "iquest" => '¿', + "Agrave" => 'À', "Aacute" => 'Á', "Acirc" => 'Â', "Atilde" => 'Ã', + "Auml" => 'Ä', "Aring" => 'Å', "AElig" => 'Æ', "Ccedil" => 'Ç', + "Egrave" => 'È', "Eacute" => 'É', "Ecirc" => 'Ê', "Euml" => 'Ë', + "Igrave" => 'Ì', "Iacute" => 'Í', "Icirc" => 'Î', "Iuml" => 'Ï', + "ETH" => 'Ð', "Ntilde" => 'Ñ', "Ograve" => 'Ò', "Oacute" => 'Ó', + "Ocirc" => 'Ô', "Otilde" => 'Õ', "Ouml" => 'Ö', "times" => '×', + "Oslash" => 'Ø', "Ugrave" => 'Ù', "Uacute" => 'Ú', "Ucirc" => 'Û', + "Uuml" => 'Ü', "Yacute" => 'Ý', "THORN" => 'Þ', "szlig" => 'ß', + "agrave" => 'à', "aacute" => 'á', "acirc" => 'â', "atilde" => 'ã', + "auml" => 'ä', "aring" => 'å', "aelig" => 'æ', "ccedil" => 'ç', + "egrave" => 'è', "eacute" => 'é', "ecirc" => 'ê', "euml" => 'ë', + "igrave" => 'ì', "iacute" => 'í', "icirc" => 'î', "iuml" => 'ï', + "eth" => 'ð', "ntilde" => 'ñ', "ograve" => 'ò', "oacute" => 'ó', + "ocirc" => 'ô', "otilde" => 'õ', "ouml" => 'ö', "divide" => '÷', + "oslash" => 'ø', "ugrave" => 'ù', "uacute" => 'ú', "ucirc" => 'û', + "uuml" => 'ü', "yacute" => 'ý', "thorn" => 'þ', "yuml" => 'ÿ', + "ndash" => '-', "mdash" => "--" +); + + ############################################################################## # # Various global flags set by command line parameters, or computed @@ -275,8 +308,8 @@ my @tripwire_words = ("aberrate", "abode", "amorphous", "antioch", # returns three values: the HTTP response line; the document headers; # and the document body. # -sub get_document_1 { - my ( $url, $referer, $timeout ) = @_; +sub get_document_1($$$) { + my ($url, $referer, $timeout) = @_; if (!defined($timeout)) { $timeout = $http_timeout; } if ($timeout > $http_timeout) { $timeout = $http_timeout; } @@ -444,8 +477,8 @@ sub get_document_1 { # returns two values: the document headers; and the document body. # if the given URL did a redirect, returns the redirected-to document. # -sub get_document { - my ( $url, $referer, $timeout ) = @_; +sub get_document($$;$) { + my ($url, $referer, $timeout) = @_; my $start = time; if (defined($referer) && $referer eq $driftnet_magic) { @@ -537,7 +570,7 @@ sub get_document { # in again, but you have to present the old cookie to get the new cookie. # So, by doing this, the built-in cypherpunks cookie will never go "stale". # -sub set_cookie { +sub set_cookie($$) { my ($host, $cookie) = @_; my $oc = $cookies{$host}; return unless $oc; @@ -563,8 +596,8 @@ sub set_cookie { # given a URL and the body text at that URL, selects and returns a random # image from it. returns () if no suitable images found. # -sub pick_image_from_body { - my ( $url, $body ) = @_; +sub pick_image_from_body($$) { + my ($url, $body) = @_; my $base = $url; $_ = $url; @@ -779,7 +812,7 @@ sub pick_image_from_body { ############################################################################ -sub pick_dictionary { +sub pick_dictionary() { my @dicts = ("/usr/dict/words", "/usr/share/dict/words", "/usr/share/lib/dict/words"); @@ -794,7 +827,7 @@ sub pick_dictionary { # returns a random word from the dictionary # -sub random_word { +sub random_word() { local *IN; if (! open (IN, "<$wordlist")) { @@ -846,7 +879,7 @@ sub random_word { } -sub random_words { +sub random_words($) { my ($or_p) = @_; my $sep = ($or_p ? "%20OR%20" : "%20"); return (random_word . $sep . @@ -857,20 +890,20 @@ sub random_words { } -sub url_quote { +sub url_quote($) { my ($s) = @_; $s =~ s|([^-a-zA-Z0-9.\@/_\r\n])|sprintf("%%%02X", ord($1))|ge; return $s; } -sub url_unquote { +sub url_unquote($) { my ($s) = @_; $s =~ s/[+]/ /g; $s =~ s/%([a-z0-9]{2})/chr(hex($1))/ige; return $s; } -sub html_quote { +sub html_quote($) { my ($s) = @_; $s =~ s/&/&/gi; $s =~ s/