X-Git-Url: http://git.hungrycats.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=driver%2Fxscreensaver-text;h=1d0170d9fbfaea30c2099a07807eace017082cc2;hb=d6b0217f2417bd19187f0ebc389d6c5c2233b11c;hp=f7d2a992d1858f7f1fa6f69beb18a8dd6f6dbf78;hpb=b81f521c5ad7022ac12db18ca8fcdd9fb063831e;p=xscreensaver diff --git a/driver/xscreensaver-text b/driver/xscreensaver-text index f7d2a992..1d0170d9 100755 --- a/driver/xscreensaver-text +++ b/driver/xscreensaver-text @@ -1,5 +1,5 @@ #!/usr/bin/perl -w -# Copyright © 2005-2012 Jamie Zawinski +# Copyright © 2005-2016 Jamie Zawinski # # Permission to use, copy, modify, distribute, and sell this software and its # documentation for any purpose is hereby granted without fee, provided that @@ -28,13 +28,16 @@ use strict; # Only error out if we're actually loading a URL instead of local data. BEGIN { eval 'use LWP::UserAgent;' } +# Not sure how prevalent this is. Hope it's part of the default install. +BEGIN { eval 'use HTML::Entities;' } + use Socket; use POSIX qw(strftime); use Text::Wrap qw(wrap); use bytes; my $progname = $0; $progname =~ s@.*/@@g; -my $version = q{ $Revision: 1.27 $ }; $version =~ s/^[^0-9]+([0-9.]+).*$/$1/; +my ($version) = ('$Revision: 1.44 $' =~ m/\s(\d[.\d]+)\s/s); my $verbose = 0; my $http_proxy = undef; @@ -44,127 +47,68 @@ my $text_mode = 'date'; my $text_literal = ''; my $text_file = ''; my $text_program = ''; -my $text_url = 'http://twitter.com/statuses/public_timeline.atom'; +my $text_url = 'https://en.wikipedia.org/w/index.php?title=Special:NewPages&feed=rss'; # Default URL needs to be set and match what's in OSX/XScreenSaverView.m -my $wrap_columns = undef; +my $wrap_columns = undef; +my $truncate_lines = undef; +my $latin1_p = 0; my $nyarlathotep_p = 0; -# Maps HTML character entities to the corresponding Latin1 characters. -# -my %entity_table = ( - "quot" => '"', "amp" => '&', "lt" => '<', "gt" => '>', - "nbsp" => ' ', "iexcl" => '¡', "cent" => '¢', "pound" => '£', - "curren" => '¤', "yen" => '¥', "brvbar" => '¦', "sect" => '§', - "uml" => '¨', "copy" => '©', "ordf" => 'ª', "laquo" => '«', - "not" => '¬', "shy" => '­', "reg" => '®', "macr" => '¯', - "deg" => '°', "plusmn" => '±', "sup2" => '²', "sup3" => '³', - "acute" => '´', "micro" => 'µ', "para" => '¶', "middot" => '·', - "cedil" => '¸', "sup1" => '¹', "ordm" => 'º', "raquo" => '»', - "frac14" => '¼', "frac12" => '½', "frac34" => '¾', "iquest" => '¿', - "Agrave" => 'À', "Aacute" => 'Á', "Acirc" => 'Â', "Atilde" => 'Ã', - "Auml" => 'Ä', "Aring" => 'Å', "AElig" => 'Æ', "Ccedil" => 'Ç', - "Egrave" => 'È', "Eacute" => 'É', "Ecirc" => 'Ê', "Euml" => 'Ë', - "Igrave" => 'Ì', "Iacute" => 'Í', "Icirc" => 'Î', "Iuml" => 'Ï', - "ETH" => 'Ð', "Ntilde" => 'Ñ', "Ograve" => 'Ò', "Oacute" => 'Ó', - "Ocirc" => 'Ô', "Otilde" => 'Õ', "Ouml" => 'Ö', "times" => '×', - "Oslash" => 'Ø', "Ugrave" => 'Ù', "Uacute" => 'Ú', "Ucirc" => 'Û', - "Uuml" => 'Ü', "Yacute" => 'Ý', "THORN" => 'Þ', "szlig" => 'ß', - "agrave" => 'à', "aacute" => 'á', "acirc" => 'â', "atilde" => 'ã', - "auml" => 'ä', "aring" => 'å', "aelig" => 'æ', "ccedil" => 'ç', - "egrave" => 'è', "eacute" => 'é', "ecirc" => 'ê', "euml" => 'ë', - "igrave" => 'ì', "iacute" => 'í', "icirc" => 'î', "iuml" => 'ï', - "eth" => 'ð', "ntilde" => 'ñ', "ograve" => 'ò', "oacute" => 'ó', - "ocirc" => 'ô', "otilde" => 'õ', "ouml" => 'ö', "divide" => '÷', - "oslash" => 'ø', "ugrave" => 'ù', "uacute" => 'ú', "ucirc" => 'û', - "uuml" => 'ü', "yacute" => 'ý', "thorn" => 'þ', "yuml" => 'ÿ', - "apos" => '\'', - - # HTML 4 entities that do not have 1:1 Latin1 mappings. - "bull" => "*", "hellip"=> "...", "prime" => "'", "Prime" => "\"", - "frasl" => "/", "trade" => "[tm]", "larr" => "<-", "rarr" => "->", - "harr" => "<->", "lArr" => "<=", "rArr" => "=>", "hArr" => "<=>", - "empty" => "Ø", "minus" => "-", "lowast"=> "*", "sim" => "~", - "cong" => "=~", "asymp" => "~", "ne" => "!=", "equiv" => "==", - "le" => "<=", "ge" => ">=", "lang" => "<", "rang" => ">", - "loz" => "<>", "OElig" => "OE", "oelig" => "oe", "Yuml" => "Y", - "circ" => "^", "tilde" => "~", "ensp" => " ", "emsp" => " ", - "thinsp"=> " ", "ndash" => "-", "mdash" => "-", "lsquo" => "`", - "rsquo" => "'", "sbquo" => "'", "ldquo" => "\"", "rdquo" => "\"", - "bdquo" => "\"", "lsaquo"=> "<", "rsaquo"=> ">", -); - -# Maps certain UTF8 characters (2 or 3 bytes) to the corresponding -# Latin1 characters. -# -my %unicode_latin1_table = ( - "\xC2\xA1" => '¡', "\xC2\xA2" => '¢', "\xC2\xA3" => '£', "\xC2\xA4" => '¤', - "\xC2\xA5" => '¥', "\xC2\xA6" => '¦', "\xC2\xA7" => '§', "\xC2\xA8" => '¨', - "\xC2\xA9" => '©', "\xC2\xAA" => 'ª', "\xC2\xAB" => '«', "\xC2\xAC" => '¬', - "\xC2\xAD" => '­', "\xC2\xAE" => '®', "\xC2\xAF" => '¯', "\xC2\xB0" => '°', - "\xC2\xB1" => '±', "\xC2\xB2" => '²', "\xC2\xB3" => '³', "\xC2\xB4" => '´', - "\xC2\xB5" => 'µ', "\xC2\xB6" => '¶', "\xC2\xB7" => '·', "\xC2\xB8" => '¸', - "\xC2\xB9" => '¹', "\xC2\xBA" => 'º', "\xC2\xBB" => '»', "\xC2\xBC" => '¼', - "\xC2\xBD" => '½', "\xC2\xBE" => '¾', "\xC2\xBF" => '¿', "\xC3\x80" => 'À', - "\xC3\x81" => 'Á', "\xC3\x82" => 'Â', "\xC3\x83" => 'Ã', "\xC3\x84" => 'Ä', - "\xC3\x85" => 'Å', "\xC3\x86" => 'Æ', "\xC3\x87" => 'Ç', "\xC3\x88" => 'È', - "\xC3\x89" => 'É', "\xC3\x8A" => 'Ê', "\xC3\x8B" => 'Ë', "\xC3\x8C" => 'Ì', - "\xC3\x8D" => 'Í', "\xC3\x8E" => 'Î', "\xC3\x8F" => 'Ï', "\xC3\x90" => 'Ð', - "\xC3\x91" => 'Ñ', "\xC3\x92" => 'Ò', "\xC3\x93" => 'Ó', "\xC3\x94" => 'Ô', - "\xC3\x95" => 'Õ', "\xC3\x96" => 'Ö', "\xC3\x97" => '×', "\xC3\x98" => 'Ø', - "\xC3\x99" => 'Ù', "\xC3\x9A" => 'Ú', "\xC3\x9B" => 'Û', "\xC3\x9C" => 'Ü', - "\xC3\x9D" => 'Ý', "\xC3\x9E" => 'Þ', "\xC3\x9F" => 'ß', "\xC3\xA0" => 'à', - "\xC3\xA1" => 'á', "\xC3\xA2" => 'â', "\xC3\xA3" => 'ã', "\xC3\xA4" => 'ä', - "\xC3\xA5" => 'å', "\xC3\xA6" => 'æ', "\xC3\xA7" => 'ç', "\xC3\xA8" => 'è', - "\xC3\xA9" => 'é', "\xC3\xAA" => 'ê', "\xC3\xAB" => 'ë', "\xC3\xAC" => 'ì', - "\xC3\xAD" => 'í', "\xC3\xAE" => 'î', "\xC3\xAF" => 'ï', "\xC3\xB0" => 'ð', - "\xC3\xB1" => 'ñ', "\xC3\xB2" => 'ò', "\xC3\xB3" => 'ó', "\xC3\xB4" => 'ô', - "\xC3\xB5" => 'õ', "\xC3\xB6" => 'ö', "\xC3\xB7" => '÷', "\xC3\xB8" => 'ø', - "\xC3\xB9" => 'ù', "\xC3\xBA" => 'ú', "\xC3\xBB" => 'û', "\xC3\xBC" => 'ü', - "\xC3\xBD" => 'ý', "\xC3\xBE" => 'þ', "\xC3\xBF" => 'ÿ', - - "\xE2\x80\x93" => '--', "\xE2\x80\x94" => '--', - "\xE2\x80\x98" => '`', "\xE2\x80\x99" => '\'', - "\xE2\x80\x9C" => "``", "\xE2\x80\x9D" => "''", - "\xE2\x80\xA6" => '...', -); - - # Convert any HTML entities to Latin1 characters. # sub de_entify($) { my ($text) = @_; - $text =~ s/(&(\#)?([[:alpha:]\d]+);?)/ - { - my $c = $3; - if (! defined($2)) { - $c = $entity_table{$c}; # for Á - } else { - if ($c =~ m@^x([\dA-F]+)$@si) { # for A - $c = chr(hex($1)); - } elsif ($c =~ m@^\d+$@si) { # for A - $c = chr($c); - } else { - $c = undef; - } - } - ($c || "[$3]"); # for &unknown; => "[unknown]" - } - /gexi; - return $text; + + return '' unless defined($text); + return $text unless ($text =~ m/&/s); + + # Convert any HTML entities to Unicode characters, + # if the HTML::Entities module is installed. + eval { + my $t2 = $text; + $text = undef; + $text = HTML::Entities::decode_entities ($t2); + }; + return $text if defined($text); + + # If it's not installed, just complain instead of trying to halfass it. + print STDOUT ("\n\tPerl is broken. Do this to repair it:\n" . + "\n\tsudo cpan HTML::Entities\n\n"); + exit (1); } # Convert any Unicode characters to Latin1 if possible. # Unconvertable bytes are left alone. # -sub de_unicoddle($) { +sub utf8_to_latin1($) { my ($text) = @_; - foreach my $key (keys (%unicode_latin1_table)) { - my $val = $unicode_latin1_table{$key}; - $text =~ s/$key/$val/gs; - } + + utf8::encode ($text); # Unpack Unicode back to multi-byte UTF-8. + + # Maybe it would be better to handle this in the Unicode domain + # by doing things like s/\x{2018}/\"/g, but without decoding the + # string back to UTF-8 first, I'm at a loss as to how to have + # "á" print as "\340" instead of as "\303\240". + + $text =~ s/ \xC2 ( [\xA0-\xFF] ) / $1 /gsex; + $text =~ s/ \xC3 ( [\x80-\xFF] ) / chr (ord($1) | 0x40) /gsex; + + # Handles a few 3-byte sequences too. + $text =~ s/\xE2\x80\x93/--/gs; + $text =~ s/\xE2\x80\x94/--/gs; + $text =~ s/\xE2\x80\x98/`/gs; + $text =~ s/\xE2\x80\x99/'/gs; + $text =~ s/\xE2\x80\x9C/``/gs; + $text =~ s/\xE2\x80\x9D/'/gs; + $text =~ s/\xE2\x80\xA2/•/gs; + $text =~ s/\xE2\x80\xA6/.../gs; + $text =~ s/\xE2\x80\xB2/'/gs; + $text =~ s/\xE2\x84\xA2/™/gs; + $text =~ s/\xE2\x86\x90/ ← /gs; + return $text; } @@ -173,13 +117,12 @@ sub de_unicoddle($) { # sub get_x11_prefs() { my $got_any_p = 0; - local *IN; - if (open (IN, "<$config_file")) { + if (open (my $in, '<', $config_file)) { print STDERR "$progname: reading $config_file\n" if ($verbose > 1); - my $body = ''; - while () { $body .= $_; } - close IN; + local $/ = undef; # read entire file + my $body = <$in>; + close $in; $got_any_p = get_x11_prefs_1 ($body); } elsif ($verbose > 1) { @@ -215,6 +158,7 @@ sub get_x11_prefs_1($) { my $got_any_p = 0; $body =~ s@\\\n@@gs; + $body =~ s@^[ \t]*#[^\n]*$@@gm; if ($body =~ m/^[.*]*textMode:[ \t]*([^\s]+)\s*$/im) { $text_mode = $1; @@ -247,7 +191,10 @@ sub get_cocoa_prefs($) { $text_mode = $v if defined ($v); # The "textMode" pref is set to a number instead of a string because I - # can't figure out the black magic to make Cocoa bindings work right. + # couldn't figure out the black magic to make Cocoa bindings work right. + # + # Update: as of 5.33, Cocoa writes strings instead of numbers, but + # pre-existing saved preferences might still have numbers in them. # if ($text_mode eq '0') { $text_mode = 'date'; } elsif ($text_mode eq '1') { $text_mode = 'literal'; } @@ -331,6 +278,8 @@ sub which($) { sub output() { + binmode (STDOUT, ($latin1_p ? ':raw' : ':utf8')); + # Do some basic sanity checking (null text, null file names, etc.) # if (($text_mode eq 'literal' && $text_literal =~ m/^\s*$/i) || @@ -343,6 +292,7 @@ sub output() { if ($text_mode eq 'literal') { $text_literal = strftime ($text_literal, localtime); + $text_literal = utf8_to_latin1($text_literal) if ($latin1_p); $text_literal =~ y/A-Za-z/N-ZA-Mn-za-m/ if ($nyarlathotep_p); print STDOUT $text_literal; print STDOUT "\n" unless ($text_literal =~ m/\n$/s); @@ -351,23 +301,25 @@ sub output() { $text_file =~ s@^~/@$ENV{HOME}/@s; # allow literal "~/" - local *IN; - if (open (IN, "<$text_file")) { + if (open (my $in, '<:raw', $text_file)) { print STDERR "$progname: reading $text_file\n" if ($verbose); + binmode (STDOUT, ':raw'); - if ($wrap_columns && $wrap_columns > 0) { + if (($wrap_columns && $wrap_columns > 0) || $truncate_lines) { # read it, then reformat it. - my $body = ''; - while () { $body .= $_; } - reformat_text ($body); + local $/ = undef; # read entire file + my $body = <$in>; + $body = reformat_text ($body); + print STDOUT $body; } else { - # stream it - while () { + # stream it by lines + while (<$in>) { + $_ = utf8_to_latin1($_) if ($latin1_p); y/A-Za-z/N-ZA-Mn-za-m/ if ($nyarlathotep_p); - print $_; + print STDOUT $_; } } - close IN; + close $in; } else { error ("$text_file: $!"); } @@ -378,10 +330,21 @@ sub output() { $text_program = which ($prog) . $args; print STDERR "$progname: running $text_program\n" if ($verbose); - if ($wrap_columns && $wrap_columns > 0) { + if (($wrap_columns && $wrap_columns > 0) || $truncate_lines) { # read it, then reformat it. - my $body = `( $text_program ) 2>&1`; - reformat_text ($body); + my $lines = 0; + my $body = ""; + my $cmd = "( $text_program ) 2>&1"; + # $cmd .= " | sed -l"; # line buffer instead of 4k pipe buffer + open (my $pipe, '-|:unix', $cmd); + while (my $line = <$pipe>) { + $body .= $line; + $lines++; + last if ($truncate_lines && $lines > $truncate_lines); + } + close $pipe; + $body = reformat_text ($body); + print STDOUT $body; } else { # stream it safe_system ("$text_program"); @@ -409,7 +372,8 @@ sub output() { if (-f "/usr/sbin/system_profiler") { # "Mac OS X 10.4.5 (8H14)" my $sp = # "iMac G5" - `/usr/sbin/system_profiler SPSoftwareDataType SPHardwareDataType`; + `/usr/sbin/system_profiler SPSoftwareDataType SPHardwareDataType 2>/dev/null`; + # system_profiler on OS X 10.10 generates spurious error messages. my ($v) = ($sp =~ m/^\s*System Version:\s*(.*)$/mi); my ($s) = ($sp =~ m/^\s*(?:CPU|Processor) Speed:\s*(.*)$/mi); my ($t) = ($sp =~ m/^\s*(?:Machine|Model) Name:\s*(.*)$/mi); @@ -482,6 +446,10 @@ sub reformat_html($$) { s@[\r\n]@ @gsi; } + # This right here is the part where I doom us all to inhuman + # toil for the One whose Name cannot be expressed in the + # Basic Multilingual Plane. http://jwz.org/b/yhAT He comes. + s@@@gsi; # lose comments s@<(STYLE|SCRIPT)\b[^<>]*>.*?@@gsi; # lose css and js @@ -495,9 +463,57 @@ sub reformat_html($$) { s@<[^<>]*>?@@gs; # lose all other HTML tags $_ = de_entify ($_); # convert HTML entities - # elide any remaining non-Latin1 binary data... - s/([\177-\377]+(\s*[\177-\377]+)[^a-z\d]*)/«...» /g; - #s/([\177-\377]+(\s*[\177-\377]+)[^a-z\d]*)/«$1» /g; + # For Wikipedia: delete anything inside {{ }} and unwrap [[tags]], + # among other things. + # + if ($rss_p eq 'wiki') { + + s@@@gsi; # lose HTML comments again + + # Creation line is often truncated: screws up parsing with unbalanced {{. + s@(: +[^a-zA-Z ]* *Created page) with [^\n]+@$1@s; + + s@/\*.*?\*/@@si; # /* ... */ + + # Try to omit all tables, since they're impossible to read as text. + # + 1 while (s/\{\{[^{}]*}}/ /gs); # {{ ... }} + 1 while (s/\{\|.*?\|\}/\n\n/gs); # {| ... |} + 1 while (s/\|-.*?\|/ /gs); # |- ... | (table cell) + + # Convert anchors to something more readable. + # + s/\[\[([^\[\]\|]+)\|([^\[\]]+)\]\]/$2/gs; # [[link|anchor]] + s/\[\[([^:\[\]\|]+)\]\]/$1/gs; # [[anchor]] + s/\[https?:[^\[\]\s]+\s+([^\[\]]+)\]/$1/gs; # [url anchor] + + # Convert all references to asterisks. + s@\s*\s*.*?@*@gs; # ... -> "*" + s@\n[ \t]*\d+\s*\^\s*http[^\s]+[ \t]*\n@\n@gs; # 1 ^ URL (a Reflist) + + s@\[\[File:([^\|\]]+).*?\]\]@\n$1\n@gs; # [[File: X | ... ]] + s@\[\[Category:.*?\]\]@@gs; # omit categories + + s/<[^<>]*>//gs; # Omit all remaining tags + s/\'{3,}//gs; # Omit ''' and '''' + s/\'\'/\"/gs; # '' -> " + s/\`\`/\"/gs; # `` -> " + s/\"\"+/\"/gs; # "" -> " + + s/^[ \t]*[*#]+[ \t]*$//gm; # Omit lines with just * or # on them + + # Omit trailing headlines with no text after them (e.g. == Notes ==) + 1 while (s/\n==+[ \t]*[^\n=]+[ \t]*==+\s*$/\n/s); + + $_ = de_entify ($_); # convert HTML entities, again + } + + + # elide any remaining non-Latin1 binary data. + if ($latin1_p) { + utf8::encode ($_); # Unpack Unicode back to multi-byte UTF-8. + s/([^\000-\176]+(\s*[^\000-\176]+)[^a-z\d]*)/\xAB...\xBB /g; + } $_ .= "\n"; @@ -506,20 +522,29 @@ sub reformat_html($$) { if (!defined($wrap_columns) || $wrap_columns > 0) { $Text::Wrap::columns = ($wrap_columns || 72); - $_ = wrap ("", " ", $_); # wrap the lines as a paragraph - s/[ \t]+$//gm; # lose whitespace at end of line again + $Text::Wrap::break = '[\s/|]'; # wrap on slashes for URLs + $_ = wrap ("", " ", $_); # wrap the lines as a paragraph + s/[ \t]+$//gm; # lose whitespace at end of line again } s/^\n+//gs; + if ($truncate_lines) { + s/^(([^\n]*\n){$truncate_lines}).*$/$1/s; + } + + $_ = utf8_to_latin1($_) if ($latin1_p); y/A-Za-z/N-ZA-Mn-za-m/ if ($nyarlathotep_p); - print STDOUT $_; + + return $_; } sub reformat_rss($) { my ($body) = @_; + my $wiki_p = ($body =~ m@[^<>]*Wiki@si); + $body =~ s/(<(ITEM|ENTRY)\b)/\001\001$1/gsi; my @items = split (/\001\001/, $body); @@ -541,6 +566,8 @@ sub reformat_rss($) { } } + my $out = ''; + my $i = -1; foreach (@items) { $i++; @@ -578,16 +605,23 @@ sub reformat_rss($) { $title = '' if ($body1 eq $title); # Identical in Twitter's atom feed. - reformat_html ("$title

$body1", 1); - print "\n"; + $out .= reformat_html ("$title

$body1", $wiki_p ? 'wiki' : 'rss'); + $out .= "\n"; + } + + if ($truncate_lines) { + $out =~ s/^(([^\n]*\n){$truncate_lines}).*$/$1/s; } + + return $out; } sub rss_field_to_html($) { my ($body) = @_; - # Assume that if is present, everything inside that. + # If is present, everything inside that is HTML, + # and not double-encoded. # if ($body =~ m/^\s* 0) { print STDERR "$progname: wrapping at $wrap_columns...\n" if ($verbose > 2); $Text::Wrap::columns = $wrap_columns; + $Text::Wrap::break = '[\s/]'; # wrap on slashes for URLs $body = wrap ("", "", $body); $body =~ s/[ \t]+$//gm; } + if ($truncate_lines) { + $body =~ s/^(([^\n]*\n){$truncate_lines}).*$/$1/s; + } + + $body = utf8_to_latin1($body) if ($latin1_p); $body =~ y/A-Za-z/N-ZA-Mn-za-m/ if ($nyarlathotep_p); - print STDOUT $body; + return $body; } @@ -624,16 +663,21 @@ sub reformat_text($) { sub set_proxy($) { my ($ua) = @_; - if (!defined($ENV{http_proxy}) && !defined($ENV{HTTP_PROXY})) { - my $proxy_data = `scutil --proxy 2>/dev/null`; - my ($server) = ($proxy_data =~ m/\bHTTPProxy\s*:\s*([^\s]+)/s); - my ($port) = ($proxy_data =~ m/\bHTTPPort\s*:\s*([^\s]+)/s); - if ($server) { + my $proxy_data = `scutil --proxy 2>/dev/null`; + foreach my $proto ('http', 'https') { + my ($server) = ($proxy_data =~ m/\b${proto}Proxy\s*:\s*([^\s]+)/si); + my ($port) = ($proxy_data =~ m/\b${proto}Port\s*:\s*([^\s]+)/si); + my ($enable) = ($proxy_data =~ m/\b${proto}Enable\s*:\s*([^\s]+)/si); + + if ($server && $enable) { # Note: this ignores the "ExceptionsList". - $ENV{http_proxy} = "http://" . $server . ($port ? ":$port" : "") . "/"; - print STDERR "$progname: MacOS proxy: $ENV{http_proxy}\n" - if ($verbose > 2) - } + my $proto2 = 'http'; + $ENV{"${proto}_proxy"} = ("${proto2}://" . $server . + ($port ? ":$port" : "") . "/"); + print STDERR "$progname: MacOS $proto proxy: " . + $ENV{"${proto}_proxy"} . "\n" + if ($verbose > 2); + } } $ua->env_proxy(); @@ -670,16 +714,19 @@ sub get_url_text($) { $ct = 'text/plain'; } + utf8::decode ($body); # Pack multi-byte UTF-8 back into wide chars. + $ct = guess_content_type ($ct, $body); if ($ct eq 'html') { print STDERR "$progname: converting HTML...\n" if ($verbose > 2); - reformat_html ($body, 0); + $body = reformat_html ($body, 0); } elsif ($ct eq 'rss') { - reformat_rss ($body); + $body = reformat_rss ($body); } else { print STDERR "$progname: plain text...\n" if ($verbose > 2); - reformat_text ($body); + $body = reformat_text ($body); } + print STDOUT $body; } @@ -717,6 +764,10 @@ sub usage() { " it will be converted to plain-text.\n" . "\n" . " --cols N Wrap lines at this column. Default 72.\n" . + "\n" . + " --lines N No more than N lines of output.\n" . + "\n" . + " --latin1 Emit Latin1 instead of UTF-8.\n" . "\n"); exit 1; } @@ -734,6 +785,8 @@ sub main() { $load_p = 0; } elsif (m/^--?text$/) { $text_mode = 'literal'; $text_literal = shift @ARGV || ''; + $text_literal =~ s@\\n@\n@gs; + $text_literal =~ s@\\\n@\n@gs; $load_p = 0; } elsif (m/^--?file$/) { $text_mode = 'file'; $text_file = shift @ARGV || ''; @@ -745,7 +798,9 @@ sub main() { $text_url = shift @ARGV || ''; $load_p = 0; } elsif (m/^--?col(umn)?s?$/) { $wrap_columns = 0 + shift @ARGV; } + elsif (m/^--?lines?$/) { $truncate_lines = 0 + shift @ARGV; } elsif (m/^--?cocoa$/) { $cocoa_id = shift @ARGV; } + elsif (m/^--?latin1$/) { $latin1_p++; } elsif (m/^--?nyarlathotep$/) { $nyarlathotep_p++; } elsif (m/^-./) { usage; } else { usage; } @@ -786,8 +841,8 @@ sub main() { # 3) Different behavior than MacOS 10.1 through 10.4; and 4) # Different behavior than every other Unix in the world. # - # See http://jwz.livejournal.com/817438.html, and for those of - # you inside Apple, "Problem ID 5606018". + # See http://jwz.org/b/DHke, and for those of you inside Apple, + # "Problem ID 5606018". # # One workaround would be to rewrite the savers to have an # internal buffer, and always read as much data as possible as