X-Git-Url: http://git.hungrycats.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=driver%2Fxscreensaver-text;h=1d0170d9fbfaea30c2099a07807eace017082cc2;hb=d6b0217f2417bd19187f0ebc389d6c5c2233b11c;hp=f7d2a992d1858f7f1fa6f69beb18a8dd6f6dbf78;hpb=b81f521c5ad7022ac12db18ca8fcdd9fb063831e;p=xscreensaver

diff --git a/driver/xscreensaver-text b/driver/xscreensaver-text
index f7d2a992..1d0170d9 100755
--- a/driver/xscreensaver-text
+++ b/driver/xscreensaver-text
@@ -1,5 +1,5 @@
 #!/usr/bin/perl -w
-# Copyright © 2005-2012 Jamie Zawinski <jwz@jwz.org>
+# Copyright © 2005-2016 Jamie Zawinski <jwz@jwz.org>
 #
 # Permission to use, copy, modify, distribute, and sell this software and its
 # documentation for any purpose is hereby granted without fee, provided that
@@ -28,13 +28,16 @@ use strict;
 # Only error out if we're actually loading a URL instead of local data.
 BEGIN { eval 'use LWP::UserAgent;' }
 
+# Not sure how prevalent this is. Hope it's part of the default install.
+BEGIN { eval 'use HTML::Entities;' }
+
 use Socket;
 use POSIX qw(strftime);
 use Text::Wrap qw(wrap);
 use bytes;
 
 my $progname = $0; $progname =~ s@.*/@@g;
-my $version = q{ $Revision: 1.27 $ }; $version =~ s/^[^0-9]+([0-9.]+).*$/$1/;
+my ($version) = ('$Revision: 1.44 $' =~ m/\s(\d[.\d]+)\s/s);
 
 my $verbose = 0;
 my $http_proxy = undef;
@@ -44,127 +47,68 @@ my $text_mode     = 'date';
 my $text_literal  = '';
 my $text_file     = '';
 my $text_program  = '';
-my $text_url      = 'http://twitter.com/statuses/public_timeline.atom';
+my $text_url      = 'https://en.wikipedia.org/w/index.php?title=Special:NewPages&feed=rss';
 # Default URL needs to be set and match what's in OSX/XScreenSaverView.m
 
-my $wrap_columns  = undef;
+my $wrap_columns   = undef;
+my $truncate_lines = undef;
+my $latin1_p = 0;
 my $nyarlathotep_p = 0;
 
 
-# Maps HTML character entities to the corresponding Latin1 characters.
-#
-my %entity_table = (
-   "quot"   => '"', "amp"    => '&', "lt"     => '<', "gt"     => '>',
-   "nbsp"   => ' ', "iexcl"  => '¡', "cent"   => '¢', "pound"  => '£',
-   "curren" => '¤', "yen"    => '¥', "brvbar" => '¦', "sect"   => '§',
-   "uml"    => '¨', "copy"   => '©', "ordf"   => 'ª', "laquo"  => '«',
-   "not"    => '¬', "shy"    => '­', "reg"    => '®', "macr"   => '¯',
-   "deg"    => '°', "plusmn" => '±', "sup2"   => '²', "sup3"   => '³',
-   "acute"  => '´', "micro"  => 'µ', "para"   => '¶', "middot" => '·',
-   "cedil"  => '¸', "sup1"   => '¹', "ordm"   => 'º', "raquo"  => '»',
-   "frac14" => '¼', "frac12" => '½', "frac34" => '¾', "iquest" => '¿',
-   "Agrave" => 'À', "Aacute" => 'Á', "Acirc"  => 'Â', "Atilde" => 'Ã',
-   "Auml"   => 'Ä', "Aring"  => 'Å', "AElig"  => 'Æ', "Ccedil" => 'Ç',
-   "Egrave" => 'È', "Eacute" => 'É', "Ecirc"  => 'Ê', "Euml"   => 'Ë',
-   "Igrave" => 'Ì', "Iacute" => 'Í', "Icirc"  => 'Î', "Iuml"   => 'Ï',
-   "ETH"    => 'Ð', "Ntilde" => 'Ñ', "Ograve" => 'Ò', "Oacute" => 'Ó',
-   "Ocirc"  => 'Ô', "Otilde" => 'Õ', "Ouml"   => 'Ö', "times"  => '×',
-   "Oslash" => 'Ø', "Ugrave" => 'Ù', "Uacute" => 'Ú', "Ucirc"  => 'Û',
-   "Uuml"   => 'Ü', "Yacute" => 'Ý', "THORN"  => 'Þ', "szlig"  => 'ß',
-   "agrave" => 'à', "aacute" => 'á', "acirc"  => 'â', "atilde" => 'ã',
-   "auml"   => 'ä', "aring"  => 'å', "aelig"  => 'æ', "ccedil" => 'ç',
-   "egrave" => 'è', "eacute" => 'é', "ecirc"  => 'ê', "euml"   => 'ë',
-   "igrave" => 'ì', "iacute" => 'í', "icirc"  => 'î', "iuml"   => 'ï',
-   "eth"    => 'ð', "ntilde" => 'ñ', "ograve" => 'ò', "oacute" => 'ó',
-   "ocirc"  => 'ô', "otilde" => 'õ', "ouml"   => 'ö', "divide" => '÷',
-   "oslash" => 'ø', "ugrave" => 'ù', "uacute" => 'ú', "ucirc"  => 'û',
-   "uuml"   => 'ü', "yacute" => 'ý', "thorn"  => 'þ', "yuml"   => 'ÿ',
-   "apos"   => '\'',
-
-   # HTML 4 entities that do not have 1:1 Latin1 mappings.
-   "bull"  => "*",   "hellip"=> "...",  "prime" => "'",  "Prime" => "\"",
-   "frasl" => "/",   "trade" => "[tm]", "larr"  => "<-", "rarr"  => "->",
-   "harr"  => "<->", "lArr"  => "<=",   "rArr"  => "=>", "hArr"  => "<=>",
-   "empty" => "Ø",   "minus" => "-",    "lowast"=> "*",  "sim"   => "~",
-   "cong"  => "=~",  "asymp" => "~",    "ne"    => "!=", "equiv" => "==",
-   "le"    => "<=",  "ge"    => ">=",   "lang"  => "<",  "rang"  => ">",
-   "loz"   => "<>",  "OElig" => "OE",   "oelig" => "oe", "Yuml"  => "Y",
-   "circ"  => "^",   "tilde" => "~",    "ensp"  => " ",  "emsp"  => " ",
-   "thinsp"=> " ",   "ndash" => "-",    "mdash" => "-",  "lsquo" => "`",
-   "rsquo" => "'",   "sbquo" => "'",    "ldquo" => "\"", "rdquo" => "\"",
-   "bdquo" => "\"",  "lsaquo"=> "<",    "rsaquo"=> ">",
-);
-
-# Maps certain UTF8 characters (2 or 3 bytes) to the corresponding
-# Latin1 characters.
-#
-my %unicode_latin1_table = (
-   "\xC2\xA1" => '¡', "\xC2\xA2" => '¢', "\xC2\xA3" => '£', "\xC2\xA4" => '¤',
-   "\xC2\xA5" => '¥', "\xC2\xA6" => '¦', "\xC2\xA7" => '§', "\xC2\xA8" => '¨',
-   "\xC2\xA9" => '©', "\xC2\xAA" => 'ª', "\xC2\xAB" => '«', "\xC2\xAC" => '¬',
-   "\xC2\xAD" => '­', "\xC2\xAE" => '®', "\xC2\xAF" => '¯', "\xC2\xB0" => '°',
-   "\xC2\xB1" => '±', "\xC2\xB2" => '²', "\xC2\xB3" => '³', "\xC2\xB4" => '´',
-   "\xC2\xB5" => 'µ', "\xC2\xB6" => '¶', "\xC2\xB7" => '·', "\xC2\xB8" => '¸',
-   "\xC2\xB9" => '¹', "\xC2\xBA" => 'º', "\xC2\xBB" => '»', "\xC2\xBC" => '¼',
-   "\xC2\xBD" => '½', "\xC2\xBE" => '¾', "\xC2\xBF" => '¿', "\xC3\x80" => 'À',
-   "\xC3\x81" => 'Á', "\xC3\x82" => 'Â', "\xC3\x83" => 'Ã', "\xC3\x84" => 'Ä',
-   "\xC3\x85" => 'Å', "\xC3\x86" => 'Æ', "\xC3\x87" => 'Ç', "\xC3\x88" => 'È',
-   "\xC3\x89" => 'É', "\xC3\x8A" => 'Ê', "\xC3\x8B" => 'Ë', "\xC3\x8C" => 'Ì',
-   "\xC3\x8D" => 'Í', "\xC3\x8E" => 'Î', "\xC3\x8F" => 'Ï', "\xC3\x90" => 'Ð',
-   "\xC3\x91" => 'Ñ', "\xC3\x92" => 'Ò', "\xC3\x93" => 'Ó', "\xC3\x94" => 'Ô',
-   "\xC3\x95" => 'Õ', "\xC3\x96" => 'Ö', "\xC3\x97" => '×', "\xC3\x98" => 'Ø',
-   "\xC3\x99" => 'Ù', "\xC3\x9A" => 'Ú', "\xC3\x9B" => 'Û', "\xC3\x9C" => 'Ü',
-   "\xC3\x9D" => 'Ý', "\xC3\x9E" => 'Þ', "\xC3\x9F" => 'ß', "\xC3\xA0" => 'à',
-   "\xC3\xA1" => 'á', "\xC3\xA2" => 'â', "\xC3\xA3" => 'ã', "\xC3\xA4" => 'ä',
-   "\xC3\xA5" => 'å', "\xC3\xA6" => 'æ', "\xC3\xA7" => 'ç', "\xC3\xA8" => 'è',
-   "\xC3\xA9" => 'é', "\xC3\xAA" => 'ê', "\xC3\xAB" => 'ë', "\xC3\xAC" => 'ì',
-   "\xC3\xAD" => 'í', "\xC3\xAE" => 'î', "\xC3\xAF" => 'ï', "\xC3\xB0" => 'ð',
-   "\xC3\xB1" => 'ñ', "\xC3\xB2" => 'ò', "\xC3\xB3" => 'ó', "\xC3\xB4" => 'ô',
-   "\xC3\xB5" => 'õ', "\xC3\xB6" => 'ö', "\xC3\xB7" => '÷', "\xC3\xB8" => 'ø',
-   "\xC3\xB9" => 'ù', "\xC3\xBA" => 'ú', "\xC3\xBB" => 'û', "\xC3\xBC" => 'ü',
-   "\xC3\xBD" => 'ý', "\xC3\xBE" => 'þ', "\xC3\xBF" => 'ÿ',
-
-   "\xE2\x80\x93" => '--',  "\xE2\x80\x94" => '--',
-   "\xE2\x80\x98" => '`',   "\xE2\x80\x99" => '\'',
-   "\xE2\x80\x9C" => "``",  "\xE2\x80\x9D" => "''",
-   "\xE2\x80\xA6" => '...',
-);
-
-
 # Convert any HTML entities to Latin1 characters.
 #
 sub de_entify($) {
   my ($text) = @_;
-  $text =~ s/(&(\#)?([[:alpha:]\d]+);?)/
-    {
-     my $c = $3;
-     if (! defined($2)) {
-       $c = $entity_table{$c};		# for &Aacute;
-     } else {
-       if ($c =~ m@^x([\dA-F]+)$@si) {	# for &#x41;
-         $c = chr(hex($1));
-       } elsif ($c =~ m@^\d+$@si) {	# for &#65;
-         $c = chr($c);
-       } else {
-         $c = undef;
-       }
-     }
-     ($c || "[$3]");			# for &unknown; => "[unknown]"
-    }
-   /gexi;
-  return $text;
+  # Undefined input yields ''; text containing no '&' is returned as-is.
+  return '' unless defined($text);
+  return $text unless ($text =~ m/&/s);
+
+  # Convert any HTML entities to Unicode characters,
+  # if the HTML::Entities module is installed.
+  eval {
+    my $t2 = $text;
+    $text = undef;   # stays undef if the call below dies (module not loaded)
+    $text = HTML::Entities::decode_entities ($t2);
+  };
+  return $text if defined($text);
+  # Decoding died: the notice below goes to STDOUT and the program exits.
+  # If it's not installed, just complain instead of trying to halfass it.
+  print STDOUT ("\n\tPerl is broken. Do this to repair it:\n" .
+                "\n\tsudo cpan HTML::Entities\n\n");
+  exit (1);
 }
 
 
 # Convert any Unicode characters to Latin1 if possible.
 # Unconvertable bytes are left alone.
 #
-sub de_unicoddle($) {
+sub utf8_to_latin1($) {
   my ($text) = @_;
-  foreach my $key (keys (%unicode_latin1_table)) {
-    my $val = $unicode_latin1_table{$key};
-    $text =~ s/$key/$val/gs;
-  }
+
+  # Callers pass either decoded character strings or raw UTF-8 bytes.
+  # Encode only the former: re-encoding bytes would double-encode them,
+  # and the substitutions below would then yield UTF-8 instead of Latin1.
+  utf8::encode ($text) if utf8::is_utf8 ($text);
+
+  # Two-byte sequences: lead byte \xC2 covers U+00A0-U+00BF and \xC3 covers
+  # U+00C0-U+00FF, so e.g. "\303\240" (a-grave) comes out as "\340".
+  $text =~ s/ \xC2 ( [\xA0-\xFF] ) / $1 /gsex;
+  $text =~ s/ \xC3 ( [\x80-\xFF] ) / chr (ord($1) | 0x40) /gsex;
+
+  # A few 3-byte sequences have no Latin1 form; use ASCII stand-ins.
+  $text =~ s/\xE2\x80\x93/--/gs;        # en dash
+  $text =~ s/\xE2\x80\x94/--/gs;        # em dash
+  $text =~ s/\xE2\x80\x98/`/gs;         # left single quote
+  $text =~ s/\xE2\x80\x99/'/gs;         # right single quote
+  $text =~ s/\xE2\x80\x9C/``/gs;        # left double quote
+  $text =~ s/\xE2\x80\x9D/''/gs;        # right double quote
+  $text =~ s/\xE2\x80\xA2/*/gs;         # bullet
+  $text =~ s/\xE2\x80\xA6/.../gs;       # ellipsis
+  $text =~ s/\xE2\x80\xB2/'/gs;         # prime
+  $text =~ s/\xE2\x84\xA2/[tm]/gs;      # trade mark
+  $text =~ s/\xE2\x86\x90/ <- /gs;      # leftwards arrow
+
   return $text;
 }
 
@@ -173,13 +117,12 @@ sub de_unicoddle($) {
 #
 sub get_x11_prefs() {
   my $got_any_p = 0;
-  local *IN;
 
-  if (open (IN, "<$config_file")) {
+  if (open (my $in, '<', $config_file)) {
     print STDERR "$progname: reading $config_file\n" if ($verbose > 1);
-    my $body = '';
-    while (<IN>) { $body .= $_; }
-    close IN;
+    local $/ = undef;  # read entire file
+    my $body = <$in>;
+    close $in;
     $got_any_p = get_x11_prefs_1 ($body);
 
   } elsif ($verbose > 1) {
@@ -215,6 +158,7 @@ sub get_x11_prefs_1($) {
 
   my $got_any_p = 0;
   $body =~ s@\\\n@@gs;
+  $body =~ s@^[ \t]*#[^\n]*$@@gm;
 
   if ($body =~ m/^[.*]*textMode:[ \t]*([^\s]+)\s*$/im) {
     $text_mode = $1;
@@ -247,7 +191,10 @@ sub get_cocoa_prefs($) {
   $text_mode = $v if defined ($v);
 
   # The "textMode" pref is set to a number instead of a string because I
-  # can't figure out the black magic to make Cocoa bindings work right.
+  # couldn't figure out the black magic to make Cocoa bindings work right.
+  #
+  # Update: as of 5.33, Cocoa writes strings instead of numbers, but 
+  # pre-existing saved preferences might still have numbers in them.
   #
   if    ($text_mode eq '0') { $text_mode = 'date';    }
   elsif ($text_mode eq '1') { $text_mode = 'literal'; }
@@ -331,6 +278,8 @@ sub which($) {
 
 sub output() {
 
+  binmode (STDOUT, ($latin1_p ? ':raw' : ':utf8'));
+
   # Do some basic sanity checking (null text, null file names, etc.)
   #
   if (($text_mode eq 'literal' && $text_literal =~ m/^\s*$/i) ||
@@ -343,6 +292,7 @@ sub output() {
 
   if ($text_mode eq 'literal') {
     $text_literal = strftime ($text_literal, localtime);
+    $text_literal = utf8_to_latin1($text_literal) if ($latin1_p);
     $text_literal =~ y/A-Za-z/N-ZA-Mn-za-m/ if ($nyarlathotep_p);
     print STDOUT $text_literal;
     print STDOUT "\n" unless ($text_literal =~ m/\n$/s);
@@ -351,23 +301,25 @@ sub output() {
 
     $text_file =~ s@^~/@$ENV{HOME}/@s;     # allow literal "~/"
 
-    local *IN;
-    if (open (IN, "<$text_file")) {
+    if (open (my $in, '<:raw', $text_file)) {
       print STDERR "$progname: reading $text_file\n" if ($verbose);
+      binmode (STDOUT, ':raw');
 
-      if ($wrap_columns && $wrap_columns > 0) {
+      if (($wrap_columns && $wrap_columns > 0) || $truncate_lines) {
         # read it, then reformat it.
-        my $body = '';
-        while (<IN>) { $body .= $_; }
-        reformat_text ($body);
+        local $/ = undef;  # read entire file
+        my $body = <$in>;
+        $body = reformat_text ($body);
+        print STDOUT $body;
       } else {
-        # stream it
-        while (<IN>) { 
+        # stream it by lines
+        while (<$in>) { 
+          $_ = utf8_to_latin1($_) if ($latin1_p);
           y/A-Za-z/N-ZA-Mn-za-m/ if ($nyarlathotep_p);
-          print $_;
+          print STDOUT $_;
         }
       }
-      close IN;
+      close $in;
     } else {
       error ("$text_file: $!");
     }
@@ -378,10 +330,21 @@ sub output() {
     $text_program = which ($prog) . $args;
     print STDERR "$progname: running $text_program\n" if ($verbose);
 
-    if ($wrap_columns && $wrap_columns > 0) {
+    if (($wrap_columns && $wrap_columns > 0) || $truncate_lines) {
       # read it, then reformat it.
-      my $body = `( $text_program ) 2>&1`;
-      reformat_text ($body);
+      my $lines = 0;
+      my $body = "";
+      my $cmd = "( $text_program ) 2>&1";
+      # $cmd .= " | sed -l"; # line buffer instead of 4k pipe buffer
+      open (my $pipe, '-|:unix', $cmd);
+      while (my $line = <$pipe>) {
+        $body .= $line;
+        $lines++;
+        last if ($truncate_lines && $lines > $truncate_lines);
+      }
+      close $pipe;
+      $body = reformat_text ($body);
+      print STDOUT $body;
     } else {
       # stream it
       safe_system ("$text_program");
@@ -409,7 +372,8 @@ sub output() {
 
     if (-f "/usr/sbin/system_profiler") {   # "Mac OS X 10.4.5 (8H14)"
       my $sp =				    # "iMac G5"
-        `/usr/sbin/system_profiler SPSoftwareDataType SPHardwareDataType`;
+        `/usr/sbin/system_profiler SPSoftwareDataType SPHardwareDataType 2>/dev/null`;
+      # system_profiler on OS X 10.10 generates spurious error messages.
       my ($v) = ($sp =~ m/^\s*System Version:\s*(.*)$/mi);
       my ($s) = ($sp =~ m/^\s*(?:CPU|Processor) Speed:\s*(.*)$/mi);
       my ($t) = ($sp =~ m/^\s*(?:Machine|Model) Name:\s*(.*)$/mi);
@@ -482,6 +446,10 @@ sub reformat_html($$) {
     s@[\r\n]@ @gsi;
   }
 
+  # This right here is the part where I doom us all to inhuman
+  # toil for the One whose Name cannot be expressed in the
+  # Basic Multilingual Plane. http://jwz.org/b/yhAT He comes.
+
   s@<!--.*?-->@@gsi;				 # lose comments
   s@<(STYLE|SCRIPT)\b[^<>]*>.*?</\1\s*>@@gsi;    # lose css and js
 
@@ -495,9 +463,57 @@ sub reformat_html($$) {
   s@<[^<>]*>?@@gs;                # lose all other HTML tags
   $_ = de_entify ($_);            # convert HTML entities
 
-  # elide any remaining non-Latin1 binary data...
-  s/([\177-\377]+(\s*[\177-\377]+)[^a-z\d]*)/«...» /g;
-  #s/([\177-\377]+(\s*[\177-\377]+)[^a-z\d]*)/«$1» /g;
+  # For Wikipedia: delete anything inside {{ }} and unwrap [[tags]],
+  # among other things.
+  #
+  if ($rss_p eq 'wiki') {
+
+    s@<!--.*?-->@@gsi;				 # lose HTML comments again
+
+    # Creation line is often truncated: screws up parsing with unbalanced {{.
+    s@(: +[^a-zA-Z ]* *Created page) with [^\n]+@$1@s;
+
+    s@/\*.*?\*/@@si;				   # /* ... */
+
+    # Try to omit all tables, since they're impossible to read as text.
+    #
+    1 while (s/\{\{[^{}]*}}/ /gs);		   # {{ ... }}
+    1 while (s/\{\|.*?\|\}/\n\n/gs);		   # {| ... |}
+    1 while (s/\|-.*?\|/ /gs);			   # |- ... |  (table cell)
+
+    # Convert anchors to something more readable.
+    #
+    s/\[\[([^\[\]\|]+)\|([^\[\]]+)\]\]/$2/gs;	   # [[link|anchor]]
+    s/\[\[([^:\[\]\|]+)\]\]/$1/gs;		   # [[anchor]]
+    s/\[https?:[^\[\]\s]+\s+([^\[\]]+)\]/$1/gs;	   # [url anchor]
+
+    # Convert all references to asterisks.
+    s@\s*<ref>\s*.*?</ref>@*@gs;		   # <ref> ... <ref> ->  "*"
+    s@\n[ \t]*\d+\s*\^\s*http[^\s]+[ \t]*\n@\n@gs; # 1 ^ URL (a Reflist)
+
+    s@\[\[File:([^\|\]]+).*?\]\]@\n$1\n@gs;	  # [[File: X | ... ]]
+    s@\[\[Category:.*?\]\]@@gs;			  # omit categories
+
+    s/<[^<>]*>//gs;	# Omit all remaining tags
+    s/\'{3,}//gs;	# Omit ''' and ''''
+    s/\'\'/\"/gs;	# ''  ->  "
+    s/\`\`/\"/gs;	# ``  ->  "
+    s/\"\"+/\"/gs;	# ""  ->  "
+
+    s/^[ \t]*[*#]+[ \t]*$//gm;	# Omit lines with just * or # on them
+
+    # Omit trailing headlines with no text after them (e.g. == Notes ==)
+    1 while (s/\n==+[ \t]*[^\n=]+[ \t]*==+\s*$/\n/s);
+
+    $_ = de_entify ($_);            # convert HTML entities, again
+  }
+
+
+  # elide any remaining non-Latin1 binary data.
+  if ($latin1_p) {
+    utf8::encode ($_);  # Unpack Unicode back to multi-byte UTF-8.
+    s/([^\000-\176]+(\s*[^\000-\176]+)[^a-z\d]*)/\xAB...\xBB /g;
+  }
 
   $_ .= "\n";
 
@@ -506,20 +522,29 @@ sub reformat_html($$) {
 
   if (!defined($wrap_columns) || $wrap_columns > 0) {
     $Text::Wrap::columns = ($wrap_columns || 72);
-    $_ = wrap ("", "  ", $_);     # wrap the lines as a paragraph
-    s/[ \t]+$//gm;                # lose whitespace at end of line again
+    $Text::Wrap::break = '[\s/|]';  # wrap on slashes for URLs
+    $_ = wrap ("", "  ", $_);       # wrap the lines as a paragraph
+    s/[ \t]+$//gm;                  # lose whitespace at end of line again
   }
 
   s/^\n+//gs;
 
+  if ($truncate_lines) {
+    s/^(([^\n]*\n){$truncate_lines}).*$/$1/s;
+  }
+
+  $_ = utf8_to_latin1($_) if ($latin1_p);
   y/A-Za-z/N-ZA-Mn-za-m/ if ($nyarlathotep_p);
-  print STDOUT $_;
+
+  return $_;
 }
 
 
 sub reformat_rss($) {
   my ($body) = @_;
 
+  my $wiki_p = ($body =~ m@<generator>[^<>]*Wiki@si);
+
   $body =~ s/(<(ITEM|ENTRY)\b)/\001\001$1/gsi;
   my @items = split (/\001\001/, $body);
 
@@ -541,6 +566,8 @@ sub reformat_rss($) {
     }
   }
 
+  my $out = '';
+
   my $i = -1;
   foreach (@items) {
     $i++;
@@ -578,16 +605,23 @@ sub reformat_rss($) {
 
     $title = '' if ($body1 eq $title);  # Identical in Twitter's atom feed.
 
-    reformat_html ("$title<P>$body1", 1);
-    print "\n";
+    $out .= reformat_html ("$title<P>$body1", $wiki_p ? 'wiki' : 'rss');
+    $out .= "\n";
+  }
+
+  if ($truncate_lines) {
+    $out =~ s/^(([^\n]*\n){$truncate_lines}).*$/$1/s;
   }
+
+  return $out;
 }
 
 
 sub rss_field_to_html($) {
   my ($body) = @_;
 
-  # Assume that if <![CDATA[...]]> is present, everything inside that.
+  # If <![CDATA[...]]> is present, everything inside that is HTML,
+  # and not double-encoded.
   #
   if ($body =~ m/^\s*<!\[CDATA\[(.*?)\]\s*\]/is) {
     $body = $1;
@@ -595,7 +629,6 @@ sub rss_field_to_html($) {
     $body = de_entify ($body);      # convert entities to get HTML from XML
   }
 
-  $body = de_unicoddle ($body);     # convert UTF8 to Latin1
   return $body;
 }
 
@@ -608,12 +641,18 @@ sub reformat_text($) {
   if ($wrap_columns && $wrap_columns > 0) {
     print STDERR "$progname: wrapping at $wrap_columns...\n" if ($verbose > 2);
     $Text::Wrap::columns = $wrap_columns;
+    $Text::Wrap::break = '[\s/]';  # wrap on slashes for URLs
     $body = wrap ("", "", $body);
     $body =~ s/[ \t]+$//gm;
   }
 
+  if ($truncate_lines) {
+    $body =~ s/^(([^\n]*\n){$truncate_lines}).*$/$1/s;
+  }
+
+  $body = utf8_to_latin1($body) if ($latin1_p);
   $body =~ y/A-Za-z/N-ZA-Mn-za-m/ if ($nyarlathotep_p);
-  print STDOUT $body;
+  return $body;
 }
 
 
@@ -624,16 +663,21 @@ sub reformat_text($) {
 sub set_proxy($) {
   my ($ua) = @_;
 
-  if (!defined($ENV{http_proxy}) && !defined($ENV{HTTP_PROXY})) {
-    my $proxy_data = `scutil --proxy 2>/dev/null`;
-    my ($server) = ($proxy_data =~ m/\bHTTPProxy\s*:\s*([^\s]+)/s);
-    my ($port)   = ($proxy_data =~ m/\bHTTPPort\s*:\s*([^\s]+)/s);
-    if ($server) {
+  my $proxy_data = `scutil --proxy 2>/dev/null`;
+  foreach my $proto ('http', 'https') {
+    my ($server) = ($proxy_data =~ m/\b${proto}Proxy\s*:\s*([^\s]+)/si);
+    my ($port)   = ($proxy_data =~ m/\b${proto}Port\s*:\s*([^\s]+)/si);
+    my ($enable) = ($proxy_data =~ m/\b${proto}Enable\s*:\s*([^\s]+)/si);
+
+    if ($server && $enable) {
       # Note: this ignores the "ExceptionsList".
-      $ENV{http_proxy} = "http://" . $server . ($port ? ":$port" : "") . "/";
-      print STDERR "$progname: MacOS proxy: $ENV{http_proxy}\n"
-        if ($verbose > 2)
-      }
+      my $proto2 = 'http';
+      $ENV{"${proto}_proxy"} = ("${proto2}://" . $server .
+                                ($port ? ":$port" : "") . "/");
+      print STDERR "$progname: MacOS $proto proxy: " .
+                   $ENV{"${proto}_proxy"} . "\n"
+        if ($verbose > 2);
+    }
   }
 
   $ua->env_proxy();
@@ -670,16 +714,19 @@ sub get_url_text($) {
     $ct = 'text/plain';
   }
 
+  utf8::decode ($body);  # Pack multi-byte UTF-8 back into wide chars.
+
   $ct = guess_content_type ($ct, $body);
   if ($ct eq 'html') {
     print STDERR "$progname: converting HTML...\n" if ($verbose > 2);
-    reformat_html ($body, 0);
+    $body = reformat_html ($body, 0);
   } elsif ($ct eq 'rss')  {
-    reformat_rss ($body);
+    $body = reformat_rss ($body);
   } else {
     print STDERR "$progname: plain text...\n" if ($verbose > 2);
-    reformat_text ($body);
+    $body = reformat_text ($body);
   }
+  print STDOUT $body;
 }
 
 
@@ -717,6 +764,10 @@ sub usage() {
     "                        it will be converted to plain-text.\n" .
     "\n" .
     "       --cols N         Wrap lines at this column.  Default 72.\n" .
+    "\n" .
+    "       --lines N        No more than N lines of output.\n" .
+    "\n" .
+    "       --latin1         Emit Latin1 instead of UTF-8.\n" .
     "\n");
   exit 1;
 }
@@ -734,6 +785,8 @@ sub main() {
                               $load_p = 0; }
     elsif (m/^--?text$/)    { $text_mode = 'literal';
                               $text_literal = shift @ARGV || '';
+                              $text_literal =~ s@\\n@\n@gs;
+                              $text_literal =~ s@\\\n@\n@gs;
                               $load_p = 0; }
     elsif (m/^--?file$/)    { $text_mode = 'file';
                               $text_file = shift @ARGV || '';
@@ -745,7 +798,9 @@ sub main() {
                               $text_url = shift @ARGV || '';
                               $load_p = 0; }
     elsif (m/^--?col(umn)?s?$/) { $wrap_columns = 0 + shift @ARGV; }
+    elsif (m/^--?lines?$/)  { $truncate_lines = 0 + shift @ARGV; }
     elsif (m/^--?cocoa$/)   { $cocoa_id = shift @ARGV; }
+    elsif (m/^--?latin1$/)  { $latin1_p++; }
     elsif (m/^--?nyarlathotep$/) { $nyarlathotep_p++; }
     elsif (m/^-./) { usage; }
     else { usage; }
@@ -786,8 +841,8 @@ sub main() {
     # 3) Different behavior than MacOS 10.1 through 10.4; and 4)
     # Different behavior than every other Unix in the world.
     #
-    # See http://jwz.livejournal.com/817438.html, and for those of
-    # you inside Apple, "Problem ID 5606018".
+    # See http://jwz.org/b/DHke, and for those of you inside Apple,
+    # "Problem ID 5606018".
     #
     # One workaround would be to rewrite the savers to have an
     # internal buffer, and always read as much data as possible as