X-Git-Url: http://git.hungrycats.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=driver%2Fxscreensaver-text;h=e4c588c50cf41cd929f032ea11b34343c8cdae83;hb=ff35d056d723c9a5ffe728dbba5f1c25e141be04;hp=1f411851070f56173c581398397d038482f9293e;hpb=ec8d2b32b63649e6d32bdfb306eda062769af823;p=xscreensaver

diff --git a/driver/xscreensaver-text b/driver/xscreensaver-text
index 1f411851..e4c588c5 100755
--- a/driver/xscreensaver-text
+++ b/driver/xscreensaver-text
@@ -1,5 +1,5 @@
 #!/usr/bin/perl -w
-# Copyright © 2005-2011 Jamie Zawinski <jwz@jwz.org>
+# Copyright © 2005-2013 Jamie Zawinski <jwz@jwz.org>
 #
 # Permission to use, copy, modify, distribute, and sell this software and its
 # documentation for any purpose is hereby granted without fee, provided that
@@ -24,13 +24,17 @@ require 5;
 #use diagnostics;	# Fails on some MacOS 10.5 systems
 use strict;
 
+# Some Linux systems don't install LWP by default!
+# Only error out if we're actually loading a URL instead of local data.
+BEGIN { eval 'use LWP::UserAgent;' }
+
 use Socket;
 use POSIX qw(strftime);
 use Text::Wrap qw(wrap);
 use bytes;
 
 my $progname = $0; $progname =~ s@.*/@@g;
-my $version = q{ $Revision: 1.24 $ }; $version =~ s/^[^0-9]+([0-9.]+).*$/$1/;
+my $version = q{ $Revision: 1.29 $ }; $version =~ s/^[^0-9]+([0-9.]+).*$/$1/;
 
 my $verbose = 0;
 my $http_proxy = undef;
@@ -40,7 +44,8 @@ my $text_mode     = 'date';
 my $text_literal  = '';
 my $text_file     = '';
 my $text_program  = '';
-my $text_url      = '';
+my $text_url      = 'http://twitter.com/statuses/public_timeline.atom';
+# Default URL needs to be set and match what's in OSX/XScreenSaverView.m
 
 my $wrap_columns  = undef;
 my $nyarlathotep_p = 0;
@@ -192,15 +197,16 @@ sub get_x11_prefs() {
   }
 
   if ($verbose > 1) {
-    printf STDERR "$progname: mode:    $text_mode\n";
-    printf STDERR "$progname: literal: $text_literal\n";
-    printf STDERR "$progname: file:    $text_file\n";
-    printf STDERR "$progname: program: $text_program\n";
-    printf STDERR "$progname: url:     $text_url\n";
+    print STDERR "$progname: mode:    $text_mode\n";
+    print STDERR "$progname: literal: $text_literal\n";
+    print STDERR "$progname: file:    $text_file\n";
+    print STDERR "$progname: program: $text_program\n";
+    print STDERR "$progname: url:     $text_url\n";
   }
 
   $text_mode =~ tr/A-Z/a-z/;
   $text_literal =~ s@\\n@\n@gs;
+  $text_literal =~ s@\\\n@\n@gs;
 }
 
 
@@ -251,6 +257,8 @@ sub get_cocoa_prefs($) {
 
   $v = get_cocoa_pref_1 ($id, "textLiteral");
   $text_literal = $v if defined ($v);
+  $text_literal =~ s@\\n@\n@gs;
+  $text_literal =~ s@\\\n@\n@gs;
 
   $v = get_cocoa_pref_1 ($id, "textFile");
   $text_file = $v if defined ($v);
@@ -426,167 +434,6 @@ sub output() {
 }
 
 
-# Loads the given URL, returns: $http, $head, $body.
-#
-sub get_url_1($;$) {
-  my ($url, $referer) = @_;
-  
-  $url =~ s@^feed:@http:@si;
-  if (! ($url =~ m@^http://@i)) {
-    error ("not an HTTP URL: $url");
-  }
-
-  my ($url_proto, $dummy, $serverstring, $path) = split(/\//, $url, 4);
-  $path = "" unless $path;
-
-  my ($them,$port) = split(/:/, $serverstring);
-  $port = 80 unless $port;
-
-  my $them2 = $them;
-  my $port2 = $port;
-  if ($http_proxy) {
-    $serverstring = $http_proxy if $http_proxy;
-    $serverstring =~ s@^[a-z]+://@@;
-    ($them2,$port2) = split(/:/, $serverstring);
-    $port2 = 80 unless $port2;
-  }
-
-  my ($remote, $iaddr, $paddr, $proto, $line);
-  $remote = $them2;
-  if ($port2 =~ /\D/) { $port2 = getservbyname($port2, 'tcp') }
-  if (!$port2) {
-    error ("unrecognised port in $url");
-  }
-
-  $iaddr = inet_aton($remote);
-  error ("host not found: $remote") unless ($iaddr);
-
-  $paddr   = sockaddr_in($port2, $iaddr);
-
-
-  my $head = "";
-  my $body = "";
-
-  $proto   = getprotobyname('tcp');
-  if (!socket(S, PF_INET, SOCK_STREAM, $proto)) {
-    error ("socket: $!");
-  }
-  if (!connect(S, $paddr)) {
-    error ("connect($serverstring): $!");
-  }
-
-  select(S); $| = 1; select(STDOUT);
-
-  my $user_agent = "$progname/$version";
-
-  my $hdrs = ("GET " . ($http_proxy ? $url : "/$path") . " HTTP/1.0\r\n" .
-              "Host: $them\r\n" .
-              "User-Agent: $user_agent\r\n");
-  if ($referer) {
-    $hdrs .= "Referer: $referer\r\n";
-  }
-  $hdrs .= "\r\n";
-
-  if ($verbose > 3) {
-    foreach (split('\r?\n', $hdrs)) {
-      print STDERR "  ==> $_\n";
-    }
-  }
-  print S $hdrs;
-  my $http = <S> || "";
-
-  $_  = $http;
-  s/[\r\n]+$//s;
-  print STDERR "  <== $_\n" if ($verbose > 3);
-
-  while (<S>) {
-    $head .= $_;
-    s/[\r\n]+$//s;
-    last if m@^$@;
-    print STDERR "  <== $_\n" if ($verbose > 3);
-  }
-
-  print STDERR "  <== \n" if ($verbose > 4);
-  my $lines = 0;
-  while (<S>) {
-    s/\r\n/\n/gs;
-    print STDERR "  <== $_" if ($verbose > 4);
-    $body .= $_;
-    $lines++;
-  }
-
-  print STDERR "  <== [ body ]: $lines lines, " . length($body) . " bytes\n"
-    if ($verbose == 4);
-
-  close S;
-
-  if (!$http) {
-    error ("null response: $url");
-  }
-
-  return ( $http, $head, $body );
-}
-
-
-# Loads the given URL, processes redirects, returns (content-type, body).
-#
-sub get_url($;$) {
-  my ($url, $referer) = @_;
-
-  print STDERR "$progname: loading $url\n" if ($verbose > 2);
-
-  my $orig_url = $url;
-  my $loop_count = 0;
-  my $max_loop_count = 10;
-
-  do {
-    my ( $http, $head, $body ) = get_url_1 ($url, $referer);
-
-    $http =~ s/[\r\n]+$//s;
-
-    if ( $http =~ m@^HTTP/[0-9.]+ 30[123]@ ) {
-      $_ = $head;
-
-      my ( $location ) = m@^location:[ \t]*(.*)$@im;
-      if ( $location ) {
-        $location =~ s/[\r\n]$//;
-
-        print STDERR "$progname: redirect from $url to $location\n"
-          if ($verbose > 3);
-
-        $referer = $url;
-        $url = $location;
-
-        if ($url =~ m@^/@) {
-          $referer =~ m@^(http://[^/]+)@i;
-          $url = $1 . $url;
-        } elsif (! ($url =~ m@^[a-z]+:@i)) {
-          $_ = $referer;
-          s@[^/]+$@@g if m@^http://[^/]+/@i;
-          $_ .= "/" if m@^http://[^/]+$@i;
-          $url = $_ . $url;
-        }
-
-      } else {
-        error ("no Location with \"$http\"");
-      }
-
-      if ($loop_count++ > $max_loop_count) {
-        error ("too many redirects ($max_loop_count) from $orig_url");
-      }
-
-    } elsif ( $http =~ m@^HTTP/[0-9.]+ ([4-9][0-9][0-9].*)$@ ) {
-      error ("failed: $1 ($url)");
-
-    } else {
-      my $ct = 'text/plain';
-      $ct = $1 if ($head =~ m/^content-type:\s*([^\s]+)/mi);
-      return ($ct, $body);
-    }
-  } while (1);
-}
-
-
 # Make an educated guess as to what's in this document.
 # We don't necessarily take the Content-Type header at face value.
 # Returns 'html', 'rss', or 'text';
@@ -614,6 +461,7 @@ sub guess_content_type($$) {
   return 'text';
 }
 
+
 sub reformat_html($$) {
   my ($body, $rss_p) = @_;
   $_ = $body;
@@ -647,6 +495,19 @@ sub reformat_html($$) {
   s@<[^<>]*>?@@gs;                # lose all other HTML tags
   $_ = de_entify ($_);            # convert HTML entities
 
+  # For Wikipedia: delete anything inside {{ }} and unwrap [[tags]]
+  #
+  if ($rss_p eq 'wiki') {
+    s@/\*.*?\*/@@si;				# /* ... */
+    1 while (s/{{[^{}]*}}//gs);			# {{ ... }}
+    s/\[\[([^:\[\]\|]+)\|([^\[\]]+)\]\]/$2/gs;	# [[link|anchor]]
+    s/\[\[([^:\[\]\|]+)\]\]/$1/gs;		# [[anchor]]
+    s/\[http:[^\[\]\s]+\s+([^\[\]]+)\]/$1/gs;	# [url anchor]
+#   s@\s*<ref>.*?</ref>@*@gs;			# <ref>url<ref>  ->  "*"
+    s/<[^<>]*>//gs;				# <tags> -- omit.
+  }
+
+
   # elide any remaining non-Latin1 binary data...
   s/([\177-\377]+(\s*[\177-\377]+)[^a-z\d]*)/«...» /g;
   #s/([\177-\377]+(\s*[\177-\377]+)[^a-z\d]*)/«$1» /g;
@@ -658,6 +519,7 @@ sub reformat_html($$) {
 
   if (!defined($wrap_columns) || $wrap_columns > 0) {
     $Text::Wrap::columns = ($wrap_columns || 72);
+    $Text::Wrap::break = '[\s/]';  # wrap on slashes for URLs
     $_ = wrap ("", "  ", $_);     # wrap the lines as a paragraph
     s/[ \t]+$//gm;                # lose whitespace at end of line again
   }
@@ -672,6 +534,8 @@ sub reformat_html($$) {
 sub reformat_rss($) {
   my ($body) = @_;
 
+  my $wiki_p = ($body =~ m@<generator>[^<>]*Wiki@si);
+
   $body =~ s/(<(ITEM|ENTRY)\b)/\001\001$1/gsi;
   my @items = split (/\001\001/, $body);
 
@@ -730,7 +594,7 @@ sub reformat_rss($) {
 
     $title = '' if ($body1 eq $title);  # Identical in Twitter's atom feed.
 
-    reformat_html ("$title<P>$body1", 1);
+    reformat_html ("$title<P>$body1", $wiki_p ? 'wiki' : 'rss');
     print "\n";
   }
 }
@@ -760,6 +624,7 @@ sub reformat_text($) {
   if ($wrap_columns && $wrap_columns > 0) {
     print STDERR "$progname: wrapping at $wrap_columns...\n" if ($verbose > 2);
     $Text::Wrap::columns = $wrap_columns;
+    $Text::Wrap::break = '[\s/]';  # wrap on slashes for URLs
     $body = wrap ("", "", $body);
     $body =~ s/[ \t]+$//gm;
   }
@@ -773,39 +638,54 @@ sub reformat_text($) {
 # variables or by parsing the output of the (MacOS) program "scutil",
 # which tells us what the system-wide proxy settings are.
 #
-sub set_proxy() {
-
-  # historical suckage: the environment variable name is lower case.
-  $http_proxy = $ENV{http_proxy} || $ENV{HTTP_PROXY};
-
-  if (defined ($http_proxy)) {
-    if ($http_proxy && $http_proxy =~ m@^http://([^/]*)/?$@ ) {
-      # historical suckage: allow "http://host:port" as well as "host:port".
-      $http_proxy = $1;
-    }
+sub set_proxy($) {
+  my ($ua) = @_;
 
-  } else {
+  if (!defined($ENV{http_proxy}) && !defined($ENV{HTTP_PROXY})) {
     my $proxy_data = `scutil --proxy 2>/dev/null`;
     my ($server) = ($proxy_data =~ m/\bHTTPProxy\s*:\s*([^\s]+)/s);
     my ($port)   = ($proxy_data =~ m/\bHTTPPort\s*:\s*([^\s]+)/s);
-    # Note: this ignores the "ExceptionsList".
     if ($server) {
-      $http_proxy = $server;
-      $http_proxy .= ":$port" if $port;
-    }
+      # Note: this ignores the "ExceptionsList".
+      $ENV{http_proxy} = "http://" . $server . ($port ? ":$port" : "") . "/";
+      print STDERR "$progname: MacOS proxy: $ENV{http_proxy}\n"
+        if ($verbose > 2)
+      }
   }
 
-  print STDERR "$progname: proxy server: $http_proxy\n" 
-    if ($verbose > 2 && $http_proxy);
+  $ua->env_proxy();
 }
 
 
 sub get_url_text($) {
   my ($url) = @_;
 
-  set_proxy();
+  my $ua = eval 'LWP::UserAgent->new';
+
+  if (! $ua) {
+    print STDOUT ("\n\tPerl is broken. Do this to repair it:\n" .
+                  "\n\tsudo cpan LWP::UserAgent\n\n");
+    return;
+  }
 
-  my ($ct, $body) = get_url ($url);
+  set_proxy ($ua);
+  $ua->agent ("$progname/$version");
+  my $res = $ua->get ($url);
+  my $body;
+  my $ct;
+
+  if ($res && $res->is_success) {
+    $body = $res->decoded_content || '';
+    $ct   = $res->header ('Content-Type') || 'text/plain';
+
+  } else {
+    my $err = ($res ? $res->status_line : '') || '';
+    $err = 'unknown error' unless $err;
+    $err = "$url: $err";
+    # error ($err);
+    $body = "Error loading URL $err\n\n";
+    $ct = 'text/plain';
+  }
 
   $ct = guess_content_type ($ct, $body);
   if ($ct eq 'html') {