#!/usr/bin/perl -w
-# Copyright © 2005 Jamie Zawinski <jwz@jwz.org>
+# Copyright © 2005-2016 Jamie Zawinski <jwz@jwz.org>
#
# Permission to use, copy, modify, distribute, and sell this software and its
# documentation for any purpose is hereby granted without fee, provided that
# .xscreensaver file. It may load a file, a URL, run a program, or just
# print the date.
#
+# In a native MacOS build of xscreensaver, this script is included in
+# the Contents/Resources/ directory of each screen saver .bundle that
+# uses it; and in that case, it looks up its resources using
+# /usr/bin/defaults instead.
+#
# Created: 19-Mar-2005.
require 5;
-use diagnostics;
+#use diagnostics; # Fails on some MacOS 10.5 systems
use strict;
+
+# Some Linux systems don't install LWP by default!
+# Only error out if we're actually loading a URL instead of local data.
+BEGIN { eval 'use LWP::UserAgent;' }
+
+# Not sure how prevalent this is. Hope it's part of the default install.
+BEGIN { eval 'use HTML::Entities;' }
+
use Socket;
use POSIX qw(strftime);
use Text::Wrap qw(wrap);
use bytes;
my $progname = $0; $progname =~ s@.*/@@g;
-my $version = q{ $Revision: 1.7 $ }; $version =~ s/^[^0-9]+([0-9.]+).*$/$1/;
+my ($version) = ('$Revision: 1.44 $' =~ m/\s(\d[.\d]+)\s/s);
my $verbose = 0;
my $http_proxy = undef;
my $text_literal = '';
my $text_file = '';
my $text_program = '';
-my $text_url = '';
-
-my $wrap_columns = undef;
-
+my $text_url = 'https://en.wikipedia.org/w/index.php?title=Special:NewPages&feed=rss';
+# Default URL needs to be set and match what's in OSX/XScreenSaverView.m
-# Maps HTML character entities to the corresponding Latin1 characters.
-#
-my %entity_table = (
- "quot" => '"', "amp" => '&', "lt" => '<', "gt" => '>',
- "nbsp" => ' ', "iexcl" => '¡', "cent" => '¢', "pound" => '£',
- "curren" => '¤', "yen" => '¥', "brvbar" => '¦', "sect" => '§',
- "uml" => '¨', "copy" => '©', "ordf" => 'ª', "laquo" => '«',
- "not" => '¬', "shy" => '', "reg" => '®', "macr" => '¯',
- "deg" => '°', "plusmn" => '±', "sup2" => '²', "sup3" => '³',
- "acute" => '´', "micro" => 'µ', "para" => '¶', "middot" => '·',
- "cedil" => '¸', "sup1" => '¹', "ordm" => 'º', "raquo" => '»',
- "frac14" => '¼', "frac12" => '½', "frac34" => '¾', "iquest" => '¿',
- "Agrave" => 'À', "Aacute" => 'Á', "Acirc" => 'Â', "Atilde" => 'Ã',
- "Auml" => 'Ä', "Aring" => 'Å', "AElig" => 'Æ', "Ccedil" => 'Ç',
- "Egrave" => 'È', "Eacute" => 'É', "Ecirc" => 'Ê', "Euml" => 'Ë',
- "Igrave" => 'Ì', "Iacute" => 'Í', "Icirc" => 'Î', "Iuml" => 'Ï',
- "ETH" => 'Ð', "Ntilde" => 'Ñ', "Ograve" => 'Ò', "Oacute" => 'Ó',
- "Ocirc" => 'Ô', "Otilde" => 'Õ', "Ouml" => 'Ö', "times" => '×',
- "Oslash" => 'Ø', "Ugrave" => 'Ù', "Uacute" => 'Ú', "Ucirc" => 'Û',
- "Uuml" => 'Ü', "Yacute" => 'Ý', "THORN" => 'Þ', "szlig" => 'ß',
- "agrave" => 'à', "aacute" => 'á', "acirc" => 'â', "atilde" => 'ã',
- "auml" => 'ä', "aring" => 'å', "aelig" => 'æ', "ccedil" => 'ç',
- "egrave" => 'è', "eacute" => 'é', "ecirc" => 'ê', "euml" => 'ë',
- "igrave" => 'ì', "iacute" => 'í', "icirc" => 'î', "iuml" => 'ï',
- "eth" => 'ð', "ntilde" => 'ñ', "ograve" => 'ò', "oacute" => 'ó',
- "ocirc" => 'ô', "otilde" => 'õ', "ouml" => 'ö', "divide" => '÷',
- "oslash" => 'ø', "ugrave" => 'ù', "uacute" => 'ú', "ucirc" => 'û',
- "uuml" => 'ü', "yacute" => 'ý', "thorn" => 'þ', "yuml" => 'ÿ',
- "apos" => '\''
-);
-
-# Maps certain UTF8 characters (2 or 3 bytes) to the corresponding
-# Latin1 characters.
-#
-my %unicode_latin1_table = (
- "\xC2\xA1" => '¡', "\xC2\xA2" => '¢', "\xC2\xA3" => '£', "\xC2\xA4" => '¤',
- "\xC2\xA5" => '¥', "\xC2\xA6" => '¦', "\xC2\xA7" => '§', "\xC2\xA8" => '¨',
- "\xC2\xA9" => '©', "\xC2\xAA" => 'ª', "\xC2\xAB" => '«', "\xC2\xAC" => '¬',
- "\xC2\xAD" => '', "\xC2\xAE" => '®', "\xC2\xAF" => '¯', "\xC2\xB0" => '°',
- "\xC2\xB1" => '±', "\xC2\xB2" => '²', "\xC2\xB3" => '³', "\xC2\xB4" => '´',
- "\xC2\xB5" => 'µ', "\xC2\xB6" => '¶', "\xC2\xB7" => '·', "\xC2\xB8" => '¸',
- "\xC2\xB9" => '¹', "\xC2\xBA" => 'º', "\xC2\xBB" => '»', "\xC2\xBC" => '¼',
- "\xC2\xBD" => '½', "\xC2\xBE" => '¾', "\xC2\xBF" => '¿', "\xC3\x80" => 'À',
- "\xC3\x81" => 'Á', "\xC3\x82" => 'Â', "\xC3\x83" => 'Ã', "\xC3\x84" => 'Ä',
- "\xC3\x85" => 'Å', "\xC3\x86" => 'Æ', "\xC3\x87" => 'Ç', "\xC3\x88" => 'È',
- "\xC3\x89" => 'É', "\xC3\x8A" => 'Ê', "\xC3\x8B" => 'Ë', "\xC3\x8C" => 'Ì',
- "\xC3\x8D" => 'Í', "\xC3\x8E" => 'Î', "\xC3\x8F" => 'Ï', "\xC3\x90" => 'Ð',
- "\xC3\x91" => 'Ñ', "\xC3\x92" => 'Ò', "\xC3\x93" => 'Ó', "\xC3\x94" => 'Ô',
- "\xC3\x95" => 'Õ', "\xC3\x96" => 'Ö', "\xC3\x97" => '×', "\xC3\x98" => 'Ø',
- "\xC3\x99" => 'Ù', "\xC3\x9A" => 'Ú', "\xC3\x9B" => 'Û', "\xC3\x9C" => 'Ü',
- "\xC3\x9D" => 'Ý', "\xC3\x9E" => 'Þ', "\xC3\x9F" => 'ß', "\xC3\xA0" => 'à',
- "\xC3\xA1" => 'á', "\xC3\xA2" => 'â', "\xC3\xA3" => 'ã', "\xC3\xA4" => 'ä',
- "\xC3\xA5" => 'å', "\xC3\xA6" => 'æ', "\xC3\xA7" => 'ç', "\xC3\xA8" => 'è',
- "\xC3\xA9" => 'é', "\xC3\xAA" => 'ê', "\xC3\xAB" => 'ë', "\xC3\xAC" => 'ì',
- "\xC3\xAD" => 'í', "\xC3\xAE" => 'î', "\xC3\xAF" => 'ï', "\xC3\xB0" => 'ð',
- "\xC3\xB1" => 'ñ', "\xC3\xB2" => 'ò', "\xC3\xB3" => 'ó', "\xC3\xB4" => 'ô',
- "\xC3\xB5" => 'õ', "\xC3\xB6" => 'ö', "\xC3\xB7" => '÷', "\xC3\xB8" => 'ø',
- "\xC3\xB9" => 'ù', "\xC3\xBA" => 'ú', "\xC3\xBB" => 'û', "\xC3\xBC" => 'ü',
- "\xC3\xBD" => 'ý', "\xC3\xBE" => 'þ', "\xC3\xBF" => 'ÿ',
-
- "\xE2\x80\x93" => '--', "\xE2\x80\x94" => '--',
- "\xE2\x80\x98" => '`', "\xE2\x80\x99" => '\'',
- "\xE2\x80\x9C" => "``", "\xE2\x80\x9D" => "''",
- "\xE2\x80\xA6" => '...',
-);
+my $wrap_columns = undef;
+my $truncate_lines = undef;
+my $latin1_p = 0;
+my $nyarlathotep_p = 0;
-# Convert any HTML entities to Latin1 characters.
+# Convert any HTML entities to Unicode characters.
#
sub de_entify($) {
my ($text) = @_;
- $text =~ s/(&(\#)?([[:alpha:]\d]+);?)/
- {
- my $c;
- if ($2) {
- $c = chr($3); # the &#number is always decimal, right?
- } else {
- $c = $entity_table{$3};
- }
-# print STDERR "$progname: warning: unknown HTML character entity \"$1\"\n"
-# unless $c;
- ($c ? $c : "[$3]");
- }
- /gexi;
- return $text;
+
+  # Returns a copy of $text with HTML entities decoded.  An undefined
+  # argument yields the empty string.
+  #
+  # Fast paths: nothing to do for undef, or for text with no ampersand
+  # in it (no ampersand means no entity).
+  if (! defined ($text)) { return ''; }
+  if ($text !~ m/&/s)    { return $text; }
+
+  # Decode via HTML::Entities.  The call is wrapped in eval so that a
+  # missing module (the call dies) leaves us with undef rather than
+  # killing the script here.
+  my $decoded = eval { HTML::Entities::decode_entities ($text) };
+  if (defined ($decoded)) { return $decoded; }
+
+  # The module isn't usable: complain on STDOUT instead of trying to
+  # halfass the decoding, then give up.
+  print STDOUT ("\n\tPerl is broken. Do this to repair it:\n" .
+                "\n\tsudo cpan HTML::Entities\n\n");
+  exit (1);
}
# Convert any Unicode characters to Latin1 if possible.
-# Unconvertable bytes are left alone.
+# Unconvertible bytes are left alone.
#
-sub de_unicoddle($) {
+sub utf8_to_latin1($) {
my ($text) = @_;
- foreach my $key (keys (%unicode_latin1_table)) {
- my $val = $unicode_latin1_table{$key};
- $text =~ s/$key/$val/gs;
- }
+
+  # Takes a string, returns a byte string: two-byte UTF-8 sequences for
+  # U+00A0 - U+00FF become the single Latin1 byte, and a handful of
+  # three-byte punctuation sequences become ASCII approximations.
+  # Every other byte sequence is passed through untouched.
+
+  utf8::encode ($text);  # Unpack Unicode back to multi-byte UTF-8.
+
+  # Maybe it would be better to handle this in the Unicode domain
+  # by doing things like s/\x{2018}/\"/g, but without decoding the
+  # string back to UTF-8 first, I'm at a loss as to how to have
+  # U+00E0 ("a" with grave) print as "\340" instead of as "\303\240".
+
+  # The second byte of a two-byte UTF-8 sequence is always a
+  # continuation byte (0x80 - 0xBF), so match only those.
+  #   C2 A0..BF  =>  U+00A0..U+00BF  (the second byte is the Latin1 byte)
+  #   C3 80..BF  =>  U+00C0..U+00FF  (the second byte with bit 0x40 set)
+  $text =~ s/ \xC2 ( [\xA0-\xBF] ) / $1 /gsex;
+  $text =~ s/ \xC3 ( [\x80-\xBF] ) / chr (ord($1) | 0x40) /gsex;
+
+  # Handles a few 3-byte sequences too.  The replacements are pure
+  # ASCII: this file is compiled under "use bytes" without "use utf8",
+  # so a literal non-ASCII glyph on the right-hand side would just be
+  # the same UTF-8 bytes again and nothing would be converted.
+  $text =~ s/\xE2\x80\x93/--/gs;     # U+2013 en dash
+  $text =~ s/\xE2\x80\x94/--/gs;     # U+2014 em dash
+  $text =~ s/\xE2\x80\x98/`/gs;      # U+2018 left single quote
+  $text =~ s/\xE2\x80\x99/'/gs;      # U+2019 right single quote
+  $text =~ s/\xE2\x80\x9C/``/gs;     # U+201C left double quote
+  $text =~ s/\xE2\x80\x9D/''/gs;     # U+201D right double quote
+  $text =~ s/\xE2\x80\xA2/*/gs;      # U+2022 bullet
+  $text =~ s/\xE2\x80\xA6/.../gs;    # U+2026 ellipsis
+  $text =~ s/\xE2\x80\xB2/'/gs;      # U+2032 prime
+  $text =~ s/\xE2\x84\xA2/(TM)/gs;   # U+2122 trade mark sign
+  $text =~ s/\xE2\x86\x90/ <- /gs;   # U+2190 leftwards arrow
+
return $text;
}
# Reads the prefs we use from ~/.xscreensaver
#
-sub get_prefs() {
-
+sub get_x11_prefs() {
+  # Loads the text-related settings: first from $config_file, and if
+  # that yielded nothing and $DISPLAY is set, from the output of
+  # "appres".  Either body is handed to get_x11_prefs_1() for parsing.
+  # Afterwards $text_mode is lower-cased and the "\n" escapes in
+  # $text_literal are turned into real newlines.
my $got_any_p = 0;
- local *IN;
- if (open (IN, "<$config_file")) {
+  if (open (my $in, '<', $config_file)) {
print STDERR "$progname: reading $config_file\n" if ($verbose > 1);
- my $body = '';
- while (<IN>) { $body .= $_; }
- close IN;
- $got_any_p = get_prefs_1 ($body);
+    # Slurp the entire file; $/ is only overridden for the read itself.
+    my $body = do { local $/ = undef; <$in> };
+    close $in;
+    $body = '' unless defined ($body);   # read error: parse nothing
+    $got_any_p = get_x11_prefs_1 ($body);
} elsif ($verbose > 1) {
print STDERR "$progname: $config_file: $!\n";
}
- if (! $got_any_p) {
+  if (! $got_any_p && defined ($ENV{DISPLAY})) {
# We weren't able to read settings from the .xscreensaver file.
# Fall back to any settings in the X resource database
# (/usr/X11R6/lib/X11/app-defaults/XScreenSaver)
#
print STDERR "$progname: reading X resources\n" if ($verbose > 1);
my $body = `appres XScreenSaver xscreensaver -1`;
- $got_any_p = get_prefs_1 ($body);
+    # Backticks yield undef when the command could not be started
+    # (e.g. appres is not installed); parse an empty body in that case
+    # instead of tripping "uninitialized value" warnings.
+    $body = '' unless defined ($body);
+    $got_any_p = get_x11_prefs_1 ($body);
}
if ($verbose > 1) {
- printf STDERR "$progname: mode: $text_mode\n";
- printf STDERR "$progname: literal: $text_literal\n";
- printf STDERR "$progname: file: $text_file\n";
- printf STDERR "$progname: program: $text_program\n";
- printf STDERR "$progname: url: $text_url\n";
+    print STDERR "$progname: mode: $text_mode\n";
+    print STDERR "$progname: literal: $text_literal\n";
+    print STDERR "$progname: file: $text_file\n";
+    print STDERR "$progname: program: $text_program\n";
+    print STDERR "$progname: url: $text_url\n";
}
$text_mode =~ tr/A-Z/a-z/;
$text_literal =~ s@\\n@\n@gs;
+  $text_literal =~ s@\\\n@\n@gs;   # backslash + real newline => newline
}
-sub get_prefs_1($) {
+sub get_x11_prefs_1($) {
my ($body) = @_;
my $got_any_p = 0;
$body =~ s@\\\n@@gs;
+ $body =~ s@^[ \t]*#[^\n]*$@@gm;
if ($body =~ m/^[.*]*textMode:[ \t]*([^\s]+)\s*$/im) {
$text_mode = $1;
}
+sub get_cocoa_prefs($) {
+  my ($id) = @_;
+
+  # Loads the text-related settings for the preferences domain $id by
+  # querying each key through get_cocoa_pref_1().  A key that comes back
+  # undefined leaves the corresponding global at its current value.
+
+  print STDERR "$progname: reading Cocoa prefs: \"$id\"\n" if ($verbose > 1);
+
+  # Query all five keys up front, in this order.
+  my ($mode, $literal, $file, $program, $url) =
+    map { get_cocoa_pref_1 ($id, $_) }
+        ("textMode", "textLiteral", "textFile", "textProgram", "textURL");
+
+  $text_mode = $mode if defined ($mode);
+
+  # The "textMode" pref is set to a number instead of a string because I
+  # couldn't figure out the black magic to make Cocoa bindings work right.
+  #
+  # Update: as of 5.33, Cocoa writes strings instead of numbers, but
+  # pre-existing saved preferences might still have numbers in them.
+  #
+  if    ($text_mode eq '0') { $text_mode = 'date';    }
+  elsif ($text_mode eq '1') { $text_mode = 'literal'; }
+  elsif ($text_mode eq '2') { $text_mode = 'file';    }
+  elsif ($text_mode eq '3') { $text_mode = 'url';     }
+  elsif ($text_mode eq '4') { $text_mode = 'program'; }
+
+  # Literal text: turn backslash-n, and backslash followed by a real
+  # newline, into a newline.
+  $text_literal = $literal if defined ($literal);
+  $text_literal =~ s@\\n@\n@gs;
+  $text_literal =~ s@\\\n@\n@gs;
+
+  $text_file    = $file    if defined ($file);
+  $text_program = $program if defined ($program);
+  $text_url     = $url     if defined ($url);
+}
+
+
+sub get_cocoa_pref_1($$) {
+  my ($id, $key) = @_;
+
+  # Reads one preference by running "defaults -currentHost read ID KEY"
+  # and returns its output with surrounding whitespace removed, or undef
+  # if there was no output (stderr from the command is discarded).
+
+  # make sure there's nothing stupid/malicious in either string:
+  # anything other than letters, digits, "-", "." and space becomes "_",
+  # since both strings are interpolated into a shell command line.
+  $id  =~ s/[^-a-z\d. ]/_/gsi;
+  $key =~ s/[^-a-z\d. ]/_/gsi;
+  my $cmd = "defaults -currentHost read \"$id\" \"$key\"";
+
+  print STDERR "$progname: executing $cmd\n"
+    if ($verbose > 3);
+
+  my $val = `$cmd 2>/dev/null`;
+  # Backticks yield undef if the shell could not be started; treat that
+  # like empty output so the code below doesn't warn under -w.
+  $val = '' unless defined ($val);
+  $val =~ s/^\s+//s;
+  $val =~ s/\s+$//s;
+
+  print STDERR "$progname: Cocoa: $id $key = \"$val\"\n"
+    if ($verbose > 2);
+
+  # An empty result means "no value": report it as undef.
+  $val = undef if ($val =~ m/^$/s);
+
+  return $val;
+}
+
+
# like system() but checks errors.
#
sub safe_system(@) {
sub output() {
+ binmode (STDOUT, ($latin1_p ? ':raw' : ':utf8'));
+
# Do some basic sanity checking (null text, null file names, etc.)
#
if (($text_mode eq 'literal' && $text_literal =~ m/^\s*$/i) ||
if ($text_mode eq 'literal') {
$text_literal = strftime ($text_literal, localtime);
+ $text_literal = utf8_to_latin1($text_literal) if ($latin1_p);
+ $text_literal =~ y/A-Za-z/N-ZA-Mn-za-m/ if ($nyarlathotep_p);
print STDOUT $text_literal;
print STDOUT "\n" unless ($text_literal =~ m/\n$/s);
} elsif ($text_mode eq 'file') {
- local *IN;
- if (open (IN, "<$text_file")) {
+ $text_file =~ s@^~/@$ENV{HOME}/@s; # allow literal "~/"
+
+ if (open (my $in, '<:raw', $text_file)) {
print STDERR "$progname: reading $text_file\n" if ($verbose);
+ binmode (STDOUT, ':raw');
- if ($wrap_columns && $wrap_columns > 0) {
+ if (($wrap_columns && $wrap_columns > 0) || $truncate_lines) {
# read it, then reformat it.
- my $body = '';
- while (<IN>) { $body .= $_; }
- reformat_text ($body);
+ local $/ = undef; # read entire file
+ my $body = <$in>;
+ $body = reformat_text ($body);
+ print STDOUT $body;
} else {
- # stream it
- while (<IN>) { print $_; }
+ # stream it by lines
+ while (<$in>) {
+ $_ = utf8_to_latin1($_) if ($latin1_p);
+ y/A-Za-z/N-ZA-Mn-za-m/ if ($nyarlathotep_p);
+ print STDOUT $_;
+ }
}
- close IN;
+ close $in;
} else {
error ("$text_file: $!");
}
$text_program = which ($prog) . $args;
print STDERR "$progname: running $text_program\n" if ($verbose);
- if ($wrap_columns && $wrap_columns > 0) {
+ if (($wrap_columns && $wrap_columns > 0) || $truncate_lines) {
# read it, then reformat it.
- my $body = `( $text_program ) 2>&1`;
- reformat_text ($body);
+ my $lines = 0;
+ my $body = "";
+ my $cmd = "( $text_program ) 2>&1";
+ # $cmd .= " | sed -l"; # line buffer instead of 4k pipe buffer
+ open (my $pipe, '-|:unix', $cmd);
+ while (my $line = <$pipe>) {
+ $body .= $line;
+ $lines++;
+ last if ($truncate_lines && $lines > $truncate_lines);
+ }
+ close $pipe;
+ $body = reformat_text ($body);
+ print STDOUT $body;
} else {
# stream it
safe_system ("$text_program");
} else { # $text_mode eq 'date'
- safe_system ("uname", "-n");
- if (-f "/etc/redhat-release") { system ("cat", "/etc/redhat-release"); }
- safe_system ("uname", "-sr");
- print "\n";
- safe_system ("date", "+%c");
- print "\n";
- my $ut = `uptime`;
- $ut =~ s/^[ \d:]*(am|pm)?//i;
- $ut =~ s/,\s*(load)/\n$1/;
- print "$ut\n";
- }
-
-}
-
-
-# Loads the given URL, returns: $http, $head, $body.
-#
-sub get_url_1($;$) {
- my ($url, $referer) = @_;
-
- if (! ($url =~ m@^http://@i)) {
- error ("not an HTTP URL: $url");
- }
-
- my ($url_proto, $dummy, $serverstring, $path) = split(/\//, $url, 4);
- $path = "" unless $path;
-
- my ($them,$port) = split(/:/, $serverstring);
- $port = 80 unless $port;
-
- my $them2 = $them;
- my $port2 = $port;
- if ($http_proxy) {
- $serverstring = $http_proxy if $http_proxy;
- $serverstring =~ s@^[a-z]+://@@;
- ($them2,$port2) = split(/:/, $serverstring);
- $port2 = 80 unless $port2;
- }
-
- my ($remote, $iaddr, $paddr, $proto, $line);
- $remote = $them2;
- if ($port2 =~ /\D/) { $port2 = getservbyname($port2, 'tcp') }
- if (!$port2) {
- error ("unrecognised port in $url");
- }
-
- $iaddr = inet_aton($remote);
- error ("host not found: $remote") unless ($iaddr);
-
- $paddr = sockaddr_in($port2, $iaddr);
-
-
- my $head = "";
- my $body = "";
-
- $proto = getprotobyname('tcp');
- if (!socket(S, PF_INET, SOCK_STREAM, $proto)) {
- error ("socket: $!");
- }
- if (!connect(S, $paddr)) {
- error ("connect($serverstring): $!");
- }
+ my $n = `uname -n`;
+ $n =~ s/\.local\n/\n/s;
+ print $n;
- select(S); $| = 1; select(STDOUT);
+ my $unamep = 1;
- my $user_agent = "$progname/$version";
-
- my $hdrs = ("GET " . ($http_proxy ? $url : "/$path") . " HTTP/1.0\r\n" .
- "Host: $them\r\n" .
- "User-Agent: $user_agent\r\n");
- if ($referer) {
- $hdrs .= "Referer: $referer\r\n";
- }
- $hdrs .= "\r\n";
-
- if ($verbose > 3) {
- foreach (split('\r?\n', $hdrs)) {
- print STDERR " ==> $_\n";
+ if (-f "/etc/redhat-release") { # "Fedora Core release 4 (Stentz)"
+ safe_system ("cat", "/etc/redhat-release");
}
- }
- print S $hdrs;
- my $http = <S> || "";
-
- $_ = $http;
- s/[\r\n]+$//s;
- print STDERR " <== $_\n" if ($verbose > 3);
- while (<S>) {
- $head .= $_;
- s/[\r\n]+$//s;
- last if m@^$@;
- print STDERR " <== $_\n" if ($verbose > 3);
- }
-
- print STDERR " <== \n" if ($verbose > 4);
- my $lines = 0;
- while (<S>) {
- s/\r\n/\n/gs;
- print STDERR " <== $_" if ($verbose > 4);
- $body .= $_;
- $lines++;
- }
+ if (-f "/etc/release") { # "Solaris 10 3/05 s10_74L2a X86"
+ safe_system ("head", "-1", "/etc/release");
+ }
- print STDERR " <== [ body ]: $lines lines, " . length($body) . " bytes\n"
- if ($verbose == 4);
+ if (-f "/usr/sbin/system_profiler") { # "Mac OS X 10.4.5 (8H14)"
+ my $sp = # "iMac G5"
+ `/usr/sbin/system_profiler SPSoftwareDataType SPHardwareDataType 2>/dev/null`;
+ # system_profiler on OS X 10.10 generates spurious error messages.
+ my ($v) = ($sp =~ m/^\s*System Version:\s*(.*)$/mi);
+ my ($s) = ($sp =~ m/^\s*(?:CPU|Processor) Speed:\s*(.*)$/mi);
+ my ($t) = ($sp =~ m/^\s*(?:Machine|Model) Name:\s*(.*)$/mi);
+ print "$v\n" if ($v);
+ print "$s $t\n" if ($s && $t);
+ $unamep = !defined ($v);
+ }
- close S;
+ if ($unamep) {
+ safe_system ("uname", "-sr"); # "Linux 2.6.15-1.1831_FC4"
+ }
- if (!$http) {
- error ("null response: $url");
+ print "\n";
+ safe_system ("date", "+%c");
+ print "\n";
+ my $ut = `uptime`;
+ $ut =~ s/^[ \d:]*(am|pm)?//i;
+ $ut =~ s/,\s*(load)/\n$1/;
+ print "$ut\n";
}
- return ( $http, $head, $body );
-}
-
-
-# Loads the given URL, processes redirects, returns (content-type, body).
-#
-sub get_url($;$) {
- my ($url, $referer) = @_;
-
- print STDERR "$progname: loading $url\n" if ($verbose > 2);
-
- my $orig_url = $url;
- my $loop_count = 0;
- my $max_loop_count = 10;
-
- do {
- my ( $http, $head, $body ) = get_url_1 ($url, $referer);
-
- $http =~ s/[\r\n]+$//s;
-
- if ( $http =~ m@^HTTP/[0-9.]+ 30[123]@ ) {
- $_ = $head;
-
- my ( $location ) = m@^location:[ \t]*(.*)$@im;
- if ( $location ) {
- $location =~ s/[\r\n]$//;
-
- print STDERR "$progname: redirect from $url to $location\n"
- if ($verbose > 3);
-
- $referer = $url;
- $url = $location;
-
- if ($url =~ m@^/@) {
- $referer =~ m@^(http://[^/]+)@i;
- $url = $1 . $url;
- } elsif (! ($url =~ m@^[a-z]+:@i)) {
- $_ = $referer;
- s@[^/]+$@@g if m@^http://[^/]+/@i;
- $_ .= "/" if m@^http://[^/]+$@i;
- $url = $_ . $url;
- }
-
- } else {
- error ("no Location with \"$http\"");
- }
-
- if ($loop_count++ > $max_loop_count) {
- error ("too many redirects ($max_loop_count) from $orig_url");
- }
-
- } elsif ( $http =~ m@^HTTP/[0-9.]+ ([4-9][0-9][0-9].*)$@ ) {
- error ("failed: $1 ($url)");
-
- } else {
- my $ct = 'text/plain';
- $ct = $1 if ($head =~ m/^content-type:\s*([^\s]+)/mi);
- return ($ct, $body);
- }
- } while (1);
}
return 'text';
}
+
sub reformat_html($$) {
my ($body, $rss_p) = @_;
$_ = $body;
+ # In HTML, try to preserve newlines inside of PRE.
+ #
if (! $rss_p) {
- # In HTML, unfold lines (this breaks PRE. Sue me.)
+ s@(<PRE\b[^<>]*>\s*)(.*?)(</PRE)@{
+ my ($a, $b, $c) = ($1, $2, $3);
+ $b =~ s/[\r\n]/<BR>/gs;
+ $a . $b . $c;
+ }@gsexi;
+ }
+
+ if (! $rss_p) {
+ # In HTML, unfold lines.
# In RSS, assume \n means literal line break.
s@[\r\n]@ @gsi;
}
+ # This right here is the part where I doom us all to inhuman
+ # toil for the One whose Name cannot be expressed in the
+ # Basic Multilingual Plane. http://jwz.org/b/yhAT He comes.
+
s@<!--.*?-->@@gsi; # lose comments
s@<(STYLE|SCRIPT)\b[^<>]*>.*?</\1\s*>@@gsi; # lose css and js
s@<[^<>]*>?@@gs; # lose all other HTML tags
$_ = de_entify ($_); # convert HTML entities
- # elide any remaining non-Latin1 binary data...
- s/([\177-\377]+(\s*[\177-\377]+)[^a-z\d]*)/«...» /g;
- #s/([\177-\377]+(\s*[\177-\377]+)[^a-z\d]*)/«$1» /g;
+ # For Wikipedia: delete anything inside {{ }} and unwrap [[tags]],
+ # among other things.
+ #
+ if ($rss_p eq 'wiki') {
+
+ s@<!--.*?-->@@gsi; # lose HTML comments again
+
+ # Creation line is often truncated: screws up parsing with unbalanced {{.
+ s@(: +[^a-zA-Z ]* *Created page) with [^\n]+@$1@s;
+
+ s@/\*.*?\*/@@si; # /* ... */
+
+ # Try to omit all tables, since they're impossible to read as text.
+ #
+ 1 while (s/\{\{[^{}]*}}/ /gs); # {{ ... }}
+ 1 while (s/\{\|.*?\|\}/\n\n/gs); # {| ... |}
+ 1 while (s/\|-.*?\|/ /gs); # |- ... | (table cell)
+
+ # Convert anchors to something more readable.
+ #
+ s/\[\[([^\[\]\|]+)\|([^\[\]]+)\]\]/$2/gs; # [[link|anchor]]
+ s/\[\[([^:\[\]\|]+)\]\]/$1/gs; # [[anchor]]
+ s/\[https?:[^\[\]\s]+\s+([^\[\]]+)\]/$1/gs; # [url anchor]
+
+ # Convert all references to asterisks.
+ s@\s*<ref>\s*.*?</ref>@*@gs; # <ref> ... <ref> -> "*"
+ s@\n[ \t]*\d+\s*\^\s*http[^\s]+[ \t]*\n@\n@gs; # 1 ^ URL (a Reflist)
+
+ s@\[\[File:([^\|\]]+).*?\]\]@\n$1\n@gs; # [[File: X | ... ]]
+ s@\[\[Category:.*?\]\]@@gs; # omit categories
+
+ s/<[^<>]*>//gs; # Omit all remaining tags
+ s/\'{3,}//gs; # Omit ''' and ''''
+ s/\'\'/\"/gs; # '' -> "
+ s/\`\`/\"/gs; # `` -> "
+ s/\"\"+/\"/gs; # "" -> "
+
+ s/^[ \t]*[*#]+[ \t]*$//gm; # Omit lines with just * or # on them
+
+ # Omit trailing headlines with no text after them (e.g. == Notes ==)
+ 1 while (s/\n==+[ \t]*[^\n=]+[ \t]*==+\s*$/\n/s);
+
+ $_ = de_entify ($_); # convert HTML entities, again
+ }
+
+
+ # elide any remaining non-Latin1 binary data.
+ if ($latin1_p) {
+ utf8::encode ($_); # Unpack Unicode back to multi-byte UTF-8.
+ s/([^\000-\176]+(\s*[^\000-\176]+)[^a-z\d]*)/\xAB...\xBB /g;
+ }
$_ .= "\n";
if (!defined($wrap_columns) || $wrap_columns > 0) {
$Text::Wrap::columns = ($wrap_columns || 72);
- $_ = wrap ("", " ", $_); # wrap the lines as a paragraph
- s/[ \t]+$//gm; # lose whitespace at end of line again
+ $Text::Wrap::break = '[\s/|]'; # wrap on slashes for URLs
+ $_ = wrap ("", " ", $_); # wrap the lines as a paragraph
+ s/[ \t]+$//gm; # lose whitespace at end of line again
+ }
+
+ s/^\n+//gs;
+
+ if ($truncate_lines) {
+ s/^(([^\n]*\n){$truncate_lines}).*$/$1/s;
}
- print STDOUT $_;
+ $_ = utf8_to_latin1($_) if ($latin1_p);
+ y/A-Za-z/N-ZA-Mn-za-m/ if ($nyarlathotep_p);
+
+ return $_;
}
sub reformat_rss($) {
my ($body) = @_;
+ my $wiki_p = ($body =~ m@<generator>[^<>]*Wiki@si);
+
$body =~ s/(<(ITEM|ENTRY)\b)/\001\001$1/gsi;
my @items = split (/\001\001/, $body);
}
}
+ my $out = '';
+
my $i = -1;
foreach (@items) {
$i++;
$title = rss_field_to_html ($title || '');
$body1 = rss_field_to_html ($body1 || '');
- reformat_html ("$title<P>$body1", 1);
- print "\n";
+ $title = '' if ($body1 eq $title); # Identical in Twitter's atom feed.
+
+ $out .= reformat_html ("$title<P>$body1", $wiki_p ? 'wiki' : 'rss');
+ $out .= "\n";
}
+
+ if ($truncate_lines) {
+ $out =~ s/^(([^\n]*\n){$truncate_lines}).*$/$1/s;
+ }
+
+ return $out;
}
sub rss_field_to_html($) {
my ($body) = @_;
- # Assume that if <![CDATA[...]]> is present, everything inside that.
+ # If <![CDATA[...]]> is present, everything inside that is HTML,
+ # and not double-encoded.
#
if ($body =~ m/^\s*<!\[CDATA\[(.*?)\]\s*\]/is) {
$body = $1;
$body = de_entify ($body); # convert entities to get HTML from XML
}
- $body = de_unicoddle ($body); # convert UTF8 to Latin1
return $body;
}
if ($wrap_columns && $wrap_columns > 0) {
print STDERR "$progname: wrapping at $wrap_columns...\n" if ($verbose > 2);
$Text::Wrap::columns = $wrap_columns;
+ $Text::Wrap::break = '[\s/]'; # wrap on slashes for URLs
$body = wrap ("", "", $body);
$body =~ s/[ \t]+$//gm;
}
- print STDOUT $body;
+ if ($truncate_lines) {
+ $body =~ s/^(([^\n]*\n){$truncate_lines}).*$/$1/s;
+ }
+
+ $body = utf8_to_latin1($body) if ($latin1_p);
+ $body =~ y/A-Za-z/N-ZA-Mn-za-m/ if ($nyarlathotep_p);
+ return $body;
+}
+
+
+# Figure out what the proxy server should be, either from environment
+# variables or by parsing the output of the (MacOS) program "scutil",
+# which tells us what the system-wide proxy settings are.
+#
+sub set_proxy($) {
+  my ($ua) = @_;
+
+  # Looks for enabled HTTP and HTTPS proxies in the output of
+  # "scutil --proxy", exports each one found as $http_proxy or
+  # $https_proxy, and then has the user agent $ua pick up its proxy
+  # settings from the environment.
+
+  my $scutil_output = `scutil --proxy 2>/dev/null`;
+
+  for my $proto ('http', 'https') {
+    my ($server) = ($scutil_output =~ m/\b${proto}Proxy\s*:\s*([^\s]+)/si);
+    my ($port)   = ($scutil_output =~ m/\b${proto}Port\s*:\s*([^\s]+)/si);
+    my ($enable) = ($scutil_output =~ m/\b${proto}Enable\s*:\s*([^\s]+)/si);
+
+    # Skip protocols that have no proxy host, or whose proxy is off.
+    next unless ($server && $enable);
+
+    # Note: this ignores the "ExceptionsList".
+    # The proxy URL always uses the "http" scheme, for both protocols.
+    my $env_key   = "${proto}_proxy";
+    my $proxy_url = "http://" . $server;
+    $proxy_url .= ":$port" if ($port);
+    $proxy_url .= "/";
+    $ENV{$env_key} = $proxy_url;
+
+    print STDERR "$progname: MacOS $proto proxy: $ENV{$env_key}\n"
+      if ($verbose > 2);
+  }
+
+  $ua->env_proxy();
}
sub get_url_text($) {
my ($url) = @_;
- # historical suckage: the environment variable name is lower case.
- $http_proxy = $ENV{http_proxy} || $ENV{HTTP_PROXY};
+  # Fetches $url, converts the response to plain text according to its
+  # (guessed) content type, and prints the result on STDOUT.  A failed
+  # fetch prints an "Error loading URL" message as the text instead.
+  #
+  # Constructing the agent inside eval means a missing LWP::UserAgent
+  # gives us undef here rather than a fatal error.
+  my $ua = eval { LWP::UserAgent->new };
- if ($http_proxy && $http_proxy =~ m@^http://([^/]*)/?$@ ) {
- # historical suckage: allow "http://host:port" as well as "host:port".
- $http_proxy = $1;
+  if (! $ua) {
+    print STDOUT "\n\tPerl is broken. Do this to repair it:\n",
+                 "\n\tsudo cpan LWP::UserAgent\n\n";
+    return;
}
- my ($ct, $body) = get_url ($url);
+  set_proxy ($ua);
+  $ua->agent ("$progname/$version");
+
+  my $response = $ua->get ($url);
+  my ($body, $ct);
+
+  if (! ($response && $response->is_success)) {
+    # Failure is reported as the text to display, not as a fatal error.
+    my $why = ($response && $response->status_line) || 'unknown error';
+    $body = "Error loading URL $url: $why\n\n";
+    $ct   = 'text/plain';
+  } else {
+    $body = $response->decoded_content || '';
+    $ct   = $response->header ('Content-Type') || 'text/plain';
+  }
+
+  utf8::decode ($body);  # Pack multi-byte UTF-8 back into wide chars.
$ct = guess_content_type ($ct, $body);
if ($ct eq 'html') {
print STDERR "$progname: converting HTML...\n" if ($verbose > 2);
- reformat_html ($body, 0);
+    $body = reformat_html ($body, 0);
} elsif ($ct eq 'rss') {
- reformat_rss ($body);
+    $body = reformat_rss ($body);
} else {
print STDERR "$progname: plain text...\n" if ($verbose > 2);
- reformat_text ($body);
+    $body = reformat_text ($body);
}
+  # The reformat_* routines return the text; emit it here.
+  print STDOUT $body;
}
" it will be converted to plain-text.\n" .
"\n" .
" --cols N Wrap lines at this column. Default 72.\n" .
+ "\n" .
+ " --lines N No more than N lines of output.\n" .
+ "\n" .
+ " --latin1 Emit Latin1 instead of UTF-8.\n" .
"\n");
exit 1;
}
sub main() {
my $load_p = 1;
+ my $cocoa_id = undef;
while ($#ARGV >= 0) {
$_ = shift @ARGV;
elsif (m/^--?date$/) { $text_mode = 'date';
$load_p = 0; }
elsif (m/^--?text$/) { $text_mode = 'literal';
- $text_literal = shift @ARGV;
+ $text_literal = shift @ARGV || '';
+ $text_literal =~ s@\\n@\n@gs;
+ $text_literal =~ s@\\\n@\n@gs;
$load_p = 0; }
elsif (m/^--?file$/) { $text_mode = 'file';
- $text_file = shift @ARGV;
+ $text_file = shift @ARGV || '';
$load_p = 0; }
elsif (m/^--?program$/) { $text_mode = 'program';
- $text_program = shift @ARGV;
+ $text_program = shift @ARGV || '';
$load_p = 0; }
elsif (m/^--?url$/) { $text_mode = 'url';
- $text_url = shift @ARGV;
+ $text_url = shift @ARGV || '';
$load_p = 0; }
elsif (m/^--?col(umn)?s?$/) { $wrap_columns = 0 + shift @ARGV; }
+ elsif (m/^--?lines?$/) { $truncate_lines = 0 + shift @ARGV; }
+ elsif (m/^--?cocoa$/) { $cocoa_id = shift @ARGV; }
+ elsif (m/^--?latin1$/) { $latin1_p++; }
+ elsif (m/^--?nyarlathotep$/) { $nyarlathotep_p++; }
elsif (m/^-./) { usage; }
else { usage; }
}
- get_prefs() if ($load_p);
+ if ($load_p) {
+
+ if (!defined ($cocoa_id)) {
+ # see OSX/XScreenSaverView.m
+ $cocoa_id = $ENV{XSCREENSAVER_CLASSPATH};
+ }
+
+ if (defined ($cocoa_id)) {
+ get_cocoa_prefs($cocoa_id);
+ } else {
+ get_x11_prefs();
+ }
+ }
+
output();
+
+
+ if (defined ($cocoa_id)) {
+ #
+ # On MacOS, sleep for 10 seconds between when the last output is
+ # printed, and when this process exits. This is because MacOS
+ # 10.5.0 and later broke ptys in a new and exciting way: basically,
+ # once the process at the end of the pty exits, you have exactly
+ # 1 second to read all the queued data off the pipe before it is
+ # summarily flushed.
+ #
+ # Many of the screen savers were written to depend on being able
+ # to read a small number of bytes, and continue reading until they
+ # reached EOF. This is no longer possible.
+ #
+ # Note that the current MacOS behavior has all four of these
+ # awesome properties: 1) Inconvenient; 2) Has no sane workaround;
+ # 3) Different behavior than MacOS 10.1 through 10.4; and 4)
+ # Different behavior than every other Unix in the world.
+ #
+ # See http://jwz.org/b/DHke, and for those of you inside Apple,
+ # "Problem ID 5606018".
+ #
+ # One workaround would be to rewrite the savers to have an
+ # internal buffer, and always read as much data as possible as
+ # soon as a pipe has input available. However, that's a lot more
+ # work, so instead, let's just not exit right away, and hope that
+ # 10 seconds is enough.
+ #
+ # This will solve the problem for invocations of xscreensaver-text
+ # that produce little output (e.g., date-mode); and won't solve it
+ # in cases where a large amount of text is generated in a short
+ # amount of time (e.g., url-mode.)
+ #
+ sleep (10);
+ }
}
main();