#!/usr/bin/perl -w
-# Copyright © 2001-2011 Jamie Zawinski <jwz@jwz.org>.
+# Copyright © 2001-2017 Jamie Zawinski <jwz@jwz.org>.
#
# Permission to use, copy, modify, distribute, and sell this software and its
# documentation for any purpose is hereby granted without fee, provided that
# but in Perl 5.10, both of these load, and cause errors!
# So we have to check for S_ISUID instead of S_ISDIR? WTF?
-use bytes; # Larry can take Unicode and shove it up his ass sideways.
- # Perl 5.8.0 causes us to start getting incomprehensible
- # errors about UTF-8 all over the place without this.
-
use Digest::MD5 qw(md5_base64);
-use LWP::Simple qw($ua);
+
+# Some Linux systems don't install LWP by default!
+# Only error out if we're actually loading a URL instead of local data.
+BEGIN { eval 'use LWP::Simple;' }
my $progname = $0; $progname =~ s@.*/@@g;
-my $version = q{ $Revision: 1.30 $ }; $version =~ s/^[^0-9]+([0-9.]+).*$/$1/;
+my ($version) = ('$Revision: 1.43 $' =~ m/\s(\d[.\d]+)\s/s);
my $verbose = 0;
my $dd = "$ENV{HOME}/Library/Caches"; # MacOS location
if (-d $dd) {
$cache_file_name = "$dd/org.jwz.xscreensaver.getimage.cache";
- } elsif (-d "$ENV{HOME}/tmp") {
+ } elsif (-d "$ENV{HOME}/.cache") { # Gnome "FreeDesktop XDG" location
+ $dd = "$ENV{HOME}/.cache/xscreensaver";
+ if (! -d $dd) { mkdir ($dd) || error ("mkdir $dd: $!"); }
+ $cache_file_name = "$dd/xscreensaver-getimage.cache"
+ } elsif (-d "$ENV{HOME}/tmp") { # If ~/tmp/ exists, use it.
$cache_file_name = "$ENV{HOME}/tmp/.xscreensaver-getimage.cache";
} else {
$cache_file_name = "$ENV{HOME}/.xscreensaver-getimage.cache";
print $cache_fd "$dir\n";
foreach (@all_files) {
my $f = $_; # stupid Perl. do this to avoid modifying @all_files!
- $f =~ s@^\Q$dir\L/@@so || die; # remove $dir from front
+ $f =~ s@^\Q$dir/@@so || die; # remove $dir from front
print $cache_fd "$f\n";
}
}
}
+# Expand the handful of HTML character entities that show up in RSS feeds
+# ("&amp;", "&lt;", "&#65;", "&#x41;", ...) into literal characters.
+# Returns a decoded copy; the argument string is not modified.
+#
+sub html_unquote($) {
+  my ($h) = @_;
+
+  # This only needs to handle entities that occur in RSS, not full HTML.
+  my %ent = ( 'amp' => '&', 'lt' => '<', 'gt' => '>',
+              'quot' => '"', 'apos' => "'" );
+  # Matches "&name;", "&#NNN;" or "&#xHH;".  The trailing ";" is optional
+  # because feeds in the wild sometimes omit it.
+  $h =~ s/(&(\#)?([[:alpha:]\d]+);?)/
+    {
+     my ($o, $c) = ($1, $3);
+     if (! defined($2)) {
+       $c = $ent{$c};                    # named entity, e.g. &lt;
+     } else {
+       if ($c =~ m@^x([\dA-F]+)$@si) {   # hex numeric, e.g. &#x41;
+         $c = chr(hex($1));
+       } elsif ($c =~ m@^\d+$@si) {      # decimal numeric, e.g. &#65;
+         $c = chr($c);
+       } else {
+         $c = undef;                     # "&#junk" -- not a real entity
+       }
+     }
+     ($c || $o);                         # unrecognized entities pass through
+    }
+  /gexi;
+  return $h;
+}
+
+
+
+# Figure out what the proxy server should be, either from environment
+# variables or by parsing the output of the (MacOS) program "scutil",
+# which tells us what the system-wide proxy settings are.
+#
+sub set_proxy($) {
+  my ($ua) = @_;
+
+  # Ask the MacOS system configuration for its proxy settings; on other
+  # systems scutil does not exist and this comes back empty, so only the
+  # env_proxy() call below has any effect there.
+  my $proxy_data = `scutil --proxy 2>/dev/null`;
+  foreach my $proto ('http', 'https') {
+    my ($server) = ($proxy_data =~ m/\b${proto}Proxy\s*:\s*([^\s]+)/si);
+    my ($port) = ($proxy_data =~ m/\b${proto}Port\s*:\s*([^\s]+)/si);
+    my ($enable) = ($proxy_data =~ m/\b${proto}Enable\s*:\s*([^\s]+)/si);
+
+    if ($server && $enable) {
+      # Note: this ignores the "ExceptionsList".
+      # The proxy itself is always spoken to over http, even for https URLs.
+      my $proto2 = 'http';
+      $ENV{"${proto}_proxy"} = ("${proto2}://" . $server .
+                                ($port ? ":$port" : "") . "/");
+      print STDERR "$progname: MacOS $proto proxy: " .
+                   $ENV{"${proto}_proxy"} . "\n"
+        if ($verbose > 2);
+    }
+  }
+
+  # Let LWP pick up http_proxy / https_proxy, whether set just above
+  # or inherited from the environment.
+  $ua->env_proxy();
+}
+
+
+# Verify that LWP::Simple actually loaded (it is pulled in via a BEGIN-eval
+# so that listing a local directory still works on systems without LWP).
+# Errors out with installation instructions if it is missing, then points
+# its UserAgent at any system-wide proxy.
+#
+sub init_lwp() {
+  if (! defined ($LWP::Simple::ua)) {
+    error ("\n\n\tPerl is broken. Do this to repair it:\n" .
+           "\n\tsudo cpan LWP::Simple LWP::Protocol::https Mozilla::CA\n");
+  }
+  set_proxy ($LWP::Simple::ua);
+}
+
+
# Returns a list of the image enclosures in the RSS or Atom feed.
# Elements of the list are references, [ "url", "guid" ].
#
+sub parse_feed($);
sub parse_feed($) {
my ($url) = @_;
- $ua->agent ("$progname/$version");
- $ua->timeout (10); # bail sooner than the default of 3 minutes
+ init_lwp();
+ $LWP::Simple::ua->agent ("$progname/$version");
+ $LWP::Simple::ua->timeout (10); # bail sooner than the default of 3 minutes
+
+
+ # Half the time, random Linux systems don't have Mozilla::CA installed,
+ # which results in "Can't verify SSL peers without knowning which
+ # Certificate Authorities to trust".
+ #
+ # In xscreensaver-text we just disabled certificate checks. However,
+ # malicious images really do exist, so for xscreensaver-getimage-file,
+ # let's actually require that SSL be installed properly.
+
my $body = (LWP::Simple::get($url) || '');
- error ("not an RSS or Atom feed: $url")
- unless ($body =~ m@^<\?xml\s@si);
+ if ($body !~ m@^\s*<(\?xml|rss)\b@si) {
+ # Not an RSS/Atom feed. Try RSS autodiscovery.
+
+ # (Great news, everybody: Flickr no longer provides RSS for "Sets",
+ # only for "Photostreams", and only the first 20 images of those.
+ # Thanks, assholes.)
+
+ error ("null response: $url")
+ if ($body =~ m/^\s*$/s);
+
+ error ("not an RSS or Atom feed, or HTML: $url")
+ unless ($body =~ m@<(HEAD|BODY|A|IMG)\b@si);
+
+ # Find the first <link> with RSS or Atom in it, and use that instead.
+
+ $body =~ s@<LINK\s+([^<>]*)>@{
+ my $p = $1;
+ if ($p =~ m! \b REL \s* = \s* ['"]? alternate \b!six &&
+ $p =~ m! \b TYPE \s* = \s* ['"]? application/(atom|rss) !six &&
+ $p =~ m! \b HREF \s* = \s* ['"] ( [^<>'"]+ ) !six
+ ) {
+ my $u2 = html_unquote ($1);
+ if ($u2 =~ m!^/!s) {
+ my ($h) = ($url =~ m!^([a-z]+://[^/]+)!si);
+ $u2 = "$h$u2";
+ }
+ print STDERR "$progname: found feed: $u2\n"
+ if ($verbose);
+ return parse_feed ($u2);
+ }
+ '';
+ }@gsexi;
+
+ error ("no RSS or Atom feed for HTML page: $url");
+ }
+
$body =~ s@(<ENTRY|<ITEM)@\001$1@gsi;
my @items = split(/\001/, $body);
if ($type) {
$href = undef unless ($type =~ m@^image/@si); # omit videos
}
- $iurl = $href if ($href);
+ $iurl = html_unquote($href) if $href;
}
$link;
}!gsexi;
$item =~ s!(<MEDIA:CONTENT[^<>]*>)!{
my $link = $1;
my ($href) = ($link =~ m/\bURL\s*=\s*[\"\']([^<>\'\"]+)/si);
- $iurl = $href if $href;
+ $iurl = html_unquote($href) if $href;
+ $link;
+ }!gsexi;
+ }
+
+ # Then look for <enclosure url="..."/>
+ #
+ if (! $iurl) {
+ $item =~ s!(<ENCLOSURE[^<>]*>)!{
+ my $link = $1;
+ my ($type) = ($link =~ m/\bTYPE\s*=\s*[\"\']?([^<>\'\"]+)/si);
+ my ($href) = ($link =~ m/\bURL\s*=\s*[\"\']([^<>\'\"]+)/si);
+ $iurl = html_unquote($href)
+ if ($href && $type && $type =~ m@^image/@si); # omit videos
$link;
}!gsexi;
}
- # Then look for <description>... with an <img href="..."> inside.
+ # Ok, maybe there's an image in the <url> field?
+ #
+ if (! $iurl) {
+ $item =~ s!((<URL\b[^<>]*>)([^<>]*))!{
+ my ($all, $u2) = ($1, $3);
+ $iurl = html_unquote($u2) if ($u2 =~ m/$good_file_re/io);
+ $all;
+ }!gsexi;
+ }
+
+ # Then look for <description>... with an <img src="..."> inside.
#
if (! $iurl) {
$item =~ s!(<description[^<>]*>.*?</description>)!{
my $desc = $1;
-      $desc =~ s/&lt;/</gs;
-      $desc =~ s/&gt;/>/gs;
-      $desc =~ s/&quot;/\"/gs;
-      $desc =~ s/&apos;/\'/gs;
-      $desc =~ s/&amp;/&/gs;
+ $desc = html_unquote($desc);
my ($href) = ($desc =~ m@<IMG[^<>]*\bSRC=[\"\']?([^\"\'<>]+)@si);
$iurl = $href if ($href);
$desc;
sub download_image($$$) {
my ($url, $uid, $dir) = @_;
- my ($ext) = ($url =~ m@\.([a-z\d]+)$@si);
+ my $url2 = $url;
+ $url2 =~ s/\#.*$//s; # Omit search terms after file extension
+ $url2 =~ s/\?.*$//s;
+ my ($ext) = ($url2 =~ m@\.([a-z\d]+)$@si);
+
+ # If the feed hasn't put a sane extension on their URLs, nothing's going
+ # to work. This code assumes that file names have extensions, even the
+ # ones in the cache directory.
+ #
+ if (! $ext) {
+ print STDERR "$progname: skipping extensionless URL: $url\n"
+ if ($verbose > 1);
+ return undef;
+ }
+
+ # Don't bother downloading files that we will reject anyway.
+ #
+ if (! ($url2 =~ m/$good_file_re/io)) {
+ print STDERR "$progname: skipping non-image URL: $url\n"
+ if ($verbose > 1);
+ return undef;
+ }
+
my $file = md5_file ($uid);
$file .= '.' . lc($ext) if $ext;
# Special-case kludge for Flickr:
# Their RSS feeds sometimes include only the small versions of the images.
- # So if the URL ends in "s" (75x75), "t" (100x100) or "m" (240x240),then
- # munge it to be "b" (1024x1024).
+ # So if the URL ends in one of the "small-size" letters, change it to "b".
+ #
+ # _o orig, 1600 +
+ # _k large, 2048 max
+ # _h large, 1600 max
+ # _b large, 1024 max
+ # _c medium, 800 max
+ # _z medium, 640 max
+ # "" medium, 500 max
+ # _n small, 320 max
+ # _m small, 240 max
+ # _t thumb, 100 max
+ # _q square, 150x150
+ # _s square, 75x75
#
- $url =~ s@_[stm](\.[a-z]+)$@_b$1@si
+ # Note: if we wanted to get the _k or _o version instead of the _b or _h
+ # version, we'd need to crack the DRM -- which is easy: see crack_secret
+ # in "https://www.jwz.org/hacks/galdown".
+ #
+ $url =~ s@_[sqtmnzc](\.[a-z]+)$@_b$1@si
if ($url =~ m@^https?://[^/?#&]*?flickr\.com/@si);
print STDERR "$progname: downloading: $dir/$file for $uid / $url\n"
if ($verbose > 1);
- $ua->agent ("$progname/$version");
+ init_lwp();
+ $LWP::Simple::ua->agent ("$progname/$version");
my $status = LWP::Simple::mirror ($url, "$dir/$file");
if (!LWP::Simple::is_success ($status)) {
print STDERR "$progname: error $status: $url\n"; # keep going
my $dir = "$ENV{HOME}/Library/Caches"; # MacOS location
if (-d $dir) {
$dir = "$dir/org.jwz.xscreensaver.feeds";
- } elsif (-d "$ENV{HOME}/tmp") {
+ } elsif (-d "$ENV{HOME}/.cache") { # Gnome "FreeDesktop XDG" location
+ $dir = "$ENV{HOME}/.cache/xscreensaver";
+ if (! -d $dir) { mkdir ($dir) || error ("mkdir $dir: $!"); }
+ $dir .= "/feeds";
+ if (! -d $dir) { mkdir ($dir) || error ("mkdir $dir: $!"); }
+ } elsif (-d "$ENV{HOME}/tmp") { # If ~/tmp/ exists, use it.
$dir = "$ENV{HOME}/tmp/.xscreensaver-feeds";
} else {
$dir = "$ENV{HOME}/.xscreensaver-feeds";
my $poll_p = ($mtime + $feed_max_age < time);
- $poll_p = 1 unless ($cache_p); # poll again now with --no-cache cmd line arg.
+ # --no-cache cmd line arg means poll again right now.
+ $poll_p = 1 unless ($cache_p);
- # Even if the cache is young, let's make sure there are at least
- # a few files in it, and re-check if not.
+ # Even if the cache is young, make sure there is at least one file,
+ # and re-check if not.
#
if (! $poll_p) {
my $count = 0;
#
my $count = 0;
my @urls = parse_feed ($url);
+ print STDERR "$progname: " . ($#urls + 1) . " images\n"
+ if ($verbose > 1);
foreach my $p (@urls) {
my ($furl, $id) = @$p;
my $f = download_image ($furl, $id, $dir);
$count++;
}
- print STDERR "$progname: empty feed: $url\n" if ($count <= 0);
+ my $empty_p = ($count <= 0);
# Now delete any files that are no longer in the feed.
# But if there was nothing in the feed (network failure?)
}
}
- # Both feed and cache are empty. No files at all.
+ # Both feed and cache are empty. No files at all. Bail.
error ("empty feed: $url") if ($kept <= 1);
+ # Feed is empty, but we have some files from last time. Warn.
+ print STDERR "$progname: empty feed: using cache: $url\n"
+ if ($empty_p);
+
$mtime = time(); # update the timestamp
} else {
write_cache ($dir);
-# @all_files = sort(@all_files);
-
if ($#all_files < 0) {
print STDERR "$progname: no files in $dir\n";
exit 1;
my $file = $all_files[$n];
if (large_enough_p ($file)) {
if (! $url) {
- $file =~ s@^\Q$dir\L/@@so || die; # remove $dir from front
+ $file =~ s@^\Q$dir/@@so || die; # remove $dir from front
}
return $file;
}
print STDERR "$progname: no suitable images in $dir " .
"(after $max_tries tries)\n";
+
+ # If we got here, blow away the cache. Maybe it's stale.
+ unlink $cache_file_name if $cache_file_name;
+
exit 1;
}
my ($w, $h) = image_file_size ($file);
if (!defined ($h)) {
+
+ # Nonexistent files are obviously too small!
+ # Already printed $verbose message about the file not existing.
+ return 0 unless -f $file;
+
print STDERR "$progname: $file: unable to determine image size\n"
if ($verbose);
# Assume that unknown files are of good sizes: this will happen if
sub image_file_size($) {
my ($file) = @_;
my $in;
- if (! open ($in, '<', $file)) {
+ if (! open ($in, '<:raw', $file)) {
print STDERR "$progname: $file: $!\n" if ($verbose);
- return undef;
+ return ();
}
- binmode ($in); # Larry can take Unicode and shove it up his ass sideways.
my $body = '';
sysread ($in, $body, 1024 * 50); # The first 50k should be enough.
close $in; # (It's not for certain huge jpegs...
}
# Print a usage summary to stderr and exit non-zero.
sub usage() {
-  print STDERR "usage: $progname [--verbose] directory\n" .
+  print STDERR "usage: $progname [--verbose] directory-or-feed-url\n\n" .
    " Prints the name of a randomly-selected image file. The directory\n" .
    " is searched recursively. Images smaller than " .
    "${min_image_width}x${min_image_height} are excluded.\n" .
    "\n" .
    " The directory may also be the URL of an RSS/Atom feed. Enclosed\n" .
-   " images will be downloaded cached locally.\n" .
+   " images will be downloaded and cached locally.\n" .
    "\n";
  exit 1;
}
# Command-line parsing: options may now be given with one or two leading
# dashes; the single non-option argument is a directory or a feed URL.
while ($_ = $ARGV[0]) {
  shift @ARGV;
-  if ($_ eq "--verbose") { $verbose++; }
-  elsif (m/^-v+$/) { $verbose += length($_)-1; }
-  elsif ($_ eq "--name") { } # ignored, for compatibility
-  elsif ($_ eq "--spotlight") { $use_spotlight_p = 1; }
-  elsif ($_ eq "--no-spotlight") { $use_spotlight_p = 0; }
-  elsif ($_ eq "--cache") { $cache_p = 1; }
-  elsif ($_ eq "--no-cache") { $cache_p = 0; }
-  elsif (m/^-./) { usage; }
-  elsif (!defined($dir)) { $dir = $_; }
-  else { usage; }
+  if (m/^--?verbose$/s) { $verbose++; }
+  elsif (m/^-v+$/s) { $verbose += length($_)-1; }  # -v, -vv, -vvv ...
+  elsif (m/^--?name$/s) { } # ignored, for compatibility
+  elsif (m/^--?spotlight$/s) { $use_spotlight_p = 1; }
+  elsif (m/^--?no-spotlight$/s) { $use_spotlight_p = 0; }
+  elsif (m/^--?cache$/s) { $cache_p = 1; }
+  elsif (m/^--?no-?cache$/s) { $cache_p = 0; }
+  elsif (m/^-./) { usage; }             # unknown switch
+  elsif (!defined($dir)) { $dir = $_; } # first bare arg: dir or feed URL
+  else { usage; }                       # more than one bare arg
}
usage unless (defined($dir));