X-Git-Url: http://git.hungrycats.org/cgi-bin/gitweb.cgi?p=xscreensaver;a=blobdiff_plain;f=driver%2Fxscreensaver-getimage-file;h=f5b7c10748044fae26de2f3e6ecff4ee7fda93d5;hp=7d946ecab80633e2d9179b9b890225026e8630f5;hb=4361b69d3178d7fc98d0388f9a223af6c2651aba;hpb=ec8d2b32b63649e6d32bdfb306eda062769af823

diff --git a/driver/xscreensaver-getimage-file b/driver/xscreensaver-getimage-file
index 7d946eca..f5b7c107 100755
--- a/driver/xscreensaver-getimage-file
+++ b/driver/xscreensaver-getimage-file
@@ -1,5 +1,5 @@
 #!/usr/bin/perl -w
-# Copyright © 2001-2011 Jamie Zawinski <jwz@jwz.org>.
+# Copyright © 2001-2017 Jamie Zawinski <jwz@jwz.org>.
 #
 # Permission to use, copy, modify, distribute, and sell this software and its
 # documentation for any purpose is hereby granted without fee, provided that
@@ -45,16 +45,15 @@ import Fcntl ':mode' unless defined &S_ISUID; # but it is here in Perl 5.8
   # but in Perl 5.10, both of these load, and cause errors!
   # So we have to check for S_ISUID instead of S_ISDIR?  WTF?
 
-use bytes;  # Larry can take Unicode and shove it up his ass sideways.
-            # Perl 5.8.0 causes us to start getting incomprehensible
-            # errors about UTF-8 all over the place without this.
-
 use Digest::MD5 qw(md5_base64);
-use LWP::Simple qw($ua);
+
+# Some Linux systems don't install LWP by default!
+# Only error out if we're actually loading a URL instead of local data.
+BEGIN { eval 'use LWP::Simple;' }
 
 my $progname = $0; $progname =~ s@.*/@@g;
-my $version = q{ $Revision: 1.30 $ }; $version =~ s/^[^0-9]+([0-9.]+).*$/$1/;
+my ($version) = ('$Revision: 1.43 $' =~ m/\s(\d[.\d]+)\s/s);
 
 my $verbose = 0;
 
@@ -247,7 +246,11 @@ sub read_cache($) {
   my $dd = "$ENV{HOME}/Library/Caches";    # MacOS location
   if (-d $dd) {
     $cache_file_name = "$dd/org.jwz.xscreensaver.getimage.cache";
-  } elsif (-d "$ENV{HOME}/tmp") {
+  } elsif (-d "$ENV{HOME}/.cache") {	   # Gnome "FreeDesktop XDG" location
+    $dd = "$ENV{HOME}/.cache/xscreensaver";
+    if (! -d $dd) { mkdir ($dd) || error ("mkdir $dd: $!"); }
+    $cache_file_name = "$dd/xscreensaver-getimage.cache"
+  } elsif (-d "$ENV{HOME}/tmp") {	   # If ~/tmp/ exists, use it.
     $cache_file_name = "$ENV{HOME}/tmp/.xscreensaver-getimage.cache";
   } else {
     $cache_file_name = "$ENV{HOME}/.xscreensaver-getimage.cache";
@@ -309,7 +312,7 @@ sub write_cache($) {
       print $cache_fd "$dir\n";
       foreach (@all_files) {
         my $f = $_; # stupid Perl. do this to avoid modifying @all_files!
-        $f =~ s@^\Q$dir\L/@@so || die;  # remove $dir from front
+        $f =~ s@^\Q$dir/@@so || die;  # remove $dir from front
         print $cache_fd "$f\n";
       }
     }
@@ -325,19 +328,130 @@ sub write_cache($) {
 }
 
 
+sub html_unquote($) {
+  my ($h) = @_;
+
+  # This only needs to handle entities that occur in RSS, not full HTML.
+  my %ent = ( 'amp' => '&', 'lt' => '<', 'gt' => '>', 
+              'quot' => '"', 'apos' => "'" );
+  $h =~ s/(&(\#)?([[:alpha:]\d]+);?)/
+    {
+     my ($o, $c) = ($1, $3);
+     if (! defined($2)) {
+       $c = $ent{$c};			# for &lt;
+     } else {
+       if ($c =~ m@^x([\dA-F]+)$@si) {	# for &#x41;
+         $c = chr(hex($1));
+       } elsif ($c =~ m@^\d+$@si) {	# for &#65;
+         $c = chr($c);
+       } else {
+         $c = undef;
+       }
+     }
+     ($c || $o);
+    }
+   /gexi;
+  return $h;
+}
+
+
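A standalone sketch of the decoding that the html_unquote() added above performs; the sub name "demo_unquote" and the test string are illustrative, not part of the patch:

    #!/usr/bin/perl -w
    use strict;

    # Same entity table and substitution idiom as html_unquote() above.
    my %ent = ( 'amp' => '&', 'lt' => '<', 'gt' => '>',
                'quot' => '"', 'apos' => "'" );
    sub demo_unquote($) {
      my ($h) = @_;
      $h =~ s/(&(\#)?([[:alpha:]\d]+);?)/
        {
         my ($o, $c) = ($1, $3);
         if (! defined($2))              { $c = $ent{$c};    } # named, &lt;
         elsif ($c =~ m@^x([\dA-F]+)$@si) { $c = chr(hex($1)); } # hex, &#x41;
         elsif ($c =~ m@^\d+$@si)         { $c = chr($c);      } # decimal, &#65;
         else                             { $c = undef;        } # unknown: keep
         ($c || $o);
        }
       /gexi;
      return $h;
    }

    print demo_unquote("&lt;img src=&quot;a.jpg&quot;&gt; &#65; &#x42;"), "\n";
    # prints: <img src="a.jpg"> A B
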
+# Figure out what the proxy server should be, either from environment
+# variables or by parsing the output of the (MacOS) program "scutil",
+# which tells us what the system-wide proxy settings are.
+#
+sub set_proxy($) {
+  my ($ua) = @_;
+
+  my $proxy_data = `scutil --proxy 2>/dev/null`;
+  foreach my $proto ('http', 'https') {
+    my ($server) = ($proxy_data =~ m/\b${proto}Proxy\s*:\s*([^\s]+)/si);
+    my ($port)   = ($proxy_data =~ m/\b${proto}Port\s*:\s*([^\s]+)/si);
+    my ($enable) = ($proxy_data =~ m/\b${proto}Enable\s*:\s*([^\s]+)/si);
+
+    if ($server && $enable) {
+      # Note: this ignores the "ExceptionsList".
+      my $proto2 = 'http';
+      $ENV{"${proto}_proxy"} = ("${proto2}://" . $server .
+                                ($port ? ":$port" : "") . "/");
+      print STDERR "$progname: MacOS $proto proxy: " .
+                   $ENV{"${proto}_proxy"} . "\n"
+        if ($verbose > 2);
+    }
+  }
+
+  $ua->env_proxy();
+}
+
+
+sub init_lwp() {
+  if (! defined ($LWP::Simple::ua)) {
+    error ("\n\n\tPerl is broken. Do this to repair it:\n" .
+           "\n\tsudo cpan LWP::Simple LWP::Protocol::https Mozilla::CA\n");
+  }
+  set_proxy ($LWP::Simple::ua);
+}
+
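The scutil parsing in set_proxy() above can be exercised with canned input; the dictionary below is a hypothetical example of `scutil --proxy` output, not captured from a real machine:

    #!/usr/bin/perl -w
    use strict;

    # Apply set_proxy()'s regexes to canned scutil-style output.
    my $proxy_data = "<dictionary> {\n" .
                     "  HTTPEnable : 1\n" .
                     "  HTTPPort : 8080\n" .
                     "  HTTPProxy : proxy.example.com\n" .
                     "}\n";
    foreach my $proto ('http', 'https') {
      my ($server) = ($proxy_data =~ m/\b${proto}Proxy\s*:\s*([^\s]+)/si);
      my ($port)   = ($proxy_data =~ m/\b${proto}Port\s*:\s*([^\s]+)/si);
      my ($enable) = ($proxy_data =~ m/\b${proto}Enable\s*:\s*([^\s]+)/si);
      print "$proto: ", ($server && $enable ? "$server:$port" : "unset"), "\n";
    }
    # prints:  http: proxy.example.com:8080
    #          https: unset
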
 # Returns a list of the image enclosures in the RSS or Atom feed.
 # Elements of the list are references, [ "url", "guid" ].
 #
+sub parse_feed($);
 sub parse_feed($) {
   my ($url) = @_;
 
-  $ua->agent ("$progname/$version");
-  $ua->timeout (10);  # bail sooner than the default of 3 minutes
+  init_lwp();
+  $LWP::Simple::ua->agent ("$progname/$version");
+  $LWP::Simple::ua->timeout (10);  # bail sooner than the default of 3 minutes
+
+
+  # Half the time, random Linux systems don't have Mozilla::CA installed,
+  # which results in "Can't verify SSL peers without knowing which
+  # Certificate Authorities to trust".
+  #
+  # In xscreensaver-text we just disabled certificate checks.  However,
+  # malicious images really do exist, so for xscreensaver-getimage-file,
+  # let's actually require that SSL be installed properly.
 
   my $body = (LWP::Simple::get($url) || '');
 
-  error ("not an RSS or Atom feed: $url")
-    unless ($body =~ m@^<\?xml\s@si);
+  if ($body !~ m@^\s*<(\?xml|rss)\b@si) {
+    # Not an RSS/Atom feed.  Try RSS autodiscovery.
+
+    # (Great news, everybody: Flickr no longer provides RSS for "Sets",
+    # only for "Photostreams", and only the first 20 images of those.
+    # Thanks, assholes.)
+
+    error ("null response: $url")
+      if ($body =~ m/^\s*$/s);
+
+    error ("not an RSS or Atom feed, or HTML: $url")
+      unless ($body =~ m@<(HEAD|BODY|A|IMG)\b@si);
+
+    # Find the first <LINK> with RSS or Atom in it, and use that instead.
+
+    $body =~ s@<LINK\s+([^<>]*)>@{
+      my $p = $1;
+      if ($p =~ m! \b REL  \s* = \s* ['"]?  alternate \b!six &&
+          $p =~ m! \b TYPE \s* = \s* ['"]?  application/(atom|rss) !six &&
+          $p =~ m! \b HREF \s* = \s* ['"]  ( [^<>'"]+ ) !six
+         ) {
+        my $u2 = html_unquote ($1);
+        if ($u2 =~ m!^/!s) {
+          my ($h) = ($url =~ m!^([a-z]+://[^/]+)!si);
+          $u2 = "$h$u2";
+        }
+        print STDERR "$progname: found feed: $u2\n"
+          if ($verbose);
+        return parse_feed ($u2);
+      }
+      '';
+    }@gsexi;
+
+    error ("no RSS or Atom feed for HTML page: $url");
+  }
+
   $body =~ s@(<ENTRY|<ITEM)@\001$1@gsi;
   my @items = split(/\001/, $body);
   shift @items;
 
   my @imgs = ();
   my %ids;
 
   foreach my $item (@items) {
     my $iurl = undef;
     my $id = undef;
 
+    # First look for <enclosure url="..."/> sub-element of <item>
+    #
+    if (! $iurl) {
+      $item =~ s!(<ENCLOSURE[^<>]*>)!{
           my $link = $1;
           my ($href) = ($link =~ m/\bURL\s*=\s*[\"\']([^<>\'\"]+)/si);
-          $iurl = $href if $href;
+          $iurl = html_unquote($href) if $href;
+          $link;
+        }!gsexi;
+    }
+
+    # Then look for <media:content url="..."/> sub-element of <item>
+    #
+    if (! $iurl) {
+      $item =~ s!(<MEDIA:CONTENT[^<>]*>)!{
+        my $link = $1;
+        my ($type) = ($link =~ m/\bTYPE\s*=\s*[\"\']?([^<>\'\"]+)/si);
+        my ($href) = ($link =~ m/\bURL\s*=\s*[\"\']([^<>\'\"]+)/si);
+        $iurl = html_unquote($href)
+          if ($href && $type && $type =~ m@^image/@si);  # omit videos
         $link;
       }!gsexi;
     }
 
-    # Then look for <description> ... with an <img src="..."> inside.
+    # Ok, maybe there's an image in the <description> field?
+    #
+    if (! $iurl) {
+      $item =~ s!((<description[^<>]*>)([^<>]*))!{
+        my ($all, $u2) = ($1, $3);
+        $iurl = html_unquote($u2) if ($u2 =~ m/$good_file_re/io);
+        $all;
+      }!gsexi;
+    }
+
+    # Then look for <description>... with an <img src="..."> inside.
     #
     if (! $iurl) {
       $item =~ s!(<description[^<>]*>.*?</description>)!{
         my $desc = $1;
-        $desc =~ s/&lt;/</gs;
-        $desc =~ s/&gt;/>/gs;
-        $desc =~ s/&quot;/\"/gs;
-        $desc =~ s/&apos;/\'/gs;
-        $desc =~ s/&amp;/&/gs;
+        $desc = html_unquote($desc);
         my ($href) = ($desc =~ m@<IMG[^<>]*\bSRC=[\"\']?([^\"\'<>]+)@si);
         $iurl = $href if ($href);
         $desc;
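To see the enclosure scan in isolation, here is a minimal sketch; the RSS item and URL are hypothetical:

    #!/usr/bin/perl -w
    use strict;

    # Run one RSS <item> through the same <ENCLOSURE url="..."> scan
    # that parse_feed() uses above.
    my $item = '<item><enclosure url="http://img.example.com/a.jpg"' .
               ' type="image/jpeg" length="1234"/></item>';
    my $iurl = undef;
    $item =~ s!(<ENCLOSURE[^<>]*>)!{
      my $link = $1;
      my ($href) = ($link =~ m/\bURL\s*=\s*[\"\']([^<>\'\"]+)/si);
      $iurl = $href if $href;
      $link;   # put the matched text back; we only want the side effect
    }!gsexi;
    print(($iurl || 'none'), "\n");
    # prints: http://img.example.com/a.jpg
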
@@ -446,7 +579,29 @@ sub md5_file($) {
 sub download_image($$$) {
   my ($url, $uid, $dir) = @_;
 
-  my ($ext) = ($url =~ m@\.([a-z\d]+)$@si);
+  my $url2 = $url;
+  $url2 =~ s/\#.*$//s;		# Omit search terms after file extension
+  $url2 =~ s/\?.*$//s;
+  my ($ext) = ($url2 =~ m@\.([a-z\d]+)$@si);
+
+  # If the feed hasn't put a sane extension on their URLs, nothing's going
+  # to work. This code assumes that file names have extensions, even the
+  # ones in the cache directory.
+  #
+  if (! $ext) {
+    print STDERR "$progname: skipping extensionless URL: $url\n"
+      if ($verbose > 1);
+    return undef;
+  }
+
+  # Don't bother downloading files that we will reject anyway.
+  #
+  if (! ($url2 =~ m/$good_file_re/io)) {
+    print STDERR "$progname: skipping non-image URL: $url\n"
+      if ($verbose > 1);
+    return undef;
+  }
+
   my $file = md5_file ($uid);
   $file .= '.' . lc($ext) if $ext;
 
@@ -460,15 +615,32 @@ sub download_image($$$) {
 
   # Special-case kludge for Flickr:
   # Their RSS feeds sometimes include only the small versions of the images.
-  # So if the URL ends in "s" (75x75), "t" (100x100) or "m" (240x240),then
-  # munge it to be "b" (1024x1024).
+  # So if the URL ends in one of the "small-size" letters, change it to "b".
+  #
+  #     _o  orig,    1600 +
+  #     _k  large,   2048 max
+  #     _h  large,   1600 max
+  #     _b  large,   1024 max
+  #     _c  medium,   800 max
+  #     _z  medium,   640 max
+  #     ""  medium,   500 max
+  #     _n  small,    320 max
+  #     _m  small,    240 max
+  #     _t  thumb,    100 max
+  #     _q  square,   150x150
+  #     _s  square,   75x75
   #
-  $url =~ s@_[stm](\.[a-z]+)$@_b$1@si
+  # Note: if we wanted to get the _k or _o version instead of the _b or _h
+  # version, we'd need to crack the DRM -- which is easy: see crack_secret
+  # in "https://www.jwz.org/hacks/galdown".
+  #
+  $url =~ s@_[sqtmnzc](\.[a-z]+)$@_b$1@si
     if ($url =~ m@^https?://[^/?#&]*?flickr\.com/@si);
 
   print STDERR "$progname: downloading: $dir/$file for $uid / $url\n" 
     if ($verbose > 1);
-  $ua->agent ("$progname/$version");
+  init_lwp();
+  $LWP::Simple::ua->agent ("$progname/$version");
   my $status = LWP::Simple::mirror ($url, "$dir/$file");
   if (!LWP::Simple::is_success ($status)) {
     print STDERR "$progname: error $status: $url\n";   # keep going
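A quick check of the Flickr size-suffix rewrite above; the URL is made up and only approximates Flickr's path scheme:

    #!/usr/bin/perl -w
    use strict;

    # "_m" (small, 240px max) should be promoted to "_b" (large, 1024px max).
    my $url = 'https://www.flickr.com/photos/12345/6789_abcd123_m.jpg';
    $url =~ s@_[sqtmnzc](\.[a-z]+)$@_b$1@si
      if ($url =~ m@^https?://[^/?#&]*?flickr\.com/@si);
    print "$url\n";
    # prints: https://www.flickr.com/photos/12345/6789_abcd123_b.jpg
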
"\n"; exit 1; } @@ -854,16 +1044,16 @@ sub main() { while ($_ = $ARGV[0]) { shift @ARGV; - if ($_ eq "--verbose") { $verbose++; } - elsif (m/^-v+$/) { $verbose += length($_)-1; } - elsif ($_ eq "--name") { } # ignored, for compatibility - elsif ($_ eq "--spotlight") { $use_spotlight_p = 1; } - elsif ($_ eq "--no-spotlight") { $use_spotlight_p = 0; } - elsif ($_ eq "--cache") { $cache_p = 1; } - elsif ($_ eq "--no-cache") { $cache_p = 0; } - elsif (m/^-./) { usage; } - elsif (!defined($dir)) { $dir = $_; } - else { usage; } + if (m/^--?verbose$/s) { $verbose++; } + elsif (m/^-v+$/s) { $verbose += length($_)-1; } + elsif (m/^--?name$/s) { } # ignored, for compatibility + elsif (m/^--?spotlight$/s) { $use_spotlight_p = 1; } + elsif (m/^--?no-spotlight$/s) { $use_spotlight_p = 0; } + elsif (m/^--?cache$/s) { $cache_p = 1; } + elsif (m/^--?no-?cache$/s) { $cache_p = 0; } + elsif (m/^-./) { usage; } + elsif (!defined($dir)) { $dir = $_; } + else { usage; } } usage unless (defined($dir));