X-Git-Url: http://git.hungrycats.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=driver%2Fxscreensaver-getimage-file;h=6335d931e8cb4dced2ffffc8cacd1a75052d0f20;hb=78add6e627ee5f10e1fa6f3852602ea5066eee5a;hp=cc61a08d86f69ae796b52d8542bc734f1b6f10ba;hpb=ff35d056d723c9a5ffe728dbba5f1c25e141be04;p=xscreensaver diff --git a/driver/xscreensaver-getimage-file b/driver/xscreensaver-getimage-file index cc61a08d..6335d931 100755 --- a/driver/xscreensaver-getimage-file +++ b/driver/xscreensaver-getimage-file @@ -1,5 +1,5 @@ #!/usr/bin/perl -w -# Copyright © 2001-2012 Jamie Zawinski . +# Copyright © 2001-2018 Jamie Zawinski . # # Permission to use, copy, modify, distribute, and sell this software and its # documentation for any purpose is hereby granted without fee, provided that @@ -45,10 +45,6 @@ import Fcntl ':mode' unless defined &S_ISUID; # but it is here in Perl 5.8 # but in Perl 5.10, both of these load, and cause errors! # So we have to check for S_ISUID instead of S_ISDIR? WTF? -use bytes; # Larry can take Unicode and shove it up his ass sideways. - # Perl 5.8.0 causes us to start getting incomprehensible - # errors about UTF-8 all over the place without this. - use Digest::MD5 qw(md5_base64); # Some Linux systems don't install LWP by default! @@ -57,7 +53,7 @@ BEGIN { eval 'use LWP::Simple;' } my $progname = $0; $progname =~ s@.*/@@g; -my $version = q{ $Revision: 1.34 $ }; $version =~ s/^[^0-9]+([0-9.]+).*$/$1/; +my ($version) = ('$Revision: 1.46 $' =~ m/\s(\d[.\d]+)\s/s); my $verbose = 0; @@ -99,8 +95,8 @@ my $good_file_re = '\.(' . join("|", @good_extensions) . ')$'; # my @nondir_extensions = ('ai', 'bmp', 'bz2', 'cr2', 'crw', 'db', 'dmg', 'eps', 'gz', 'hqx', 'htm', 'html', 'icns', 'ilbm', 'mov', - 'nef', 'pbm', 'pdf', 'pl', 'ppm', 'ps', 'psd', 'sea', 'sh', 'shtml', - 'tar', 'tgz', 'thb', 'txt', 'xcf', 'xmp', 'Z', 'zip' ); + 'nef', 'pbm', 'pdf', 'php', 'pl', 'ppm', 'ps', 'psd', 'sea', 'sh', + 'shtml', 'tar', 'tgz', 'thb', 'txt', 'xcf', 'xmp', 'Z', 'zip' ); my $nondir_re = '\.(' . join("|", @nondir_extensions) . ')$'; @@ -108,8 +104,8 @@ my $nondir_re = '\.(' . join("|", @nondir_extensions) . ')$'; # this is so that you can use an image directory that contains both big # images and thumbnails, and have it only select the big versions. # -my $min_image_width = 255; -my $min_image_height = 255; +my $min_image_width = 500; +my $min_image_height = 500; my @all_files = (); # list of "good" files we've collected my %seen_inodes; # for breaking recursive symlink loops @@ -250,7 +246,11 @@ sub read_cache($) { my $dd = "$ENV{HOME}/Library/Caches"; # MacOS location if (-d $dd) { $cache_file_name = "$dd/org.jwz.xscreensaver.getimage.cache"; - } elsif (-d "$ENV{HOME}/tmp") { + } elsif (-d "$ENV{HOME}/.cache") { # Gnome "FreeDesktop XDG" location + $dd = "$ENV{HOME}/.cache/xscreensaver"; + if (! -d $dd) { mkdir ($dd) || error ("mkdir $dd: $!"); } + $cache_file_name = "$dd/xscreensaver-getimage.cache" + } elsif (-d "$ENV{HOME}/tmp") { # If ~/tmp/ exists, use it. $cache_file_name = "$ENV{HOME}/tmp/.xscreensaver-getimage.cache"; } else { $cache_file_name = "$ENV{HOME}/.xscreensaver-getimage.cache"; @@ -312,7 +312,7 @@ sub write_cache($) { print $cache_fd "$dir\n"; foreach (@all_files) { my $f = $_; # stupid Perl. do this to avoid modifying @all_files! - $f =~ s@^\Q$dir\L/@@so || die; # remove $dir from front + $f =~ s@^\Q$dir/@@so || die; # remove $dir from front print $cache_fd "$f\n"; } } @@ -330,6 +330,8 @@ sub write_cache($) { sub html_unquote($) { my ($h) = @_; + + # This only needs to handle entities that occur in RSS, not full HTML. my %ent = ( 'amp' => '&', 'lt' => '<', 'gt' => '>', 'quot' => '"', 'apos' => "'" ); $h =~ s/(&(\#)?([[:alpha:]\d]+);?)/ @@ -354,19 +356,28 @@ sub html_unquote($) { +# Figure out what the proxy server should be, either from environment +# variables or by parsing the output of the (MacOS) program "scutil", +# which tells us what the system-wide proxy settings are. +# sub set_proxy($) { my ($ua) = @_; - if (!defined($ENV{http_proxy}) && !defined($ENV{HTTP_PROXY})) { - my $proxy_data = `scutil --proxy 2>/dev/null`; - my ($server) = ($proxy_data =~ m/\bHTTPProxy\s*:\s*([^\s]+)/s); - my ($port) = ($proxy_data =~ m/\bHTTPPort\s*:\s*([^\s]+)/s); - if ($server) { + my $proxy_data = `scutil --proxy 2>/dev/null`; + foreach my $proto ('http', 'https') { + my ($server) = ($proxy_data =~ m/\b${proto}Proxy\s*:\s*([^\s]+)/si); + my ($port) = ($proxy_data =~ m/\b${proto}Port\s*:\s*([^\s]+)/si); + my ($enable) = ($proxy_data =~ m/\b${proto}Enable\s*:\s*([^\s]+)/si); + + if ($server && $enable) { # Note: this ignores the "ExceptionsList". - $ENV{http_proxy} = "http://" . $server . ($port ? ":$port" : "") . "/"; - print STDERR "$progname: MacOS proxy: $ENV{http_proxy}\n" - if ($verbose > 2) - } + my $proto2 = 'http'; + $ENV{"${proto}_proxy"} = ("${proto2}://" . $server . + ($port ? ":$port" : "") . "/"); + print STDERR "$progname: MacOS $proto proxy: " . + $ENV{"${proto}_proxy"} . "\n" + if ($verbose > 2); + } } $ua->env_proxy(); @@ -376,12 +387,50 @@ sub set_proxy($) { sub init_lwp() { if (! defined ($LWP::Simple::ua)) { error ("\n\n\tPerl is broken. Do this to repair it:\n" . - "\n\tsudo cpan LWP::Simple\n"); + "\n\tsudo cpan LWP::Simple LWP::Protocol::https Mozilla::CA\n"); } set_proxy ($LWP::Simple::ua); } +sub sanity_check_lwp() { + my $url1 = 'https://www.mozilla.org/'; + my $url2 = 'http://www.mozilla.org/'; + my $body = (LWP::Simple::get($url1) || ''); + if (length($body) < 10240) { + my $err = ""; + $body = (LWP::Simple::get($url2) || ''); + if (length($body) < 10240) { + $err = "Perl is broken: neither HTTP nor HTTPS URLs work."; + } else { + $err = "Perl is broken: HTTP URLs work but HTTPS URLs don't."; + } + $err .= "\nMaybe try: sudo cpan -f Mozilla::CA LWP::Protocol::https"; + $err =~ s/^/\t/gm; + error ("\n\n$err\n"); + } +} + + +# If the URL does not already end with an extension appropriate for the +# content-type, add it after a "#" search. +# +# This is for when we know the content type of the URL, but the URL is +# some crazy thing without an extension. The files on disk need to have +# proper extensions. +# +sub force_extension($$) { + my ($url, $ct) = @_; + return $url unless (defined($url) && defined($ct)); + my ($ext) = ($ct =~ m@^image/([-a-z\d]+)@si); + return $url unless $ext; + $ext = lc($ext); + $ext = 'jpg' if ($ext eq 'jpeg'); + return $url if ($url =~ m/\.$ext$/si); + return "$url#.$ext"; +} + + # Returns a list of the image enclosures in the RSS or Atom feed. # Elements of the list are references, [ "url", "guid" ]. # @@ -393,11 +442,30 @@ sub parse_feed($) { $LWP::Simple::ua->agent ("$progname/$version"); $LWP::Simple::ua->timeout (10); # bail sooner than the default of 3 minutes + + # Half the time, random Linux systems don't have Mozilla::CA installed, + # which results in "Can't verify SSL peers without knowning which + # Certificate Authorities to trust". + # + # In xscreensaver-text we just disabled certificate checks. However, + # malicious images really do exist, so for xscreensaver-getimage-file, + # let's actually require that SSL be installed properly. + + my $body = (LWP::Simple::get($url) || ''); - if ($body !~ m@^<\?xml\s@si) { + if ($body !~ m@^\s*<(\?xml|rss)\b@si) { # Not an RSS/Atom feed. Try RSS autodiscovery. + # (Great news, everybody: Flickr no longer provides RSS for "Sets", + # only for "Photostreams", and only the first 20 images of those. + # Thanks, assholes.) + + if ($body =~ m/^\s*$/s) { + sanity_check_lwp(); + error ("null response: $url"); + } + error ("not an RSS or Atom feed, or HTML: $url") unless ($body =~ m@<(HEAD|BODY|A|IMG)\b@si); @@ -410,6 +478,10 @@ sub parse_feed($) { $p =~ m! \b HREF \s* = \s* ['"] ( [^<>'"]+ ) !six ) { my $u2 = html_unquote ($1); + if ($u2 =~ m!^/!s) { + my ($h) = ($url =~ m!^([a-z]+://[^/]+)!si); + $u2 = "$h$u2"; + } print STDERR "$progname: found feed: $u2\n" if ($verbose); return parse_feed ($u2); @@ -435,66 +507,74 @@ sub parse_feed($) { # First look for # if (! $iurl) { - $item =~ s!(]*>)!{ - my $link = $1; - my ($rel) = ($link =~ m/\bREL\s*=\s*[\"\']?([^<>\'\"]+)/si); - my ($type) = ($link =~ m/\bTYPE\s*=\s*[\"\']?([^<>\'\"]+)/si); + foreach my $link ($item =~ m@]*>@gsi) { + last if $iurl; my ($href) = ($link =~ m/\bHREF\s*=\s*[\"\']([^<>\'\"]+)/si); - - if ($rel && lc($rel) eq 'enclosure') { - if ($type) { - $href = undef unless ($type =~ m@^image/@si); # omit videos - } - $iurl = html_unquote($href) if $href; - } - $link; - }!gsexi; + my ($type) = ($link =~ m/\bTYPE\s*=\s*[\"\']?([^<>\'\"]+)/si); + my ($rel) = ($link =~ m/\bREL\s*=\s*[\"\']?([^<>\'\"]+)/si); + $href = undef unless (lc($rel || '') eq 'enclosure'); + $href = undef if ($type && $type !~ m@^image/@si); # omit videos + $iurl = html_unquote($href) if $href; + $iurl = force_extension ($iurl, $type); + } } # Then look for # if (! $iurl) { - $item =~ s!(]*>)!{ - my $link = $1; + foreach my $link ($item =~ m@]*>@gsi) { + last if $iurl; my ($href) = ($link =~ m/\bURL\s*=\s*[\"\']([^<>\'\"]+)/si); + my ($type) = ($link =~ m/\bTYPE\s*=\s*[\"\']?([^<>\'\"]+)/si); + my ($med) = ($link =~ m/\bMEDIUM\s*=\s*[\"\']?([^<>\'\"]+)/si); + $type = 'image/jpeg' if (!$type && lc($med || '') eq 'image'); + $href = undef if ($type && $type !~ m@^image/@si); # omit videos $iurl = html_unquote($href) if $href; - $link; - }!gsexi; + $iurl = force_extension ($iurl, $type); + } } # Then look for # if (! $iurl) { - $item =~ s!(]*>)!{ - my $link = $1; - my ($type) = ($link =~ m/\bTYPE\s*=\s*[\"\']?([^<>\'\"]+)/si); + foreach my $link ($item =~ m@]*>@gsi) { + last if $iurl; my ($href) = ($link =~ m/\bURL\s*=\s*[\"\']([^<>\'\"]+)/si); - $iurl = html_unquote($href) - if ($href && $type && $type =~ m@^image/@si); # omit videos - $link; - }!gsexi; + my ($type) = ($link =~ m/\bTYPE\s*=\s*[\"\']?([^<>\'\"]+)/si); + $href = undef if ($type && $type !~ m@^image/@si); # omit videos + $iurl = html_unquote($href) if ($href); + $iurl = force_extension ($iurl, $type); + } } # Ok, maybe there's an image in the field? # if (! $iurl) { - $item =~ s!((]*>)([^<>]*))!{ - my ($all, $u2) = ($1, $3); + foreach my $link ($item =~ m@]*>([^<>]*)@gsi) { + last if $iurl; + my $u2 = $1; $iurl = html_unquote($u2) if ($u2 =~ m/$good_file_re/io); - $all; - }!gsexi; + if (! $iurl) { + my $u3 = $u2; + $u3 =~ s/#.*$//gs; + $u3 =~ s/[?&].*$//gs; + $iurl = html_unquote($u2) if ($u3 =~ m/$good_file_re/io); + } + } } # Then look for ... with an inside. # if (! $iurl) { - $item =~ s!(]*>.*?)!{ - my $desc = $1; - $desc = html_unquote($desc); + foreach my $link ($item =~ m@]*>(.*?)@gsi) { + last if $iurl; + my $desc = html_unquote($1); my ($href) = ($desc =~ m@]*\bSRC=[\"\']?([^\"\'<>]+)@si); - $iurl = $href if ($href); - $desc; - }!gsexi; + $iurl = html_unquote($href) if ($href); + # If IMG SRC has a bogus extension, pretend it's a JPEG. + $iurl = force_extension ($iurl, 'image/jpeg') + if ($iurl && $iurl !~ m/$good_file_re/io); + } } # Could also do , but the above probably covers all @@ -511,6 +591,8 @@ sub parse_feed($) { # Then look for ... ($id) = ($item =~ m!]*>\s*([^<>]+?)\s*!si) unless $id; + # If we only have a GUID or LINK, but it's an image, use that. + $iurl = $id if (!$iurl && $id && $id =~ m/$good_file_re/io); if ($iurl) { $id = $iurl unless $id; @@ -541,13 +623,80 @@ sub md5_file($) { } +# expands the first URL relative to the second. +# +sub expand_url($$) { + my ($url, $base) = @_; + + $url =~ s/^\s+//gs; # lose whitespace at front and back + $url =~ s/\s+$//gs; + + if (! ($url =~ m/^[a-z]+:/)) { + + $base =~ s@(\#.*)$@@; # strip anchors + $base =~ s@(\?.*)$@@; # strip arguments + $base =~ s@/[^/]*$@/@; # take off trailing file component + + my $tail = ''; + if ($url =~ s@(\#.*)$@@) { $tail = $1; } # save anchors + if ($url =~ s@(\?.*)$@@) { $tail = "$1$tail"; } # save arguments + + my $base2 = $base; + + $base2 =~ s@^([a-z]+:/+[^/]+)/.*@$1@ # if url is an absolute path + if ($url =~ m@^/@); + + my $ourl = $url; + + $url = $base2 . $url; + $url =~ s@/\./@/@g; # expand "." + 1 while ($url =~ s@/[^/]+/\.\./@/@s); # expand ".." + + $url .= $tail; # put anchors/args back + + print STDERR "$progname: relative URL: $ourl --> $url\n" + if ($verbose > 1); + + } else { + print STDERR "$progname: absolute URL: $url\n" + if ($verbose > 2); + } + + return $url; +} + + # Given the URL of an image, download it into the given directory # and return the file name. # sub download_image($$$) { my ($url, $uid, $dir) = @_; - my ($ext) = ($url =~ m@\.([a-z\d]+)$@si); + my $url2 = $url; + $url2 =~ s/\#.*$//s; # Omit search terms after file extension + $url2 =~ s/\?.*$//s; + my ($ext) = ($url =~ m@\.([a-z\d]+)$@si); + ($ext) = ($url2 =~ m@\.([a-z\d]+)$@si) unless $ext; + + # If the feed hasn't put a sane extension on their URLs, nothing's going + # to work. This code assumes that file names have extensions, even the + # ones in the cache directory. + # + if (! $ext) { + print STDERR "$progname: skipping extensionless URL: $url\n" + if ($verbose > 1); + return undef; + } + + # Don't bother downloading files that we will reject anyway. + # + if (! ($url =~ m/$good_file_re/io || + $url2 =~ m/$good_file_re/io)) { + print STDERR "$progname: skipping non-image URL: $url\n" + if ($verbose > 1); + return undef; + } + my $file = md5_file ($uid); $file .= '.' . lc($ext) if $ext; @@ -561,16 +710,34 @@ sub download_image($$$) { # Special-case kludge for Flickr: # Their RSS feeds sometimes include only the small versions of the images. - # So if the URL ends in "s" (75x75), "t" (100x100) or "m" (240x240),then - # munge it to be "b" (1024x1024). + # So if the URL ends in one of the "small-size" letters, change it to "b". + # + # _o orig, 1600 + + # _k large, 2048 max + # _h large, 1600 max + # _b large, 1024 max + # _c medium, 800 max + # _z medium, 640 max + # "" medium, 500 max + # _n small, 320 max + # _m small, 240 max + # _t thumb, 100 max + # _q square, 150x150 + # _s square, 75x75 # - $url =~ s@_[stm](\.[a-z]+)$@_b$1@si + # Note: if we wanted to get the _k or _o version instead of the _b or _h + # version, we'd need to crack the DRM -- which is easy: see crack_secret + # in "https://www.jwz.org/hacks/galdown". + # + $url =~ s@_[sqtmnzc](\.[a-z]+)$@_b$1@si if ($url =~ m@^https?://[^/?#&]*?flickr\.com/@si); print STDERR "$progname: downloading: $dir/$file for $uid / $url\n" if ($verbose > 1); init_lwp(); $LWP::Simple::ua->agent ("$progname/$version"); + + $url =~ s/\#.*$//s; # Omit search terms my $status = LWP::Simple::mirror ($url, "$dir/$file"); if (!LWP::Simple::is_success ($status)) { print STDERR "$progname: error $status: $url\n"; # keep going @@ -590,7 +757,12 @@ sub mirror_feed($) { my $dir = "$ENV{HOME}/Library/Caches"; # MacOS location if (-d $dir) { $dir = "$dir/org.jwz.xscreensaver.feeds"; - } elsif (-d "$ENV{HOME}/tmp") { + } elsif (-d "$ENV{HOME}/.cache") { # Gnome "FreeDesktop XDG" location + $dir = "$ENV{HOME}/.cache/xscreensaver"; + if (! -d $dir) { mkdir ($dir) || error ("mkdir $dir: $!"); } + $dir .= "/feeds"; + if (! -d $dir) { mkdir ($dir) || error ("mkdir $dir: $!"); } + } elsif (-d "$ENV{HOME}/tmp") { # If ~/tmp/ exists, use it. $dir = "$ENV{HOME}/tmp/.xscreensaver-feeds"; } else { $dir = "$ENV{HOME}/.xscreensaver-feeds"; @@ -628,10 +800,11 @@ sub mirror_feed($) { my $poll_p = ($mtime + $feed_max_age < time); - $poll_p = 1 unless ($cache_p); # poll again now with --no-cache cmd line arg. + # --no-cache cmd line arg means poll again right now. + $poll_p = 1 unless ($cache_p); - # Even if the cache is young, let's make sure there are at least - # a few files in it, and re-check if not. + # Even if the cache is young, make sure there is at least one file, + # and re-check if not. # if (! $poll_p) { my $count = 0; @@ -671,13 +844,14 @@ sub mirror_feed($) { if ($verbose > 1); foreach my $p (@urls) { my ($furl, $id) = @$p; + $furl = expand_url ($furl, $url); my $f = download_image ($furl, $id, $dir); next unless $f; $files{$f} = 1; # Got it, don't delete $count++; } - print STDERR "$progname: empty feed: $url\n" if ($count <= 0); + my $empty_p = ($count <= 0); # Now delete any files that are no longer in the feed. # But if there was nothing in the feed (network failure?) @@ -698,9 +872,13 @@ sub mirror_feed($) { } } - # Both feed and cache are empty. No files at all. + # Both feed and cache are empty. No files at all. Bail. error ("empty feed: $url") if ($kept <= 1); + # Feed is empty, but we have some files from last time. Warn. + print STDERR "$progname: empty feed: using cache: $url\n" + if ($empty_p); + $mtime = time(); # update the timestamp } else { @@ -777,8 +955,6 @@ sub find_random_file($) { write_cache ($dir); -# @all_files = sort(@all_files); - if ($#all_files < 0) { print STDERR "$progname: no files in $dir\n"; exit 1; @@ -791,7 +967,7 @@ sub find_random_file($) { my $file = $all_files[$n]; if (large_enough_p ($file)) { if (! $url) { - $file =~ s@^\Q$dir\L/@@so || die; # remove $dir from front + $file =~ s@^\Q$dir/@@so || die; # remove $dir from front } return $file; } @@ -932,11 +1108,10 @@ sub image_size($) { sub image_file_size($) { my ($file) = @_; my $in; - if (! open ($in, '<', $file)) { + if (! open ($in, '<:raw', $file)) { print STDERR "$progname: $file: $!\n" if ($verbose); return (); } - binmode ($in); # Larry can take Unicode and shove it up his ass sideways. my $body = ''; sysread ($in, $body, 1024 * 50); # The first 50k should be enough. close $in; # (It's not for certain huge jpegs... @@ -951,7 +1126,7 @@ sub error($) { } sub usage() { - print STDERR "usage: $progname [--verbose] directory\n" . + print STDERR "usage: $progname [--verbose] directory-or-feed-url\n\n" . " Prints the name of a randomly-selected image file. The directory\n" . " is searched recursively. Images smaller than " . "${min_image_width}x${min_image_height} are excluded.\n" . @@ -967,16 +1142,16 @@ sub main() { while ($_ = $ARGV[0]) { shift @ARGV; - if ($_ eq "--verbose") { $verbose++; } - elsif (m/^-v+$/) { $verbose += length($_)-1; } - elsif ($_ eq "--name") { } # ignored, for compatibility - elsif ($_ eq "--spotlight") { $use_spotlight_p = 1; } - elsif ($_ eq "--no-spotlight") { $use_spotlight_p = 0; } - elsif ($_ eq "--cache") { $cache_p = 1; } - elsif ($_ eq "--no-cache") { $cache_p = 0; } - elsif (m/^-./) { usage; } - elsif (!defined($dir)) { $dir = $_; } - else { usage; } + if (m/^--?verbose$/s) { $verbose++; } + elsif (m/^-v+$/s) { $verbose += length($_)-1; } + elsif (m/^--?name$/s) { } # ignored, for compatibility + elsif (m/^--?spotlight$/s) { $use_spotlight_p = 1; } + elsif (m/^--?no-spotlight$/s) { $use_spotlight_p = 0; } + elsif (m/^--?cache$/s) { $cache_p = 1; } + elsif (m/^--?no-?cache$/s) { $cache_p = 0; } + elsif (m/^-./) { usage; } + elsif (!defined($dir)) { $dir = $_; } + else { usage; } } usage unless (defined($dir));