X-Git-Url: http://git.hungrycats.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=driver%2Fxscreensaver-getimage-file;h=6335d931e8cb4dced2ffffc8cacd1a75052d0f20;hb=78add6e627ee5f10e1fa6f3852602ea5066eee5a;hp=981ddb753d6510edce449841f2db11eb8185b5bb;hpb=aa75c7476aeaa84cf3abc192b376a8b03c325213;p=xscreensaver diff --git a/driver/xscreensaver-getimage-file b/driver/xscreensaver-getimage-file index 981ddb75..6335d931 100755 --- a/driver/xscreensaver-getimage-file +++ b/driver/xscreensaver-getimage-file @@ -1,5 +1,5 @@ #!/usr/bin/perl -w -# Copyright © 2001-2016 Jamie Zawinski . +# Copyright © 2001-2018 Jamie Zawinski . # # Permission to use, copy, modify, distribute, and sell this software and its # documentation for any purpose is hereby granted without fee, provided that @@ -45,10 +45,6 @@ import Fcntl ':mode' unless defined &S_ISUID; # but it is here in Perl 5.8 # but in Perl 5.10, both of these load, and cause errors! # So we have to check for S_ISUID instead of S_ISDIR? WTF? -use bytes; # Larry can take Unicode and shove it up his ass sideways. - # Perl 5.8.0 causes us to start getting incomprehensible - # errors about UTF-8 all over the place without this. - use Digest::MD5 qw(md5_base64); # Some Linux systems don't install LWP by default! @@ -57,7 +53,7 @@ BEGIN { eval 'use LWP::Simple;' } my $progname = $0; $progname =~ s@.*/@@g; -my ($version) = ('$Revision: 1.40 $' =~ m/\s(\d[.\d]+)\s/s); +my ($version) = ('$Revision: 1.46 $' =~ m/\s(\d[.\d]+)\s/s); my $verbose = 0; @@ -99,8 +95,8 @@ my $good_file_re = '\.(' . join("|", @good_extensions) . ')$'; # my @nondir_extensions = ('ai', 'bmp', 'bz2', 'cr2', 'crw', 'db', 'dmg', 'eps', 'gz', 'hqx', 'htm', 'html', 'icns', 'ilbm', 'mov', - 'nef', 'pbm', 'pdf', 'pl', 'ppm', 'ps', 'psd', 'sea', 'sh', 'shtml', - 'tar', 'tgz', 'thb', 'txt', 'xcf', 'xmp', 'Z', 'zip' ); + 'nef', 'pbm', 'pdf', 'php', 'pl', 'ppm', 'ps', 'psd', 'sea', 'sh', + 'shtml', 'tar', 'tgz', 'thb', 'txt', 'xcf', 'xmp', 'Z', 'zip' ); my $nondir_re = '\.(' . join("|", @nondir_extensions) . ')$'; @@ -108,8 +104,8 @@ my $nondir_re = '\.(' . join("|", @nondir_extensions) . ')$'; # this is so that you can use an image directory that contains both big # images and thumbnails, and have it only select the big versions. # -my $min_image_width = 255; -my $min_image_height = 255; +my $min_image_width = 500; +my $min_image_height = 500; my @all_files = (); # list of "good" files we've collected my %seen_inodes; # for breaking recursive symlink loops @@ -254,7 +250,7 @@ sub read_cache($) { $dd = "$ENV{HOME}/.cache/xscreensaver"; if (! -d $dd) { mkdir ($dd) || error ("mkdir $dd: $!"); } $cache_file_name = "$dd/xscreensaver-getimage.cache" - } elsif (-d "$ENV{HOME}/tmp") { # If ~/.tmp/ exists, use it. + } elsif (-d "$ENV{HOME}/tmp") { # If ~/tmp/ exists, use it. $cache_file_name = "$ENV{HOME}/tmp/.xscreensaver-getimage.cache"; } else { $cache_file_name = "$ENV{HOME}/.xscreensaver-getimage.cache"; @@ -391,12 +387,50 @@ sub set_proxy($) { sub init_lwp() { if (! defined ($LWP::Simple::ua)) { error ("\n\n\tPerl is broken. Do this to repair it:\n" . - "\n\tsudo cpan LWP::Simple\n"); + "\n\tsudo cpan LWP::Simple LWP::Protocol::https Mozilla::CA\n"); } set_proxy ($LWP::Simple::ua); } +sub sanity_check_lwp() { + my $url1 = 'https://www.mozilla.org/'; + my $url2 = 'http://www.mozilla.org/'; + my $body = (LWP::Simple::get($url1) || ''); + if (length($body) < 10240) { + my $err = ""; + $body = (LWP::Simple::get($url2) || ''); + if (length($body) < 10240) { + $err = "Perl is broken: neither HTTP nor HTTPS URLs work."; + } else { + $err = "Perl is broken: HTTP URLs work but HTTPS URLs don't."; + } + $err .= "\nMaybe try: sudo cpan -f Mozilla::CA LWP::Protocol::https"; + $err =~ s/^/\t/gm; + error ("\n\n$err\n"); + } +} + + +# If the URL does not already end with an extension appropriate for the +# content-type, add it after a "#" search. +# +# This is for when we know the content type of the URL, but the URL is +# some crazy thing without an extension. The files on disk need to have +# proper extensions. +# +sub force_extension($$) { + my ($url, $ct) = @_; + return $url unless (defined($url) && defined($ct)); + my ($ext) = ($ct =~ m@^image/([-a-z\d]+)@si); + return $url unless $ext; + $ext = lc($ext); + $ext = 'jpg' if ($ext eq 'jpeg'); + return $url if ($url =~ m/\.$ext$/si); + return "$url#.$ext"; +} + + # Returns a list of the image enclosures in the RSS or Atom feed. # Elements of the list are references, [ "url", "guid" ]. # @@ -408,17 +442,29 @@ sub parse_feed($) { $LWP::Simple::ua->agent ("$progname/$version"); $LWP::Simple::ua->timeout (10); # bail sooner than the default of 3 minutes + + # Half the time, random Linux systems don't have Mozilla::CA installed, + # which results in "Can't verify SSL peers without knowning which + # Certificate Authorities to trust". + # + # In xscreensaver-text we just disabled certificate checks. However, + # malicious images really do exist, so for xscreensaver-getimage-file, + # let's actually require that SSL be installed properly. + + my $body = (LWP::Simple::get($url) || ''); - if ($body !~ m@^<\?xml\s@si) { + if ($body !~ m@^\s*<(\?xml|rss)\b@si) { # Not an RSS/Atom feed. Try RSS autodiscovery. # (Great news, everybody: Flickr no longer provides RSS for "Sets", # only for "Photostreams", and only the first 20 images of those. # Thanks, assholes.) - error ("null response: $url") - if ($body =~ m/^\s*$/s); + if ($body =~ m/^\s*$/s) { + sanity_check_lwp(); + error ("null response: $url"); + } error ("not an RSS or Atom feed, or HTML: $url") unless ($body =~ m@<(HEAD|BODY|A|IMG)\b@si); @@ -461,66 +507,74 @@ sub parse_feed($) { # First look for # if (! $iurl) { - $item =~ s!(]*>)!{ - my $link = $1; - my ($rel) = ($link =~ m/\bREL\s*=\s*[\"\']?([^<>\'\"]+)/si); - my ($type) = ($link =~ m/\bTYPE\s*=\s*[\"\']?([^<>\'\"]+)/si); + foreach my $link ($item =~ m@]*>@gsi) { + last if $iurl; my ($href) = ($link =~ m/\bHREF\s*=\s*[\"\']([^<>\'\"]+)/si); - - if ($rel && lc($rel) eq 'enclosure') { - if ($type) { - $href = undef unless ($type =~ m@^image/@si); # omit videos - } - $iurl = html_unquote($href) if $href; - } - $link; - }!gsexi; + my ($type) = ($link =~ m/\bTYPE\s*=\s*[\"\']?([^<>\'\"]+)/si); + my ($rel) = ($link =~ m/\bREL\s*=\s*[\"\']?([^<>\'\"]+)/si); + $href = undef unless (lc($rel || '') eq 'enclosure'); + $href = undef if ($type && $type !~ m@^image/@si); # omit videos + $iurl = html_unquote($href) if $href; + $iurl = force_extension ($iurl, $type); + } } # Then look for # if (! $iurl) { - $item =~ s!(]*>)!{ - my $link = $1; + foreach my $link ($item =~ m@]*>@gsi) { + last if $iurl; my ($href) = ($link =~ m/\bURL\s*=\s*[\"\']([^<>\'\"]+)/si); + my ($type) = ($link =~ m/\bTYPE\s*=\s*[\"\']?([^<>\'\"]+)/si); + my ($med) = ($link =~ m/\bMEDIUM\s*=\s*[\"\']?([^<>\'\"]+)/si); + $type = 'image/jpeg' if (!$type && lc($med || '') eq 'image'); + $href = undef if ($type && $type !~ m@^image/@si); # omit videos $iurl = html_unquote($href) if $href; - $link; - }!gsexi; + $iurl = force_extension ($iurl, $type); + } } # Then look for # if (! $iurl) { - $item =~ s!(]*>)!{ - my $link = $1; - my ($type) = ($link =~ m/\bTYPE\s*=\s*[\"\']?([^<>\'\"]+)/si); + foreach my $link ($item =~ m@]*>@gsi) { + last if $iurl; my ($href) = ($link =~ m/\bURL\s*=\s*[\"\']([^<>\'\"]+)/si); - $iurl = html_unquote($href) - if ($href && $type && $type =~ m@^image/@si); # omit videos - $link; - }!gsexi; + my ($type) = ($link =~ m/\bTYPE\s*=\s*[\"\']?([^<>\'\"]+)/si); + $href = undef if ($type && $type !~ m@^image/@si); # omit videos + $iurl = html_unquote($href) if ($href); + $iurl = force_extension ($iurl, $type); + } } # Ok, maybe there's an image in the field? # if (! $iurl) { - $item =~ s!((]*>)([^<>]*))!{ - my ($all, $u2) = ($1, $3); + foreach my $link ($item =~ m@]*>([^<>]*)@gsi) { + last if $iurl; + my $u2 = $1; $iurl = html_unquote($u2) if ($u2 =~ m/$good_file_re/io); - $all; - }!gsexi; + if (! $iurl) { + my $u3 = $u2; + $u3 =~ s/#.*$//gs; + $u3 =~ s/[?&].*$//gs; + $iurl = html_unquote($u2) if ($u3 =~ m/$good_file_re/io); + } + } } # Then look for ... with an inside. # if (! $iurl) { - $item =~ s!(]*>.*?)!{ - my $desc = $1; - $desc = html_unquote($desc); + foreach my $link ($item =~ m@]*>(.*?)@gsi) { + last if $iurl; + my $desc = html_unquote($1); my ($href) = ($desc =~ m@]*\bSRC=[\"\']?([^\"\'<>]+)@si); - $iurl = $href if ($href); - $desc; - }!gsexi; + $iurl = html_unquote($href) if ($href); + # If IMG SRC has a bogus extension, pretend it's a JPEG. + $iurl = force_extension ($iurl, 'image/jpeg') + if ($iurl && $iurl !~ m/$good_file_re/io); + } } # Could also do , but the above probably covers all @@ -537,6 +591,8 @@ sub parse_feed($) { # Then look for ... ($id) = ($item =~ m!]*>\s*([^<>]+?)\s*!si) unless $id; + # If we only have a GUID or LINK, but it's an image, use that. + $iurl = $id if (!$iurl && $id && $id =~ m/$good_file_re/io); if ($iurl) { $id = $iurl unless $id; @@ -567,6 +623,49 @@ sub md5_file($) { } +# expands the first URL relative to the second. +# +sub expand_url($$) { + my ($url, $base) = @_; + + $url =~ s/^\s+//gs; # lose whitespace at front and back + $url =~ s/\s+$//gs; + + if (! ($url =~ m/^[a-z]+:/)) { + + $base =~ s@(\#.*)$@@; # strip anchors + $base =~ s@(\?.*)$@@; # strip arguments + $base =~ s@/[^/]*$@/@; # take off trailing file component + + my $tail = ''; + if ($url =~ s@(\#.*)$@@) { $tail = $1; } # save anchors + if ($url =~ s@(\?.*)$@@) { $tail = "$1$tail"; } # save arguments + + my $base2 = $base; + + $base2 =~ s@^([a-z]+:/+[^/]+)/.*@$1@ # if url is an absolute path + if ($url =~ m@^/@); + + my $ourl = $url; + + $url = $base2 . $url; + $url =~ s@/\./@/@g; # expand "." + 1 while ($url =~ s@/[^/]+/\.\./@/@s); # expand ".." + + $url .= $tail; # put anchors/args back + + print STDERR "$progname: relative URL: $ourl --> $url\n" + if ($verbose > 1); + + } else { + print STDERR "$progname: absolute URL: $url\n" + if ($verbose > 2); + } + + return $url; +} + + # Given the URL of an image, download it into the given directory # and return the file name. # @@ -576,7 +675,8 @@ sub download_image($$$) { my $url2 = $url; $url2 =~ s/\#.*$//s; # Omit search terms after file extension $url2 =~ s/\?.*$//s; - my ($ext) = ($url2 =~ m@\.([a-z\d]+)$@si); + my ($ext) = ($url =~ m@\.([a-z\d]+)$@si); + ($ext) = ($url2 =~ m@\.([a-z\d]+)$@si) unless $ext; # If the feed hasn't put a sane extension on their URLs, nothing's going # to work. This code assumes that file names have extensions, even the @@ -590,7 +690,8 @@ sub download_image($$$) { # Don't bother downloading files that we will reject anyway. # - if (! ($url2 =~ m/$good_file_re/io)) { + if (! ($url =~ m/$good_file_re/io || + $url2 =~ m/$good_file_re/io)) { print STDERR "$progname: skipping non-image URL: $url\n" if ($verbose > 1); return undef; @@ -624,6 +725,10 @@ sub download_image($$$) { # _q square, 150x150 # _s square, 75x75 # + # Note: if we wanted to get the _k or _o version instead of the _b or _h + # version, we'd need to crack the DRM -- which is easy: see crack_secret + # in "https://www.jwz.org/hacks/galdown". + # $url =~ s@_[sqtmnzc](\.[a-z]+)$@_b$1@si if ($url =~ m@^https?://[^/?#&]*?flickr\.com/@si); @@ -631,6 +736,8 @@ sub download_image($$$) { if ($verbose > 1); init_lwp(); $LWP::Simple::ua->agent ("$progname/$version"); + + $url =~ s/\#.*$//s; # Omit search terms my $status = LWP::Simple::mirror ($url, "$dir/$file"); if (!LWP::Simple::is_success ($status)) { print STDERR "$progname: error $status: $url\n"; # keep going @@ -655,7 +762,7 @@ sub mirror_feed($) { if (! -d $dir) { mkdir ($dir) || error ("mkdir $dir: $!"); } $dir .= "/feeds"; if (! -d $dir) { mkdir ($dir) || error ("mkdir $dir: $!"); } - } elsif (-d "$ENV{HOME}/tmp") { # If ~/.tmp/ exists, use it. + } elsif (-d "$ENV{HOME}/tmp") { # If ~/tmp/ exists, use it. $dir = "$ENV{HOME}/tmp/.xscreensaver-feeds"; } else { $dir = "$ENV{HOME}/.xscreensaver-feeds"; @@ -737,6 +844,7 @@ sub mirror_feed($) { if ($verbose > 1); foreach my $p (@urls) { my ($furl, $id) = @$p; + $furl = expand_url ($furl, $url); my $f = download_image ($furl, $id, $dir); next unless $f; $files{$f} = 1; # Got it, don't delete