+sub html_unquote($) {
+ my ($h) = @_;
+
+ # This only needs to handle entities that occur in RSS, not full HTML.
+ my %ent = ( 'amp' => '&', 'lt' => '<', 'gt' => '>',
+ 'quot' => '"', 'apos' => "'" );
+ $h =~ s/(&(\#)?([[:alpha:]\d]+);?)/
+ {
+ my ($o, $c) = ($1, $3);
+ if (! defined($2)) {
+ $c = $ent{$c}; # for <
+ } else {
+ if ($c =~ m@^x([\dA-F]+)$@si) { # for A
+ $c = chr(hex($1));
+ } elsif ($c =~ m@^\d+$@si) { # for A
+ $c = chr($c);
+ } else {
+ $c = undef;
+ }
+ }
+ ($c || $o);
+ }
+ /gexi;
+ return $h;
+}
+
+
+
+# Figure out what the proxy server should be, either from environment
+# variables or by parsing the output of the (MacOS) program "scutil",
+# which tells us what the system-wide proxy settings are.
+#
+sub set_proxy($) {
+ my ($ua) = @_;
+
+ my $proxy_data = `scutil --proxy 2>/dev/null`;
+ foreach my $proto ('http', 'https') {
+ my ($server) = ($proxy_data =~ m/\b${proto}Proxy\s*:\s*([^\s]+)/si);
+ my ($port) = ($proxy_data =~ m/\b${proto}Port\s*:\s*([^\s]+)/si);
+ my ($enable) = ($proxy_data =~ m/\b${proto}Enable\s*:\s*([^\s]+)/si);
+
+ if ($server && $enable) {
+ # Note: this ignores the "ExceptionsList".
+ my $proto2 = 'http';
+ $ENV{"${proto}_proxy"} = ("${proto2}://" . $server .
+ ($port ? ":$port" : "") . "/");
+ print STDERR "$progname: MacOS $proto proxy: " .
+ $ENV{"${proto}_proxy"} . "\n"
+ if ($verbose > 2);
+ }
+ }
+
+ $ua->env_proxy();
+}
+
+
+sub init_lwp() {
+ if (! defined ($LWP::Simple::ua)) {
+ error ("\n\n\tPerl is broken. Do this to repair it:\n" .
+ "\n\tsudo cpan LWP::Simple LWP::Protocol::https Mozilla::CA\n");
+ }
+ set_proxy ($LWP::Simple::ua);
+}
+
+
+sub sanity_check_lwp() {
+ my $url1 = 'https://www.mozilla.org/';
+ my $url2 = 'http://www.mozilla.org/';
+ my $body = (LWP::Simple::get($url1) || '');
+ if (length($body) < 10240) {
+ my $err = "";
+ $body = (LWP::Simple::get($url2) || '');
+ if (length($body) < 10240) {
+ $err = "Perl is broken: neither HTTP nor HTTPS URLs work.";
+ } else {
+ $err = "Perl is broken: HTTP URLs work but HTTPS URLs don't.";
+ }
+ $err .= "\nMaybe try: sudo cpan -f Mozilla::CA LWP::Protocol::https";
+ $err =~ s/^/\t/gm;
+ error ("\n\n$err\n");
+ }
+}
+
+
+# If the URL does not already end with an extension appropriate for the
+# content-type, add it after a "#" search.
+#
+# This is for when we know the content type of the URL, but the URL is
+# some crazy thing without an extension. The files on disk need to have
+# proper extensions.
+#
+sub force_extension($$) {
+ my ($url, $ct) = @_;
+ return $url unless (defined($url) && defined($ct));
+ my ($ext) = ($ct =~ m@^image/([-a-z\d]+)@si);
+ return $url unless $ext;
+ $ext = lc($ext);
+ $ext = 'jpg' if ($ext eq 'jpeg');
+ return $url if ($url =~ m/\.$ext$/si);
+ return "$url#.$ext";
+}
+
+