X-Git-Url: http://git.hungrycats.org/cgi-bin/gitweb.cgi?p=xscreensaver;a=blobdiff_plain;f=OSX%2Fiostextclient.m;h=ea9f3b3e8dd3fb470c359132b041f189c430b224;hp=f1f3ad7aed9686638388e7171184c8c0d9bd8857;hb=88cfe534a698a0562e81345957a50714af1453bc;hpb=d1ae2829ff0fd2a96c16a0c8c5420efaa47d7b30

diff --git a/OSX/iostextclient.m b/OSX/iostextclient.m
index f1f3ad7a..ea9f3b3e 100644
--- a/OSX/iostextclient.m
+++ b/OSX/iostextclient.m
@@ -1,4 +1,4 @@
-/* xscreensaver, Copyright (c) 2012-2014 Jamie Zawinski <jwz@jwz.org>
+/* xscreensaver, Copyright (c) 2012-2015 Jamie Zawinski <jwz@jwz.org>
  *
  * Permission to use, copy, modify, distribute, and sell this software and its
  * documentation for any purpose is hereby granted without fee, provided that
@@ -20,9 +20,14 @@
 
 #include "textclient.h"
 #include "resources.h"
+#include "utf8wc.h"
 
 #include <stdio.h>
 
+#undef countof
+#define countof(x) (sizeof((x))/sizeof((*x)))
+
+
 extern const char *progname;
 
 struct text_data {
@@ -110,16 +115,236 @@ date_string (void)
 }
 
 
-static void
-strip_html (char *html)
+/* Returns a malloc'd copy of the string with some basic HTML entities decoded
+   to UTF8 (the caller frees it), or 0 if memory could not be allocated.  */
+static char *
+decode_entities (const char *html)
+{
+  size_t size = (strlen(html) * 4) + 1;  // room for UTF8
+  char *ret = (char *) malloc (size);
+  const char *in = html;
+  char *out = ret;
+  if (! ret) return 0;
+
+  const struct { const char *c; const char *e; } entities[] = {
+    { "amp", "&" },
+    { "lt",  "<" },
+    { "gt",  ">" },
+
+    // Convert Latin1 to UTF8
+    { "nbsp", " " },			// (nbsp) 160
+    { "iexcl", "\302\241" },		// ¡ 161
+    { "cent", "\302\242" },		// ¢ 162
+    { "pound", "\302\243" },		// £ 163
+    { "curren", "\302\244" },		// ¤ 164
+    { "yen", "\302\245" },		// ¥ 165
+    { "brvbar", "\302\246" },		// ¦ 166
+    { "sect", "\302\247" },		// § 167
+    { "uml", "\302\250" },		// ¨ 168
+    { "copy", "\302\251" },		// © 169
+    { "ordf", "\302\252" },		// ª 170
+    { "laquo", "\302\253" },		// « 171
+    { "not", "\302\254" },		// ¬ 172
+    { "shy", "\302\255" },		// (soft hyphen) 173
+    { "reg", "\302\256" },		// ® 174
+    { "macr", "\302\257" },		// ¯ 175
+    { "deg", "\302\260" },		// ° 176
+    { "plusmn", "\302\261" },		// ± 177
+    { "sup2", "\302\262" },		// ² 178
+    { "sup3", "\302\263" },		// ³ 179
+    { "acute", "\302\264" },		// ´ 180
+    { "micro", "\302\265" },		// µ 181
+    { "para", "\302\266" },		// ¶ 182
+    { "middot", "\302\267" },		// · 183
+    { "cedil", "\302\270" },		// ¸ 184
+    { "sup1", "\302\271" },		// ¹ 185
+    { "ordm", "\302\272" },		// º 186
+    { "raquo", "\302\273" },		// » 187
+    { "frac14", "\302\274" },		// ¼ 188
+    { "frac12", "\302\275" },		// ½ 189
+    { "frac34", "\302\276" },		// ¾ 190
+    { "iquest", "\302\277" },		// ¿ 191
+    { "Agrave", "\303\200" },		// À 192
+    { "Aacute", "\303\201" },		// Á 193
+    { "Acirc", "\303\202" },		// Â 194
+    { "Atilde", "\303\203" },		// Ã 195
+    { "Auml", "\303\204" },		// Ä 196
+    { "Aring", "\303\205" },		// Å 197
+    { "AElig", "\303\206" },		// Æ 198
+    { "Ccedil", "\303\207" },		// Ç 199
+    { "Egrave", "\303\210" },		// È 200
+    { "Eacute", "\303\211" },		// É 201
+    { "Ecirc", "\303\212" },		// Ê 202
+    { "Euml", "\303\213" },		// Ë 203
+    { "Igrave", "\303\214" },		// Ì 204
+    { "Iacute", "\303\215" },		// Í 205
+    { "Icirc", "\303\216" },		// Î 206
+    { "Iuml", "\303\217" },		// Ï 207
+    { "ETH", "\303\220" },		// Ð 208
+    { "Ntilde", "\303\221" },		// Ñ 209
+    { "Ograve", "\303\222" },		// Ò 210
+    { "Oacute", "\303\223" },		// Ó 211
+    { "Ocirc", "\303\224" },		// Ô 212
+    { "Otilde", "\303\225" },		// Õ 213
+    { "Ouml", "\303\226" },		// Ö 214
+    { "times", "\303\227" },		// × 215
+    { "Oslash", "\303\230" },		// Ø 216
+    { "Ugrave", "\303\231" },		// Ù 217
+    { "Uacute", "\303\232" },		// Ú 218
+    { "Ucirc", "\303\233" },		// Û 219
+    { "Uuml", "\303\234" },		// Ü 220
+    { "Yacute", "\303\235" },		// Ý 221
+    { "THORN", "\303\236" },		// Þ 222
+    { "szlig", "\303\237" },		// ß 223
+    { "agrave", "\303\240" },		// à 224
+    { "aacute", "\303\241" },		// á 225
+    { "acirc", "\303\242" },		// â 226
+    { "atilde", "\303\243" },		// ã 227
+    { "auml", "\303\244" },		// ä 228
+    { "aring", "\303\245" },		// å 229
+    { "aelig", "\303\246" },		// æ 230
+    { "ccedil", "\303\247" },		// ç 231
+    { "egrave", "\303\250" },		// è 232
+    { "eacute", "\303\251" },		// é 233
+    { "ecirc", "\303\252" },		// ê 234
+    { "euml", "\303\253" },		// ë 235
+    { "igrave", "\303\254" },		// ì 236
+    { "iacute", "\303\255" },		// í 237
+    { "icirc", "\303\256" },		// î 238
+    { "iuml", "\303\257" },		// ï 239
+    { "eth", "\303\260" },		// ð 240
+    { "ntilde", "\303\261" },		// ñ 241
+    { "ograve", "\303\262" },		// ò 242
+    { "oacute", "\303\263" },		// ó 243
+    { "ocirc", "\303\264" },		// ô 244
+    { "otilde", "\303\265" },		// õ 245
+    { "ouml", "\303\266" },		// ö 246
+    { "divide", "\303\267" },		// ÷ 247
+    { "oslash", "\303\270" },		// ø 248
+    { "ugrave", "\303\271" },		// ù 249
+    { "uacute", "\303\272" },		// ú 250
+    { "ucirc", "\303\273" },		// û 251
+    { "uuml", "\303\274" },		// ü 252
+    { "yacute", "\303\275" },		// ý 253
+    { "thorn", "\303\276" },		// þ 254
+    { "yuml", "\303\277" },		// ÿ 255
+
+      // And some random others
+    { "bdquo", "\342\200\236" },	// „
+    { "bull", "\342\200\242" },		// •
+    { "circ", "\313\206" },		// ˆ
+    { "cong", "\342\211\205" },		// ≅
+    { "empty", "\342\210\205" },	// ∅
+    { "emsp", "\342\200\203" },		// (em space)
+    { "ensp", "\342\200\202" },		// (en space)
+    { "equiv", "\342\211\241" },	// ≡
+    { "frasl", "\342\201\204" },	// ⁄
+    { "ge", "\342\211\245" },		// ≥
+    { "hArr", "\342\207\224" },		// ⇔
+    { "harr", "\342\206\224" },		// ↔
+    { "hellip", "\342\200\246" },	// …
+    { "lArr", "\342\207\220" },		// ⇐
+    { "lang", "\342\237\250" },		// ⟨
+    { "larr", "\342\206\220" },		// ←
+    { "ldquo", "\342\200\234" },	// “
+    { "le", "\342\211\244" },		// ≤
+    { "lowast", "\342\210\227" },	// ∗
+    { "loz", "\342\227\212" },		// ◊
+    { "lsaquo", "\342\200\271" },	// ‹
+    { "lsquo", "\342\200\230" },	// ‘
+    { "mdash", "\342\200\224" },	// —
+    { "minus", "\342\210\222" },	// −
+    { "ndash", "\342\200\223" },	// –
+    { "ne", "\342\211\240" },		// ≠
+    { "OElig", "\305\222" },		// Œ
+    { "oelig", "\305\223" },		// œ
+    { "prime", "\342\200\262" },	// ′
+    { "quot", "\342\200\235" },		// ”
+    { "rArr", "\342\207\222" },		// ⇒
+    { "rang", "\342\237\251" },		// ⟩
+    { "rarr", "\342\206\222" },		// →
+    { "rdquo", "\342\200\235" },	// ”
+    { "rsaquo", "\342\200\272" },	// ›
+    { "rsquo", "\342\200\231" },	// ’
+    { "sbquo", "\342\200\232" },	// ‚
+    { "sim", "\342\210\274" },		// ∼
+    { "thinsp", "\342\200\211" },	// (thin space)
+    { "tilde", "\313\234" },		// ˜
+    { "trade", "\342\204\242" },	// ™
+  };
+
+  // NOTE(review): assumes utf8_encode (uc, buf, space) returns bytes written.
+  while (*in) {
+    if (*in == '&') {
+      int done = 0;
+      if (in[1] == '#' && in[2] == 'x') {			// &#x41;
+        unsigned long i = 0;
+        in += 3;				// skip "&#x"
+        while ((*in >= '0' && *in <= '9') ||
+               (*in >= 'A' && *in <= 'F') ||
+               (*in >= 'a' && *in <= 'f')) {
+          i = (i * 16) + (*in >= 'a' ? *in - 'a' + 10 :
+                          *in >= 'A' ? *in - 'A' + 10 :
+                          *in - '0');
+          in++;
+        }
+        if (i) out += utf8_encode (i, out, (ret + size) - out);  // no NULs
+        done = 1;
+      } else if (in[1] == '#') {				// &#65;
+        unsigned long i = 0;
+        in += 2;				// skip "&#"
+        while (*in >= '0' && *in <= '9') {
+          i = (i * 10) + (*in - '0');
+          in++;
+        }
+        if (i) out += utf8_encode (i, out, (ret + size) - out);  // no NULs
+        done = 1;
+      } else {
+        // Names are matched by prefix; the first table entry that hits wins.
+        for (size_t i = 0; !done && i < countof(entities); i++) {
+          if (!strncmp (in+1, entities[i].c, strlen(entities[i].c))) {
+            strcpy (out, entities[i].e);
+            in  += strlen(entities[i].c) + 1;
+            out += strlen(entities[i].e);
+            done = 1;
+          }
+        }
+      }
+
+      if (done) {
+        if (*in == ';') in++;			// the trailing ';' is optional
+      } else {
+        *out++ = *in++;
+      }
+    } else {
+      *out++ = *in++;
+    }
+  }
+  *out = 0;
+
+  /* Shrink; if realloc fails, the original (larger) buffer is still valid. */
+  out = realloc (ret, out - ret + 1);
+  if (out) ret = out;
+  return ret;
+}
+
+
+/* Returns a copy of the HTML string that has been converted to plain text,
+   in UTF8 encoding.  HTML tags are stripped, <BR> and <P> are converted
+   to newlines, and some basic HTML entities are decoded.
+ */
+static char *
+strip_html (const char *html)
 {
   int tag = 0;
   int comment = 0;
   int white = 0;
   int nl = 0;
-  int entity = 0;
-  char *out = html;
-  for (char *in = html; *in; in++) {
+  char *ret = (char *) malloc ((strlen(html) * 4) + 1);  // room for UTF8
+  char *out = ret;
+  *out = 0;
+
+  for (const char *in = html; *in; in++) {
     if (comment) {
       if (!strncmp (in, "-->", 3)) {
         comment = 0;
@@ -127,15 +352,9 @@ strip_html (char *html)
     } else if (tag) {
       if (*in == '>') {
         tag = 0;
-        entity = 0;
-      }
-    } else if (entity) {
-      if (*in == ';') {
-        entity = 0;
       }
     } else if (*in == '<') {
       tag = 1;
-      entity = 0;
       if (!strncmp (in, "<!--", 4)) {
         comment = 1;
         tag = 0;
@@ -148,112 +367,6 @@ strip_html (char *html)
         if (nl < 2) { *out++ = '\n'; nl++; }
         white = 1;
       }
-    } else if (*in == '&') {
-      char *ss = 0;
-      entity = 1;
-
-      if      (!strncmp (in, "&amp", 4))    ss = "&";
-      else if (!strncmp (in, "&lt", 3))     ss = "<";
-      else if (!strncmp (in, "&gt", 3))     ss = ">";
-      else if (!strncmp (in, "&nbsp", 5))   ss = " ";
-
-      else if (!strncmp (in, "&AElig", 6))  ss = "AE";
-      else if (!strncmp (in, "&aelig", 6))  ss = "ae";
-      else if (!strncmp (in, "&bdquo", 6))  ss = "\"";
-      else if (!strncmp (in, "&brvbar", 7)) ss = "|";
-      else if (!strncmp (in, "&bull", 5))   ss = "*";
-      else if (!strncmp (in, "&circ", 5))   ss = "^";
-      else if (!strncmp (in, "&cong", 5))   ss = "=~";
-      else if (!strncmp (in, "&copy", 5))   ss = "(c)";
-      else if (!strncmp (in, "&curren", 7)) ss = "$";
-      else if (!strncmp (in, "&deg", 4))    ss = ".";
-      else if (!strncmp (in, "&divide", 7)) ss = "/";
-      else if (!strncmp (in, "&empty", 6))  ss = "0";
-      else if (!strncmp (in, "&emsp", 5))   ss = " ";
-      else if (!strncmp (in, "&ensp", 5))   ss = " ";
-      else if (!strncmp (in, "&equiv", 6))  ss = "==";
-      else if (!strncmp (in, "&frac12", 7)) ss = "1/2";
-      else if (!strncmp (in, "&frac14", 7)) ss = "1/4";
-      else if (!strncmp (in, "&frac34", 7)) ss = "3/4";
-      else if (!strncmp (in, "&frasl", 6))  ss = "/";
-      else if (!strncmp (in, "&ge", 3))     ss = ">=";
-      else if (!strncmp (in, "&hArr", 5))   ss = "<=>";
-      else if (!strncmp (in, "&harr", 5))   ss = "<->";
-      else if (!strncmp (in, "&hellip", 7)) ss = "...";
-      else if (!strncmp (in, "&iquest", 7)) ss = "?";
-      else if (!strncmp (in, "&lArr", 5))   ss = "<=";
-      else if (!strncmp (in, "&lang", 5))   ss = "<";
-      else if (!strncmp (in, "&laquo", 6))  ss = "<<";
-      else if (!strncmp (in, "&larr", 5))   ss = "<-";
-      else if (!strncmp (in, "&ldquo", 6))  ss = "\"";
-      else if (!strncmp (in, "&le", 3))     ss = "<=";
-      else if (!strncmp (in, "&lowast", 7)) ss = "*";
-      else if (!strncmp (in, "&loz", 4))    ss = "<>";
-      else if (!strncmp (in, "&lsaquo", 7)) ss = "<";
-      else if (!strncmp (in, "&lsquo", 6))  ss = "`";
-      else if (!strncmp (in, "&macr", 5))   ss = "'";
-      else if (!strncmp (in, "&mdash", 6))  ss = "--";
-      else if (!strncmp (in, "&micro", 6))  ss = "u";
-      else if (!strncmp (in, "&middot", 7)) ss = ".";
-      else if (!strncmp (in, "&minus", 6))  ss = "-";
-      else if (!strncmp (in, "&ndash", 6))  ss = "-";
-      else if (!strncmp (in, "&ne", 3))     ss = "!=";
-      else if (!strncmp (in, "&not", 4))    ss = "!";
-      else if (!strncmp (in, "&OElig", 6))  ss = "OE";
-      else if (!strncmp (in, "&oelig", 6))  ss = "oe";
-      else if (!strncmp (in, "&ordf", 5))   ss = "_";
-      else if (!strncmp (in, "&ordm", 5))   ss = "_";
-      else if (!strncmp (in, "&para", 5))   ss = "PP";
-      else if (!strncmp (in, "&plusmn", 7)) ss = "+/-";
-      else if (!strncmp (in, "&pound", 6))  ss = "#";
-      else if (!strncmp (in, "&prime", 6))  ss = "'";
-      else if (!strncmp (in, "&quot", 5))   ss = "\"";
-      else if (!strncmp (in, "&rArr", 5))   ss = "=>";
-      else if (!strncmp (in, "&rang", 5))   ss = ">";
-      else if (!strncmp (in, "&raquo", 6))  ss = ">>";
-      else if (!strncmp (in, "&rarr", 5))   ss = "->";
-      else if (!strncmp (in, "&rdquo", 6))  ss = "\"";
-      else if (!strncmp (in, "&reg", 4))    ss = "(r)";
-      else if (!strncmp (in, "&rsaquo", 7)) ss = ">";
-      else if (!strncmp (in, "&rsquo", 6))  ss = "'";
-      else if (!strncmp (in, "&sbquo", 6))  ss = "'";
-      else if (!strncmp (in, "&sect", 5))   ss = "SS";
-      else if (!strncmp (in, "&shy", 4))    ss = "";
-      else if (!strncmp (in, "&sim", 4))    ss = "~";
-      else if (!strncmp (in, "&sup1", 5))   ss = "[1]";
-      else if (!strncmp (in, "&sup2", 5))   ss = "[2]";
-      else if (!strncmp (in, "&sup3", 5))   ss = "[3]";
-      else if (!strncmp (in, "&szlig", 6))  ss = "B";
-      else if (!strncmp (in, "&thinsp", 7)) ss = " ";
-      else if (!strncmp (in, "&thorn", 6))  ss = "|";
-      else if (!strncmp (in, "&tilde", 6))  ss = "!";
-      else if (!strncmp (in, "&times", 6))  ss = "x";
-      else if (!strncmp (in, "&trade", 6))  ss = "[tm]";
-      else if (!strncmp (in, "&uml", 4))    ss = ":";
-      else if (!strncmp (in, "&yen", 4))    ss = "Y";
-
-      if (ss) {
-        strcpy (out, ss);
-        out += strlen(ss);
-      } else if (!strncmp (in, "&#", 2)) {	// &#65;
-        int i = 0;
-        for (char *in2 = in+2; *in2 >= '0' && *in2 <= '9'; in2++)
-          i = (i * 10) + (*in2 - '0');
-        *out = (i > 255 ? '?' : i);
-      } else if (!strncmp (in, "&#x", 3)) {	// &#x41;
-        int i = 0;
-        for (char *in2 = in+3;
-             ((*in2 >= '0' && *in2 <= '9') ||
-              (*in2 >= 'A' && *in2 <= 'F') ||
-              (*in2 >= 'a' && *in2 <= 'f'));
-             in2++)
-          i = (i * 16) + (*in2 >= 'a' ? *in2 - 'a' + 16 :
-                          *in2 >= 'A' ? *in2 - 'A' + 16 :
-                          *in2 - '0');
-        *out = (i > 255 ? '?' : i);
-      } else {
-        *out++ = in[1];    // first character of entity, e.g. &eacute;.
-      }
     } else if (*in == ' ' || *in == '\t' || *in == '\r' || *in == '\n') {
       if (!white && out != html)
         *out++ = ' ';
@@ -265,6 +378,14 @@ strip_html (char *html)
     }
   }
   *out = 0;
+
+  {
+    char *ret2 = decode_entities (ret);
+    free (ret);
+    ret = ret2;
+  }
+
+  return ret;
 }
 
 
@@ -294,8 +415,9 @@ copy_rss_field (const char *s)
     if (! s3) return 0;
     memcpy (s3, s, s2-s);
     s3[s2-s] = 0;
-    strip_html (s3);
-    return s3;
+    char *s4 = strip_html (s3);
+    free (s3);
+    return s4;
   }
 }
 
@@ -326,15 +448,146 @@ pick_rss_field (const char *a, const char *b, const char *c, const char *d)
 }
 
 
+/* Strip some Wikipedia formatting from the string to make it more readable.
+   TEXT is rewritten in place; the result is never longer than the input.  */
 static void
-strip_rss (char *rss)
+strip_wiki (char *text)
+{
+  char *in = text;
+  char *out = text;
+  while (*in)
+    {
+      if (!strncmp (in, "<!--", 4))		/* <!-- ... --> */
+        {
+          char *e = strstr (in+4, "-->");
+          if (e) in = e + 3;
+          else *out++ = *in++;		/* unterminated: keep, advance */
+        }
+      else if (!strncmp (in, "/*", 2))		/* ... */
+        {
+          char *e = strstr (in+2, "*/");
+          if (e) in = e + 2;
+          else *out++ = *in++;
+        }
+      else if (!strncmp (in, "{{Infobox", 9))	/* {{Infobox ... \n}}\n */
+        {
+          char *e = strstr (in+2, "\n}}");
+          if (e) in = e + 3;
+          else *out++ = *in++;
+        }
+      else if (!strncmp (in, "{{", 2))		/* {{ ...table... }} */
+        {
+          char *e = strstr (in+2, "}}");
+          if (e) in = e + 2;
+          else *out++ = *in++;
+        }
+      else if (!strncmp (in, "{|", 2))		/* {| ...table... |} */
+        {
+          char *e = strstr (in+2, "|}");
+          if (e) in = e + 2;
+          else *out++ = *in++;
+        }
+      else if (!strncmp (in, "|-", 2))		/* |- ...table cell... | */
+        {
+          char *e = strstr (in+2, "|");
+          if (e) in = e + 1;
+          else *out++ = *in++;
+        }
+      else if (!strncmp (in, "<ref", 4))	/* <ref>...</ref> -> "*" */
+        {
+          char *e1 = strstr (in+4, "/>");
+          char *e2 = strstr (in+4, "</ref>");
+          char *e = 0;
+          if (e1 && (!e2 || e1 < e2)) e = e1 + 2;	/* <ref ... /> */
+          else if (e2) e = e2 + 6;
+          /* Only emit "*" for a ref that was consumed, so OUT never passes IN. */
+          if (e) { in = e; *out++ = '*'; }
+          else *out++ = *in++;
+        }
+      else if (!strncmp (in, "<", 1))		/* <...> */
+        {
+          char *e = strstr (in+1, ">");
+          if (e) in = e + 1;
+          else *out++ = *in++;
+        }
+      else if (!strncmp (in, "[[", 2))		/* [[ ... ]] */
+        {
+          char *e1 = strstr (in+2, "|");
+          char *e2 = strstr (in+2, "]]");
+          if (e2)
+            {
+              /* Keep the anchor of [[link|anchor]], else the whole [[link]]. */
+              char *s = (e1 && e1 < e2) ? e1 + 1 : in + 2;
+              long L = e2 - s;
+              memmove (out, s, L);
+              out += L;
+              in = e2+2;
+            }
+          else
+            *out++ = *in++;
+        }
+      else if (!strncmp (in, "[", 1))		/* [ ... ] */
+        {
+          /* Scan from in+1: in+2 is past the terminator on a trailing "[". */
+          char *e1 = strstr (in+1, " ");
+          char *e2 = strstr (in+1, "]");
+          if (e1 && e2 && e1 < e2)		/* [url anchor] */
+            {
+              long L = e2 - e1 - 1;
+              memmove (out, e1+1, L);
+              out += L;
+              in = e2+1;
+            }
+          else
+            *out++ = *in++;
+        }
+      else if (!strncmp (in, "''''", 4))	/* omit '''' */
+        in += 4;
+      else if (!strncmp (in, "'''", 3)) 	/* omit ''' */
+        in += 3;
+      else if (!strncmp (in, "''", 2) ||	/* '' or `` or "" -> " */
+               !strncmp (in, "``", 2) ||
+               !strncmp (in, "\"\"", 2))
+        {
+          *out++ = '"';
+          in += 2;
+        }
+      else
+        {
+          *out++ = *in++;
+        }
+    }
+  *out = 0;
+
+  /* Collapse runs of three or more newlines down to two. */
+  in = text;
+  out = text;
+  while (*in)
+    {
+      while (!strncmp(in, "\n\n\n", 3))
+        in++;
+      *out++ = *in++;
+    }
+  *out = 0;
+}
+
+
+/* Returns a copy of the RSS document that has been converted to plain text,
+   in UTF8 encoding.  Roughly, it uses the contents of the <description> field
+   of each <item>, and decodes HTML within it.
+ */
+static char *
+strip_rss (const char *rss)
 {
-  char *out = rss;
+  char *ret = malloc (strlen(rss) * 4 + 1);  // room for UTF8
+  char *out = ret;
   const char *a = 0, *b = 0, *c = 0, *d = 0, *t = 0;
   int head = 1;
   int done = 0;
+  int wiki_p = !!strcasestr (rss, "<generator>MediaWiki");
 
-  for (char *in = rss; *in; in++) {
+  *out = 0;
+  for (const char *in = rss; *in; in++) {
     if (*in == '<') {
       if (!strncasecmp (in, "<item", 5) ||	// New item, dump.
           !strncasecmp (in, "<entry", 6)) {
@@ -387,8 +640,17 @@ strip_rss (char *rss)
     goto DONE;
   }
 
-  // Now decode it a second time.
-  strip_html (rss);
+  ret = strip_html (ret);
+
+  if (wiki_p) {
+    char *ret2;
+    strip_wiki (ret);
+    ret2 = decode_entities (ret);
+    free (ret);
+    ret = ret2;
+  }
+
+  return ret;
 }
 
 
@@ -461,6 +723,9 @@ strip_backslashes (char *s)
 }
 
 
+/* Returns the contents of the URL as plain text.
+   HTML and RSS are decoded.
+ */
 static char *
 url_string (const char *url)
 {
@@ -470,7 +735,7 @@ url_string (const char *url)
                        encoding:NSISOLatin1StringEncoding]];
   NSString *body =
     [NSString stringWithContentsOfURL: nsurl
-              encoding: NSISOLatin1StringEncoding
+              encoding: NSUTF8StringEncoding
               error: nil];
   if (! body)
     return 0;
@@ -508,16 +773,22 @@ url_string (const char *url)
   else
     type = TEXT;
 
-  char *body2 = strdup ([body cStringUsingEncoding:NSISOLatin1StringEncoding]);
+  char *body2 = strdup ([body cStringUsingEncoding:NSUTF8StringEncoding]);
+  char *body3 = 0;
 
   switch (type) {
-  case HTML: strip_html (body2); break;
-  case RSS:  strip_rss (body2);  break;
+  case HTML: body3 = strip_html (body2); break;
+  case RSS:  body3 = strip_rss (body2);  break;
   case TEXT: break;
   default: abort(); break;
   }
 
-  return body2;
+  if (body3) {
+    free (body2);
+    return body3;
+  } else {
+    return body2;
+  }
 }