git.hungrycats.org Git - xscreensaver/blob - OSX/iostextclient.m

   1 /* xscreensaver, Copyright (c) 2012-2015 Jamie Zawinski <jwz@jwz.org>
   2  *
   3  * Permission to use, copy, modify, distribute, and sell this software and its
   4  * documentation for any purpose is hereby granted without fee, provided that
   5  * the above copyright notice appear in all copies and that both that
   6  * copyright notice and this permission notice appear in supporting
   7  * documentation.  No representations are made about the suitability of this
   8  * software for any purpose.  It is provided "as is" without express or
   9  * implied warranty.
  10  *
  11  * Loading URLs and returning the underlying text.
  12  *
  13  * This is necessary because iOS doesn't have Perl installed, so we can't
  14  * run "xscreensaver-text" to do this.
  15  */
  16
  17 #include "utils.h"
  18
  19 #ifdef USE_IPHONE /* whole file -- see utils/textclient.c */
  20
  21 #include "textclient.h"
  22 #include "resources.h"
  23 #include "utf8wc.h"
  24
  25 #include <stdio.h>
  26
  27 #undef countof
  28 #define countof(x) (sizeof((x))/sizeof((*x)))
  29
  30
  31 extern const char *progname;
  32
  33 struct text_data {
  34
  35   enum { DATE, LITERAL, URL } mode;
  36   char *literal, *url;
  37
  38   int columns;
  39   int max_lines;
  40   char *buf;
  41   int buf_size;
  42   char *fp;
  43
  44 };
  45
  46
  47 text_data *
  48 textclient_open (Display *dpy)
  49 {
  50   text_data *d = (text_data *) calloc (1, sizeof (*d));
  51
  52 # ifdef DEBUG
  53   fprintf (stderr, "%s: textclient: init\n", progname);
  54 # endif
  55
  56   char *s = get_string_resource (dpy, "textMode", "TextMode");
  57   if (!s || !*s || !strcasecmp (s, "date") || !strcmp (s, "0"))
  58     d->mode = DATE;
  59   else if (!strcasecmp (s, "literal") || !strcmp (s, "1"))
  60     d->mode = LITERAL;
  61   else if (!strcasecmp (s, "url") || !strcmp (s, "3"))
  62     d->mode = URL;
  63   else
  64     d->mode = DATE;
  65
  66   d->literal = get_string_resource (dpy, "textLiteral", "TextLiteral");
  67   d->url = get_string_resource (dpy, "textURL", "TextURL");
  68
  69   return d;
  70 }
  71
  72
  73 void
  74 textclient_close (text_data *d)
  75 {
  76 # ifdef DEBUG
  77   fprintf (stderr, "%s: textclient: free\n", progname);
  78 # endif
  79
  80   if (d->buf) free (d->buf);
  81   if (d->literal) free (d->literal);
  82   if (d->url) free (d->url);
  83   free (d);
  84 }
  85
  86
  87 static char *
  88 date_string (void)
  89 {
  90   UIDevice *dd = [UIDevice currentDevice];
  91   NSString *name = [dd name];                   // My iPhone
  92   NSString *model = [dd model];                 // iPad
  93   // NSString *system = [dd systemName];        // iPhone OS
  94   NSString *vers = [dd systemVersion];          // 5.0
  95   NSString *date =
  96     [NSDateFormatter
  97       localizedStringFromDate:[NSDate date]
  98       dateStyle: NSDateFormatterMediumStyle
  99       timeStyle: NSDateFormatterMediumStyle];
 100   NSString *nl = @"\n";
 101
 102   NSString *result = name;
 103   result = [result stringByAppendingString: nl];
 104   result = [result stringByAppendingString: model];
 105   // result = [result stringByAppendingString: nl];
 106   // result = [result stringByAppendingString: system];
 107   result = [result stringByAppendingString: @" "];
 108   result = [result stringByAppendingString: vers];
 109   result = [result stringByAppendingString: nl];
 110   result = [result stringByAppendingString: nl];
 111   result = [result stringByAppendingString: date];
 112   result = [result stringByAppendingString: nl];
 113   result = [result stringByAppendingString: nl];
 114   return strdup ([result cStringUsingEncoding:NSISOLatin1StringEncoding]);
 115 }
 116
 117
 118 /* Returns a copy of the string with some basic HTML entities decoded.
 119  */
 120 static char *
 121 decode_entities (const char *html)
 122 {
 123   char *ret = (char *) malloc ((strlen(html) * 4) + 1);  // room for UTF8
 124   const char *in = html;
 125   char *out = ret;
 126   *out = 0;
 127
 128   const struct { const char *c; const char *e; } entities[] = {
 129
 130     { "amp", "&" },
 131     { "lt",  "<" },
 132     { "gt",  ">" },
 133
 134     // Convert Latin1 to UTF8
 135     { "nbsp", " " },                    //   160
 136     { "iexcl", "\302\241" },            // ¡ 161
 137     { "cent", "\302\242" },             // ¢ 162
 138     { "pound", "\302\243" },            // £ 163
 139     { "curren", "\302\244" },           // ¤ 164
 140     { "yen", "\302\245" },              // ¥ 165
 141     { "brvbar", "\302\246" },           // ¦ 166
 142     { "sect", "\302\247" },             // § 167
 143     { "uml", "\302\250" },              // ¨ 168
 144     { "copy", "\302\251" },             // © 169
 145     { "ordf", "\302\252" },             // ª 170
 146     { "laquo", "\302\253" },            // « 171
 147     { "not", "\302\254" },              // ¬ 172
 148     { "shy", "\302\255" },              //  173
 149     { "reg", "\302\256" },              // ® 174
 150     { "macr", "\302\257" },             // ¯ 175
 151     { "deg", "\302\260" },              // ° 176
 152     { "plusmn", "\302\261" },           // ± 177
 153     { "sup2", "\302\262" },             // ² 178
 154     { "sup3", "\302\263" },             // ³ 179
 155     { "acute", "\302\264" },            // ´ 180
 156     { "micro", "\302\265" },            // µ 181
 157     { "para", "\302\266" },             // ¶ 182
 158     { "middot", "\302\267" },           // · 183
 159     { "cedil", "\302\270" },            // ¸ 184
 160     { "sup1", "\302\271" },             // ¹ 185
 161     { "ordm", "\302\272" },             // º 186
 162     { "raquo", "\302\273" },            // » 187
 163     { "frac14", "\302\274" },           // ¼ 188
 164     { "frac12", "\302\275" },           // ½ 189
 165     { "frac34", "\302\276" },           // ¾ 190
 166     { "iquest", "\302\277" },           // ¿ 191
 167     { "Agrave", "\303\200" },           // À 192
 168     { "Aacute", "\303\201" },           // Á 193
 169     { "Acirc", "\303\202" },            // Â 194
 170     { "Atilde", "\303\203" },           // Ã 195
 171     { "Auml", "\303\204" },             // Ä 196
 172     { "Aring", "\303\205" },            // Å 197
 173     { "AElig", "\303\206" },            // Æ 198
 174     { "Ccedil", "\303\207" },           // Ç 199
 175     { "Egrave", "\303\210" },           // È 200
 176     { "Eacute", "\303\211" },           // É 201
 177     { "Ecirc", "\303\212" },            // Ê 202
 178     { "Euml", "\303\213" },             // Ë 203
 179     { "Igrave", "\303\214" },           // Ì 204
 180     { "Iacute", "\303\215" },           // Í 205
 181     { "Icirc", "\303\216" },            // Î 206
 182     { "Iuml", "\303\217" },             // Ï 207
 183     { "ETH", "\303\220" },              // Ð 208
 184     { "Ntilde", "\303\221" },           // Ñ 209
 185     { "Ograve", "\303\222" },           // Ò 210
 186     { "Oacute", "\303\223" },           // Ó 211
 187     { "Ocirc", "\303\224" },            // Ô 212
 188     { "Otilde", "\303\225" },           // Õ 213
 189     { "Ouml", "\303\226" },             // Ö 214
 190     { "times", "\303\227" },            // × 215
 191     { "Oslash", "\303\230" },           // Ø 216
 192     { "Ugrave", "\303\231" },           // Ù 217
 193     { "Uacute", "\303\232" },           // Ú 218
 194     { "Ucirc", "\303\233" },            // Û 219
 195     { "Uuml", "\303\234" },             // Ü 220
 196     { "Yacute", "\303\235" },           // Ý 221
 197     { "THORN", "\303\236" },            // Þ 222
 198     { "szlig", "\303\237" },            // ß 223
 199     { "agrave", "\303\240" },           // à 224
 200     { "aacute", "\303\241" },           // á 225
 201     { "acirc", "\303\242" },            // â 226
 202     { "atilde", "\303\243" },           // ã 227
 203     { "auml", "\303\244" },             // ä 228
 204     { "aring", "\303\245" },            // å 229
 205     { "aelig", "\303\246" },            // æ 230
 206     { "ccedil", "\303\247" },           // ç 231
 207     { "egrave", "\303\250" },           // è 232
 208     { "eacute", "\303\251" },           // é 233
 209     { "ecirc", "\303\252" },            // ê 234
 210     { "euml", "\303\253" },             // ë 235
 211     { "igrave", "\303\254" },           // ì 236
 212     { "iacute", "\303\255" },           // í 237
 213     { "icirc", "\303\256" },            // î 238
 214     { "iuml", "\303\257" },             // ï 239
 215     { "eth", "\303\260" },              // ð 240
 216     { "ntilde", "\303\261" },           // ñ 241
 217     { "ograve", "\303\262" },           // ò 242
 218     { "oacute", "\303\263" },           // ó 243
 219     { "ocirc", "\303\264" },            // ô 244
 220     { "otilde", "\303\265" },           // õ 245
 221     { "ouml", "\303\266" },             // ö 246
 222     { "divide", "\303\267" },           // ÷ 247
 223     { "oslash", "\303\270" },           // ø 248
 224     { "ugrave", "\303\271" },           // ù 249
 225     { "uacute", "\303\272" },           // ú 250
 226     { "ucirc", "\303\273" },            // û 251
 227     { "uuml", "\303\274" },             // ü 252
 228     { "yacute", "\303\275" },           // ý 253
 229     { "thorn", "\303\276" },            // þ 254
 230     { "yuml", "\303\277" },             // ÿ 255
 231
 232       // And some random others
 233     { "bdquo", "\342\200\236" },        // „
 234     { "bull", "\342\200\242" },         // •
 235     { "circ", "\313\206" },             // ˆ
 236     { "cong", "\342\211\205" },         // ≅
 237     { "empty", "\342\210\205" },        // ∅
 238     { "emsp", "\342\200\203" },         //
 239     { "ensp", "\342\200\202" },         //
 240     { "equiv", "\342\211\241" },        // ≡
 241     { "frasl", "\342\201\204" },        // ⁄
 242     { "ge", "\342\211\245" },           // ≥
 243     { "hArr", "\342\207\224" },         // ⇔
 244     { "harr", "\342\206\224" },         // ↔
 245     { "hellip", "\342\200\246" },       // …
 246     { "lArr", "\342\207\220" },         // ⇐
 247     { "lang", "\342\237\250" },         // ⟨
 248     { "larr", "\342\206\220" },         // ←
 249     { "ldquo", "\342\200\234" },        // “
 250     { "le", "\342\211\244" },           // ≤
 251     { "lowast", "\342\210\227" },       // ∗
 252     { "loz", "\342\227\212" },          // ◊
 253     { "lsaquo", "\342\200\271" },       // ‹
 254     { "lsquo", "\342\200\230" },        // ‘
 255     { "mdash", "\342\200\224" },        // —
 256     { "minus", "\342\210\222" },        // −
 257     { "ndash", "\342\200\223" },        // –
 258     { "ne", "\342\211\240" },           // ≠
 259     { "OElig", "\305\222" },            // Œ
 260     { "oelig", "\305\223" },            // œ
 261     { "prime", "\342\200\262" },        // ′
 262     { "quot", "\342\200\235" },         // ”
 263     { "rArr", "\342\207\222" },         // ⇒
 264     { "rang", "\342\237\251" },         // ⟩
 265     { "rarr", "\342\206\222" },         // →
 266     { "rdquo", "\342\200\235" },        // ”
 267     { "rsaquo", "\342\200\272" },       // ›
 268     { "rsquo", "\342\200\231" },        // ’
 269     { "sbquo", "\342\200\232" },        // ‚
 270     { "sim", "\342\210\274" },          // ∼
 271     { "thinsp", "\342\200\211" },       //
 272     { "tilde", "\313\234" },            // ˜
 273     { "trade", "\342\204\242" },        // ™
 274   };
 275
 276   while (*in) {
 277     if (*in == '&') {
 278       int done = 0;
 279       if (in[1] == '#' && in[2] == 'x') {                       // &#x41;
 280         unsigned long i = 0;
 281         in += 2;
 282         while ((*in >= '0' && *in <= '9') ||
 283                (*in >= 'A' && *in <= 'F') ||
 284                (*in >= 'a' && *in <= 'f')) {
 285           i = (i * 16) + (*in >= 'a' ? *in - 'a' + 16 :
 286                           *in >= 'A' ? *in - 'A' + 16 :
 287                           *in - '0');
 288           in++;
 289         }
 290         *out += utf8_encode (i, out, strlen(out));
 291         done = 1;
 292       } else if (in[1] == '#') {                                // &#65;
 293         unsigned long i = 0;
 294         in++;
 295         while (*in >= '0' && *in <= '9') {
 296           i = (i * 10) + (*in - '0');
 297           in++;
 298         }
 299         *out += utf8_encode (i, out, strlen(out));
 300         done = 1;
 301       } else {
 302         int i;
 303         for (i = 0; !done && i < countof(entities); i++) {
 304           if (!strncmp (in+1, entities[i].c, strlen(entities[i].c))) {
 305             strcpy (out, entities[i].e);
 306             in  += strlen(entities[i].c) + 1;
 307             out += strlen(entities[i].e);
 308             done = 1;
 309           }
 310         }
 311       }
 312
 313       if (done) {
 314         if (*in == ';')
 315           in++;
 316       } else {
 317         *out++ = *in++;
 318       }
 319     } else {
 320       *out++ = *in++;
 321     }
 322   }
 323   *out = 0;
 324
 325   /* Shrink */
 326   ret = realloc (ret, out - ret + 1);
 327
 328   return ret;
 329 }
 330
 331
 332 /* Returns a copy of the HTML string that has been converted to plain text,
 333    in UTF8 encoding.  HTML tags are stripped, <BR> and <P> are converted
 334    to newlines, and some basic HTML entities are decoded.
 335  */
 336 static char *
 337 strip_html (const char *html)
 338 {
 339   int tag = 0;
 340   int comment = 0;
 341   int white = 0;
 342   int nl = 0;
 343   char *ret = (char *) malloc ((strlen(html) * 4) + 1);  // room for UTF8
 344   char *out = ret;
 345   *out = 0;
 346
 347   for (const char *in = html; *in; in++) {
 348     if (comment) {
 349       if (!strncmp (in, "-->", 3)) {
 350         comment = 0;
 351       }
 352     } else if (tag) {
 353       if (*in == '>') {
 354         tag = 0;
 355       }
 356     } else if (*in == '<') {
 357       tag = 1;
 358       if (!strncmp (in, "<!--", 4)) {
 359         comment = 1;
 360         tag = 0;
 361       } else if (!strncasecmp (in, "<BR", 3)) {
 362         *out++ = '\n';
 363         white = 1;
 364         nl++;
 365       } else if (!strncasecmp (in, "<P", 2)) {
 366         if (nl < 2) { *out++ = '\n'; nl++; }
 367         if (nl < 2) { *out++ = '\n'; nl++; }
 368         white = 1;
 369       }
 370     } else if (*in == ' ' || *in == '\t' || *in == '\r' || *in == '\n') {
 371       if (!white && out != html)
 372         *out++ = ' ';
 373       white = 1;
 374     } else {
 375       *out++ = *in;
 376       white = 0;
 377       nl = 0;
 378     }
 379   }
 380   *out = 0;
 381
 382   {
 383     char *ret2 = decode_entities (ret);
 384     free (ret);
 385     ret = ret2;
 386   }
 387
 388   return ret;
 389 }
 390
 391
 392 static char *
 393 copy_rss_field (const char *s)
 394 {
 395   if (!s) return 0;
 396   while (*s && *s != '>')                       // Skip forward to >
 397     s++;
 398   if (! *s) return 0;
 399   s++;
 400
 401   if (!strncmp (s, "<![CDATA[", 9)) {           // CDATA quoting
 402     s += 9;
 403     char *e = strstr (s, "]]");
 404     if (e) *e = 0;
 405     unsigned long L = strlen (s);
 406     char *s2 = (char *) malloc (L+1);
 407     memcpy (s2, s, L+1);
 408     return s2;
 409
 410   } else {                                      // Entity-encoded.
 411     const char *s2;
 412     for (s2 = s; *s2 && *s2 != '<'; s2++)       // Terminate at <
 413       ;
 414     char *s3 = (char *) malloc (s2 - s + 1);
 415     if (! s3) return 0;
 416     memcpy (s3, s, s2-s);
 417     s3[s2-s] = 0;
 418     char *s4 = strip_html (s3);
 419     free (s3);
 420     return s4;
 421   }
 422 }
 423
 424
 425 static char *
 426 pick_rss_field (const char *a, const char *b, const char *c, const char *d)
 427 {
 428   // Pick the longest of the fields.
 429   char *a2 = copy_rss_field (a);
 430   char *b2 = copy_rss_field (b);
 431   char *c2 = copy_rss_field (c);
 432   char *d2 = copy_rss_field (d);
 433   unsigned long al = a2 ? strlen(a2) : 0;
 434   unsigned long bl = b2 ? strlen(b2) : 0;
 435   unsigned long cl = c2 ? strlen(c2) : 0;
 436   unsigned long dl = d2 ? strlen(d2) : 0;
 437   char *ret = 0;
 438
 439   if      (al > bl && al > cl && al > dl) ret = a2;
 440   else if (bl > al && bl > cl && bl > dl) ret = b2;
 441   else if (cl > al && cl > bl && cl > dl) ret = c2;
 442   else ret = d2;
 443   if (a2 && a2 != ret) free (a2);
 444   if (b2 && b2 != ret) free (b2);
 445   if (c2 && c2 != ret) free (c2);
 446   if (d2 && d2 != ret) free (d2);
 447   return ret;
 448 }
 449
 450
 451 /* Strip some Wikipedia formatting from the string to make it more readable.
 452  */
 453 static void
 454 strip_wiki (char *text)
 455 {
 456   char *in = text;
 457   char *out = text;
 458   while (*in)
 459     {
 460       if (!strncmp (in, "<!--", 4))             /* <!-- ... --> */
 461         {
 462           char *e = strstr (in+4, "-->");
 463           if (e) in = e + 3;
 464         }
 465       else if (!strncmp (in, "/*", 2))          /* ... */
 466         {
 467           char *e = strstr (in+2, "*/");
 468           if (e) in = e + 2;
 469         }
 470       else if (!strncmp (in, "{{Infobox", 9))   /* {{Infobox ... \n}}\n */
 471         {
 472           char *e = strstr (in+2, "\n}}");
 473           if (e) in = e + 3;
 474           else *out++ = *in++;
 475         }
 476       else if (!strncmp (in, "{{", 2))          /* {{ ...table... }} */
 477         {
 478           char *e = strstr (in+2, "}}");
 479           if (e) in = e + 2;
 480           else *out++ = *in++;
 481         }
 482       else if (!strncmp (in, "{|", 2))          /* {| ...table... |} */
 483         {
 484           char *e = strstr (in+2, "|}");
 485           if (e) in = e + 2;
 486           else *out++ = *in++;
 487         }
 488       else if (!strncmp (in, "|-", 2))          /* |- ...table cell... | */
 489         {
 490           char *e = strstr (in+2, "|");
 491           if (e) in = e + 1;
 492           else *out++ = *in++;
 493         }
 494       else if (!strncmp (in, "<ref", 4))        /* <ref>...</ref> -> "*" */
 495         {
 496           char *e1 = strstr (in+4, "/>");
 497           char *e2 = strstr (in+4, "</ref>");
 498           if (e1 && e1 < e2) in = e1 + 2;
 499           else if (e2) in = e2 + 6;
 500           else *out++ = *in++;
 501
 502           *out++ = '*';
 503         }
 504       else if (!strncmp (in, "<", 1))           /* <...> */
 505         {
 506           char *e = strstr (in+1, ">");
 507           if (e) in = e + 1;
 508         }
 509       else if (!strncmp (in, "[[", 2))          /* [[ ... ]] */
 510         {
 511           char *e1 = strstr (in+2, "|");
 512           char *e2 = strstr (in+2, "]]");
 513           if (e1 && e2 && e1 < e2)              /* [[link|anchor]] */
 514             {
 515               long L = e2 - e1 - 1;
 516               memmove (out, e1+1, L);
 517               out += L;
 518               in = e2+2;
 519             }
 520           else if (e2)                          /* [[link]] */
 521             {
 522               long L = e2 - in - 2;
 523               memmove (out, in+2, L);
 524               out += L;
 525               in = e2+2;
 526             }
 527           else
 528             *out++ = *in++;
 529         }
 530       else if (!strncmp (in, "[", 1))           /* [ ... ] */
 531         {
 532           char *e1 = strstr (in+2, " ");
 533           char *e2 = strstr (in+2, "]");
 534           if (e1 && e2 && e1 < e2)              /* [url anchor] */
 535             {
 536               long L = e2 - e1 - 1;
 537               memmove (out, e1+1, L);
 538               out += L;
 539               in = e2+2;
 540             }
 541           else
 542             *out++ = *in++;
 543         }
 544       else if (!strncmp (in, "''''", 4))        /* omit '''' */
 545         in += 4;
 546       else if (!strncmp (in, "'''", 3))         /* omit ''' */
 547         in += 3;
 548       else if (!strncmp (in, "''", 2) ||        /* '' or `` or "" -> " */
 549                !strncmp (in, "``", 2) ||
 550                !strncmp (in, "\"\"", 2))
 551         {
 552           *out++ = '"';
 553           in += 2;
 554         }
 555       else
 556         {
 557           *out++ = *in++;
 558         }
 559     }
 560   *out = 0;
 561
 562   /* Collapse newlines */
 563   in = text;
 564   out = text;
 565   while (*in)
 566     {
 567       while (!strncmp(in, "\n\n\n", 3))
 568         in++;
 569       *out++ = *in++;
 570     }
 571   *out = 0;
 572 }
 573
 574
 575 /* Returns a copy of the RSS document that has been converted to plain text,
 576    in UTF8 encoding.  Rougly, it uses the contents of the <description> field
 577    of each <item>, and decodes HTML within it.
 578  */
 579 static char *
 580 strip_rss (const char *rss)
 581 {
 582   char *ret = malloc (strlen(rss) * 4 + 1);  // room for UTF8
 583   char *out = ret;
 584   const char *a = 0, *b = 0, *c = 0, *d = 0, *t = 0;
 585   int head = 1;
 586   int done = 0;
 587   int wiki_p = !!strcasestr (rss, "<generator>MediaWiki");
 588
 589   *out = 0;
 590   for (const char *in = rss; *in; in++) {
 591     if (*in == '<') {
 592       if (!strncasecmp (in, "<item", 5) ||      // New item, dump.
 593           !strncasecmp (in, "<entry", 6)) {
 594       DONE:
 595         head = 0;
 596         char *title = copy_rss_field (t);
 597         char *body  = pick_rss_field (a, b, c, d);
 598
 599         a = b = c = d = t = 0;
 600
 601         if (title && body && !strcmp (title, body)) {
 602           free (title);
 603           title = 0;
 604         }
 605
 606         if (title) {
 607           strcpy (out, title);
 608           free (title);
 609           out += strlen (out);
 610           strcpy (out, "\n\n");
 611           out += strlen (out);
 612         }
 613
 614         if (body) {
 615           strcpy (out, body);
 616           free (body);
 617           out += strlen (out);
 618           strcpy (out, "<P>");
 619           out += strlen (out);
 620         }
 621
 622       } else if (head) {   // still before first <item>
 623         ;
 624       } else if (!strncasecmp (in, "<title", 6)) {
 625         t = in+6;
 626       } else if (!strncasecmp (in, "<summary", 8)) {
 627         d = in+8;
 628       } else if (!strncasecmp (in, "<description", 12)) {
 629         a = in+12;
 630       } else if (!strncasecmp (in, "<content:encoded", 16)) {
 631         c = in+16;
 632       } else if (!strncasecmp (in, "<content", 8)) {
 633         b = in+8;
 634       }
 635     }
 636   }
 637
 638   if (! done) {         // Finish off the final item.
 639     done = 1;
 640     goto DONE;
 641   }
 642
 643   ret = strip_html (ret);
 644
 645   if (wiki_p) {
 646     char *ret2;
 647     strip_wiki (ret);
 648     ret2 = decode_entities (ret);
 649     free (ret);
 650     ret = ret2;
 651   }
 652
 653   return ret;
 654 }
 655
 656
 657 static void
 658 wrap_text (char *body, int columns, int max_lines)
 659 {
 660   int col = 0, last_col = 0;
 661   char *last_space = 0;
 662   int lines = 0;
 663   if (! body) return;
 664   for (char *p = body; *p; p++) {
 665     if (*p == '\r' || *p == '\n' || *p == ' ' || *p == '\t') {
 666       if (col > columns && last_space) {
 667         *last_space = '\n';
 668         col = col - last_col;
 669       }
 670       last_space = p;
 671       last_col = col;
 672     }
 673     if (*p == '\r' || *p == '\n') {
 674       col = 0;
 675       last_col = 0;
 676       last_space = 0;
 677       lines++;
 678       if (max_lines && lines >= max_lines)
 679         {
 680           *p = 0;
 681           break;
 682         }
 683     } else {
 684       col++;
 685     }
 686   }
 687 }
 688
 689
 690 static void
 691 rewrap_text (char *body, int columns)
 692 {
 693   if (! body) return;
 694   for (char *p = body; *p; p++) {
 695     if (*p == '\n') {
 696       if (p[1] == '\n')
 697         p++;
 698       else
 699         *p = ' ';
 700     }
 701   }
 702   wrap_text (body, columns, 0);
 703 }
 704
 705
 706
 707 static void
 708 strip_backslashes (char *s)
 709 {
 710   char *out = s;
 711   for (char *in = s; *in; in++) {
 712     if (*in == '\\') {
 713       in++;
 714       if      (*in == 'n') *out++ = '\n';
 715       else if (*in == 'r') *out++ = '\r';
 716       else if (*in == 't') *out++ = '\t';
 717       else *out++ = *in;
 718     } else {
 719       *out++ = *in;
 720     }
 721   }
 722   *out = 0;
 723 }
 724
 725
 726 /* Returns the contents of the URL as plain text.
 727    HTML and RSS are decoded.
 728  */
 729 static char *
 730 url_string (const char *url)
 731 {
 732   NSURL *nsurl =
 733     [NSURL URLWithString:
 734              [NSString stringWithCString: url
 735                        encoding:NSISOLatin1StringEncoding]];
 736   NSString *body =
 737     [NSString stringWithContentsOfURL: nsurl
 738               encoding: NSUTF8StringEncoding
 739               error: nil];
 740   if (! body)
 741     return 0;
 742
 743   enum { RSS, HTML, TEXT } type;
 744
 745   // Only search the first 1/2 K of the document while determining type.
 746
 747   unsigned long L = [body length];
 748   if (L > 512) L = 512;
 749   NSString *head = [[[body substringToIndex: L]
 750                       stringByTrimmingCharactersInSet:
 751                         [NSCharacterSet whitespaceAndNewlineCharacterSet]]
 752                      lowercaseString];
 753   if ([head hasPrefix:@"<?xml"] ||
 754       [head hasPrefix:@"<!doctype rss"])
 755     type = RSS;
 756   else if ([head hasPrefix:@"<!doctype html"] ||
 757            [head hasPrefix:@"<html"] ||
 758            [head hasPrefix:@"<head"])
 759     type = HTML;
 760   else if ([head rangeOfString:@"<base"].length ||
 761            [head rangeOfString:@"<body"].length ||
 762            [head rangeOfString:@"<script"].length ||
 763            [head rangeOfString:@"<style"].length ||
 764            [head rangeOfString:@"<a href"].length)
 765     type = HTML;
 766   else if ([head rangeOfString:@"<channel"].length ||
 767            [head rangeOfString:@"<generator"].length ||
 768            [head rangeOfString:@"<description"].length ||
 769            [head rangeOfString:@"<content"].length ||
 770            [head rangeOfString:@"<feed"].length ||
 771            [head rangeOfString:@"<entry"].length)
 772     type = RSS;
 773   else
 774     type = TEXT;
 775
 776   char *body2 = strdup ([body cStringUsingEncoding:NSUTF8StringEncoding]);
 777   char *body3 = 0;
 778
 779   switch (type) {
 780   case HTML: body3 = strip_html (body2); break;
 781   case RSS:  body3 = strip_rss (body2);  break;
 782   case TEXT: break;
 783   default: abort(); break;
 784   }
 785
 786   if (body3) {
 787     free (body2);
 788     return body3;
 789   } else {
 790     return body2;
 791   }
 792 }
 793
 794
 795 int
 796 textclient_getc (text_data *d)
 797 {
 798   if (!d->fp || !*d->fp) {
 799     if (d->buf) {
 800       free (d->buf);
 801       d->buf = 0;
 802       d->fp = 0;
 803     }
 804     switch (d->mode) {
 805     case DATE: DATE:
 806       d->buf = date_string();
 807       break;
 808     case LITERAL:
 809       if (!d->literal || !*d->literal)
 810         goto DATE;
 811       d->buf = (char *) malloc (strlen (d->literal) + 3);
 812       strcpy (d->buf, d->literal);
 813       strcat (d->buf, "\n");
 814       strip_backslashes (d->buf);
 815       d->fp = d->buf;
 816       break;
 817     case URL:
 818       if (!d->url || !*d->url)
 819         goto DATE;
 820       d->buf = url_string (d->url);
 821       break;
 822     default:
 823       abort();
 824     }
 825     if (d->columns > 10)
 826       wrap_text (d->buf, d->columns, d->max_lines);
 827     d->fp = d->buf;
 828   }
 829
 830   if (!d->fp || !*d->fp)
 831     return -1;
 832
 833   unsigned char c = (unsigned char) *d->fp++;
 834   return (int) c;
 835 }
 836
 837
 838 Bool
 839 textclient_putc (text_data *d, XKeyEvent *k)
 840 {
 841   return False;
 842 }
 843
 844
 845 void
 846 textclient_reshape (text_data *d,
 847                     int pix_w, int pix_h,
 848                     int char_w, int char_h,
 849                     int max_lines)
 850 {
 851   d->columns = char_w;
 852   d->max_lines = max_lines;
 853   rewrap_text (d->buf, d->columns);
 854 }
 855
 856 #endif /* USE_IPHONE -- whole file */