git.hungrycats.org Git - xscreensaver/blob - utils/textclient-mobile.c

   1 /* xscreensaver, Copyright (c) 2012-2016 Jamie Zawinski <jwz@jwz.org>
   2  *
   3  * Permission to use, copy, modify, distribute, and sell this software and its
   4  * documentation for any purpose is hereby granted without fee, provided that
   5  * the above copyright notice appear in all copies and that both that
   6  * copyright notice and this permission notice appear in supporting
   7  * documentation.  No representations are made about the suitability of this
   8  * software for any purpose.  It is provided "as is" without express or
   9  * implied warranty.
  10  *
  11  * Loading URLs and returning the underlying text.
  12  *
  13  * This is necessary because iOS and Android don't have Perl installed,
  14  * so we can't just run "xscreensaver-text" at the end of a pipe to do this.
  15  */
  16
  17 #include "utils.h"
  18
  19 #if defined(USE_IPHONE) || defined(HAVE_ANDROID)  /* whole file */
  20
  21 #include "textclient.h"
  22 #include "resources.h"
  23 #include "utf8wc.h"
  24
  25 #include <stdio.h>
  26
  27 #undef countof
  28 #define countof(x) (sizeof((x))/sizeof((*x)))
  29
  30
  31 extern const char *progname;
  32
/* State for one text source, created by textclient_open() and released
   by textclient_close(). */
struct text_data {

  /* Which source textclient_getc() reads from; chosen in textclient_open()
     from the "textMode" resource. */
  enum { DATE, LITERAL, URL } mode;
  /* Values of the "textLiteral" and "textURL" resources; freed in
     textclient_close(). */
  char *literal, *url;

  Display *dpy;      /* display handed to textclient_open() */
  int columns;       /* wrap width; set by textclient_reshape() */
  int max_lines;     /* line limit passed to wrap_text(); set by
                        textclient_reshape() */
  char *buf;         /* heap text currently being read out; replaced by
                        textclient_getc() once exhausted */
  int buf_size;      /* not referenced by the code visible in this file */
  char *fp;          /* read cursor into buf used by textclient_getc() */

};
  46
  47
  48 text_data *
  49 textclient_open (Display *dpy)
  50 {
  51   text_data *d = (text_data *) calloc (1, sizeof (*d));
  52
  53 # ifdef DEBUG
  54   fprintf (stderr, "%s: textclient: init\n", progname);
  55 # endif
  56
  57   char *s = get_string_resource (dpy, "textMode", "TextMode");
  58   if (!s || !*s || !strcasecmp (s, "date") || !strcmp (s, "0"))
  59     d->mode = DATE;
  60   else if (!strcasecmp (s, "literal") || !strcmp (s, "1"))
  61     d->mode = LITERAL;
  62   else if (!strcasecmp (s, "url") || !strcmp (s, "3"))
  63     d->mode = URL;
  64   else
  65     d->mode = DATE;
  66
  67   d->dpy = dpy;
  68   d->literal = get_string_resource (dpy, "textLiteral", "TextLiteral");
  69   d->url = get_string_resource (dpy, "textURL", "TextURL");
  70
  71   return d;
  72 }
  73
  74
  75 void
  76 textclient_close (text_data *d)
  77 {
  78 # ifdef DEBUG
  79   fprintf (stderr, "%s: textclient: free\n", progname);
  80 # endif
  81
  82   if (d->buf) free (d->buf);
  83   if (d->literal) free (d->literal);
  84   if (d->url) free (d->url);
  85   free (d);
  86 }
  87
  88
  89 /* Returns a copy of the string with some basic HTML entities decoded.
  90  */
  91 static char *
  92 decode_entities (const char *html)
  93 {
  94   char *ret = (char *) malloc ((strlen(html) * 4) + 1);  // room for UTF8
  95   const char *in = html;
  96   char *out = ret;
  97   *out = 0;
  98
  99   const struct { const char *c; const char *e; } entities[] = {
 100
 101     { "amp", "&" },
 102     { "lt",  "<" },
 103     { "gt",  ">" },
 104
 105     // Convert Latin1 to UTF8
 106     { "nbsp", " " },                    //   160
 107     { "iexcl", "\302\241" },            // ¡ 161
 108     { "cent", "\302\242" },             // ¢ 162
 109     { "pound", "\302\243" },            // £ 163
 110     { "curren", "\302\244" },           // ¤ 164
 111     { "yen", "\302\245" },              // ¥ 165
 112     { "brvbar", "\302\246" },           // ¦ 166
 113     { "sect", "\302\247" },             // § 167
 114     { "uml", "\302\250" },              // ¨ 168
 115     { "copy", "\302\251" },             // © 169
 116     { "ordf", "\302\252" },             // ª 170
 117     { "laquo", "\302\253" },            // « 171
 118     { "not", "\302\254" },              // ¬ 172
 119     { "shy", "\302\255" },              //  173
 120     { "reg", "\302\256" },              // ® 174
 121     { "macr", "\302\257" },             // ¯ 175
 122     { "deg", "\302\260" },              // ° 176
 123     { "plusmn", "\302\261" },           // ± 177
 124     { "sup2", "\302\262" },             // ² 178
 125     { "sup3", "\302\263" },             // ³ 179
 126     { "acute", "\302\264" },            // ´ 180
 127     { "micro", "\302\265" },            // µ 181
 128     { "para", "\302\266" },             // ¶ 182
 129     { "middot", "\302\267" },           // · 183
 130     { "cedil", "\302\270" },            // ¸ 184
 131     { "sup1", "\302\271" },             // ¹ 185
 132     { "ordm", "\302\272" },             // º 186
 133     { "raquo", "\302\273" },            // » 187
 134     { "frac14", "\302\274" },           // ¼ 188
 135     { "frac12", "\302\275" },           // ½ 189
 136     { "frac34", "\302\276" },           // ¾ 190
 137     { "iquest", "\302\277" },           // ¿ 191
 138     { "Agrave", "\303\200" },           // À 192
 139     { "Aacute", "\303\201" },           // Á 193
 140     { "Acirc", "\303\202" },            // Â 194
 141     { "Atilde", "\303\203" },           // Ã 195
 142     { "Auml", "\303\204" },             // Ä 196
 143     { "Aring", "\303\205" },            // Å 197
 144     { "AElig", "\303\206" },            // Æ 198
 145     { "Ccedil", "\303\207" },           // Ç 199
 146     { "Egrave", "\303\210" },           // È 200
 147     { "Eacute", "\303\211" },           // É 201
 148     { "Ecirc", "\303\212" },            // Ê 202
 149     { "Euml", "\303\213" },             // Ë 203
 150     { "Igrave", "\303\214" },           // Ì 204
 151     { "Iacute", "\303\215" },           // Í 205
 152     { "Icirc", "\303\216" },            // Î 206
 153     { "Iuml", "\303\217" },             // Ï 207
 154     { "ETH", "\303\220" },              // Ð 208
 155     { "Ntilde", "\303\221" },           // Ñ 209
 156     { "Ograve", "\303\222" },           // Ò 210
 157     { "Oacute", "\303\223" },           // Ó 211
 158     { "Ocirc", "\303\224" },            // Ô 212
 159     { "Otilde", "\303\225" },           // Õ 213
 160     { "Ouml", "\303\226" },             // Ö 214
 161     { "times", "\303\227" },            // × 215
 162     { "Oslash", "\303\230" },           // Ø 216
 163     { "Ugrave", "\303\231" },           // Ù 217
 164     { "Uacute", "\303\232" },           // Ú 218
 165     { "Ucirc", "\303\233" },            // Û 219
 166     { "Uuml", "\303\234" },             // Ü 220
 167     { "Yacute", "\303\235" },           // Ý 221
 168     { "THORN", "\303\236" },            // Þ 222
 169     { "szlig", "\303\237" },            // ß 223
 170     { "agrave", "\303\240" },           // à 224
 171     { "aacute", "\303\241" },           // á 225
 172     { "acirc", "\303\242" },            // â 226
 173     { "atilde", "\303\243" },           // ã 227
 174     { "auml", "\303\244" },             // ä 228
 175     { "aring", "\303\245" },            // å 229
 176     { "aelig", "\303\246" },            // æ 230
 177     { "ccedil", "\303\247" },           // ç 231
 178     { "egrave", "\303\250" },           // è 232
 179     { "eacute", "\303\251" },           // é 233
 180     { "ecirc", "\303\252" },            // ê 234
 181     { "euml", "\303\253" },             // ë 235
 182     { "igrave", "\303\254" },           // ì 236
 183     { "iacute", "\303\255" },           // í 237
 184     { "icirc", "\303\256" },            // î 238
 185     { "iuml", "\303\257" },             // ï 239
 186     { "eth", "\303\260" },              // ð 240
 187     { "ntilde", "\303\261" },           // ñ 241
 188     { "ograve", "\303\262" },           // ò 242
 189     { "oacute", "\303\263" },           // ó 243
 190     { "ocirc", "\303\264" },            // ô 244
 191     { "otilde", "\303\265" },           // õ 245
 192     { "ouml", "\303\266" },             // ö 246
 193     { "divide", "\303\267" },           // ÷ 247
 194     { "oslash", "\303\270" },           // ø 248
 195     { "ugrave", "\303\271" },           // ù 249
 196     { "uacute", "\303\272" },           // ú 250
 197     { "ucirc", "\303\273" },            // û 251
 198     { "uuml", "\303\274" },             // ü 252
 199     { "yacute", "\303\275" },           // ý 253
 200     { "thorn", "\303\276" },            // þ 254
 201     { "yuml", "\303\277" },             // ÿ 255
 202
 203       // And some random others
 204     { "bdquo", "\342\200\236" },        // ~
 205     { "bull", "\342\200\242" },         // ~
 206     { "circ", "\313\206" },             // ~
 207     { "cong", "\342\211\205" },         // ~
 208     { "empty", "\342\210\205" },        // ~
 209     { "emsp", "\342\200\203" },         // ~
 210     { "ensp", "\342\200\202" },         // ~
 211     { "equiv", "\342\211\241" },        // ~
 212     { "frasl", "\342\201\204" },        // ~
 213     { "ge", "\342\211\245" },           // ~
 214     { "hArr", "\342\207\224" },         // ~
 215     { "harr", "\342\206\224" },         // ~
 216     { "hellip", "\342\200\246" },       // ~
 217     { "lArr", "\342\207\220" },         // ~
 218     { "lang", "\342\237\250" },         // ~
 219     { "larr", "\342\206\220" },         // ~
 220     { "ldquo", "\342\200\234" },        // ~
 221     { "le", "\342\211\244" },           // ~
 222     { "lowast", "\342\210\227" },       // ~
 223     { "loz", "\342\227\212" },          // ~
 224     { "lsaquo", "\342\200\271" },       // ~
 225     { "lsquo", "\342\200\230" },        // ~
 226     { "mdash", "\342\200\224" },        // ~
 227     { "minus", "\342\210\222" },        // ~
 228     { "ndash", "\342\200\223" },        // ~
 229     { "ne", "\342\211\240" },           // ~
 230     { "OElig", "\305\222" },            // ~
 231     { "oelig", "\305\223" },            // ~
 232     { "prime", "\342\200\262" },        // ~
 233     { "quot", "\342\200\235" },         // ~
 234     { "rArr", "\342\207\222" },         // ~
 235     { "rang", "\342\237\251" },         // ~
 236     { "rarr", "\342\206\222" },         // ~
 237     { "rdquo", "\342\200\235" },        // ~
 238     { "rsaquo", "\342\200\272" },       // ~
 239     { "rsquo", "\342\200\231" },        // ~
 240     { "sbquo", "\342\200\232" },        // ~
 241     { "sim", "\342\210\274" },          // ~
 242     { "thinsp", "\342\200\211" },       // ~
 243     { "tilde", "\313\234" },            // ~
 244     { "trade", "\342\204\242" },        // ~
 245   };
 246
 247   while (*in) {
 248     if (*in == '&') {
 249       int done = 0;
 250       if (in[1] == '#' && in[2] == 'x') {                       // &#x41;
 251         unsigned long i = 0;
 252         in += 2;
 253         while ((*in >= '0' && *in <= '9') ||
 254                (*in >= 'A' && *in <= 'F') ||
 255                (*in >= 'a' && *in <= 'f')) {
 256           i = (i * 16) + (*in >= 'a' ? *in - 'a' + 16 :
 257                           *in >= 'A' ? *in - 'A' + 16 :
 258                           *in - '0');
 259           in++;
 260         }
 261         *out += utf8_encode (i, out, strlen(out));
 262         done = 1;
 263       } else if (in[1] == '#') {                                // &#65;
 264         unsigned long i = 0;
 265         in++;
 266         while (*in >= '0' && *in <= '9') {
 267           i = (i * 10) + (*in - '0');
 268           in++;
 269         }
 270         *out += utf8_encode (i, out, strlen(out));
 271         done = 1;
 272       } else {
 273         int i;
 274         for (i = 0; !done && i < countof(entities); i++) {
 275           if (!strncmp (in+1, entities[i].c, strlen(entities[i].c))) {
 276             strcpy (out, entities[i].e);
 277             in  += strlen(entities[i].c) + 1;
 278             out += strlen(entities[i].e);
 279             done = 1;
 280           }
 281         }
 282       }
 283
 284       if (done) {
 285         if (*in == ';')
 286           in++;
 287       } else {
 288         *out++ = *in++;
 289       }
 290     } else {
 291       *out++ = *in++;
 292     }
 293   }
 294   *out = 0;
 295
 296   /* Shrink */
 297   ret = realloc (ret, out - ret + 1);
 298
 299   return ret;
 300 }
 301
 302
 303 /* Returns a copy of the HTML string that has been converted to plain text,
 304    in UTF8 encoding.  HTML tags are stripped, <BR> and <P> are converted
 305    to newlines, and some basic HTML entities are decoded.
 306  */
 307 char *
 308 textclient_strip_html (const char *html)
 309 {
 310   int tag = 0;
 311   int comment = 0;
 312   int white = 0;
 313   int nl = 0;
 314   char *ret = (char *) malloc ((strlen(html) * 4) + 1);  // room for UTF8
 315   char *out = ret;
 316   *out = 0;
 317
 318   for (const char *in = html; *in; in++) {
 319     if (comment) {
 320       if (!strncmp (in, "-->", 3)) {
 321         comment = 0;
 322       }
 323     } else if (tag) {
 324       if (*in == '>') {
 325         tag = 0;
 326       }
 327     } else if (*in == '<') {
 328       tag = 1;
 329       if (!strncmp (in, "<!--", 4)) {
 330         comment = 1;
 331         tag = 0;
 332       } else if (!strncasecmp (in, "<BR", 3)) {
 333         *out++ = '\n';
 334         white = 1;
 335         nl++;
 336       } else if (!strncasecmp (in, "<P", 2)) {
 337         if (nl < 2) { *out++ = '\n'; nl++; }
 338         if (nl < 2) { *out++ = '\n'; nl++; }
 339         white = 1;
 340       }
 341     } else if (*in == ' ' || *in == '\t' || *in == '\r' || *in == '\n') {
 342       if (!white && out != html)
 343         *out++ = ' ';
 344       white = 1;
 345     } else {
 346       *out++ = *in;
 347       white = 0;
 348       nl = 0;
 349     }
 350   }
 351   *out = 0;
 352
 353   {
 354     char *ret2 = decode_entities (ret);
 355     free (ret);
 356     ret = ret2;
 357   }
 358
 359   return ret;
 360 }
 361
 362
 363 static char *
 364 copy_rss_field (const char *s)
 365 {
 366   if (!s) return 0;
 367   while (*s && *s != '>')                       // Skip forward to >
 368     s++;
 369   if (! *s) return 0;
 370   s++;
 371
 372   if (!strncmp (s, "<![CDATA[", 9)) {           // CDATA quoting
 373     s += 9;
 374     char *e = strstr (s, "]]");
 375     if (e) *e = 0;
 376     unsigned long L = strlen (s);
 377     char *s2 = (char *) malloc (L+1);
 378     memcpy (s2, s, L+1);
 379     return s2;
 380
 381   } else {                                      // Entity-encoded.
 382     const char *s2;
 383     for (s2 = s; *s2 && *s2 != '<'; s2++)       // Terminate at <
 384       ;
 385     char *s3 = (char *) malloc (s2 - s + 1);
 386     if (! s3) return 0;
 387     memcpy (s3, s, s2-s);
 388     s3[s2-s] = 0;
 389     char *s4 = textclient_strip_html (s3);
 390     free (s3);
 391     return s4;
 392   }
 393 }
 394
 395
 396 static char *
 397 pick_rss_field (const char *a, const char *b, const char *c, const char *d)
 398 {
 399   // Pick the longest of the fields.
 400   char *a2 = copy_rss_field (a);
 401   char *b2 = copy_rss_field (b);
 402   char *c2 = copy_rss_field (c);
 403   char *d2 = copy_rss_field (d);
 404   unsigned long al = a2 ? strlen(a2) : 0;
 405   unsigned long bl = b2 ? strlen(b2) : 0;
 406   unsigned long cl = c2 ? strlen(c2) : 0;
 407   unsigned long dl = d2 ? strlen(d2) : 0;
 408   char *ret = 0;
 409
 410   if      (al > bl && al > cl && al > dl) ret = a2;
 411   else if (bl > al && bl > cl && bl > dl) ret = b2;
 412   else if (cl > al && cl > bl && cl > dl) ret = c2;
 413   else ret = d2;
 414   if (a2 && a2 != ret) free (a2);
 415   if (b2 && b2 != ret) free (b2);
 416   if (c2 && c2 != ret) free (c2);
 417   if (d2 && d2 != ret) free (d2);
 418   return ret;
 419 }
 420
 421
 422 /* Strip some Wikipedia formatting from the string to make it more readable.
 423  */
 424 static void
 425 strip_wiki (char *text)
 426 {
 427   char *in = text;
 428   char *out = text;
 429   while (*in)
 430     {
 431       if (!strncmp (in, "<!--", 4))             /* <!-- ... --> */
 432         {
 433           char *e = strstr (in+4, "-->");
 434           if (e) in = e + 3;
 435         }
 436       else if (!strncmp (in, "/*", 2))          /* ... */
 437         {
 438           char *e = strstr (in+2, "*/");
 439           if (e) in = e + 2;
 440           else *out++ = *in++;
 441         }
 442       else if (!strncmp (in, "{{Infobox", 9))   /* {{Infobox ... \n}}\n */
 443         {
 444           char *e = strstr (in+2, "\n}}");
 445           if (e) in = e + 3;
 446           else *out++ = *in++;
 447         }
 448       else if (!strncmp (in, "{{", 2))          /* {{ ...table... }} */
 449         {
 450           char *e = strstr (in+2, "}}");
 451           if (e) in = e + 2;
 452           else *out++ = *in++;
 453         }
 454       else if (!strncmp (in, "{|", 2))          /* {| ...table... |} */
 455         {
 456           char *e = strstr (in+2, "|}");
 457           if (e) in = e + 2;
 458           else *out++ = *in++;
 459         }
 460       else if (!strncmp (in, "|-", 2))          /* |- ...table cell... | */
 461         {
 462           char *e = strstr (in+2, "|");
 463           if (e) in = e + 1;
 464           else *out++ = *in++;
 465         }
 466       else if (!strncmp (in, "<ref", 4))        /* <ref>...</ref> -> "*" */
 467         {
 468           char *e1 = strstr (in+4, "/>");
 469           char *e2 = strstr (in+4, "</ref>");
 470           if (e1 && e1 < e2) in = e1 + 2;
 471           else if (e2) in = e2 + 6;
 472           else *out++ = *in++;
 473
 474           *out++ = '*';
 475         }
 476       else if (!strncmp (in, "<", 1))           /* <...> */
 477         {
 478           char *e = strstr (in+1, ">");
 479           if (e) in = e + 1;
 480           else *out++ = *in++;
 481         }
 482       else if (!strncmp (in, "[[", 2))          /* [[ ... ]] */
 483         {
 484           char *e1 = strstr (in+2, "|");
 485           char *e2 = strstr (in+2, "]]");
 486           if (e1 && e2 && e1 < e2)              /* [[link|anchor]] */
 487             {
 488               long L = e2 - e1 - 1;
 489               memmove (out, e1+1, L);
 490               out += L;
 491               in = e2+2;
 492             }
 493           else if (e2)                          /* [[link]] */
 494             {
 495               long L = e2 - in - 2;
 496               memmove (out, in+2, L);
 497               out += L;
 498               in = e2+2;
 499             }
 500           else
 501             *out++ = *in++;
 502         }
 503       else if (!strncmp (in, "[", 1))           /* [ ... ] */
 504         {
 505           char *e1 = strstr (in+2, " ");
 506           char *e2 = strstr (in+2, "]");
 507           if (e1 && e2 && e1 < e2)              /* [url anchor] */
 508             {
 509               long L = e2 - e1 - 1;
 510               memmove (out, e1+1, L);
 511               out += L;
 512               in = e2+2;
 513             }
 514           else
 515             *out++ = *in++;
 516         }
 517       else if (!strncmp (in, "''''", 4))        /* omit '''' */
 518         in += 4;
 519       else if (!strncmp (in, "'''", 3))         /* omit ''' */
 520         in += 3;
 521       else if (!strncmp (in, "''", 2) ||        /* '' or `` or "" -> " */
 522                !strncmp (in, "``", 2) ||
 523                !strncmp (in, "\"\"", 2))
 524         {
 525           *out++ = '"';
 526           in += 2;
 527         }
 528       else
 529         {
 530           *out++ = *in++;
 531         }
 532     }
 533   *out = 0;
 534
 535   /* Collapse newlines */
 536   in = text;
 537   out = text;
 538   while (*in)
 539     {
 540       while (!strncmp(in, "\n\n\n", 3))
 541         in++;
 542       *out++ = *in++;
 543     }
 544   *out = 0;
 545 }
 546
 547
 548 /* Returns a copy of the RSS document that has been converted to plain text,
 549    in UTF8 encoding.  Rougly, it uses the contents of the <description> field
 550    of each <item>, and decodes HTML within it.
 551  */
 552 char *
 553 textclient_strip_rss (const char *rss)
 554 {
 555   char *ret = malloc (strlen(rss) * 4 + 1);  // room for UTF8
 556   char *out = ret;
 557   const char *a = 0, *b = 0, *c = 0, *d = 0, *t = 0;
 558   int head = 1;
 559   int done = 0;
 560   int wiki_p = !!strcasestr (rss, "<generator>MediaWiki");
 561
 562   *out = 0;
 563   for (const char *in = rss; *in; in++) {
 564     if (*in == '<') {
 565       if (!strncasecmp (in, "<item", 5) ||      // New item, dump.
 566           !strncasecmp (in, "<entry", 6)) {
 567       DONE:
 568         head = 0;
 569         char *title = copy_rss_field (t);
 570         char *body  = pick_rss_field (a, b, c, d);
 571
 572         a = b = c = d = t = 0;
 573
 574         if (title && body && !strcmp (title, body)) {
 575           free (title);
 576           title = 0;
 577         }
 578
 579         if (title) {
 580           strcpy (out, title);
 581           free (title);
 582           out += strlen (out);
 583           strcpy (out, "\n\n");
 584           out += strlen (out);
 585         }
 586
 587         if (body) {
 588           strcpy (out, body);
 589           free (body);
 590           out += strlen (out);
 591           strcpy (out, "<P>");
 592           out += strlen (out);
 593         }
 594
 595       } else if (head) {   // still before first <item>
 596         ;
 597       } else if (!strncasecmp (in, "<title", 6)) {
 598         t = in+6;
 599       } else if (!strncasecmp (in, "<summary", 8)) {
 600         d = in+8;
 601       } else if (!strncasecmp (in, "<description", 12)) {
 602         a = in+12;
 603       } else if (!strncasecmp (in, "<content:encoded", 16)) {
 604         c = in+16;
 605       } else if (!strncasecmp (in, "<content", 8)) {
 606         b = in+8;
 607       }
 608     }
 609   }
 610
 611   if (! done) {         // Finish off the final item.
 612     done = 1;
 613     goto DONE;
 614   }
 615
 616   char *ret2 = textclient_strip_html (ret);
 617   free (ret);
 618   ret = ret2;
 619
 620   if (wiki_p) {
 621     strip_wiki (ret);
 622     ret2 = decode_entities (ret);
 623     free (ret);
 624     ret = ret2;
 625   }
 626
 627   return ret;
 628 }
 629
 630
 631 static void
 632 wrap_text (char *body, int columns, int max_lines)
 633 {
 634   int col = 0, last_col = 0;
 635   char *last_space = 0;
 636   int lines = 0;
 637   if (! body) return;
 638   for (char *p = body; *p; p++) {
 639     if (*p == '\r' || *p == '\n' || *p == ' ' || *p == '\t') {
 640       if (col > columns && last_space) {
 641         *last_space = '\n';
 642         col = col - last_col;
 643       }
 644       last_space = p;
 645       last_col = col;
 646     }
 647     if (*p == '\r' || *p == '\n') {
 648       col = 0;
 649       last_col = 0;
 650       last_space = 0;
 651       lines++;
 652       if (max_lines && lines >= max_lines)
 653         {
 654           *p = 0;
 655           break;
 656         }
 657     } else {
 658       col++;
 659     }
 660   }
 661 }
 662
 663
 664 static void
 665 rewrap_text (char *body, int columns)
 666 {
 667   if (! body) return;
 668   for (char *p = body; *p; p++) {
 669     if (*p == '\n') {
 670       if (p[1] == '\n')
 671         p++;
 672       else
 673         *p = ' ';
 674     }
 675   }
 676   wrap_text (body, columns, 0);
 677 }
 678
 679
 680
 681 static void
 682 strip_backslashes (char *s)
 683 {
 684   char *out = s;
 685   for (char *in = s; *in; in++) {
 686     if (*in == '\\') {
 687       in++;
 688       if      (*in == 'n') *out++ = '\n';
 689       else if (*in == 'r') *out++ = '\r';
 690       else if (*in == 't') *out++ = '\t';
 691       else *out++ = *in;
 692     } else {
 693       *out++ = *in;
 694     }
 695   }
 696   *out = 0;
 697 }
 698
 699
 700 /* Load the raw body of a URL, and convert it to plain text.
 701  */
 702 static char *
 703 mobile_url_text (Display *dpy, const char *url)
 704 {
 705   char *body = textclient_mobile_url_string (dpy, url);
 706   enum { RSS, HTML, TEXT } type;
 707   if (!body)
 708     return NULL;
 709
 710   if (!strncasecmp (body, "<?xml", 5) ||
 711       !strncasecmp (body, "<!doctype rss", 13))
 712     type = RSS;
 713   else if (!strncasecmp (body, "<!doctype html", 14) ||
 714            !strncasecmp (body, "<html", 5) ||
 715            !strncasecmp (body, "<head", 5))
 716     type = HTML;
 717   else if (strcasestr (body, "<base") ||
 718            strcasestr (body, "<body") ||
 719            strcasestr (body, "<script") ||
 720            strcasestr (body, "<style") ||
 721            strcasestr (body, "<a href"))
 722     type = HTML;
 723   else if (strcasestr (body, "<channel") ||
 724            strcasestr (body, "<generator") ||
 725            strcasestr (body, "<description") ||
 726            strcasestr (body, "<content") ||
 727            strcasestr (body, "<feed") ||
 728            strcasestr (body, "<entry"))
 729     type = RSS;
 730   else
 731     type = TEXT;
 732
 733   char *body2 = 0;
 734
 735   switch (type) {
 736   case HTML: body2 = textclient_strip_html (body); break;
 737   case RSS:  body2 = textclient_strip_rss (body);  break;
 738   case TEXT: break;
 739   default: abort(); break;
 740   }
 741
 742   if (body2) {
 743     free (body);
 744     return body2;
 745   } else {
 746     return body;
 747   }
 748 }
 749
 750
 751 int
 752 textclient_getc (text_data *d)
 753 {
 754   if (!d->fp || !*d->fp) {
 755     if (d->buf) {
 756       free (d->buf);
 757       d->buf = 0;
 758       d->fp = 0;
 759     }
 760     switch (d->mode) {
 761     case DATE: DATE:
 762       d->buf = textclient_mobile_date_string();
 763       break;
 764     case LITERAL:
 765       if (!d->literal || !*d->literal)
 766         goto DATE;
 767       d->buf = (char *) malloc (strlen (d->literal) + 3);
 768       strcpy (d->buf, d->literal);
 769       strcat (d->buf, "\n");
 770       strip_backslashes (d->buf);
 771       d->fp = d->buf;
 772       break;
 773     case URL:
 774       if (!d->url || !*d->url)
 775         goto DATE;
 776       d->buf = mobile_url_text (d->dpy, d->url);
 777       break;
 778     default:
 779       abort();
 780     }
 781     if (d->columns > 10)
 782       wrap_text (d->buf, d->columns, d->max_lines);
 783     d->fp = d->buf;
 784   }
 785
 786   if (!d->fp || !*d->fp)
 787     return -1;
 788
 789   unsigned char c = (unsigned char) *d->fp++;
 790   return (int) c;
 791 }
 792
 793
 794 Bool
 795 textclient_putc (text_data *d, XKeyEvent *k)
 796 {
 797   return False;
 798 }
 799
 800
 801 void
 802 textclient_reshape (text_data *d,
 803                     int pix_w, int pix_h,
 804                     int char_w, int char_h,
 805                     int max_lines)
 806 {
 807   d->columns = char_w;
 808   d->max_lines = max_lines;
 809   rewrap_text (d->buf, d->columns);
 810 }
 811
 812
 813 #endif /* whole file */