1 /* xscreensaver, Copyright (c) 2012-2015 Jamie Zawinski <jwz@jwz.org>
3 * Permission to use, copy, modify, distribute, and sell this software and its
4 * documentation for any purpose is hereby granted without fee, provided that
5 * the above copyright notice appear in all copies and that both that
6 * copyright notice and this permission notice appear in supporting
7 * documentation. No representations are made about the suitability of this
8 * software for any purpose. It is provided "as is" without express or
11 * Loading URLs and returning the underlying text.
13 * This is necessary because iOS doesn't have Perl installed, so we can't
14 * run "xscreensaver-text" to do this.
19 #ifdef USE_IPHONE /* whole file -- see utils/textclient.c */
21 #include "textclient.h"
22 #include "resources.h"
// Element count of an array: total size divided by the size of one element.
// Only meaningful for a real array, not for a pointer.
28 #define countof(x) (sizeof((x))/sizeof((*x)))
// Program name, defined in another file; used below as the prefix of the
// stderr log lines.
31 extern const char *progname;
// Selector for where the text comes from.
// NOTE(review): the enclosing declaration is not visible in this listing;
// presumably this is a field of text_data -- confirm.
35 enum { DATE, LITERAL, URL } mode;
// Creates the text client state for a display.
// Visible steps: calloc a zero-filled text_data, log an init line to stderr,
// then read the textMode, textLiteral and textURL resources.
48 textclient_open (Display *dpy)
50 text_data *d = (text_data *) calloc (1, sizeof (*d));
53 fprintf (stderr, "%s: textclient: init\n", progname);
// textMode may be given as a name (compared case-insensitively) or as a
// digit. A missing or empty value takes the same branch as date / 0.
// NOTE(review): the statement run by each branch is not visible in this
// listing (presumably it assigns the mode) -- confirm.
56 char *s = get_string_resource (dpy, "textMode", "TextMode");
57 if (!s || !*s || !strcasecmp (s, "date") || !strcmp (s, "0"))
59 else if (!strcasecmp (s, "literal") || !strcmp (s, "1"))
// NOTE(review): url is selected by the digit 3; no visible branch matches 2.
// Confirm whether 2 is handled on a line that is not shown here.
61 else if (!strcasecmp (s, "url") || !strcmp (s, "3"))
// Both strings are stored on d; textclient_close frees them.
66 d->literal = get_string_resource (dpy, "textLiteral", "TextLiteral");
67 d->url = get_string_resource (dpy, "textURL", "TextURL");
// Releases the strings owned by a text client and logs a free line to stderr.
// NOTE(review): whether d itself is freed is not visible in this listing.
74 textclient_close (text_data *d)
77 fprintf (stderr, "%s: textclient: free\n", progname);
// Each member is freed only when it is non-NULL.
80 if (d->buf) free (d->buf);
81 if (d->literal) free (d->literal);
82 if (d->url) free (d->url);
// NOTE(review): the function header is not visible in this listing;
// presumably this is the body of date_string, which textclient_getc calls
// in DATE mode -- confirm.
// Builds a short text block from the device name, model, OS version and
// the current date, and returns a strdup copy of it.
90 UIDevice *dd = [UIDevice currentDevice];
91 NSString *name = [dd name]; // My iPhone
92 NSString *model = [dd model]; // iPad
93 // NSString *system = [dd systemName]; // iPhone OS
94 NSString *vers = [dd systemVersion]; // 5.0
// Current date and time, both in the medium formatter style.
97 localizedStringFromDate:[NSDate date]
98 dateStyle: NSDateFormatterMediumStyle
99 timeStyle: NSDateFormatterMediumStyle];
100 NSString *nl = @"\n";
// Layout: name, newline, model, space, version, blank line, date, blank line.
102 NSString *result = name;
103 result = [result stringByAppendingString: nl];
104 result = [result stringByAppendingString: model];
105 // result = [result stringByAppendingString: nl];
106 // result = [result stringByAppendingString: system];
107 result = [result stringByAppendingString: @" "];
108 result = [result stringByAppendingString: vers];
109 result = [result stringByAppendingString: nl];
110 result = [result stringByAppendingString: nl];
111 result = [result stringByAppendingString: date];
112 result = [result stringByAppendingString: nl];
113 result = [result stringByAppendingString: nl];
// The returned copy is Latin1 encoded, while url_string produces UTF8.
// NOTE(review): cStringUsingEncoding is documented to return NULL when the
// string cannot be converted losslessly, e.g. a device name with characters
// outside Latin1; that NULL would be passed straight to strdup -- confirm.
114 return strdup ([result cStringUsingEncoding:NSISOLatin1StringEncoding]);
118 /* Returns a copy of the string with some basic HTML entities decoded.
// Decodes HTML entities found in html into a newly allocated string.
// Visible steps: allocate four output bytes per input byte plus one, handle
// hexadecimal and decimal numeric references through utf8_encode, look up
// named entities in the table below, then shrink the buffer with realloc.
121 decode_entities (const char *html)
123 char *ret = (char *) malloc ((strlen(html) * 4) + 1); // room for UTF8
124 const char *in = html;
// Lookup table: c is the entity name as compared against the text at in+1,
// e is the byte sequence copied to the output in its place.
128 const struct { const char *c; const char *e; } entities[] = {
134 // Convert Latin1 to UTF8
135 { "nbsp", " " }, // 160
136 { "iexcl", "\302\241" }, // ¡ 161
137 { "cent", "\302\242" }, // ¢ 162
138 { "pound", "\302\243" }, // £ 163
139 { "curren", "\302\244" }, // ¤ 164
140 { "yen", "\302\245" }, // ¥ 165
141 { "brvbar", "\302\246" }, // ¦ 166
142 { "sect", "\302\247" }, // § 167
143 { "uml", "\302\250" }, // ¨ 168
144 { "copy", "\302\251" }, // © 169
145 { "ordf", "\302\252" }, // ª 170
146 { "laquo", "\302\253" }, // « 171
147 { "not", "\302\254" }, // ¬ 172
148 { "shy", "\302\255" }, // 173
149 { "reg", "\302\256" }, // ® 174
150 { "macr", "\302\257" }, // ¯ 175
151 { "deg", "\302\260" }, // ° 176
152 { "plusmn", "\302\261" }, // ± 177
153 { "sup2", "\302\262" }, // ² 178
154 { "sup3", "\302\263" }, // ³ 179
155 { "acute", "\302\264" }, // ´ 180
156 { "micro", "\302\265" }, // µ 181
157 { "para", "\302\266" }, // ¶ 182
158 { "middot", "\302\267" }, // · 183
159 { "cedil", "\302\270" }, // ¸ 184
160 { "sup1", "\302\271" }, // ¹ 185
161 { "ordm", "\302\272" }, // º 186
162 { "raquo", "\302\273" }, // » 187
163 { "frac14", "\302\274" }, // ¼ 188
164 { "frac12", "\302\275" }, // ½ 189
165 { "frac34", "\302\276" }, // ¾ 190
166 { "iquest", "\302\277" }, // ¿ 191
167 { "Agrave", "\303\200" }, // À 192
168 { "Aacute", "\303\201" }, // Á 193
169 { "Acirc", "\303\202" }, // Â 194
170 { "Atilde", "\303\203" }, // Ã 195
171 { "Auml", "\303\204" }, // Ä 196
172 { "Aring", "\303\205" }, // Å 197
173 { "AElig", "\303\206" }, // Æ 198
174 { "Ccedil", "\303\207" }, // Ç 199
175 { "Egrave", "\303\210" }, // È 200
176 { "Eacute", "\303\211" }, // É 201
177 { "Ecirc", "\303\212" }, // Ê 202
178 { "Euml", "\303\213" }, // Ë 203
179 { "Igrave", "\303\214" }, // Ì 204
180 { "Iacute", "\303\215" }, // Í 205
181 { "Icirc", "\303\216" }, // Î 206
182 { "Iuml", "\303\217" }, // Ï 207
183 { "ETH", "\303\220" }, // Ð 208
184 { "Ntilde", "\303\221" }, // Ñ 209
185 { "Ograve", "\303\222" }, // Ò 210
186 { "Oacute", "\303\223" }, // Ó 211
187 { "Ocirc", "\303\224" }, // Ô 212
188 { "Otilde", "\303\225" }, // Õ 213
189 { "Ouml", "\303\226" }, // Ö 214
190 { "times", "\303\227" }, // × 215
191 { "Oslash", "\303\230" }, // Ø 216
192 { "Ugrave", "\303\231" }, // Ù 217
193 { "Uacute", "\303\232" }, // Ú 218
194 { "Ucirc", "\303\233" }, // Û 219
195 { "Uuml", "\303\234" }, // Ü 220
196 { "Yacute", "\303\235" }, // Ý 221
197 { "THORN", "\303\236" }, // Þ 222
198 { "szlig", "\303\237" }, // ß 223
199 { "agrave", "\303\240" }, // à 224
200 { "aacute", "\303\241" }, // á 225
201 { "acirc", "\303\242" }, // â 226
202 { "atilde", "\303\243" }, // ã 227
203 { "auml", "\303\244" }, // ä 228
204 { "aring", "\303\245" }, // å 229
205 { "aelig", "\303\246" }, // æ 230
206 { "ccedil", "\303\247" }, // ç 231
207 { "egrave", "\303\250" }, // è 232
208 { "eacute", "\303\251" }, // é 233
209 { "ecirc", "\303\252" }, // ê 234
210 { "euml", "\303\253" }, // ë 235
211 { "igrave", "\303\254" }, // ì 236
212 { "iacute", "\303\255" }, // í 237
213 { "icirc", "\303\256" }, // î 238
214 { "iuml", "\303\257" }, // ï 239
215 { "eth", "\303\260" }, // ð 240
216 { "ntilde", "\303\261" }, // ñ 241
217 { "ograve", "\303\262" }, // ò 242
218 { "oacute", "\303\263" }, // ó 243
219 { "ocirc", "\303\264" }, // ô 244
220 { "otilde", "\303\265" }, // õ 245
221 { "ouml", "\303\266" }, // ö 246
222 { "divide", "\303\267" }, // ÷ 247
223 { "oslash", "\303\270" }, // ø 248
224 { "ugrave", "\303\271" }, // ù 249
225 { "uacute", "\303\272" }, // ú 250
226 { "ucirc", "\303\273" }, // û 251
227 { "uuml", "\303\274" }, // ü 252
228 { "yacute", "\303\275" }, // ý 253
229 { "thorn", "\303\276" }, // þ 254
230 { "yuml", "\303\277" }, // ÿ 255
232 // And some random others
233 { "bdquo", "\342\200\236" }, // „
234 { "bull", "\342\200\242" }, // •
235 { "circ", "\313\206" }, // ˆ
236 { "cong", "\342\211\205" }, // ≅
237 { "empty", "\342\210\205" }, // ∅
238 { "emsp", "\342\200\203" }, //
239 { "ensp", "\342\200\202" }, //
240 { "equiv", "\342\211\241" }, // ≡
241 { "frasl", "\342\201\204" }, // ⁄
242 { "ge", "\342\211\245" }, // ≥
243 { "hArr", "\342\207\224" }, // ⇔
244 { "harr", "\342\206\224" }, // ↔
245 { "hellip", "\342\200\246" }, // …
246 { "lArr", "\342\207\220" }, // ⇐
247 { "lang", "\342\237\250" }, // ⟨
248 { "larr", "\342\206\220" }, // ←
249 { "ldquo", "\342\200\234" }, // “
250 { "le", "\342\211\244" }, // ≤
251 { "lowast", "\342\210\227" }, // ∗
252 { "loz", "\342\227\212" }, // ◊
253 { "lsaquo", "\342\200\271" }, // ‹
254 { "lsquo", "\342\200\230" }, // ‘
255 { "mdash", "\342\200\224" }, // —
256 { "minus", "\342\210\222" }, // −
257 { "ndash", "\342\200\223" }, // –
258 { "ne", "\342\211\240" }, // ≠
259 { "OElig", "\305\222" }, // Œ
260 { "oelig", "\305\223" }, // œ
261 { "prime", "\342\200\262" }, // ′
// NOTE(review): quot emits the same bytes as rdquo below (a curly closing
// quote) rather than an ASCII double quote -- confirm this is intended.
262 { "quot", "\342\200\235" }, // ”
263 { "rArr", "\342\207\222" }, // ⇒
264 { "rang", "\342\237\251" }, // ⟩
265 { "rarr", "\342\206\222" }, // →
266 { "rdquo", "\342\200\235" }, // ”
267 { "rsaquo", "\342\200\272" }, // ›
268 { "rsquo", "\342\200\231" }, // ’
269 { "sbquo", "\342\200\232" }, // ‚
270 { "sim", "\342\210\274" }, // ∼
271 { "thinsp", "\342\200\211" }, //
272 { "tilde", "\313\234" }, // ˜
273 { "trade", "\342\204\242" }, // ™
// Hexadecimal numeric reference: in[1] is # and in[2] is a lowercase x.
// An uppercase X does not match here and falls into the decimal branch.
279 if (in[1] == '#' && in[2] == 'x') { // A
// NOTE(review): the letter digits a-f and A-F are converted with + 16 in
// the accumulation below; a hex letter normally contributes + 10 (a is 10).
// This looks like a bug that yields wrong code points -- confirm.
282 while ((*in >= '0' && *in <= '9') ||
283 (*in >= 'A' && *in <= 'F') ||
284 (*in >= 'a' && *in <= 'f')) {
285 i = (i * 16) + (*in >= 'a' ? *in - 'a' + 16 :
286 *in >= 'A' ? *in - 'A' + 16 :
// NOTE(review): this adds the utf8_encode result to the byte stored at out
// instead of advancing the out pointer, and strlen(out) is taken on output
// memory that has presumably not been written yet. Probably meant to advance
// out by the encoded length -- confirm against the utf8_encode contract.
290 *out += utf8_encode (i, out, strlen(out));
// Decimal numeric reference: in[1] is # followed by digits.
292 } else if (in[1] == '#') { // A
295 while (*in >= '0' && *in <= '9') {
296 i = (i * 10) + (*in - '0');
// NOTE(review): same pointer and strlen concern as in the hex branch.
299 *out += utf8_encode (i, out, strlen(out));
// Named entity: linear scan of the table, stopping once done is set.
// NOTE(review): the comparison only requires the table name to be a prefix
// of the text at in+1; no check for the closing semicolon is visible, so a
// short name such as le or ne can match the start of a longer word -- confirm.
303 for (i = 0; !done && i < countof(entities); i++) {
304 if (!strncmp (in+1, entities[i].c, strlen(entities[i].c))) {
305 strcpy (out, entities[i].e);
// Skip the name plus one more input character; advance past the copied bytes.
306 in += strlen(entities[i].c) + 1;
307 out += strlen(entities[i].e);
// Shrink the allocation to the bytes written plus one (presumably the NUL).
326 ret = realloc (ret, out - ret + 1);
332 /* Returns a copy of the HTML string that has been converted to plain text,
333 in UTF8 encoding. HTML tags are stripped, <BR> and <P> are converted
334 to newlines, and some basic HTML entities are decoded.
// Converts HTML to plain text in a newly allocated buffer, then runs the
// result through decode_entities.
337 strip_html (const char *html)
343 char *ret = (char *) malloc ((strlen(html) * 4) + 1); // room for UTF8
// Single pass over the input, one character per iteration.
347 for (const char *in = html; *in; in++) {
// End of an HTML comment.
349 if (!strncmp (in, "-->", 3)) {
356 } else if (*in == '<') {
// Start of an HTML comment.
358 if (!strncmp (in, "<!--", 4)) {
361 } else if (!strncasecmp (in, "<BR", 3)) {
// A paragraph tag emits newlines until two consecutive ones are pending,
// as tracked by the nl counter.
// NOTE(review): the two-character prefix test also matches other tags that
// begin with <p, such as <pre> or <param> -- confirm this is acceptable.
365 } else if (!strncasecmp (in, "<P", 2)) {
366 if (nl < 2) { *out++ = '\n'; nl++; }
367 if (nl < 2) { *out++ = '\n'; nl++; }
370 } else if (*in == ' ' || *in == '\t' || *in == '\r' || *in == '\n') {
// NOTE(review): out is compared with html, the input string. If out points
// into ret (its initialization is not visible here), this test is always
// true; it was probably meant to be out != ret so that leading whitespace
// is dropped -- confirm.
371 if (!white && out != html)
// Entities are decoded after the tags have been stripped.
383 char *ret2 = decode_entities (ret);
// Extracts the text content of one RSS or Atom element. s points at the
// opening tag; the content is either CDATA quoted or entity encoded.
// NOTE(review): callers pass pointers that can still be 0 (see strip_rss);
// the NULL guard is presumably on a line not visible here -- confirm.
393 copy_rss_field (const char *s)
396 while (*s && *s != '>') // Skip forward to >
401 if (!strncmp (s, "<![CDATA[", 9)) { // CDATA quoting
// NOTE(review): the end of the section is searched as two closing brackets;
// the XML terminator is those two brackets followed by >, so bracket pairs
// inside the content would end the field early -- confirm.
403 char *e = strstr (s, "]]");
405 unsigned long L = strlen (s);
406 char *s2 = (char *) malloc (L+1);
410 } else { // Entity-encoded.
412 for (s2 = s; *s2 && *s2 != '<'; s2++) // Terminate at <
// Copy the raw field text, then convert it to plain text with strip_html.
414 char *s3 = (char *) malloc (s2 - s + 1);
416 memcpy (s3, s, s2-s);
418 char *s4 = strip_html (s3);
// Copies up to four candidate fields with copy_rss_field, keeps the longest
// copy and frees the others.
426 pick_rss_field (const char *a, const char *b, const char *c, const char *d)
428 // Pick the longest of the fields.
429 char *a2 = copy_rss_field (a);
430 char *b2 = copy_rss_field (b);
431 char *c2 = copy_rss_field (c);
432 char *d2 = copy_rss_field (d);
// A missing copy counts as length zero.
433 unsigned long al = a2 ? strlen(a2) : 0;
434 unsigned long bl = b2 ? strlen(b2) : 0;
435 unsigned long cl = c2 ? strlen(c2) : 0;
436 unsigned long dl = d2 ? strlen(d2) : 0;
// The comparisons are strict, so none of these three branches is taken on
// a tie for the maximum.
// NOTE(review): the fallback branch is not visible here; presumably it
// selects d2 -- confirm.
439 if (al > bl && al > cl && al > dl) ret = a2;
440 else if (bl > al && bl > cl && bl > dl) ret = b2;
441 else if (cl > al && cl > bl && cl > dl) ret = c2;
// Free every copy except the one being returned.
443 if (a2 && a2 != ret) free (a2);
444 if (b2 && b2 != ret) free (b2);
445 if (c2 && c2 != ret) free (c2);
446 if (d2 && d2 != ret) free (d2);
451 /* Strip some Wikipedia formatting from the string to make it more readable.
// Removes Wikipedia markup from text. The visible branches each recognize
// one construct at in, find its end with strstr, and either skip it or
// memmove the kept part to out, so the string is rewritten in place.
454 strip_wiki (char *text)
460 if (!strncmp (in, "<!--", 4)) /* <!-- ... --> */
462 char *e = strstr (in+4, "-->");
465 else if (!strncmp (in, "/*", 2)) /* ... */
467 char *e = strstr (in+2, "*/");
470 else if (!strncmp (in, "{{Infobox", 9)) /* {{Infobox ... \n}}\n */
// The Infobox end is a closing brace pair at the start of a line.
472 char *e = strstr (in+2, "\n}}");
476 else if (!strncmp (in, "{{", 2)) /* {{ ...table... }} */
478 char *e = strstr (in+2, "}}");
482 else if (!strncmp (in, "{|", 2)) /* {| ...table... |} */
484 char *e = strstr (in+2, "|}");
488 else if (!strncmp (in, "|-", 2)) /* |- ...table cell... | */
490 char *e = strstr (in+2, "|");
494 else if (!strncmp (in, "<ref", 4)) /* <ref>...</ref> -> "*" */
// e1 is the end of a self-closing tag, e2 the end tag of a paired one.
// NOTE(review): when e2 is NULL the test e1 < e2 is false, so a self-closing
// ref with no later closing ref tag is handled by neither visible branch.
// Confirm what the fallback on the lines not shown does.
496 char *e1 = strstr (in+4, "/>");
497 char *e2 = strstr (in+4, "</ref>");
498 if (e1 && e1 < e2) in = e1 + 2;
499 else if (e2) in = e2 + 6;
504 else if (!strncmp (in, "<", 1)) /* <...> */
506 char *e = strstr (in+1, ">");
509 else if (!strncmp (in, "[[", 2)) /* [[ ... ]] */
511 char *e1 = strstr (in+2, "|");
512 char *e2 = strstr (in+2, "]]");
// With a separator before the end, only the anchor text after it is kept.
513 if (e1 && e2 && e1 < e2) /* [[link|anchor]] */
515 long L = e2 - e1 - 1;
516 memmove (out, e1+1, L);
// Without a separator the whole link text is kept.
520 else if (e2) /* [[link]] */
522 long L = e2 - in - 2;
523 memmove (out, in+2, L);
530 else if (!strncmp (in, "[", 1)) /* [ ... ] */
// NOTE(review): the opening bracket is one character, yet both searches
// start at in+2, so the first character after the bracket is never examined.
// Possibly copied from the double bracket case -- confirm.
532 char *e1 = strstr (in+2, " ");
533 char *e2 = strstr (in+2, "]");
534 if (e1 && e2 && e1 < e2) /* [url anchor] */
536 long L = e2 - e1 - 1;
537 memmove (out, e1+1, L);
// The longer quote runs are tested before the shorter ones.
544 else if (!strncmp (in, "''''", 4)) /* omit '''' */
546 else if (!strncmp (in, "'''", 3)) /* omit ''' */
548 else if (!strncmp (in, "''", 2) || /* '' or `` or "" -> " */
549 !strncmp (in, "``", 2) ||
550 !strncmp (in, "\"\"", 2))
562 /* Collapse newlines */
// Runs of three or more newlines are reduced.
567 while (!strncmp(in, "\n\n\n", 3))
575 /* Returns a copy of the RSS document that has been converted to plain text,
576 in UTF8 encoding. Roughly, it uses the contents of the <description> field
577 of each <item>, and decodes HTML within it.
// Converts an RSS or Atom document to plain text. While scanning, it
// remembers where the title and the candidate body fields of the current
// item start, and emits the item when the next item or entry tag is seen.
580 strip_rss (const char *rss)
582 char *ret = malloc (strlen(rss) * 4 + 1); // room for UTF8
// t holds the title position; a, b, c and d hold the candidate body fields.
584 const char *a = 0, *b = 0, *c = 0, *d = 0, *t = 0;
// Nonzero when the feed declares a MediaWiki generator.
587 int wiki_p = !!strcasestr (rss, "<generator>MediaWiki");
590 for (const char *in = rss; *in; in++) {
592 if (!strncasecmp (in, "<item", 5) || // New item, dump.
593 !strncasecmp (in, "<entry", 6)) {
// Copy out the fields collected for the previous item, then reset them.
596 char *title = copy_rss_field (t);
597 char *body = pick_rss_field (a, b, c, d);
599 a = b = c = d = t = 0;
// Title and body with identical text are detected here.
601 if (title && body && !strcmp (title, body)) {
610 strcpy (out, "\n\n");
622 } else if (head) { // still before first <item>
// The comparisons below are prefix tests, so the longer content:encoded tag
// has to be checked before the plain content tag.
// NOTE(review): which of t, a, b, c, d each tag assigns is not visible here.
624 } else if (!strncasecmp (in, "<title", 6)) {
626 } else if (!strncasecmp (in, "<summary", 8)) {
628 } else if (!strncasecmp (in, "<description", 12)) {
630 } else if (!strncasecmp (in, "<content:encoded", 16)) {
632 } else if (!strncasecmp (in, "<content", 8)) {
638 if (! done) { // Finish off the final item.
// NOTE(review): ret is overwritten with the buffer returned by strip_html;
// unless the old buffer is freed on a line not shown, it leaks -- confirm.
643 ret = strip_html (ret);
648 ret2 = decode_entities (ret);
// Word-wraps body to the given column width, scanning it one character at
// a time and acting at whitespace.
// NOTE(review): body is dereferenced with no visible NULL check -- confirm
// callers never pass NULL.
658 wrap_text (char *body, int columns, int max_lines)
660 int col = 0, last_col = 0;
661 char *last_space = 0;
664 for (char *p = body; *p; p++) {
665 if (*p == '\r' || *p == '\n' || *p == ' ' || *p == '\t') {
// The line is too long and an earlier whitespace position is known.
// NOTE(review): the statement that breaks the line is not visible here;
// presumably it writes a newline at last_space -- confirm.
666 if (col > columns && last_space) {
// Columns already consumed after the break carry over to the new line.
668 col = col - last_col;
673 if (*p == '\r' || *p == '\n') {
// A max_lines of zero disables the line limit.
678 if (max_lines && lines >= max_lines)
// Re-flows already wrapped text for a new column width: one pass over body,
// then wrap_text with no line limit.
// NOTE(review): the loop body and any NULL guard for body are not visible.
// textclient_reshape passes d->buf, which may still be NULL -- confirm.
691 rewrap_text (char *body, int columns)
694 for (char *p = body; *p; p++) {
// A max_lines of 0 means unlimited in wrap_text.
702 wrap_text (body, columns, 0);
// Rewrites backslash escapes in s in place; the visible cases turn the
// letters n, r and t into newline, carriage return and tab.
// NOTE(review): the test for the backslash itself and the handling of other
// escaped characters are on lines not visible here.
708 strip_backslashes (char *s)
711 for (char *in = s; *in; in++) {
714 if (*in == 'n') *out++ = '\n';
715 else if (*in == 'r') *out++ = '\r';
716 else if (*in == 't') *out++ = '\t';
726 /* Returns the contents of the URL as plain text.
727 HTML and RSS are decoded.
// Loads the document at url, classifies it as RSS, HTML or plain text from
// its first 512 characters, and converts it with strip_rss or strip_html.
730 url_string (const char *url)
// The C string is interpreted as Latin1 when building the NSURL.
733 [NSURL URLWithString:
734 [NSString stringWithCString: url
735 encoding:NSISOLatin1StringEncoding]];
// The document is fetched and decoded as UTF8.
// NOTE(review): stringWithContentsOfURL loads synchronously, and per the
// Apple documentation returns nil on failure. How a nil body is handled is
// not visible in this listing -- confirm.
737 [NSString stringWithContentsOfURL: nsurl
738 encoding: NSUTF8StringEncoding
743 enum { RSS, HTML, TEXT } type;
745 // Only search the first 1/2 K of the document while determining type.
747 unsigned long L = [body length];
748 if (L > 512) L = 512;
// head is the trimmed prefix of the document.
// NOTE(review): every pattern below is lowercase, so head is presumably
// lowercased on a line not shown -- confirm.
749 NSString *head = [[[body substringToIndex: L]
750 stringByTrimmingCharactersInSet:
751 [NSCharacterSet whitespaceAndNewlineCharacterSet]]
// Prefix tests run first, then substring tests; rangeOfString has a nonzero
// length when the substring is found.
// NOTE(review): the type assigned by each branch is not visible here. An
// xml declaration is grouped with the rss doctype, so presumably any XML
// document is treated as RSS -- confirm.
753 if ([head hasPrefix:@"<?xml"] ||
754 [head hasPrefix:@"<!doctype rss"])
756 else if ([head hasPrefix:@"<!doctype html"] ||
757 [head hasPrefix:@"<html"] ||
758 [head hasPrefix:@"<head"])
760 else if ([head rangeOfString:@"<base"].length ||
761 [head rangeOfString:@"<body"].length ||
762 [head rangeOfString:@"<script"].length ||
763 [head rangeOfString:@"<style"].length ||
764 [head rangeOfString:@"<a href"].length)
766 else if ([head rangeOfString:@"<channel"].length ||
767 [head rangeOfString:@"<generator"].length ||
768 [head rangeOfString:@"<description"].length ||
769 [head rangeOfString:@"<content"].length ||
770 [head rangeOfString:@"<feed"].length ||
771 [head rangeOfString:@"<entry"].length)
// A heap copy of the UTF8 bytes is handed to the C converters.
// NOTE(review): if body is nil here, the message send yields NULL and strdup
// receives NULL -- confirm that an earlier guard exists.
776 char *body2 = strdup ([body cStringUsingEncoding:NSUTF8StringEncoding]);
780 case HTML: body3 = strip_html (body2); break;
781 case RSS: body3 = strip_rss (body2); break;
// An unknown type value aborts the process.
783 default: abort(); break;
// Returns the next character of the current text. When the read pointer is
// unset or at the end of the buffer, the buffer is regenerated from the
// date, the literal string or the URL.
796 textclient_getc (text_data *d)
// Refill when there is no read pointer or it has reached the terminator.
798 if (!d->fp || !*d->fp) {
806 d->buf = date_string();
809 if (!d->literal || !*d->literal)
// Room for the literal plus the appended newline and the terminator.
811 d->buf = (char *) malloc (strlen (d->literal) + 3);
812 strcpy (d->buf, d->literal);
813 strcat (d->buf, "\n");
// Escapes are expanded in place after the newline has been appended.
814 strip_backslashes (d->buf);
818 if (!d->url || !*d->url)
820 d->buf = url_string (d->url);
// The fresh buffer is wrapped to the current column and line limits.
826 wrap_text (d->buf, d->columns, d->max_lines);
// Still nothing to read after the refill attempt.
830 if (!d->fp || !*d->fp)
// The byte is read as unsigned char and the read pointer advances by one.
833 unsigned char c = (unsigned char) *d->fp++;
// Takes a key event for the text client.
// NOTE(review): the body is not visible in this listing.
839 textclient_putc (text_data *d, XKeyEvent *k)
// Updates the text geometry after a size change: stores the new line limit
// and re-wraps the current buffer to d->columns.
// NOTE(review): the remaining parameters and the assignment of d->columns
// are on lines not visible here.
846 textclient_reshape (text_data *d,
847 int pix_w, int pix_h,
848 int char_w, int char_h,
852 d->max_lines = max_lines;
// NOTE(review): d->buf is NULL until textclient_getc first fills it, so
// rewrap_text has to tolerate a NULL body -- confirm.
853 rewrap_text (d->buf, d->columns);
856 #endif /* USE_IPHONE -- whole file */