-/* xscreensaver, Copyright (c) 2014 Jamie Zawinski <jwz@jwz.org>
+/* xscreensaver, Copyright (c) 2014-2016 Jamie Zawinski <jwz@jwz.org>
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
#include <stdio.h>
#include <string.h>
-#ifdef HAVE_COCOA
+#ifdef HAVE_JWXYZ
# include "jwxyz.h"
-# elif defined(HAVE_ANDROID)
-# include "jwxyz.h"
-#else /* !HAVE_COCOA */
+#else /* !HAVE_JWXYZ */
# include <X11/Xlib.h>
#endif
/* Parse the first UTF8 character at the front of the string.
Return the Unicode character, and the number of bytes read.
*/
-static long
+long
utf8_decode (const unsigned char *in, long length, unsigned long *unicode_ret)
{
const unsigned char *start = in;
/* Converts a Unicode character to a multi-byte UTF8 sequence.
Returns the number of bytes written.
*/
-static int
+int
utf8_encode (unsigned long uc, char *out, long length)
{
const char *old = out;
out->byte1 = 0;
out->byte2 = 0;
- /* shrink */
- c2b = (XChar2b *) realloc (c2b, (out - c2b + 1) * sizeof(*c2b));
-
if (length_ret)
*length_ret = (int) (out - c2b);
+ /* shrink */
+ c2b = (XChar2b *) realloc (c2b, (out - c2b + 1) * sizeof(*c2b));
+
return c2b;
}
while (in < end)
{
- long len2 = utf8_decode (in, len, 0);
+ unsigned long uc;
+ long len2 = utf8_decode (in, len, &uc);
char tmp[10];
strncpy (tmp, (char *) in, len2);
tmp[len2] = 0;
ret[i++] = strdup (tmp);
in += len2;
+
+ /* If this is a Combining Diacritical, append it to the previous
+ character. E.g., "y\314\206\314\206" is one string, not three.
+ */
+ if (i > 1 &&
+ ((uc >= 0x300 && uc <= 0x36F) || /* Combining Diacritical */
+ (uc >= 0x1AB0 && uc <= 0x1AFF) || /* Combining Diacritical Ext. */
+ (uc >= 0x1DC0 && uc <= 0x1DFF) || /* Combining Diacritical Supp. */
+ (uc >= 0x20D0 && uc <= 0x20FF) || /* Combining Diacritical Sym. */
+ (uc >= 0xFE20 && uc <= 0xFE2F))) /* Combining Half Marks */
+ {
+ long L1 = strlen(ret[i-2]);
+ long L2 = strlen(ret[i-1]);
+ char *s2 = (char *) malloc (L1 + L2 + 1);
+ strncpy (s2, ret[i-2], L1);
+ strncpy (s2 + L1, ret[i-1], L2);
+ s2[L1 + L2] = 0;
+ free (ret[i-2]);
+ ret[i-2] = s2;
+ i--;
+ }
}
ret[i] = 0;
- /* shrink */
- ret = (char **) realloc (ret, (i+1) * sizeof(*ret));
-
if (length_ret)
*length_ret = i;
+ /* shrink */
+ ret = (char **) realloc (ret, (i+1) * sizeof(*ret));
+
return ret;
}
}
*out = 0;
- /* shrink */
- utf8 = (char *) realloc (utf8, (out - utf8 + 1) * sizeof(*utf8));
+ out_len = (int) (out - utf8 + 1);
if (length_ret)
- *length_ret = (int) (out - utf8);
+ *length_ret = out_len;
+
+ /* shrink */
+ utf8 = (char *) realloc (utf8, out_len);
return utf8;
}
if (uc == '\240') /* non-breaking space (U+00A0) */
uc = ' ';
- else if (uc >= 0x2300 && uc <= 0x36F)
- uc = 0; /* Discard "Unicode Combining Diacriticals Block" */
+ else if (uc >= 0x300 && uc <= 0x36F)
+ uc = 0; /* Discard "Combining Diacritical Marks" */
+ else if (uc >= 0x1AB0 && uc <= 0x1AFF)
+ uc = 0; /* Discard "Combining Diacritical Marks Extended" */
+ else if (uc >= 0x1DC0 && uc <= 0x1DFF)
+ uc = 0; /* Discard "Combining Diacritical Marks Supplement" */
+ else if (uc >= 0x20D0 && uc <= 0x20FF)
+ uc = 0; /* Discard "Combining Diacritical Marks for Symbols" */
+ else if (uc >= 0xFE20 && uc <= 0xFE2F)
+ uc = 0; /* Discard "Combining Half Marks" */
+
else if (uc > 0xFF)
switch (uc) {