unicode: Support characters beyond the first unicode plane

We used 16-bit variables to store the 'character code' everywhere, but this
won't let us represent anything beyond U+FFFF.

This patch changes those variables to a custom type that can be 32 or 16 bits
depending on the build, and adjusts numerous internal APIs and data structures
to match. This includes:

 * utf8decode() and friends
 * font manipulation, caching, rendering, and generation
 * on-screen keyboard
 * FAT filesystem (parsing and generating UTF-16 LFNs)
 * WIN32 simulator platform code

Note that this patch doesn't _enable_ >16-bit unicode support; a follow-up
patch will turn that on for appropriate targets.

Appears to work on:

 * hosted linux, native, linux simulator in both 16/32-bit modes.

Needs testing on:

 * windows and macos simulator (16-bit + 32-bit)

Change-Id: Iba111b27d2433019b6bff937cf1ebd2c4353a0e8
2024-12-17 08:55:21 -05:00 · 2024-12-17 08:55:21 -05:00 · a2c10f6189
commit a2c10f6189
parent 2a88253426
44 changed files with 476 additions and 330 deletions
--- a/firmware/arabjoin.h
+++ b/firmware/arabjoin.h
@ -1,3 +1,5 @@
+/* Note these are not ucschar_t because all Arabic
+   codepoints fit in 16 bits, so no need to waste table space */
 typedef struct {
    unsigned short isolated;
    unsigned short final;
--- a/firmware/bidi.c
+++ b/firmware/bidi.c
@ -44,7 +44,7 @@
 #define XOR(a,b) ((a||b) && !(a&&b))

 #ifndef BOOTLOADER
-static const arab_t * arab_lookup(unsigned short uchar)
+static const arab_t * arab_lookup(ucschar_t uchar)
 {
    if (uchar >= 0x621 && uchar <= 0x63a)
        return &(jointable[uchar - 0x621]);
@ -57,15 +57,15 @@ static const arab_t * arab_lookup(unsigned short uchar)
    return 0;
 }

-static void arabjoin(unsigned short * stringprt, int length)
+static void arabjoin(ucschar_t *stringprt, int length)
 {
    bool connected = false;
-    unsigned short * writeprt = stringprt;
+    ucschar_t *writeprt = stringprt;

    const arab_t * prev = 0;
    const arab_t * cur;
    const arab_t * ligature = 0;
-    short uchar;
+    ucschar_t uchar;

    int i;
    for (i = 0; i <= length; i++) {
@ -135,13 +135,13 @@ static void arabjoin(unsigned short * stringprt, int length)
 }
 #endif /* !BOOTLOADER */

-unsigned short *bidi_l2v(const unsigned char *str, int orientation)
+ucschar_t *bidi_l2v(const unsigned char *str, int orientation)
 {
-    static unsigned short  utf16_buf[SCROLL_LINE_SIZE];
-    unsigned short *target, *tmp;
+    static ucschar_t utf_buf[SCROLL_LINE_SIZE];
+    ucschar_t *target, *tmp;
 #ifndef BOOTLOADER
-    static unsigned short  bidi_buf[SCROLL_LINE_SIZE];
-    unsigned short *heb_str; /* *broken_str */
+    static ucschar_t bidi_buf[SCROLL_LINE_SIZE];
+    ucschar_t *heb_str; /* *broken_str */
    int block_start, block_end, block_type, block_length, i;
    int length = utf8length(str);
    length=length>=SCROLL_LINE_SIZE?SCROLL_LINE_SIZE-1:length;
@ -152,21 +152,21 @@ unsigned short *bidi_l2v(const unsigned char *str, int orientation)

    tmp = str;
    */
-    target = tmp = utf16_buf;
-    while (*str && target < &utf16_buf[SCROLL_LINE_SIZE-1])
+    target = tmp = utf_buf;
+    while (*str && target < &utf_buf[SCROLL_LINE_SIZE-1])
        str = utf8decode(str, target++);
    *target = 0;

 #ifdef BOOTLOADER
    (void)orientation;
-    return utf16_buf;
-    
+    return utf_buf;
+
 #else /* !BOOTLOADER */
-    if (target == utf16_buf) /* empty string */
+    if (target == utf_buf) /* empty string */
        return target;

    /* properly join any arabic chars */
-    arabjoin(utf16_buf, length);
+    arabjoin(utf_buf, length);

    block_start=block_end=block_length=0;

@ -204,7 +204,7 @@ unsigned short *bidi_l2v(const unsigned char *str, int orientation)

        for (i=block_start; i<=block_end; i++) {
            *target = (block_type == orientation) ?
-                      *(utf16_buf+i) : *(utf16_buf+block_end-i+block_start);
+                      *(utf_buf+i) : *(utf_buf+block_end-i+block_start);
            if (block_type!=orientation) {
                switch (*target) {
                case '(':
@ -226,7 +226,7 @@ unsigned short *bidi_l2v(const unsigned char *str, int orientation)
    *target = 0;

 #if 0 /* Is this code really necessary? */
-    broken_str = utf16_buf;
+    broken_str = utf_buf;
    begin=end=length-1;
    target = broken_str;

@ -246,7 +246,7 @@ unsigned short *bidi_l2v(const unsigned char *str, int orientation)
        if (char_count==max_chars) { /* try to avoid breaking words */
            int new_char_count = char_count;
            int new_begin = begin;
-            
+
            while (new_char_count>0) {
                if (_isblank(heb_str[new_begin]) ||
                    _isnewline(heb_str[new_begin])) {
@ -261,11 +261,11 @@ unsigned short *bidi_l2v(const unsigned char *str, int orientation)
            }
        }
        orig_begin=begin;
-        
+
        /* if (_isblank(heb_str[begin])) {
            heb_str[begin]='\n';
        } */
-        
+
        /* skip leading newlines */
        while (begin<=end && _isnewline(heb_str[begin])) {
            begin++;
@ -282,7 +282,7 @@ unsigned short *bidi_l2v(const unsigned char *str, int orientation)
            target++;
        }
        begin=orig_begin;
-        
+
        if (begin<=0) {
            *target = 0;
            break;
@ -295,4 +295,3 @@ unsigned short *bidi_l2v(const unsigned char *str, int orientation)
    return heb_str;
 #endif /* !BOOTLOADER */
 }
-
--- a/firmware/common/diacritic.c
+++ b/firmware/common/diacritic.c
@ -28,8 +28,8 @@
 #include "system.h"

 #define DIAC_NUM_RANGES      (ARRAYLEN(diac_ranges))
-#define DIAC_RTL             (1 << 7)
-#define DIAC_CNT             (0xFF ^ DIAC_RTL)
+#define DIAC_RTL             (1 << 15)
+#define DIAC_CNT             (0xFFFF ^ DIAC_RTL)

 /* Each diac_range_ struct defines a Unicode range that begins with
 * N diacritic characters, and continues with non-diacritic characters up to the
@ -39,8 +39,8 @@

 struct diac_range
 {
-    uint16_t base;
-    uint8_t  info; /* [RTL:1 CNT:7] */
+    uint16_t base; /* Not ucschar_t until we need >16b */
+    uint16_t info; /* [RTL:1 CNT:15] */
 };

 #define DIAC_RANGE_ENTRY(first_diac, first_non_diac, is_rtl) \
@ -51,7 +51,7 @@ struct diac_range
 static const struct diac_range diac_ranges[] =
 {
    DIAC_RANGE_ENTRY(0x0000, 0x0000, 0),
-    DIAC_RANGE_ENTRY(FIRST_DIACRITIC, 0x0370, 0),
+    DIAC_RANGE_ENTRY(FIRST_DIACRITIC, 0x0370, 0), /* v1 - v4.1 */
    DIAC_RANGE_ENTRY(0x0483, 0x048a, 0),
    DIAC_RANGE_ENTRY(0x0591, 0x05be, 1),
    DIAC_RANGE_ENTRY(0x05bf, 0x05c0, 1),
@ -146,6 +146,7 @@ static const struct diac_range diac_ranges[] =
    DIAC_RANGE_ENTRY(0x19c8, 0x19ca, 0),
    DIAC_RANGE_ENTRY(0x1a17, 0x1a1c, 0),
    DIAC_RANGE_ENTRY(0x1a55, 0x1a80, 0),
+    DIAC_RANGE_ENTRY(0x1ab0, 0x1b00, 0), /* v7.0 */
    DIAC_RANGE_ENTRY(0x1b00, 0x1b05, 0),
    DIAC_RANGE_ENTRY(0x1b34, 0x1b45, 0),
    DIAC_RANGE_ENTRY(0x1b6b, 0x1b74, 0),
@ -156,10 +157,10 @@ static const struct diac_range diac_ranges[] =
    DIAC_RANGE_ENTRY(0x1cd4, 0x1ce9, 0),
    DIAC_RANGE_ENTRY(0x1ced, 0x1cee, 0),
    DIAC_RANGE_ENTRY(0x1cf2, 0x1cf3, 0),
-    DIAC_RANGE_ENTRY(0x1dc0, 0x1e00, 0),
-    DIAC_RANGE_ENTRY(0x20d0, 0x20f1, 0),
+    DIAC_RANGE_ENTRY(0x1dc0, 0x1e00, 0), /* v4.1 - v5.2 */
+    DIAC_RANGE_ENTRY(0x20d0, 0x2100, 0), /* v1.0 - v5.1 */
    DIAC_RANGE_ENTRY(0x2cef, 0x2cf2, 0),
-    DIAC_RANGE_ENTRY(0x2de0, 0x2e00, 0),
+    DIAC_RANGE_ENTRY(0x2de0, 0x2e00, 0), /* v5.1 */
    DIAC_RANGE_ENTRY(0x302a, 0x3030, 0),
    DIAC_RANGE_ENTRY(0x3099, 0x309b, 0),
    DIAC_RANGE_ENTRY(0xa66f, 0xa673, 0),
@ -188,7 +189,7 @@ static const struct diac_range diac_ranges[] =
    DIAC_RANGE_ENTRY(0xabe3, 0xabeb, 0),
    DIAC_RANGE_ENTRY(0xabec, 0xabee, 0),
    DIAC_RANGE_ENTRY(0xfb1e, 0xfb1f, 0),
-    DIAC_RANGE_ENTRY(0xfe20, 0xfe27, 0),
+    DIAC_RANGE_ENTRY(0xfe20, 0xfe30, 0), /* v1.0 - v8.0 */
    DIAC_RANGE_ENTRY(0xfe70, 0xfe70, 1),
    DIAC_RANGE_ENTRY(0xff00, 0xff00, 0),
    DIAC_RANGE_ENTRY(0xffff, 0xffff, 0),
@ -196,7 +197,7 @@ static const struct diac_range diac_ranges[] =

 #define MRU_MAX_LEN 32

-bool is_diacritic(const unsigned short char_code, bool *is_rtl)
+bool is_diacritic(const ucschar_t char_code, bool *is_rtl)
 {
    static uint8_t mru_len = 0;
    static uint8_t diacritic_mru[MRU_MAX_LEN];
@ -209,7 +210,6 @@ bool is_diacritic(const unsigned short char_code, bool *is_rtl)
    /* Search in MRU */
    for (mru = 0, i = 0; mru < mru_len; mru++)
    {
-
        /* Items shifted >> 1 */
        itmp = i;
        i = diacritic_mru[mru];
@ -250,10 +250,10 @@ Found:
    if (is_rtl)
        *is_rtl = ((DIAC_RTL & info) == DIAC_RTL);

-    return (char_code < diac->base + (info & DIAC_CNT));
+    return (char_code < (diac->base + (info & DIAC_CNT)));
 }
 #else /*BOOTLOADER*/
-inline bool is_diacritic(const unsigned short char_code, bool *is_rtl)
+inline bool is_diacritic(const ucschar_t char_code, bool *is_rtl)
 {
    (void)char_code;
    if (is_rtl)
--- a/firmware/common/unicode.c
+++ b/firmware/common/unicode.c
@ -127,7 +127,7 @@ static int volatile cp_table_ref = 0;

 /* non-default codepage table buffer (cannot be bufalloced! playback itself
   may be making the load request) */
-static unsigned short codepage_table[MAX_CP_TABLE_SIZE+1];
+static unsigned short codepage_table[MAX_CP_TABLE_SIZE+1]; // XXX convert to ucschar_t if we ever need > 16bit mappings?

 #if defined(APPLICATION) && defined(__linux__)
 static const char * const name_codepages_linux[NUM_CODEPAGES+1] =
@ -344,7 +344,7 @@ unsigned char* iso_decode_ex(const unsigned char *iso, unsigned char *utf8, int
    cp_lock_leave();

    while (count-- && utf8_size > 0) {
-        unsigned short ucs, tmp;
+        ucschar_t ucs, tmp;

        if (*iso < 128 || cp == UTF_8) /* Already UTF-8 */
        {
@ -511,8 +511,25 @@ unsigned long utf8length(const unsigned char *utf8)
    return l;
 }

+/* Take a utf8 string and return the encoded length in utf16 code units */
+unsigned long utf16len_utf8(const unsigned char *utf8)
+{
+    ucschar_t cp;
+    unsigned long length = 0;
+    while (*utf8) {
+        utf8 = utf8decode(utf8, &cp);
+#ifdef UNICODE32
+        if (cp >= 0x10000)
+            length++;
+#endif
+        length++;
+    }
+
+    return length;
+}
+
 /* Decode 1 UTF-8 char and return a pointer to the next char. */
-const unsigned char* utf8decode(const unsigned char *utf8, unsigned short *ucs)
+const unsigned char* utf8decode(const unsigned char *utf8, ucschar_t *ucs)
 {
    unsigned char c = *utf8++;
    unsigned long code;
@ -552,8 +569,16 @@ const unsigned char* utf8decode(const unsigned char *utf8, unsigned short *ucs)
        /* Invalid UTF-8 char */
        code = 0xfffd;
    }
-    /* currently we don't support chars above U-FFFF */
-    *ucs = (code < 0x10000) ? code : 0xfffd;
+
+#ifdef UNICODE32
+    if (code > 0x10ffff)
+        code = 0xfffd;
+#else
+    if (code > 0xffff)
+        code = 0xfffd;
+#endif
+
+    *ucs = code;
    return utf8;
 }

--- a/firmware/drivers/fat.c
+++ b/firmware/drivers/fat.c
@ -747,6 +747,8 @@ static bool fatlong_parse_entry(struct fatlong_parse_state *lnparse,
    /* so far so good; save entry information */
    lnparse->ord = ord;

+    /* Treat entries as opaque 16-bit values; surrogate-pair decoding
+       and utf8encode happen in fatlong_parse_finish() */
    uint16_t *ucsp = fatent->ucssegs[ord - 1 + 5];
    unsigned int i = longent_char_first();

@ -797,13 +799,24 @@ static bool fatlong_parse_finish(struct fatlong_parse_state *lnparse,
    /* ensure the last segment is NULL-terminated if it is filled */
    fatent->ucssegs[lnparse->ord_max + 5][0] = 0x0000;

-    for (uint16_t *ucsp = fatent->ucssegs[5], ucc = *ucsp;
-         ucc; ucc = *++ucsp)
+    unsigned long ucc;     /* Decoded codepoint */
+    uint16_t *ucsp, ucs;
+    for (ucsp = fatent->ucssegs[5], ucs=*ucsp; ucs; ucs = *++ucsp)
    {
        /* end should be hit before ever seeing padding */
-        if (ucc == 0xffff)
+        if (ucs == 0xffff)
            return false;

+#ifdef UNICODE32
+        /* Check for a surrogate UTF16 pair */
+        if (ucs >= 0xd800 && ucs < 0xdc00 &&
+            *(ucsp+1) >= 0xdc00 && *(ucsp+1) < 0xe000) {
+            ucc = 0x10000 + (((ucs & 0x3ff) << 10) | (*(ucsp+1) & 0x3ff));
+            ucsp++;
+        } else
+#endif
+            ucc = ucs;
+
        if ((p = utf8encode(ucc, p)) - name > FAT_DIRENTRY_NAME_MAX)
            return false;
    }
@ -1612,12 +1625,27 @@ static int write_longname(struct bpb *fat_bpb, struct fat_filestr *parentstr,

    for (unsigned long i = 0; i < ucspadlen; i++)
    {
-        if (i < ucslen)
+        if (i < ucslen) {
+#ifdef UNICODE32
+            ucschar_t tmp;
+            name = utf8decode(name, &tmp);
+            /* For codepoints > U+FFFF we will need to use a UTF16 surrogate
+               pair. 'ucslen' already takes this into account! */
+            if (tmp < 0x10000) {
+                ucsname[i] = tmp;
+            } else {
+                tmp -= 0x10000;
+                ucsname[i++] = 0xd800 | ((tmp >> 10) & 0x3ff); /* High */
+                ucsname[i] = 0xdc00 | (tmp & 0x3ff); /* Low */
+            }
+#else
            name = utf8decode(name, &ucsname[i]);
-        else if (i == ucslen)
+#endif
+        } else if (i == ucslen) {
            ucsname[i] = 0x0000; /* name doesn't fill last block */
-        else /* i > ucslen */
+        } else /* i > ucslen */ {
            ucsname[i] = 0xffff; /* pad-out to end */
+        }
    }

    dc_lock_cache();
@ -1744,9 +1772,12 @@ static int add_dir_entry(struct bpb *fat_bpb, struct fat_filestr *parentstr,
        create_dos_name(basisname, name, &n);
        randomize_dos_name(shortname, basisname, &n);

-        /* one dir entry needed for every 13 characters of filename,
-           plus one entry for the short name */
-        ucslen = utf8length(name);
+        /* one dir entry needed for every 13 utf16 "code units"
+           of filename, plus one entry for the short name.
+           Keep in mind that a unicode character can take up to
+           two code units!
+        */
+        ucslen = utf16len_utf8(name);
        if (ucslen > 255)
            FAT_ERROR(-2); /* name is too long */

--- a/firmware/drivers/lcd-bitmap-common.c
+++ b/firmware/drivers/lcd-bitmap-common.c
@ -385,7 +385,7 @@ static void LCDFN(mono_bmp_part_helper)(const unsigned char *src, int src_x,
 /* put a string at a given pixel position, skipping first ofs pixel columns */
 static void LCDFN(putsxyofs)(int x, int y, int ofs, const unsigned char *str)
 {
-    unsigned short *ucs;
+    ucschar_t *ucs;
    struct viewport *vp = LCDFN(current_viewport);
    font_lock(vp->font, true);
    struct font* pf = font_get(vp->font);
@ -429,7 +429,7 @@ static void LCDFN(putsxyofs)(int x, int y, int ofs, const unsigned char *str)
        bool is_rtl, is_diac;
        const unsigned char *bits;
        int width, base_width, base_ofs = 0;
-        const unsigned short next_ch = ucs[1];
+        const ucschar_t next_ch = ucs[1];

        if (x >= vp->width)
            break;
@ -447,7 +447,7 @@ static void LCDFN(putsxyofs)(int x, int y, int ofs, const unsigned char *str)
            {
                if (!rtl_next_non_diac_width)
                {
-                    const unsigned short *u;
+                    const ucschar_t *u;

                    /* Jump to next non-diacritic char, and calc its width */
                    for (u = &ucs[1]; *u && IS_DIACRITIC(*u); u++);
@ -529,7 +529,7 @@ static void LCDFN(putsxyofs)(int x, int y, int ofs, const unsigned char *str)
 /* put a string at a given pixel position, skipping first ofs pixel columns */
 static void LCDFN(putsxyofs)(int x, int y, int ofs, const unsigned char *str)
 {
-    unsigned short *ucs;
+    ucschar_t *ucs;
    struct viewport *vp = LCDFN(current_viewport);
    struct font* pf = font_get(vp->font);
    const unsigned char *bits;
@ -567,7 +567,7 @@ static void LCDFN(putsxyofs)(int x, int y, int ofs, const unsigned char *str)
    /* allow utf but no diacritics or rtl lang */
    for (ucs = bidi_l2v(str, 1); *ucs; ucs++)
    {
-        const unsigned short next_ch = ucs[1];
+        const ucschar_t next_ch = ucs[1];

        if (x >= vp->width)
            break;
--- a/firmware/export/bidi.h
+++ b/firmware/export/bidi.h
@ -21,6 +21,6 @@
 #ifndef BIDI_H
 #define BIDI_H

-extern unsigned short *bidi_l2v(const unsigned char *str, int orientation);
+ucschar_t *bidi_l2v(const unsigned char *str, int orientation);

 #endif /* BIDI_H */
--- a/firmware/export/config.h
+++ b/firmware/export/config.h
@ -1461,4 +1461,11 @@ Lyre prototype 1 */
 #error "HAVE_LCD_SLEEP_SETTING requires HAVE_LCD_SLEEP"
 #endif

+// XXX Figure out a better place to put this?
+#ifdef UNICODE32
+#define ucschar_t unsigned int
+#else
+#define ucschar_t unsigned short
+#endif
+
 #endif /* __CONFIG_H__ */
--- a/firmware/export/cpu.h
+++ b/firmware/export/cpu.h
@ -18,6 +18,9 @@
 * KIND, either express or implied.
 *
 ****************************************************************************/
+#ifndef __CPU_H
+#define __CPU_H
+
 #include "config.h"

 #if CONFIG_CPU == MCF5249
@ -80,3 +83,5 @@
 #if CONFIG_CPU == STM32H743
 #include "cpu-stm32h743.h"
 #endif
+
+#endif /* __CPU_H */
--- a/firmware/export/font.h
+++ b/firmware/export/font.h
@ -86,7 +86,7 @@ struct font {
    int          maxwidth;        /* max width in pixels*/
    unsigned int height;          /* height in pixels*/
    int          ascent;          /* ascent (baseline) height*/
-    int          firstchar;       /* first character in bitmap*/
+    unsigned int firstchar;       /* first character in bitmap*/
    int          size;            /* font size in glyphs*/
    int          depth;           /* depth of the font, 0=1bit and 1=4bit */
    const unsigned char *bits;    /* 8-bit column bitmap data*/
@ -95,24 +95,24 @@ struct font {
    const unsigned char *width;   /* character widths or NULL if fixed*/
    int          defaultchar;     /* default char (not glyph index)*/
    int32_t      bits_size;       /* # bytes of glyph bits*/
-    
+
    /* file, buffer and cache management */
    int          fd;              /* fd for the font file. >= 0 if cached */
    int          fd_width;        /* fd for the font file. >= 0 if cached */
-    int          fd_offset;       /* fd for the font file. >= 0 if cached */    
+    int          fd_offset;       /* fd for the font file. >= 0 if cached */
    int          handle;          /* core_allocator handle */
-    unsigned char *buffer_start;    /* buffer to store the font in */       
-    unsigned char *buffer_position; /* position in the buffer */    
+    unsigned char *buffer_start;    /* buffer to store the font in */
+    unsigned char *buffer_position; /* position in the buffer */
    unsigned char *buffer_end;      /* end of the buffer */
    size_t         buffer_size;     /* size of the buffer in bytes */
    bool         disabled;        /* font disabled (use blank as fallback if not in cache) */
-#ifndef __PCTOOL__    
+#ifndef __PCTOOL__
    struct font_cache cache;
    uint32_t file_width_offset;    /* offset to file width data    */
    uint32_t file_offset_offset;   /* offset to file offset data   */
    int long_offset;
-#endif    
-    
+#endif
+
 };

 /* font routines*/
@ -134,7 +134,7 @@ void font_enable_all(void);
 struct font* font_get(int font);
 int font_getstringnsize(const unsigned char *str, size_t maxbytes, int *w, int *h, int fontnumber);
 int font_getstringsize(const unsigned char *str, int *w, int *h, int fontnumber);
-int font_get_width(struct font* ft, unsigned short ch);
-const unsigned char * font_get_bits(struct font* ft, unsigned short ch);
+int font_get_width(struct font* ft, ucschar_t ch);
+const unsigned char * font_get_bits(struct font* ft, ucschar_t ch);

 #endif
--- a/firmware/export/hangul.h
+++ b/firmware/export/hangul.h
@ -21,5 +21,4 @@

 extern const char jamo_table[51][3];

-unsigned short hangul_join(unsigned short lead, unsigned short vowel,
-                                unsigned short tail);
+ucschar_t hangul_join(ucschar_t lead, ucschar_t vowel, ucschar_t tail);
--- a/firmware/font.c
+++ b/firmware/font.c
@ -53,6 +53,12 @@
 #define FONT_EXT "fnt"
 #define GLYPH_CACHE_EXT "gc"

+#ifdef UNICODE32
+#define FC_HEADER_VAL 0x01000020
+#else
+#define FC_HEADER_VAL 0x01000010
+#endif
+
 /* max static loadable font buffer size */
 #ifndef MAX_FONT_SIZE
 #if LCD_HEIGHT > 64
@ -182,7 +188,7 @@ void font_init(void)

 static short readshort(struct font *pf)
 {
-    unsigned short s;
+    uint16_t s;

    s = *pf->buffer_position++ & 0xff;
    s |= (*pf->buffer_position++ << 8);
@ -361,8 +367,8 @@ static size_t font_glyphs_to_bufsize(struct font *pf, int glyphs)
    size_t bufsize;

    /* LRU bytes per glyph */
-    bufsize = LRU_SLOT_OVERHEAD + sizeof(struct font_cache_entry) + 
-        sizeof( unsigned short);
+    bufsize = LRU_SLOT_OVERHEAD + sizeof(struct font_cache_entry) +
+        sizeof(unsigned short);
    /* Image bytes per glyph */
    bufsize += glyph_bytes(pf, pf->maxwidth);
    bufsize *= glyphs;
@ -371,7 +377,7 @@ static size_t font_glyphs_to_bufsize(struct font *pf, int glyphs)
 }

 static struct font* font_load_header(int fd, struct font *pheader,
-                                     struct font *pf, 
+                                     struct font *pf,
                                     uint32_t *nwidth, uint32_t *noffset)
 {
    /* Load the header. Readshort() and readlong()              *
@ -420,16 +426,24 @@ int font_load_ex( const char *path, size_t buf_size, int glyphs )
    if ( fd < 0 )
        return -1;

+#ifdef UNICODE32
+    if (glyphs && glyphs < 3)
+        glyphs = 3; /* Guarantee we'll always have at least 2 after alignment */
+#else
+    if (glyphs && glyphs < 2)
+        glyphs = 2; /* Guarantee we'll always have at least 1 after alignment */
+#endif
+
    /* load font struct f with file header */
    int file_size = filesize( fd );
    struct font header;
    struct font f;

-    uint32_t nwidth, noffset;     
+    uint32_t nwidth, noffset;
    if ( !font_load_header( fd, &header, &f, &nwidth, &noffset )
 #if LCD_DEPTH < 16
        || f.depth
-#endif  
+#endif
    )
    {
        close(fd);
@ -458,7 +472,7 @@ int font_load_ex( const char *path, size_t buf_size, int glyphs )
        cached = true;
    else
        bufsize = file_size;
-    
+
    /* check already loaded */
    int font_id = find_font_index(path);

@ -503,7 +517,7 @@ int font_load_ex( const char *path, size_t buf_size, int glyphs )
                return -1;
        }
        pd->refcount++;
-        //printf("reusing handle %d for %s (count: %d)\n", font_id, path, pd->refcount); 
+        //printf("reusing handle %d for %s (count: %d)\n", font_id, path, pd->refcount);
        close(fd);
        return font_id;
    }
@ -522,7 +536,7 @@ int font_load_ex( const char *path, size_t buf_size, int glyphs )
        return -1;
    font_id = open_slot;
    size_t path_bufsz = MAX(path_len + 1, 64); /* enough size for common case */
-    /* allocate mem */    
+    /* allocate mem */
    int handle = core_alloc_ex(
                     bufsize + path_bufsz + sizeof( struct buflib_alloc_data ),
                     &buflibops );
@ -574,7 +588,7 @@ int font_load_ex( const char *path, size_t buf_size, int glyphs )
        pf->fd_offset = -1;
    }
    else
-    {           
+    {
        lseek( fd, 0, SEEK_SET);
        read(fd, pf->buffer_start, pf->buffer_size);

@ -723,7 +737,7 @@ load_cache_entry(struct font_cache_entry* p, void* callback_data)
 {
    struct font* pf = callback_data;

-    unsigned short char_code = p->_char_code;
+    ucschar_t char_code = p->_char_code;
    int fd;

    lock_font_handle(pf->handle, true);
@ -788,7 +802,7 @@ static void cache_create(struct font* pf)
     * when the font file is closed during USB */
    unsigned char *cache_buf = pf->buffer_start + bitmap_size;
    size_t cache_size = pf->buffer_size - bitmap_size;
-    ALIGN_BUFFER(cache_buf, cache_size, 2);
+    ALIGN_BUFFER(cache_buf, cache_size, sizeof(ucschar_t));
    memset(pf->buffer_start, 0, bitmap_size);
    /* Initialise cache */
    font_cache_create(&pf->cache, cache_buf, cache_size, bitmap_size);
@ -797,7 +811,7 @@ static void cache_create(struct font* pf)
 /*
 * Returns width of character
 */
-int font_get_width(struct font* pf, unsigned short char_code)
+int font_get_width(struct font* pf, ucschar_t char_code)
 {
    int width;
    struct font_cache_entry *e;
@ -820,7 +834,7 @@ int font_get_width(struct font* pf, unsigned short char_code)
    return width;
 }

-const unsigned char* font_get_bits(struct font* pf, unsigned short char_code)
+const unsigned char* font_get_bits(struct font* pf, ucschar_t char_code)
 {
    const unsigned char* bits;

@ -831,7 +845,7 @@ const unsigned char* font_get_bits(struct font* pf, unsigned short char_code)

    if (pf->fd >= 0 && pf != &sysfont)
    {
-        bits = 
+        bits =
            (unsigned char*)font_cache_get(&pf->cache, char_code,
                                false, load_cache_entry, pf)->bitmap;
    }
@ -884,7 +898,7 @@ static void glyph_file_write(void* data)
 {
    struct font_cache_entry* p = data;
    struct font* pf = cache_pf;
-    unsigned short ch;
+    ucschar_t ch;
    static int buffer_pos = 0;
 #define WRITE_BUFFER 256
    static unsigned char buffer[WRITE_BUFFER];
@ -899,11 +913,19 @@ static void glyph_file_write(void* data)
    }
    if ( p->_char_code == 0xffff )
        return;
-    
+
    ch = p->_char_code + pf->firstchar;
-    buffer[buffer_pos] = ch >> 8;
+#ifdef UNICODE32
+    buffer[buffer_pos] = (ch >> 24) & 0xff;
+    buffer[buffer_pos+1] = (ch >> 16) & 0xff;
+    buffer[buffer_pos+2] = (ch >> 8) & 0xff;
+    buffer[buffer_pos+3] = ch & 0xff;
+    buffer_pos += 4;
+#else
+    buffer[buffer_pos] = (ch >> 8) & 0xff;
    buffer[buffer_pos+1] = ch & 0xff;
    buffer_pos += 2;
+#endif
    return;
 }

@ -928,11 +950,13 @@ static void glyph_cache_save(int font_id)
        fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, 0666);
        if (fd >= 0)
        {
+            uint32_t header = FC_HEADER_VAL;
+            write(fd, &header, sizeof(header));
            cache_pf = pf;
            cache_fd = fd;
            lru_traverse(&cache_pf->cache._lru, glyph_file_write);
            glyph_file_write(NULL);
-            if (cache_fd >= 0) 
+            if (cache_fd >= 0)
            {
                close(cache_fd);
                cache_fd = -1;
@ -944,9 +968,9 @@ static void glyph_cache_save(int font_id)
 }


-static int ushortcmp(const void *a, const void *b)
+static int ucscharcmp(const void *a, const void *b)
 {
-    return ((int)(*(unsigned short*)a - *(unsigned short*)b));
+    return ((int)(*(ucschar_t*)a - *(ucschar_t*)b));
 }

 static NO_INLINE void glyph_cache_load(const char *font_path, struct font *pf)
@ -954,13 +978,13 @@ static NO_INLINE void glyph_cache_load(const char *font_path, struct font *pf)
 #define MAX_SORT 256
    if (pf->fd >= 0) {
        int i, size, fd;
-        unsigned char tmp[2];
-        unsigned short ch;
-        unsigned short glyphs[MAX_SORT];
-        unsigned short glyphs_lru_order[MAX_SORT];
-        int glyph_file_skip=0, glyph_file_size=0;
-        
-        int sort_size = pf->cache._capacity;        
+        unsigned char tmp[sizeof(ucschar_t)];
+        ucschar_t ch;
+        ucschar_t glyphs[MAX_SORT];
+        ucschar_t glyphs_lru_order[MAX_SORT];
+        unsigned int glyph_file_skip=0, glyph_file_size=0;
+
+        int sort_size = pf->cache._capacity;
        if ( sort_size > MAX_SORT )
             sort_size = MAX_SORT;

@ -974,31 +998,41 @@ static NO_INLINE void glyph_cache_load(const char *font_path, struct font *pf)
            fd = open(GLYPH_CACHE_FILE, O_RDONLY|O_BINARY);
 #endif
        if (fd >= 0) {
+            /* Header */
+            uint32_t hdr = 0;
+            read(fd, &hdr, sizeof(hdr));
+            if (hdr != FC_HEADER_VAL)
+                goto latin;
            /* only read what fits */
            glyph_file_size = filesize( fd );
-            if ( glyph_file_size > 2*pf->cache._capacity ) {
-                glyph_file_skip = glyph_file_size - 2*pf->cache._capacity;
-                lseek( fd, glyph_file_skip, SEEK_SET );
+            if (glyph_file_size < sizeof(uint32_t))
+                goto latin;
+            glyph_file_size -= sizeof(uint32_t);
+            if ( glyph_file_size > (int)sizeof(ucschar_t)*pf->cache._capacity ) {
+                glyph_file_skip = glyph_file_size - sizeof(ucschar_t)*pf->cache._capacity;
+                lseek( fd, glyph_file_skip + sizeof(uint32_t), SEEK_SET );
            }
-
            while(1) {
-
                for ( size = 0;
-                      read( fd, tmp, 2 ) == 2 && size < sort_size;
-                      size++ ) 
+                      read( fd, tmp, sizeof(tmp) ) == sizeof(tmp) && size < sort_size;
+                      size++ )
                {
+#ifdef UNICODE32
+                    glyphs[size] = (tmp[0] << 24) | (tmp[1] << 16) | (tmp[2] << 8) | tmp[3];
+#else
                    glyphs[size] = (tmp[0] << 8) | tmp[1];
+#endif
                    glyphs_lru_order[size] = glyphs[size];
                }
-                
+
                /* sort glyphs array to make sector cache happy */
-                qsort((void *)glyphs, size, sizeof(unsigned short), 
-                      ushortcmp );
+                qsort((void *)glyphs, size, sizeof(ucschar_t),
+                      ucscharcmp );

                /* load font bitmaps */
                for( i = 0; i < size ; i++ )
-                         font_get_bits(pf, glyphs[i]);
-                
+                    font_get_bits(pf, glyphs[i]);
+
                /* redo to fix lru order */
                for ( i = 0; i < size ; i++)
                    font_get_bits(pf, glyphs_lru_order[i]);
@ -1009,6 +1043,7 @@ static NO_INLINE void glyph_cache_load(const char *font_path, struct font *pf)

            close(fd);
        } else {
+        latin:
            /* load latin1 chars into cache */
            for ( ch = 32 ; ch < 256  && ch < pf->cache._capacity + 32; ch++ )
                font_get_bits(pf, ch);
@ -1040,7 +1075,7 @@ struct font* font_get(int font)
 /*
 * Returns width of character
 */
-int font_get_width(struct font* pf, unsigned short char_code)
+int font_get_width(struct font* pf, ucschar_t char_code)
 {
    /* check input range*/
    if (char_code < pf->firstchar || char_code >= pf->firstchar+pf->size)
@ -1050,7 +1085,7 @@ int font_get_width(struct font* pf, unsigned short char_code)
    return pf->width? pf->width[char_code]: pf->maxwidth;
 }

-const unsigned char* font_get_bits(struct font* pf, unsigned short char_code)
+const unsigned char* font_get_bits(struct font* pf, ucschar_t char_code)
 {
    const unsigned char* bits;

@ -1079,7 +1114,7 @@ int font_getstringnsize(const unsigned char *str, size_t maxbytes, int *w, int *
 {
    struct font* pf = font_get(fontnum);
    font_lock( fontnum, true );
-    unsigned short ch;
+    ucschar_t ch;
    int width = 0;
    size_t b = maxbytes - 1;

--- a/firmware/font_cache.c
+++ b/firmware/font_cache.c
@ -43,13 +43,18 @@ void font_cache_create(
    int font_cache_entry_size =
        sizeof(struct font_cache_entry) + bitmap_bytes_size;

-    /* make sure font cache entries are a multiple of 16 bits */
-    if (font_cache_entry_size % 2 != 0)
+    /* make sure font cache entries are a multiple of sizeof(ucschar_t) */
+    while (font_cache_entry_size & (sizeof(ucschar_t) -1))
        font_cache_entry_size++;

    int cache_size = buf_size /
        (font_cache_entry_size + LRU_SLOT_OVERHEAD + sizeof(short));

+#ifdef UNICODE32
+    /* Ensure LRU index size is a multiple of 32 bits */
+    cache_size &= ~1;
+#endif
+
    fcache->_size = 1;
    fcache->_capacity = cache_size;
    fcache->_prev_result = 0;
@ -72,12 +77,12 @@ void font_cache_create(

 /*************************************************************************
 * Binary search that attempts a primary lucky guess that succeeds
- * when there are consecutive codes in the cache between previous 
- * search and new search. Returns a negative of insertion point if 
+ * when there are consecutive codes in the cache between previous
+ * search and new search. Returns a negative of insertion point if
 * not found.
 ************************************************************************/
 static int search(struct font_cache* fcache,
-                  unsigned short char_code,
+                  ucschar_t char_code,
                  int size,
                  int *p_insertion_point )
 {
@ -85,12 +90,12 @@ static int search(struct font_cache* fcache,
    int left, right, mid=-1, c;
    left = 0;
    right = size;
-    
+
    /* go for a lucky guess */
-    mid = char_code + 
+    mid = char_code +
        fcache->_prev_result - fcache->_prev_char_code;
-            
-    /* check bounds */        
+
+    /* check bounds */
    if ( mid < 0 || mid > right )
            mid = ( left + right ) / 2;

@ -114,7 +119,7 @@ static int search(struct font_cache* fcache,
        mid = (left + right) / 2;
    }
    while (left <= right);
-    
+
    /* not found */
    *p_insertion_point = mid;
    return 0;
@ -124,7 +129,7 @@ static int search(struct font_cache* fcache,
 ******************************************************************************/
 struct font_cache_entry* font_cache_get(
    struct font_cache* fcache,
-    unsigned short char_code,
+    ucschar_t char_code,
    bool cache_only,
    void (*callback) (struct font_cache_entry* p, void *callback_data),
    void *callback_data)
@ -132,7 +137,7 @@ struct font_cache_entry* font_cache_get(
    struct font_cache_entry* p;
    int insertion_point;
    int index_to_replace;
-    
+
    /* check bounds */
    p = lru_data(&fcache->_lru, fcache->_index[0]);
    if( char_code < p->_char_code )
@ -158,14 +163,14 @@ struct font_cache_entry* font_cache_get(
            }
            else
            {
-                p = lru_data(&fcache->_lru, 
+                p = lru_data(&fcache->_lru,
                fcache->_index[insertion_point+1]);
                if ( char_code > p->_char_code )
                     insertion_point++;
            }
        }
    }
-    
+
    /* not found */
    if (cache_only)
        return NULL;
--- a/firmware/hangul.c
+++ b/firmware/hangul.c
@ -18,6 +18,7 @@
 * KIND, either express or implied.
 *
 ****************************************************************************/
+#include "config.h"
 #include "hangul.h"

 const char jamo_table[51][3] = {
@ -75,10 +76,9 @@ const char jamo_table[51][3] = {
 };

 /* takes three jamo chars and joins them into one hangul */
-unsigned short hangul_join(unsigned short lead, unsigned short vowel,
-                                unsigned short tail)
+ucschar_t hangul_join(ucschar_t lead, ucschar_t vowel, ucschar_t tail)
 {
-    unsigned short ch = 0xfffd;
+    ucschar_t ch = 0xfffd;

    if (lead < 0x3131 || lead > 0x3163)
        return ch;
--- a/firmware/include/diacritic.h
+++ b/firmware/include/diacritic.h
@ -27,7 +27,7 @@
 * Sets is_rtl (if it's not NULL) to whether the character
 * belongs to an RTL language.
 */
-bool is_diacritic(const unsigned short char_code, bool *is_rtl);
+bool is_diacritic(const ucschar_t char_code, bool *is_rtl);

 /* Note IS_DIACRITIC macros may elide the function call
 * therefore there is a separate _RTL version that requires a bool pointer
--- a/firmware/include/font_cache.h
+++ b/firmware/include/font_cache.h
@ -21,24 +21,25 @@
 #ifndef _FONT_CACHE_H_
 #define _FONT_CACHE_H_
 #include <stdbool.h>
+#include "config.h"
 #include "lru.h"

 /*******************************************************************************
- * 
+ *
 ******************************************************************************/
 struct font_cache
 {
    struct lru _lru;
-    int _size;
-    int _capacity;
-    int _prev_char_code;
+    unsigned int _size;
+    unsigned int _capacity;
+    ucschar_t _prev_char_code;
    int _prev_result;
    short *_index; /* index of lru handles in char_code order */
 };

 struct font_cache_entry
 {
-    unsigned short _char_code;
+    ucschar_t _char_code;
    unsigned char width;
    unsigned char bitmap[1]; /* place holder */
 };
@ -55,7 +56,7 @@ void font_cache_create(
 * Note: With cache_only this can return NULL, which otherwise never happens */
 struct font_cache_entry* font_cache_get(
    struct font_cache* fcache,
-    unsigned short char_code,
+    ucschar_t char_code,
    bool cache_only,
    void (*callback) (struct font_cache_entry* p, void *callback_data),
    void *callback_data);
--- a/firmware/include/lru.h
+++ b/firmware/include/lru.h
@ -33,6 +33,7 @@ struct lru
    void *_base;
 };

+/* LRU_SLOT_OVERHEAD is the fixed portion of struct lru_node */
 #define LRU_SLOT_OVERHEAD (2 * sizeof(short))

 /* Create LRU list with specified size from buf. */
@ -45,4 +46,3 @@ void *lru_data(struct lru* pl, short handle);
 void lru_traverse(struct lru* pl, void (*callback)(void* data));

 #endif /* LRU_H */
-
--- a/firmware/include/rbunicode.h
+++ b/firmware/include/rbunicode.h
@ -27,7 +27,7 @@
 */
 #ifndef _RBUNICODE_H_
 #define _RBUNICODE_H_
- 
+
 #include "config.h"
 #include <stdbool.h>

@ -63,8 +63,9 @@ unsigned char* utf16LEdecode(const unsigned char *utf16, unsigned char *utf8, in
 unsigned char* utf16BEdecode(const unsigned char *utf16, unsigned char *utf8, int count);
 unsigned char* utf16decode(const unsigned char *utf16, unsigned char *utf8, int count, int utf8_size, bool le);
 bool utf16_has_bom(const unsigned char *utf16, bool *le);
+unsigned long utf16len_utf8(const unsigned char *utf8);
 unsigned long utf8length(const unsigned char *utf8);
-const unsigned char* utf8decode(const unsigned char *utf8, unsigned short *ucs);
+const unsigned char* utf8decode(const unsigned char *utf8, ucschar_t *ucs);
 void set_codepage(int cp);
 int get_codepage(void);
 int utf8seek(const unsigned char* utf8, int offset);
--- a/firmware/target/hosted/filesystem-win32.c
+++ b/firmware/target/hosted/filesystem-win32.c
@ -63,55 +63,78 @@ static void win32_last_error_errno(void)
 static HANDLE win32_open(const char *ospath);
 static int win32_stat(const char *ospath, LPBY_HANDLE_FILE_INFORMATION lpInfo);

-unsigned short * strcpy_utf8ucs2(unsigned short *buffer,
-                                 const unsigned char *utf8)
+/* Convert the NUL-terminated UTF-8 string 'utf8' into UTF-16 code units
+ * stored in 'buffer', including the terminating NUL. In UNICODE32 builds
+ * codepoints at or above U+10000 are emitted as a surrogate pair.
+ * 'buffer' must be large enough for the converted string plus terminator.
+ * Returns 'buffer'. */
+static unsigned short * strcpy_utf8utf16(unsigned short *buffer,
+                                         const unsigned char *utf8)
 {
-    for (wchar_t *ucs2 = buffer;
-         ((utf8 = utf8decode(utf8, ucs2)), *ucs2); ucs2++);
+    wchar_t *ucs = buffer;
+    ucschar_t cp;
+
+    /* Decode first, test afterwards: 'buffer' is uninitialized on entry,
+       so its contents must never drive the loop condition. */
+    do {
+        utf8 = utf8decode(utf8, &cp);
+#ifdef UNICODE32
+        if (cp >= 0x10000) { /* U+10000 itself already needs a pair */
+            cp -= 0x10000;
+            *ucs++ = 0xd800 | (cp >> 10);
+            cp = 0xdc00 | (cp & 0x3ff);
+        }
+#endif
+        *ucs++ = cp;
+    } while (cp); /* stop once the NUL terminator has been stored */
+
     return buffer;
 }

-#if 0
-unsigned char * strcpy_ucs2utf8(unsigned char *buffer,
-                                const unsigned short *ucs2)
+#if 0 /* Unused in current code */
+static unsigned char * strcpy_utf16utf8(unsigned char *buffer,
+                                        const unsigned short *utf16buf)
 {
-    for (unsigned char *utf8 = buffer;
-         ((utf8 = utf8encode(*ucs2, utf8)), *ucs2); ucs2++);
+    unsigned char *utf8 = buffer;
+
+    /* windows is always LE */
+    const int le = 1;
+
+    while (*utf16buf) {
+        const unsigned char *utf16 = (const unsigned char *)utf16buf;
+        unsigned long ucs;
+        /* Check for a surrogate pair */
+        if (*(utf16 + le) >= 0xD8 && *(utf16 + le) < 0xE0) {
+            ucs = 0x10000 + ((utf16[1 - le] << 10) | ((utf16[le] - 0xD8) << 18)
+                  | utf16[2 + (1 - le)] | ((utf16[2 + le] - 0xDC) << 8));
+            utf16buf += 2;
+        } else {
+            ucs = utf16[le] << 8 | utf16[1 - le];
+            utf16buf++;
+        }
+        utf8 = utf8encode(ucs, utf8);
+    }
    return buffer;
 }
-
-size_t strlen_utf8ucs2(const unsigned char *utf8)
-{
-    /* This won't properly count multiword ucs2 so use the alternative
-       below for now which doesn't either */
-    size_t length = 0;
-    unsigned short ucschar[2];
-    for (unsigned char c = *utf8; c;
-         ((utf8 = utf8decode(utf8, ucschar)), c = *utf8))
-        length++;
-
-    return length;
-}
-#endif /* 0 */
-
-size_t strlen_utf8ucs2(const unsigned char *utf8)
-{
-    return utf8length(utf8);
-}
-
-size_t strlen_ucs2utf8(const unsigned short *ucs2)
+static size_t strlen_utf16utf8(const unsigned short *utf16buf)
 {
    size_t length = 0;
    unsigned char utf8char[4];

-    for (unsigned short c = *ucs2; c; (c = *++ucs2))
-        length += utf8encode(c, utf8char) - utf8char;
+    /* windows is always LE */
+    const int le = 1;

+    while (*utf16buf) {
+        const unsigned char *utf16 = (const unsigned char *)utf16buf;
+        unsigned long ucs;
+        /* Check for a surrogate pair */
+        if (*(utf16 + le) >= 0xD8 && *(utf16 + le) < 0xE0) {
+            ucs = 0x10000 + ((utf16[1 - le] << 10) | ((utf16[le] - 0xD8) << 18)
+                  | utf16[2 + (1 - le)] | ((utf16[2 + le] - 0xDC) << 8));
+            utf16buf += 2;
+        } else {
+            ucs = utf16[le] << 8 | utf16[1 - le];
+            utf16buf++;
+        }
+        length += utf8encode(ucs, utf8char) - utf8char;
+    }
    return length;
 }
+#endif

-size_t strlcpy_ucs2utf8(char *buffer, const unsigned short *ucs2,
-                        size_t bufsize)
+/* Note: Must be exported (not static); filesystem-win32.h declares it
+   and aliases strlcpy_from_os to it */
+size_t strlcpy_utf16utf8(char *buffer, const unsigned short *utf16,
+                         size_t bufsize)
 {
    if (!buffer)
        bufsize = 0;
@ -119,12 +142,24 @@ size_t strlcpy_ucs2utf8(char *buffer, const unsigned short *ucs2,
    size_t length = 0;
    unsigned char utf8char[4];

-    for (unsigned short c = *ucs2; c; (c = *++ucs2))
+    unsigned long ucc;
+    while(*utf16)
    {
+        /* Check for a surrogate UTF16 pair */
+        if (*utf16 >= 0xd800 && *utf16 < 0xdc00 &&
+            *(utf16+1) >= 0xdc00 && *(utf16+1) < 0xe000) {
+            ucc = 0x10000 + (((*utf16 & 0x3ff) << 10) | (*(utf16+1) & 0x3ff));
+            utf16++;
+        } else {
+            ucc = *utf16;
+        }
+
        /* If the last character won't fit, this won't split it */
-        size_t utf8size = utf8encode(c, utf8char) - utf8char;
+        size_t utf8size = utf8encode(ucc, utf8char) - utf8char;
        if ((length += utf8size) < bufsize)
            buffer = mempcpy(buffer, utf8char, utf8size);
+
+        utf16++;
    }

    /* Above won't ever copy to very end */
@ -134,44 +169,44 @@ size_t strlcpy_ucs2utf8(char *buffer, const unsigned short *ucs2,
    return length;
 }

-#define _toucs2(utf8) \
+#define _toutf16(utf8) \
    ({ const char *_utf8 = (utf8);         \
-       size_t _l = strlen_utf8ucs2(_utf8); \
+       size_t _l = utf16len_utf8(_utf8); \
       void *_buffer = alloca((_l + 1)*2); \
-       strcpy_utf8ucs2(_buffer, _utf8); })
+       strcpy_utf8utf16(_buffer, _utf8); })

-#define _toutf8(ucs2) \
-    ({ const char *_ucs2 = (ucs2);         \
-       size_t _l = strlen_ucs2utf8(_ucs2); \
+/* Convert a NUL-terminated UTF-16 string to UTF-8 in alloca()'d storage.
+ * NOTE: strlen_utf16utf8() and strcpy_utf16utf8() are currently compiled
+ * out (#if 0) above; re-enable them before using this macro. */
+#define _toutf8(utf16) \
+    ({ const unsigned short *_ucs = (utf16); \
+       size_t _l = strlen_utf16utf8(_ucs); \
        void *_buffer = alloca(_l + 1);     \
-       strcpy_ucs2utf8(_buffer, _ucs2); })
+       strcpy_utf16utf8(_buffer, _ucs); })

 int os_open(const char *ospath, int oflag, ...)
 {
-    return _wopen(_toucs2(ospath), oflag __OPEN_MODE_ARG);
+    return _wopen(_toutf16(ospath), oflag __OPEN_MODE_ARG);
 }

 int os_creat(const char *ospath, mode_t mode)
 {
-    return _wcreat(_toucs2(ospath), mode);
+    return _wcreat(_toutf16(ospath), mode);
 }

 int os_stat(const char *ospath, struct _stat *s)
 {
-    return _wstat(_toucs2(ospath), s);
+    return _wstat(_toutf16(ospath), s);
 }

 int os_remove(const char *ospath)
 {
-    return _wremove(_toucs2(ospath));
+    return _wremove(_toutf16(ospath));
 }

 int os_rename(const char *osold, const char *osnew)
 {
    int errnum = errno;

-    const wchar_t *wchosold = _toucs2(osold);
-    const wchar_t *wchosnew = _toucs2(osnew);
+    const wchar_t *wchosold = _toutf16(osold);
+    const wchar_t *wchosnew = _toutf16(osnew);

    int rc = _wrename(wchosold, wchosnew);
    if (rc < 0 && errno == EEXIST)
@ -213,18 +248,18 @@ bool os_file_exists(const char *ospath)

 _WDIR * os_opendir(const char *osdirname)
 {
-    return _wopendir(_toucs2(osdirname));
+    return _wopendir(_toutf16(osdirname));
 }

 int os_mkdir(const char *ospath, mode_t mode)
 {
-    return _wmkdir(_toucs2(ospath));
+    return _wmkdir(_toutf16(ospath));
    (void)mode;
 }

 int os_rmdir(const char *ospath)
 {
-    return _wrmdir(_toucs2(ospath));
+    return _wrmdir(_toutf16(ospath));
 }

 int os_dirfd(_WDIR *osdirp)
@ -288,7 +323,7 @@ static HANDLE win32_open(const char *ospath)
 {
    /* FILE_FLAG_BACKUP_SEMANTICS is required for this to succeed at opening
       a directory */
-    HANDLE h = CreateFileW(_toucs2(ospath), GENERIC_READ,
+    HANDLE h = CreateFileW(_toutf16(ospath), GENERIC_READ,
                           FILE_SHARE_READ | FILE_SHARE_WRITE |
                           FILE_SHARE_DELETE, NULL, OPEN_EXISTING,
                           FILE_FLAG_BACKUP_SEMANTICS, NULL);
@ -479,7 +514,7 @@ void volume_size(IF_MV(int volume,) sector_t *sizep, sector_t *freep)

    char volpath[MAX_PATH];
    if (os_volume_path(IF_MV(volume, ) volpath, sizeof (volpath)) >= 0)
-        GetDiskFreeSpaceExW(_toucs2(volpath), &free, &size, NULL);
+        GetDiskFreeSpaceExW(_toutf16(volpath), &free, &size, NULL);

    if (sizep)
        *sizep = size.QuadPart / 1024;
--- a/firmware/target/hosted/filesystem-win32.h
+++ b/firmware/target/hosted/filesystem-win32.h
@ -27,10 +27,10 @@
 /* filesystem-win32.c contains some string functions that could be useful
 * elsewhere; just move them away to unicode.c or something if they prove
 * so. */
-size_t strlcpy_ucs2utf8(char *buffer, const unsigned short *ucs,
-                        size_t bufsize);
+size_t strlcpy_utf16utf8(char *buffer, const unsigned short *utf16,
+                         size_t bufsize);

-#define strlcpy_from_os strlcpy_ucs2utf8
+#define strlcpy_from_os strlcpy_utf16utf8
 #endif /* __MINGW32__ */

 #endif /* !OSFUNCTIONS_DECLARED */