From a2c10f6189e078977db95e8493d79133d9b7c1e3 Mon Sep 17 00:00:00 2001 From: Solomon Peachy Date: Tue, 17 Dec 2024 08:55:21 -0500 Subject: [PATCH] unicode: Support characters beyond the first unicode plane We used 16-bit variables to store the 'character code' everywhere but this won't let us represent anything beyond U+FFFF. This patch changes those variables to a custom type that can be 32 or 16 bits depending on the build, and adjusts numerous internal APIs and datastructures to match. This includes: * utf8decode() and friends * font manipulation, caching, rendering, and generation * on-screen keyboard * FAT filesystem (parsing and generating utf16 LFNs) * WIN32 simulator platform code Note that this patch doesn't _enable_ >16bit unicode support; a followup patch will turn that on for appropriate targets. Appears to work on: * hosted linux, native, linux simulator in both 16/32-bit modes. Needs testing on: * windows and macos simulator (16bit+32bit) Change-Id: Iba111b27d2433019b6bff937cf1ebd2c4353a0e8 --- apps/hosted/android/keyboard.c | 4 +- apps/keyboard.h | 2 +- apps/plugin.h | 14 +- apps/plugins/announce_status.c | 6 +- apps/plugins/frotz/frotz.c | 10 +- apps/plugins/keyremap.c | 8 +- apps/plugins/lib/grey_draw.c | 24 ++-- apps/plugins/lib/kbd_helper.c | 12 +- apps/plugins/lib/kbd_helper.h | 14 +- apps/plugins/lib/simple_viewer.c | 2 +- apps/plugins/lrcplayer.c | 2 +- apps/plugins/lua/rocklib.c | 3 +- apps/plugins/mpegplayer/mpegplayer.c | 4 +- apps/plugins/rockpaint.c | 4 +- apps/plugins/tagcache/tagcache.c | 4 +- apps/plugins/text_viewer/tv_text_processor.c | 50 +++---- apps/plugins/zxbox/zxbox_keyb.c | 4 +- apps/recorder/keyboard.c | 66 ++++----- bootloader/iriver_h1x0.c | 10 +- bootloader/iriver_h300.c | 10 +- docs/PLUGIN_API | 10 +- firmware/arabjoin.h | 2 + firmware/bidi.c | 43 +++--- firmware/common/diacritic.c | 26 ++-- firmware/common/unicode.c | 35 ++++- firmware/drivers/fat.c | 49 +++++-- firmware/drivers/lcd-bitmap-common.c | 10 +- 
firmware/export/bidi.h | 2 +- firmware/export/config.h | 7 + firmware/export/cpu.h | 5 + firmware/export/font.h | 20 +-- firmware/export/hangul.h | 3 +- firmware/font.c | 121 ++++++++++------ firmware/font_cache.c | 33 +++-- firmware/hangul.c | 6 +- firmware/include/diacritic.h | 2 +- firmware/include/font_cache.h | 13 +- firmware/include/lru.h | 2 +- firmware/include/rbunicode.h | 5 +- firmware/target/hosted/filesystem-win32.c | 143 ++++++++++++------- firmware/target/hosted/filesystem-win32.h | 6 +- lib/rbcodec/metadata/id3tags.c | 4 +- tools/convbdf.c | 4 +- tools/convttf.c | 2 +- 44 files changed, 476 insertions(+), 330 deletions(-) diff --git a/apps/hosted/android/keyboard.c b/apps/hosted/android/keyboard.c index b74f67e782..f00a11ea6e 100644 --- a/apps/hosted/android/keyboard.c +++ b/apps/hosted/android/keyboard.c @@ -82,7 +82,7 @@ static void kdb_init(void) sleep(HZ/10); } -int kbd_input(char* text, int buflen, unsigned short *kbd) +int kbd_input(char* text, int buflen, ucschar_t *kbd) { (void)kbd; JNIEnv e = *env_ptr; @@ -107,7 +107,7 @@ int kbd_input(char* text, int buflen, unsigned short *kbd) e->DeleteLocalRef(env_ptr, str); e->DeleteLocalRef(env_ptr, ok_text); e->DeleteLocalRef(env_ptr, cancel_text); - + return !accepted; /* return 0 on success */ } diff --git a/apps/keyboard.h b/apps/keyboard.h index e3ee0b05d0..a2d497c28e 100644 --- a/apps/keyboard.h +++ b/apps/keyboard.h @@ -23,7 +23,7 @@ /* '*kbd', same format as https://www.rockbox.org/wiki/LoadableKeyboardLayouts */ -int kbd_input(char* buffer, int buflen, unsigned short *kbd); +int kbd_input(char* buffer, int buflen, ucschar_t *kbd); int load_kbd(unsigned char* filename); diff --git a/apps/plugin.h b/apps/plugin.h index 5be1147cab..a0c41a20e7 100644 --- a/apps/plugin.h +++ b/apps/plugin.h @@ -176,7 +176,7 @@ int plugin_open(const char *plugin, const char *parameter); * when this happens please take the opportunity to sort in * any new functions "waiting" at the end of the list. 
*/ -#define PLUGIN_API_VERSION 273 +#define PLUGIN_API_VERSION 274 /* 239 Marks the removal of ARCHOS HWCODEC and CHARCELL */ @@ -296,15 +296,15 @@ struct plugin_api { #if defined(HAVE_LCD_ENABLE) || defined(HAVE_LCD_SLEEP) void (*button_queue_post)(long id, intptr_t data); #endif - unsigned short *(*bidi_l2v)( const unsigned char *str, int orientation ); - bool (*is_diacritic)(const unsigned short char_code, bool *is_rtl); - const unsigned char *(*font_get_bits)( struct font *pf, unsigned short char_code ); + ucschar_t *(*bidi_l2v)(const unsigned char *str, int orientation); + bool (*is_diacritic)(const ucschar_t char_code, bool *is_rtl); + const unsigned char *(*font_get_bits)(struct font *pf, ucschar_t char_code); int (*font_load)(const char *path); void (*font_unload)(int font_id); struct font* (*font_get)(int font); int (*font_getstringsize)(const unsigned char *str, int *w, int *h, int fontnumber); - int (*font_get_width)(struct font* pf, unsigned short char_code); + int (*font_get_width)(struct font* pf, ucschar_t char_code); void (*screen_clear_area)(struct screen * display, int xstart, int ystart, int width, int height); void (*gui_scrollbar_draw)(struct screen * screen, int x, int y, @@ -667,7 +667,7 @@ struct plugin_api { const unsigned char * const *units, unsigned int unit_count, bool binary_scale); /* unicode stuff */ - const unsigned char* (*utf8decode)(const unsigned char *utf8, unsigned short *ucs); + const unsigned char* (*utf8decode)(const unsigned char *utf8, ucschar_t *ucs); unsigned char* (*iso_decode)(const unsigned char *iso, unsigned char *utf8, int cp, int count); unsigned char* (*utf16LEdecode)(const unsigned char *utf16, unsigned char *utf8, int count); unsigned char* (*utf16BEdecode)(const unsigned char *utf16, unsigned char *utf8, int count); @@ -923,7 +923,7 @@ struct plugin_api { int (*rand)(void); void (*qsort)(void *base, size_t nmemb, size_t size, int(*compar)(const void *, const void *)); - int (*kbd_input)(char* buffer, int 
buflen, unsigned short *kbd); + int (*kbd_input)(char* buffer, int buflen, ucschar_t *kbd); struct tm* (*get_time)(void); struct tm * (*gmtime_r)(const time_t *timep, struct tm *tm); #if CONFIG_RTC diff --git a/apps/plugins/announce_status.c b/apps/plugins/announce_status.c index 21518b4d68..214a3812c2 100644 --- a/apps/plugins/announce_status.c +++ b/apps/plugins/announce_status.c @@ -70,7 +70,7 @@ static const char keybd_layout[] = * - \n does not create a key, but it also consumes one element * - the final null terminator is equivalent to \n * - since sizeof includes the null terminator we don't need +1 for that. */ -static unsigned short kbd_buf[sizeof(keybd_layout)]; +static ucschar_t kbd_buf[sizeof(keybd_layout)]; /****************** prototypes ******************/ void print_scroll(char* string); /* implements a scrolling screen */ @@ -164,7 +164,7 @@ static void config_set_defaults(void) gAnnounce.announce_on = 0; gAnnounce.grouping = 0; gAnnounce.wps_fmt[0] = '\0'; - gAnnounce.show_prompt = true; + gAnnounce.show_prompt = true; } static void config_reset_voice(void) @@ -250,7 +250,7 @@ static int announce_menu_cb(int action, struct gui_synclist *this_list) { (void)this_item; - unsigned short* kbd_p; + ucschar_t *kbd_p; int selection = rb->gui_synclist_get_sel_pos(this_list); diff --git a/apps/plugins/frotz/frotz.c b/apps/plugins/frotz/frotz.c index 4c12cfcd10..31fb668e60 100644 --- a/apps/plugins/frotz/frotz.c +++ b/apps/plugins/frotz/frotz.c @@ -128,7 +128,7 @@ void wait_for_key() case PLA_EXIT: hot_key_quit(); break; - + case PLA_SELECT: return; } @@ -159,7 +159,7 @@ zchar do_input(int timeout, bool show_cursor) { case PLA_EXIT: return ZC_HKEY_QUIT; - + case PLA_CANCEL: menu_ret = menu(); if (menu_ret != ZC_BAD) @@ -174,7 +174,7 @@ zchar do_input(int timeout, bool show_cursor) return ZC_BAD; default: - if (timeout != TIMEOUT_BLOCK && + if (timeout != TIMEOUT_BLOCK && !TIME_BEFORE(*rb->current_tick, timeout_at)) return ZC_TIME_OUT; } @@ -185,7 +185,7 @@ 
zchar os_read_key(int timeout, bool show_cursor) { int r; char inputbuf[5]; - short key; + ucschar_t key; zchar zkey; for(;;) @@ -214,7 +214,7 @@ zchar os_read_line(int max, zchar *buf, int timeout, int width, int continued) char inputbuf[256]; const char *in; char *out; - short key; + ucschar_t key; zchar zkey; for(;;) diff --git a/apps/plugins/keyremap.c b/apps/plugins/keyremap.c index 202d5fcfa4..bb7e965963 100644 --- a/apps/plugins/keyremap.c +++ b/apps/plugins/keyremap.c @@ -206,8 +206,8 @@ static int prompt_filename(char *buf, size_t bufsz) { #define KBD_LAYOUT "abcdefghijklmnop\nqrstuvwxyz |()[]\n1234567890 /._-+\n\n" \ "\nABCDEFGHIJKLMNOP\nQRSTUVWXYZ |()[]\n1234567890 /._-+" - unsigned short kbd[sizeof(KBD_LAYOUT) + 10]; - unsigned short *kbd_p = kbd; + ucschar_t kbd[sizeof(KBD_LAYOUT) + 10]; + ucschar_t *kbd_p = kbd; if (!kbd_create_layout(KBD_LAYOUT, kbd, sizeof(kbd))) kbd_p = NULL; @@ -1002,7 +1002,7 @@ next_line: { bufleft = bufsz - (pctx - filenamebuf); ctx = -1; - int ctx_x_flag_count = (LAST_CONTEXT_PLACEHOLDER + int ctx_x_flag_count = (LAST_CONTEXT_PLACEHOLDER * ARRAYLEN(context_flags)); for (int i=0;i < ctx_x_flag_count ;i++) @@ -2058,7 +2058,7 @@ static void synclist_set(int id, int selected_item, int items, int sel_size) } else if (menu_id == MENU_ID(M_SETKEYS)) { - keyset.view_columns = printcell_set_columns(&lists, NULL, + keyset.view_columns = printcell_set_columns(&lists, NULL, ACTVIEW_HEADER, Icon_Rockbox); printcell_enable(true); int curcol = printcell_get_column_selected(); diff --git a/apps/plugins/lib/grey_draw.c b/apps/plugins/lib/grey_draw.c index 298ffbe16e..481b34d3ad 100644 --- a/apps/plugins/lib/grey_draw.c +++ b/apps/plugins/lib/grey_draw.c @@ -199,8 +199,8 @@ void grey_hline(int x1, int x2, int y) /* nothing to draw? 
*/ if (y < _grey_info.clip_t || y >= _grey_info.clip_b || x1 >= _grey_info.clip_r || x2 < _grey_info.clip_l) - return; - + return; + /* drawmode and optimisation */ if (vp->drawmode & DRMODE_INVERSEVID) { @@ -251,7 +251,7 @@ void grey_vline(int x, int y1, int y2) unsigned char *dst, *dst_end; void (*pfunc)(unsigned char *address); int dwidth; - + /* direction flip */ if (y2 < y1) { @@ -264,7 +264,7 @@ void grey_vline(int x, int y1, int y2) if (x < _grey_info.clip_l || x >= _grey_info.clip_r || y1 >= _grey_info.clip_b || y2 < _grey_info.clip_t) return; - + /* clipping */ if (y1 < _grey_info.clip_t) y1 = _grey_info.clip_t; @@ -425,7 +425,7 @@ void grey_fillrect(int x, int y, int width, int height) if (height <= 0) return; - + dwidth = _grey_info.cb_width; dst = &_grey_info.curbuffer[ _GREY_MULUQ(dwidth, _grey_info.vp->y - _grey_info.cb_y + y) + @@ -653,8 +653,8 @@ void grey_gray_bitmap(const unsigned char *src, int x, int y, int width, /* Put a string at a given pixel position, skipping first ofs pixel columns */ void grey_putsxyofs(int x, int y, int ofs, const unsigned char *str) { - int ch; - unsigned short *ucs; + ucschar_t ch; + ucschar_t *ucs; struct font* pf; if (_grey_info.clip_b <= _grey_info.clip_t) @@ -680,7 +680,7 @@ void grey_putsxyofs(int x, int y, int ofs, const unsigned char *str) bits = rb->font_get_bits(pf, ch); grey_mono_bitmap_part(bits, ofs, 0, width, x, y, width - ofs, pf->height); - + x += width - ofs; ofs = 0; } @@ -709,7 +709,7 @@ void grey_ub_clear_display(void) #endif } -/* Assembler optimised helper function for copying a single line to the +/* Assembler optimised helper function for copying a single line to the * greyvalue buffer. 
*/ void _grey_line1(int width, unsigned char *dst, const unsigned char *src, const unsigned char *lut); @@ -725,7 +725,7 @@ void grey_ub_gray_bitmap_part(const unsigned char *src, int src_x, int src_y, if ((width <= 0) || (height <= 0) || (x >= _grey_info.width) || (y >= _grey_info.height) || (x + width <= 0) || (y + height <= 0)) return; - + /* clipping */ if (x < 0) { @@ -744,7 +744,7 @@ void grey_ub_gray_bitmap_part(const unsigned char *src, int src_x, int src_y, if (y + height > _grey_info.height) height = _grey_info.height - y; - src += _GREY_MULUQ(stride, src_y) + src_x; /* move starting point */ + src += _GREY_MULUQ(stride, src_y) + src_x; /* move starting point */ yc = y; ye = y + height; dst = _grey_info.values + (x << _GREY_BSHIFT); @@ -773,7 +773,7 @@ void grey_ub_gray_bitmap_part(const unsigned char *src, int src_x, int src_y, } while (src_row < src_end); #endif - + src += stride; } while (++yc < ye); diff --git a/apps/plugins/lib/kbd_helper.c b/apps/plugins/lib/kbd_helper.c index f99282575d..0f7bbff8f7 100644 --- a/apps/plugins/lib/kbd_helper.c +++ b/apps/plugins/lib/kbd_helper.c @@ -22,8 +22,8 @@ #include "kbd_helper.h" /* USAGE: - unsigned short kbd[64]; - unsigned short *kbd_p = kbd; + ucschar_t kbd[64]; + ucschar_t *kbd_p = kbd; if (!kbd_create_layout("ABCD1234\n", kbd, sizeof(kbd))) kbd_p = NULL; @@ -34,14 +34,14 @@ * success returns size of buffer used * failure returns 0 */ -int kbd_create_layout(const char *layout, unsigned short *buf, int bufsz) +int kbd_create_layout(const char *layout, ucschar_t *buf, int bufsz) { - unsigned short *pbuf; + ucschar_t *pbuf; const unsigned char *p = layout; int len = 0; int total_len = 0; pbuf = buf; - while (*p && (pbuf - buf + (ptrdiff_t) sizeof(unsigned short)) < bufsz) + while (*p && (pbuf - buf + (ptrdiff_t) sizeof(ucschar_t)) < bufsz) { p = rb->utf8decode(p, &pbuf[len+1]); if (pbuf[len+1] == '\n') @@ -60,7 +60,7 @@ int kbd_create_layout(const char *layout, unsigned short *buf, int bufsz) *pbuf = len; 
pbuf[len+1] = 0xFEFF; /* mark end of characters */ total_len += len + 1; - return total_len * sizeof(unsigned short); + return total_len * sizeof(ucschar_t); } return 0; diff --git a/apps/plugins/lib/kbd_helper.h b/apps/plugins/lib/kbd_helper.h index ee2ce7551c..ef24700309 100644 --- a/apps/plugins/lib/kbd_helper.h +++ b/apps/plugins/lib/kbd_helper.h @@ -1,10 +1,10 @@ /*************************************************************************** - * __________ __ ___. - * Open \______ \ ____ ____ | | _\_ |__ _______ ___ - * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / - * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < - * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ - * \/ \/ \/ \/ \/ + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ * $Id$ * * Copyright (C) 2020 William Wilgus @@ -22,6 +22,6 @@ #define KBD_HELPER_H /* create a custom keyboard layout for kbd_input */ -int kbd_create_layout(const char *layout, unsigned short *buf, int bufsz); +int kbd_create_layout(const char *layout, ucschar_t *buf, int bufsz); #endif /* KBD_HELPER_H */ diff --git a/apps/plugins/lib/simple_viewer.c b/apps/plugins/lib/simple_viewer.c index 07176eb991..e71efce753 100644 --- a/apps/plugins/lib/simple_viewer.c +++ b/apps/plugins/lib/simple_viewer.c @@ -62,7 +62,7 @@ static const char* get_next_line(const char *text, struct view_info *info) total = 0; while(*ptr) { - unsigned short ch; + ucschar_t ch; n = ((intptr_t)rb->utf8decode(ptr, &ch) - (intptr_t)ptr); if (rb->is_diacritic(ch, NULL)) w = 0; diff --git a/apps/plugins/lrcplayer.c b/apps/plugins/lrcplayer.c index d341e6b7a5..659373259d 100644 --- a/apps/plugins/lrcplayer.c +++ b/apps/plugins/lrcplayer.c @@ -422,7 +422,7 @@ static struct lrc_brpos *calc_brpos(struct lrc_line *lrc_line, int i) int nlrcbrpos = 0, 
max_lrcbrpos; uifont = rb->screens[0]->getuifont(); struct font* pf = rb->font_get(uifont); - unsigned short ch; + ucschar_t ch; struct snap { int count, width; int nword; diff --git a/apps/plugins/lua/rocklib.c b/apps/plugins/lua/rocklib.c index 9931d65326..3a318fcd96 100644 --- a/apps/plugins/lua/rocklib.c +++ b/apps/plugins/lua/rocklib.c @@ -141,6 +141,7 @@ RB_WRAP(touchscreen_mode) #endif +// XXX this may be broken with 32-bit ucschar_t RB_WRAP(kbd_input) { /*kbd_input(text, layout)* @@ -168,7 +169,7 @@ RB_WRAP(kbd_input) layout = NULL; } - if(!rb->kbd_input(buffer, LUAL_BUFFERSIZE, (unsigned short *)layout)) + if(!rb->kbd_input(buffer, LUAL_BUFFERSIZE, (ucschar_t *)layout)) { luaL_addstring(&b, buffer); luaL_pushresult(&b); diff --git a/apps/plugins/mpegplayer/mpegplayer.c b/apps/plugins/mpegplayer/mpegplayer.c index 96645d48ea..1ece91d908 100644 --- a/apps/plugins/mpegplayer/mpegplayer.c +++ b/apps/plugins/mpegplayer/mpegplayer.c @@ -1073,8 +1073,8 @@ static void draw_oriented_alpha_bitmap_part(const unsigned char *src, static void draw_putsxy_oriented(int x, int y, const char *str) { - unsigned short ch; - unsigned short *ucs; + ucschar_t ch; + ucschar_t *ucs; int ofs = MIN(x, 0); struct font* pf = rb->font_get(osd.font); diff --git a/apps/plugins/rockpaint.c b/apps/plugins/rockpaint.c index 05174ad034..a0fe37b6f4 100644 --- a/apps/plugins/rockpaint.c +++ b/apps/plugins/rockpaint.c @@ -969,8 +969,8 @@ static void buffer_alpha_bitmap_part( static void buffer_putsxyofs( fb_data *buf, int buf_width, int buf_height, int x, int y, int ofs, const unsigned char *str ) { - unsigned short ch; - unsigned short *ucs; + ucschar_t ch; + ucschar_t *ucs; struct font *pf = rb->font_get( FONT_UI ); if( !pf ) pf = rb->font_get( FONT_SYSFIXED ); diff --git a/apps/plugins/tagcache/tagcache.c b/apps/plugins/tagcache/tagcache.c index 87d2aa4c10..483d2fbaf9 100644 --- a/apps/plugins/tagcache/tagcache.c +++ b/apps/plugins/tagcache/tagcache.c @@ -135,10 +135,10 @@ static void 
sleep_yield(void) #define yield sleep_yield } -/* make sure tag can be displayed by font pf*/ +/* make sure tag can be displayed by font pf */ static bool text_is_displayable(struct font *pf, unsigned char *src) { - unsigned short code; + ucschar_t code; const unsigned char *ptr = src; while(*ptr) { diff --git a/apps/plugins/text_viewer/tv_text_processor.c b/apps/plugins/text_viewer/tv_text_processor.c index db96d61409..8bc8519780 100644 --- a/apps/plugins/text_viewer/tv_text_processor.c +++ b/apps/plugins/text_viewer/tv_text_processor.c @@ -41,7 +41,7 @@ static unsigned text_type = TV_TEXT_UNKNOWN; static const unsigned char *end_ptr; -static unsigned short ucsbuf[TV_MAX_BLOCKS][TV_MAX_CHARS_PER_BLOCK]; +static ucschar_t ucsbuf[TV_MAX_BLOCKS][TV_MAX_CHARS_PER_BLOCK]; static unsigned char utf8buf[TV_MAX_CHARS_PER_BLOCK * (2 * 3)]; static unsigned char *outbuf; @@ -54,11 +54,11 @@ static bool expand_extra_line = false; /* when a line is divided, this value sets true. */ static bool is_break_line = false; -static unsigned short break_chars[] = +static unsigned short break_chars[] = // XXX promote to ucschar_t if we get a codepoint > 0xffff { 0, /* halfwidth characters */ - '\t', '\n', 0x0b, 0x0c, ' ', '!', ',', '-', '.', ':', ';', '?', 0xb7, + '\t', '\n', 0x0b, 0x0c, ' ', '!', ',', '-', '.', ':', ';', '?', 0xb7, /* fullwidth characters */ 0x2010, /* hyphen */ 0x3000, /* fullwidth space */ @@ -76,7 +76,7 @@ static unsigned short break_chars[] = }; /* the characters which is not judged as space with isspace() */ -static unsigned short extra_spaces[] = { 0, 0x3000 }; +static unsigned short extra_spaces[] = { 0, 0x3000 }; // XXX promote to ucschar_t if we get a codepoint > 0xffff static int tv_glyph_width(int ch) { @@ -93,7 +93,7 @@ static int tv_glyph_width(int ch) return rb->font_get_width(rb->font_get(preferences->font_id), ch); } -static unsigned char *tv_get_ucs(const unsigned char *str, unsigned short *ch) +static unsigned char *tv_get_ucs(const unsigned char *str, 
ucschar_t *ch) { int count = 1; unsigned char utf8_tmp[3]; @@ -148,7 +148,7 @@ static unsigned char *tv_get_ucs(const unsigned char *str, unsigned short *ch) return (unsigned char *)str + count; } -static void tv_decode2utf8(const unsigned short *ucs, int count) +static void tv_decode2utf8(const ucschar_t *ucs, int count) { int i; @@ -158,7 +158,7 @@ static void tv_decode2utf8(const unsigned short *ucs, int count) *outbuf = '\0'; } -static bool tv_is_line_break_char(unsigned short ch) +static bool tv_is_line_break_char(ucschar_t ch) { size_t i; @@ -166,7 +166,7 @@ static bool tv_is_line_break_char(unsigned short ch) if (preferences->word_mode == WM_CHOP) return false; - for (i = 0; i < sizeof(break_chars)/sizeof(unsigned short); i++) + for (i = 0; i < sizeof(break_chars)/sizeof(ucschar_t); i++) { if (break_chars[i] == ch) return true; @@ -174,14 +174,14 @@ static bool tv_is_line_break_char(unsigned short ch) return false; } -static bool tv_isspace(unsigned short ch) +static bool tv_isspace(ucschar_t ch) { size_t i; if (ch < 128 && isspace(ch)) return true; - for (i = 0; i < sizeof(extra_spaces)/sizeof(unsigned short); i++) + for (i = 0; i < sizeof(extra_spaces)/sizeof(ucschar_t); i++) { if (extra_spaces[i] == ch) return true; @@ -191,17 +191,17 @@ static bool tv_isspace(unsigned short ch) static bool tv_is_break_line_join_mode(const unsigned char *next_str) { - unsigned short ch; + ucschar_t ch; tv_get_ucs(next_str, &ch); return tv_isspace(ch); } -static int tv_form_reflow_line(unsigned short *ucs, int chars) +static int tv_form_reflow_line(ucschar_t *ucs, int chars) { - unsigned short new_ucs[TV_MAX_CHARS_PER_BLOCK]; - unsigned short *p = new_ucs; - unsigned short ch; + ucschar_t new_ucs[TV_MAX_CHARS_PER_BLOCK]; + ucschar_t *p = new_ucs; + ucschar_t ch; int i; int k; int expand_spaces; @@ -262,15 +262,15 @@ static int tv_form_reflow_line(unsigned short *ucs, int chars) } } - rb->memcpy(ucs, new_ucs, sizeof(unsigned short) * TV_MAX_CHARS_PER_BLOCK); + 
rb->memcpy(ucs, new_ucs, sizeof(ucschar_t) * TV_MAX_CHARS_PER_BLOCK); return indent_chars + nonspace_chars + expand_spaces; } static void tv_align_right(int *block_chars) { - unsigned short *cur_text; - unsigned short *prev_text; - unsigned short ch; + ucschar_t *cur_text; + ucschar_t *prev_text; + ucschar_t ch; int cur_block = block_count - 1; int prev_block; int cur_chars; @@ -335,9 +335,9 @@ static void tv_align_right(int *block_chars) if (break_pos < prev_chars) { rb->memmove(cur_text + prev_chars - break_pos, - cur_text, block_chars[cur_block] * sizeof(unsigned short)); + cur_text, block_chars[cur_block] * sizeof(ucschar_t)); rb->memcpy(cur_text, prev_text + break_pos, - (prev_chars - break_pos) * sizeof(unsigned short)); + (prev_chars - break_pos) * sizeof(ucschar_t)); block_chars[prev_block] = break_pos; block_chars[cur_block ] += prev_chars - break_pos; @@ -347,15 +347,15 @@ static void tv_align_right(int *block_chars) } } -static int tv_parse_text(const unsigned char *src, unsigned short *ucs, +static int tv_parse_text(const unsigned char *src, ucschar_t *ucs, int *ucs_chars, bool is_indent) { const unsigned char *cur = src; const unsigned char *next = src; const unsigned char *line_break_ptr = NULL; const unsigned char *line_end_ptr = NULL; - unsigned short ch = 0; - unsigned short prev_ch; + ucschar_t ch = 0; + ucschar_t prev_ch; int chars = 0; int gw; int line_break_width = 0; @@ -480,7 +480,7 @@ static int tv_parse_text(const unsigned char *src, unsigned short *ucs, int tv_create_formed_text(const unsigned char *src, ssize_t bufsize, int block, bool is_multi, const unsigned char **dst) { - unsigned short ch; + ucschar_t ch; int chars[block_count]; int i; int size = 0; diff --git a/apps/plugins/zxbox/zxbox_keyb.c b/apps/plugins/zxbox/zxbox_keyb.c index 6ec04ec04b..56e0025c60 100644 --- a/apps/plugins/zxbox/zxbox_keyb.c +++ b/apps/plugins/zxbox/zxbox_keyb.c @@ -326,7 +326,7 @@ struct keyboard_parameters { const unsigned char* default_kbd; int 
DEFAULT_LINES; - unsigned short kbd_buf[KBD_BUF_SIZE]; + ucschar_t kbd_buf[KBD_BUF_SIZE]; int nchars; int font_w; int font_h; @@ -358,7 +358,7 @@ int zx_kbd_input(char* text/*, int buflen*/) int editpos, len_utf8; #endif /* int statusbar_size = global_settings.statusbar ? STATUSBAR_HEIGHT : 0;*/ - unsigned short ch/*, tmp, hlead = 0, hvowel = 0, htail = 0*/; + ucschar_t ch/*, tmp, hlead = 0, hvowel = 0, htail = 0*/; /*bool hangul = false;*/ unsigned char *utf8; const unsigned char *p; diff --git a/apps/recorder/keyboard.c b/apps/recorder/keyboard.c index 0b85918dc1..0b8dbea06c 100644 --- a/apps/recorder/keyboard.c +++ b/apps/recorder/keyboard.c @@ -90,15 +90,15 @@ enum ekbd_viewports struct keyboard_parameters { struct viewport *kbd_viewports; - unsigned short kbd_buf[KBD_BUF_SIZE]; - unsigned short *kbd_buf_ptr; + ucschar_t kbd_buf[KBD_BUF_SIZE]; + ucschar_t *kbd_buf_ptr; unsigned short max_line_len; int default_lines; - int last_k; - int last_i; - int font_w; - int font_h; - int text_w; + unsigned int last_k; + unsigned int last_i; + unsigned short font_w; + unsigned short font_h; + unsigned int text_w; int curfont; int main_y; #ifdef HAVE_MORSE_INPUT @@ -128,7 +128,7 @@ struct edit_state int editpos; /* Edit position on all screens */ bool cur_blink; /* Cursor on/off flag */ bool hangul; - unsigned short hlead, hvowel, htail; + ucschar_t hlead, hvowel, htail; #ifdef HAVE_MORSE_INPUT bool morse_mode; bool morse_reading; @@ -158,13 +158,13 @@ static void keyboard_layout(struct viewport *kbd_vp, { /*Note: viewports are initialized to vp_default by kbd_create_viewports */ - int sc_w = sc->getwidth(); - int sc_h = sc->getheight(); + unsigned short sc_w = sc->getwidth(); + unsigned short sc_h = sc->getheight(); /* TEXT */ struct viewport *vp = &kbd_vp[eKBD_VP_TEXT]; /* make sure height is even for the text box */ - int text_height = (MAX(pm->font_h, get_icon_height(sc->screen_type)) & ~1) + 2; + unsigned short text_height = (MAX(pm->font_h, (unsigned 
int)get_icon_height(sc->screen_type)) & ~1) + 2; vp->x = 0; /* LEFT */ vp->y = 0; /* TOP */ vp->width = sc_w; @@ -224,7 +224,7 @@ int load_kbd(unsigned char* filename) int fd; int i, line_len, max_line_len; unsigned char buf[4]; - unsigned short *pbuf; + ucschar_t *pbuf; if (filename == NULL) { @@ -245,7 +245,7 @@ int load_kbd(unsigned char* filename) /* check how many bytes to read for this character */ static const unsigned char sizes[4] = { 0x80, 0xe0, 0xf0, 0xf5 }; size_t count; - unsigned short ch; + ucschar_t ch; for (count = 0; count < ARRAYLEN(sizes); count++) { @@ -297,7 +297,7 @@ int load_kbd(unsigned char* filename) struct keyboard_parameters *pm = &kbd_param[l]; #if NB_SCREENS > 1 if (l > 0) - memcpy(pm->kbd_buf, kbd_param[0].kbd_buf, i*sizeof(unsigned short)); + memcpy(pm->kbd_buf, kbd_param[0].kbd_buf, i*sizeof(ucschar_t)); #endif /* initialize parameters */ pm->x = pm->y = pm->page = 0; @@ -309,7 +309,7 @@ int load_kbd(unsigned char* filename) } /* helper function to spell a char */ -static void kbd_spellchar(unsigned short c) +static void kbd_spellchar(ucschar_t c) { unsigned char tmp[5]; /* store char to pass to talk_spell */ @@ -322,7 +322,7 @@ static void kbd_spellchar(unsigned short c) talk_spell(tmp, false); } -static void kbd_inschar(struct edit_state *state, unsigned short ch) +static void kbd_inschar(struct edit_state *state, ucschar_t ch) { int i, j, len; unsigned char tmp[4]; @@ -361,10 +361,10 @@ static void kbd_delchar(struct edit_state *state) } /* Lookup k value based on state of param (pm) */ -static unsigned short get_kbd_ch(struct keyboard_parameters *pm, int x, int y) +static ucschar_t get_kbd_ch(struct keyboard_parameters *pm, int x, int y) { - int i = 0, k = pm->page*pm->lines + y, n; - unsigned short *pbuf; + unsigned int n, i = 0, k = pm->page*pm->lines + y; + ucschar_t *pbuf; if (k >= pm->last_k) { i = pm->last_i; @@ -406,12 +406,12 @@ static void kbd_move_picker_horizontal(struct keyboard_parameters *pm, static void 
kbd_move_picker_vertical(struct keyboard_parameters *pm, struct edit_state *state, int dir); -int kbd_input(char* text, int buflen, unsigned short *kbd) +int kbd_input(char* text, int buflen, ucschar_t *kbd) { bool done = false; struct keyboard_parameters * const param = kbd_param; struct edit_state state; - unsigned short ch; + ucschar_t ch; int ret = 0; /* assume success */ FOR_NB_SCREENS(l) { @@ -449,7 +449,7 @@ int kbd_input(char* text, int buflen, unsigned short *kbd) FOR_NB_SCREENS(l) { struct keyboard_parameters *pm = ¶m[l]; - unsigned short *pbuf; + ucschar_t *pbuf; const unsigned char *p; int len = 0; @@ -800,8 +800,8 @@ static void kbd_calc_pm_params(struct keyboard_parameters *pm, { struct font* font; const unsigned char *p; - unsigned short ch, *pbuf; - int i, w; + ucschar_t ch, *pbuf; + unsigned int i, w; #ifdef HAVE_TOUCHSCREEN pm->show_buttons = (sc->screen_type == SCREEN_MAIN && (touchscreen_get_mode() == TOUCHSCREEN_POINT)); @@ -812,7 +812,7 @@ static void kbd_calc_pm_params(struct keyboard_parameters *pm, pm->font_h = font->height; /* check if FONT_UI fits the screen */ - if (2*pm->font_h + 3 > sc->getheight()) + if (pm->font_h*2 + 3 > sc->getheight()) { pm->curfont = FONT_SYSFIXED; font = font_get(FONT_SYSFIXED); @@ -858,9 +858,9 @@ static void kbd_calc_vp_params(struct keyboard_parameters *pm, { (void) state; struct viewport *vp = &pm->kbd_viewports[eKBD_VP_PICKER]; - int icon_w, sc_w, sc_h; + unsigned int icon_w, sc_w, sc_h; int i, total_lines; - unsigned short *pbuf; + ucschar_t *pbuf; /* calculate how many characters to put in a row. */ icon_w = get_icon_width(sc->screen_type); @@ -970,7 +970,7 @@ static void kbd_draw_picker(struct keyboard_parameters *pm, x = 0; y = 0; outline[1] = '\0'; - + /* Draw morse code table with code descriptions. 
*/ for (i = 0; morse_alphabets[i] != '\0'; i++) { int morse_code; @@ -1024,7 +1024,7 @@ static void kbd_draw_picker(struct keyboard_parameters *pm, /* draw page */ int i, j; int w, h; - unsigned short ch; + ucschar_t ch; unsigned char *utf8; sc->setfont(pm->curfont); @@ -1071,7 +1071,7 @@ static void kbd_draw_edit_line(struct keyboard_parameters *pm, int sc_w = vp->width; int y = (vp->height - pm->font_h) / 2; - + int text_margin = (sc_w - pm->text_w * pm->max_chars_text) / 2; #if 0 @@ -1265,12 +1265,12 @@ static void kbd_insert_selected(struct keyboard_parameters *pm, struct edit_state *state) { /* find input char */ - unsigned short ch = get_kbd_ch(pm, pm->x, pm->y); + ucschar_t ch = get_kbd_ch(pm, pm->x, pm->y); /* check for hangul input */ if (ch >= 0x3131 && ch <= 0x3163) { - unsigned short tmp; + ucschar_t tmp; if (!state->hangul) { @@ -1335,7 +1335,7 @@ static void kbd_insert_selected(struct keyboard_parameters *pm, static void kbd_backspace(struct edit_state *state) { - unsigned short ch; + ucschar_t ch; if (state->hangul) { if (state->htail) diff --git a/bootloader/iriver_h1x0.c b/bootloader/iriver_h1x0.c index 502143ce58..1bf41b5a8c 100644 --- a/bootloader/iriver_h1x0.c +++ b/bootloader/iriver_h1x0.c @@ -600,16 +600,16 @@ int usb_screen(void) return 0; } -unsigned short *bidi_l2v(const unsigned char *str, int orientation) +ucschar_t *bidi_l2v(const unsigned char *str, int orientation) { - static unsigned short utf16_buf[SCROLL_LINE_SIZE]; - unsigned short *target; + static ucschar_t utf_buf[SCROLL_LINE_SIZE]; + ucschar_t *target; (void)orientation; - target = utf16_buf; + target = utf_buf; while (*str) str = utf8decode(str, target++); *target = 0; - return utf16_buf; + return utf_buf; } diff --git a/bootloader/iriver_h300.c b/bootloader/iriver_h300.c index 3cb06ca5c6..7ba78f9d7f 100644 --- a/bootloader/iriver_h300.c +++ b/bootloader/iriver_h300.c @@ -660,16 +660,16 @@ int usb_screen(void) return 0; } -unsigned short *bidi_l2v(const unsigned char *str, int 
orientation) +ucschar_t *bidi_l2v(const unsigned char *str, int orientation) { - static unsigned short utf16_buf[SCROLL_LINE_SIZE]; - unsigned short *target; + static ucschar_t utf_buf[SCROLL_LINE_SIZE]; + ucschar_t *target; (void)orientation; - target = utf16_buf; + target = utf_buf; while (*str) str = utf8decode(str, target++); *target = 0; - return utf16_buf; + return utf_buf; } diff --git a/docs/PLUGIN_API b/docs/PLUGIN_API index 915d26119b..961d3da2b8 100644 --- a/docs/PLUGIN_API +++ b/docs/PLUGIN_API @@ -225,7 +225,7 @@ void beep_play(unsigned int frequency, unsigned int duration, unsigned int ampli \param amplitude \description -unsigned short *bidi_l2v( const unsigned char *str, int orientation ) +ucschar_t *bidi_l2v( const unsigned char *str, int orientation ) \param str \param orientation \return @@ -407,13 +407,13 @@ const struct cbmp_bitmap_info_entry *core_bitmaps \return \description -const unsigned char *font_get_bits( struct font *pf, unsigned short char_code ) +const unsigned char *font_get_bits( struct font *pf, ucschar_t char_code ) \param pf \param char_code \return \description -const unsigned char* utf8decode(const unsigned char *utf8, unsigned short *ucs) +const unsigned char* utf8decode(const unsigned char *utf8, ucschar_t *ucs) \group unicode stuff \param utf8 \param ucs @@ -747,7 +747,7 @@ int font_getstringsize(const unsigned char *str, int *w, int *h, int fontnumber) \return \description -int font_get_width(struct font* pf, unsigned short char_code) +int font_get_width(struct font* pf, ucschar_t char_code) \param pf \param char_code \return @@ -972,7 +972,7 @@ bool is_diacritic(const unsigned short char_code, bool *is_rtl) \return \description -int kbd_input(char* buffer, int buflen, unsigned short *kbd) +int kbd_input(char* buffer, int buflen, ucschar_t *kbd) \group misc \param buffer \param buflen diff --git a/firmware/arabjoin.h b/firmware/arabjoin.h index 0085b31f4c..5db6b4d79a 100644 --- a/firmware/arabjoin.h +++ 
b/firmware/arabjoin.h @@ -1,3 +1,5 @@ +/* Note these are not ucschar_t because all arabic + codepoints are <16bit, so no need to waste table space */ typedef struct { unsigned short isolated; unsigned short final; diff --git a/firmware/bidi.c b/firmware/bidi.c index c19412693e..310de36c91 100644 --- a/firmware/bidi.c +++ b/firmware/bidi.c @@ -44,7 +44,7 @@ #define XOR(a,b) ((a||b) && !(a&&b)) #ifndef BOOTLOADER -static const arab_t * arab_lookup(unsigned short uchar) +static const arab_t * arab_lookup(ucschar_t uchar) { if (uchar >= 0x621 && uchar <= 0x63a) return &(jointable[uchar - 0x621]); @@ -57,15 +57,15 @@ static const arab_t * arab_lookup(unsigned short uchar) return 0; } -static void arabjoin(unsigned short * stringprt, int length) +static void arabjoin(ucschar_t *stringprt, int length) { bool connected = false; - unsigned short * writeprt = stringprt; + ucschar_t *writeprt = stringprt; const arab_t * prev = 0; const arab_t * cur; const arab_t * ligature = 0; - short uchar; + ucschar_t uchar; int i; for (i = 0; i <= length; i++) { @@ -135,13 +135,13 @@ static void arabjoin(unsigned short * stringprt, int length) } #endif /* !BOOTLOADER */ -unsigned short *bidi_l2v(const unsigned char *str, int orientation) +ucschar_t *bidi_l2v(const unsigned char *str, int orientation) { - static unsigned short utf16_buf[SCROLL_LINE_SIZE]; - unsigned short *target, *tmp; + static ucschar_t utf_buf[SCROLL_LINE_SIZE]; + ucschar_t *target, *tmp; #ifndef BOOTLOADER - static unsigned short bidi_buf[SCROLL_LINE_SIZE]; - unsigned short *heb_str; /* *broken_str */ + static ucschar_t bidi_buf[SCROLL_LINE_SIZE]; + ucschar_t *heb_str; /* *broken_str */ int block_start, block_end, block_type, block_length, i; int length = utf8length(str); length=length>=SCROLL_LINE_SIZE?SCROLL_LINE_SIZE-1:length; @@ -152,21 +152,21 @@ unsigned short *bidi_l2v(const unsigned char *str, int orientation) tmp = str; */ - target = tmp = utf16_buf; - while (*str && target < &utf16_buf[SCROLL_LINE_SIZE-1]) +
target = tmp = utf_buf; + while (*str && target < &utf_buf[SCROLL_LINE_SIZE-1]) str = utf8decode(str, target++); *target = 0; #ifdef BOOTLOADER (void)orientation; - return utf16_buf; - + return utf_buf; + #else /* !BOOTLOADER */ - if (target == utf16_buf) /* empty string */ + if (target == utf_buf) /* empty string */ return target; /* properly join any arabic chars */ - arabjoin(utf16_buf, length); + arabjoin(utf_buf, length); block_start=block_end=block_length=0; @@ -204,7 +204,7 @@ unsigned short *bidi_l2v(const unsigned char *str, int orientation) for (i=block_start; i<=block_end; i++) { *target = (block_type == orientation) ? - *(utf16_buf+i) : *(utf16_buf+block_end-i+block_start); + *(utf_buf+i) : *(utf_buf+block_end-i+block_start); if (block_type!=orientation) { switch (*target) { case '(': @@ -226,7 +226,7 @@ unsigned short *bidi_l2v(const unsigned char *str, int orientation) *target = 0; #if 0 /* Is this code really necessary? */ - broken_str = utf16_buf; + broken_str = utf_buf; begin=end=length-1; target = broken_str; @@ -246,7 +246,7 @@ unsigned short *bidi_l2v(const unsigned char *str, int orientation) if (char_count==max_chars) { /* try to avoid breaking words */ int new_char_count = char_count; int new_begin = begin; - + while (new_char_count>0) { if (_isblank(heb_str[new_begin]) || _isnewline(heb_str[new_begin])) { @@ -261,11 +261,11 @@ unsigned short *bidi_l2v(const unsigned char *str, int orientation) } } orig_begin=begin; - + /* if (_isblank(heb_str[begin])) { heb_str[begin]='\n'; } */ - + /* skip leading newlines */ while (begin<=end && _isnewline(heb_str[begin])) { begin++; @@ -282,7 +282,7 @@ unsigned short *bidi_l2v(const unsigned char *str, int orientation) target++; } begin=orig_begin; - + if (begin<=0) { *target = 0; break; @@ -295,4 +295,3 @@ unsigned short *bidi_l2v(const unsigned char *str, int orientation) return heb_str; #endif /* !BOOTLOADER */ } - diff --git a/firmware/common/diacritic.c b/firmware/common/diacritic.c index 
7a0d97b79b..68c8dcd4c9 100644 --- a/firmware/common/diacritic.c +++ b/firmware/common/diacritic.c @@ -28,8 +28,8 @@ #include "system.h" #define DIAC_NUM_RANGES (ARRAYLEN(diac_ranges)) -#define DIAC_RTL (1 << 7) -#define DIAC_CNT (0xFF ^ DIAC_RTL) +#define DIAC_RTL (1 << 15) +#define DIAC_CNT (0xFFFF ^ DIAC_RTL) /* Each diac_range_ struct defines a Unicode range that begins with * N diacritic characters, and continues with non-diacritic characters up to the @@ -39,8 +39,8 @@ struct diac_range { - uint16_t base; - uint8_t info; /* [RTL:1 CNT:7] */ + uint16_t base; /* Not ucschar_t until we need >16b */ + uint16_t info; /* [RTL:1 CNT:15] */ }; #define DIAC_RANGE_ENTRY(first_diac, first_non_diac, is_rtl) \ @@ -51,7 +51,7 @@ struct diac_range static const struct diac_range diac_ranges[] = { DIAC_RANGE_ENTRY(0x0000, 0x0000, 0), - DIAC_RANGE_ENTRY(FIRST_DIACRITIC, 0x0370, 0), + DIAC_RANGE_ENTRY(FIRST_DIACRITIC, 0x0370, 0), /* v1 - v4.1 */ DIAC_RANGE_ENTRY(0x0483, 0x048a, 0), DIAC_RANGE_ENTRY(0x0591, 0x05be, 1), DIAC_RANGE_ENTRY(0x05bf, 0x05c0, 1), @@ -146,6 +146,7 @@ static const struct diac_range diac_ranges[] = DIAC_RANGE_ENTRY(0x19c8, 0x19ca, 0), DIAC_RANGE_ENTRY(0x1a17, 0x1a1c, 0), DIAC_RANGE_ENTRY(0x1a55, 0x1a80, 0), + DIAC_RANGE_ENTRY(0x1ab0, 0x1b00, 0), /* v7.0 */ DIAC_RANGE_ENTRY(0x1b00, 0x1b05, 0), DIAC_RANGE_ENTRY(0x1b34, 0x1b45, 0), DIAC_RANGE_ENTRY(0x1b6b, 0x1b74, 0), @@ -156,10 +157,10 @@ static const struct diac_range diac_ranges[] = DIAC_RANGE_ENTRY(0x1cd4, 0x1ce9, 0), DIAC_RANGE_ENTRY(0x1ced, 0x1cee, 0), DIAC_RANGE_ENTRY(0x1cf2, 0x1cf3, 0), - DIAC_RANGE_ENTRY(0x1dc0, 0x1e00, 0), - DIAC_RANGE_ENTRY(0x20d0, 0x20f1, 0), + DIAC_RANGE_ENTRY(0x1dc0, 0x1e00, 0), /* v4.1 - v5.2 */ + DIAC_RANGE_ENTRY(0x20d0, 0x2100, 0), /* v1.0 - v5.1 */ DIAC_RANGE_ENTRY(0x2cef, 0x2cf2, 0), - DIAC_RANGE_ENTRY(0x2de0, 0x2e00, 0), + DIAC_RANGE_ENTRY(0x2de0, 0x2e00, 0), /* v5.1 */ DIAC_RANGE_ENTRY(0x302a, 0x3030, 0), DIAC_RANGE_ENTRY(0x3099, 0x309b, 0), DIAC_RANGE_ENTRY(0xa66f, 
0xa673, 0), @@ -188,7 +189,7 @@ static const struct diac_range diac_ranges[] = DIAC_RANGE_ENTRY(0xabe3, 0xabeb, 0), DIAC_RANGE_ENTRY(0xabec, 0xabee, 0), DIAC_RANGE_ENTRY(0xfb1e, 0xfb1f, 0), - DIAC_RANGE_ENTRY(0xfe20, 0xfe27, 0), + DIAC_RANGE_ENTRY(0xfe20, 0xfe30, 0), /* v1.0 - v8.0 */ DIAC_RANGE_ENTRY(0xfe70, 0xfe70, 1), DIAC_RANGE_ENTRY(0xff00, 0xff00, 0), DIAC_RANGE_ENTRY(0xffff, 0xffff, 0), @@ -196,7 +197,7 @@ static const struct diac_range diac_ranges[] = #define MRU_MAX_LEN 32 -bool is_diacritic(const unsigned short char_code, bool *is_rtl) +bool is_diacritic(const ucschar_t char_code, bool *is_rtl) { static uint8_t mru_len = 0; static uint8_t diacritic_mru[MRU_MAX_LEN]; @@ -209,7 +210,6 @@ bool is_diacritic(const unsigned short char_code, bool *is_rtl) /* Search in MRU */ for (mru = 0, i = 0; mru < mru_len; mru++) { - /* Items shifted >> 1 */ itmp = i; i = diacritic_mru[mru]; @@ -250,10 +250,10 @@ Found: if (is_rtl) *is_rtl = ((DIAC_RTL & info) == DIAC_RTL); - return (char_code < diac->base + (info & DIAC_CNT)); + return (char_code < (diac->base + (info & DIAC_CNT))); } #else /*BOOTLOADER*/ -inline bool is_diacritic(const unsigned short char_code, bool *is_rtl) +inline bool is_diacritic(const ucschar_t char_code, bool *is_rtl) { (void)char_code; if (is_rtl) diff --git a/firmware/common/unicode.c b/firmware/common/unicode.c index 444ea0f406..c1e187a709 100644 --- a/firmware/common/unicode.c +++ b/firmware/common/unicode.c @@ -127,7 +127,7 @@ static int volatile cp_table_ref = 0; /* non-default codepage table buffer (cannot be bufalloced! playback itself may be making the load request) */ -static unsigned short codepage_table[MAX_CP_TABLE_SIZE+1]; +static unsigned short codepage_table[MAX_CP_TABLE_SIZE+1]; // XXX convert to ucschar_t if we ever need > 16bit mappings? 
#if defined(APPLICATION) && defined(__linux__) static const char * const name_codepages_linux[NUM_CODEPAGES+1] = @@ -344,7 +344,7 @@ unsigned char* iso_decode_ex(const unsigned char *iso, unsigned char *utf8, int cp_lock_leave(); while (count-- && utf8_size > 0) { - unsigned short ucs, tmp; + ucschar_t ucs, tmp; if (*iso < 128 || cp == UTF_8) /* Already UTF-8 */ { @@ -511,8 +511,25 @@ unsigned long utf8length(const unsigned char *utf8) return l; } +/* Take a utf8 string and return the encoded length in utf16 code units */ +unsigned long utf16len_utf8(const unsigned char *utf8) +{ + ucschar_t cp; + unsigned long length = 0; + while (*utf8) { + utf8 = utf8decode(utf8, &cp); +#ifdef UNICODE32 + if (cp >= 0x10000) + length++; +#endif + length++; + } + + return length; +} + /* Decode 1 UTF-8 char and return a pointer to the next char. */ -const unsigned char* utf8decode(const unsigned char *utf8, unsigned short *ucs) +const unsigned char* utf8decode(const unsigned char *utf8, ucschar_t *ucs) { unsigned char c = *utf8++; unsigned long code; @@ -552,8 +569,16 @@ const unsigned char* utf8decode(const unsigned char *utf8, unsigned short *ucs) /* Invalid UTF-8 char */ code = 0xfffd; } - /* currently we don't support chars above U-FFFF */ - *ucs = (code < 0x10000) ? 
code : 0xfffd; + +#ifdef UNICODE32 + if (code > 0x10ffff) + code = 0xfffd; +#else + if (code > 0xffff) + code = 0xfffd; +#endif + + *ucs = code; return utf8; } diff --git a/firmware/drivers/fat.c b/firmware/drivers/fat.c index 460b6d8a39..b67b3062ee 100644 --- a/firmware/drivers/fat.c +++ b/firmware/drivers/fat.c @@ -747,6 +747,8 @@ static bool fatlong_parse_entry(struct fatlong_parse_state *lnparse, /* so far so good; save entry information */ lnparse->ord = ord; + /* Treat entries as opaque 16-bit values; + utf8decode happens in fatlong_parse_finish() */ uint16_t *ucsp = fatent->ucssegs[ord - 1 + 5]; unsigned int i = longent_char_first(); @@ -797,13 +799,24 @@ static bool fatlong_parse_finish(struct fatlong_parse_state *lnparse, /* ensure the last segment is NULL-terminated if it is filled */ fatent->ucssegs[lnparse->ord_max + 5][0] = 0x0000; - for (uint16_t *ucsp = fatent->ucssegs[5], ucc = *ucsp; - ucc; ucc = *++ucsp) + unsigned long ucc; /* Decoded codepoint */ + uint16_t *ucsp, ucs; + for (ucsp = fatent->ucssegs[5], ucs=*ucsp; ucs; ucs = *++ucsp) { /* end should be hit before ever seeing padding */ - if (ucc == 0xffff) + if (ucs == 0xffff) return false; +#ifdef UNICODE32 + /* Check for a surrogate UTF16 pair */ + if (ucs >= 0xd800 && ucs < 0xdc00 && + *(ucsp+1) >= 0xdc00 && *(ucsp+1) < 0xe000) { + ucc = 0x10000 + (((ucs & 0x3ff) << 10) | (*(ucsp+1) & 0x3ff)); + ucsp++; + } else +#endif + ucc = ucs; + if ((p = utf8encode(ucc, p)) - name > FAT_DIRENTRY_NAME_MAX) return false; } @@ -1612,12 +1625,27 @@ static int write_longname(struct bpb *fat_bpb, struct fat_filestr *parentstr, for (unsigned long i = 0; i < ucspadlen; i++) { - if (i < ucslen) + if (i < ucslen) { +#ifdef UNICODE32 + ucschar_t tmp; + name = utf8decode(name, &tmp); + /* For codepoints > U+FFFF we will need to use a UTF16 surrogate + pair. 'ucslen' already takes this into account! 
*/ + if (tmp < 0x10000) { + ucsname[i] = tmp; + } else { + tmp -= 0x10000; + ucsname[i++] = 0xd800 | ((tmp >> 10) & 0x3ff); /* High */ + ucsname[i] = 0xdc00 | (tmp & 0x3ff); /* Low */ + } +#else name = utf8decode(name, &ucsname[i]); - else if (i == ucslen) +#endif + } else if (i == ucslen) { ucsname[i] = 0x0000; /* name doesn't fill last block */ - else /* i > ucslen */ + } else /* i > ucslen */ { ucsname[i] = 0xffff; /* pad-out to end */ + } } dc_lock_cache(); @@ -1744,9 +1772,12 @@ static int add_dir_entry(struct bpb *fat_bpb, struct fat_filestr *parentstr, create_dos_name(basisname, name, &n); randomize_dos_name(shortname, basisname, &n); - /* one dir entry needed for every 13 characters of filename, - plus one entry for the short name */ - ucslen = utf8length(name); + /* one dir entry needed for every 13 utf16 "code units" + of filename, plus one entry for the short name. + Keep in mind that a unicode character can take up to + two code units! + */ + ucslen = utf16len_utf8(name); if (ucslen > 255) FAT_ERROR(-2); /* name is too long */ diff --git a/firmware/drivers/lcd-bitmap-common.c b/firmware/drivers/lcd-bitmap-common.c index ac032e815a..e57ab97611 100644 --- a/firmware/drivers/lcd-bitmap-common.c +++ b/firmware/drivers/lcd-bitmap-common.c @@ -385,7 +385,7 @@ static void LCDFN(mono_bmp_part_helper)(const unsigned char *src, int src_x, /* put a string at a given pixel position, skipping first ofs pixel columns */ static void LCDFN(putsxyofs)(int x, int y, int ofs, const unsigned char *str) { - unsigned short *ucs; + ucschar_t *ucs; struct viewport *vp = LCDFN(current_viewport); font_lock(vp->font, true); struct font* pf = font_get(vp->font); @@ -429,7 +429,7 @@ static void LCDFN(putsxyofs)(int x, int y, int ofs, const unsigned char *str) bool is_rtl, is_diac; const unsigned char *bits; int width, base_width, base_ofs = 0; - const unsigned short next_ch = ucs[1]; + const ucschar_t next_ch = ucs[1]; if (x >= vp->width) break; @@ -447,7 +447,7 @@ static void 
LCDFN(putsxyofs)(int x, int y, int ofs, const unsigned char *str) { if (!rtl_next_non_diac_width) { - const unsigned short *u; + const ucschar_t *u; /* Jump to next non-diacritic char, and calc its width */ for (u = &ucs[1]; *u && IS_DIACRITIC(*u); u++); @@ -529,7 +529,7 @@ static void LCDFN(putsxyofs)(int x, int y, int ofs, const unsigned char *str) /* put a string at a given pixel position, skipping first ofs pixel columns */ static void LCDFN(putsxyofs)(int x, int y, int ofs, const unsigned char *str) { - unsigned short *ucs; + ucschar_t *ucs; struct viewport *vp = LCDFN(current_viewport); struct font* pf = font_get(vp->font); const unsigned char *bits; @@ -567,7 +567,7 @@ static void LCDFN(putsxyofs)(int x, int y, int ofs, const unsigned char *str) /* allow utf but no diacritics or rtl lang */ for (ucs = bidi_l2v(str, 1); *ucs; ucs++) { - const unsigned short next_ch = ucs[1]; + const ucschar_t next_ch = ucs[1]; if (x >= vp->width) break; diff --git a/firmware/export/bidi.h b/firmware/export/bidi.h index 14a09b5105..dc52e8671e 100644 --- a/firmware/export/bidi.h +++ b/firmware/export/bidi.h @@ -21,6 +21,6 @@ #ifndef BIDI_H #define BIDI_H -extern unsigned short *bidi_l2v(const unsigned char *str, int orientation); +ucschar_t *bidi_l2v(const unsigned char *str, int orientation); #endif /* BIDI_H */ diff --git a/firmware/export/config.h b/firmware/export/config.h index 54107d5a31..245777a671 100644 --- a/firmware/export/config.h +++ b/firmware/export/config.h @@ -1461,4 +1461,11 @@ Lyre prototype 1 */ #error "HAVE_LCD_SLEEP_SETTING requires HAVE_LCD_SLEEP" #endif +// XXX Figure out a better place to put this? +#ifdef UNICODE32 +#define ucschar_t unsigned int +#else +#define ucschar_t unsigned short +#endif + #endif /* __CONFIG_H__ */ diff --git a/firmware/export/cpu.h b/firmware/export/cpu.h index 701baa9e78..5cd875c351 100644 --- a/firmware/export/cpu.h +++ b/firmware/export/cpu.h @@ -18,6 +18,9 @@ * KIND, either express or implied. 
* ****************************************************************************/ +#ifndef __CPU_H +#define __CPU_H + #include "config.h" #if CONFIG_CPU == MCF5249 @@ -80,3 +83,5 @@ #if CONFIG_CPU == STM32H743 #include "cpu-stm32h743.h" #endif + +#endif /* __CPU_H */ diff --git a/firmware/export/font.h b/firmware/export/font.h index 2334a8bd1a..604635f985 100644 --- a/firmware/export/font.h +++ b/firmware/export/font.h @@ -86,7 +86,7 @@ struct font { int maxwidth; /* max width in pixels*/ unsigned int height; /* height in pixels*/ int ascent; /* ascent (baseline) height*/ - int firstchar; /* first character in bitmap*/ + unsigned int firstchar; /* first character in bitmap*/ int size; /* font size in glyphs*/ int depth; /* depth of the font, 0=1bit and 1=4bit */ const unsigned char *bits; /* 8-bit column bitmap data*/ @@ -95,24 +95,24 @@ struct font { const unsigned char *width; /* character widths or NULL if fixed*/ int defaultchar; /* default char (not glyph index)*/ int32_t bits_size; /* # bytes of glyph bits*/ - + /* file, buffer and cache management */ int fd; /* fd for the font file. >= 0 if cached */ int fd_width; /* fd for the font file. >= 0 if cached */ - int fd_offset; /* fd for the font file. >= 0 if cached */ + int fd_offset; /* fd for the font file. 
>= 0 if cached */ int handle; /* core_allocator handle */ - unsigned char *buffer_start; /* buffer to store the font in */ - unsigned char *buffer_position; /* position in the buffer */ + unsigned char *buffer_start; /* buffer to store the font in */ + unsigned char *buffer_position; /* position in the buffer */ unsigned char *buffer_end; /* end of the buffer */ size_t buffer_size; /* size of the buffer in bytes */ bool disabled; /* font disabled (use blank as fallback if not in cache) */ -#ifndef __PCTOOL__ +#ifndef __PCTOOL__ struct font_cache cache; uint32_t file_width_offset; /* offset to file width data */ uint32_t file_offset_offset; /* offset to file offset data */ int long_offset; -#endif - +#endif + }; /* font routines*/ @@ -134,7 +134,7 @@ void font_enable_all(void); struct font* font_get(int font); int font_getstringnsize(const unsigned char *str, size_t maxbytes, int *w, int *h, int fontnumber); int font_getstringsize(const unsigned char *str, int *w, int *h, int fontnumber); -int font_get_width(struct font* ft, unsigned short ch); -const unsigned char * font_get_bits(struct font* ft, unsigned short ch); +int font_get_width(struct font* ft, ucschar_t ch); +const unsigned char * font_get_bits(struct font* ft, ucschar_t ch); #endif diff --git a/firmware/export/hangul.h b/firmware/export/hangul.h index ca2b4ee079..30883a13b4 100644 --- a/firmware/export/hangul.h +++ b/firmware/export/hangul.h @@ -21,5 +21,4 @@ extern const char jamo_table[51][3]; -unsigned short hangul_join(unsigned short lead, unsigned short vowel, - unsigned short tail); +ucschar_t hangul_join(ucschar_t lead, ucschar_t vowel, ucschar_t tail); diff --git a/firmware/font.c b/firmware/font.c index f8a755168e..0f11ea5a0a 100644 --- a/firmware/font.c +++ b/firmware/font.c @@ -53,6 +53,12 @@ #define FONT_EXT "fnt" #define GLYPH_CACHE_EXT "gc" +#ifdef UNICODE32 +#define FC_HEADER_VAL 0x01000020 +#else +#define FC_HEADER_VAL 0x01000010 +#endif + /* max static loadable font buffer size */ #ifndef 
MAX_FONT_SIZE #if LCD_HEIGHT > 64 @@ -182,7 +188,7 @@ void font_init(void) static short readshort(struct font *pf) { - unsigned short s; + uint16_t s; s = *pf->buffer_position++ & 0xff; s |= (*pf->buffer_position++ << 8); @@ -361,8 +367,8 @@ static size_t font_glyphs_to_bufsize(struct font *pf, int glyphs) size_t bufsize; /* LRU bytes per glyph */ - bufsize = LRU_SLOT_OVERHEAD + sizeof(struct font_cache_entry) + - sizeof( unsigned short); + bufsize = LRU_SLOT_OVERHEAD + sizeof(struct font_cache_entry) + + sizeof(unsigned short); /* Image bytes per glyph */ bufsize += glyph_bytes(pf, pf->maxwidth); bufsize *= glyphs; @@ -371,7 +377,7 @@ static size_t font_glyphs_to_bufsize(struct font *pf, int glyphs) } static struct font* font_load_header(int fd, struct font *pheader, - struct font *pf, + struct font *pf, uint32_t *nwidth, uint32_t *noffset) { /* Load the header. Readshort() and readlong() * @@ -420,16 +426,24 @@ int font_load_ex( const char *path, size_t buf_size, int glyphs ) if ( fd < 0 ) return -1; +#ifdef UNICODE32 + if (glyphs && glyphs < 3) + glyphs = 3; /* Guarantee we'll always have at least 2 after alignment */ +#else + if (glyphs && glyphs < 2) + glyphs = 2; /* Guarantee we'll always have at least 1 after alignment */ +#endif + /* load font struct f with file header */ int file_size = filesize( fd ); struct font header; struct font f; - uint32_t nwidth, noffset; + uint32_t nwidth, noffset; if ( !font_load_header( fd, &header, &f, &nwidth, &noffset ) #if LCD_DEPTH < 16 || f.depth -#endif +#endif ) { close(fd); @@ -458,7 +472,7 @@ int font_load_ex( const char *path, size_t buf_size, int glyphs ) cached = true; else bufsize = file_size; - + /* check already loaded */ int font_id = find_font_index(path); @@ -503,7 +517,7 @@ int font_load_ex( const char *path, size_t buf_size, int glyphs ) return -1; } pd->refcount++; - //printf("reusing handle %d for %s (count: %d)\n", font_id, path, pd->refcount); + //printf("reusing handle %d for %s (count: %d)\n", 
font_id, path, pd->refcount); close(fd); return font_id; } @@ -522,7 +536,7 @@ int font_load_ex( const char *path, size_t buf_size, int glyphs ) return -1; font_id = open_slot; size_t path_bufsz = MAX(path_len + 1, 64); /* enough size for common case */ - /* allocate mem */ + /* allocate mem */ int handle = core_alloc_ex( bufsize + path_bufsz + sizeof( struct buflib_alloc_data ), &buflibops ); @@ -574,7 +588,7 @@ int font_load_ex( const char *path, size_t buf_size, int glyphs ) pf->fd_offset = -1; } else - { + { lseek( fd, 0, SEEK_SET); read(fd, pf->buffer_start, pf->buffer_size); @@ -723,7 +737,7 @@ load_cache_entry(struct font_cache_entry* p, void* callback_data) { struct font* pf = callback_data; - unsigned short char_code = p->_char_code; + ucschar_t char_code = p->_char_code; int fd; lock_font_handle(pf->handle, true); @@ -788,7 +802,7 @@ static void cache_create(struct font* pf) * when the font file is closed during USB */ unsigned char *cache_buf = pf->buffer_start + bitmap_size; size_t cache_size = pf->buffer_size - bitmap_size; - ALIGN_BUFFER(cache_buf, cache_size, 2); + ALIGN_BUFFER(cache_buf, cache_size, sizeof(ucschar_t)); memset(pf->buffer_start, 0, bitmap_size); /* Initialise cache */ font_cache_create(&pf->cache, cache_buf, cache_size, bitmap_size); @@ -797,7 +811,7 @@ static void cache_create(struct font* pf) /* * Returns width of character */ -int font_get_width(struct font* pf, unsigned short char_code) +int font_get_width(struct font* pf, ucschar_t char_code) { int width; struct font_cache_entry *e; @@ -820,7 +834,7 @@ int font_get_width(struct font* pf, unsigned short char_code) return width; } -const unsigned char* font_get_bits(struct font* pf, unsigned short char_code) +const unsigned char* font_get_bits(struct font* pf, ucschar_t char_code) { const unsigned char* bits; @@ -831,7 +845,7 @@ const unsigned char* font_get_bits(struct font* pf, unsigned short char_code) if (pf->fd >= 0 && pf != &sysfont) { - bits = + bits = (unsigned 
char*)font_cache_get(&pf->cache, char_code, false, load_cache_entry, pf)->bitmap; } @@ -884,7 +898,7 @@ static void glyph_file_write(void* data) { struct font_cache_entry* p = data; struct font* pf = cache_pf; - unsigned short ch; + ucschar_t ch; static int buffer_pos = 0; #define WRITE_BUFFER 256 static unsigned char buffer[WRITE_BUFFER]; @@ -899,11 +913,19 @@ static void glyph_file_write(void* data) } if ( p->_char_code == 0xffff ) return; - + ch = p->_char_code + pf->firstchar; - buffer[buffer_pos] = ch >> 8; +#ifdef UNICODE32 + buffer[buffer_pos] = (ch >> 24) & 0xff; + buffer[buffer_pos+1] = (ch >> 16) & 0xff; + buffer[buffer_pos+2] = (ch >> 8) & 0xff; + buffer[buffer_pos+3] = ch & 0xff; + buffer_pos += 4; +#else + buffer[buffer_pos] = (ch >> 8) & 0xff; buffer[buffer_pos+1] = ch & 0xff; buffer_pos += 2; +#endif return; } @@ -928,11 +950,13 @@ static void glyph_cache_save(int font_id) fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, 0666); if (fd >= 0) { + uint32_t header = FC_HEADER_VAL; + write(fd, &header, sizeof(header)); cache_pf = pf; cache_fd = fd; lru_traverse(&cache_pf->cache._lru, glyph_file_write); glyph_file_write(NULL); - if (cache_fd >= 0) + if (cache_fd >= 0) { close(cache_fd); cache_fd = -1; @@ -944,9 +968,9 @@ static void glyph_cache_save(int font_id) } -static int ushortcmp(const void *a, const void *b) +static int ucscharcmp(const void *a, const void *b) { - return ((int)(*(unsigned short*)a - *(unsigned short*)b)); + return ((int)(*(ucschar_t*)a - *(ucschar_t*)b)); } static NO_INLINE void glyph_cache_load(const char *font_path, struct font *pf) @@ -954,13 +978,13 @@ static NO_INLINE void glyph_cache_load(const char *font_path, struct font *pf) #define MAX_SORT 256 if (pf->fd >= 0) { int i, size, fd; - unsigned char tmp[2]; - unsigned short ch; - unsigned short glyphs[MAX_SORT]; - unsigned short glyphs_lru_order[MAX_SORT]; - int glyph_file_skip=0, glyph_file_size=0; - - int sort_size = pf->cache._capacity; + unsigned char tmp[sizeof(ucschar_t)]; 
+ ucschar_t ch; + ucschar_t glyphs[MAX_SORT]; + ucschar_t glyphs_lru_order[MAX_SORT]; + unsigned int glyph_file_skip=0, glyph_file_size=0; + + int sort_size = pf->cache._capacity; if ( sort_size > MAX_SORT ) sort_size = MAX_SORT; @@ -974,31 +998,41 @@ static NO_INLINE void glyph_cache_load(const char *font_path, struct font *pf) fd = open(GLYPH_CACHE_FILE, O_RDONLY|O_BINARY); #endif if (fd >= 0) { + /* Header */ + uint32_t hdr = 0; + read(fd, &hdr, sizeof(hdr)); + if (hdr != FC_HEADER_VAL) + goto latin; /* only read what fits */ glyph_file_size = filesize( fd ); - if ( glyph_file_size > 2*pf->cache._capacity ) { - glyph_file_skip = glyph_file_size - 2*pf->cache._capacity; - lseek( fd, glyph_file_skip, SEEK_SET ); + if (glyph_file_size < sizeof(uint32_t)) + goto latin; + glyph_file_size -= sizeof(uint32_t); + if ( glyph_file_size > (int)sizeof(ucschar_t)*pf->cache._capacity ) { + glyph_file_skip = glyph_file_size - sizeof(ucschar_t)*pf->cache._capacity; + lseek( fd, glyph_file_skip + sizeof(uint32_t), SEEK_SET ); } - while(1) { - for ( size = 0; - read( fd, tmp, 2 ) == 2 && size < sort_size; - size++ ) + read( fd, tmp, sizeof(tmp) ) == sizeof(tmp) && size < sort_size; + size++ ) { +#ifdef UNICODE32 + glyphs[size] = (tmp[0] << 24) | (tmp[1] << 16) | (tmp[2] << 8) | tmp[3]; +#else glyphs[size] = (tmp[0] << 8) | tmp[1]; +#endif glyphs_lru_order[size] = glyphs[size]; } - + /* sort glyphs array to make sector cache happy */ - qsort((void *)glyphs, size, sizeof(unsigned short), - ushortcmp ); + qsort((void *)glyphs, size, sizeof(ucschar_t), + ucscharcmp ); /* load font bitmaps */ for( i = 0; i < size ; i++ ) - font_get_bits(pf, glyphs[i]); - + font_get_bits(pf, glyphs[i]); + /* redo to fix lru order */ for ( i = 0; i < size ; i++) font_get_bits(pf, glyphs_lru_order[i]); @@ -1009,6 +1043,7 @@ static NO_INLINE void glyph_cache_load(const char *font_path, struct font *pf) close(fd); } else { + latin: /* load latin1 chars into cache */ for ( ch = 32 ; ch < 256 && ch < 
pf->cache._capacity + 32; ch++ ) font_get_bits(pf, ch); @@ -1040,7 +1075,7 @@ struct font* font_get(int font) /* * Returns width of character */ -int font_get_width(struct font* pf, unsigned short char_code) +int font_get_width(struct font* pf, ucschar_t char_code) { /* check input range*/ if (char_code < pf->firstchar || char_code >= pf->firstchar+pf->size) @@ -1050,7 +1085,7 @@ int font_get_width(struct font* pf, unsigned short char_code) return pf->width? pf->width[char_code]: pf->maxwidth; } -const unsigned char* font_get_bits(struct font* pf, unsigned short char_code) +const unsigned char* font_get_bits(struct font* pf, ucschar_t char_code) { const unsigned char* bits; @@ -1079,7 +1114,7 @@ int font_getstringnsize(const unsigned char *str, size_t maxbytes, int *w, int * { struct font* pf = font_get(fontnum); font_lock( fontnum, true ); - unsigned short ch; + ucschar_t ch; int width = 0; size_t b = maxbytes - 1; diff --git a/firmware/font_cache.c b/firmware/font_cache.c index 0b03edaf76..536fcfb522 100644 --- a/firmware/font_cache.c +++ b/firmware/font_cache.c @@ -43,13 +43,18 @@ void font_cache_create( int font_cache_entry_size = sizeof(struct font_cache_entry) + bitmap_bytes_size; - /* make sure font cache entries are a multiple of 16 bits */ - if (font_cache_entry_size % 2 != 0) + /* make sure font cache entries are a multiple of sizeof(ucschar_t) */ + while (font_cache_entry_size & (sizeof(ucschar_t) -1)) font_cache_entry_size++; int cache_size = buf_size / (font_cache_entry_size + LRU_SLOT_OVERHEAD + sizeof(short)); +#ifdef UNICODE32 + /* Ensure LRU index size is a multiple of 32 bits */ + cache_size &= ~1; +#endif + fcache->_size = 1; fcache->_capacity = cache_size; fcache->_prev_result = 0; @@ -72,12 +77,12 @@ void font_cache_create( /************************************************************************* * Binary search that attempts a primary lucky guess that succeeds - * when there are consecutive codes in the cache between previous - * search and 
new search. Returns a negative of insertion point if + * when there are consecutive codes in the cache between previous + * search and new search. Returns a negative of insertion point if * not found. ************************************************************************/ static int search(struct font_cache* fcache, - unsigned short char_code, + ucschar_t char_code, int size, int *p_insertion_point ) { @@ -85,12 +90,12 @@ static int search(struct font_cache* fcache, int left, right, mid=-1, c; left = 0; right = size; - + /* go for a lucky guess */ - mid = char_code + + mid = char_code + fcache->_prev_result - fcache->_prev_char_code; - - /* check bounds */ + + /* check bounds */ if ( mid < 0 || mid > right ) mid = ( left + right ) / 2; @@ -114,7 +119,7 @@ static int search(struct font_cache* fcache, mid = (left + right) / 2; } while (left <= right); - + /* not found */ *p_insertion_point = mid; return 0; @@ -124,7 +129,7 @@ static int search(struct font_cache* fcache, ******************************************************************************/ struct font_cache_entry* font_cache_get( struct font_cache* fcache, - unsigned short char_code, + ucschar_t char_code, bool cache_only, void (*callback) (struct font_cache_entry* p, void *callback_data), void *callback_data) @@ -132,7 +137,7 @@ struct font_cache_entry* font_cache_get( struct font_cache_entry* p; int insertion_point; int index_to_replace; - + /* check bounds */ p = lru_data(&fcache->_lru, fcache->_index[0]); if( char_code < p->_char_code ) @@ -158,14 +163,14 @@ struct font_cache_entry* font_cache_get( } else { - p = lru_data(&fcache->_lru, + p = lru_data(&fcache->_lru, fcache->_index[insertion_point+1]); if ( char_code > p->_char_code ) insertion_point++; } } } - + /* not found */ if (cache_only) return NULL; diff --git a/firmware/hangul.c b/firmware/hangul.c index 01c6ba2fab..f171d91486 100644 --- a/firmware/hangul.c +++ b/firmware/hangul.c @@ -18,6 +18,7 @@ * KIND, either express or implied. 
* ****************************************************************************/ +#include "config.h" #include "hangul.h" const char jamo_table[51][3] = { @@ -75,10 +76,9 @@ const char jamo_table[51][3] = { }; /* takes three jamo chars and joins them into one hangul */ -unsigned short hangul_join(unsigned short lead, unsigned short vowel, - unsigned short tail) +ucschar_t hangul_join(ucschar_t lead, ucschar_t vowel, ucschar_t tail) { - unsigned short ch = 0xfffd; + ucschar_t ch = 0xfffd; if (lead < 0x3131 || lead > 0x3163) return ch; diff --git a/firmware/include/diacritic.h b/firmware/include/diacritic.h index cfb6ac034c..e7a51eb095 100644 --- a/firmware/include/diacritic.h +++ b/firmware/include/diacritic.h @@ -27,7 +27,7 @@ * Sets is_rtl (if it's not NULL) to whether the character * belongs to an RTL language. */ -bool is_diacritic(const unsigned short char_code, bool *is_rtl); +bool is_diacritic(const ucschar_t char_code, bool *is_rtl); /* Note IS_DIACRITIC macros may elide the function call * therefore there is a separate _RTL version that requires a bool pointer diff --git a/firmware/include/font_cache.h b/firmware/include/font_cache.h index 1809720ed5..e9c2edbd1b 100644 --- a/firmware/include/font_cache.h +++ b/firmware/include/font_cache.h @@ -21,24 +21,25 @@ #ifndef _FONT_CACHE_H_ #define _FONT_CACHE_H_ #include +#include "config.h" #include "lru.h" /******************************************************************************* - * + * ******************************************************************************/ struct font_cache { struct lru _lru; - int _size; - int _capacity; - int _prev_char_code; + unsigned int _size; + unsigned int _capacity; + ucschar_t _prev_char_code; int _prev_result; short *_index; /* index of lru handles in char_code order */ }; struct font_cache_entry { - unsigned short _char_code; + ucschar_t _char_code; unsigned char width; unsigned char bitmap[1]; /* place holder */ }; @@ -55,7 +56,7 @@ void font_cache_create( * Note: With 
cache_only this can return NULL, which otherwise never happens */ struct font_cache_entry* font_cache_get( struct font_cache* fcache, - unsigned short char_code, + ucschar_t char_code, bool cache_only, void (*callback) (struct font_cache_entry* p, void *callback_data), void *callback_data); diff --git a/firmware/include/lru.h b/firmware/include/lru.h index cd271afbd8..5d33640983 100644 --- a/firmware/include/lru.h +++ b/firmware/include/lru.h @@ -33,6 +33,7 @@ struct lru void *_base; }; +/* LRU_SLOT_OVERHEAD is the fixed portion of struct lru_node */ #define LRU_SLOT_OVERHEAD (2 * sizeof(short)) /* Create LRU list with specified size from buf. */ @@ -45,4 +46,3 @@ void *lru_data(struct lru* pl, short handle); void lru_traverse(struct lru* pl, void (*callback)(void* data)); #endif /* LRU_H */ - diff --git a/firmware/include/rbunicode.h b/firmware/include/rbunicode.h index e4cd6be2fe..48ca1b2583 100644 --- a/firmware/include/rbunicode.h +++ b/firmware/include/rbunicode.h @@ -27,7 +27,7 @@ */ #ifndef _RBUNICODE_H_ #define _RBUNICODE_H_ - + #include "config.h" #include @@ -63,8 +63,9 @@ unsigned char* utf16LEdecode(const unsigned char *utf16, unsigned char *utf8, in unsigned char* utf16BEdecode(const unsigned char *utf16, unsigned char *utf8, int count); unsigned char* utf16decode(const unsigned char *utf16, unsigned char *utf8, int count, int utf8_size, bool le); bool utf16_has_bom(const unsigned char *utf16, bool *le); +unsigned long utf16len_utf8(const unsigned char *utf8); unsigned long utf8length(const unsigned char *utf8); -const unsigned char* utf8decode(const unsigned char *utf8, unsigned short *ucs); +const unsigned char* utf8decode(const unsigned char *utf8, ucschar_t *ucs); void set_codepage(int cp); int get_codepage(void); int utf8seek(const unsigned char* utf8, int offset); diff --git a/firmware/target/hosted/filesystem-win32.c b/firmware/target/hosted/filesystem-win32.c index ebb7f283ac..ce4500f46d 100644 --- a/firmware/target/hosted/filesystem-win32.c 
+++ b/firmware/target/hosted/filesystem-win32.c
@@ -63,55 +63,78 @@ static void win32_last_error_errno(void)
 static HANDLE win32_open(const char *ospath);
 static int win32_stat(const char *ospath, LPBY_HANDLE_FILE_INFORMATION lpInfo);
 
-unsigned short * strcpy_utf8ucs2(unsigned short *buffer,
-                                 const unsigned char *utf8)
+static unsigned short * strcpy_utf8utf16(unsigned short *buffer,
+                                         const unsigned char *utf8)
 {
-    for (wchar_t *ucs2 = buffer;
-         ((utf8 = utf8decode(utf8, ucs2)), *ucs2); ucs2++);
+    ucschar_t cp = 1; /* non-zero so the first character gets decoded */
+    for (wchar_t *ucs = buffer; cp; ucs++) { /* stop once NUL is stored */
+        utf8 = utf8decode(utf8, &cp);
+#ifdef UNICODE32
+        if (cp >= 0x10000) { /* U+10000 itself already needs a pair */
+            cp -= 0x10000;
+            *ucs++ = 0xd800 | (cp >> 10);   /* high surrogate */
+            cp = 0xdc00 | (cp & 0x3ff);     /* low surrogate */
+        }
+#endif
+        *ucs = cp;
+    }
     return buffer;
 }
 
-#if 0
-unsigned char * strcpy_ucs2utf8(unsigned char *buffer,
-                                const unsigned short *ucs2)
+#if 0 /* Unused in current code */
+static unsigned char * strcpy_utf16utf8(unsigned char *buffer,
+                                        const unsigned short *utf16buf)
 {
-    for (unsigned char *utf8 = buffer;
-         ((utf8 = utf8encode(*ucs2, utf8)), *ucs2); ucs2++);
+    unsigned char *utf8 = buffer;
+
+    /* windows is always LE */
+    const int le = 1;
+
+    while (*utf16buf) {
+        const unsigned char *utf16 = (const unsigned char *)utf16buf;
+        unsigned long ucs;
+        /* Check for a surrogate pair */
+        if (*(utf16 + le) >= 0xD8 && *(utf16 + le) < 0xE0) {
+            ucs = 0x10000 + ((utf16[1 - le] << 10) | ((utf16[le] - 0xD8) << 18)
+                             | utf16[2 + (1 - le)] | ((utf16[2 + le] - 0xDC) << 8));
+            utf16buf += 2;
+        } else {
+            ucs = utf16[le] << 8 | utf16[1 - le];
+            utf16buf++;
+        }
+        utf8 = utf8encode(ucs, utf8);
+    }
     return buffer;
 }
-
-size_t strlen_utf8ucs2(const unsigned char *utf8)
-{
-    /* This won't properly count multiword ucs2 so use the alternative
-       below for now which doesn't either */
-    size_t length = 0;
-    unsigned short ucschar[2];
-    for (unsigned char c = *utf8; c;
-        ((utf8 = utf8decode(utf8, ucschar)), c = *utf8))
-        length++;
-
-    return length;
-}
-#endif /* 0 */
-
-size_t strlen_utf8ucs2(const unsigned char *utf8)
-{
-    return utf8length(utf8);
-}
-
-size_t strlen_ucs2utf8(const unsigned short *ucs2)
+static size_t strlen_utf16utf8(const unsigned short *utf16buf)
 {
     size_t length = 0;
     unsigned char utf8char[4];
 
-    for (unsigned short c = *ucs2; c; (c = *++ucs2))
-        length += utf8encode(c, utf8char) - utf8char;
+    /* windows is always LE */
+    const int le = 1;
 
+    while (*utf16buf) {
+        const unsigned char *utf16 = (const unsigned char *)utf16buf;
+        unsigned long ucs;
+        /* Check for a surrogate pair */
+        if (*(utf16 + le) >= 0xD8 && *(utf16 + le) < 0xE0) {
+            ucs = 0x10000 + ((utf16[1 - le] << 10) | ((utf16[le] - 0xD8) << 18)
+                             | utf16[2 + (1 - le)] | ((utf16[2 + le] - 0xDC) << 8));
+            utf16buf += 2;
+        } else {
+            ucs = utf16[le] << 8 | utf16[1 - le];
+            utf16buf++;
+        }
+        length += utf8encode(ucs, utf8char) - utf8char;
+    }
     return length;
 }
+#endif
 
-size_t strlcpy_ucs2utf8(char *buffer, const unsigned short *ucs2,
-                        size_t bufsize)
+/* Note: Must be exported */
+size_t strlcpy_utf16utf8(char *buffer, const unsigned short *utf16,
+                         size_t bufsize)
 {
     if (!buffer)
         bufsize = 0;
@@ -119,12 +142,24 @@ size_t strlcpy_ucs2utf8(char *buffer, const unsigned short *ucs2,
     size_t length = 0;
     unsigned char utf8char[4];
 
-    for (unsigned short c = *ucs2; c; (c = *++ucs2))
+    unsigned long ucc;
+    while(*utf16)
     {
+        /* Check for a surrogate UTF16 pair */
+        if (*utf16 >= 0xd800 && *utf16 < 0xdc00 &&
+            *(utf16+1) >= 0xdc00 && *(utf16+1) < 0xe000) {
+            ucc = 0x10000 + (((*utf16 & 0x3ff) << 10) | (*(utf16+1) & 0x3ff));
+            utf16++;
+        } else {
+            ucc = *utf16;
+        }
+
         /* If the last character won't fit, this won't split it */
-        size_t utf8size = utf8encode(c, utf8char) - utf8char;
+        size_t utf8size = utf8encode(ucc, utf8char) - utf8char;
         if ((length += utf8size) < bufsize)
             buffer = mempcpy(buffer, utf8char, utf8size);
+
+        utf16++;
     }
 
     /* Above won't ever copy to very end */
@@ -134,44 +169,44 @@ size_t strlcpy_ucs2utf8(char *buffer, const unsigned short *ucs2,
     return length;
 }
 
-#define _toucs2(utf8) \ +#define _toutf16(utf8) \ ({ const char *_utf8 = (utf8); \ - size_t _l = strlen_utf8ucs2(_utf8); \ + size_t _l = utf16len_utf8(_utf8); \ void *_buffer = alloca((_l + 1)*2); \ - strcpy_utf8ucs2(_buffer, _utf8); }) + strcpy_utf8utf16(_buffer, _utf8); }) -#define _toutf8(ucs2) \ - ({ const char *_ucs2 = (ucs2); \ - size_t _l = strlen_ucs2utf8(_ucs2); \ +#define _toutf8(utf16) \ + ({ const char *_ucs = (utf16); \ + size_t _l = strlen_utf16utf8(_ucs); \ void *_buffer = alloca(_l + 1); \ - strcpy_ucs2utf8(_buffer, _ucs2); }) + strcpy_utf16utf8(_buffer, _ucs); }) int os_open(const char *ospath, int oflag, ...) { - return _wopen(_toucs2(ospath), oflag __OPEN_MODE_ARG); + return _wopen(_toutf16(ospath), oflag __OPEN_MODE_ARG); } int os_creat(const char *ospath, mode_t mode) { - return _wcreat(_toucs2(ospath), mode); + return _wcreat(_toutf16(ospath), mode); } int os_stat(const char *ospath, struct _stat *s) { - return _wstat(_toucs2(ospath), s); + return _wstat(_toutf16(ospath), s); } int os_remove(const char *ospath) { - return _wremove(_toucs2(ospath)); + return _wremove(_toutf16(ospath)); } int os_rename(const char *osold, const char *osnew) { int errnum = errno; - const wchar_t *wchosold = _toucs2(osold); - const wchar_t *wchosnew = _toucs2(osnew); + const wchar_t *wchosold = _toutf16(osold); + const wchar_t *wchosnew = _toutf16(osnew); int rc = _wrename(wchosold, wchosnew); if (rc < 0 && errno == EEXIST) @@ -213,18 +248,18 @@ bool os_file_exists(const char *ospath) _WDIR * os_opendir(const char *osdirname) { - return _wopendir(_toucs2(osdirname)); + return _wopendir(_toutf16(osdirname)); } int os_mkdir(const char *ospath, mode_t mode) { - return _wmkdir(_toucs2(ospath)); + return _wmkdir(_toutf16(ospath)); (void)mode; } int os_rmdir(const char *ospath) { - return _wrmdir(_toucs2(ospath)); + return _wrmdir(_toutf16(ospath)); } int os_dirfd(_WDIR *osdirp) @@ -288,7 +323,7 @@ static HANDLE win32_open(const char *ospath) { /* 
FILE_FLAG_BACKUP_SEMANTICS is required for this to succeed at opening a directory */ - HANDLE h = CreateFileW(_toucs2(ospath), GENERIC_READ, + HANDLE h = CreateFileW(_toutf16(ospath), GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL); @@ -479,7 +514,7 @@ void volume_size(IF_MV(int volume,) sector_t *sizep, sector_t *freep) char volpath[MAX_PATH]; if (os_volume_path(IF_MV(volume, ) volpath, sizeof (volpath)) >= 0) - GetDiskFreeSpaceExW(_toucs2(volpath), &free, &size, NULL); + GetDiskFreeSpaceExW(_toutf16(volpath), &free, &size, NULL); if (sizep) *sizep = size.QuadPart / 1024; diff --git a/firmware/target/hosted/filesystem-win32.h b/firmware/target/hosted/filesystem-win32.h index 1d8f2749f9..c6e89a66aa 100644 --- a/firmware/target/hosted/filesystem-win32.h +++ b/firmware/target/hosted/filesystem-win32.h @@ -27,10 +27,10 @@ /* filesystem-win32.c contains some string functions that could be useful * elsewhere; just move them away to unicode.c or something if they prove * so. 
*/ -size_t strlcpy_ucs2utf8(char *buffer, const unsigned short *ucs, - size_t bufsize); +size_t strlcpy_utf16utf8(char *buffer, const unsigned short *utf16, + size_t bufsize); -#define strlcpy_from_os strlcpy_ucs2utf8 +#define strlcpy_from_os strlcpy_utf16utf8 #endif /* __MINGW32__ */ #endif /* !OSFUNCTIONS_DECLARED */ diff --git a/lib/rbcodec/metadata/id3tags.c b/lib/rbcodec/metadata/id3tags.c index bbcf728dc7..8016574ff1 100644 --- a/lib/rbcodec/metadata/id3tags.c +++ b/lib/rbcodec/metadata/id3tags.c @@ -1092,9 +1092,9 @@ retry_with_limit: if (!parse_as_utf8(tag, &bytesread)) { - /* UTF-8 could potentially be 3 times larger */ + /* UTF-8 could potentially be 4 times larger */ /* so we need to create a new buffer */ - int utf8_size = (3 * bytesread); + int utf8_size = (4 * bytesread); if (utf8_size > ID3V2_BUF_SIZE) { //limit stack allocation to avoid stack overflow diff --git a/tools/convbdf.c b/tools/convbdf.c index 5534de1cbb..d452a3357c 100644 --- a/tools/convbdf.c +++ b/tools/convbdf.c @@ -116,7 +116,7 @@ int gen_h = 0; int gen_fnt = 0; int gen_map = 1; int start_char = 0; -int limit_char = 65535; +int limit_char = 0x10FFFF; int oflag = 0; char outfile[256]; @@ -569,7 +569,7 @@ struct font* bdf_read_font(char *path) int bdf_read_header(FILE *fp, struct font* pf) { int encoding; - int firstchar = 65535; + int firstchar = 0x10FFFF; int lastchar = -1; char buf[256]; char facename[256]; diff --git a/tools/convttf.c b/tools/convttf.c index 3df08b664f..845ea9c90a 100644 --- a/tools/convttf.c +++ b/tools/convttf.c @@ -91,7 +91,7 @@ static int eid = DEFAULT_ENCODING_ID; static FT_UShort nocmap; int pct = 0; /* display ttc table if it is not zero. */ -FT_Long max_char = 65535; +FT_Long max_char = 0x10FFFF; int pixel_size = 15; FT_Long start_char = 0; FT_Long limit_char;