unicode: Support characters beyond the first unicode plane

We used 16-bit variables to store the 'character code' everywhere but
this won't let us represent anything beyond U+FFFF.

This patch changes those variables to a custom type that can be 32 or 16
bits depending on the build, and adjusts numerous internal APIs and
data structures to match.  This includes:

 * utf8decode() and friends
 * font manipulation, caching, rendering, and generation
 * on-screen keyboard
 * FAT filesystem (parsing and generating utf16 LFNs)
 * WIN32 simulator platform code

Note that this patch doesn't _enable_ support for code points wider than
16 bits; a follow-up patch will turn that on for appropriate targets.

Appears to work on:

  * hosted linux, native, linux simulator in both 16/32-bit modes.

Needs testing on:

  * windows and macos simulator (16bit+32bit)

Change-Id: Iba111b27d2433019b6bff937cf1ebd2c4353a0e8
This commit is contained in:
Solomon Peachy 2024-12-17 08:55:21 -05:00
parent 2a88253426
commit a2c10f6189
44 changed files with 476 additions and 330 deletions

View file

@ -82,7 +82,7 @@ static void kdb_init(void)
sleep(HZ/10);
}
int kbd_input(char* text, int buflen, unsigned short *kbd)
int kbd_input(char* text, int buflen, ucschar_t *kbd)
{
(void)kbd;
JNIEnv e = *env_ptr;

View file

@ -23,7 +23,7 @@
/* '*kbd', same format as https://www.rockbox.org/wiki/LoadableKeyboardLayouts */
int kbd_input(char* buffer, int buflen, unsigned short *kbd);
int kbd_input(char* buffer, int buflen, ucschar_t *kbd);
int load_kbd(unsigned char* filename);

View file

@ -176,7 +176,7 @@ int plugin_open(const char *plugin, const char *parameter);
* when this happens please take the opportunity to sort in
* any new functions "waiting" at the end of the list.
*/
#define PLUGIN_API_VERSION 273
#define PLUGIN_API_VERSION 274
/* 239 Marks the removal of ARCHOS HWCODEC and CHARCELL */
@ -296,15 +296,15 @@ struct plugin_api {
#if defined(HAVE_LCD_ENABLE) || defined(HAVE_LCD_SLEEP)
void (*button_queue_post)(long id, intptr_t data);
#endif
unsigned short *(*bidi_l2v)( const unsigned char *str, int orientation );
bool (*is_diacritic)(const unsigned short char_code, bool *is_rtl);
const unsigned char *(*font_get_bits)( struct font *pf, unsigned short char_code );
ucschar_t *(*bidi_l2v)(const unsigned char *str, int orientation);
bool (*is_diacritic)(const ucschar_t char_code, bool *is_rtl);
const unsigned char *(*font_get_bits)(struct font *pf, ucschar_t char_code);
int (*font_load)(const char *path);
void (*font_unload)(int font_id);
struct font* (*font_get)(int font);
int (*font_getstringsize)(const unsigned char *str, int *w, int *h,
int fontnumber);
int (*font_get_width)(struct font* pf, unsigned short char_code);
int (*font_get_width)(struct font* pf, ucschar_t char_code);
void (*screen_clear_area)(struct screen * display, int xstart, int ystart,
int width, int height);
void (*gui_scrollbar_draw)(struct screen * screen, int x, int y,
@ -667,7 +667,7 @@ struct plugin_api {
const unsigned char * const *units,
unsigned int unit_count, bool binary_scale);
/* unicode stuff */
const unsigned char* (*utf8decode)(const unsigned char *utf8, unsigned short *ucs);
const unsigned char* (*utf8decode)(const unsigned char *utf8, ucschar_t *ucs);
unsigned char* (*iso_decode)(const unsigned char *iso, unsigned char *utf8, int cp, int count);
unsigned char* (*utf16LEdecode)(const unsigned char *utf16, unsigned char *utf8, int count);
unsigned char* (*utf16BEdecode)(const unsigned char *utf16, unsigned char *utf8, int count);
@ -923,7 +923,7 @@ struct plugin_api {
int (*rand)(void);
void (*qsort)(void *base, size_t nmemb, size_t size,
int(*compar)(const void *, const void *));
int (*kbd_input)(char* buffer, int buflen, unsigned short *kbd);
int (*kbd_input)(char* buffer, int buflen, ucschar_t *kbd);
struct tm* (*get_time)(void);
struct tm * (*gmtime_r)(const time_t *timep, struct tm *tm);
#if CONFIG_RTC

View file

@ -70,7 +70,7 @@ static const char keybd_layout[] =
* - \n does not create a key, but it also consumes one element
* - the final null terminator is equivalent to \n
* - since sizeof includes the null terminator we don't need +1 for that. */
static unsigned short kbd_buf[sizeof(keybd_layout)];
static ucschar_t kbd_buf[sizeof(keybd_layout)];
/****************** prototypes ******************/
void print_scroll(char* string); /* implements a scrolling screen */
@ -250,7 +250,7 @@ static int announce_menu_cb(int action,
struct gui_synclist *this_list)
{
(void)this_item;
unsigned short* kbd_p;
ucschar_t *kbd_p;
int selection = rb->gui_synclist_get_sel_pos(this_list);

View file

@ -185,7 +185,7 @@ zchar os_read_key(int timeout, bool show_cursor)
{
int r;
char inputbuf[5];
short key;
ucschar_t key;
zchar zkey;
for(;;)
@ -214,7 +214,7 @@ zchar os_read_line(int max, zchar *buf, int timeout, int width, int continued)
char inputbuf[256];
const char *in;
char *out;
short key;
ucschar_t key;
zchar zkey;
for(;;)

View file

@ -206,8 +206,8 @@ static int prompt_filename(char *buf, size_t bufsz)
{
#define KBD_LAYOUT "abcdefghijklmnop\nqrstuvwxyz |()[]\n1234567890 /._-+\n\n" \
"\nABCDEFGHIJKLMNOP\nQRSTUVWXYZ |()[]\n1234567890 /._-+"
unsigned short kbd[sizeof(KBD_LAYOUT) + 10];
unsigned short *kbd_p = kbd;
ucschar_t kbd[sizeof(KBD_LAYOUT) + 10];
ucschar_t *kbd_p = kbd;
if (!kbd_create_layout(KBD_LAYOUT, kbd, sizeof(kbd)))
kbd_p = NULL;

View file

@ -653,8 +653,8 @@ void grey_gray_bitmap(const unsigned char *src, int x, int y, int width,
/* Put a string at a given pixel position, skipping first ofs pixel columns */
void grey_putsxyofs(int x, int y, int ofs, const unsigned char *str)
{
int ch;
unsigned short *ucs;
ucschar_t ch;
ucschar_t *ucs;
struct font* pf;
if (_grey_info.clip_b <= _grey_info.clip_t)

View file

@ -22,8 +22,8 @@
#include "kbd_helper.h"
/* USAGE:
unsigned short kbd[64];
unsigned short *kbd_p = kbd;
ucschar_t kbd[64];
ucschar_t *kbd_p = kbd;
if (!kbd_create_layout("ABCD1234\n", kbd, sizeof(kbd)))
kbd_p = NULL;
@ -34,14 +34,14 @@
* success returns size of buffer used
* failure returns 0
*/
int kbd_create_layout(const char *layout, unsigned short *buf, int bufsz)
int kbd_create_layout(const char *layout, ucschar_t *buf, int bufsz)
{
unsigned short *pbuf;
ucschar_t *pbuf;
const unsigned char *p = layout;
int len = 0;
int total_len = 0;
pbuf = buf;
while (*p && (pbuf - buf + (ptrdiff_t) sizeof(unsigned short)) < bufsz)
while (*p && (pbuf - buf + (ptrdiff_t) sizeof(ucschar_t)) < bufsz)
{
p = rb->utf8decode(p, &pbuf[len+1]);
if (pbuf[len+1] == '\n')
@ -60,7 +60,7 @@ int kbd_create_layout(const char *layout, unsigned short *buf, int bufsz)
*pbuf = len;
pbuf[len+1] = 0xFEFF; /* mark end of characters */
total_len += len + 1;
return total_len * sizeof(unsigned short);
return total_len * sizeof(ucschar_t);
}
return 0;

View file

@ -22,6 +22,6 @@
#define KBD_HELPER_H
/* create a custom keyboard layout for kbd_input */
int kbd_create_layout(const char *layout, unsigned short *buf, int bufsz);
int kbd_create_layout(const char *layout, ucschar_t *buf, int bufsz);
#endif /* KBD_HELPER_H */

View file

@ -62,7 +62,7 @@ static const char* get_next_line(const char *text, struct view_info *info)
total = 0;
while(*ptr)
{
unsigned short ch;
ucschar_t ch;
n = ((intptr_t)rb->utf8decode(ptr, &ch) - (intptr_t)ptr);
if (rb->is_diacritic(ch, NULL))
w = 0;

View file

@ -422,7 +422,7 @@ static struct lrc_brpos *calc_brpos(struct lrc_line *lrc_line, int i)
int nlrcbrpos = 0, max_lrcbrpos;
uifont = rb->screens[0]->getuifont();
struct font* pf = rb->font_get(uifont);
unsigned short ch;
ucschar_t ch;
struct snap {
int count, width;
int nword;

View file

@ -141,6 +141,7 @@ RB_WRAP(touchscreen_mode)
#endif
// XXX this may be broken with 32-bit ucschar_t
RB_WRAP(kbd_input)
{
/*kbd_input(text, layout)*
@ -168,7 +169,7 @@ RB_WRAP(kbd_input)
layout = NULL;
}
if(!rb->kbd_input(buffer, LUAL_BUFFERSIZE, (unsigned short *)layout))
if(!rb->kbd_input(buffer, LUAL_BUFFERSIZE, (ucschar_t *)layout))
{
luaL_addstring(&b, buffer);
luaL_pushresult(&b);

View file

@ -1073,8 +1073,8 @@ static void draw_oriented_alpha_bitmap_part(const unsigned char *src,
static void draw_putsxy_oriented(int x, int y, const char *str)
{
unsigned short ch;
unsigned short *ucs;
ucschar_t ch;
ucschar_t *ucs;
int ofs = MIN(x, 0);
struct font* pf = rb->font_get(osd.font);

View file

@ -969,8 +969,8 @@ static void buffer_alpha_bitmap_part(
static void buffer_putsxyofs( fb_data *buf, int buf_width, int buf_height,
int x, int y, int ofs, const unsigned char *str )
{
unsigned short ch;
unsigned short *ucs;
ucschar_t ch;
ucschar_t *ucs;
struct font *pf = rb->font_get( FONT_UI );
if( !pf ) pf = rb->font_get( FONT_SYSFIXED );

View file

@ -135,10 +135,10 @@ static void sleep_yield(void)
#define yield sleep_yield
}
/* make sure tag can be displayed by font pf*/
/* make sure tag can be displayed by font pf */
static bool text_is_displayable(struct font *pf, unsigned char *src)
{
unsigned short code;
ucschar_t code;
const unsigned char *ptr = src;
while(*ptr)
{

View file

@ -41,7 +41,7 @@ static unsigned text_type = TV_TEXT_UNKNOWN;
static const unsigned char *end_ptr;
static unsigned short ucsbuf[TV_MAX_BLOCKS][TV_MAX_CHARS_PER_BLOCK];
static ucschar_t ucsbuf[TV_MAX_BLOCKS][TV_MAX_CHARS_PER_BLOCK];
static unsigned char utf8buf[TV_MAX_CHARS_PER_BLOCK * (2 * 3)];
static unsigned char *outbuf;
@ -54,7 +54,7 @@ static bool expand_extra_line = false;
/* when a line is divided, this value sets true. */
static bool is_break_line = false;
static unsigned short break_chars[] =
static unsigned short break_chars[] = // XXX promote to ucschar_t if we get a codepoint > 0xffff
{
0,
/* halfwidth characters */
@ -76,7 +76,7 @@ static unsigned short break_chars[] =
};
/* the characters which is not judged as space with isspace() */
static unsigned short extra_spaces[] = { 0, 0x3000 };
/* XXX promote to ucschar_t if we get a codepoint > 0xffff.
 * NOTE(review): tv_isspace() bounds its scan of this table with
 * sizeof(extra_spaces)/sizeof(ucschar_t). While the elements stay 16-bit,
 * a 32-bit ucschar_t halves that count, so only index 0 is examined and
 * 0x3000 is never compared. break_chars / tv_is_line_break_char() has the
 * same element-size mismatch. */
static unsigned short extra_spaces[] = { 0, 0x3000 };
static int tv_glyph_width(int ch)
{
@ -93,7 +93,7 @@ static int tv_glyph_width(int ch)
return rb->font_get_width(rb->font_get(preferences->font_id), ch);
}
static unsigned char *tv_get_ucs(const unsigned char *str, unsigned short *ch)
static unsigned char *tv_get_ucs(const unsigned char *str, ucschar_t *ch)
{
int count = 1;
unsigned char utf8_tmp[3];
@ -148,7 +148,7 @@ static unsigned char *tv_get_ucs(const unsigned char *str, unsigned short *ch)
return (unsigned char *)str + count;
}
static void tv_decode2utf8(const unsigned short *ucs, int count)
static void tv_decode2utf8(const ucschar_t *ucs, int count)
{
int i;
@ -158,7 +158,7 @@ static void tv_decode2utf8(const unsigned short *ucs, int count)
*outbuf = '\0';
}
static bool tv_is_line_break_char(unsigned short ch)
static bool tv_is_line_break_char(ucschar_t ch)
{
size_t i;
@ -166,7 +166,7 @@ static bool tv_is_line_break_char(unsigned short ch)
if (preferences->word_mode == WM_CHOP)
return false;
for (i = 0; i < sizeof(break_chars)/sizeof(unsigned short); i++)
for (i = 0; i < sizeof(break_chars)/sizeof(ucschar_t); i++)
{
if (break_chars[i] == ch)
return true;
@ -174,14 +174,14 @@ static bool tv_is_line_break_char(unsigned short ch)
return false;
}
static bool tv_isspace(unsigned short ch)
static bool tv_isspace(ucschar_t ch)
{
size_t i;
if (ch < 128 && isspace(ch))
return true;
for (i = 0; i < sizeof(extra_spaces)/sizeof(unsigned short); i++)
for (i = 0; i < sizeof(extra_spaces)/sizeof(ucschar_t); i++)
{
if (extra_spaces[i] == ch)
return true;
@ -191,17 +191,17 @@ static bool tv_isspace(unsigned short ch)
static bool tv_is_break_line_join_mode(const unsigned char *next_str)
{
unsigned short ch;
ucschar_t ch;
tv_get_ucs(next_str, &ch);
return tv_isspace(ch);
}
static int tv_form_reflow_line(unsigned short *ucs, int chars)
static int tv_form_reflow_line(ucschar_t *ucs, int chars)
{
unsigned short new_ucs[TV_MAX_CHARS_PER_BLOCK];
unsigned short *p = new_ucs;
unsigned short ch;
ucschar_t new_ucs[TV_MAX_CHARS_PER_BLOCK];
ucschar_t *p = new_ucs;
ucschar_t ch;
int i;
int k;
int expand_spaces;
@ -262,15 +262,15 @@ static int tv_form_reflow_line(unsigned short *ucs, int chars)
}
}
rb->memcpy(ucs, new_ucs, sizeof(unsigned short) * TV_MAX_CHARS_PER_BLOCK);
rb->memcpy(ucs, new_ucs, sizeof(ucschar_t) * TV_MAX_CHARS_PER_BLOCK);
return indent_chars + nonspace_chars + expand_spaces;
}
static void tv_align_right(int *block_chars)
{
unsigned short *cur_text;
unsigned short *prev_text;
unsigned short ch;
ucschar_t *cur_text;
ucschar_t *prev_text;
ucschar_t ch;
int cur_block = block_count - 1;
int prev_block;
int cur_chars;
@ -335,9 +335,9 @@ static void tv_align_right(int *block_chars)
if (break_pos < prev_chars)
{
rb->memmove(cur_text + prev_chars - break_pos,
cur_text, block_chars[cur_block] * sizeof(unsigned short));
cur_text, block_chars[cur_block] * sizeof(ucschar_t));
rb->memcpy(cur_text, prev_text + break_pos,
(prev_chars - break_pos) * sizeof(unsigned short));
(prev_chars - break_pos) * sizeof(ucschar_t));
block_chars[prev_block] = break_pos;
block_chars[cur_block ] += prev_chars - break_pos;
@ -347,15 +347,15 @@ static void tv_align_right(int *block_chars)
}
}
static int tv_parse_text(const unsigned char *src, unsigned short *ucs,
static int tv_parse_text(const unsigned char *src, ucschar_t *ucs,
int *ucs_chars, bool is_indent)
{
const unsigned char *cur = src;
const unsigned char *next = src;
const unsigned char *line_break_ptr = NULL;
const unsigned char *line_end_ptr = NULL;
unsigned short ch = 0;
unsigned short prev_ch;
ucschar_t ch = 0;
ucschar_t prev_ch;
int chars = 0;
int gw;
int line_break_width = 0;
@ -480,7 +480,7 @@ static int tv_parse_text(const unsigned char *src, unsigned short *ucs,
int tv_create_formed_text(const unsigned char *src, ssize_t bufsize,
int block, bool is_multi, const unsigned char **dst)
{
unsigned short ch;
ucschar_t ch;
int chars[block_count];
int i;
int size = 0;

View file

@ -326,7 +326,7 @@
struct keyboard_parameters {
const unsigned char* default_kbd;
int DEFAULT_LINES;
unsigned short kbd_buf[KBD_BUF_SIZE];
ucschar_t kbd_buf[KBD_BUF_SIZE];
int nchars;
int font_w;
int font_h;
@ -358,7 +358,7 @@ int zx_kbd_input(char* text/*, int buflen*/)
int editpos, len_utf8;
#endif
/* int statusbar_size = global_settings.statusbar ? STATUSBAR_HEIGHT : 0;*/
unsigned short ch/*, tmp, hlead = 0, hvowel = 0, htail = 0*/;
ucschar_t ch/*, tmp, hlead = 0, hvowel = 0, htail = 0*/;
/*bool hangul = false;*/
unsigned char *utf8;
const unsigned char *p;

View file

@ -90,15 +90,15 @@ enum ekbd_viewports
struct keyboard_parameters
{
struct viewport *kbd_viewports;
unsigned short kbd_buf[KBD_BUF_SIZE];
unsigned short *kbd_buf_ptr;
ucschar_t kbd_buf[KBD_BUF_SIZE];
ucschar_t *kbd_buf_ptr;
unsigned short max_line_len;
int default_lines;
int last_k;
int last_i;
int font_w;
int font_h;
int text_w;
unsigned int last_k;
unsigned int last_i;
unsigned short font_w;
unsigned short font_h;
unsigned int text_w;
int curfont;
int main_y;
#ifdef HAVE_MORSE_INPUT
@ -128,7 +128,7 @@ struct edit_state
int editpos; /* Edit position on all screens */
bool cur_blink; /* Cursor on/off flag */
bool hangul;
unsigned short hlead, hvowel, htail;
ucschar_t hlead, hvowel, htail;
#ifdef HAVE_MORSE_INPUT
bool morse_mode;
bool morse_reading;
@ -158,13 +158,13 @@ static void keyboard_layout(struct viewport *kbd_vp,
{
/*Note: viewports are initialized to vp_default by kbd_create_viewports */
int sc_w = sc->getwidth();
int sc_h = sc->getheight();
unsigned short sc_w = sc->getwidth();
unsigned short sc_h = sc->getheight();
/* TEXT */
struct viewport *vp = &kbd_vp[eKBD_VP_TEXT];
/* make sure height is even for the text box */
int text_height = (MAX(pm->font_h, get_icon_height(sc->screen_type)) & ~1) + 2;
unsigned short text_height = (MAX(pm->font_h, (unsigned int)get_icon_height(sc->screen_type)) & ~1) + 2;
vp->x = 0; /* LEFT */
vp->y = 0; /* TOP */
vp->width = sc_w;
@ -224,7 +224,7 @@ int load_kbd(unsigned char* filename)
int fd;
int i, line_len, max_line_len;
unsigned char buf[4];
unsigned short *pbuf;
ucschar_t *pbuf;
if (filename == NULL)
{
@ -245,7 +245,7 @@ int load_kbd(unsigned char* filename)
/* check how many bytes to read for this character */
static const unsigned char sizes[4] = { 0x80, 0xe0, 0xf0, 0xf5 };
size_t count;
unsigned short ch;
ucschar_t ch;
for (count = 0; count < ARRAYLEN(sizes); count++)
{
@ -297,7 +297,7 @@ int load_kbd(unsigned char* filename)
struct keyboard_parameters *pm = &kbd_param[l];
#if NB_SCREENS > 1
if (l > 0)
memcpy(pm->kbd_buf, kbd_param[0].kbd_buf, i*sizeof(unsigned short));
memcpy(pm->kbd_buf, kbd_param[0].kbd_buf, i*sizeof(ucschar_t));
#endif
/* initialize parameters */
pm->x = pm->y = pm->page = 0;
@ -309,7 +309,7 @@ int load_kbd(unsigned char* filename)
}
/* helper function to spell a char */
static void kbd_spellchar(unsigned short c)
static void kbd_spellchar(ucschar_t c)
{
unsigned char tmp[5];
/* store char to pass to talk_spell */
@ -322,7 +322,7 @@ static void kbd_spellchar(unsigned short c)
talk_spell(tmp, false);
}
static void kbd_inschar(struct edit_state *state, unsigned short ch)
static void kbd_inschar(struct edit_state *state, ucschar_t ch)
{
int i, j, len;
unsigned char tmp[4];
@ -361,10 +361,10 @@ static void kbd_delchar(struct edit_state *state)
}
/* Lookup k value based on state of param (pm) */
static unsigned short get_kbd_ch(struct keyboard_parameters *pm, int x, int y)
static ucschar_t get_kbd_ch(struct keyboard_parameters *pm, int x, int y)
{
int i = 0, k = pm->page*pm->lines + y, n;
unsigned short *pbuf;
unsigned int n, i = 0, k = pm->page*pm->lines + y;
ucschar_t *pbuf;
if (k >= pm->last_k)
{
i = pm->last_i;
@ -406,12 +406,12 @@ static void kbd_move_picker_horizontal(struct keyboard_parameters *pm,
static void kbd_move_picker_vertical(struct keyboard_parameters *pm,
struct edit_state *state, int dir);
int kbd_input(char* text, int buflen, unsigned short *kbd)
int kbd_input(char* text, int buflen, ucschar_t *kbd)
{
bool done = false;
struct keyboard_parameters * const param = kbd_param;
struct edit_state state;
unsigned short ch;
ucschar_t ch;
int ret = 0; /* assume success */
FOR_NB_SCREENS(l)
{
@ -449,7 +449,7 @@ int kbd_input(char* text, int buflen, unsigned short *kbd)
FOR_NB_SCREENS(l)
{
struct keyboard_parameters *pm = &param[l];
unsigned short *pbuf;
ucschar_t *pbuf;
const unsigned char *p;
int len = 0;
@ -800,8 +800,8 @@ static void kbd_calc_pm_params(struct keyboard_parameters *pm,
{
struct font* font;
const unsigned char *p;
unsigned short ch, *pbuf;
int i, w;
ucschar_t ch, *pbuf;
unsigned int i, w;
#ifdef HAVE_TOUCHSCREEN
pm->show_buttons = (sc->screen_type == SCREEN_MAIN &&
(touchscreen_get_mode() == TOUCHSCREEN_POINT));
@ -812,7 +812,7 @@ static void kbd_calc_pm_params(struct keyboard_parameters *pm,
pm->font_h = font->height;
/* check if FONT_UI fits the screen */
if (2*pm->font_h + 3 > sc->getheight())
if (pm->font_h*2 + 3 > sc->getheight())
{
pm->curfont = FONT_SYSFIXED;
font = font_get(FONT_SYSFIXED);
@ -858,9 +858,9 @@ static void kbd_calc_vp_params(struct keyboard_parameters *pm,
{
(void) state;
struct viewport *vp = &pm->kbd_viewports[eKBD_VP_PICKER];
int icon_w, sc_w, sc_h;
unsigned int icon_w, sc_w, sc_h;
int i, total_lines;
unsigned short *pbuf;
ucschar_t *pbuf;
/* calculate how many characters to put in a row. */
icon_w = get_icon_width(sc->screen_type);
@ -1024,7 +1024,7 @@ static void kbd_draw_picker(struct keyboard_parameters *pm,
/* draw page */
int i, j;
int w, h;
unsigned short ch;
ucschar_t ch;
unsigned char *utf8;
sc->setfont(pm->curfont);
@ -1265,12 +1265,12 @@ static void kbd_insert_selected(struct keyboard_parameters *pm,
struct edit_state *state)
{
/* find input char */
unsigned short ch = get_kbd_ch(pm, pm->x, pm->y);
ucschar_t ch = get_kbd_ch(pm, pm->x, pm->y);
/* check for hangul input */
if (ch >= 0x3131 && ch <= 0x3163)
{
unsigned short tmp;
ucschar_t tmp;
if (!state->hangul)
{
@ -1335,7 +1335,7 @@ static void kbd_insert_selected(struct keyboard_parameters *pm,
static void kbd_backspace(struct edit_state *state)
{
unsigned short ch;
ucschar_t ch;
if (state->hangul)
{
if (state->htail)

View file

@ -600,16 +600,16 @@ int usb_screen(void)
return 0;
}
unsigned short *bidi_l2v(const unsigned char *str, int orientation)
ucschar_t *bidi_l2v(const unsigned char *str, int orientation)
{
static unsigned short utf16_buf[SCROLL_LINE_SIZE];
unsigned short *target;
static ucschar_t utf_buf[SCROLL_LINE_SIZE];
ucschar_t *target;
(void)orientation;
target = utf16_buf;
target = utf_buf;
while (*str)
str = utf8decode(str, target++);
*target = 0;
return utf16_buf;
return utf_buf;
}

View file

@ -660,16 +660,16 @@ int usb_screen(void)
return 0;
}
unsigned short *bidi_l2v(const unsigned char *str, int orientation)
ucschar_t *bidi_l2v(const unsigned char *str, int orientation)
{
static unsigned short utf16_buf[SCROLL_LINE_SIZE];
unsigned short *target;
static ucschar_t utf_buf[SCROLL_LINE_SIZE];
ucschar_t *target;
(void)orientation;
target = utf16_buf;
target = utf_buf;
while (*str)
str = utf8decode(str, target++);
*target = 0;
return utf16_buf;
return utf_buf;
}

View file

@ -225,7 +225,7 @@ void beep_play(unsigned int frequency, unsigned int duration, unsigned int ampli
\param amplitude
\description
unsigned short *bidi_l2v( const unsigned char *str, int orientation )
ucschar_t *bidi_l2v( const unsigned char *str, int orientation )
\param str
\param orientation
\return
@ -407,13 +407,13 @@ const struct cbmp_bitmap_info_entry *core_bitmaps
\return
\description
const unsigned char *font_get_bits( struct font *pf, unsigned short char_code )
const unsigned char *font_get_bits( struct font *pf, ucschar_t char_code )
\param pf
\param char_code
\return
\description
const unsigned char* utf8decode(const unsigned char *utf8, unsigned short *ucs)
const unsigned char* utf8decode(const unsigned char *utf8, ucschar_t *ucs)
\group unicode stuff
\param utf8
\param ucs
@ -747,7 +747,7 @@ int font_getstringsize(const unsigned char *str, int *w, int *h, int fontnumber)
\return
\description
int font_get_width(struct font* pf, unsigned short char_code)
int font_get_width(struct font* pf, ucschar_t char_code)
\param pf
\param char_code
\return
@ -972,7 +972,7 @@ bool is_diacritic(const unsigned short char_code, bool *is_rtl)
\return
\description
int kbd_input(char* buffer, int buflen, unsigned short *kbd)
int kbd_input(char* buffer, int buflen, ucschar_t *kbd)
\group misc
\param buffer
\param buflen

View file

@ -1,3 +1,5 @@
/* Note these are not ucschar_t because all Arabic
codepoints fit in 16 bits, so no need to waste table space */
typedef struct {
unsigned short isolated;
unsigned short final;

View file

@ -44,7 +44,7 @@
#define XOR(a,b) ((a||b) && !(a&&b))
#ifndef BOOTLOADER
static const arab_t * arab_lookup(unsigned short uchar)
static const arab_t * arab_lookup(ucschar_t uchar)
{
if (uchar >= 0x621 && uchar <= 0x63a)
return &(jointable[uchar - 0x621]);
@ -57,15 +57,15 @@ static const arab_t * arab_lookup(unsigned short uchar)
return 0;
}
static void arabjoin(unsigned short * stringprt, int length)
static void arabjoin(ucschar_t *stringprt, int length)
{
bool connected = false;
unsigned short * writeprt = stringprt;
ucschar_t *writeprt = stringprt;
const arab_t * prev = 0;
const arab_t * cur;
const arab_t * ligature = 0;
short uchar;
ucschar_t uchar;
int i;
for (i = 0; i <= length; i++) {
@ -135,13 +135,13 @@ static void arabjoin(unsigned short * stringprt, int length)
}
#endif /* !BOOTLOADER */
unsigned short *bidi_l2v(const unsigned char *str, int orientation)
ucschar_t *bidi_l2v(const unsigned char *str, int orientation)
{
static unsigned short utf16_buf[SCROLL_LINE_SIZE];
unsigned short *target, *tmp;
static ucschar_t utf_buf[SCROLL_LINE_SIZE];
ucschar_t *target, *tmp;
#ifndef BOOTLOADER
static unsigned short bidi_buf[SCROLL_LINE_SIZE];
unsigned short *heb_str; /* *broken_str */
static ucschar_t bidi_buf[SCROLL_LINE_SIZE];
ucschar_t *heb_str; /* *broken_str */
int block_start, block_end, block_type, block_length, i;
int length = utf8length(str);
length=length>=SCROLL_LINE_SIZE?SCROLL_LINE_SIZE-1:length;
@ -152,21 +152,21 @@ unsigned short *bidi_l2v(const unsigned char *str, int orientation)
tmp = str;
*/
target = tmp = utf16_buf;
while (*str && target < &utf16_buf[SCROLL_LINE_SIZE-1])
target = tmp = utf_buf;
while (*str && target < &utf_buf[SCROLL_LINE_SIZE-1])
str = utf8decode(str, target++);
*target = 0;
#ifdef BOOTLOADER
(void)orientation;
return utf16_buf;
return utf_buf;
#else /* !BOOTLOADER */
if (target == utf16_buf) /* empty string */
if (target == utf_buf) /* empty string */
return target;
/* properly join any arabic chars */
arabjoin(utf16_buf, length);
arabjoin(utf_buf, length);
block_start=block_end=block_length=0;
@ -204,7 +204,7 @@ unsigned short *bidi_l2v(const unsigned char *str, int orientation)
for (i=block_start; i<=block_end; i++) {
*target = (block_type == orientation) ?
*(utf16_buf+i) : *(utf16_buf+block_end-i+block_start);
*(utf_buf+i) : *(utf_buf+block_end-i+block_start);
if (block_type!=orientation) {
switch (*target) {
case '(':
@ -226,7 +226,7 @@ unsigned short *bidi_l2v(const unsigned char *str, int orientation)
*target = 0;
#if 0 /* Is this code really necessary? */
broken_str = utf16_buf;
broken_str = utf_buf;
begin=end=length-1;
target = broken_str;
@ -295,4 +295,3 @@ unsigned short *bidi_l2v(const unsigned char *str, int orientation)
return heb_str;
#endif /* !BOOTLOADER */
}

View file

@ -28,8 +28,8 @@
#include "system.h"
#define DIAC_NUM_RANGES (ARRAYLEN(diac_ranges))
#define DIAC_RTL (1 << 7)
#define DIAC_CNT (0xFF ^ DIAC_RTL)
#define DIAC_RTL (1 << 15)
#define DIAC_CNT (0xFFFF ^ DIAC_RTL)
/* Each diac_range_ struct defines a Unicode range that begins with
* N diacritic characters, and continues with non-diacritic characters up to the
@ -39,8 +39,8 @@
struct diac_range
{
uint16_t base;
uint8_t info; /* [RTL:1 CNT:7] */
uint16_t base; /* Not ucschar_t until we need >16b */
uint16_t info; /* [RTL:1 CNT:15] */
};
#define DIAC_RANGE_ENTRY(first_diac, first_non_diac, is_rtl) \
@ -51,7 +51,7 @@ struct diac_range
static const struct diac_range diac_ranges[] =
{
DIAC_RANGE_ENTRY(0x0000, 0x0000, 0),
DIAC_RANGE_ENTRY(FIRST_DIACRITIC, 0x0370, 0),
DIAC_RANGE_ENTRY(FIRST_DIACRITIC, 0x0370, 0), /* v1 - v4.1 */
DIAC_RANGE_ENTRY(0x0483, 0x048a, 0),
DIAC_RANGE_ENTRY(0x0591, 0x05be, 1),
DIAC_RANGE_ENTRY(0x05bf, 0x05c0, 1),
@ -146,6 +146,7 @@ static const struct diac_range diac_ranges[] =
DIAC_RANGE_ENTRY(0x19c8, 0x19ca, 0),
DIAC_RANGE_ENTRY(0x1a17, 0x1a1c, 0),
DIAC_RANGE_ENTRY(0x1a55, 0x1a80, 0),
DIAC_RANGE_ENTRY(0x1ab0, 0x1b00, 0), /* v7.0 */
DIAC_RANGE_ENTRY(0x1b00, 0x1b05, 0),
DIAC_RANGE_ENTRY(0x1b34, 0x1b45, 0),
DIAC_RANGE_ENTRY(0x1b6b, 0x1b74, 0),
@ -156,10 +157,10 @@ static const struct diac_range diac_ranges[] =
DIAC_RANGE_ENTRY(0x1cd4, 0x1ce9, 0),
DIAC_RANGE_ENTRY(0x1ced, 0x1cee, 0),
DIAC_RANGE_ENTRY(0x1cf2, 0x1cf3, 0),
DIAC_RANGE_ENTRY(0x1dc0, 0x1e00, 0),
DIAC_RANGE_ENTRY(0x20d0, 0x20f1, 0),
DIAC_RANGE_ENTRY(0x1dc0, 0x1e00, 0), /* v4.1 - v5.2 */
DIAC_RANGE_ENTRY(0x20d0, 0x2100, 0), /* v1.0 - v5.1 */
DIAC_RANGE_ENTRY(0x2cef, 0x2cf2, 0),
DIAC_RANGE_ENTRY(0x2de0, 0x2e00, 0),
DIAC_RANGE_ENTRY(0x2de0, 0x2e00, 0), /* v5.1 */
DIAC_RANGE_ENTRY(0x302a, 0x3030, 0),
DIAC_RANGE_ENTRY(0x3099, 0x309b, 0),
DIAC_RANGE_ENTRY(0xa66f, 0xa673, 0),
@ -188,7 +189,7 @@ static const struct diac_range diac_ranges[] =
DIAC_RANGE_ENTRY(0xabe3, 0xabeb, 0),
DIAC_RANGE_ENTRY(0xabec, 0xabee, 0),
DIAC_RANGE_ENTRY(0xfb1e, 0xfb1f, 0),
DIAC_RANGE_ENTRY(0xfe20, 0xfe27, 0),
DIAC_RANGE_ENTRY(0xfe20, 0xfe30, 0), /* v1.0 - v8.0 */
DIAC_RANGE_ENTRY(0xfe70, 0xfe70, 1),
DIAC_RANGE_ENTRY(0xff00, 0xff00, 0),
DIAC_RANGE_ENTRY(0xffff, 0xffff, 0),
@ -196,7 +197,7 @@ static const struct diac_range diac_ranges[] =
#define MRU_MAX_LEN 32
bool is_diacritic(const unsigned short char_code, bool *is_rtl)
bool is_diacritic(const ucschar_t char_code, bool *is_rtl)
{
static uint8_t mru_len = 0;
static uint8_t diacritic_mru[MRU_MAX_LEN];
@ -209,7 +210,6 @@ bool is_diacritic(const unsigned short char_code, bool *is_rtl)
/* Search in MRU */
for (mru = 0, i = 0; mru < mru_len; mru++)
{
/* Items shifted >> 1 */
itmp = i;
i = diacritic_mru[mru];
@ -250,10 +250,10 @@ Found:
if (is_rtl)
*is_rtl = ((DIAC_RTL & info) == DIAC_RTL);
return (char_code < diac->base + (info & DIAC_CNT));
return (char_code < (diac->base + (info & DIAC_CNT)));
}
#else /*BOOTLOADER*/
inline bool is_diacritic(const unsigned short char_code, bool *is_rtl)
inline bool is_diacritic(const ucschar_t char_code, bool *is_rtl)
{
(void)char_code;
if (is_rtl)

View file

@ -127,7 +127,7 @@ static int volatile cp_table_ref = 0;
/* non-default codepage table buffer (cannot be bufalloced! playback itself
may be making the load request) */
static unsigned short codepage_table[MAX_CP_TABLE_SIZE+1];
static unsigned short codepage_table[MAX_CP_TABLE_SIZE+1]; // XXX convert to ucschar_t if we ever need > 16bit mappings?
#if defined(APPLICATION) && defined(__linux__)
static const char * const name_codepages_linux[NUM_CODEPAGES+1] =
@ -344,7 +344,7 @@ unsigned char* iso_decode_ex(const unsigned char *iso, unsigned char *utf8, int
cp_lock_leave();
while (count-- && utf8_size > 0) {
unsigned short ucs, tmp;
ucschar_t ucs, tmp;
if (*iso < 128 || cp == UTF_8) /* Already UTF-8 */
{
@ -511,8 +511,25 @@ unsigned long utf8length(const unsigned char *utf8)
return l;
}
/* Return how many UTF-16 code units are needed to encode the given
 * NUL-terminated UTF-8 string.  Every decoded code point counts as one
 * unit; when built with UNICODE32, code points at or above 0x10000 count
 * as two (a surrogate pair). */
unsigned long utf16len_utf8(const unsigned char *utf8)
{
    unsigned long units = 0;
    ucschar_t ch;

    for (; *utf8; units++) {
        utf8 = utf8decode(utf8, &ch);
#ifdef UNICODE32
        /* needs a second code unit for the low surrogate */
        if (ch >= 0x10000)
            units++;
#endif
    }

    return units;
}
/* Decode 1 UTF-8 char and return a pointer to the next char. */
const unsigned char* utf8decode(const unsigned char *utf8, unsigned short *ucs)
const unsigned char* utf8decode(const unsigned char *utf8, ucschar_t *ucs)
{
unsigned char c = *utf8++;
unsigned long code;
@ -552,8 +569,16 @@ const unsigned char* utf8decode(const unsigned char *utf8, unsigned short *ucs)
/* Invalid UTF-8 char */
code = 0xfffd;
}
/* currently we don't support chars above U-FFFF */
*ucs = (code < 0x10000) ? code : 0xfffd;
#ifdef UNICODE32
if (code > 0x10ffff)
code = 0xfffd;
#else
if (code > 0xffff)
code = 0xfffd;
#endif
*ucs = code;
return utf8;
}

View file

@ -747,6 +747,8 @@ static bool fatlong_parse_entry(struct fatlong_parse_state *lnparse,
/* so far so good; save entry information */
lnparse->ord = ord;
/* Treat entries as opaque 16-bit values; surrogate pairing and
conversion to UTF-8 happen in fatlong_parse_finish() */
uint16_t *ucsp = fatent->ucssegs[ord - 1 + 5];
unsigned int i = longent_char_first();
@ -797,13 +799,24 @@ static bool fatlong_parse_finish(struct fatlong_parse_state *lnparse,
/* ensure the last segment is NULL-terminated if it is filled */
fatent->ucssegs[lnparse->ord_max + 5][0] = 0x0000;
for (uint16_t *ucsp = fatent->ucssegs[5], ucc = *ucsp;
ucc; ucc = *++ucsp)
unsigned long ucc; /* Decoded codepoint */
uint16_t *ucsp, ucs;
for (ucsp = fatent->ucssegs[5], ucs=*ucsp; ucs; ucs = *++ucsp)
{
/* end should be hit before ever seeing padding */
if (ucc == 0xffff)
if (ucs == 0xffff)
return false;
#ifdef UNICODE32
/* Check for a surrogate UTF16 pair */
if (ucs >= 0xd800 && ucs < 0xdc00 &&
*(ucsp+1) >= 0xdc00 && *(ucsp+1) < 0xe000) {
ucc = 0x10000 + (((ucs & 0x3ff) << 10) | (*(ucsp+1) & 0x3ff));
ucsp++;
} else
#endif
ucc = ucs;
if ((p = utf8encode(ucc, p)) - name > FAT_DIRENTRY_NAME_MAX)
return false;
}
@ -1612,12 +1625,27 @@ static int write_longname(struct bpb *fat_bpb, struct fat_filestr *parentstr,
for (unsigned long i = 0; i < ucspadlen; i++)
{
if (i < ucslen)
if (i < ucslen) {
#ifdef UNICODE32
ucschar_t tmp;
name = utf8decode(name, &tmp);
/* For codepoints > U+FFFF we will need to use a UTF16 surrogate
pair. 'ucslen' already takes this into account! */
if (tmp < 0x10000) {
ucsname[i] = tmp;
} else {
tmp -= 0x10000;
ucsname[i++] = 0xd800 | ((tmp >> 10) & 0x3ff); /* High */
ucsname[i] = 0xdc00 | (tmp & 0x3ff); /* Low */
}
#else
name = utf8decode(name, &ucsname[i]);
else if (i == ucslen)
#endif
} else if (i == ucslen) {
ucsname[i] = 0x0000; /* name doesn't fill last block */
else /* i > ucslen */
} else /* i > ucslen */ {
ucsname[i] = 0xffff; /* pad-out to end */
}
}
dc_lock_cache();
@ -1744,9 +1772,12 @@ static int add_dir_entry(struct bpb *fat_bpb, struct fat_filestr *parentstr,
create_dos_name(basisname, name, &n);
randomize_dos_name(shortname, basisname, &n);
/* one dir entry needed for every 13 characters of filename,
plus one entry for the short name */
ucslen = utf8length(name);
/* one dir entry needed for every 13 utf16 "code units"
of filename, plus one entry for the short name.
Keep in mind that a unicode character can take up to
two code units!
*/
ucslen = utf16len_utf8(name);
if (ucslen > 255)
FAT_ERROR(-2); /* name is too long */

View file

@ -385,7 +385,7 @@ static void LCDFN(mono_bmp_part_helper)(const unsigned char *src, int src_x,
/* put a string at a given pixel position, skipping first ofs pixel columns */
static void LCDFN(putsxyofs)(int x, int y, int ofs, const unsigned char *str)
{
unsigned short *ucs;
ucschar_t *ucs;
struct viewport *vp = LCDFN(current_viewport);
font_lock(vp->font, true);
struct font* pf = font_get(vp->font);
@ -429,7 +429,7 @@ static void LCDFN(putsxyofs)(int x, int y, int ofs, const unsigned char *str)
bool is_rtl, is_diac;
const unsigned char *bits;
int width, base_width, base_ofs = 0;
const unsigned short next_ch = ucs[1];
const ucschar_t next_ch = ucs[1];
if (x >= vp->width)
break;
@ -447,7 +447,7 @@ static void LCDFN(putsxyofs)(int x, int y, int ofs, const unsigned char *str)
{
if (!rtl_next_non_diac_width)
{
const unsigned short *u;
const ucschar_t *u;
/* Jump to next non-diacritic char, and calc its width */
for (u = &ucs[1]; *u && IS_DIACRITIC(*u); u++);
@ -529,7 +529,7 @@ static void LCDFN(putsxyofs)(int x, int y, int ofs, const unsigned char *str)
/* put a string at a given pixel position, skipping first ofs pixel columns */
static void LCDFN(putsxyofs)(int x, int y, int ofs, const unsigned char *str)
{
unsigned short *ucs;
ucschar_t *ucs;
struct viewport *vp = LCDFN(current_viewport);
struct font* pf = font_get(vp->font);
const unsigned char *bits;
@ -567,7 +567,7 @@ static void LCDFN(putsxyofs)(int x, int y, int ofs, const unsigned char *str)
/* allow utf but no diacritics or rtl lang */
for (ucs = bidi_l2v(str, 1); *ucs; ucs++)
{
const unsigned short next_ch = ucs[1];
const ucschar_t next_ch = ucs[1];
if (x >= vp->width)
break;

View file

@ -21,6 +21,6 @@
#ifndef BIDI_H
#define BIDI_H
extern unsigned short *bidi_l2v(const unsigned char *str, int orientation);
ucschar_t *bidi_l2v(const unsigned char *str, int orientation);
#endif /* BIDI_H */

View file

@ -1461,4 +1461,11 @@ Lyre prototype 1 */
#error "HAVE_LCD_SLEEP_SETTING requires HAVE_LCD_SLEEP"
#endif
// XXX Figure out a better place to put this?
/* ucschar_t is the type used to hold one decoded character code.
   UNICODE32 builds use unsigned int so that codes above U+FFFF can be
   stored; all other builds keep unsigned short. */
#ifdef UNICODE32
#define ucschar_t unsigned int
#else
#define ucschar_t unsigned short
#endif
#endif /* __CONFIG_H__ */

View file

@ -18,6 +18,9 @@
* KIND, either express or implied.
*
****************************************************************************/
#ifndef __CPU_H
#define __CPU_H
#include "config.h"
#if CONFIG_CPU == MCF5249
@ -80,3 +83,5 @@
#if CONFIG_CPU == STM32H743
#include "cpu-stm32h743.h"
#endif
#endif /* __CPU_H */

View file

@ -86,7 +86,7 @@ struct font {
int maxwidth; /* max width in pixels*/
unsigned int height; /* height in pixels*/
int ascent; /* ascent (baseline) height*/
int firstchar; /* first character in bitmap*/
unsigned int firstchar; /* first character in bitmap*/
int size; /* font size in glyphs*/
int depth; /* depth of the font, 0=1bit and 1=4bit */
const unsigned char *bits; /* 8-bit column bitmap data*/
@ -134,7 +134,7 @@ void font_enable_all(void);
struct font* font_get(int font);
int font_getstringnsize(const unsigned char *str, size_t maxbytes, int *w, int *h, int fontnumber);
int font_getstringsize(const unsigned char *str, int *w, int *h, int fontnumber);
int font_get_width(struct font* ft, unsigned short ch);
const unsigned char * font_get_bits(struct font* ft, unsigned short ch);
int font_get_width(struct font* ft, ucschar_t ch);
const unsigned char * font_get_bits(struct font* ft, ucschar_t ch);
#endif

View file

@ -21,5 +21,4 @@
extern const char jamo_table[51][3];
unsigned short hangul_join(unsigned short lead, unsigned short vowel,
unsigned short tail);
ucschar_t hangul_join(ucschar_t lead, ucschar_t vowel, ucschar_t tail);

View file

@ -53,6 +53,12 @@
#define FONT_EXT "fnt"
#define GLYPH_CACHE_EXT "gc"
/* Header word stored at the start of a glyph cache file: it is written
   before the glyph list when the cache is saved and compared on load,
   and a mismatch makes the loader ignore the file.  The value differs
   between UNICODE32 and non-UNICODE32 builds because the per-glyph
   record size differs (4 vs 2 bytes).
   NOTE(review): the low byte looks like the character width in bits
   (0x20 / 0x10) -- confirm. */
#ifdef UNICODE32
#define FC_HEADER_VAL 0x01000020
#else
#define FC_HEADER_VAL 0x01000010
#endif
/* max static loadable font buffer size */
#ifndef MAX_FONT_SIZE
#if LCD_HEIGHT > 64
@ -182,7 +188,7 @@ void font_init(void)
static short readshort(struct font *pf)
{
unsigned short s;
uint16_t s;
s = *pf->buffer_position++ & 0xff;
s |= (*pf->buffer_position++ << 8);
@ -362,7 +368,7 @@ static size_t font_glyphs_to_bufsize(struct font *pf, int glyphs)
/* LRU bytes per glyph */
bufsize = LRU_SLOT_OVERHEAD + sizeof(struct font_cache_entry) +
sizeof( unsigned short);
sizeof(unsigned short);
/* Image bytes per glyph */
bufsize += glyph_bytes(pf, pf->maxwidth);
bufsize *= glyphs;
@ -420,6 +426,14 @@ int font_load_ex( const char *path, size_t buf_size, int glyphs )
if ( fd < 0 )
return -1;
#ifdef UNICODE32
if (glyphs && glyphs < 3)
glyphs = 3; /* Guarantee we'll always have at least 2 after alignment */
#else
if (glyphs && glyphs < 2)
glyphs = 2; /* Guarantee we'll always have at least 1 after alignment */
#endif
/* load font struct f with file header */
int file_size = filesize( fd );
struct font header;
@ -723,7 +737,7 @@ load_cache_entry(struct font_cache_entry* p, void* callback_data)
{
struct font* pf = callback_data;
unsigned short char_code = p->_char_code;
ucschar_t char_code = p->_char_code;
int fd;
lock_font_handle(pf->handle, true);
@ -788,7 +802,7 @@ static void cache_create(struct font* pf)
* when the font file is closed during USB */
unsigned char *cache_buf = pf->buffer_start + bitmap_size;
size_t cache_size = pf->buffer_size - bitmap_size;
ALIGN_BUFFER(cache_buf, cache_size, 2);
ALIGN_BUFFER(cache_buf, cache_size, sizeof(ucschar_t));
memset(pf->buffer_start, 0, bitmap_size);
/* Initialise cache */
font_cache_create(&pf->cache, cache_buf, cache_size, bitmap_size);
@ -797,7 +811,7 @@ static void cache_create(struct font* pf)
/*
* Returns width of character
*/
int font_get_width(struct font* pf, unsigned short char_code)
int font_get_width(struct font* pf, ucschar_t char_code)
{
int width;
struct font_cache_entry *e;
@ -820,7 +834,7 @@ int font_get_width(struct font* pf, unsigned short char_code)
return width;
}
const unsigned char* font_get_bits(struct font* pf, unsigned short char_code)
const unsigned char* font_get_bits(struct font* pf, ucschar_t char_code)
{
const unsigned char* bits;
@ -884,7 +898,7 @@ static void glyph_file_write(void* data)
{
struct font_cache_entry* p = data;
struct font* pf = cache_pf;
unsigned short ch;
ucschar_t ch;
static int buffer_pos = 0;
#define WRITE_BUFFER 256
static unsigned char buffer[WRITE_BUFFER];
@ -901,9 +915,17 @@ static void glyph_file_write(void* data)
return;
ch = p->_char_code + pf->firstchar;
buffer[buffer_pos] = ch >> 8;
#ifdef UNICODE32
buffer[buffer_pos] = (ch >> 24) & 0xff;
buffer[buffer_pos+1] = (ch >> 16) & 0xff;
buffer[buffer_pos+2] = (ch >> 8) & 0xff;
buffer[buffer_pos+3] = ch & 0xff;
buffer_pos += 4;
#else
buffer[buffer_pos] = (ch >> 8) & 0xff;
buffer[buffer_pos+1] = ch & 0xff;
buffer_pos += 2;
#endif
return;
}
@ -928,6 +950,8 @@ static void glyph_cache_save(int font_id)
fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, 0666);
if (fd >= 0)
{
uint32_t header = FC_HEADER_VAL;
write(fd, &header, sizeof(header));
cache_pf = pf;
cache_fd = fd;
lru_traverse(&cache_pf->cache._lru, glyph_file_write);
@ -944,9 +968,9 @@ static void glyph_cache_save(int font_id)
}
static int ushortcmp(const void *a, const void *b)
static int ucscharcmp(const void *a, const void *b)
{
return ((int)(*(unsigned short*)a - *(unsigned short*)b));
return ((int)(*(ucschar_t*)a - *(ucschar_t*)b));
}
static NO_INLINE void glyph_cache_load(const char *font_path, struct font *pf)
@ -954,11 +978,11 @@ static NO_INLINE void glyph_cache_load(const char *font_path, struct font *pf)
#define MAX_SORT 256
if (pf->fd >= 0) {
int i, size, fd;
unsigned char tmp[2];
unsigned short ch;
unsigned short glyphs[MAX_SORT];
unsigned short glyphs_lru_order[MAX_SORT];
int glyph_file_skip=0, glyph_file_size=0;
unsigned char tmp[sizeof(ucschar_t)];
ucschar_t ch;
ucschar_t glyphs[MAX_SORT];
ucschar_t glyphs_lru_order[MAX_SORT];
unsigned int glyph_file_skip=0, glyph_file_size=0;
int sort_size = pf->cache._capacity;
if ( sort_size > MAX_SORT )
@ -974,30 +998,40 @@ static NO_INLINE void glyph_cache_load(const char *font_path, struct font *pf)
fd = open(GLYPH_CACHE_FILE, O_RDONLY|O_BINARY);
#endif
if (fd >= 0) {
/* Header */
uint32_t hdr = 0;
read(fd, &hdr, sizeof(hdr)); /* a failed read leaves hdr at 0, rejected below */
if (hdr != FC_HEADER_VAL) {
    /* Missing or foreign header: release the descriptor before falling
       back, because the jump leaves this branch without reaching its
       close(fd) */
    close(fd);
    goto latin;
}
/* only read what fits */
glyph_file_size = filesize( fd );
if ( glyph_file_size > 2*pf->cache._capacity ) {
glyph_file_skip = glyph_file_size - 2*pf->cache._capacity;
lseek( fd, glyph_file_skip, SEEK_SET );
if (glyph_file_size < sizeof(uint32_t)) {
    /* File too small to hold the header: close it before falling back
       so the descriptor is not leaked by the jump */
    close(fd);
    goto latin;
}
glyph_file_size -= sizeof(uint32_t);
if ( glyph_file_size > (int)sizeof(ucschar_t)*pf->cache._capacity ) {
glyph_file_skip = glyph_file_size - sizeof(ucschar_t)*pf->cache._capacity;
lseek( fd, glyph_file_skip + sizeof(uint32_t), SEEK_SET );
}
while(1) {
for ( size = 0;
read( fd, tmp, 2 ) == 2 && size < sort_size;
read( fd, tmp, sizeof(tmp) ) == sizeof(tmp) && size < sort_size;
size++ )
{
#ifdef UNICODE32
/* Big-endian 32-bit value.  Widen each byte before shifting: an
   unsigned char promotes to signed int, and shifting a byte >= 0x80
   left by 24 would overflow it. */
glyphs[size] = ((ucschar_t)tmp[0] << 24) | ((ucschar_t)tmp[1] << 16) |
               ((ucschar_t)tmp[2] << 8)  |  (ucschar_t)tmp[3];
#else
glyphs[size] = (tmp[0] << 8) | tmp[1];
#endif
glyphs_lru_order[size] = glyphs[size];
}
/* sort glyphs array to make sector cache happy */
qsort((void *)glyphs, size, sizeof(unsigned short),
ushortcmp );
qsort((void *)glyphs, size, sizeof(ucschar_t),
ucscharcmp );
/* load font bitmaps */
for( i = 0; i < size ; i++ )
font_get_bits(pf, glyphs[i]);
font_get_bits(pf, glyphs[i]);
/* redo to fix lru order */
for ( i = 0; i < size ; i++)
@ -1009,6 +1043,7 @@ static NO_INLINE void glyph_cache_load(const char *font_path, struct font *pf)
close(fd);
} else {
latin:
/* load latin1 chars into cache */
for ( ch = 32 ; ch < 256 && ch < pf->cache._capacity + 32; ch++ )
font_get_bits(pf, ch);
@ -1040,7 +1075,7 @@ struct font* font_get(int font)
/*
* Returns width of character
*/
int font_get_width(struct font* pf, unsigned short char_code)
int font_get_width(struct font* pf, ucschar_t char_code)
{
/* check input range*/
if (char_code < pf->firstchar || char_code >= pf->firstchar+pf->size)
@ -1050,7 +1085,7 @@ int font_get_width(struct font* pf, unsigned short char_code)
return pf->width? pf->width[char_code]: pf->maxwidth;
}
const unsigned char* font_get_bits(struct font* pf, unsigned short char_code)
const unsigned char* font_get_bits(struct font* pf, ucschar_t char_code)
{
const unsigned char* bits;
@ -1079,7 +1114,7 @@ int font_getstringnsize(const unsigned char *str, size_t maxbytes, int *w, int *
{
struct font* pf = font_get(fontnum);
font_lock( fontnum, true );
unsigned short ch;
ucschar_t ch;
int width = 0;
size_t b = maxbytes - 1;

View file

@ -43,13 +43,18 @@ void font_cache_create(
int font_cache_entry_size =
sizeof(struct font_cache_entry) + bitmap_bytes_size;
/* make sure font cache entries are a multiple of 16 bits */
if (font_cache_entry_size % 2 != 0)
/* make sure font cache entries are a multiple of sizeof(ucschar_t) */
while (font_cache_entry_size & (sizeof(ucschar_t) -1))
font_cache_entry_size++;
int cache_size = buf_size /
(font_cache_entry_size + LRU_SLOT_OVERHEAD + sizeof(short));
#ifdef UNICODE32
/* Ensure LRU index size is a multiple of 32 bits */
cache_size &= ~1;
#endif
fcache->_size = 1;
fcache->_capacity = cache_size;
fcache->_prev_result = 0;
@ -77,7 +82,7 @@ void font_cache_create(
* not found.
************************************************************************/
static int search(struct font_cache* fcache,
unsigned short char_code,
ucschar_t char_code,
int size,
int *p_insertion_point )
{
@ -124,7 +129,7 @@ static int search(struct font_cache* fcache,
******************************************************************************/
struct font_cache_entry* font_cache_get(
struct font_cache* fcache,
unsigned short char_code,
ucschar_t char_code,
bool cache_only,
void (*callback) (struct font_cache_entry* p, void *callback_data),
void *callback_data)

View file

@ -18,6 +18,7 @@
* KIND, either express or implied.
*
****************************************************************************/
#include "config.h"
#include "hangul.h"
const char jamo_table[51][3] = {
@ -75,10 +76,9 @@ const char jamo_table[51][3] = {
};
/* takes three jamo chars and joins them into one hangul */
unsigned short hangul_join(unsigned short lead, unsigned short vowel,
unsigned short tail)
ucschar_t hangul_join(ucschar_t lead, ucschar_t vowel, ucschar_t tail)
{
unsigned short ch = 0xfffd;
ucschar_t ch = 0xfffd;
if (lead < 0x3131 || lead > 0x3163)
return ch;

View file

@ -27,7 +27,7 @@
* Sets is_rtl (if it's not NULL) to whether the character
* belongs to an RTL language.
*/
bool is_diacritic(const unsigned short char_code, bool *is_rtl);
bool is_diacritic(const ucschar_t char_code, bool *is_rtl);
/* Note IS_DIACRITIC macros may elide the function call
* therefore there is a separate _RTL version that requires a bool pointer

View file

@ -21,6 +21,7 @@
#ifndef _FONT_CACHE_H_
#define _FONT_CACHE_H_
#include <stdbool.h>
#include "config.h"
#include "lru.h"
/*******************************************************************************
@ -29,16 +30,16 @@
struct font_cache
{
struct lru _lru;
int _size;
int _capacity;
int _prev_char_code;
unsigned int _size;
unsigned int _capacity;
ucschar_t _prev_char_code;
int _prev_result;
short *_index; /* index of lru handles in char_code order */
};
struct font_cache_entry
{
unsigned short _char_code;
ucschar_t _char_code;
unsigned char width;
unsigned char bitmap[1]; /* place holder */
};
@ -55,7 +56,7 @@ void font_cache_create(
* Note: With cache_only this can return NULL, which otherwise never happens */
struct font_cache_entry* font_cache_get(
struct font_cache* fcache,
unsigned short char_code,
ucschar_t char_code,
bool cache_only,
void (*callback) (struct font_cache_entry* p, void *callback_data),
void *callback_data);

View file

@ -33,6 +33,7 @@ struct lru
void *_base;
};
/* LRU_SLOT_OVERHEAD is the fixed portion of struct lru_node */
#define LRU_SLOT_OVERHEAD (2 * sizeof(short))
/* Create LRU list with specified size from buf. */
@ -45,4 +46,3 @@ void *lru_data(struct lru* pl, short handle);
void lru_traverse(struct lru* pl, void (*callback)(void* data));
#endif /* LRU_H */

View file

@ -63,8 +63,9 @@ unsigned char* utf16LEdecode(const unsigned char *utf16, unsigned char *utf8, in
unsigned char* utf16BEdecode(const unsigned char *utf16, unsigned char *utf8, int count);
unsigned char* utf16decode(const unsigned char *utf16, unsigned char *utf8, int count, int utf8_size, bool le);
bool utf16_has_bom(const unsigned char *utf16, bool *le);
unsigned long utf16len_utf8(const unsigned char *utf8);
unsigned long utf8length(const unsigned char *utf8);
const unsigned char* utf8decode(const unsigned char *utf8, unsigned short *ucs);
const unsigned char* utf8decode(const unsigned char *utf8, ucschar_t *ucs);
void set_codepage(int cp);
int get_codepage(void);
int utf8seek(const unsigned char* utf8, int offset);

View file

@ -63,55 +63,78 @@ static void win32_last_error_errno(void)
static HANDLE win32_open(const char *ospath);
static int win32_stat(const char *ospath, LPBY_HANDLE_FILE_INFORMATION lpInfo);
unsigned short * strcpy_utf8ucs2(unsigned short *buffer,
const unsigned char *utf8)
static unsigned short * strcpy_utf8utf16(unsigned short *buffer,
const unsigned char *utf8)
{
for (wchar_t *ucs2 = buffer;
((utf8 = utf8decode(utf8, ucs2)), *ucs2); ucs2++);
/* Decode one codepoint per iteration and store it as UTF-16.  The stop
   test must look at the value just written, not at the (still
   uninitialised) destination, so that the terminating NUL is copied and
   ends the loop. */
for (wchar_t *ucs = buffer; ; ucs++) {
    ucschar_t cp;
    utf8 = utf8decode(utf8, &cp);
#ifdef UNICODE32
    /* Everything from U+10000 upwards needs a surrogate pair */
    if (cp >= 0x10000) {
        cp -= 0x10000;
        *ucs++ = 0xd800 | (cp >> 10);   /* High */
        cp = 0xdc00 | (cp & 0x3ff);     /* Low */
    }
#endif
    *ucs = cp;
    if (!cp)
        break; /* NUL terminator stored; done */
}
return buffer;
}
#if 0
unsigned char * strcpy_ucs2utf8(unsigned char *buffer,
const unsigned short *ucs2)
#if 0 /* Unused in current code */
static unsigned char * strcpy_utf16utf8(unsigned char *buffer,
const unsigned short *utf16buf)
{
for (unsigned char *utf8 = buffer;
((utf8 = utf8encode(*ucs2, utf8)), *ucs2); ucs2++);
unsigned char *utf8 = buffer;
/* windows is always LE */
const int le = 1;
while (*utf16buf) {
const unsigned char *utf16 = (const unsigned char *)utf16buf;
unsigned long ucs;
/* Check for a surrogate pair */
if (*(utf16 + le) >= 0xD8 && *(utf16 + le) < 0xE0) {
ucs = 0x10000 + ((utf16[1 - le] << 10) | ((utf16[le] - 0xD8) << 18)
| utf16[2 + (1 - le)] | ((utf16[2 + le] - 0xDC) << 8));
utf16buf += 2;
} else {
ucs = utf16[le] << 8 | utf16[1 - le];
utf16buf++;
}
utf8 = utf8encode(ucs, utf8);
}
return buffer;
}
size_t strlen_utf8ucs2(const unsigned char *utf8)
{
/* This won't properly count multiword ucs2 so use the alternative
below for now which doesn't either */
size_t length = 0;
unsigned short ucschar[2];
for (unsigned char c = *utf8; c;
((utf8 = utf8decode(utf8, ucschar)), c = *utf8))
length++;
return length;
}
#endif /* 0 */
size_t strlen_utf8ucs2(const unsigned char *utf8)
{
return utf8length(utf8);
}
size_t strlen_ucs2utf8(const unsigned short *ucs2)
static size_t strlen_utf16utf8(const unsigned short *utf16buf)
{
size_t length = 0;
unsigned char utf8char[4];
for (unsigned short c = *ucs2; c; (c = *++ucs2))
length += utf8encode(c, utf8char) - utf8char;
/* windows is always LE */
const int le = 1;
while (*utf16buf) {
const unsigned char *utf16 = (const unsigned char *)utf16buf;
unsigned long ucs;
/* Check for a surrogate pair */
if (*(utf16 + le) >= 0xD8 && *(utf16 + le) < 0xE0) {
ucs = 0x10000 + ((utf16[1 - le] << 10) | ((utf16[le] - 0xD8) << 18)
| utf16[2 + (1 - le)] | ((utf16[2 + le] - 0xDC) << 8));
utf16buf += 2;
} else {
ucs = utf16[le] << 8 | utf16[1 - le];
utf16buf++;
}
length += utf8encode(ucs, utf8char) - utf8char;
}
return length;
}
#endif
size_t strlcpy_ucs2utf8(char *buffer, const unsigned short *ucs2,
size_t bufsize)
/* Note: Must be exported */
size_t strlcpy_utf16utf8(char *buffer, const unsigned short *utf16,
size_t bufsize)
{
if (!buffer)
bufsize = 0;
@ -119,12 +142,24 @@ size_t strlcpy_ucs2utf8(char *buffer, const unsigned short *ucs2,
size_t length = 0;
unsigned char utf8char[4];
for (unsigned short c = *ucs2; c; (c = *++ucs2))
unsigned long ucc;
while(*utf16)
{
/* Check for a surrogate UTF16 pair */
if (*utf16 >= 0xd800 && *utf16 < 0xdc00 &&
*(utf16+1) >= 0xdc00 && *(utf16+1) < 0xe000) {
ucc = 0x10000 + (((*utf16 & 0x3ff) << 10) | (*(utf16+1) & 0x3ff));
utf16++;
} else {
ucc = *utf16;
}
/* If the last character won't fit, this won't split it */
size_t utf8size = utf8encode(c, utf8char) - utf8char;
size_t utf8size = utf8encode(ucc, utf8char) - utf8char;
if ((length += utf8size) < bufsize)
buffer = mempcpy(buffer, utf8char, utf8size);
utf16++;
}
/* Above won't ever copy to very end */
@ -134,44 +169,44 @@ size_t strlcpy_ucs2utf8(char *buffer, const unsigned short *ucs2,
return length;
}
#define _toucs2(utf8) \
#define _toutf16(utf8) \
({ const char *_utf8 = (utf8); \
size_t _l = strlen_utf8ucs2(_utf8); \
size_t _l = utf16len_utf8(_utf8); \
void *_buffer = alloca((_l + 1)*2); \
strcpy_utf8ucs2(_buffer, _utf8); })
strcpy_utf8utf16(_buffer, _utf8); })
#define _toutf8(ucs2) \
({ const char *_ucs2 = (ucs2); \
size_t _l = strlen_ucs2utf8(_ucs2); \
#define _toutf8(utf16) \
({ const char *_ucs = (utf16); \
size_t _l = strlen_utf16utf8(_ucs); \
void *_buffer = alloca(_l + 1); \
strcpy_ucs2utf8(_buffer, _ucs2); })
strcpy_utf16utf8(_buffer, _ucs); })
int os_open(const char *ospath, int oflag, ...)
{
return _wopen(_toucs2(ospath), oflag __OPEN_MODE_ARG);
return _wopen(_toutf16(ospath), oflag __OPEN_MODE_ARG);
}
int os_creat(const char *ospath, mode_t mode)
{
return _wcreat(_toucs2(ospath), mode);
return _wcreat(_toutf16(ospath), mode);
}
int os_stat(const char *ospath, struct _stat *s)
{
return _wstat(_toucs2(ospath), s);
return _wstat(_toutf16(ospath), s);
}
int os_remove(const char *ospath)
{
return _wremove(_toucs2(ospath));
return _wremove(_toutf16(ospath));
}
int os_rename(const char *osold, const char *osnew)
{
int errnum = errno;
const wchar_t *wchosold = _toucs2(osold);
const wchar_t *wchosnew = _toucs2(osnew);
const wchar_t *wchosold = _toutf16(osold);
const wchar_t *wchosnew = _toutf16(osnew);
int rc = _wrename(wchosold, wchosnew);
if (rc < 0 && errno == EEXIST)
@ -213,18 +248,18 @@ bool os_file_exists(const char *ospath)
_WDIR * os_opendir(const char *osdirname)
{
return _wopendir(_toucs2(osdirname));
return _wopendir(_toutf16(osdirname));
}
int os_mkdir(const char *ospath, mode_t mode)
{
return _wmkdir(_toucs2(ospath));
return _wmkdir(_toutf16(ospath));
(void)mode;
}
int os_rmdir(const char *ospath)
{
return _wrmdir(_toucs2(ospath));
return _wrmdir(_toutf16(ospath));
}
int os_dirfd(_WDIR *osdirp)
@ -288,7 +323,7 @@ static HANDLE win32_open(const char *ospath)
{
/* FILE_FLAG_BACKUP_SEMANTICS is required for this to succeed at opening
a directory */
HANDLE h = CreateFileW(_toucs2(ospath), GENERIC_READ,
HANDLE h = CreateFileW(_toutf16(ospath), GENERIC_READ,
FILE_SHARE_READ | FILE_SHARE_WRITE |
FILE_SHARE_DELETE, NULL, OPEN_EXISTING,
FILE_FLAG_BACKUP_SEMANTICS, NULL);
@ -479,7 +514,7 @@ void volume_size(IF_MV(int volume,) sector_t *sizep, sector_t *freep)
char volpath[MAX_PATH];
if (os_volume_path(IF_MV(volume, ) volpath, sizeof (volpath)) >= 0)
GetDiskFreeSpaceExW(_toucs2(volpath), &free, &size, NULL);
GetDiskFreeSpaceExW(_toutf16(volpath), &free, &size, NULL);
if (sizep)
*sizep = size.QuadPart / 1024;

View file

@ -27,10 +27,10 @@
/* filesystem-win32.c contains some string functions that could be useful
* elsewhere; just move them away to unicode.c or something if they prove
* so. */
size_t strlcpy_ucs2utf8(char *buffer, const unsigned short *ucs,
size_t bufsize);
size_t strlcpy_utf16utf8(char *buffer, const unsigned short *utf16,
size_t bufsize);
#define strlcpy_from_os strlcpy_ucs2utf8
#define strlcpy_from_os strlcpy_utf16utf8
#endif /* __MINGW32__ */
#endif /* !OSFUNCTIONS_DECLARED */

View file

@ -1092,9 +1092,9 @@ retry_with_limit:
if (!parse_as_utf8(tag, &bytesread))
{
/* UTF-8 could potentially be 3 times larger */
/* UTF-8 could potentially be 4 times larger */
/* so we need to create a new buffer */
int utf8_size = (3 * bytesread);
int utf8_size = (4 * bytesread);
if (utf8_size > ID3V2_BUF_SIZE)
{
//limit stack allocation to avoid stack overflow

View file

@ -116,7 +116,7 @@ int gen_h = 0;
int gen_fnt = 0;
int gen_map = 1;
int start_char = 0;
int limit_char = 65535;
int limit_char = 0x10FFFF;
int oflag = 0;
char outfile[256];
@ -569,7 +569,7 @@ struct font* bdf_read_font(char *path)
int bdf_read_header(FILE *fp, struct font* pf)
{
int encoding;
int firstchar = 65535;
int firstchar = 0x10FFFF;
int lastchar = -1;
char buf[256];
char facename[256];

View file

@ -91,7 +91,7 @@ static int eid = DEFAULT_ENCODING_ID;
static FT_UShort nocmap;
int pct = 0; /* display ttc table if it is not zero. */
FT_Long max_char = 65535;
FT_Long max_char = 0x10FFFF;
int pixel_size = 15;
FT_Long start_char = 0;
FT_Long limit_char;