1
0
Fork 0
forked from len0rd/rockbox

unicode: Support characters beyond the first unicode plane

We used 16-bit variables to store the 'character code' everywhere but
this won't let us represent anything beyond U+FFFF.

This patch changes those variables to a custom type that can be 32 or 16
bits depending on the build, and adjusts numerous internal APIs and
data structures to match.  This includes:

 * utf8decode() and friends
 * font manipulation, caching, rendering, and generation
 * on-screen keyboard
 * FAT filesystem (parsing and generating utf16 LFNs)
 * WIN32 simulator platform code

Note that this patch doesn't _enable_ >16bit unicode support; a followup
patch will turn that on for appropriate targets.

Appears to work on:

  * hosted linux, native, linux simulator in both 16/32-bit modes.

Needs testing on:

  * windows and macos simulator (16bit+32bit)

Change-Id: Iba111b27d2433019b6bff937cf1ebd2c4353a0e8
This commit is contained in:
Solomon Peachy 2024-12-17 08:55:21 -05:00
parent 2a88253426
commit a2c10f6189
44 changed files with 476 additions and 330 deletions

View file

@ -1,3 +1,5 @@
/* Note these are not ucschar_t because all Arabic
codepoints fit in 16 bits, so no need to waste table space */
typedef struct {
unsigned short isolated;
unsigned short final;

View file

@ -44,7 +44,7 @@
#define XOR(a,b) ((a||b) && !(a&&b))
#ifndef BOOTLOADER
static const arab_t * arab_lookup(unsigned short uchar)
static const arab_t * arab_lookup(ucschar_t uchar)
{
if (uchar >= 0x621 && uchar <= 0x63a)
return &(jointable[uchar - 0x621]);
@ -57,15 +57,15 @@ static const arab_t * arab_lookup(unsigned short uchar)
return 0;
}
static void arabjoin(unsigned short * stringprt, int length)
static void arabjoin(ucschar_t *stringprt, int length)
{
bool connected = false;
unsigned short * writeprt = stringprt;
ucschar_t *writeprt = stringprt;
const arab_t * prev = 0;
const arab_t * cur;
const arab_t * ligature = 0;
short uchar;
ucschar_t uchar;
int i;
for (i = 0; i <= length; i++) {
@ -135,13 +135,13 @@ static void arabjoin(unsigned short * stringprt, int length)
}
#endif /* !BOOTLOADER */
unsigned short *bidi_l2v(const unsigned char *str, int orientation)
ucschar_t *bidi_l2v(const unsigned char *str, int orientation)
{
static unsigned short utf16_buf[SCROLL_LINE_SIZE];
unsigned short *target, *tmp;
static ucschar_t utf_buf[SCROLL_LINE_SIZE];
ucschar_t *target, *tmp;
#ifndef BOOTLOADER
static unsigned short bidi_buf[SCROLL_LINE_SIZE];
unsigned short *heb_str; /* *broken_str */
static ucschar_t bidi_buf[SCROLL_LINE_SIZE];
ucschar_t *heb_str; /* *broken_str */
int block_start, block_end, block_type, block_length, i;
int length = utf8length(str);
length=length>=SCROLL_LINE_SIZE?SCROLL_LINE_SIZE-1:length;
@ -152,21 +152,21 @@ unsigned short *bidi_l2v(const unsigned char *str, int orientation)
tmp = str;
*/
target = tmp = utf16_buf;
while (*str && target < &utf16_buf[SCROLL_LINE_SIZE-1])
target = tmp = utf_buf;
while (*str && target < &utf_buf[SCROLL_LINE_SIZE-1])
str = utf8decode(str, target++);
*target = 0;
#ifdef BOOTLOADER
(void)orientation;
return utf16_buf;
return utf_buf;
#else /* !BOOTLOADER */
if (target == utf16_buf) /* empty string */
if (target == utf_buf) /* empty string */
return target;
/* properly join any arabic chars */
arabjoin(utf16_buf, length);
arabjoin(utf_buf, length);
block_start=block_end=block_length=0;
@ -204,7 +204,7 @@ unsigned short *bidi_l2v(const unsigned char *str, int orientation)
for (i=block_start; i<=block_end; i++) {
*target = (block_type == orientation) ?
*(utf16_buf+i) : *(utf16_buf+block_end-i+block_start);
*(utf_buf+i) : *(utf_buf+block_end-i+block_start);
if (block_type!=orientation) {
switch (*target) {
case '(':
@ -226,7 +226,7 @@ unsigned short *bidi_l2v(const unsigned char *str, int orientation)
*target = 0;
#if 0 /* Is this code really necessary? */
broken_str = utf16_buf;
broken_str = utf_buf;
begin=end=length-1;
target = broken_str;
@ -246,7 +246,7 @@ unsigned short *bidi_l2v(const unsigned char *str, int orientation)
if (char_count==max_chars) { /* try to avoid breaking words */
int new_char_count = char_count;
int new_begin = begin;
while (new_char_count>0) {
if (_isblank(heb_str[new_begin]) ||
_isnewline(heb_str[new_begin])) {
@ -261,11 +261,11 @@ unsigned short *bidi_l2v(const unsigned char *str, int orientation)
}
}
orig_begin=begin;
/* if (_isblank(heb_str[begin])) {
heb_str[begin]='\n';
} */
/* skip leading newlines */
while (begin<=end && _isnewline(heb_str[begin])) {
begin++;
@ -282,7 +282,7 @@ unsigned short *bidi_l2v(const unsigned char *str, int orientation)
target++;
}
begin=orig_begin;
if (begin<=0) {
*target = 0;
break;
@ -295,4 +295,3 @@ unsigned short *bidi_l2v(const unsigned char *str, int orientation)
return heb_str;
#endif /* !BOOTLOADER */
}

View file

@ -28,8 +28,8 @@
#include "system.h"
#define DIAC_NUM_RANGES (ARRAYLEN(diac_ranges))
#define DIAC_RTL (1 << 7)
#define DIAC_CNT (0xFF ^ DIAC_RTL)
#define DIAC_RTL (1 << 15)
#define DIAC_CNT (0xFFFF ^ DIAC_RTL)
/* Each diac_range_ struct defines a Unicode range that begins with
* N diacritic characters, and continues with non-diacritic characters up to the
@ -39,8 +39,8 @@
struct diac_range
{
uint16_t base;
uint8_t info; /* [RTL:1 CNT:7] */
uint16_t base; /* Not ucschar_t until we need >16b */
uint16_t info; /* [RTL:1 CNT:15] */
};
#define DIAC_RANGE_ENTRY(first_diac, first_non_diac, is_rtl) \
@ -51,7 +51,7 @@ struct diac_range
static const struct diac_range diac_ranges[] =
{
DIAC_RANGE_ENTRY(0x0000, 0x0000, 0),
DIAC_RANGE_ENTRY(FIRST_DIACRITIC, 0x0370, 0),
DIAC_RANGE_ENTRY(FIRST_DIACRITIC, 0x0370, 0), /* v1 - v4.1 */
DIAC_RANGE_ENTRY(0x0483, 0x048a, 0),
DIAC_RANGE_ENTRY(0x0591, 0x05be, 1),
DIAC_RANGE_ENTRY(0x05bf, 0x05c0, 1),
@ -146,6 +146,7 @@ static const struct diac_range diac_ranges[] =
DIAC_RANGE_ENTRY(0x19c8, 0x19ca, 0),
DIAC_RANGE_ENTRY(0x1a17, 0x1a1c, 0),
DIAC_RANGE_ENTRY(0x1a55, 0x1a80, 0),
DIAC_RANGE_ENTRY(0x1ab0, 0x1b00, 0), /* v7.0 */
DIAC_RANGE_ENTRY(0x1b00, 0x1b05, 0),
DIAC_RANGE_ENTRY(0x1b34, 0x1b45, 0),
DIAC_RANGE_ENTRY(0x1b6b, 0x1b74, 0),
@ -156,10 +157,10 @@ static const struct diac_range diac_ranges[] =
DIAC_RANGE_ENTRY(0x1cd4, 0x1ce9, 0),
DIAC_RANGE_ENTRY(0x1ced, 0x1cee, 0),
DIAC_RANGE_ENTRY(0x1cf2, 0x1cf3, 0),
DIAC_RANGE_ENTRY(0x1dc0, 0x1e00, 0),
DIAC_RANGE_ENTRY(0x20d0, 0x20f1, 0),
DIAC_RANGE_ENTRY(0x1dc0, 0x1e00, 0), /* v4.1 - v5.2 */
DIAC_RANGE_ENTRY(0x20d0, 0x2100, 0), /* v1.0 - v5.1 */
DIAC_RANGE_ENTRY(0x2cef, 0x2cf2, 0),
DIAC_RANGE_ENTRY(0x2de0, 0x2e00, 0),
DIAC_RANGE_ENTRY(0x2de0, 0x2e00, 0), /* v5.1 */
DIAC_RANGE_ENTRY(0x302a, 0x3030, 0),
DIAC_RANGE_ENTRY(0x3099, 0x309b, 0),
DIAC_RANGE_ENTRY(0xa66f, 0xa673, 0),
@ -188,7 +189,7 @@ static const struct diac_range diac_ranges[] =
DIAC_RANGE_ENTRY(0xabe3, 0xabeb, 0),
DIAC_RANGE_ENTRY(0xabec, 0xabee, 0),
DIAC_RANGE_ENTRY(0xfb1e, 0xfb1f, 0),
DIAC_RANGE_ENTRY(0xfe20, 0xfe27, 0),
DIAC_RANGE_ENTRY(0xfe20, 0xfe30, 0), /* v1.0 - v8.0 */
DIAC_RANGE_ENTRY(0xfe70, 0xfe70, 1),
DIAC_RANGE_ENTRY(0xff00, 0xff00, 0),
DIAC_RANGE_ENTRY(0xffff, 0xffff, 0),
@ -196,7 +197,7 @@ static const struct diac_range diac_ranges[] =
#define MRU_MAX_LEN 32
bool is_diacritic(const unsigned short char_code, bool *is_rtl)
bool is_diacritic(const ucschar_t char_code, bool *is_rtl)
{
static uint8_t mru_len = 0;
static uint8_t diacritic_mru[MRU_MAX_LEN];
@ -209,7 +210,6 @@ bool is_diacritic(const unsigned short char_code, bool *is_rtl)
/* Search in MRU */
for (mru = 0, i = 0; mru < mru_len; mru++)
{
/* Items shifted >> 1 */
itmp = i;
i = diacritic_mru[mru];
@ -250,10 +250,10 @@ Found:
if (is_rtl)
*is_rtl = ((DIAC_RTL & info) == DIAC_RTL);
return (char_code < diac->base + (info & DIAC_CNT));
return (char_code < (diac->base + (info & DIAC_CNT)));
}
#else /*BOOTLOADER*/
inline bool is_diacritic(const unsigned short char_code, bool *is_rtl)
inline bool is_diacritic(const ucschar_t char_code, bool *is_rtl)
{
(void)char_code;
if (is_rtl)

View file

@ -127,7 +127,7 @@ static int volatile cp_table_ref = 0;
/* non-default codepage table buffer (cannot be bufalloced! playback itself
may be making the load request) */
static unsigned short codepage_table[MAX_CP_TABLE_SIZE+1];
static unsigned short codepage_table[MAX_CP_TABLE_SIZE+1]; // XXX convert to ucschar_t if we ever need > 16bit mappings?
#if defined(APPLICATION) && defined(__linux__)
static const char * const name_codepages_linux[NUM_CODEPAGES+1] =
@ -344,7 +344,7 @@ unsigned char* iso_decode_ex(const unsigned char *iso, unsigned char *utf8, int
cp_lock_leave();
while (count-- && utf8_size > 0) {
unsigned short ucs, tmp;
ucschar_t ucs, tmp;
if (*iso < 128 || cp == UTF_8) /* Already UTF-8 */
{
@ -511,8 +511,25 @@ unsigned long utf8length(const unsigned char *utf8)
return l;
}
/* Count the UTF-16 code units needed to encode a NUL-terminated UTF-8
 * string. The terminator itself is not counted. */
unsigned long utf16len_utf8(const unsigned char *utf8)
{
    unsigned long units = 0;
    ucschar_t codepoint;

    /* One code unit per decoded character ... */
    for (; *utf8; units++) {
        utf8 = utf8decode(utf8, &codepoint);
#ifdef UNICODE32
        /* ... plus a second one when a surrogate pair is required */
        if (codepoint >= 0x10000)
            units++;
#endif
    }

    return units;
}
/* Decode 1 UTF-8 char and return a pointer to the next char. */
const unsigned char* utf8decode(const unsigned char *utf8, unsigned short *ucs)
const unsigned char* utf8decode(const unsigned char *utf8, ucschar_t *ucs)
{
unsigned char c = *utf8++;
unsigned long code;
@ -552,8 +569,16 @@ const unsigned char* utf8decode(const unsigned char *utf8, unsigned short *ucs)
/* Invalid UTF-8 char */
code = 0xfffd;
}
/* currently we don't support chars above U-FFFF */
*ucs = (code < 0x10000) ? code : 0xfffd;
#ifdef UNICODE32
if (code > 0x10ffff)
code = 0xfffd;
#else
if (code > 0xffff)
code = 0xfffd;
#endif
*ucs = code;
return utf8;
}

View file

@ -747,6 +747,8 @@ static bool fatlong_parse_entry(struct fatlong_parse_state *lnparse,
/* so far so good; save entry information */
lnparse->ord = ord;
/* Treat entries as opaque 16-bit values; UTF-16 decoding and
re-encoding as UTF-8 happens in fatlong_parse_finish() */
uint16_t *ucsp = fatent->ucssegs[ord - 1 + 5];
unsigned int i = longent_char_first();
@ -797,13 +799,24 @@ static bool fatlong_parse_finish(struct fatlong_parse_state *lnparse,
/* ensure the last segment is NULL-terminated if it is filled */
fatent->ucssegs[lnparse->ord_max + 5][0] = 0x0000;
for (uint16_t *ucsp = fatent->ucssegs[5], ucc = *ucsp;
ucc; ucc = *++ucsp)
unsigned long ucc; /* Decoded codepoint */
uint16_t *ucsp, ucs;
for (ucsp = fatent->ucssegs[5], ucs=*ucsp; ucs; ucs = *++ucsp)
{
/* end should be hit before ever seeing padding */
if (ucc == 0xffff)
if (ucs == 0xffff)
return false;
#ifdef UNICODE32
/* Check for a surrogate UTF16 pair */
if (ucs >= 0xd800 && ucs < 0xdc00 &&
*(ucsp+1) >= 0xdc00 && *(ucsp+1) < 0xe000) {
ucc = 0x10000 + (((ucs & 0x3ff) << 10) | (*(ucsp+1) & 0x3ff));
ucsp++;
} else
#endif
ucc = ucs;
if ((p = utf8encode(ucc, p)) - name > FAT_DIRENTRY_NAME_MAX)
return false;
}
@ -1612,12 +1625,27 @@ static int write_longname(struct bpb *fat_bpb, struct fat_filestr *parentstr,
for (unsigned long i = 0; i < ucspadlen; i++)
{
if (i < ucslen)
if (i < ucslen) {
#ifdef UNICODE32
ucschar_t tmp;
name = utf8decode(name, &tmp);
/* For codepoints > U+FFFF we will need to use a UTF16 surrogate
pair. 'ucslen' already takes this into account! */
if (tmp < 0x10000) {
ucsname[i] = tmp;
} else {
tmp -= 0x10000;
ucsname[i++] = 0xd800 | ((tmp >> 10) & 0x3ff); /* High */
ucsname[i] = 0xdc00 | (tmp & 0x3ff); /* Low */
}
#else
name = utf8decode(name, &ucsname[i]);
else if (i == ucslen)
#endif
} else if (i == ucslen) {
ucsname[i] = 0x0000; /* name doesn't fill last block */
else /* i > ucslen */
} else /* i > ucslen */ {
ucsname[i] = 0xffff; /* pad-out to end */
}
}
dc_lock_cache();
@ -1744,9 +1772,12 @@ static int add_dir_entry(struct bpb *fat_bpb, struct fat_filestr *parentstr,
create_dos_name(basisname, name, &n);
randomize_dos_name(shortname, basisname, &n);
/* one dir entry needed for every 13 characters of filename,
plus one entry for the short name */
ucslen = utf8length(name);
/* one dir entry needed for every 13 utf16 "code units"
of filename, plus one entry for the short name.
Keep in mind that a unicode character can take up to
two code units!
*/
ucslen = utf16len_utf8(name);
if (ucslen > 255)
FAT_ERROR(-2); /* name is too long */

View file

@ -385,7 +385,7 @@ static void LCDFN(mono_bmp_part_helper)(const unsigned char *src, int src_x,
/* put a string at a given pixel position, skipping first ofs pixel columns */
static void LCDFN(putsxyofs)(int x, int y, int ofs, const unsigned char *str)
{
unsigned short *ucs;
ucschar_t *ucs;
struct viewport *vp = LCDFN(current_viewport);
font_lock(vp->font, true);
struct font* pf = font_get(vp->font);
@ -429,7 +429,7 @@ static void LCDFN(putsxyofs)(int x, int y, int ofs, const unsigned char *str)
bool is_rtl, is_diac;
const unsigned char *bits;
int width, base_width, base_ofs = 0;
const unsigned short next_ch = ucs[1];
const ucschar_t next_ch = ucs[1];
if (x >= vp->width)
break;
@ -447,7 +447,7 @@ static void LCDFN(putsxyofs)(int x, int y, int ofs, const unsigned char *str)
{
if (!rtl_next_non_diac_width)
{
const unsigned short *u;
const ucschar_t *u;
/* Jump to next non-diacritic char, and calc its width */
for (u = &ucs[1]; *u && IS_DIACRITIC(*u); u++);
@ -529,7 +529,7 @@ static void LCDFN(putsxyofs)(int x, int y, int ofs, const unsigned char *str)
/* put a string at a given pixel position, skipping first ofs pixel columns */
static void LCDFN(putsxyofs)(int x, int y, int ofs, const unsigned char *str)
{
unsigned short *ucs;
ucschar_t *ucs;
struct viewport *vp = LCDFN(current_viewport);
struct font* pf = font_get(vp->font);
const unsigned char *bits;
@ -567,7 +567,7 @@ static void LCDFN(putsxyofs)(int x, int y, int ofs, const unsigned char *str)
/* allow utf but no diacritics or rtl lang */
for (ucs = bidi_l2v(str, 1); *ucs; ucs++)
{
const unsigned short next_ch = ucs[1];
const ucschar_t next_ch = ucs[1];
if (x >= vp->width)
break;

View file

@ -21,6 +21,6 @@
#ifndef BIDI_H
#define BIDI_H
extern unsigned short *bidi_l2v(const unsigned char *str, int orientation);
ucschar_t *bidi_l2v(const unsigned char *str, int orientation);
#endif /* BIDI_H */

View file

@ -1461,4 +1461,11 @@ Lyre prototype 1 */
#error "HAVE_LCD_SLEEP_SETTING requires HAVE_LCD_SLEEP"
#endif
// XXX Figure out a better place to put this?
/* ucschar_t is the type used to hold a single character code.
 * Builds that define UNICODE32 get 'unsigned int'; all other builds keep
 * 'unsigned short'.
 * NOTE(review): 'unsigned int' is presumably 32 bits on every target that
 * will enable UNICODE32 -- confirm. */
#ifdef UNICODE32
#define ucschar_t unsigned int
#else
#define ucschar_t unsigned short
#endif
#endif /* __CONFIG_H__ */

View file

@ -18,6 +18,9 @@
* KIND, either express or implied.
*
****************************************************************************/
#ifndef __CPU_H
#define __CPU_H
#include "config.h"
#if CONFIG_CPU == MCF5249
@ -80,3 +83,5 @@
#if CONFIG_CPU == STM32H743
#include "cpu-stm32h743.h"
#endif
#endif /* __CPU_H */

View file

@ -86,7 +86,7 @@ struct font {
int maxwidth; /* max width in pixels*/
unsigned int height; /* height in pixels*/
int ascent; /* ascent (baseline) height*/
int firstchar; /* first character in bitmap*/
unsigned int firstchar; /* first character in bitmap*/
int size; /* font size in glyphs*/
int depth; /* depth of the font, 0=1bit and 1=4bit */
const unsigned char *bits; /* 8-bit column bitmap data*/
@ -95,24 +95,24 @@ struct font {
const unsigned char *width; /* character widths or NULL if fixed*/
int defaultchar; /* default char (not glyph index)*/
int32_t bits_size; /* # bytes of glyph bits*/
/* file, buffer and cache management */
int fd; /* fd for the font file. >= 0 if cached */
int fd_width; /* fd for the font file. >= 0 if cached */
int fd_offset; /* fd for the font file. >= 0 if cached */
int fd_offset; /* fd for the font file. >= 0 if cached */
int handle; /* core_allocator handle */
unsigned char *buffer_start; /* buffer to store the font in */
unsigned char *buffer_position; /* position in the buffer */
unsigned char *buffer_start; /* buffer to store the font in */
unsigned char *buffer_position; /* position in the buffer */
unsigned char *buffer_end; /* end of the buffer */
size_t buffer_size; /* size of the buffer in bytes */
bool disabled; /* font disabled (use blank as fallback if not in cache) */
#ifndef __PCTOOL__
#ifndef __PCTOOL__
struct font_cache cache;
uint32_t file_width_offset; /* offset to file width data */
uint32_t file_offset_offset; /* offset to file offset data */
int long_offset;
#endif
#endif
};
/* font routines*/
@ -134,7 +134,7 @@ void font_enable_all(void);
struct font* font_get(int font);
int font_getstringnsize(const unsigned char *str, size_t maxbytes, int *w, int *h, int fontnumber);
int font_getstringsize(const unsigned char *str, int *w, int *h, int fontnumber);
int font_get_width(struct font* ft, unsigned short ch);
const unsigned char * font_get_bits(struct font* ft, unsigned short ch);
int font_get_width(struct font* ft, ucschar_t ch);
const unsigned char * font_get_bits(struct font* ft, ucschar_t ch);
#endif

View file

@ -21,5 +21,4 @@
extern const char jamo_table[51][3];
unsigned short hangul_join(unsigned short lead, unsigned short vowel,
unsigned short tail);
ucschar_t hangul_join(ucschar_t lead, ucschar_t vowel, ucschar_t tail);

View file

@ -53,6 +53,12 @@
#define FONT_EXT "fnt"
#define GLYPH_CACHE_EXT "gc"
#ifdef UNICODE32
#define FC_HEADER_VAL 0x01000020
#else
#define FC_HEADER_VAL 0x01000010
#endif
/* max static loadable font buffer size */
#ifndef MAX_FONT_SIZE
#if LCD_HEIGHT > 64
@ -182,7 +188,7 @@ void font_init(void)
static short readshort(struct font *pf)
{
unsigned short s;
uint16_t s;
s = *pf->buffer_position++ & 0xff;
s |= (*pf->buffer_position++ << 8);
@ -361,8 +367,8 @@ static size_t font_glyphs_to_bufsize(struct font *pf, int glyphs)
size_t bufsize;
/* LRU bytes per glyph */
bufsize = LRU_SLOT_OVERHEAD + sizeof(struct font_cache_entry) +
sizeof( unsigned short);
bufsize = LRU_SLOT_OVERHEAD + sizeof(struct font_cache_entry) +
sizeof(unsigned short);
/* Image bytes per glyph */
bufsize += glyph_bytes(pf, pf->maxwidth);
bufsize *= glyphs;
@ -371,7 +377,7 @@ static size_t font_glyphs_to_bufsize(struct font *pf, int glyphs)
}
static struct font* font_load_header(int fd, struct font *pheader,
struct font *pf,
struct font *pf,
uint32_t *nwidth, uint32_t *noffset)
{
/* Load the header. Readshort() and readlong() *
@ -420,16 +426,24 @@ int font_load_ex( const char *path, size_t buf_size, int glyphs )
if ( fd < 0 )
return -1;
#ifdef UNICODE32
if (glyphs && glyphs < 3)
glyphs = 3; /* Guarantee we'll always have at least 2 after alignment */
#else
if (glyphs && glyphs < 2)
glyphs = 2; /* Guarantee we'll always have at least 1 after alignment */
#endif
/* load font struct f with file header */
int file_size = filesize( fd );
struct font header;
struct font f;
uint32_t nwidth, noffset;
uint32_t nwidth, noffset;
if ( !font_load_header( fd, &header, &f, &nwidth, &noffset )
#if LCD_DEPTH < 16
|| f.depth
#endif
#endif
)
{
close(fd);
@ -458,7 +472,7 @@ int font_load_ex( const char *path, size_t buf_size, int glyphs )
cached = true;
else
bufsize = file_size;
/* check already loaded */
int font_id = find_font_index(path);
@ -503,7 +517,7 @@ int font_load_ex( const char *path, size_t buf_size, int glyphs )
return -1;
}
pd->refcount++;
//printf("reusing handle %d for %s (count: %d)\n", font_id, path, pd->refcount);
//printf("reusing handle %d for %s (count: %d)\n", font_id, path, pd->refcount);
close(fd);
return font_id;
}
@ -522,7 +536,7 @@ int font_load_ex( const char *path, size_t buf_size, int glyphs )
return -1;
font_id = open_slot;
size_t path_bufsz = MAX(path_len + 1, 64); /* enough size for common case */
/* allocate mem */
/* allocate mem */
int handle = core_alloc_ex(
bufsize + path_bufsz + sizeof( struct buflib_alloc_data ),
&buflibops );
@ -574,7 +588,7 @@ int font_load_ex( const char *path, size_t buf_size, int glyphs )
pf->fd_offset = -1;
}
else
{
{
lseek( fd, 0, SEEK_SET);
read(fd, pf->buffer_start, pf->buffer_size);
@ -723,7 +737,7 @@ load_cache_entry(struct font_cache_entry* p, void* callback_data)
{
struct font* pf = callback_data;
unsigned short char_code = p->_char_code;
ucschar_t char_code = p->_char_code;
int fd;
lock_font_handle(pf->handle, true);
@ -788,7 +802,7 @@ static void cache_create(struct font* pf)
* when the font file is closed during USB */
unsigned char *cache_buf = pf->buffer_start + bitmap_size;
size_t cache_size = pf->buffer_size - bitmap_size;
ALIGN_BUFFER(cache_buf, cache_size, 2);
ALIGN_BUFFER(cache_buf, cache_size, sizeof(ucschar_t));
memset(pf->buffer_start, 0, bitmap_size);
/* Initialise cache */
font_cache_create(&pf->cache, cache_buf, cache_size, bitmap_size);
@ -797,7 +811,7 @@ static void cache_create(struct font* pf)
/*
* Returns width of character
*/
int font_get_width(struct font* pf, unsigned short char_code)
int font_get_width(struct font* pf, ucschar_t char_code)
{
int width;
struct font_cache_entry *e;
@ -820,7 +834,7 @@ int font_get_width(struct font* pf, unsigned short char_code)
return width;
}
const unsigned char* font_get_bits(struct font* pf, unsigned short char_code)
const unsigned char* font_get_bits(struct font* pf, ucschar_t char_code)
{
const unsigned char* bits;
@ -831,7 +845,7 @@ const unsigned char* font_get_bits(struct font* pf, unsigned short char_code)
if (pf->fd >= 0 && pf != &sysfont)
{
bits =
bits =
(unsigned char*)font_cache_get(&pf->cache, char_code,
false, load_cache_entry, pf)->bitmap;
}
@ -884,7 +898,7 @@ static void glyph_file_write(void* data)
{
struct font_cache_entry* p = data;
struct font* pf = cache_pf;
unsigned short ch;
ucschar_t ch;
static int buffer_pos = 0;
#define WRITE_BUFFER 256
static unsigned char buffer[WRITE_BUFFER];
@ -899,11 +913,19 @@ static void glyph_file_write(void* data)
}
if ( p->_char_code == 0xffff )
return;
ch = p->_char_code + pf->firstchar;
buffer[buffer_pos] = ch >> 8;
#ifdef UNICODE32
buffer[buffer_pos] = (ch >> 24) & 0xff;
buffer[buffer_pos+1] = (ch >> 16) & 0xff;
buffer[buffer_pos+2] = (ch >> 8) & 0xff;
buffer[buffer_pos+3] = ch & 0xff;
buffer_pos += 4;
#else
buffer[buffer_pos] = (ch >> 8) & 0xff;
buffer[buffer_pos+1] = ch & 0xff;
buffer_pos += 2;
#endif
return;
}
@ -928,11 +950,13 @@ static void glyph_cache_save(int font_id)
fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, 0666);
if (fd >= 0)
{
uint32_t header = FC_HEADER_VAL;
write(fd, &header, sizeof(header));
cache_pf = pf;
cache_fd = fd;
lru_traverse(&cache_pf->cache._lru, glyph_file_write);
glyph_file_write(NULL);
if (cache_fd >= 0)
if (cache_fd >= 0)
{
close(cache_fd);
cache_fd = -1;
@ -944,9 +968,9 @@ static void glyph_cache_save(int font_id)
}
static int ushortcmp(const void *a, const void *b)
static int ucscharcmp(const void *a, const void *b)
{
return ((int)(*(unsigned short*)a - *(unsigned short*)b));
return ((int)(*(ucschar_t*)a - *(ucschar_t*)b));
}
static NO_INLINE void glyph_cache_load(const char *font_path, struct font *pf)
@ -954,13 +978,13 @@ static NO_INLINE void glyph_cache_load(const char *font_path, struct font *pf)
#define MAX_SORT 256
if (pf->fd >= 0) {
int i, size, fd;
unsigned char tmp[2];
unsigned short ch;
unsigned short glyphs[MAX_SORT];
unsigned short glyphs_lru_order[MAX_SORT];
int glyph_file_skip=0, glyph_file_size=0;
int sort_size = pf->cache._capacity;
unsigned char tmp[sizeof(ucschar_t)];
ucschar_t ch;
ucschar_t glyphs[MAX_SORT];
ucschar_t glyphs_lru_order[MAX_SORT];
unsigned int glyph_file_skip=0, glyph_file_size=0;
int sort_size = pf->cache._capacity;
if ( sort_size > MAX_SORT )
sort_size = MAX_SORT;
@ -974,31 +998,41 @@ static NO_INLINE void glyph_cache_load(const char *font_path, struct font *pf)
fd = open(GLYPH_CACHE_FILE, O_RDONLY|O_BINARY);
#endif
if (fd >= 0) {
/* Header */
uint32_t hdr = 0;
read(fd, &hdr, sizeof(hdr));
if (hdr != FC_HEADER_VAL)
goto latin;
/* only read what fits */
glyph_file_size = filesize( fd );
if ( glyph_file_size > 2*pf->cache._capacity ) {
glyph_file_skip = glyph_file_size - 2*pf->cache._capacity;
lseek( fd, glyph_file_skip, SEEK_SET );
if (glyph_file_size < sizeof(uint32_t))
goto latin;
glyph_file_size -= sizeof(uint32_t);
if ( glyph_file_size > (int)sizeof(ucschar_t)*pf->cache._capacity ) {
glyph_file_skip = glyph_file_size - sizeof(ucschar_t)*pf->cache._capacity;
lseek( fd, glyph_file_skip + sizeof(uint32_t), SEEK_SET );
}
while(1) {
for ( size = 0;
read( fd, tmp, 2 ) == 2 && size < sort_size;
size++ )
read( fd, tmp, sizeof(tmp) ) == sizeof(tmp) && size < sort_size;
size++ )
{
#ifdef UNICODE32
glyphs[size] = (tmp[0] << 24) | (tmp[1] << 16) | (tmp[2] << 8) | tmp[3];
#else
glyphs[size] = (tmp[0] << 8) | tmp[1];
#endif
glyphs_lru_order[size] = glyphs[size];
}
/* sort glyphs array to make sector cache happy */
qsort((void *)glyphs, size, sizeof(unsigned short),
ushortcmp );
qsort((void *)glyphs, size, sizeof(ucschar_t),
ucscharcmp );
/* load font bitmaps */
for( i = 0; i < size ; i++ )
font_get_bits(pf, glyphs[i]);
font_get_bits(pf, glyphs[i]);
/* redo to fix lru order */
for ( i = 0; i < size ; i++)
font_get_bits(pf, glyphs_lru_order[i]);
@ -1009,6 +1043,7 @@ static NO_INLINE void glyph_cache_load(const char *font_path, struct font *pf)
close(fd);
} else {
latin:
/* load latin1 chars into cache */
for ( ch = 32 ; ch < 256 && ch < pf->cache._capacity + 32; ch++ )
font_get_bits(pf, ch);
@ -1040,7 +1075,7 @@ struct font* font_get(int font)
/*
* Returns width of character
*/
int font_get_width(struct font* pf, unsigned short char_code)
int font_get_width(struct font* pf, ucschar_t char_code)
{
/* check input range*/
if (char_code < pf->firstchar || char_code >= pf->firstchar+pf->size)
@ -1050,7 +1085,7 @@ int font_get_width(struct font* pf, unsigned short char_code)
return pf->width? pf->width[char_code]: pf->maxwidth;
}
const unsigned char* font_get_bits(struct font* pf, unsigned short char_code)
const unsigned char* font_get_bits(struct font* pf, ucschar_t char_code)
{
const unsigned char* bits;
@ -1079,7 +1114,7 @@ int font_getstringnsize(const unsigned char *str, size_t maxbytes, int *w, int *
{
struct font* pf = font_get(fontnum);
font_lock( fontnum, true );
unsigned short ch;
ucschar_t ch;
int width = 0;
size_t b = maxbytes - 1;

View file

@ -43,13 +43,18 @@ void font_cache_create(
int font_cache_entry_size =
sizeof(struct font_cache_entry) + bitmap_bytes_size;
/* make sure font cache entries are a multiple of 16 bits */
if (font_cache_entry_size % 2 != 0)
/* make sure font cache entries are a multiple of sizeof(ucschar_t) */
while (font_cache_entry_size & (sizeof(ucschar_t) -1))
font_cache_entry_size++;
int cache_size = buf_size /
(font_cache_entry_size + LRU_SLOT_OVERHEAD + sizeof(short));
#ifdef UNICODE32
/* Ensure LRU index size is a multiple of 32 bits */
cache_size &= ~1;
#endif
fcache->_size = 1;
fcache->_capacity = cache_size;
fcache->_prev_result = 0;
@ -72,12 +77,12 @@ void font_cache_create(
/*************************************************************************
* Binary search that attempts a primary lucky guess that succeeds
* when there are consecutive codes in the cache between previous
* search and new search. Returns a negative of insertion point if
* when there are consecutive codes in the cache between previous
* search and new search. Returns a negative of insertion point if
* not found.
************************************************************************/
static int search(struct font_cache* fcache,
unsigned short char_code,
ucschar_t char_code,
int size,
int *p_insertion_point )
{
@ -85,12 +90,12 @@ static int search(struct font_cache* fcache,
int left, right, mid=-1, c;
left = 0;
right = size;
/* go for a lucky guess */
mid = char_code +
mid = char_code +
fcache->_prev_result - fcache->_prev_char_code;
/* check bounds */
/* check bounds */
if ( mid < 0 || mid > right )
mid = ( left + right ) / 2;
@ -114,7 +119,7 @@ static int search(struct font_cache* fcache,
mid = (left + right) / 2;
}
while (left <= right);
/* not found */
*p_insertion_point = mid;
return 0;
@ -124,7 +129,7 @@ static int search(struct font_cache* fcache,
******************************************************************************/
struct font_cache_entry* font_cache_get(
struct font_cache* fcache,
unsigned short char_code,
ucschar_t char_code,
bool cache_only,
void (*callback) (struct font_cache_entry* p, void *callback_data),
void *callback_data)
@ -132,7 +137,7 @@ struct font_cache_entry* font_cache_get(
struct font_cache_entry* p;
int insertion_point;
int index_to_replace;
/* check bounds */
p = lru_data(&fcache->_lru, fcache->_index[0]);
if( char_code < p->_char_code )
@ -158,14 +163,14 @@ struct font_cache_entry* font_cache_get(
}
else
{
p = lru_data(&fcache->_lru,
p = lru_data(&fcache->_lru,
fcache->_index[insertion_point+1]);
if ( char_code > p->_char_code )
insertion_point++;
}
}
}
/* not found */
if (cache_only)
return NULL;

View file

@ -18,6 +18,7 @@
* KIND, either express or implied.
*
****************************************************************************/
#include "config.h"
#include "hangul.h"
const char jamo_table[51][3] = {
@ -75,10 +76,9 @@ const char jamo_table[51][3] = {
};
/* takes three jamo chars and joins them into one hangul */
unsigned short hangul_join(unsigned short lead, unsigned short vowel,
unsigned short tail)
ucschar_t hangul_join(ucschar_t lead, ucschar_t vowel, ucschar_t tail)
{
unsigned short ch = 0xfffd;
ucschar_t ch = 0xfffd;
if (lead < 0x3131 || lead > 0x3163)
return ch;

View file

@ -27,7 +27,7 @@
* Sets is_rtl (if it's not NULL) to whether the character
* belongs to an RTL language.
*/
bool is_diacritic(const unsigned short char_code, bool *is_rtl);
bool is_diacritic(const ucschar_t char_code, bool *is_rtl);
/* Note IS_DIACRITIC macros may elide the function call
* therefore there is a separate _RTL version that requires a bool pointer

View file

@ -21,24 +21,25 @@
#ifndef _FONT_CACHE_H_
#define _FONT_CACHE_H_
#include <stdbool.h>
#include "config.h"
#include "lru.h"
/*******************************************************************************
*
*
******************************************************************************/
struct font_cache
{
struct lru _lru;
int _size;
int _capacity;
int _prev_char_code;
unsigned int _size;
unsigned int _capacity;
ucschar_t _prev_char_code;
int _prev_result;
short *_index; /* index of lru handles in char_code order */
};
struct font_cache_entry
{
unsigned short _char_code;
ucschar_t _char_code;
unsigned char width;
unsigned char bitmap[1]; /* place holder */
};
@ -55,7 +56,7 @@ void font_cache_create(
* Note: With cache_only this can return NULL, which otherwise never happens */
struct font_cache_entry* font_cache_get(
struct font_cache* fcache,
unsigned short char_code,
ucschar_t char_code,
bool cache_only,
void (*callback) (struct font_cache_entry* p, void *callback_data),
void *callback_data);

View file

@ -33,6 +33,7 @@ struct lru
void *_base;
};
/* LRU_SLOT_OVERHEAD is the fixed portion of struct lru_node */
#define LRU_SLOT_OVERHEAD (2 * sizeof(short))
/* Create LRU list with specified size from buf. */
@ -45,4 +46,3 @@ void *lru_data(struct lru* pl, short handle);
void lru_traverse(struct lru* pl, void (*callback)(void* data));
#endif /* LRU_H */

View file

@ -27,7 +27,7 @@
*/
#ifndef _RBUNICODE_H_
#define _RBUNICODE_H_
#include "config.h"
#include <stdbool.h>
@ -63,8 +63,9 @@ unsigned char* utf16LEdecode(const unsigned char *utf16, unsigned char *utf8, in
unsigned char* utf16BEdecode(const unsigned char *utf16, unsigned char *utf8, int count);
unsigned char* utf16decode(const unsigned char *utf16, unsigned char *utf8, int count, int utf8_size, bool le);
bool utf16_has_bom(const unsigned char *utf16, bool *le);
unsigned long utf16len_utf8(const unsigned char *utf8);
unsigned long utf8length(const unsigned char *utf8);
const unsigned char* utf8decode(const unsigned char *utf8, unsigned short *ucs);
const unsigned char* utf8decode(const unsigned char *utf8, ucschar_t *ucs);
void set_codepage(int cp);
int get_codepage(void);
int utf8seek(const unsigned char* utf8, int offset);

View file

@ -63,55 +63,78 @@ static void win32_last_error_errno(void)
static HANDLE win32_open(const char *ospath);
static int win32_stat(const char *ospath, LPBY_HANDLE_FILE_INFORMATION lpInfo);
unsigned short * strcpy_utf8ucs2(unsigned short *buffer,
const unsigned char *utf8)
static unsigned short * strcpy_utf8utf16(unsigned short *buffer,
const unsigned char *utf8)
{
for (wchar_t *ucs2 = buffer;
((utf8 = utf8decode(utf8, ucs2)), *ucs2); ucs2++);
/* Convert one code point per iteration. The destination is uninitialised
   on entry, so it must never be read to decide when to stop: the loop
   ends only after the decoded terminator (0) has been stored. */
for (wchar_t *ucs = buffer; ; ucs++) {
    ucschar_t cp;
    utf8 = utf8decode(utf8, &cp);
#ifdef UNICODE32
    /* Everything from U+10000 upwards (inclusive) needs a surrogate pair */
    if (cp >= 0x10000) {
        cp -= 0x10000;
        *ucs++ = 0xd800 | (cp >> 10);   /* lead surrogate: top 10 bits */
        cp = 0xdc00 | (cp & 0x3ff);     /* trail surrogate: low 10 bits */
    }
#endif
    *ucs = cp;
    if (!cp)
        break;                          /* terminator has been written */
}
return buffer;
}
#if 0
unsigned char * strcpy_ucs2utf8(unsigned char *buffer,
const unsigned short *ucs2)
#if 0 /* Unused in current code */
static unsigned char * strcpy_utf16utf8(unsigned char *buffer,
const unsigned short *utf16buf)
{
for (unsigned char *utf8 = buffer;
((utf8 = utf8encode(*ucs2, utf8)), *ucs2); ucs2++);
unsigned char *utf8 = buffer;
/* windows is always LE */
const int le = 1;
while (*utf16buf) {
const unsigned char *utf16 = (const unsigned char *)utf16buf;
unsigned long ucs;
/* Check for a surrogate pair */
if (*(utf16 + le) >= 0xD8 && *(utf16 + le) < 0xE0) {
ucs = 0x10000 + ((utf16[1 - le] << 10) | ((utf16[le] - 0xD8) << 18)
| utf16[2 + (1 - le)] | ((utf16[2 + le] - 0xDC) << 8));
utf16buf += 2;
} else {
ucs = utf16[le] << 8 | utf16[1 - le];
utf16buf++;
}
utf8 = utf8encode(ucs, utf8);
}
return buffer;
}
size_t strlen_utf8ucs2(const unsigned char *utf8)
{
/* This won't properly count multiword ucs2 so use the alternative
below for now which doesn't either */
size_t length = 0;
unsigned short ucschar[2];
for (unsigned char c = *utf8; c;
((utf8 = utf8decode(utf8, ucschar)), c = *utf8))
length++;
return length;
}
#endif /* 0 */
size_t strlen_utf8ucs2(const unsigned char *utf8)
{
return utf8length(utf8);
}
size_t strlen_ucs2utf8(const unsigned short *ucs2)
static size_t strlen_utf16utf8(const unsigned short *utf16buf)
{
size_t length = 0;
unsigned char utf8char[4];
for (unsigned short c = *ucs2; c; (c = *++ucs2))
length += utf8encode(c, utf8char) - utf8char;
/* Units are read in host order (windows is always LE) */
while (*utf16buf) {
    unsigned long ucs = *utf16buf++;
    /* Combine a lead surrogate with the trail surrogate that follows it.
       Both units are checked, so an unpaired surrogate is measured on its
       own, the terminator is never skipped, and the combined value cannot
       exceed U+10FFFF (which keeps the encoding within utf8char[4]). */
    if (ucs >= 0xd800 && ucs < 0xdc00 &&
        *utf16buf >= 0xdc00 && *utf16buf < 0xe000) {
        ucs = 0x10000 + (((ucs & 0x3ff) << 10) | (*utf16buf & 0x3ff));
        utf16buf++;
    }
    /* Only the encoded size is needed; the bytes themselves are discarded */
    length += utf8encode(ucs, utf8char) - utf8char;
}
return length;
}
#endif
size_t strlcpy_ucs2utf8(char *buffer, const unsigned short *ucs2,
size_t bufsize)
/* Note: Must be exported */
size_t strlcpy_utf16utf8(char *buffer, const unsigned short *utf16,
size_t bufsize)
{
if (!buffer)
bufsize = 0;
@ -119,12 +142,24 @@ size_t strlcpy_ucs2utf8(char *buffer, const unsigned short *ucs2,
size_t length = 0;
unsigned char utf8char[4];
for (unsigned short c = *ucs2; c; (c = *++ucs2))
unsigned long ucc;
while(*utf16)
{
/* Check for a surrogate UTF16 pair */
if (*utf16 >= 0xd800 && *utf16 < 0xdc00 &&
*(utf16+1) >= 0xdc00 && *(utf16+1) < 0xe000) {
ucc = 0x10000 + (((*utf16 & 0x3ff) << 10) | (*(utf16+1) & 0x3ff));
utf16++;
} else {
ucc = *utf16;
}
/* If the last character won't fit, this won't split it */
size_t utf8size = utf8encode(c, utf8char) - utf8char;
size_t utf8size = utf8encode(ucc, utf8char) - utf8char;
if ((length += utf8size) < bufsize)
buffer = mempcpy(buffer, utf8char, utf8size);
utf16++;
}
/* Above won't ever copy to very end */
@ -134,44 +169,44 @@ size_t strlcpy_ucs2utf8(char *buffer, const unsigned short *ucs2,
return length;
}
#define _toucs2(utf8) \
#define _toutf16(utf8) \
({ const char *_utf8 = (utf8); \
size_t _l = strlen_utf8ucs2(_utf8); \
size_t _l = utf16len_utf8(_utf8); \
void *_buffer = alloca((_l + 1)*2); \
strcpy_utf8ucs2(_buffer, _utf8); })
strcpy_utf8utf16(_buffer, _utf8); })
#define _toutf8(ucs2) \
({ const char *_ucs2 = (ucs2); \
size_t _l = strlen_ucs2utf8(_ucs2); \
#define _toutf8(utf16) \
({ const char *_ucs = (utf16); \
size_t _l = strlen_utf16utf8(_ucs); \
void *_buffer = alloca(_l + 1); \
strcpy_ucs2utf8(_buffer, _ucs2); })
strcpy_utf16utf8(_buffer, _ucs); })
int os_open(const char *ospath, int oflag, ...)
{
return _wopen(_toucs2(ospath), oflag __OPEN_MODE_ARG);
return _wopen(_toutf16(ospath), oflag __OPEN_MODE_ARG);
}
int os_creat(const char *ospath, mode_t mode)
{
return _wcreat(_toucs2(ospath), mode);
return _wcreat(_toutf16(ospath), mode);
}
int os_stat(const char *ospath, struct _stat *s)
{
return _wstat(_toucs2(ospath), s);
return _wstat(_toutf16(ospath), s);
}
int os_remove(const char *ospath)
{
return _wremove(_toucs2(ospath));
return _wremove(_toutf16(ospath));
}
int os_rename(const char *osold, const char *osnew)
{
int errnum = errno;
const wchar_t *wchosold = _toucs2(osold);
const wchar_t *wchosnew = _toucs2(osnew);
const wchar_t *wchosold = _toutf16(osold);
const wchar_t *wchosnew = _toutf16(osnew);
int rc = _wrename(wchosold, wchosnew);
if (rc < 0 && errno == EEXIST)
@ -213,18 +248,18 @@ bool os_file_exists(const char *ospath)
_WDIR * os_opendir(const char *osdirname)
{
return _wopendir(_toucs2(osdirname));
return _wopendir(_toutf16(osdirname));
}
int os_mkdir(const char *ospath, mode_t mode)
{
return _wmkdir(_toucs2(ospath));
return _wmkdir(_toutf16(ospath));
(void)mode;
}
int os_rmdir(const char *ospath)
{
return _wrmdir(_toucs2(ospath));
return _wrmdir(_toutf16(ospath));
}
int os_dirfd(_WDIR *osdirp)
@ -288,7 +323,7 @@ static HANDLE win32_open(const char *ospath)
{
/* FILE_FLAG_BACKUP_SEMANTICS is required for this to succeed at opening
a directory */
HANDLE h = CreateFileW(_toucs2(ospath), GENERIC_READ,
HANDLE h = CreateFileW(_toutf16(ospath), GENERIC_READ,
FILE_SHARE_READ | FILE_SHARE_WRITE |
FILE_SHARE_DELETE, NULL, OPEN_EXISTING,
FILE_FLAG_BACKUP_SEMANTICS, NULL);
@ -479,7 +514,7 @@ void volume_size(IF_MV(int volume,) sector_t *sizep, sector_t *freep)
char volpath[MAX_PATH];
if (os_volume_path(IF_MV(volume, ) volpath, sizeof (volpath)) >= 0)
GetDiskFreeSpaceExW(_toucs2(volpath), &free, &size, NULL);
GetDiskFreeSpaceExW(_toutf16(volpath), &free, &size, NULL);
if (sizep)
*sizep = size.QuadPart / 1024;

View file

@ -27,10 +27,10 @@
/* filesystem-win32.c contains some string functions that could be useful
* elsewhere; just move them away to unicode.c or something if they prove
* so. */
size_t strlcpy_ucs2utf8(char *buffer, const unsigned short *ucs,
size_t bufsize);
size_t strlcpy_utf16utf8(char *buffer, const unsigned short *utf16,
size_t bufsize);
#define strlcpy_from_os strlcpy_ucs2utf8
#define strlcpy_from_os strlcpy_utf16utf8
#endif /* __MINGW32__ */
#endif /* !OSFUNCTIONS_DECLARED */