mirror of
https://github.com/Rockbox/rockbox.git
synced 2025-11-09 13:12:37 -05:00
internals: Support characters beyond the first unicode plane (WIP)
We used 16-bit variables to store the 'character code' everywhere but
this won't let us represent anything beyond U+FFFF.
This patch changes those variables to a custom type that can be 32 or 16
bits depending on the build, and adjusts numerous internal APIs and
data structures to match. This includes:
* utf8decode() and friends
* on-screen keyboard
* font manipulation, caching, rendering, and generation
* VFAT code parses and generates utf16 dirents
* WIN32 simulator reads and writes utf16 filenames
Note that this patch doesn't _enable_ >16bit unicode support; a followup
patch will turn that on for appropriate targets.
Known bugs:
* Native players in 32-bit unicode mode generate mangled filename
entries if they include UTF16 surrogate codepoints. Root cause
is unclear, and may reside in core dircache code.
Needs testing on:
* windows simulator (16bit+32bit)
Change-Id: I193a00fe2a11a4181ddc82df2d71be52bf00b6e6
This commit is contained in:
parent
94712b34d4
commit
d05c59f35b
44 changed files with 480 additions and 335 deletions
|
|
@ -747,6 +747,8 @@ static bool fatlong_parse_entry(struct fatlong_parse_state *lnparse,
|
|||
/* so far so good; save entry information */
|
||||
lnparse->ord = ord;
|
||||
|
||||
/* Treat entries as opaque 16-bit values;
|
||||
UTF-16 decoding and utf8encode happen in fatlong_parse_finish() */
|
||||
uint16_t *ucsp = fatent->ucssegs[ord - 1 + 5];
|
||||
unsigned int i = longent_char_first();
|
||||
|
||||
|
|
@ -797,13 +799,23 @@ static bool fatlong_parse_finish(struct fatlong_parse_state *lnparse,
|
|||
/* ensure the last segment is NULL-terminated if it is filled */
|
||||
fatent->ucssegs[lnparse->ord_max + 5][0] = 0x0000;
|
||||
|
||||
for (uint16_t *ucsp = fatent->ucssegs[5], ucc = *ucsp;
|
||||
ucc; ucc = *++ucsp)
|
||||
unsigned long ucc; /* Decoded codepoint */
|
||||
uint16_t *ucsp, ucs;
|
||||
for (ucsp = fatent->ucssegs[5], ucs=*ucsp; ucs; ucs = *++ucsp)
|
||||
{
|
||||
/* end should be hit before ever seeing padding */
|
||||
if (ucc == 0xffff)
|
||||
if (ucs == 0xffff)
|
||||
return false;
|
||||
|
||||
/* Check for a surrogate UTF16 pair */
|
||||
if (ucs >= 0xd800 && ucs < 0xdc00 &&
|
||||
*(ucsp+1) >= 0xdc00 && *(ucsp+1) < 0xe000) {
|
||||
ucc = 0x10000 + ((ucs & 0x3ff) << 10) | (*(ucsp+1) & 0x3ff);
|
||||
ucsp++;
|
||||
} else {
|
||||
ucc = ucs;
|
||||
}
|
||||
|
||||
if ((p = utf8encode(ucc, p)) - name > FAT_DIRENTRY_NAME_MAX)
|
||||
return false;
|
||||
}
|
||||
|
|
@ -1608,7 +1620,7 @@ static int write_longname(struct bpb *fat_bpb, struct fat_filestr *parentstr,
|
|||
/* we need to convert the name first since the entries are written in
|
||||
reverse order */
|
||||
unsigned long ucspadlen = ALIGN_UP(ucslen, FATLONG_NAME_CHARS);
|
||||
uint16_t ucsname[ucspadlen];
|
||||
ucschar_t ucsname[ucspadlen];
|
||||
|
||||
for (unsigned long i = 0; i < ucspadlen; i++)
|
||||
{
|
||||
|
|
@ -1626,6 +1638,9 @@ static int write_longname(struct bpb *fat_bpb, struct fat_filestr *parentstr,
|
|||
const unsigned int firstentry = file->e.entry - longentries;
|
||||
|
||||
/* longname entries */
|
||||
#ifdef UNICODE32
|
||||
long carried_val = -1;
|
||||
#endif
|
||||
for (unsigned int i = 0; i < longentries; i++)
|
||||
{
|
||||
ent = cache_direntry(fat_bpb, parentstr, firstentry + i);
|
||||
|
|
@ -1651,11 +1666,38 @@ static int write_longname(struct bpb *fat_bpb, struct fat_filestr *parentstr,
|
|||
ent->ldir_chksum = chksum;
|
||||
|
||||
/* set name */
|
||||
uint16_t *ucsptr = &ucsname[(ord - 1) * FATLONG_NAME_CHARS];
|
||||
ucschar_t *ucsptr = &ucsname[(ord - 1) * FATLONG_NAME_CHARS];
|
||||
for (unsigned j = longent_char_first(); j; j = longent_char_next(j))
|
||||
{
|
||||
uint16_t ucs = *ucsptr++;
|
||||
INT162BYTES(ent->data, j, ucs);
|
||||
#ifdef UNICODE32
|
||||
if (carried_val >= 0) {
|
||||
INT162BYTES(ent->data, j, carried_val);
|
||||
carried_val = -1;
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
ucschar_t ucs = *ucsptr++;
|
||||
#ifdef UNICODE32
|
||||
if (ucs >= 0x10000) {
|
||||
ucs-=0x10000;
|
||||
uint16_t v = 0xdc00 | (ucs & 0x3ff);
|
||||
unsigned oldj = j;
|
||||
INT162BYTES(ent->data, j, v);
|
||||
j = longent_char_next(j);
|
||||
v = 0xd800 | ((ucs >> 10) & 0x3ff);
|
||||
if (j) {
|
||||
INT162BYTES(ent->data, j, v);
|
||||
} else if ((i + 1) < longentries) {
|
||||
/* Carry the other end of the surrogate pair to the next block */
|
||||
carried_val = v;
|
||||
} else {
|
||||
/* No more blocks, so re-write the first entry of the pair */
|
||||
v = 0xfffd;
|
||||
INT162BYTES(ent->data, oldj, v);
|
||||
}
|
||||
} else
|
||||
#endif
|
||||
INT162BYTES(ent->data, j, ucs);
|
||||
}
|
||||
|
||||
dc_dirty_buf(ent);
|
||||
|
|
@ -1744,9 +1786,12 @@ static int add_dir_entry(struct bpb *fat_bpb, struct fat_filestr *parentstr,
|
|||
create_dos_name(basisname, name, &n);
|
||||
randomize_dos_name(shortname, basisname, &n);
|
||||
|
||||
/* one dir entry needed for every 13 characters of filename,
|
||||
plus one entry for the short name */
|
||||
ucslen = utf8length(name);
|
||||
/* one dir entry needed for every 13 "code units"
|
||||
of filename, plus one entry for the short name.
|
||||
Keep in mind that a utf8 character can take 1
|
||||
or 2 UTF-16 code units.
|
||||
*/
|
||||
ucslen = utf16len_utf8(name);
|
||||
if (ucslen > 255)
|
||||
FAT_ERROR(-2); /* name is too long */
|
||||
|
||||
|
|
|
|||
|
|
@ -385,7 +385,7 @@ static void LCDFN(mono_bmp_part_helper)(const unsigned char *src, int src_x,
|
|||
/* put a string at a given pixel position, skipping first ofs pixel columns */
|
||||
static void LCDFN(putsxyofs)(int x, int y, int ofs, const unsigned char *str)
|
||||
{
|
||||
unsigned short *ucs;
|
||||
ucschar_t *ucs;
|
||||
struct viewport *vp = LCDFN(current_viewport);
|
||||
font_lock(vp->font, true);
|
||||
struct font* pf = font_get(vp->font);
|
||||
|
|
@ -429,7 +429,7 @@ static void LCDFN(putsxyofs)(int x, int y, int ofs, const unsigned char *str)
|
|||
bool is_rtl, is_diac;
|
||||
const unsigned char *bits;
|
||||
int width, base_width, base_ofs = 0;
|
||||
const unsigned short next_ch = ucs[1];
|
||||
const ucschar_t next_ch = ucs[1];
|
||||
|
||||
if (x >= vp->width)
|
||||
break;
|
||||
|
|
@ -447,7 +447,7 @@ static void LCDFN(putsxyofs)(int x, int y, int ofs, const unsigned char *str)
|
|||
{
|
||||
if (!rtl_next_non_diac_width)
|
||||
{
|
||||
const unsigned short *u;
|
||||
const ucschar_t *u;
|
||||
|
||||
/* Jump to next non-diacritic char, and calc its width */
|
||||
for (u = &ucs[1]; *u && IS_DIACRITIC(*u); u++);
|
||||
|
|
@ -529,7 +529,7 @@ static void LCDFN(putsxyofs)(int x, int y, int ofs, const unsigned char *str)
|
|||
/* put a string at a given pixel position, skipping first ofs pixel columns */
|
||||
static void LCDFN(putsxyofs)(int x, int y, int ofs, const unsigned char *str)
|
||||
{
|
||||
unsigned short *ucs;
|
||||
ucschar_t *ucs;
|
||||
struct viewport *vp = LCDFN(current_viewport);
|
||||
struct font* pf = font_get(vp->font);
|
||||
const unsigned char *bits;
|
||||
|
|
@ -567,7 +567,7 @@ static void LCDFN(putsxyofs)(int x, int y, int ofs, const unsigned char *str)
|
|||
/* allow utf but no diacritics or rtl lang */
|
||||
for (ucs = bidi_l2v(str, 1); *ucs; ucs++)
|
||||
{
|
||||
const unsigned short next_ch = ucs[1];
|
||||
const ucschar_t next_ch = ucs[1];
|
||||
|
||||
if (x >= vp->width)
|
||||
break;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue