1
0
Fork 0
forked from len0rd/rockbox

unicode: Support characters beyond the first unicode plane

We used 16-bit variables to store the 'character code' everywhere but
this won't let us represent anything beyond U+FFFF.

This patch changes those variables to a custom type that can be 32 or 16
bits depending on the build, and adjusts numerous internal APIs and
datastructures to match.  This includes:

 * utf8decode() and friends
 * font manipulation, caching, rendering, and generation
 * on-screen keyboard
 * FAT filesystem (parsing and generating utf16 LFNs)
 * WIN32 simulator platform code

Note that this patch doesn't _enable_ >16bit unicode support; a followup
patch will turn that on for appropriate targets.

Appears to work on:

  * hosted linux, native, linux simulator in both 16/32-bit modes.

Needs testing on:

  * windows and macos simulator (16bit+32bit)

Change-Id: Iba111b27d2433019b6bff937cf1ebd2c4353a0e8
This commit is contained in:
Solomon Peachy 2024-12-17 08:55:21 -05:00
parent 2a88253426
commit a2c10f6189
44 changed files with 476 additions and 330 deletions

View file

@ -27,7 +27,7 @@
*/
#ifndef _RBUNICODE_H_
#define _RBUNICODE_H_
#include "config.h"
#include <stdbool.h>
@ -63,8 +63,9 @@ unsigned char* utf16LEdecode(const unsigned char *utf16, unsigned char *utf8, in
unsigned char* utf16BEdecode(const unsigned char *utf16, unsigned char *utf8, int count);
unsigned char* utf16decode(const unsigned char *utf16, unsigned char *utf8, int count, int utf8_size, bool le);
bool utf16_has_bom(const unsigned char *utf16, bool *le);
unsigned long utf16len_utf8(const unsigned char *utf8);
unsigned long utf8length(const unsigned char *utf8);
const unsigned char* utf8decode(const unsigned char *utf8, unsigned short *ucs);
const unsigned char* utf8decode(const unsigned char *utf8, ucschar_t *ucs);
void set_codepage(int cp);
int get_codepage(void);
int utf8seek(const unsigned char* utf8, int offset);