unicode: Support characters beyond the first unicode plane

We used 16-bit variables to store the 'character code' everywhere, but
this won't let us represent anything beyond U+FFFF.

This patch changes those variables to a custom type (ucschar_t) that can
be 32 or 16 bits depending on the build, and adjusts numerous internal
APIs and data structures to match.  This includes:

 * utf8decode() and friends
 * font manipulation, caching, rendering, and generation
 * on-screen keyboard
 * FAT filesystem (parsing and generating utf16 LFNs)
 * WIN32 simulator platform code

Note that this patch doesn't _enable_ >16-bit Unicode support; a follow-up
patch will turn that on for appropriate targets.

Appears to work on:

  * hosted Linux, native, and Linux simulator builds, in both 16-bit and 32-bit modes.

Needs testing on:

  * Windows and macOS simulators (16-bit and 32-bit)

Change-Id: Iba111b27d2433019b6bff937cf1ebd2c4353a0e8
This commit is contained in:
Solomon Peachy 2024-12-17 08:55:21 -05:00
parent 2a88253426
commit a2c10f6189
44 changed files with 476 additions and 330 deletions

View file

@ -21,6 +21,6 @@
#ifndef BIDI_H
#define BIDI_H
extern unsigned short *bidi_l2v(const unsigned char *str, int orientation);
ucschar_t *bidi_l2v(const unsigned char *str, int orientation);
#endif /* BIDI_H */

View file

@ -1461,4 +1461,11 @@ Lyre prototype 1 */
#error "HAVE_LCD_SLEEP_SETTING requires HAVE_LCD_SLEEP"
#endif
// XXX Figure out a better place to put this?
/* ucschar_t: the type used to hold a single character code.
 *
 * Expands to `unsigned int` when UNICODE32 is defined and to
 * `unsigned short` otherwise.  The actual widths depend on the target ABI;
 * presumably 32 and 16 bits respectively on supported targets
 * (NOTE(review): confirm, or switch to fixed-width types).
 *
 * This is a preprocessor macro, not a typedef, so it is substituted
 * textually wherever the name appears.
 */
#ifdef UNICODE32
#define ucschar_t unsigned int
#else
#define ucschar_t unsigned short
#endif
#endif /* __CONFIG_H__ */

View file

@ -18,6 +18,9 @@
* KIND, either express or implied.
*
****************************************************************************/
#ifndef __CPU_H
#define __CPU_H
#include "config.h"
#if CONFIG_CPU == MCF5249
@ -80,3 +83,5 @@
#if CONFIG_CPU == STM32H743
#include "cpu-stm32h743.h"
#endif
#endif /* __CPU_H */

View file

@ -86,7 +86,7 @@ struct font {
int maxwidth; /* max width in pixels*/
unsigned int height; /* height in pixels*/
int ascent; /* ascent (baseline) height*/
int firstchar; /* first character in bitmap*/
unsigned int firstchar; /* first character in bitmap*/
int size; /* font size in glyphs*/
int depth; /* depth of the font, 0=1bit and 1=4bit */
const unsigned char *bits; /* 8-bit column bitmap data*/
@ -95,24 +95,24 @@ struct font {
const unsigned char *width; /* character widths or NULL if fixed*/
int defaultchar; /* default char (not glyph index)*/
int32_t bits_size; /* # bytes of glyph bits*/
/* file, buffer and cache management */
int fd; /* fd for the font file. >= 0 if cached */
int fd_width; /* fd for the font file. >= 0 if cached */
int fd_offset; /* fd for the font file. >= 0 if cached */
int fd_offset; /* fd for the font file. >= 0 if cached */
int handle; /* core_allocator handle */
unsigned char *buffer_start; /* buffer to store the font in */
unsigned char *buffer_position; /* position in the buffer */
unsigned char *buffer_start; /* buffer to store the font in */
unsigned char *buffer_position; /* position in the buffer */
unsigned char *buffer_end; /* end of the buffer */
size_t buffer_size; /* size of the buffer in bytes */
bool disabled; /* font disabled (use blank as fallback if not in cache) */
#ifndef __PCTOOL__
#ifndef __PCTOOL__
struct font_cache cache;
uint32_t file_width_offset; /* offset to file width data */
uint32_t file_offset_offset; /* offset to file offset data */
int long_offset;
#endif
#endif
};
/* font routines*/
@ -134,7 +134,7 @@ void font_enable_all(void);
struct font* font_get(int font);
int font_getstringnsize(const unsigned char *str, size_t maxbytes, int *w, int *h, int fontnumber);
int font_getstringsize(const unsigned char *str, int *w, int *h, int fontnumber);
int font_get_width(struct font* ft, unsigned short ch);
const unsigned char * font_get_bits(struct font* ft, unsigned short ch);
int font_get_width(struct font* ft, ucschar_t ch);
const unsigned char * font_get_bits(struct font* ft, ucschar_t ch);
#endif

View file

@ -21,5 +21,4 @@
extern const char jamo_table[51][3];
unsigned short hangul_join(unsigned short lead, unsigned short vowel,
unsigned short tail);
ucschar_t hangul_join(ucschar_t lead, ucschar_t vowel, ucschar_t tail);