mirror of
https://github.com/Rockbox/rockbox.git
synced 2025-10-13 10:07:38 -04:00
We used 16-bit variables to store the 'character code' everywhere, but this won't let us represent anything beyond U+FFFF. This patch changes those variables to a custom type that can be 32 or 16 bits depending on the build, and adjusts numerous internal APIs and data structures to match. This includes: * utf8decode() and friends * font manipulation, caching, rendering, and generation * on-screen keyboard * FAT filesystem (parsing and generating utf16 LFNs) * WIN32 simulator platform code Note that this patch doesn't _enable_ >16-bit Unicode support; a follow-up patch will turn that on for appropriate targets. Appears to work on: * hosted linux, native, linux simulator in both 16/32-bit modes. Needs testing on: * windows and macos simulator (16-bit + 32-bit) Change-Id: Iba111b27d2433019b6bff937cf1ebd2c4353a0e8
659 lines
18 KiB
C
659 lines
18 KiB
C
/***************************************************************************
|
|
* __________ __ ___.
|
|
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
|
|
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
|
|
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
|
|
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
|
|
* \/ \/ \/ \/ \/
|
|
* $Id$
|
|
*
|
|
* Copyright (c) 2004,2005 by Marcoen Hirschberg
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version 2
|
|
* of the License, or (at your option) any later version.
|
|
*
|
|
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
|
|
* KIND, either express or implied.
|
|
*
|
|
****************************************************************************/
|
|
/* Some conversion functions for handling UTF-8
|
|
*
|
|
* I got all the info from:
|
|
* http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
|
|
* and
|
|
* http://en.wikipedia.org/wiki/Unicode
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include "config.h"
|
|
#include "system.h"
|
|
#include "thread.h"
|
|
#include "file.h"
|
|
#include "debug.h"
|
|
#include "rbunicode.h"
|
|
#include "rbpaths.h"
|
|
#include "pathfuncs.h"
|
|
#include "core_alloc.h"
|
|
|
|
#ifndef O_BINARY
|
|
#define O_BINARY 0
|
|
#endif
|
|
#ifndef O_NOISODECODE
|
|
#define O_NOISODECODE 0
|
|
#endif
|
|
|
|
/* Read a 16-bit value from the two bytes at p, in little-endian (getle16)
 * or big-endian (getbe16) byte order.
 *
 * The argument is parenthesized so that pointer expressions such as
 * getle16(buf + 2) expand correctly. p is evaluated twice, so it must not
 * have side effects. */
#define getle16(p) ((p)[0] | ((p)[1] << 8))
#define getbe16(p) (((p)[0] << 8) | (p)[1])
|
|
|
|
#if !defined (__PCTOOL__) && (CONFIG_PLATFORM & PLATFORM_NATIVE)
|
|
/* Because file scanning uses the default CP table when matching entries,
|
|
on-demand loading is not feasible; we also must use the filesystem lock */
|
|
#include "file_internal.h"
|
|
#else /* APPLICATION */
|
|
#ifdef __PCTOOL__
|
|
#define yield()
|
|
#define DEFAULT_CP_STATIC_ALLOC
|
|
#endif
|
|
#define open_noiso_internal open
|
|
#endif /* !APPLICATION */
|
|
|
|
#if 0 /* not needed just now (will probably end up a spinlock) */
|
|
#include "mutex.h"
|
|
static struct mutex cp_mutex SHAREDBSS_ATTR;
|
|
#define cp_lock_init() mutex_init(&cp_mutex)
|
|
#define cp_lock_enter() mutex_lock(&cp_mutex)
|
|
#define cp_lock_leave() mutex_unlock(&cp_mutex)
|
|
#else
|
|
#define cp_lock_init() do {} while (0)
|
|
#define cp_lock_enter() asm volatile ("")
|
|
#define cp_lock_leave() asm volatile ("")
|
|
#endif
|
|
|
|
/* Table id: tells which conversion table a codepage uses (see cp_info[]). */
enum cp_tid
{
    CP_TID_NONE = -1, /* codepage has no table */
    CP_TID_ISO  = 0,  /* shared table in CPF_ISO */
    CP_TID_932  = 1,  /* CPF_932 */
    CP_TID_936  = 2,  /* CPF_936 */
    CP_TID_949  = 3,  /* CPF_949 */
    CP_TID_950  = 4,  /* CPF_950 */
};
|
|
|
|
/* Static description of one codepage. */
struct cp_info
{
    int8_t      tid;      /* table id (enum cp_tid value) */
    const char *filename; /* table file name; NULL if there is no file */
    const char *name;     /* codepage name string */
};
|
|
|
|
#define MAX_CP_TABLE_SIZE 32768
|
|
|
|
#define CPF_ISO "iso.cp"
|
|
#define CPF_932 "932.cp" /* SJIS */
|
|
#define CPF_936 "936.cp" /* GB2312 */
|
|
#define CPF_949 "949.cp" /* KSX1001 */
|
|
#define CPF_950 "950.cp" /* BIG5 */
|
|
|
|
static const struct cp_info cp_info[NUM_CODEPAGES+1] =
|
|
{
|
|
[0 ... NUM_CODEPAGES] = { CP_TID_NONE, NULL , "unknown" },
|
|
[ISO_8859_1] = { CP_TID_NONE, NULL , "ISO-8859-1" },
|
|
[ISO_8859_7] = { CP_TID_ISO , CPF_ISO, "ISO-8859-7" },
|
|
[ISO_8859_8] = { CP_TID_ISO , CPF_ISO, "ISO-8859-8" },
|
|
[WIN_1251] = { CP_TID_ISO , CPF_ISO, "CP1251" },
|
|
[ISO_8859_11] = { CP_TID_ISO , CPF_ISO, "ISO-8859-11" },
|
|
[WIN_1256] = { CP_TID_ISO , CPF_ISO, "CP1256" },
|
|
[ISO_8859_9] = { CP_TID_ISO , CPF_ISO, "ISO-8859-9" },
|
|
[ISO_8859_2] = { CP_TID_ISO , CPF_ISO, "ISO-8859-2" },
|
|
[WIN_1250] = { CP_TID_ISO , CPF_ISO, "CP1250" },
|
|
[WIN_1252] = { CP_TID_ISO , CPF_ISO, "CP1252" },
|
|
[SJIS] = { CP_TID_932 , CPF_932, "SJIS" },
|
|
[GB_2312] = { CP_TID_936 , CPF_936, "GB-2312" },
|
|
[KSX_1001] = { CP_TID_949 , CPF_949, "KSX-1001" },
|
|
[BIG_5] = { CP_TID_950 , CPF_950, "BIG5" },
|
|
[UTF_8] = { CP_TID_NONE, NULL , "UTF-8" },
|
|
};
|
|
|
|
static int default_cp = INIT_CODEPAGE;
|
|
static int default_cp_tid = CP_TID_NONE;
|
|
static int default_cp_handle = 0;
|
|
static int volatile default_cp_table_ref = 0;
|
|
|
|
static int loaded_cp_tid = CP_TID_NONE;
|
|
static int volatile cp_table_ref = 0;
|
|
#define CP_LOADING BIT_N(sizeof(int)*8-1) /* guard against multi loaders */
|
|
|
|
/* non-default codepage table buffer (cannot be bufalloced! playback itself
|
|
may be making the load request) */
|
|
static unsigned short codepage_table[MAX_CP_TABLE_SIZE+1]; // XXX convert to ucschar_t if we ever need > 16bit mappings?
|
|
|
|
#if defined(APPLICATION) && defined(__linux__)
|
|
static const char * const name_codepages_linux[NUM_CODEPAGES+1] =
|
|
{
|
|
[0 ... NUM_CODEPAGES] = "unknown",
|
|
[ISO_8859_1] = "iso8859-1",
|
|
[ISO_8859_7] = "iso8859-7",
|
|
[ISO_8859_8] = "iso8859-8",
|
|
[WIN_1251] = "cp1251",
|
|
[ISO_8859_11] = "iso8859-11",
|
|
[WIN_1256] = "cp1256",
|
|
[ISO_8859_9] = "iso8859-9",
|
|
[ISO_8859_2] = "iso8859-2",
|
|
[WIN_1250] = "cp1250",
|
|
/* iso8859-15 is closest, linux doesnt have a codepage named cp1252 */
|
|
[WIN_1252] = "iso8859-15",
|
|
[SJIS] = "cp932",
|
|
[GB_2312] = "cp936",
|
|
[KSX_1001] = "cp949",
|
|
[BIG_5] = "cp950",
|
|
[UTF_8] = "utf8",
|
|
};
|
|
|
|
const char *get_current_codepage_name_linux(void)
|
|
{
|
|
int cp = default_cp;
|
|
if (cp < 0 || cp>= NUM_CODEPAGES)
|
|
cp = NUM_CODEPAGES;
|
|
return name_codepages_linux[cp];
|
|
}
|
|
#endif /* defined(APPLICATION) && defined(__linux__) */
|
|
|
|
#ifdef DEFAULT_CP_STATIC_ALLOC
|
|
static unsigned short default_cp_table_buf[MAX_CP_TABLE_SIZE+1];
|
|
#define cp_table_get_data(handle) \
|
|
default_cp_table_buf
|
|
#define cp_table_free(handle) \
|
|
do {} while (0)
|
|
#define cp_table_alloc(size, opsp) \
|
|
({ (void)(opsp); 1; })
|
|
#define cp_table_pin(handle) \
|
|
do { (void)handle; } while(0)
|
|
#define cp_table_unpin(handle) \
|
|
do { (void)handle; } while(0)
|
|
#else
|
|
#define cp_table_alloc(size, opsp) \
|
|
core_alloc_ex((size), (opsp))
|
|
#define cp_table_free(handle) \
|
|
core_free(handle)
|
|
#define cp_table_get_data(handle) \
|
|
core_get_data(handle)
|
|
#define cp_table_pin(handle) \
|
|
core_pin(handle)
|
|
#define cp_table_unpin(handle) \
|
|
core_unpin(handle)
|
|
#endif
|
|
|
|
/* Prefix bits OR-ed into the first byte of an encoded character, indexed
 * by the number of bytes that follow it (0..5). */
static const unsigned char utf8comp[6] =
{
    0x00, /* no following bytes */
    0xC0, /* 1 following byte   */
    0xE0, /* 2 following bytes  */
    0xF0, /* 3 following bytes  */
    0xF8, /* 4 following bytes  */
    0xFC, /* 5 following bytes  */
};
|
|
|
|
/* Pass each of the 'count' 16-bit entries in 'buf' through letoh16().
 * This only does work when ROCKBOX_BIG_ENDIAN is defined; otherwise the
 * buffer is left untouched. */
static inline void cptable_tohw16(uint16_t *buf, unsigned int count)
{
#ifdef ROCKBOX_BIG_ENDIAN
    uint16_t *const end = buf + count;
    for (uint16_t *entry = buf; entry < end; entry++)
        *entry = letoh16(*entry);
#endif
    /* keep the parameters "used" when the loop is compiled out */
    (void)count;
    (void)buf;
}
|
|
|
|
static int alloc_and_load_cp_table(int cp, void *buf)
|
|
{
|
|
/* alloc and read only if there is an associated file */
|
|
const char *filename = cp_info[cp].filename;
|
|
if (!filename)
|
|
return 0;
|
|
|
|
char path[MAX_PATH];
|
|
if (path_append(path, CODEPAGE_DIR, filename, sizeof (path))
|
|
>= sizeof (path)) {
|
|
return -1;
|
|
}
|
|
|
|
/* must be opened without a chance of reentering from FS code */
|
|
int fd = open_noiso_internal(path, O_RDONLY);
|
|
if (fd < 0)
|
|
return -1;
|
|
|
|
off_t size = filesize(fd);
|
|
|
|
if (size > 0 && size <= MAX_CP_TABLE_SIZE*2 &&
|
|
!(size % (off_t)sizeof (uint16_t))) {
|
|
|
|
/* if the buffer is provided, use that but don't alloc */
|
|
int handle = buf ? 0 : cp_table_alloc(size, NULL);
|
|
if (handle > 0) {
|
|
cp_table_pin(handle);
|
|
buf = cp_table_get_data(handle);
|
|
}
|
|
|
|
if (buf && read(fd, buf, size) == size) {
|
|
close(fd);
|
|
cptable_tohw16(buf, size / sizeof (uint16_t));
|
|
if (handle > 0)
|
|
cp_table_unpin(handle);
|
|
return handle;
|
|
}
|
|
|
|
if (handle > 0)
|
|
cp_table_free(handle);
|
|
}
|
|
|
|
close(fd);
|
|
return -1;
|
|
}
|
|
|
|
/* Returns the number of additional bytes required in the encoded string
 * (total encoded length - 1) for the value 'ucs'.
 *
 * The result is capped at 5, the largest index utf8comp[] provides. Without
 * the cap, values of 2^31 and above produced 6 (or more), indexing past the
 * end of utf8comp[] and, where unsigned long is 32 bits wide, shifting by
 * more than the width of the type.
 */
static int utf8_ucs_get_extra_bytes_count(unsigned long ucs)
{
    enum { MAX_TAIL = 5 };
    int tail = 0;

    if (ucs > 0x7F) {
        /* a sequence with 'tail' extra bytes carries 5*tail + 6 bits */
        while (tail < MAX_TAIL && (ucs >> (5*tail + 6)) != 0)
            tail++;
    }

    return tail;
}
|
|
|
|
static unsigned char * utf8encode_internal(unsigned long ucs, unsigned char *utf8, int tail)
|
|
{
|
|
*utf8++ = (ucs >> (6*tail)) | utf8comp[tail];
|
|
while (tail--)
|
|
*utf8++ = ((ucs >> (6*tail)) & (MASK ^ 0xFF)) | COMP;
|
|
return utf8;
|
|
}
|
|
|
|
/* Size-limited encoder. The encoded length of 'ucs' is subtracted from
 * *utf8_size; the character is written only if the remaining size did not
 * become negative. Returns the new output position ('utf8' unchanged when
 * nothing was written). */
FORCE_INLINE static unsigned char* utf8encode_ex(unsigned long ucs, unsigned char *utf8, int* utf8_size)
{
    const int extra = utf8_ucs_get_extra_bytes_count(ucs);

    *utf8_size -= extra + 1;
    if (*utf8_size < 0)
        return utf8; /* does not fit: emit nothing */

    return utf8encode_internal(ucs, utf8, extra);
}
|
|
|
|
/* Encode the UCS value 'ucs' as UTF-8 at 'utf8' (no size check) and return
 * a pointer just past the encoded character. */
unsigned char* utf8encode(unsigned long ucs, unsigned char *utf8)
{
    const int extra = utf8_ucs_get_extra_bytes_count(ucs);
    return utf8encode_internal(ucs, utf8, extra);
}
|
|
|
|
/* Same as iso_decode_ex() with the output size limit set to INT_MAX. */
unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8, int cp, int count)
{
    const int no_limit = INT_MAX;
    return iso_decode_ex(iso, utf8, cp, count, no_limit);
}
|
|
|
|
/* Recode an iso encoded string to UTF-8 */
|
|
unsigned char* iso_decode_ex(const unsigned char *iso, unsigned char *utf8, int cp, int count, int utf8_size)
|
|
{
|
|
uint16_t *table = NULL;
|
|
|
|
cp_lock_enter();
|
|
|
|
if (cp < 0 || cp >= NUM_CODEPAGES)
|
|
cp = default_cp;
|
|
|
|
int tid = cp_info[cp].tid;
|
|
|
|
while (1) {
|
|
if (tid == default_cp_tid) {
|
|
/* use default table */
|
|
if (default_cp_handle > 0) {
|
|
table = cp_table_get_data(default_cp_handle);
|
|
default_cp_table_ref++;
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
bool load = false;
|
|
|
|
if (tid == loaded_cp_tid) {
|
|
/* use loaded table */
|
|
if (!(cp_table_ref & CP_LOADING)) {
|
|
if (tid != CP_TID_NONE) {
|
|
table = codepage_table;
|
|
cp_table_ref++;
|
|
}
|
|
|
|
break;
|
|
}
|
|
} else if (cp_table_ref == 0) {
|
|
load = true;
|
|
cp_table_ref |= CP_LOADING;
|
|
}
|
|
|
|
/* alloc and load must be done outside the lock */
|
|
cp_lock_leave();
|
|
|
|
if (!load) {
|
|
yield();
|
|
} else if (alloc_and_load_cp_table(cp, codepage_table) < 0) {
|
|
cp = INIT_CODEPAGE; /* table may be clobbered now */
|
|
tid = cp_info[cp].tid;
|
|
}
|
|
|
|
cp_lock_enter();
|
|
|
|
if (load) {
|
|
loaded_cp_tid = tid;
|
|
cp_table_ref &= ~CP_LOADING;
|
|
}
|
|
}
|
|
|
|
cp_lock_leave();
|
|
|
|
while (count-- && utf8_size > 0) {
|
|
ucschar_t ucs, tmp;
|
|
|
|
if (*iso < 128 || cp == UTF_8) /* Already UTF-8 */
|
|
{
|
|
*utf8++ = *iso++;
|
|
--utf8_size;
|
|
}
|
|
|
|
else {
|
|
/* tid tells us which table to use and how */
|
|
switch (tid) {
|
|
case CP_TID_ISO: /* Greek */
|
|
/* Hebrew */
|
|
/* Cyrillic */
|
|
/* Thai */
|
|
/* Arabic */
|
|
/* Turkish */
|
|
/* Latin Extended */
|
|
/* Central European */
|
|
/* Western European */
|
|
tmp = ((cp-1)*128) + (*iso++ - 128);
|
|
ucs = table[tmp];
|
|
break;
|
|
|
|
case CP_TID_932: /* Japanese */
|
|
if (*iso > 0xA0 && *iso < 0xE0) {
|
|
tmp = *iso++ | (0xA100 - 0x8000);
|
|
ucs = table[tmp];
|
|
break;
|
|
}
|
|
/* fallthrough */
|
|
case CP_TID_936: /* Simplified Chinese */
|
|
case CP_TID_949: /* Korean */
|
|
case CP_TID_950: /* Traditional Chinese */
|
|
if (count < 1 || !iso[1]) {
|
|
ucs = *iso++;
|
|
break;
|
|
}
|
|
|
|
/* we assume all cjk strings are written
|
|
in big endian order */
|
|
tmp = *iso++ << 8;
|
|
tmp |= *iso++;
|
|
tmp -= 0x8000;
|
|
ucs = table[tmp];
|
|
count--;
|
|
break;
|
|
|
|
default:
|
|
ucs = *iso++;
|
|
break;
|
|
}
|
|
|
|
if (ucs == 0) /* unknown char, use replacement char */
|
|
ucs = 0xfffd;
|
|
|
|
utf8 = utf8encode_ex(ucs, utf8, &utf8_size);
|
|
}
|
|
}
|
|
|
|
if (table) {
|
|
cp_lock_enter();
|
|
if (table == codepage_table) {
|
|
cp_table_ref--;
|
|
} else {
|
|
default_cp_table_ref--;
|
|
}
|
|
cp_lock_leave();
|
|
}
|
|
|
|
return utf8;
|
|
}
|
|
|
|
/* Recode a UTF-16 string to UTF-8.
 *
 * utf16     - input bytes, two per 16-bit code unit
 * utf8      - output buffer
 * count     - number of 16-bit code units available in 'utf16'
 * utf8_size - space available in 'utf8', in bytes
 * le        - true for little-endian input, false for big-endian
 *
 * A high surrogate followed by a low surrogate is combined into a single
 * code point. An unpaired surrogate (a lone low surrogate, a high surrogate
 * not followed by a low one, or a high surrogate in the last code unit) is
 * replaced by U+FFFD; the second code unit is never read unless 'count'
 * says it exists.
 *
 * Returns the position just past the last byte written to 'utf8'.
 */
unsigned char* utf16decode(const unsigned char *utf16, unsigned char *utf8,
                           int count, int utf8_size, bool le)
{
    /* byte index of the most / least significant byte in a code unit */
    const int hi = le ? 1 : 0;
    const int lo = 1 - hi;

    while (count > 0 && utf8_size > 0) {
        unsigned long ucs = ((unsigned long)utf16[hi] << 8) | utf16[lo];
        int units = 1;

        if (ucs >= 0xD800 && ucs <= 0xDBFF) {
            /* high surrogate: valid only if a low surrogate follows */
            unsigned long next = 0;
            if (count >= 2)
                next = ((unsigned long)utf16[2 + hi] << 8) | utf16[2 + lo];

            if (next >= 0xDC00 && next <= 0xDFFF) {
                ucs = 0x10000 + (((ucs - 0xD800) << 10) | (next - 0xDC00));
                units = 2;
            } else {
                ucs = 0xfffd; /* unpaired high surrogate */
            }
        } else if (ucs >= 0xDC00 && ucs <= 0xDFFF) {
            ucs = 0xfffd; /* low surrogate without a preceding high one */
        }

        utf16 += 2 * units;
        count -= units;
        utf8 = utf8encode_ex(ucs, utf8, &utf8_size);
    }

    return utf8;
}
|
|
|
|
/* Recode a UTF-16 string with little-endian byte ordering to UTF-8;
 * the output size limit is INT_MAX. */
unsigned char* utf16LEdecode(const unsigned char *utf16, unsigned char *utf8,
                             int count)
{
    const bool little_endian = true;
    return utf16decode(utf16, utf8, count, INT_MAX, little_endian);
}
|
|
|
|
/* Recode a UTF-16 string with big-endian byte ordering to UTF-8;
 * the output size limit is INT_MAX. */
unsigned char* utf16BEdecode(const unsigned char *utf16, unsigned char *utf8,
                             int count)
{
    const bool little_endian = false;
    return utf16decode(utf16, utf8, count, INT_MAX, little_endian);
}
|
|
|
|
/* Inspect the first two bytes of 'utf16' for a byte order mark.
 *
 * Returns true if a BOM is present and stores its byte order in *le
 * (true = little-endian). Returns false if there is no BOM; *le then holds
 * a guess based on which of the two bytes is zero.
 */
bool utf16_has_bom(const unsigned char *utf16, bool *le)
{
    const unsigned int first = utf16[0];
    const unsigned int second = utf16[1];

    if (first == 0xFE && second == 0xFF) {
        /* U+FEFF stored most significant byte first */
        *le = false;
        return true;
    }

    if (first == 0xFF && second == 0xFE) {
        /* U+FEFF stored least significant byte first */
        *le = true;
        return true;
    }

    /* No BOM, so guess: a 0x00 byte is probably the most significant one,
       and if it comes second the data is little-endian. */
    *le = (second == 0);
    return false;
}
|
|
|
|
#if 0 /* currently unused */
|
|
/* Recode any UTF-16 string to UTF-8 */
|
|
unsigned char* utf16decode(const unsigned char *utf16, unsigned char *utf8,
|
|
unsigned int count)
|
|
{
|
|
unsigned long ucs;
|
|
|
|
ucs = *(utf16++) << 8;
|
|
ucs |= *(utf16++);
|
|
|
|
if (ucs == 0xFEFF) /* Check for BOM */
|
|
return utf16BEdecode(utf16, utf8, count-1);
|
|
else if (ucs == 0xFFFE)
|
|
return utf16LEdecode(utf16, utf8, count-1);
|
|
else { /* ADDME: Should default be LE or BE? */
|
|
utf16 -= 2;
|
|
return utf16BEdecode(utf16, utf8, count);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
/* Return the number of UTF-8 chars in a string */
|
|
unsigned long utf8length(const unsigned char *utf8)
|
|
{
|
|
unsigned long l = 0;
|
|
|
|
while (*utf8 != 0)
|
|
if ((*utf8++ & MASK) != COMP)
|
|
l++;
|
|
|
|
return l;
|
|
}
|
|
|
|
/* Take a utf8 string and return the encoded length in utf16 code units */
|
|
unsigned long utf16len_utf8(const unsigned char *utf8)
|
|
{
|
|
ucschar_t cp;
|
|
unsigned long length = 0;
|
|
while (*utf8) {
|
|
utf8 = utf8decode(utf8, &cp);
|
|
#ifdef UNICODE32
|
|
if (cp >= 0x10000)
|
|
length++;
|
|
#endif
|
|
length++;
|
|
}
|
|
|
|
return length;
|
|
}
|
|
|
|
/* Decode 1 UTF-8 char and return a pointer to the next char. */
|
|
const unsigned char* utf8decode(const unsigned char *utf8, ucschar_t *ucs)
|
|
{
|
|
unsigned char c = *utf8++;
|
|
unsigned long code;
|
|
int tail = 0;
|
|
|
|
if ((c <= 0x7f) || (c >= 0xc2)) {
|
|
/* Start of new character. */
|
|
if (c < 0x80) { /* U-00000000 - U-0000007F, 1 byte */
|
|
code = c;
|
|
} else if (c < 0xe0) { /* U-00000080 - U-000007FF, 2 bytes */
|
|
tail = 1;
|
|
code = c & 0x1f;
|
|
} else if (c < 0xf0) { /* U-00000800 - U-0000FFFF, 3 bytes */
|
|
tail = 2;
|
|
code = c & 0x0f;
|
|
} else if (c < 0xf5) { /* U-00010000 - U-001FFFFF, 4 bytes */
|
|
tail = 3;
|
|
code = c & 0x07;
|
|
} else {
|
|
/* Invalid size. */
|
|
code = 0xfffd;
|
|
}
|
|
|
|
while (tail-- && ((c = *utf8++) != 0)) {
|
|
if ((c & 0xc0) == 0x80) {
|
|
/* Valid continuation character. */
|
|
code = (code << 6) | (c & 0x3f);
|
|
|
|
} else {
|
|
/* Invalid continuation char */
|
|
code = 0xfffd;
|
|
utf8--;
|
|
break;
|
|
}
|
|
}
|
|
} else {
|
|
/* Invalid UTF-8 char */
|
|
code = 0xfffd;
|
|
}
|
|
|
|
#ifdef UNICODE32
|
|
if (code > 0x10ffff)
|
|
code = 0xfffd;
|
|
#else
|
|
if (code > 0xffff)
|
|
code = 0xfffd;
|
|
#endif
|
|
|
|
*ucs = code;
|
|
return utf8;
|
|
}
|
|
|
|
void set_codepage(int cp)
|
|
{
|
|
if (cp < 0 || cp >= NUM_CODEPAGES)
|
|
cp = NUM_CODEPAGES;
|
|
|
|
/* load first then swap if load is successful, else just leave it; if
|
|
handle is 0 then we just free the current one; this won't happen often
|
|
thus we don't worry about reusing it and consequently avoid possible
|
|
clobbering of the existing one */
|
|
|
|
int handle = -1;
|
|
int tid = cp_info[cp].tid;
|
|
|
|
while (1) {
|
|
cp_lock_enter();
|
|
|
|
if (default_cp_tid == tid)
|
|
break;
|
|
|
|
if (handle >= 0 && default_cp_table_ref == 0) {
|
|
int hold = default_cp_handle;
|
|
default_cp_handle = handle;
|
|
handle = hold;
|
|
default_cp_tid = tid;
|
|
break;
|
|
}
|
|
|
|
/* alloc and load must be done outside the lock */
|
|
cp_lock_leave();
|
|
|
|
if (handle < 0 && (handle = alloc_and_load_cp_table(cp, NULL)) < 0)
|
|
return; /* OOM; change nothing */
|
|
|
|
yield();
|
|
}
|
|
|
|
default_cp = cp;
|
|
cp_lock_leave();
|
|
|
|
if (handle > 0)
|
|
cp_table_free(handle);
|
|
}
|
|
|
|
int get_codepage(void)
|
|
{
|
|
return default_cp;
|
|
}
|
|
|
|
/* seek to a given char in a utf8 string and
|
|
return its start position in the string */
|
|
int utf8seek(const unsigned char* utf8, int offset)
|
|
{
|
|
int pos = 0;
|
|
|
|
while (offset--) {
|
|
pos++;
|
|
while ((utf8[pos] & MASK) == COMP)
|
|
pos++;
|
|
}
|
|
return pos;
|
|
}
|
|
|
|
const char * get_codepage_name(int cp)
|
|
{
|
|
if (cp < 0 || cp >= NUM_CODEPAGES)
|
|
cp = NUM_CODEPAGES;
|
|
return cp_info[cp].name;
|
|
}
|
|
|
|
#if 0 /* not needed just now */
|
|
void unicode_init(void)
|
|
{
|
|
cp_lock_init();
|
|
}
|
|
#endif
|