1
0
Fork 0
forked from len0rd/rockbox

unicode: add iso_decode_ex with utf8 buffer size check

Make use of it in id3tags, playlist and cuesheet

Change-Id: Ibc8abc0faf16688bc9b826b7a712d1dfe9bf75b2
This commit is contained in:
Roman Artiukhin 2024-12-11 14:31:38 +02:00 committed by Solomon Peachy
parent f8fa1e7d5a
commit 004304dc65
5 changed files with 56 additions and 37 deletions

View file

@ -314,7 +314,6 @@ bool parse_cuesheet(struct cuesheet_file *cue_file, struct cuesheet *cue)
break;
size_t count = MAX_NAME*3 + 1;
size_t count8859 = MAX_NAME;
switch (option)
{
@ -339,7 +338,6 @@ bool parse_cuesheet(struct cuesheet_file *cue_file, struct cuesheet *cue)
dest = cue->file;
count = MAX_PATH;
count8859 = MAX_PATH/3;
break;
case eCS_TRACK:
/*Fall-Through*/
@ -357,8 +355,8 @@ bool parse_cuesheet(struct cuesheet_file *cue_file, struct cuesheet *cue)
{
if (char_enc == CHAR_ENC_ISO_8859_1)
{
dest = iso_decode(string, dest, -1,
MIN(strlen(string), count8859));
dest = iso_decode_ex(string, dest, -1,
strlen(string), count - 1);
*dest = '\0';
}
else

View file

@ -350,14 +350,7 @@ static int convert_m3u_name(char* buf, int buf_len, int buf_max, char* temp)
buf_len = i;
dest = temp;
/* Convert char by char, so as to not overflow temp (iso_decode should
* preferably handle this). No more than 4 bytes should be generated for
* each input char.
*/
for (i = 0; i < buf_len && dest < (temp + buf_max - 4); i++)
{
dest = iso_decode(&buf[i], dest, -1, 1);
}
dest = iso_decode_ex(buf, dest, -1, buf_len, buf_max - 1);
*dest = 0;
strcpy(buf, temp);

View file

@ -245,8 +245,8 @@ static int alloc_and_load_cp_table(int cp, void *buf)
return -1;
}
/* Encode a UCS value as UTF-8 and return a pointer after this UTF-8 char. */
unsigned char* utf8encode(unsigned long ucs, unsigned char *utf8)
/* returns number of additional bytes required in encoded string (bytes_count - 1) */
static int utf8_ucs_get_extra_bytes_count(unsigned long ucs)
{
int tail = 0;
@ -254,17 +254,41 @@ unsigned char* utf8encode(unsigned long ucs, unsigned char *utf8)
while (ucs >> (5*tail + 6))
tail++;
return tail;
}
/* Store the encoded form of ucs at utf8 using one first byte plus `tail`
 * additional bytes, and return a pointer just past the last byte stored.
 * `tail` is the extra-byte count, as produced by
 * utf8_ucs_get_extra_bytes_count(). No bounds checking is done here. */
static unsigned char * utf8encode_internal(unsigned long ucs, unsigned char *utf8, int tail)
{
    unsigned char *out = utf8;

    /* First byte: the bits above the `tail` 6-bit groups, tagged with the
     * prefix selected by the number of extra bytes. */
    *out++ = (ucs >> (6 * tail)) | utf8comp[tail];

    /* Remaining bytes: one 6-bit group each, highest group first. */
    for (int group = tail - 1; group >= 0; group--)
        *out++ = ((ucs >> (6 * group)) & (MASK ^ 0xFF)) | COMP;

    return out;
}
/* Recode an iso encoded string to UTF-8 */
unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8,
int cp, int count)
/* Size-checked variant of utf8encode.
 * *utf8_size is the number of bytes still available at utf8; it is always
 * reduced by the encoded length of ucs. If that leaves it negative the
 * character did not fit: nothing is written and utf8 is returned unchanged.
 * Otherwise the character is encoded and a pointer past it is returned. */
static unsigned char* utf8encode_ex(unsigned long ucs, unsigned char *utf8, int* utf8_size)
{
    const int extra_bytes = utf8_ucs_get_extra_bytes_count(ucs);
    const int remaining = *utf8_size - (extra_bytes + 1);

    *utf8_size = remaining;
    if (remaining < 0)
        return utf8;

    return utf8encode_internal(ucs, utf8, extra_bytes);
}
/* Encode a UCS value as UTF-8 and return a pointer after this UTF-8 char.
 * No output size check is performed. */
unsigned char* utf8encode(unsigned long ucs, unsigned char *utf8)
{
    const int extra_bytes = utf8_ucs_get_extra_bytes_count(ucs);

    return utf8encode_internal(ucs, utf8, extra_bytes);
}
/* Unbounded variant of iso_decode_ex: forwards all arguments and passes -1 as
 * the output size, which iso_decode_ex treats as "no limit". */
unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8, int cp, int count)
{
    const int no_size_limit = -1;

    return iso_decode_ex(iso, utf8, cp, count, no_size_limit);
}
/* Recode an iso encoded string to UTF-8 */
unsigned char* iso_decode_ex(const unsigned char *iso, unsigned char *utf8, int cp, int count, int utf8_size)
{
if (utf8_size == -1)
utf8_size = INT_MAX;
uint16_t *table = NULL;
cp_lock_enter();
@ -322,11 +346,14 @@ unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8,
cp_lock_leave();
while (count--) {
while (count-- && utf8_size > 0) {
unsigned short ucs, tmp;
if (*iso < 128 || cp == UTF_8) /* Already UTF-8 */
{
*utf8++ = *iso++;
--utf8_size;
}
else {
/* tid tells us which table to use and how */
@ -375,7 +402,8 @@ unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8,
if (ucs == 0) /* unknown char, use replacement char */
ucs = 0xfffd;
utf8 = utf8encode(ucs, utf8);
utf8 = utf8encode_ex(ucs, utf8, &utf8_size);
}
}

View file

@ -57,6 +57,8 @@ enum codepages {
/* Encode a UCS value as UTF-8 and return a pointer after this UTF-8 char. */
unsigned char* utf8encode(unsigned long ucs, unsigned char *utf8);
/* Recode an iso encoded string to UTF-8 without any output size limit
 * (forwards to iso_decode_ex with utf8_size = -1). */
unsigned char* iso_decode(const unsigned char *latin1, unsigned char *utf8, int cp, int count);
/* Recode an iso encoded string to UTF-8, converting at most count input
 * bytes and stopping once utf8_size output bytes have been used up; a
 * character that does not fit in the remaining space is not written.
 * utf8_size == -1 means no limit.
 * NOTE(review): callers in this change pass their buffer size minus one and
 * store the string terminator at the returned pointer themselves. */
unsigned char* iso_decode_ex(const unsigned char *iso, unsigned char *utf8, int cp, int count, int utf8_size);
unsigned char* utf16LEdecode(const unsigned char *utf16, unsigned char *utf8, int count);
unsigned char* utf16BEdecode(const unsigned char *utf16, unsigned char *utf8, int count);
bool utf16_has_bom(const unsigned char *utf16, bool *le);

View file

@ -568,22 +568,16 @@ static bool parse_as_utf8(char* string, int *len)
/* Must be called after parse_as_utf8. Checks to see if the passed in string is a 16-bit wide Unicode v2
string. If it is, we convert it to a UTF-8 string. If it's not unicode,
we convert from the default codepage */
static void unicode_munge(char* string, char* utf8buf, int *len) {
we convert from the default codepage
NOTE: real UTF-8 buffer size is expected to be utf8buf_size + 1 (additional byte for string terminator) */
static void unicode_munge(unsigned char* string, unsigned char* utf8buf, int *len, int utf8buf_size) {
unsigned char *str = string;
unsigned char* utf8 = utf8buf;
int i = 0;
unsigned char *str = (unsigned char *)string;
int templen = 0;
unsigned char* utf8 = (unsigned char *)utf8buf;
switch (str[0]) {
case 0x00: /* Type 0x00 is ordinary ISO 8859-1 */
str++;
(*len)--;
utf8 = iso_decode(str, utf8, -1, *len);
*utf8 = 0;
*len = (intptr_t)utf8 - (intptr_t)utf8buf;
break;
case 0x01: /* Unicode with or without BOM */
case 0x02:
(*len)--;
@ -618,10 +612,15 @@ static void unicode_munge(char* string, char* utf8buf, int *len) {
*len = templen - 1;
break;
/* case 0x03: UTF-8 encoded string handled by parse_as_utf8 */
case 0x00: /* Type 0x00 is ordinary ISO 8859-1 */
str++;
(*len)--;
//fallthrough
default: /* Plain old string */
utf8 = iso_decode(str, utf8, -1, *len);
utf8 = iso_decode_ex(str, utf8, -1, *len, utf8buf_size);
*utf8 = 0;
*len = (intptr_t)utf8 - (intptr_t)utf8buf;
*len = utf8 - utf8buf;
break;
}
}
@ -1103,10 +1102,9 @@ retry_with_limit:
{
//limit stack allocation to avoid stack overflow
utf8_size = ID3V2_BUF_SIZE;
bytesread = ID3V2_BUF_SIZE/3;
}
char utf8buf[utf8_size + 1];
unicode_munge( tag, utf8buf, &bytesread);
unsigned char utf8buf[utf8_size + 1];
unicode_munge( (unsigned char *)tag, utf8buf, &bytesread, utf8_size);
if(bytesread >= buffersize - bufferpos)
bytesread = buffersize - bufferpos - 1;