mirror of
https://github.com/Rockbox/rockbox.git
synced 2025-10-14 10:37:38 -04:00
unicode: add iso_decode_ex with utf8 buffer size check
Make use of it in id3tags, playlist and cuesheet. Change-Id: Ibc8abc0faf16688bc9b826b7a712d1dfe9bf75b2
This commit is contained in:
parent
f8fa1e7d5a
commit
004304dc65
5 changed files with 56 additions and 37 deletions
|
@ -314,7 +314,6 @@ bool parse_cuesheet(struct cuesheet_file *cue_file, struct cuesheet *cue)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
size_t count = MAX_NAME*3 + 1;
|
size_t count = MAX_NAME*3 + 1;
|
||||||
size_t count8859 = MAX_NAME;
|
|
||||||
|
|
||||||
switch (option)
|
switch (option)
|
||||||
{
|
{
|
||||||
|
@ -339,7 +338,6 @@ bool parse_cuesheet(struct cuesheet_file *cue_file, struct cuesheet *cue)
|
||||||
|
|
||||||
dest = cue->file;
|
dest = cue->file;
|
||||||
count = MAX_PATH;
|
count = MAX_PATH;
|
||||||
count8859 = MAX_PATH/3;
|
|
||||||
break;
|
break;
|
||||||
case eCS_TRACK:
|
case eCS_TRACK:
|
||||||
/*Fall-Through*/
|
/*Fall-Through*/
|
||||||
|
@ -357,8 +355,8 @@ bool parse_cuesheet(struct cuesheet_file *cue_file, struct cuesheet *cue)
|
||||||
{
|
{
|
||||||
if (char_enc == CHAR_ENC_ISO_8859_1)
|
if (char_enc == CHAR_ENC_ISO_8859_1)
|
||||||
{
|
{
|
||||||
dest = iso_decode(string, dest, -1,
|
dest = iso_decode_ex(string, dest, -1,
|
||||||
MIN(strlen(string), count8859));
|
strlen(string), count - 1);
|
||||||
*dest = '\0';
|
*dest = '\0';
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|
|
@ -350,14 +350,7 @@ static int convert_m3u_name(char* buf, int buf_len, int buf_max, char* temp)
|
||||||
buf_len = i;
|
buf_len = i;
|
||||||
dest = temp;
|
dest = temp;
|
||||||
|
|
||||||
/* Convert char by char, so as to not overflow temp (iso_decode should
|
dest = iso_decode_ex(buf, dest, -1, buf_len, buf_max - 1);
|
||||||
* preferably handle this). No more than 4 bytes should be generated for
|
|
||||||
* each input char.
|
|
||||||
*/
|
|
||||||
for (i = 0; i < buf_len && dest < (temp + buf_max - 4); i++)
|
|
||||||
{
|
|
||||||
dest = iso_decode(&buf[i], dest, -1, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
*dest = 0;
|
*dest = 0;
|
||||||
strcpy(buf, temp);
|
strcpy(buf, temp);
|
||||||
|
|
|
@ -245,8 +245,8 @@ static int alloc_and_load_cp_table(int cp, void *buf)
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Encode a UCS value as UTF-8 and return a pointer after this UTF-8 char. */
|
/* returns number of additional bytes required in encoded string (bytes_count - 1) */
|
||||||
unsigned char* utf8encode(unsigned long ucs, unsigned char *utf8)
|
static int utf8_ucs_get_extra_bytes_count(unsigned long ucs)
|
||||||
{
|
{
|
||||||
int tail = 0;
|
int tail = 0;
|
||||||
|
|
||||||
|
@ -254,17 +254,41 @@ unsigned char* utf8encode(unsigned long ucs, unsigned char *utf8)
|
||||||
while (ucs >> (5*tail + 6))
|
while (ucs >> (5*tail + 6))
|
||||||
tail++;
|
tail++;
|
||||||
|
|
||||||
|
return tail;
|
||||||
|
}
|
||||||
|
|
||||||
|
static unsigned char * utf8encode_internal(unsigned long ucs, unsigned char *utf8, int tail)
|
||||||
|
{
|
||||||
*utf8++ = (ucs >> (6*tail)) | utf8comp[tail];
|
*utf8++ = (ucs >> (6*tail)) | utf8comp[tail];
|
||||||
while (tail--)
|
while (tail--)
|
||||||
*utf8++ = ((ucs >> (6*tail)) & (MASK ^ 0xFF)) | COMP;
|
*utf8++ = ((ucs >> (6*tail)) & (MASK ^ 0xFF)) | COMP;
|
||||||
|
|
||||||
return utf8;
|
return utf8;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Recode an iso encoded string to UTF-8 */
|
static unsigned char* utf8encode_ex(unsigned long ucs, unsigned char *utf8, int* utf8_size)
|
||||||
unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8,
|
|
||||||
int cp, int count)
|
|
||||||
{
|
{
|
||||||
|
const int tail = utf8_ucs_get_extra_bytes_count(ucs);
|
||||||
|
*utf8_size -= tail + 1;
|
||||||
|
return *utf8_size < 0 ? utf8 : utf8encode_internal(ucs, utf8, tail);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Encode a UCS value as UTF-8 and return a pointer after this UTF-8 char. */
|
||||||
|
unsigned char* utf8encode(unsigned long ucs, unsigned char *utf8)
|
||||||
|
{
|
||||||
|
return utf8encode_internal(ucs, utf8, utf8_ucs_get_extra_bytes_count(ucs));
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8, int cp, int count)
|
||||||
|
{
|
||||||
|
return iso_decode_ex(iso, utf8, cp, count, -1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Recode an iso encoded string to UTF-8 */
|
||||||
|
unsigned char* iso_decode_ex(const unsigned char *iso, unsigned char *utf8, int cp, int count, int utf8_size)
|
||||||
|
{
|
||||||
|
if (utf8_size == -1)
|
||||||
|
utf8_size = INT_MAX;
|
||||||
|
|
||||||
uint16_t *table = NULL;
|
uint16_t *table = NULL;
|
||||||
|
|
||||||
cp_lock_enter();
|
cp_lock_enter();
|
||||||
|
@ -322,11 +346,14 @@ unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8,
|
||||||
|
|
||||||
cp_lock_leave();
|
cp_lock_leave();
|
||||||
|
|
||||||
while (count--) {
|
while (count-- && utf8_size > 0) {
|
||||||
unsigned short ucs, tmp;
|
unsigned short ucs, tmp;
|
||||||
|
|
||||||
if (*iso < 128 || cp == UTF_8) /* Already UTF-8 */
|
if (*iso < 128 || cp == UTF_8) /* Already UTF-8 */
|
||||||
|
{
|
||||||
*utf8++ = *iso++;
|
*utf8++ = *iso++;
|
||||||
|
--utf8_size;
|
||||||
|
}
|
||||||
|
|
||||||
else {
|
else {
|
||||||
/* tid tells us which table to use and how */
|
/* tid tells us which table to use and how */
|
||||||
|
@ -375,7 +402,8 @@ unsigned char* iso_decode(const unsigned char *iso, unsigned char *utf8,
|
||||||
|
|
||||||
if (ucs == 0) /* unknown char, use replacement char */
|
if (ucs == 0) /* unknown char, use replacement char */
|
||||||
ucs = 0xfffd;
|
ucs = 0xfffd;
|
||||||
utf8 = utf8encode(ucs, utf8);
|
|
||||||
|
utf8 = utf8encode_ex(ucs, utf8, &utf8_size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -57,6 +57,8 @@ enum codepages {
|
||||||
/* Encode a UCS value as UTF-8 and return a pointer after this UTF-8 char. */
|
/* Encode a UCS value as UTF-8 and return a pointer after this UTF-8 char. */
|
||||||
unsigned char* utf8encode(unsigned long ucs, unsigned char *utf8);
|
unsigned char* utf8encode(unsigned long ucs, unsigned char *utf8);
|
||||||
unsigned char* iso_decode(const unsigned char *latin1, unsigned char *utf8, int cp, int count);
|
unsigned char* iso_decode(const unsigned char *latin1, unsigned char *utf8, int cp, int count);
|
||||||
|
unsigned char* iso_decode_ex(const unsigned char *iso, unsigned char *utf8, int cp, int count, int utf8_size);
|
||||||
|
|
||||||
unsigned char* utf16LEdecode(const unsigned char *utf16, unsigned char *utf8, int count);
|
unsigned char* utf16LEdecode(const unsigned char *utf16, unsigned char *utf8, int count);
|
||||||
unsigned char* utf16BEdecode(const unsigned char *utf16, unsigned char *utf8, int count);
|
unsigned char* utf16BEdecode(const unsigned char *utf16, unsigned char *utf8, int count);
|
||||||
bool utf16_has_bom(const unsigned char *utf16, bool *le);
|
bool utf16_has_bom(const unsigned char *utf16, bool *le);
|
||||||
|
|
|
@ -568,22 +568,16 @@ static bool parse_as_utf8(char* string, int *len)
|
||||||
|
|
||||||
/* Must be called after parse_as_utf8. Checks to see if the passed in string is a 16-bit wide Unicode v2
|
/* Must be called after parse_as_utf8. Checks to see if the passed in string is a 16-bit wide Unicode v2
|
||||||
string. If it is, we convert it to a UTF-8 string. If it's not unicode,
|
string. If it is, we convert it to a UTF-8 string. If it's not unicode,
|
||||||
we convert from the default codepage */
|
we convert from the default codepage
|
||||||
static void unicode_munge(char* string, char* utf8buf, int *len) {
|
NOTE: real UTF-8 buffer size is expected to be utf8buf_size + 1 (additional byte for string terminator) */
|
||||||
|
static void unicode_munge(unsigned char* string, unsigned char* utf8buf, int *len, int utf8buf_size) {
|
||||||
|
unsigned char *str = string;
|
||||||
|
unsigned char* utf8 = utf8buf;
|
||||||
|
|
||||||
int i = 0;
|
int i = 0;
|
||||||
unsigned char *str = (unsigned char *)string;
|
|
||||||
int templen = 0;
|
int templen = 0;
|
||||||
unsigned char* utf8 = (unsigned char *)utf8buf;
|
|
||||||
|
|
||||||
switch (str[0]) {
|
switch (str[0]) {
|
||||||
case 0x00: /* Type 0x00 is ordinary ISO 8859-1 */
|
|
||||||
str++;
|
|
||||||
(*len)--;
|
|
||||||
utf8 = iso_decode(str, utf8, -1, *len);
|
|
||||||
*utf8 = 0;
|
|
||||||
*len = (intptr_t)utf8 - (intptr_t)utf8buf;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 0x01: /* Unicode with or without BOM */
|
case 0x01: /* Unicode with or without BOM */
|
||||||
case 0x02:
|
case 0x02:
|
||||||
(*len)--;
|
(*len)--;
|
||||||
|
@ -618,10 +612,15 @@ static void unicode_munge(char* string, char* utf8buf, int *len) {
|
||||||
*len = templen - 1;
|
*len = templen - 1;
|
||||||
break;
|
break;
|
||||||
/* case 0x03: UTF-8 encoded string handled by parse_as_utf8 */
|
/* case 0x03: UTF-8 encoded string handled by parse_as_utf8 */
|
||||||
|
|
||||||
|
case 0x00: /* Type 0x00 is ordinary ISO 8859-1 */
|
||||||
|
str++;
|
||||||
|
(*len)--;
|
||||||
|
//fallthrough
|
||||||
default: /* Plain old string */
|
default: /* Plain old string */
|
||||||
utf8 = iso_decode(str, utf8, -1, *len);
|
utf8 = iso_decode_ex(str, utf8, -1, *len, utf8buf_size);
|
||||||
*utf8 = 0;
|
*utf8 = 0;
|
||||||
*len = (intptr_t)utf8 - (intptr_t)utf8buf;
|
*len = utf8 - utf8buf;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1103,10 +1102,9 @@ retry_with_limit:
|
||||||
{
|
{
|
||||||
//limit stack allocation to avoid stack overflow
|
//limit stack allocation to avoid stack overflow
|
||||||
utf8_size = ID3V2_BUF_SIZE;
|
utf8_size = ID3V2_BUF_SIZE;
|
||||||
bytesread = ID3V2_BUF_SIZE/3;
|
|
||||||
}
|
}
|
||||||
char utf8buf[utf8_size + 1];
|
unsigned char utf8buf[utf8_size + 1];
|
||||||
unicode_munge( tag, utf8buf, &bytesread);
|
unicode_munge( (unsigned char *)tag, utf8buf, &bytesread, utf8_size);
|
||||||
if(bytesread >= buffersize - bufferpos)
|
if(bytesread >= buffersize - bufferpos)
|
||||||
bytesread = buffersize - bufferpos - 1;
|
bytesread = buffersize - bufferpos - 1;
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue