mirror of
https://github.com/Rockbox/rockbox.git
synced 2025-10-13 10:07:38 -04:00
unicode: add utf16decode with utf8 buffer size check
Make use of it in id3tags and cuesheet.
Change-Id: I153c23f1f7312e9d5e1de9f03725f2d2ab0abc93
This commit is contained in:
parent
e334a1f95e
commit
1f548f74e6
4 changed files with 34 additions and 42 deletions
|
@ -265,7 +265,7 @@ bool parse_cuesheet(struct cuesheet_file *cue_file, struct cuesheet *cue)
|
||||||
{
|
{
|
||||||
if (char_enc == CHAR_ENC_UTF_16_LE)
|
if (char_enc == CHAR_ENC_UTF_16_LE)
|
||||||
{
|
{
|
||||||
s = utf16LEdecode(line, utf16_buf, line_len);
|
s = utf16decode(line, utf16_buf, line_len>>1, sizeof(utf16_buf) - 1, true);
|
||||||
/* terminate the string at the newline */
|
/* terminate the string at the newline */
|
||||||
*s = '\0';
|
*s = '\0';
|
||||||
strcpy(line, utf16_buf);
|
strcpy(line, utf16_buf);
|
||||||
|
@ -275,7 +275,7 @@ bool parse_cuesheet(struct cuesheet_file *cue_file, struct cuesheet *cue)
|
||||||
}
|
}
|
||||||
else if (char_enc == CHAR_ENC_UTF_16_BE)
|
else if (char_enc == CHAR_ENC_UTF_16_BE)
|
||||||
{
|
{
|
||||||
s = utf16BEdecode(line, utf16_buf, line_len);
|
s = utf16decode(line, utf16_buf, line_len>>1, sizeof(utf16_buf) - 1, false);
|
||||||
*s = '\0';
|
*s = '\0';
|
||||||
strcpy(line, utf16_buf);
|
strcpy(line, utf16_buf);
|
||||||
}
|
}
|
||||||
|
|
|
@ -265,7 +265,7 @@ static unsigned char * utf8encode_internal(unsigned long ucs, unsigned char *utf
|
||||||
return utf8;
|
return utf8;
|
||||||
}
|
}
|
||||||
|
|
||||||
static unsigned char* utf8encode_ex(unsigned long ucs, unsigned char *utf8, int* utf8_size)
|
FORCE_INLINE static unsigned char* utf8encode_ex(unsigned long ucs, unsigned char *utf8, int* utf8_size)
|
||||||
{
|
{
|
||||||
const int tail = utf8_ucs_get_extra_bytes_count(ucs);
|
const int tail = utf8_ucs_get_extra_bytes_count(ucs);
|
||||||
*utf8_size -= tail + 1;
|
*utf8_size -= tail + 1;
|
||||||
|
@ -420,49 +420,46 @@ unsigned char* iso_decode_ex(const unsigned char *iso, unsigned char *utf8, int
|
||||||
return utf8;
|
return utf8;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Recode a UTF-16 string with little-endian byte ordering to UTF-8 */
|
unsigned char* utf16decode(const unsigned char *utf16, unsigned char *utf8,
|
||||||
unsigned char* utf16LEdecode(const unsigned char *utf16, unsigned char *utf8,
|
int count, int utf8_size, bool le)
|
||||||
int count)
|
|
||||||
{
|
{
|
||||||
|
if (utf8_size == -1)
|
||||||
|
utf8_size = INT_MAX;
|
||||||
|
|
||||||
|
// little-endian flag is used as significant byte index
|
||||||
|
if (le)
|
||||||
|
le = 1;
|
||||||
|
|
||||||
unsigned long ucs;
|
unsigned long ucs;
|
||||||
|
|
||||||
while (count > 0) {
|
while (count > 0 && utf8_size > 0) {
|
||||||
/* Check for a surrogate pair */
|
/* Check for a surrogate pair */
|
||||||
if (utf16[1] >= 0xD8 && utf16[1] < 0xE0) {
|
if (*(utf16 + le) >= 0xD8 && *(utf16 + le) < 0xE0) {
|
||||||
ucs = 0x10000 + ((utf16[0] << 10) | ((utf16[1] - 0xD8) << 18)
|
ucs = 0x10000 + ((utf16[1 - le] << 10) | ((utf16[le] - 0xD8) << 18)
|
||||||
| utf16[2] | ((utf16[3] - 0xDC) << 8));
|
| utf16[2 + (1 - le)] | ((utf16[2 + le] - 0xDC) << 8));
|
||||||
utf16 += 4;
|
utf16 += 4;
|
||||||
count -= 2;
|
count -= 2;
|
||||||
} else {
|
} else {
|
||||||
ucs = getle16(utf16);
|
ucs = utf16[le] << 8 | utf16[1 - le];
|
||||||
utf16 += 2;
|
utf16 += 2;
|
||||||
count -= 1;
|
count -= 1;
|
||||||
}
|
}
|
||||||
utf8 = utf8encode(ucs, utf8);
|
utf8 = utf8encode_ex(ucs, utf8, &utf8_size);
|
||||||
}
|
}
|
||||||
return utf8;
|
return utf8;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Recode a UTF-16 string with big-endian byte ordering to UTF-8 */
|
/* Recode a UTF-16 string with big-endian byte ordering to UTF-8 */
|
||||||
|
unsigned char* utf16LEdecode(const unsigned char *utf16, unsigned char *utf8,
|
||||||
|
int count)
|
||||||
|
{
|
||||||
|
return utf16decode(utf16, utf8, count, -1, true);
|
||||||
|
}
|
||||||
|
|
||||||
unsigned char* utf16BEdecode(const unsigned char *utf16, unsigned char *utf8,
|
unsigned char* utf16BEdecode(const unsigned char *utf16, unsigned char *utf8,
|
||||||
int count)
|
int count)
|
||||||
{
|
{
|
||||||
unsigned long ucs;
|
return utf16decode(utf16, utf8, count, -1, false);
|
||||||
|
|
||||||
while (count > 0) {
|
|
||||||
if (*utf16 >= 0xD8 && *utf16 < 0xE0) { /* Check for a surrogate pair */
|
|
||||||
ucs = 0x10000 + (((utf16[0] - 0xD8) << 18) | (utf16[1] << 10)
|
|
||||||
| ((utf16[2] - 0xDC) << 8) | utf16[3]);
|
|
||||||
utf16 += 4;
|
|
||||||
count -= 2;
|
|
||||||
} else {
|
|
||||||
ucs = getbe16(utf16);
|
|
||||||
utf16 += 2;
|
|
||||||
count -= 1;
|
|
||||||
}
|
|
||||||
utf8 = utf8encode(ucs, utf8);
|
|
||||||
}
|
|
||||||
return utf8;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool utf16_has_bom(const unsigned char *utf16, bool *le)
|
bool utf16_has_bom(const unsigned char *utf16, bool *le)
|
||||||
|
|
|
@ -61,6 +61,7 @@ unsigned char* iso_decode_ex(const unsigned char *iso, unsigned char *utf8, int
|
||||||
|
|
||||||
unsigned char* utf16LEdecode(const unsigned char *utf16, unsigned char *utf8, int count);
|
unsigned char* utf16LEdecode(const unsigned char *utf16, unsigned char *utf8, int count);
|
||||||
unsigned char* utf16BEdecode(const unsigned char *utf16, unsigned char *utf8, int count);
|
unsigned char* utf16BEdecode(const unsigned char *utf16, unsigned char *utf8, int count);
|
||||||
|
unsigned char* utf16decode(const unsigned char *utf16, unsigned char *utf8, int count, int utf8_size, bool le);
|
||||||
bool utf16_has_bom(const unsigned char *utf16, bool *le);
|
bool utf16_has_bom(const unsigned char *utf16, bool *le);
|
||||||
unsigned long utf8length(const unsigned char *utf8);
|
unsigned long utf8length(const unsigned char *utf8);
|
||||||
const unsigned char* utf8decode(const unsigned char *utf8, unsigned short *ucs);
|
const unsigned char* utf8decode(const unsigned char *utf8, unsigned short *ucs);
|
||||||
|
|
|
@ -574,17 +574,13 @@ static void unicode_munge(unsigned char* string, unsigned char* utf8buf, int *le
|
||||||
unsigned char *str = string;
|
unsigned char *str = string;
|
||||||
unsigned char* utf8 = utf8buf;
|
unsigned char* utf8 = utf8buf;
|
||||||
|
|
||||||
int i = 0;
|
|
||||||
int templen = 0;
|
|
||||||
|
|
||||||
switch (str[0]) {
|
switch (str[0]) {
|
||||||
case 0x01: /* Unicode with or without BOM */
|
case 0x01: /* Unicode with or without BOM */
|
||||||
case 0x02:
|
case 0x02:
|
||||||
(*len)--;
|
(*len)--;
|
||||||
str++;
|
str++;
|
||||||
bool le;
|
bool le;
|
||||||
|
int i = 0;
|
||||||
|
|
||||||
/* Handle frames with more than one string
|
/* Handle frames with more than one string
|
||||||
(needed for TXXX frames).*/
|
(needed for TXXX frames).*/
|
||||||
do {
|
do {
|
||||||
|
@ -593,24 +589,22 @@ static void unicode_munge(unsigned char* string, unsigned char* utf8buf, int *le
|
||||||
str += BOM_UTF_16_SIZE;
|
str += BOM_UTF_16_SIZE;
|
||||||
*len -= BOM_UTF_16_SIZE;
|
*len -= BOM_UTF_16_SIZE;
|
||||||
}
|
}
|
||||||
|
string = str;
|
||||||
|
|
||||||
while ((i < *len) && (str[0] || str[1])) {
|
while ((i < *len) && (str[0] || str[1])) {
|
||||||
if(le)
|
|
||||||
utf8 = utf16LEdecode(str, utf8, 1);
|
|
||||||
else
|
|
||||||
utf8 = utf16BEdecode(str, utf8, 1);
|
|
||||||
|
|
||||||
str+=2;
|
str+=2;
|
||||||
i += 2;
|
i += 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
utf8 = utf16decode(string, utf8, (str-string)>>1 /*(str-string)/2*/, utf8buf_size, le);
|
||||||
*utf8++ = 0; /* Terminate the string */
|
*utf8++ = 0; /* Terminate the string */
|
||||||
templen += (strlen(&utf8buf[templen]) + 1);
|
utf8buf_size -= utf8 - utf8buf;
|
||||||
str += 2;
|
str += 2;
|
||||||
i+=2;
|
i += 2;
|
||||||
} while(i < *len);
|
} while(i < *len && utf8buf_size > 0);
|
||||||
*len = templen - 1;
|
*len = utf8 - utf8buf - 1;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* case 0x03: UTF-8 encoded string handled by parse_as_utf8 */
|
/* case 0x03: UTF-8 encoded string handled by parse_as_utf8 */
|
||||||
|
|
||||||
case 0x00: /* Type 0x00 is ordinary ISO 8859-1 */
|
case 0x00: /* Type 0x00 is ordinary ISO 8859-1 */
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue