forked from len0rd/rockbox
metadata: asf: Use system utf16decode conversion
Change-Id: I606bf5365c84cbee4badd1ac1cbaace1207834f4
This commit is contained in:
parent
1f548f74e6
commit
de8d1437dc
1 changed files with 43 additions and 49 deletions
|
@ -33,7 +33,7 @@
|
||||||
#include "metadata_common.h"
|
#include "metadata_common.h"
|
||||||
#include "metadata_parsers.h"
|
#include "metadata_parsers.h"
|
||||||
#include <codecs/libasf/asf.h>
|
#include <codecs/libasf/asf.h>
|
||||||
|
#include "rbunicode.h"
|
||||||
/* TODO: Just read the GUIDs into a 16-byte array, and use memcmp to compare */
|
/* TODO: Just read the GUIDs into a 16-byte array, and use memcmp to compare */
|
||||||
struct guid_s {
|
struct guid_s {
|
||||||
uint32_t v1;
|
uint32_t v1;
|
||||||
|
@ -154,75 +154,69 @@ static int asf_intdecode(int fd, int type, int length)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int is_valid_utf16(const unsigned char *data, size_t length)
|
||||||
|
{
|
||||||
|
if (length < 2) return 0; // Not enough data for even one UTF-16 character
|
||||||
|
|
||||||
|
// Get the last two bytes as a UTF-16 character (little-endian)
|
||||||
|
uint16_t last = data[length - 2] | (data[length - 1] << 8);
|
||||||
|
|
||||||
|
// Check if the last character is a high surrogate
|
||||||
|
if (last >= 0xD800 && last <= 0xDBFF) {
|
||||||
|
return 0; // Invalid if it's the last character
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if the last character is a low surrogate
|
||||||
|
if (last >= 0xDC00 && last <= 0xDFFF) {
|
||||||
|
if (length < 4) return 0; // Invalid if there's no preceding character
|
||||||
|
uint16_t second_last = data[length - 4] | (data[length - 3] << 8);
|
||||||
|
|
||||||
|
// Invalid if not preceded by a high surrogate
|
||||||
|
return second_last >= 0xD800 && second_last <= 0xDBFF;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If it's not a surrogate, it's valid
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
/* Decode a LE utf16 string from a disk buffer into a fixed-sized
|
/* Decode a LE utf16 string from a disk buffer into a fixed-sized
|
||||||
utf8 buffer.
|
utf8 buffer.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static void asf_utf16LEdecode(int fd,
|
static void asf_utf16LEdecode(int fd,
|
||||||
uint16_t utf16bytes,
|
uint16_t utf16bytes,
|
||||||
unsigned char **utf8,
|
unsigned char **utf8,
|
||||||
int* utf8bytes
|
int* utf8bytes
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
unsigned long ucs;
|
const int reserve_bytes = 6;
|
||||||
int n;
|
int n;
|
||||||
unsigned char utf16buf[256];
|
unsigned char utf16buf[258];
|
||||||
unsigned char* utf16 = utf16buf;
|
unsigned char* newutf8 = *utf8;
|
||||||
unsigned char* newutf8;
|
const int utf8bytes_initial = *utf8bytes;
|
||||||
|
|
||||||
n = read(fd, utf16buf, MIN(sizeof(utf16buf), utf16bytes));
|
while ((n = read(fd, utf16buf, MIN(sizeof(utf16buf) - 2, utf16bytes))) >= 2)
|
||||||
utf16bytes -= n;
|
{
|
||||||
|
// If the UTF-16 string ends with an incomplete surrogate pair, try to complete it.
|
||||||
while (n > 0) {
|
if (!is_valid_utf16(utf16buf, n))
|
||||||
/* Check for a surrogate pair */
|
{
|
||||||
if (utf16[1] >= 0xD8 && utf16[1] < 0xE0) {
|
n += read(fd, utf16buf + n, 2);
|
||||||
if (n < 4) {
|
|
||||||
/* Run out of utf16 bytes, read some more */
|
|
||||||
utf16buf[0] = utf16[0];
|
|
||||||
utf16buf[1] = utf16[1];
|
|
||||||
|
|
||||||
n = read(fd, utf16buf + 2, MIN(sizeof(utf16buf)-2, utf16bytes));
|
|
||||||
utf16 = utf16buf;
|
|
||||||
utf16bytes -= n;
|
|
||||||
n += 2;
|
|
||||||
}
|
}
|
||||||
|
newutf8 = utf16decode(utf16buf, newutf8, n>>1, *utf8bytes - reserve_bytes, true);
|
||||||
|
*utf8bytes = utf8bytes_initial - (newutf8 - *utf8);
|
||||||
|
utf16bytes -= n;
|
||||||
|
|
||||||
if (n < 4) {
|
if (*utf8bytes <= reserve_bytes)
|
||||||
/* Truncated utf16 string, abort */
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
ucs = 0x10000 + ((utf16[0] << 10) | ((utf16[1] - 0xD8) << 18)
|
|
||||||
| utf16[2] | ((utf16[3] - 0xDC) << 8));
|
|
||||||
utf16 += 4;
|
|
||||||
n -= 4;
|
|
||||||
} else {
|
|
||||||
ucs = (utf16[0] | (utf16[1] << 8));
|
|
||||||
utf16 += 2;
|
|
||||||
n -= 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (*utf8bytes > 6) {
|
*newutf8 = 0;
|
||||||
newutf8 = utf8encode(ucs, *utf8);
|
|
||||||
*utf8bytes -= (newutf8 - *utf8);
|
|
||||||
*utf8 += (newutf8 - *utf8);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* We have run out of utf16 bytes, read more if available */
|
|
||||||
if ((n == 0) && (utf16bytes > 0)) {
|
|
||||||
n = read(fd, utf16buf, MIN(sizeof(utf16buf), utf16bytes));
|
|
||||||
utf16 = utf16buf;
|
|
||||||
utf16bytes -= n;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
*utf8[0] = 0;
|
|
||||||
--*utf8bytes;
|
--*utf8bytes;
|
||||||
|
*utf8 = newutf8;
|
||||||
|
|
||||||
if (utf16bytes > 0) {
|
if (utf16bytes > 0) {
|
||||||
/* Skip any remaining bytes */
|
/* Skip any remaining bytes */
|
||||||
lseek(fd, utf16bytes, SEEK_CUR);
|
lseek(fd, utf16bytes, SEEK_CUR);
|
||||||
}
|
}
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int asf_parse_header(int fd, struct mp3entry* id3,
|
static int asf_parse_header(int fd, struct mp3entry* id3,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue