forked from len0rd/rockbox
The Unicode parser now handles unterminated strings, plus it tries to guess the byte order when the BOM is missing (a violation of the spec, btw)
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@4215 a1c6a512-1295-4272-9138-f99709370657
parent 4b4776c362
commit 2f4a5616c0
1 changed file with 22 additions and 2 deletions
@@ -260,6 +260,8 @@ static int unicode_munge(char** string, int *len) {
     int i;
     char *str = *string;
     char *outstr = *string;
+    bool bom = false;
+    int outlen;
 
     if(str[0] > 0x03) {
         /* Plain old string */
@@ -275,21 +277,37 @@ static int unicode_munge(char** string, int *len) {
 
     /* Unicode with or without BOM */
     if(str[0] == 0x01 || str[0] == 0x02) {
         (*len)--;
         str++;
         tmp = BYTES2INT(0, 0, str[0], str[1]);
 
         /* Now check if there is a BOM (zero-width non-breaking space, 0xfeff)
            and if it is in little or big endian format */
         if(tmp == 0xfffe) { /* Little endian? */
+            bom = true;
             le = true;
             str += 2;
             (*len)-=2;
         }
 
-        if(tmp == 0xfeff) /* Big endian? */
+        if(tmp == 0xfeff) { /* Big endian? */
+            bom = true;
             str += 2;
+            (*len)-=2;
+        }
+
+        /* If there is no BOM (which is a specification violation),
+           let's try to guess it. If one of the bytes is 0x00, it is
+           probably the most significant one. */
+        if(!bom) {
+            if(str[1] == 0)
+                le = true;
+        }
+
         i = 0;
+
+        outlen = *len / 2;
+
         do {
             if(le) {
                 if(str[1])
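The new if(!bom) branch is the byte-order guess described in the commit message: honour a BOM when one is present, otherwise assume a zero byte is the most significant half of the first code unit. Below is a rough standalone sketch of that heuristic, not the Rockbox code itself; utf16_guess_le and its interface are invented for illustration, and it assumes (as the tmp == 0xfffe test implies) that the two bytes are read in big-endian order:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative helper, not a Rockbox function: decide whether a UTF-16
   buffer is little endian, following the same rules as the patch. */
static bool utf16_guess_le(const unsigned char *str)
{
    unsigned int tmp = (str[0] << 8) | str[1];  /* first 16 bits, read big-endian */

    if (tmp == 0xfffe)   /* BOM bytes FF FE: stored LSB first, little endian */
        return true;
    if (tmp == 0xfeff)   /* BOM bytes FE FF: stored MSB first, big endian */
        return false;

    /* No BOM (a spec violation). For Latin text the high byte of every
       code unit is zero, so a zero second byte suggests little endian. */
    return str[1] == 0;
}

int main(void)
{
    const unsigned char le_bom[] = { 0xff, 0xfe, 'A', 0x00 };
    const unsigned char be_bom[] = { 0xfe, 0xff, 0x00, 'A' };
    const unsigned char no_bom[] = { 'A', 0x00, 'B', 0x00 };  /* little endian, no BOM */

    printf("%d %d %d\n", utf16_guess_le(le_bom),   /* 1 */
                         utf16_guess_le(be_bom),   /* 0 */
                         utf16_guess_le(no_bom));  /* 1 */
    return 0;
}

The guess is only reliable when the text consists mostly of single-byte code points, which is exactly the case the tag parser is narrowing to 8-bit strings anyway.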
@@ -303,9 +321,11 @@ static int unicode_munge(char** string, int *len) {
                     outstr[i++] = str[1];
             }
             str += 2;
-        } while(str[0] || str[1]);
+        } while((str[0] || str[1]) && (i < outlen));
 
         *len = i;
+
+        outstr[i] = 0; /* Terminate the string */
         return 0;
     }
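The loop changes above are what makes unterminated strings safe: the copy is now bounded by outlen (at most one output byte per 16-bit code unit) and the result is always NUL-terminated, so a tag whose UTF-16 payload is missing the trailing 0x0000 can no longer run past its buffer. A simplified sketch of the same idea, with an invented name (utf16_to_latin1_inplace is not a Rockbox symbol) and a plain while loop standing in for the patch's do/while:

#include <stdbool.h>
#include <stdio.h>

/* Narrow a possibly unterminated UTF-16 buffer of len bytes to 8-bit
   characters in place, never reading or writing past the buffer.
   Illustrative only; assumes len >= 1 so the terminator fits. */
static int utf16_to_latin1_inplace(unsigned char *buf, int len, bool le)
{
    int i = 0;
    int outlen = len / 2;                 /* one output byte per code unit */
    const unsigned char *str = buf;

    while (i < outlen && (str[0] || str[1])) {  /* stop at NUL or end of data */
        buf[i++] = le ? str[0] : str[1];        /* keep the low byte */
        str += 2;
    }
    buf[i] = 0;                           /* always terminate the output */
    return i;                             /* new length in bytes */
}

int main(void)
{
    /* "Hi" in UTF-16LE with no terminating 0x0000 */
    unsigned char tag[] = { 'H', 0x00, 'i', 0x00 };
    int n = utf16_to_latin1_inplace(tag, sizeof(tag), true);
    printf("%d %s\n", n, (char *)tag);    /* prints: 2 Hi */
    return 0;
}

Because the write index i always trails the read offset 2*i, the conversion can reuse the input buffer, which is why the patch keeps outstr pointing at the same storage as str.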