forked from len0rd/rockbox
Add tag parsing to the ASF metadata parser. TODO: 1) Stress test to ensure it deals correctly with very long tags and the id3v2/v1 buffers becoming full. 2) Review the entire parser and attempt to reduce the binary size.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@13790 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
9b96d66621
commit
b081b94d17
1 changed files with 179 additions and 77 deletions
|
@ -21,16 +21,16 @@
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
|
#include <atoi.h>
|
||||||
#include <inttypes.h>
|
#include <inttypes.h>
|
||||||
|
|
||||||
#include "id3.h"
|
#include "id3.h"
|
||||||
#include "debug.h"
|
#include "debug.h"
|
||||||
#include "rbunicode.h"
|
#include "rbunicode.h"
|
||||||
#include "metadata_common.h"
|
#include "metadata_common.h"
|
||||||
|
#include "system.h"
|
||||||
#include <codecs/libwma/asf.h>
|
#include <codecs/libwma/asf.h>
|
||||||
|
|
||||||
static asf_waveformatex_t wfx;
|
|
||||||
|
|
||||||
/* TODO: Just read the GUIDs into a 16-byte array, and use memcmp to compare */
|
/* TODO: Just read the GUIDs into a 16-byte array, and use memcmp to compare */
|
||||||
struct guid_s {
|
struct guid_s {
|
||||||
uint32_t v1;
|
uint32_t v1;
|
||||||
|
@ -121,7 +121,105 @@ static void asf_read_object_header(asf_object_t *obj, int fd)
|
||||||
obj->datalen = 0;
|
obj->datalen = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int asf_parse_header(int fd, struct mp3entry* id3)
|
/* Parse an integer from the extended content object - we always
|
||||||
|
convert to an int, regardless of native format.
|
||||||
|
*/
|
||||||
|
static int asf_intdecode(int fd, int type, int length)
|
||||||
|
{
|
||||||
|
uint16_t tmp16;
|
||||||
|
uint32_t tmp32;
|
||||||
|
uint64_t tmp64;
|
||||||
|
|
||||||
|
if (type==3) {
|
||||||
|
read_uint32le(fd, &tmp32);
|
||||||
|
lseek(fd,length - 4,SEEK_CUR);
|
||||||
|
return (int)tmp32;
|
||||||
|
} else if (type==4) {
|
||||||
|
read_uint32le(fd, &tmp64);
|
||||||
|
lseek(fd,length - 8,SEEK_CUR);
|
||||||
|
return (int)tmp64;
|
||||||
|
} else if (type == 5) {
|
||||||
|
read_uint16le(fd, &tmp16);
|
||||||
|
lseek(fd,length - 2,SEEK_CUR);
|
||||||
|
return (int)tmp16;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Decode a LE utf16 string from a disk buffer into a fixed-sized
|
||||||
|
utf8 buffer.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void asf_utf16LEdecode(int fd,
|
||||||
|
uint16_t utf16bytes,
|
||||||
|
unsigned char **utf8,
|
||||||
|
int* utf8bytes
|
||||||
|
)
|
||||||
|
{
|
||||||
|
unsigned long ucs;
|
||||||
|
int n;
|
||||||
|
unsigned char utf16buf[256];
|
||||||
|
unsigned char* utf16 = utf16buf;
|
||||||
|
unsigned char* newutf8;
|
||||||
|
|
||||||
|
n = read(fd, utf16buf, MIN(sizeof(utf16buf), utf16bytes));
|
||||||
|
utf16bytes -= n;
|
||||||
|
|
||||||
|
while (n > 0) {
|
||||||
|
/* Check for a surrogate pair */
|
||||||
|
if (utf16[1] >= 0xD8 && utf16[1] < 0xE0) {
|
||||||
|
if (n < 4) {
|
||||||
|
/* Run out of utf16 bytes, read some more */
|
||||||
|
utf16buf[0] = utf16[0];
|
||||||
|
utf16buf[1] = utf16[1];
|
||||||
|
|
||||||
|
n = read(fd, utf16buf + 2, MIN(sizeof(utf16buf)-2, utf16bytes));
|
||||||
|
utf16 = utf16buf;
|
||||||
|
utf16bytes -= n;
|
||||||
|
n += 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (n < 4) {
|
||||||
|
/* Truncated utf16 string, abort */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
ucs = 0x10000 + ((utf16[0] << 10) | ((utf16[1] - 0xD8) << 18)
|
||||||
|
| utf16[2] | ((utf16[3] - 0xDC) << 8));
|
||||||
|
utf16 += 4;
|
||||||
|
n -= 4;
|
||||||
|
} else {
|
||||||
|
ucs = (utf16[0] | (utf16[1] << 8));
|
||||||
|
utf16 += 2;
|
||||||
|
n -= 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (*utf8bytes > 6) {
|
||||||
|
newutf8 = utf8encode(ucs, *utf8);
|
||||||
|
*utf8bytes -= (newutf8 - *utf8);
|
||||||
|
*utf8 += (newutf8 - *utf8);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* We have run out of utf16 bytes, read more if available */
|
||||||
|
if ((n == 0) && (utf16bytes > 0)) {
|
||||||
|
n = read(fd, utf16buf, MIN(sizeof(utf16buf), utf16bytes));
|
||||||
|
utf16 = utf16buf;
|
||||||
|
utf16bytes -= n;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
*utf8[0] = 0;
|
||||||
|
--*utf8bytes;
|
||||||
|
|
||||||
|
if (utf16bytes > 0) {
|
||||||
|
/* Skip any remaining bytes */
|
||||||
|
lseek(fd, utf16bytes, SEEK_CUR);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int asf_parse_header(int fd, struct mp3entry* id3,
|
||||||
|
asf_waveformatex_t* wfx)
|
||||||
{
|
{
|
||||||
asf_object_t current;
|
asf_object_t current;
|
||||||
asf_object_t header;
|
asf_object_t header;
|
||||||
|
@ -129,14 +227,11 @@ static int asf_parse_header(int fd, struct mp3entry* id3)
|
||||||
int i;
|
int i;
|
||||||
int fileprop = 0;
|
int fileprop = 0;
|
||||||
uint64_t play_duration;
|
uint64_t play_duration;
|
||||||
uint64_t tmp64;
|
|
||||||
uint32_t tmp32;
|
|
||||||
uint16_t tmp16;
|
|
||||||
uint8_t tmp8;
|
|
||||||
uint16_t flags;
|
uint16_t flags;
|
||||||
uint32_t subobjects;
|
uint32_t subobjects;
|
||||||
uint8_t utf16buf[512];
|
|
||||||
uint8_t utf8buf[512];
|
uint8_t utf8buf[512];
|
||||||
|
int id3buf_remaining = sizeof(id3->id3v2buf) + sizeof(id3->id3v1buf);
|
||||||
|
unsigned char* id3buf = (unsigned char*)id3->id3v2buf;
|
||||||
|
|
||||||
asf_read_object_header((asf_object_t *) &header, fd);
|
asf_read_object_header((asf_object_t *) &header, fd);
|
||||||
|
|
||||||
|
@ -193,7 +288,7 @@ static int asf_parse_header(int fd, struct mp3entry* id3)
|
||||||
|
|
||||||
/* Read the packet size - uint32_t at offset 68 */
|
/* Read the packet size - uint32_t at offset 68 */
|
||||||
lseek(fd, 20, SEEK_CUR);
|
lseek(fd, 20, SEEK_CUR);
|
||||||
read_uint32le(fd, &wfx.packet_size);
|
read_uint32le(fd, &wfx->packet_size);
|
||||||
|
|
||||||
/* Skip bytes remaining in object */
|
/* Skip bytes remaining in object */
|
||||||
lseek(fd, current.size - 24 - 72, SEEK_CUR);
|
lseek(fd, current.size - 24 - 72, SEEK_CUR);
|
||||||
|
@ -225,7 +320,7 @@ static int asf_parse_header(int fd, struct mp3entry* id3)
|
||||||
DEBUGF("Found stream properties for audio stream %d\n",flags&0x7f);
|
DEBUGF("Found stream properties for audio stream %d\n",flags&0x7f);
|
||||||
|
|
||||||
/* TODO: Check codec_id and find the lowest numbered audio stream in the file */
|
/* TODO: Check codec_id and find the lowest numbered audio stream in the file */
|
||||||
wfx.audiostream = flags&0x7f;
|
wfx->audiostream = flags&0x7f;
|
||||||
|
|
||||||
if (propdatalen < 18) {
|
if (propdatalen < 18) {
|
||||||
return ASF_ERROR_INVALID_LENGTH;
|
return ASF_ERROR_INVALID_LENGTH;
|
||||||
|
@ -236,29 +331,25 @@ static int asf_parse_header(int fd, struct mp3entry* id3)
|
||||||
return ASF_ERROR_INVALID_LENGTH;
|
return ASF_ERROR_INVALID_LENGTH;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
read_uint16le(fd, &wfx.codec_id);
|
read_uint16le(fd, &wfx->codec_id);
|
||||||
read_uint16le(fd, &wfx.channels);
|
read_uint16le(fd, &wfx->channels);
|
||||||
read_uint32le(fd, &wfx.rate);
|
read_uint32le(fd, &wfx->rate);
|
||||||
read_uint32le(fd, &wfx.bitrate);
|
read_uint32le(fd, &wfx->bitrate);
|
||||||
wfx.bitrate *= 8;
|
wfx->bitrate *= 8;
|
||||||
read_uint16le(fd, &wfx.blockalign);
|
read_uint16le(fd, &wfx->blockalign);
|
||||||
read_uint16le(fd, &wfx.bitspersample);
|
read_uint16le(fd, &wfx->bitspersample);
|
||||||
read_uint16le(fd, &wfx.datalen);
|
read_uint16le(fd, &wfx->datalen);
|
||||||
|
|
||||||
/* Round bitrate to the nearest kbit */
|
/* Round bitrate to the nearest kbit */
|
||||||
id3->bitrate = (wfx.bitrate + 500) / 1000;
|
id3->bitrate = (wfx->bitrate + 500) / 1000;
|
||||||
id3->frequency = wfx.rate;
|
id3->frequency = wfx->rate;
|
||||||
|
|
||||||
if (wfx.codec_id == ASF_CODEC_ID_WMAV1) {
|
if (wfx->codec_id == ASF_CODEC_ID_WMAV1) {
|
||||||
read(fd, wfx.data, 4);
|
read(fd, wfx->data, 4);
|
||||||
lseek(fd,current.size - 24 - 72 - 4,SEEK_CUR);
|
lseek(fd,current.size - 24 - 72 - 4,SEEK_CUR);
|
||||||
/* A hack - copy the wfx struct to the MP3 TOC field in the id3 struct */
|
} else if (wfx->codec_id == ASF_CODEC_ID_WMAV2) {
|
||||||
memcpy(id3->toc, &wfx, sizeof(wfx));
|
read(fd, wfx->data, 6);
|
||||||
} else if (wfx.codec_id == ASF_CODEC_ID_WMAV2) {
|
|
||||||
read(fd, wfx.data, 6);
|
|
||||||
lseek(fd,current.size - 24 - 72 - 6,SEEK_CUR);
|
lseek(fd,current.size - 24 - 72 - 6,SEEK_CUR);
|
||||||
/* A hack - copy the wfx struct to the MP3 TOC field in the id3 struct */
|
|
||||||
memcpy(id3->toc, &wfx, sizeof(wfx));
|
|
||||||
} else {
|
} else {
|
||||||
lseek(fd,current.size - 24 - 72,SEEK_CUR);
|
lseek(fd,current.size - 24 - 72,SEEK_CUR);
|
||||||
}
|
}
|
||||||
|
@ -279,13 +370,24 @@ static int asf_parse_header(int fd, struct mp3entry* id3)
|
||||||
DEBUGF("strlength = %u\n",strlength[i]);
|
DEBUGF("strlength = %u\n",strlength[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i=0; i<5 ; i++) {
|
if (strlength[0] > 0) { /* 0 - Title */
|
||||||
if (strlength[i] > 0) {
|
id3->title = id3buf;
|
||||||
read(fd, utf16buf, strlength[i]);
|
asf_utf16LEdecode(fd, strlength[0], &id3buf, &id3buf_remaining);
|
||||||
utf16LEdecode(utf16buf, utf8buf, strlength[i]);
|
|
||||||
DEBUGF("TAG %d = %s\n",i,utf8buf);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (strlength[1] > 0) { /* 1 - Artist */
|
||||||
|
id3->artist = id3buf;
|
||||||
|
asf_utf16LEdecode(fd, strlength[1], &id3buf, &id3buf_remaining);
|
||||||
|
}
|
||||||
|
|
||||||
|
lseek(fd, strlength[2], SEEK_CUR); /* 2 - copyright */
|
||||||
|
|
||||||
|
if (strlength[3] > 0) { /* 3 - description */
|
||||||
|
id3->comment = id3buf;
|
||||||
|
asf_utf16LEdecode(fd, strlength[3], &id3buf, &id3buf_remaining);
|
||||||
|
}
|
||||||
|
|
||||||
|
lseek(fd, strlength[4], SEEK_CUR); /* 4 - rating */
|
||||||
} else if (asf_guid_match(¤t.guid, &asf_guid_extended_content_description)) {
|
} else if (asf_guid_match(¤t.guid, &asf_guid_extended_content_description)) {
|
||||||
uint16_t count;
|
uint16_t count;
|
||||||
int i;
|
int i;
|
||||||
|
@ -298,55 +400,50 @@ static int asf_parse_header(int fd, struct mp3entry* id3)
|
||||||
|
|
||||||
for (i=0; i < count; i++) {
|
for (i=0; i < count; i++) {
|
||||||
uint16_t length, type;
|
uint16_t length, type;
|
||||||
|
unsigned char* utf8 = utf8buf;
|
||||||
|
int utf8length = 512;
|
||||||
|
|
||||||
read_uint16le(fd, &length);
|
read_uint16le(fd, &length);
|
||||||
read(fd, utf16buf, length);
|
asf_utf16LEdecode(fd, length, &utf8, &utf8length);
|
||||||
utf16LEdecode(utf16buf, utf8buf, length);
|
|
||||||
DEBUGF("Key=\"%s\" ",utf8buf);
|
|
||||||
bytesleft -= 2 + length;
|
bytesleft -= 2 + length;
|
||||||
|
|
||||||
read_uint16le(fd, &type);
|
read_uint16le(fd, &type);
|
||||||
read_uint16le(fd, &length);
|
read_uint16le(fd, &length);
|
||||||
switch(type)
|
|
||||||
{
|
|
||||||
case 0: /* String */
|
|
||||||
read(fd, utf16buf, length);
|
|
||||||
utf16LEdecode(utf16buf, utf8buf, length);
|
|
||||||
DEBUGF("Value=\"%s\"\n",utf8buf);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 1: /* Hex string */
|
if (!strcmp("WM/TrackNumber",utf8buf)) {
|
||||||
DEBUGF("Value=NOT YET IMPLEMENTED (HEX STRING)\n");
|
if (type == 0) {
|
||||||
lseek(fd,length,SEEK_CUR);
|
id3->track_string = id3buf;
|
||||||
break;
|
asf_utf16LEdecode(fd, length, &id3buf, &id3buf_remaining);
|
||||||
|
id3->tracknum = atoi(id3->track_string);
|
||||||
case 2: /* Bool */
|
} else if ((type >=2) && (type <= 5)) {
|
||||||
read(fd, &tmp8, 1);
|
id3->tracknum = asf_intdecode(fd, type, length);
|
||||||
DEBUGF("Value=%s\n",(tmp8 ? "TRUE" : "FALSE"));
|
} else {
|
||||||
lseek(fd,length - 1,SEEK_CUR);
|
lseek(fd, length, SEEK_CUR);
|
||||||
break;
|
}
|
||||||
|
} else if ((!strcmp("WM/Genre",utf8buf)) && (type == 0)) {
|
||||||
case 3: /* 32-bit int */
|
id3->genre_string = id3buf;
|
||||||
read_uint32le(fd, &tmp32);
|
asf_utf16LEdecode(fd, length, &id3buf, &id3buf_remaining);
|
||||||
DEBUGF("Value=%u\n",(unsigned int)tmp32);
|
} else if ((!strcmp("WM/AlbumTitle",utf8buf)) && (type == 0)) {
|
||||||
lseek(fd,length - 4,SEEK_CUR);
|
id3->album = id3buf;
|
||||||
break;
|
asf_utf16LEdecode(fd, length, &id3buf, &id3buf_remaining);
|
||||||
|
} else if ((!strcmp("WM/AlbumArtist",utf8buf)) && (type == 0)) {
|
||||||
case 4: /* 64-bit int */
|
id3->albumartist = id3buf;
|
||||||
read_uint64le(fd, &tmp64);
|
asf_utf16LEdecode(fd, length, &id3buf, &id3buf_remaining);
|
||||||
DEBUGF("Value=[64-bit int]\n");
|
} else if ((!strcmp("WM/Composer",utf8buf)) && (type == 0)) {
|
||||||
lseek(fd,length - 8,SEEK_CUR);
|
id3->composer = id3buf;
|
||||||
break;
|
asf_utf16LEdecode(fd, length, &id3buf, &id3buf_remaining);
|
||||||
|
} else if (!strcmp("WM/Year",utf8buf)) {
|
||||||
case 5: /* 16-bit int */
|
if (type == 0) {
|
||||||
read_uint16le(fd, &tmp16);
|
id3->year_string = id3buf;
|
||||||
DEBUGF("Value=%u\n",tmp16);
|
asf_utf16LEdecode(fd, length, &id3buf, &id3buf_remaining);
|
||||||
lseek(fd,length - 2,SEEK_CUR);
|
id3->year = atoi(id3->year_string);
|
||||||
break;
|
} else if ((type >=2) && (type <= 5)) {
|
||||||
|
id3->year = asf_intdecode(fd, type, length);
|
||||||
default:
|
} else {
|
||||||
lseek(fd,length,SEEK_CUR);
|
lseek(fd, length, SEEK_CUR);
|
||||||
break;
|
}
|
||||||
|
} else {
|
||||||
|
lseek(fd, length, SEEK_CUR);
|
||||||
}
|
}
|
||||||
bytesleft -= 4 + length;
|
bytesleft -= 4 + length;
|
||||||
}
|
}
|
||||||
|
@ -386,10 +483,11 @@ bool get_asf_metadata(int fd, struct mp3entry* id3)
|
||||||
{
|
{
|
||||||
int res;
|
int res;
|
||||||
asf_object_t obj;
|
asf_object_t obj;
|
||||||
|
asf_waveformatex_t wfx;
|
||||||
|
|
||||||
wfx.audiostream = -1;
|
wfx.audiostream = -1;
|
||||||
|
|
||||||
res = asf_parse_header(fd, id3);
|
res = asf_parse_header(fd, id3, &wfx);
|
||||||
|
|
||||||
if (res < 0) {
|
if (res < 0) {
|
||||||
DEBUGF("ASF: parsing error - %d\n",res);
|
DEBUGF("ASF: parsing error - %d\n",res);
|
||||||
|
@ -419,5 +517,9 @@ bool get_asf_metadata(int fd, struct mp3entry* id3)
|
||||||
*/
|
*/
|
||||||
id3->first_frame_offset = lseek(fd, 0, SEEK_CUR) + 26;
|
id3->first_frame_offset = lseek(fd, 0, SEEK_CUR) + 26;
|
||||||
|
|
||||||
|
/* We copy the wfx struct to the MP3 TOC field in the id3 struct so
|
||||||
|
the codec doesn't need to parse the header object again */
|
||||||
|
memcpy(id3->toc, &wfx, sizeof(wfx));
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue