1
0
Fork 0
forked from len0rd/rockbox

Modify the WMA Pro decoder to produce non-interleaved samples and to work directly on the buffers in WMAProDecodeCtx, avoiding the redundant copying of the output data. ~10% speedup (-2MHz) on pp502x and ~1.5% speedup (-3.8MHz) on mcf2049.


git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27583 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Mohamed Tarek 2010-07-26 22:03:20 +00:00
parent 87d59ab56c
commit 5dd8c53b96
3 changed files with 30 additions and 37 deletions

View file

@ -236,7 +236,7 @@ typedef struct WMAProDecodeCtx {
uint32_t frame_num; ///< current frame number uint32_t frame_num; ///< current frame number
GetBitContext gb; ///< bitstream reader context GetBitContext gb; ///< bitstream reader context
int buf_bit_size; ///< buffer size in bits int buf_bit_size; ///< buffer size in bits
int32_t* samples; int32_t samples;
int32_t* samples_end; ///< maximum samplebuffer pointer int32_t* samples_end; ///< maximum samplebuffer pointer
uint8_t drc_gain; ///< gain for the DRC tool uint8_t drc_gain; ///< gain for the DRC tool
int8_t skip_frame; ///< skip output step int8_t skip_frame; ///< skip output step
@ -1319,13 +1319,18 @@ static int decode_frame(WMAProDecodeCtx *s)
int len = 0; int len = 0;
int i; int i;
#if 0
/** check for potential output buffer overflow */ /** check for potential output buffer overflow */
/* Rockbox : No need to check that anymore since we work directly on the
buffers in the WMAProDecCtx */
if (s->num_channels * s->samples_per_frame > s->samples_end - s->samples) { if (s->num_channels * s->samples_per_frame > s->samples_end - s->samples) {
/** return an error if no frame could be decoded at all */ /** return an error if no frame could be decoded at all */
DEBUGF("not enough space for the output samples\n"); DEBUGF("not enough space for the output samples\n");
s->packet_loss = 1; s->packet_loss = 1;
return 0; return 0;
} }
#endif
/** get frame length */ /** get frame length */
if (s->len_prefix) if (s->len_prefix)
@ -1389,24 +1394,7 @@ static int decode_frame(WMAProDecodeCtx *s)
return 0; return 0;
} }
} }
s->samples += s->num_channels * s->samples_per_frame;
/** interleave samples and write them to the output buffer */
for (i = 0; i < s->num_channels; i++) {
int32_t* ptr = s->samples + i;
int incr = s->num_channels;
int32_t* iptr = s->channel[i].out;
int32_t* iend = iptr + s->samples_per_frame;
while (iptr < iend) {
*ptr = *iptr++ << 1;
ptr += incr;
}
/** reuse second half of the IMDCT output for the next frame */
memcpy(&s->channel[i].out[0],
&s->channel[i].out[s->samples_per_frame],
s->samples_per_frame * sizeof(*s->channel[i].out) >> 1);
}
if (s->skip_frame) { if (s->skip_frame) {
s->skip_frame = 0; s->skip_frame = 0;
@ -1502,7 +1490,7 @@ static void save_bits(WMAProDecodeCtx *s, GetBitContext* gb, int len,
*@param avpkt input packet *@param avpkt input packet
*@return number of bytes that were read from the input buffer *@return number of bytes that were read from the input buffer
*/ */
int decode_packet(asf_waveformatex_t *wfx, void *data, int *data_size, int decode_packet(asf_waveformatex_t *wfx, int32_t *dec[2], int *data_size,
void* pktdata, int size) void* pktdata, int size)
{ {
WMAProDecodeCtx *s = &globWMAProDecCtx; WMAProDecodeCtx *s = &globWMAProDecCtx;
@ -1510,10 +1498,18 @@ int decode_packet(asf_waveformatex_t *wfx, void *data, int *data_size,
const uint8_t* buf = pktdata; const uint8_t* buf = pktdata;
int buf_size = size; int buf_size = size;
int num_bits_prev_frame; int num_bits_prev_frame;
int packet_sequence_number; int packet_sequence_number;\
int i;
s->samples = data; /** reuse second half of the IMDCT output for the next frame */
s->samples_end = (int32_t*)((int8_t*)data + *data_size); /* NOTE : Relies on the WMAProDecCtx being static */
for(i = 0; i < s->num_channels; i++)
memcpy(&s->channel[i].out[0],
&s->channel[i].out[s->samples_per_frame],
s->samples_per_frame * sizeof(*s->channel[i].out) >> 1);
s->samples = 0;
*data_size = 0; *data_size = 0;
if (s->packet_done || s->packet_loss) { if (s->packet_done || s->packet_loss) {
@ -1583,7 +1579,10 @@ int decode_packet(asf_waveformatex_t *wfx, void *data, int *data_size,
save_bits(s, gb, remaining_bits(s, gb), 0); save_bits(s, gb, remaining_bits(s, gb), 0);
} }
*data_size = (int8_t *)s->samples - (int8_t *)data; dec[0] = s->channel[0].out;
dec[1] = s->channel[1].out;
*data_size = s->samples;
s->packet_offset = get_bits_count(gb) & 7; s->packet_offset = get_bits_count(gb) & 7;
s->frame_num++; s->frame_num++;

View file

@ -28,4 +28,4 @@
int decode_init(asf_waveformatex_t *wfx); int decode_init(asf_waveformatex_t *wfx);
int decode_packet(asf_waveformatex_t *wfx, int decode_packet(asf_waveformatex_t *wfx,
void *data, int *data_size, void* pktdata, int size); int32_t *dec[2], int *data_size, void* pktdata, int size);

View file

@ -25,10 +25,7 @@
CODEC_HEADER CODEC_HEADER
#define MAXSAMPLES (1L << 12) /* Max number of samples in a wma pro subframe */ int32_t *dec[2]; /* pointers to the output buffers in WMAProDecodeCtx in wmaprodec.c */
#define MAXCHANNELS 8
#define BUFSIZE MAXCHANNELS * MAXSAMPLES
static int32_t decoded[BUFSIZE];
/* this is the codec entry point */ /* this is the codec entry point */
enum codec_status codec_main(void) enum codec_status codec_main(void)
@ -73,7 +70,7 @@ next_track:
ci->configure(DSP_SWITCH_FREQUENCY, wfx.rate); ci->configure(DSP_SWITCH_FREQUENCY, wfx.rate);
ci->configure(DSP_SET_STEREO_MODE, wfx.channels == 1 ? ci->configure(DSP_SET_STEREO_MODE, wfx.channels == 1 ?
STEREO_MONO : STEREO_INTERLEAVED); STEREO_MONO : STEREO_NONINTERLEAVED);
codec_set_replaygain(ci->id3); codec_set_replaygain(ci->id3);
if (decode_init(&wfx) < 0) { if (decode_init(&wfx) < 0) {
@ -130,8 +127,7 @@ next_track:
* audio frames, see libwmapro/wmaprodec.c */ * audio frames, see libwmapro/wmaprodec.c */
while(size > 0) while(size > 0)
{ {
outlen = BUFSIZE; /* decode_packet needs to know the size of the output buffer */ res = decode_packet(&wfx, dec, &outlen, data, size);
res = decode_packet(&wfx, decoded, &outlen, data, size);
if(res < 0) { if(res < 0) {
LOGF("(WMA PRO) Error: decode_packet returned %d", res); LOGF("(WMA PRO) Error: decode_packet returned %d", res);
goto done; goto done;
@ -140,10 +136,8 @@ next_track:
size -= res; size -= res;
if(outlen) { if(outlen) {
ci->yield (); ci->yield ();
/* outlen now holds the size of the data in bytes - we want the outlen /= (2*wfx.channels);
* number of samples. */ ci->pcmbuf_insert(dec[0], dec[1], outlen );
outlen /= (sizeof(int32_t) * wfx.channels);
ci->pcmbuf_insert(decoded, NULL, outlen);
elapsedtime += outlen*10/(wfx.rate/100); elapsedtime += outlen*10/(wfx.rate/100);
ci->set_elapsed(elapsedtime); ci->set_elapsed(elapsedtime);
ci->yield (); ci->yield ();