1
0
Fork 0
forked from len0rd/rockbox

Modify the wma pro decoder to produce non-interleaved samples, and work directly on the buffers in WMAProDecCtx instead to avoid the redundant copying of the output data. ~10% speedup (-2MHz) on pp502x and ~1.5% speedup (-3.8Mhz)

on mcf2049.


git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27583 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Mohamed Tarek 2010-07-26 22:03:20 +00:00
parent 87d59ab56c
commit 5dd8c53b96
3 changed files with 30 additions and 37 deletions

View file

@ -236,7 +236,7 @@ typedef struct WMAProDecodeCtx {
uint32_t frame_num; ///< current frame number
GetBitContext gb; ///< bitstream reader context
int buf_bit_size; ///< buffer size in bits
int32_t* samples;
int32_t samples;
int32_t* samples_end; ///< maximum samplebuffer pointer
uint8_t drc_gain; ///< gain for the DRC tool
int8_t skip_frame; ///< skip output step
@ -1282,7 +1282,7 @@ static int decode_subframe(WMAProDecodeCtx *s)
}
/** apply imdct (ff_imdct_half == DCTIV with reverse) */
imdct_half(av_log2(subframe_len)+1,
s->channel[c].coeffs, s->tmp);
@ -1319,13 +1319,18 @@ static int decode_frame(WMAProDecodeCtx *s)
int len = 0;
int i;
#if 0
/** check for potential output buffer overflow */
/* Rockbox : No need to check that anymore since we work directly on the
buffers in the WMAProDecCtx */
if (s->num_channels * s->samples_per_frame > s->samples_end - s->samples) {
/** return an error if no frame could be decoded at all */
DEBUGF("not enough space for the output samples\n");
s->packet_loss = 1;
return 0;
}
#endif
/** get frame length */
if (s->len_prefix)
@ -1389,24 +1394,7 @@ static int decode_frame(WMAProDecodeCtx *s)
return 0;
}
}
/** interleave samples and write them to the output buffer */
for (i = 0; i < s->num_channels; i++) {
int32_t* ptr = s->samples + i;
int incr = s->num_channels;
int32_t* iptr = s->channel[i].out;
int32_t* iend = iptr + s->samples_per_frame;
while (iptr < iend) {
*ptr = *iptr++ << 1;
ptr += incr;
}
/** reuse second half of the IMDCT output for the next frame */
memcpy(&s->channel[i].out[0],
&s->channel[i].out[s->samples_per_frame],
s->samples_per_frame * sizeof(*s->channel[i].out) >> 1);
}
s->samples += s->num_channels * s->samples_per_frame;
if (s->skip_frame) {
s->skip_frame = 0;
@ -1502,7 +1490,7 @@ static void save_bits(WMAProDecodeCtx *s, GetBitContext* gb, int len,
*@param avpkt input packet
*@return number of bytes that were read from the input buffer
*/
int decode_packet(asf_waveformatex_t *wfx, void *data, int *data_size,
int decode_packet(asf_waveformatex_t *wfx, int32_t *dec[2], int *data_size,
void* pktdata, int size)
{
WMAProDecodeCtx *s = &globWMAProDecCtx;
@ -1510,10 +1498,18 @@ int decode_packet(asf_waveformatex_t *wfx, void *data, int *data_size,
const uint8_t* buf = pktdata;
int buf_size = size;
int num_bits_prev_frame;
int packet_sequence_number;
int packet_sequence_number;\
int i;
s->samples = data;
s->samples_end = (int32_t*)((int8_t*)data + *data_size);
/** reuse second half of the IMDCT output for the next frame */
/* NOTE : Relies on the WMAProDecCtx being static */
for(i = 0; i < s->num_channels; i++)
memcpy(&s->channel[i].out[0],
&s->channel[i].out[s->samples_per_frame],
s->samples_per_frame * sizeof(*s->channel[i].out) >> 1);
s->samples = 0;
*data_size = 0;
if (s->packet_done || s->packet_loss) {
@ -1583,7 +1579,10 @@ int decode_packet(asf_waveformatex_t *wfx, void *data, int *data_size,
save_bits(s, gb, remaining_bits(s, gb), 0);
}
*data_size = (int8_t *)s->samples - (int8_t *)data;
dec[0] = s->channel[0].out;
dec[1] = s->channel[1].out;
*data_size = s->samples;
s->packet_offset = get_bits_count(gb) & 7;
s->frame_num++;

View file

@ -28,4 +28,4 @@
int decode_init(asf_waveformatex_t *wfx);
int decode_packet(asf_waveformatex_t *wfx,
void *data, int *data_size, void* pktdata, int size);
int32_t *dec[2], int *data_size, void* pktdata, int size);

View file

@ -25,10 +25,7 @@
CODEC_HEADER
#define MAXSAMPLES (1L << 12) /* Max number of samples in a wma pro subframe */
#define MAXCHANNELS 8
#define BUFSIZE MAXCHANNELS * MAXSAMPLES
static int32_t decoded[BUFSIZE];
int32_t *dec[2]; /* pointers to the output buffers in WMAProDecodeCtx in wmaprodec.c */
/* this is the codec entry point */
enum codec_status codec_main(void)
@ -73,7 +70,7 @@ next_track:
ci->configure(DSP_SWITCH_FREQUENCY, wfx.rate);
ci->configure(DSP_SET_STEREO_MODE, wfx.channels == 1 ?
STEREO_MONO : STEREO_INTERLEAVED);
STEREO_MONO : STEREO_NONINTERLEAVED);
codec_set_replaygain(ci->id3);
if (decode_init(&wfx) < 0) {
@ -130,8 +127,7 @@ next_track:
* audio frames, see libwmapro/wmaprodec.c */
while(size > 0)
{
outlen = BUFSIZE; /* decode_packet needs to know the size of the output buffer */
res = decode_packet(&wfx, decoded, &outlen, data, size);
res = decode_packet(&wfx, dec, &outlen, data, size);
if(res < 0) {
LOGF("(WMA PRO) Error: decode_packet returned %d", res);
goto done;
@ -140,10 +136,8 @@ next_track:
size -= res;
if(outlen) {
ci->yield ();
/* outlen now holds the size of the data in bytes - we want the
* number of samples. */
outlen /= (sizeof(int32_t) * wfx.channels);
ci->pcmbuf_insert(decoded, NULL, outlen);
outlen /= (2*wfx.channels);
ci->pcmbuf_insert(dec[0], dec[1], outlen );
elapsedtime += outlen*10/(wfx.rate/100);
ci->set_elapsed(elapsedtime);
ci->yield ();