Optimization for cook codec. Rework sample output to be able to use highly optimized dsp routines. Moved some functions to iram. Speeds up codec by 1.3 MHz on PP5022.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@24815 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Andree Buschmann 2010-02-21 19:47:05 +00:00
parent a31624e76e
commit 3d6faa08bf
5 changed files with 34 additions and 56 deletions

View file

@ -31,6 +31,7 @@ CODEC_HEADER
RMContext rmctx; RMContext rmctx;
RMPacket pkt; RMPacket pkt;
COOKContext q IBSS_ATTR; COOKContext q IBSS_ATTR;
int32_t rm_outbuf[2048];
static void init_rm(RMContext *rmctx) static void init_rm(RMContext *rmctx)
{ {
@ -43,7 +44,6 @@ enum codec_status codec_main(void)
static size_t buff_size; static size_t buff_size;
int datasize, res, consumed, i, time_offset; int datasize, res, consumed, i, time_offset;
uint8_t *bit_buffer; uint8_t *bit_buffer;
int16_t outbuf[2048] __attribute__((aligned(32)));
uint16_t fs,sps,h; uint16_t fs,sps,h;
uint32_t packet_count; uint32_t packet_count;
int scrambling_unit_size, num_units; int scrambling_unit_size, num_units;
@ -65,9 +65,11 @@ next_track:
init_rm(&rmctx); init_rm(&rmctx);
ci->configure(DSP_SET_FREQUENCY, ci->id3->frequency); ci->configure(DSP_SET_FREQUENCY, ci->id3->frequency);
ci->configure(DSP_SET_SAMPLE_DEPTH, 16); /* cook's sample representation is 21.11
* DSP_SET_SAMPLE_DEPTH = 11 (FRACT) + 16 (NATIVE) - 1 (SIGN) = 26 */
ci->configure(DSP_SET_SAMPLE_DEPTH, 26);
ci->configure(DSP_SET_STEREO_MODE, rmctx.nb_channels == 1 ? ci->configure(DSP_SET_STEREO_MODE, rmctx.nb_channels == 1 ?
STEREO_MONO : STEREO_INTERLEAVED); STEREO_MONO : STEREO_NONINTERLEAVED);
packet_count = rmctx.nb_packets; packet_count = rmctx.nb_packets;
rmctx.audio_framesize = rmctx.block_align; rmctx.audio_framesize = rmctx.block_align;
@ -155,7 +157,7 @@ seek_start :
ci->set_elapsed(rmctx.audiotimestamp+(1000*8*sps/rmctx.bit_rate)*i); ci->set_elapsed(rmctx.audiotimestamp+(1000*8*sps/rmctx.bit_rate)*i);
ci->seek_complete(); ci->seek_complete();
} }
res = cook_decode_frame(&rmctx,&q, outbuf, &datasize, pkt.frames[i], rmctx.block_align); res = cook_decode_frame(&rmctx,&q, rm_outbuf, &datasize, pkt.frames[i], rmctx.block_align);
rmctx.frame_number++; rmctx.frame_number++;
/* skip the first two frames; no valid audio */ /* skip the first two frames; no valid audio */
@ -166,7 +168,9 @@ seek_start :
return CODEC_ERROR; return CODEC_ERROR;
} }
ci->pcmbuf_insert(outbuf, NULL, q.samples_per_frame / rmctx.nb_channels); ci->pcmbuf_insert(rm_outbuf,
rm_outbuf+q.samples_per_channel,
q.samples_per_channel);
ci->set_elapsed(rmctx.audiotimestamp+(1000*8*sps/rmctx.bit_rate)*i); ci->set_elapsed(rmctx.audiotimestamp+(1000*8*sps/rmctx.bit_rate)*i);
} }
packet_count -= rmctx.audio_pkt_cnt; packet_count -= rmctx.audio_pkt_cnt;

View file

@ -598,7 +598,7 @@ decode_bytes_and_gain(COOKContext *q, const uint8_t *inbuffer,
static void static void
mlt_compensate_output(COOKContext *q, REAL_T *decode_buffer, mlt_compensate_output(COOKContext *q, REAL_T *decode_buffer,
cook_gains *gains, REAL_T *previous_buffer, cook_gains *gains, REAL_T *previous_buffer,
int16_t *out, int chan) int32_t *out, int chan)
{ {
REAL_T *buffer = q->mono_mdct_output; REAL_T *buffer = q->mono_mdct_output;
int i; int i;
@ -618,7 +618,9 @@ mlt_compensate_output(COOKContext *q, REAL_T *decode_buffer,
memcpy(previous_buffer, buffer+q->samples_per_channel, memcpy(previous_buffer, buffer+q->samples_per_channel,
sizeof(REAL_T)*q->samples_per_channel); sizeof(REAL_T)*q->samples_per_channel);
output_math(q, out, chan); /* Copy output to non-interleaved sample buffer */
memcpy(out + (chan * q->samples_per_channel), buffer,
sizeof(REAL_T)*q->samples_per_channel);
} }
@ -634,7 +636,7 @@ mlt_compensate_output(COOKContext *q, REAL_T *decode_buffer,
static int decode_subpacket(COOKContext *q, const uint8_t *inbuffer, static int decode_subpacket(COOKContext *q, const uint8_t *inbuffer,
int sub_packet_size, int16_t *outbuffer) { int sub_packet_size, int32_t *outbuffer) {
/* packet dump */ /* packet dump */
// for (i=0 ; i<sub_packet_size ; i++) { // for (i=0 ; i<sub_packet_size ; i++) {
// DEBUGF("%02x", inbuffer[i]); // DEBUGF("%02x", inbuffer[i]);
@ -666,7 +668,7 @@ static int decode_subpacket(COOKContext *q, const uint8_t *inbuffer,
q->mono_previous_buffer2, outbuffer, 1); q->mono_previous_buffer2, outbuffer, 1);
} }
} }
return q->samples_per_frame * sizeof(int16_t); return q->samples_per_frame * sizeof(int32_t);
} }
@ -677,7 +679,7 @@ static int decode_subpacket(COOKContext *q, const uint8_t *inbuffer,
*/ */
int cook_decode_frame(RMContext *rmctx,COOKContext *q, int cook_decode_frame(RMContext *rmctx,COOKContext *q,
int16_t *outbuffer, int *data_size, int32_t *outbuffer, int *data_size,
const uint8_t *inbuffer, int buf_size) { const uint8_t *inbuffer, int buf_size) {
//COOKContext *q = avctx->priv_data; //COOKContext *q = avctx->priv_data;
//COOKContext *q; //COOKContext *q;

View file

@ -97,6 +97,6 @@ typedef struct cook {
int cook_decode_init(RMContext *rmctx, COOKContext *q); int cook_decode_init(RMContext *rmctx, COOKContext *q);
int cook_decode_frame(RMContext *rmctx,COOKContext *q, int cook_decode_frame(RMContext *rmctx,COOKContext *q,
int16_t *outbuffer, int *data_size, int32_t *outbuffer, int *data_size,
const uint8_t *inbuffer, int buf_size); const uint8_t *inbuffer, int buf_size);
#endif /*_COOK_H */ #endif /*_COOK_H */

View file

@ -79,28 +79,28 @@ static inline FIXP fixp_pow2_neg(FIXP x, int i)
#else #else
static inline FIXP fixp_mult_su(FIXP a, FIXPU b) static inline FIXP fixp_mult_su(FIXP a, FIXPU b)
{ {
int32_t hb = (a >> 16) * b; int32_t hb = (a >> 16) * b;
uint32_t lb = (a & 0xffff) * b; uint32_t lb = (a & 0xffff) * b;
return hb + (lb >> 16) + ((lb & 0x8000) >> 15); return hb + (lb >> 16) + ((lb & 0x8000) >> 15);
} }
#endif #endif
/* Faster version of the above using 32x32=64 bit multiply */ /* Faster version of the above using 32x32=64 bit multiply */
#ifdef ROCKBOX #ifdef ROCKBOX
#define fixmul31(x,y) (MULT31(x,y)) #define fixmul31(x,y) (MULT31(x,y))
#else #else
static inline int32_t fixmul31(int32_t x, int32_t y) static inline int32_t fixmul31(int32_t x, int32_t y)
{ {
int64_t temp; int64_t temp;
temp = x; temp = x;
temp *= y; temp *= y;
temp >>= 31; //16+31-16 = 31 bits temp >>= 31; //16+31-16 = 31 bits
return (int32_t)temp; return (int32_t)temp;
} }
#endif #endif
/** /**
@ -166,7 +166,8 @@ static void scalar_dequant_math(COOKContext *q, int index,
*/ */
#include "../lib/mdct_lookup.h" #include "../lib/mdct_lookup.h"
static inline void imlt_math(COOKContext *q, FIXP *in) void imlt_math(COOKContext *q, FIXP *in) ICODE_ATTR;
void imlt_math(COOKContext *q, FIXP *in)
{ {
const int n = q->samples_per_channel; const int n = q->samples_per_channel;
const int step = 2 << (10 - av_log2(n)); const int step = 2 << (10 - av_log2(n));
@ -203,7 +204,8 @@ static inline void imlt_math(COOKContext *q, FIXP *in)
* @param gain gain correction to apply first to output buffer * @param gain gain correction to apply first to output buffer
* @param buffer data to overlap * @param buffer data to overlap
*/ */
static inline void overlap_math(COOKContext *q, int gain, FIXP buffer[]) void overlap_math(COOKContext *q, int gain, FIXP buffer[]) ICODE_ATTR;
void overlap_math(COOKContext *q, int gain, FIXP buffer[])
{ {
int i; int i;
#ifdef ROCKBOX #ifdef ROCKBOX
@ -280,33 +282,3 @@ static inline FIXP cplscale_math(FIXP x, int table, int i)
{ {
return fixp_mult_su(x, cplscales[table-2][i]); return fixp_mult_su(x, cplscales[table-2][i]);
} }
/**
 * Final conversion of the decoded MDCT output to signed, 16 bit
 * sound samples, written into a channel-interleaved buffer.
 * Each sample is passed through the fixed-point power-of-two helper
 * with an exponent of -11 and then clipped to the 16 bit range.
 * NOTE(review): the helpers presumably shift out the 11 fractional
 * bits (with rounding) -- confirm against their definitions.
 *
 * @param q      pointer to the COOKContext
 * @param out    pointer to the (interleaved) output buffer
 * @param chan   0: left or single channel, 1: right channel
 */
static inline void output_math(COOKContext *q, register int16_t *out, int chan)
{
#ifdef ROCKBOX
    /* One output slot per channel in every interleaved frame. */
    const int stride = q->nb_channels;
    register REAL_T *src = q->mono_mdct_output;
    register REAL_T *const src_end = src + q->samples_per_channel;

    /* Start at this channel's slot, then advance one frame per sample. */
    for (out += chan; src < src_end; out += stride)
        *out = CLIP_TO_15(fixp_pow2_neg(*src++, 11));
#else
    int idx;

    for (idx = 0; idx < q->samples_per_channel; idx++) {
        /* Position of sample idx for channel chan in the interleaved buffer. */
        const int slot = chan + q->nb_channels * idx;

        out[slot] = av_clip(fixp_pow2(q->mono_mdct_output[idx], -11),
                            -32768, 32767);
    }
#endif
}

View file

@ -120,7 +120,7 @@ int main(int argc, char *argv[])
char filename[15]; char filename[15];
int fd_out; int fd_out;
#endif #endif
int16_t outbuf[2048]; int32_t outbuf[2048];
uint16_t fs,sps,h; uint16_t fs,sps,h;
uint32_t packet_count; uint32_t packet_count;
COOKContext q; COOKContext q;