Optimization for the cook codec: rework the sample output so that the highly optimized DSP routines can be used, and move some functions to IRAM. Speeds up the codec by 1.3 MHz on PP5022.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@24815 a1c6a512-1295-4272-9138-f99709370657
2010-02-21 19:47:05 +00:00 · 2010-02-21 19:47:05 +00:00 · 3d6faa08bf
commit 3d6faa08bf
parent a31624e76e
5 changed files with 34 additions and 56 deletions
--- a/apps/codecs/cook.c
+++ b/apps/codecs/cook.c
@ -31,6 +31,7 @@ CODEC_HEADER
 RMContext rmctx;
 RMPacket pkt;
 COOKContext q IBSS_ATTR;
+int32_t rm_outbuf[2048];

 static void init_rm(RMContext *rmctx)
 {
@ -43,7 +44,6 @@ enum codec_status codec_main(void)
    static size_t buff_size;
    int datasize, res, consumed, i, time_offset;
    uint8_t *bit_buffer;
-    int16_t outbuf[2048] __attribute__((aligned(32)));
    uint16_t fs,sps,h;
    uint32_t packet_count;
    int scrambling_unit_size, num_units;
@ -65,9 +65,11 @@ next_track:
    init_rm(&rmctx);
 
    ci->configure(DSP_SET_FREQUENCY, ci->id3->frequency);
-    ci->configure(DSP_SET_SAMPLE_DEPTH, 16);
+    /* cook's sample representation is 21.11
+     * DSP_SET_SAMPLE_DEPTH = 11 (FRACT) + 16 (NATIVE) - 1 (SIGN) = 26 */
+    ci->configure(DSP_SET_SAMPLE_DEPTH, 26);
    ci->configure(DSP_SET_STEREO_MODE, rmctx.nb_channels == 1 ?
-                  STEREO_MONO : STEREO_INTERLEAVED);
+                  STEREO_MONO : STEREO_NONINTERLEAVED);

    packet_count = rmctx.nb_packets;
    rmctx.audio_framesize = rmctx.block_align;
@ -155,7 +157,7 @@ seek_start :
                ci->set_elapsed(rmctx.audiotimestamp+(1000*8*sps/rmctx.bit_rate)*i);
                ci->seek_complete(); 
            }    
-            res = cook_decode_frame(&rmctx,&q, outbuf, &datasize, pkt.frames[i], rmctx.block_align);
+            res = cook_decode_frame(&rmctx,&q, rm_outbuf, &datasize, pkt.frames[i], rmctx.block_align);
            rmctx.frame_number++;

            /* skip the first two frames; no valid audio */
@ -166,7 +168,9 @@ seek_start :
                return CODEC_ERROR;
            }

-            ci->pcmbuf_insert(outbuf, NULL, q.samples_per_frame / rmctx.nb_channels);
+            ci->pcmbuf_insert(rm_outbuf, 
+                              rm_outbuf+q.samples_per_channel,
+                              q.samples_per_channel);
            ci->set_elapsed(rmctx.audiotimestamp+(1000*8*sps/rmctx.bit_rate)*i);  
        }
        packet_count -= rmctx.audio_pkt_cnt;
--- a/apps/codecs/libcook/cook.c
+++ b/apps/codecs/libcook/cook.c
@ -598,7 +598,7 @@ decode_bytes_and_gain(COOKContext *q, const uint8_t *inbuffer,
 static void
 mlt_compensate_output(COOKContext *q, REAL_T *decode_buffer,
                      cook_gains *gains, REAL_T *previous_buffer,
-                      int16_t *out, int chan)
+                      int32_t *out, int chan)
 {
    REAL_T *buffer = q->mono_mdct_output;
    int i;
@ -618,7 +618,9 @@ mlt_compensate_output(COOKContext *q, REAL_T *decode_buffer,
    memcpy(previous_buffer, buffer+q->samples_per_channel,
           sizeof(REAL_T)*q->samples_per_channel);

-    output_math(q, out, chan);
+    /* Copy output to non-interleaved sample buffer */
+    memcpy(out + (chan * q->samples_per_channel), buffer,
+           sizeof(REAL_T)*q->samples_per_channel);
 }


@ -634,7 +636,7 @@ mlt_compensate_output(COOKContext *q, REAL_T *decode_buffer,


 static int decode_subpacket(COOKContext *q, const uint8_t *inbuffer,
-                            int sub_packet_size, int16_t *outbuffer) {
+                            int sub_packet_size, int32_t *outbuffer) {
    /* packet dump */
 //    for (i=0 ; i<sub_packet_size ; i++) {
 //        DEBUGF("%02x", inbuffer[i]);
@ -666,7 +668,7 @@ static int decode_subpacket(COOKContext *q, const uint8_t *inbuffer,
                                  q->mono_previous_buffer2, outbuffer, 1);
        }
    }
-    return q->samples_per_frame * sizeof(int16_t);
+    return q->samples_per_frame * sizeof(int32_t);
 }


@ -677,7 +679,7 @@ static int decode_subpacket(COOKContext *q, const uint8_t *inbuffer,
 */

 int cook_decode_frame(RMContext *rmctx,COOKContext *q,
-            int16_t *outbuffer, int *data_size,
+            int32_t *outbuffer, int *data_size,
            const uint8_t *inbuffer, int buf_size) {
    //COOKContext *q = avctx->priv_data;
    //COOKContext *q;
--- a/apps/codecs/libcook/cook.h
+++ b/apps/codecs/libcook/cook.h
@ -97,6 +97,6 @@ typedef struct cook {

 int cook_decode_init(RMContext *rmctx, COOKContext *q);
 int cook_decode_frame(RMContext *rmctx,COOKContext *q,
-                      int16_t *outbuffer, int *data_size,
+                      int32_t *outbuffer, int *data_size,
                      const uint8_t *inbuffer, int buf_size);
 #endif /*_COOK_H */
--- a/apps/codecs/libcook/cook_fixpoint.h
+++ b/apps/codecs/libcook/cook_fixpoint.h
@ -79,28 +79,28 @@ static inline FIXP fixp_pow2_neg(FIXP x, int i)
 #else
 static inline FIXP fixp_mult_su(FIXP a, FIXPU b)
 {
-    int32_t hb = (a >> 16) * b; 	 
-    uint32_t lb = (a & 0xffff) * b; 	 
+    int32_t hb = (a >> 16) * b;      
+    uint32_t lb = (a & 0xffff) * b;      

-    return hb + (lb >> 16) + ((lb & 0x8000) >> 15); 	 
+    return hb + (lb >> 16) + ((lb & 0x8000) >> 15);      
 }
 #endif

 /* Faster version of the above using 32x32=64 bit multiply */
 #ifdef ROCKBOX
 #define fixmul31(x,y) (MULT31(x,y))
-#else 	 
-static inline int32_t fixmul31(int32_t x, int32_t y) 	 
-{ 	 
-    int64_t temp; 	 
+#else    
+static inline int32_t fixmul31(int32_t x, int32_t y)     
+{    
+    int64_t temp;    

-    temp = x; 	 
-    temp *= y; 	 
+    temp = x;    
+    temp *= y;   

-    temp >>= 31;        //16+31-16 = 31 bits 	 
+    temp >>= 31;        //16+31-16 = 31 bits     
    
-    return (int32_t)temp; 	 
-} 	 
+    return (int32_t)temp;    
+}    
 #endif

 /**
@ -166,7 +166,8 @@ static void scalar_dequant_math(COOKContext *q, int index,
 */
 #include "../lib/mdct_lookup.h"

-static inline void imlt_math(COOKContext *q, FIXP *in)
+void imlt_math(COOKContext *q, FIXP *in) ICODE_ATTR;
+void imlt_math(COOKContext *q, FIXP *in)
 {
    const int n = q->samples_per_channel;
    const int step = 2 << (10 - av_log2(n));
@ -203,7 +204,8 @@ static inline void imlt_math(COOKContext *q, FIXP *in)
 * @param gain              gain correction to apply first to output buffer
 * @param buffer            data to overlap
 */
-static inline void overlap_math(COOKContext *q, int gain, FIXP buffer[])
+void overlap_math(COOKContext *q, int gain, FIXP buffer[]) ICODE_ATTR;
+void overlap_math(COOKContext *q, int gain, FIXP buffer[])
 {
    int i;
 #ifdef ROCKBOX
@ -280,33 +282,3 @@ static inline FIXP cplscale_math(FIXP x, int table, int i)
 {
  return fixp_mult_su(x, cplscales[table-2][i]);
 }
-
-
-/**
- * Final converion from floating point values to
- * signed, 16 bit sound samples. Round and clip.
- *
- * @param q                 pointer to the COOKContext
- * @param out               pointer to the output buffer
- * @param chan              0: left or single channel, 1: right channel
- */
-static inline void output_math(COOKContext *q, register int16_t *out, int chan)
-{
-#ifdef ROCKBOX
-    register REAL_T * mono_output_ptr = q->mono_mdct_output;
-    register REAL_T * mono_output_end = mono_output_ptr + q->samples_per_channel;
-    out += chan;
-    const int STEP = q->nb_channels;
-    while( mono_output_ptr < mono_output_end )
-    {
-      *out = CLIP_TO_15(fixp_pow2_neg(*mono_output_ptr++, 11));
-      out += STEP;
-    }
-#else
-    int j;
-    for (j = 0; j < q->samples_per_channel; j++) {
-        out[chan + q->nb_channels * j] =
-        av_clip(fixp_pow2(q->mono_mdct_output[j], -11), -32768, 32767);
-    }
-#endif
-}
--- a/apps/codecs/libcook/main.c
+++ b/apps/codecs/libcook/main.c
@ -120,7 +120,7 @@ int main(int argc, char *argv[])
    char filename[15];
    int fd_out;
 #endif
-    int16_t outbuf[2048];
+    int32_t outbuf[2048];
    uint16_t fs,sps,h;
    uint32_t packet_count;
    COOKContext q;