Partial conversion of the wmapro decoder to fixed point arithmetic. Currently inverse quantization & rescaling,
IMDCT and windowing are all in fixed point.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27302 a1c6a512-1295-4272-9138-f99709370657
2010-07-05 22:33:37 +00:00 · 2010-07-05 22:33:37 +00:00 · d884af2b99
commit d884af2b99
parent 6a04479d63
14 changed files with 3644 additions and 43 deletions
--- a/apps/codecs/libwmapro/README.rockbox
+++ b/apps/codecs/libwmapro/README.rockbox
@ -14,8 +14,18 @@ IMPORT DETAILS

 Based on ffmpeg svn r22886 dated 15 April 2010.

-Currently, the files contain minimal changes from their original state in order
-to be able to compile cleanly. 
+The code is gradually being converted from floating point maths to
+fixed point.
+
+As of 6 July 2010, the following steps are all working properly in fixed point:
+- Inverse quantization and rescaling
+- IMDCT
+- Windowing
+
+Comparing the output wav files of the partially fixed point decoder with
+the output wav files of the ffmpeg decoder gives:
+- average relative error = 0.016%
+- maximum relative error = 0.3% 

 COMPILING

--- a/apps/codecs/libwmapro/SOURCES
+++ b/apps/codecs/libwmapro/SOURCES
@ -2,8 +2,10 @@ wmaprodec.c
 wma.c
 dsputil.c
 mdct.c
+mdct_tables.c
 fft.c
 bitstream.c
+wmapro_mdct.c
 libavutil/log.c
 libavutil/mem.c
 libavutil/mathematics.c
--- a/apps/codecs/libwmapro/fft.c
+++ b/apps/codecs/libwmapro/fft.c
@ -101,10 +101,10 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
    s2 = inverse ? 1.0 : -1.0;

    s->fft_permute = ff_fft_permute_c;
-    s->fft_calc    = ff_fft_calc_c;
+    s->fft_calc    = fff_fft_calc_c;
 //#if CONFIG_MDCT
    s->imdct_calc  = ff_imdct_calc_c;
-    s->imdct_half  = ff_imdct_half_c;
+    s->imdct_half  = fff_imdct_half_c;
    s->mdct_calc   = ff_mdct_calc_c;
 //#endif
    s->exptab1     = NULL;
@ -361,7 +361,7 @@ static void (* const fft_dispatch[])(FFTComplex*) = {
    fft2048, fft4096, fft8192, fft16384, fft32768, fft65536,
 };

-void ff_fft_calc_c(FFTContext *s, FFTComplex *z)
+void fff_fft_calc_c(FFTContext *s, FFTComplex *z)
 {
    fft_dispatch[s->nbits-2](z);
 }
--- a/apps/codecs/libwmapro/fft.h
+++ b/apps/codecs/libwmapro/fft.h
@ -110,7 +110,7 @@ extern SINTABLE(65536);
 */
 int ff_fft_init(FFTContext *s, int nbits, int inverse);
 void ff_fft_permute_c(FFTContext *s, FFTComplex *z);
-void ff_fft_calc_c(FFTContext *s, FFTComplex *z);
+void fff_fft_calc_c(FFTContext *s, FFTComplex *z);

 void ff_fft_init_altivec(FFTContext *s);
 void ff_fft_init_mmx(FFTContext *s);
@ -127,7 +127,7 @@ static inline void ff_fft_permute(FFTContext *s, FFTComplex *z)
 * Do a complex FFT with the parameters defined in ff_fft_init(). The
 * input data must be permuted before. No 1.0/sqrt(n) normalization is done.
 */
-static inline void ff_fft_calc(FFTContext *s, FFTComplex *z)
+static inline void fff_fft_calc(FFTContext *s, FFTComplex *z)
 {
    s->fft_calc(s, z);
 }
@ -135,11 +135,11 @@ void ff_fft_end(FFTContext *s);

 /* MDCT computation */

-static inline void ff_imdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input)
+static inline void fff_imdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input)
 {
    s->imdct_calc(s, output, input);
 }
-static inline void ff_imdct_half(FFTContext *s, FFTSample *output, const FFTSample *input)
+static inline void fff_imdct_half(FFTContext *s, FFTSample *output, const FFTSample *input)
 {
    s->imdct_half(s, output, input);
 }
@ -181,7 +181,7 @@ extern SINETABLE_CONST float * const ff_sine_windows[13];

 int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale);
 void ff_imdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input);
-void ff_imdct_half_c(FFTContext *s, FFTSample *output, const FFTSample *input);
+void fff_imdct_half_c(FFTContext *s, FFTSample *output, const FFTSample *input);
 void ff_mdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input);
 void ff_mdct_end(FFTContext *s);

--- a/apps/codecs/libwmapro/mdct.c
+++ b/apps/codecs/libwmapro/mdct.c
@ -121,7 +121,7 @@ av_cold int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale)
 * @param output N/2 samples
 * @param input N/2 samples
 */
-void ff_imdct_half_c(FFTContext *s, FFTSample *output, const FFTSample *input)
+void fff_imdct_half_c(FFTContext *s, FFTSample *output, const FFTSample *input)
 {
    int k, n8, n4, n2, n, j;
    const uint16_t *revtab = s->revtab;
@ -144,8 +144,8 @@ void ff_imdct_half_c(FFTContext *s, FFTSample *output, const FFTSample *input)
        in1 += 2;
        in2 -= 2;
    }
-    ff_fft_calc(s, z);

+    fff_fft_calc(s, z);
    /* post rotation + reordering */
    for(k = 0; k < n8; k++) {
        FFTSample r0, i0, r1, i1;
@ -170,7 +170,7 @@ void ff_imdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input)
    int n2 = n >> 1;
    int n4 = n >> 2;

-    ff_imdct_half_c(s, output+n4, input);
+    fff_imdct_half_c(s, output+n4, input);

    for(k = 0; k < n4; k++) {
        output[k] = -output[n2-k-1];
@ -211,7 +211,7 @@ void ff_mdct_calc_c(FFTContext *s, FFTSample *out, const FFTSample *input)
        CMUL(x[j].re, x[j].im, re, im, -tcos[n8 + i], tsin[n8 + i]);
    }

-    ff_fft_calc(s, x);
+    fff_fft_calc(s, x);

    /* post rotation */
    for(i=0;i<n8;i++) {
--- a/apps/codecs/libwmapro/mdct_tables.c
+++ b/apps/codecs/libwmapro/mdct_tables.c
--- a/apps/codecs/libwmapro/mdct_tables.h
+++ b/apps/codecs/libwmapro/mdct_tables.h
@ -0,0 +1,9 @@
+#ifndef _MDCT_TABLES_H_
+#define _MDCT_TABLES_H_
+#include <inttypes.h>
+
+/* Fixed point window tables. wmapro_window() picks one with the index
+ * av_log2(winlen) - BLOCK_MIN_BITS. */
+extern const int32_t *sine_windows[6];
+/* Sine/cosine lookup table read by imdct_half(); the entries used for a
+ * transform of size n start at offset n/2 - 128. */
+extern const int32_t sincos_lookup_wmap[8064];
+
+#endif /* _MDCT_TABLES_H_ */
+
--- a/apps/codecs/libwmapro/quant.h
+++ b/apps/codecs/libwmapro/quant.h
@ -0,0 +1,45 @@
+#ifndef _QUANT_H_
+#define _QUANT_H_
+
+#include <inttypes.h>
+
+#define EXP_MIN 50
+#define EXP_MAX 139
+
+/* Inverse quantization factors used by wma pro, stored as unscaled integers.
+ * Entry (exp - EXP_MIN) holds the value of  quant = pow(10.0, exp / 20.0).
+ * 'exp' is an integer which was experimentally found to fall in the inclusive
+ * range [EXP_MIN, EXP_MAX]; callers must range-check it before using QUANT().
+ * The table is 'static' so that including this header from more than one
+ * translation unit cannot produce duplicate definitions. */
+static const int32_t quant_tab[EXP_MAX - EXP_MIN + 1] = {
+    0x0000013C, 0x00000163, 0x0000018E, 0x000001BF,
+    0x000001F5, 0x00000232, 0x00000277, 0x000002C4,
+    0x0000031A, 0x0000037B, 0x000003E8, 0x00000462,
+    0x000004EB, 0x00000585, 0x00000631, 0x000006F2,
+    0x000007CB, 0x000008BF, 0x000009D0, 0x00000B02,
+    0x00000C5A, 0x00000DDC, 0x00000F8D, 0x00001173,
+    0x00001394, 0x000015F7, 0x000018A6, 0x00001BA7,
+    0x00001F07, 0x000022D1, 0x00002710, 0x00002BD4,
+    0x0000312D, 0x0000372D, 0x00003DE9, 0x00004577,
+    0x00004DF1, 0x00005773, 0x0000621F, 0x00006E18,
+    0x00007B87, 0x00008A99, 0x00009B83, 0x0000AE7C,
+    0x0000C3C7, 0x0000DBAA, 0x0000F678, 0x0001148B,
+    0x00013649, 0x00015C25, 0x000186A0, 0x0001B64A,
+    0x0001EBC5, 0x000227C6, 0x00026B19, 0x0002B6A4,
+    0x00030B66, 0x00036A80, 0x0003D535, 0x00044CEE,
+    0x0004D344, 0x000569FD, 0x0006131B, 0x0006D0DC,
+    0x0007A5C3, 0x000894A5, 0x0009A0AD, 0x000ACD6A,
+    0x000C1ED8, 0x000D9973, 0x000F4240, 0x00111EE2,
+    0x001335AD, 0x00158DBA, 0x00182EFD, 0x001B2267,
+    0x001E71FE, 0x00222901, 0x0026540E, 0x002B014F,
+    0x003040A6, 0x003623E6, 0x003CBF10, 0x00442894,
+    0x004C79A0, 0x0055CE75, 0x006046C5, 0x006C0622,
+    0x00793472, 0x0087FE7D,
+};
+
+/* Return the quantization factor for 'exp'. The argument is parenthesized so
+ * that compound expressions index the table correctly. */
+#define QUANT(exp)    (quant_tab[(exp) - EXP_MIN])
+
+#endif /* _QUANT_H_ */
--- a/apps/codecs/libwmapro/types.h
+++ b/apps/codecs/libwmapro/types.h
@ -0,0 +1,8 @@
+#ifndef _TYPES_H_
+#define _TYPES_H_
+
+#include <inttypes.h>
+
+/* 32-bit signed integer used to carry fixed point values. A typedef (rather
+ * than a macro) keeps the name a real, scoped type for the compiler. */
+typedef int32_t FIXED;
+
+#endif
--- a/apps/codecs/libwmapro/wmapro_math.h
+++ b/apps/codecs/libwmapro/wmapro_math.h
@ -0,0 +1,46 @@
+#ifndef _WMAPRO_MATH_H_
+#define _WMAPRO_MATH_H_
+
+#include <inttypes.h>
+#include "types.h"
+
+/* Conversions between float and fixed point with 16 fractional bits.
+ * ftofix16() adds +/-0.5 before the integer cast so the result is rounded.
+ * It evaluates its argument twice: do not pass expressions with side
+ * effects. */
+#define fixtof16(x)       (float)((float)(x) / (float)(1 << 16))
+#define ftofix16(x)       ((int32_t)((x) * (float)(1 << 16) + ((x) < 0 ? -0.5:0.5)))
+
+/* Multiply x by y using a 64-bit intermediate product, shift the product
+ * right by 'shamt' bits and return the result converted to 32 bits. */
+static inline FIXED fixmulshift(FIXED x, FIXED y, int shamt)
+{
+    const int64_t product = (int64_t)x * y;
+
+    return (int32_t)(product >> shamt);
+}
+
+/* Fixed point windowing of two blocks of 'len' samples each.
+ * Reads src0[0..len-1], src1[0..len-1] and win[0..2*len-1], and writes
+ * dst[0..2*len-1]. Each product is taken with the negated window value and
+ * shifted right by 31 bits; (add_bias << 16) is added to every output.
+ * wmapro_window() calls this next to the float dsp.vector_fmul_window()
+ * with the same argument layout. */
+static inline void vector_fixmul_window(FIXED *dst, const FIXED *src0,
+                                   const FIXED *src1, const FIXED *win,
+                                   FIXED add_bias, int len)
+{
+    /* Shift through an unsigned type: left-shifting a negative signed value
+     * is undefined behaviour in C. The value is loop invariant. */
+    const FIXED bias = (FIXED)((uint32_t)add_bias << 16);
+    int i, j;
+
+    dst  += len;
+    win  += len;
+    src0 += len;
+    for (i = -len, j = len - 1; i < 0; i++, j--) {
+        /* Read every input before writing: the caller passes buffers where
+         * dst overlaps src0 and src1. */
+        const FIXED s0 = src0[i];
+        const FIXED s1 = src1[j];
+        const FIXED wi = win[i];
+        const FIXED wj = win[j];
+
+        dst[i] = fixmulshift(s0, -wj, 31) - fixmulshift(s1, -wi, 31) + bias;
+        dst[j] = fixmulshift(s0, -wi, 31) + fixmulshift(s1, -wj, 31) + bias;
+    }
+}
+
+/* dst[i] = (src[i] * mul) >> 32 for every i in [0, len). */
+static inline void vector_fixmul_scalar(FIXED *dst, const FIXED *src, FIXED mul,
+                                        int len)
+{
+    int i;
+
+    for (i = 0; i < len; i++)
+        dst[i] = fixmulshift(src[i], mul, 32);
+}
+
+#endif /* _WMAPRO_MATH_H_ */
--- a/apps/codecs/libwmapro/wmapro_mdct.c
+++ b/apps/codecs/libwmapro/wmapro_mdct.c
@ -0,0 +1,51 @@
+#include <inttypes.h>
+#include "wmapro_mdct.h"
+#include "mdct_tables.h"    /* for sincos_lookup_wmap */
+#include "../lib/mdct_lookup.h" /* for revtab */
+#include "../lib/fft.h"     /* for FFT data structures */
+#include "codeclib.h"
+#include "../lib/codeclib_misc.h" /* for XNPROD31 */
+
+/**
+ * Fixed point half IMDCT for a transform of size n = 1 << nbits.
+ *
+ * The pre rotation reads input[0 .. n/2 - 1]; the FFT and the post rotation
+ * then run in place on the output buffer, viewed as n/4 FFTComplex entries.
+ *
+ * @param nbits  log2 of the transform size. The table offset n/2 - 128 and
+ *               the 8064 = 128 + 256 + ... + 4096 entries of
+ *               sincos_lookup_wmap suggest nbits is expected to be in 8..13
+ *               -- TODO confirm against the table generator.
+ * @param output result buffer, also used as FFT work area
+ * @param input  coefficients to transform
+ */
+void imdct_half(unsigned int nbits, int32_t *output, const int32_t *input){
+    int k, n8, n4, n2, n, j;
+    const int32_t *in1, *in2;
+    FFTComplex *z = (FFTComplex *)output;
+
+    n = 1 << nbits;
+    n2 = n >> 1;
+    n4 = n >> 2;
+    n8 = n >> 3;
+
+    /* Start of the lookup entries belonging to this transform size. */
+    const int32_t *const tab = sincos_lookup_wmap + ((n2) - (1<<7));
+    const int32_t *T = tab;
+
+    /* pre rotation */
+    /* revtab entries are scaled down by 14 - nbits bits to index a transform
+     * of this size. */
+    const int revtab_shift = (14- nbits);
+    in1 = input;
+    in2 = input + n2 - 1;
+    for(k = 0; k < n4; k++) {
+        j=revtab[k]>>revtab_shift;
+        /* Table values are shifted up by 16 bits before being handed to
+         * XNPROD31 (presumably to reach the scale that macro expects). */
+        XNPROD31(*in2, *in1, T[1]<<16, T[0]<<16, &z[j].re, &z[j].im );
+        in1 += 2;
+        in2 -= 2;
+        T += 2;
+    }
+
+    ff_fft_calc_c(nbits-2, z);
+
+    /* post rotation + reordering */
+    /* T walks backwards and V forwards from the middle of the table while the
+     * z[] entries are processed outwards from the centre.
+     * NOTE(review): T and V start on the same table pair, so the k == 0 step
+     * uses one pair for both z[n8-1] and z[n8]; ffmpeg's float version uses
+     * indices n8-k-1 and n8+k -- confirm this matches the table layout. */
+    T = tab + n4;
+    const int32_t *V = T;
+    for(k = 0; k < n8; k++) {
+        int32_t r0, i0, r1, i1;
+        XNPROD31(z[n8-k-1].im, z[n8-k-1].re, T[0]<<16, T[1]<<16, &r0, &i1 );
+        XNPROD31(z[n8+k  ].im, z[n8+k  ].re, V[0]<<16, V[1]<<16, &r1, &i0 );
+        /* The imaginary parts are exchanged between the two entries. */
+        z[n8-k-1].re = r0;
+        z[n8-k-1].im = i0;
+        z[n8+k  ].re = r1;
+        z[n8+k  ].im = i1;
+        T-=2;
+        V+=2;
+    }
+}
--- a/apps/codecs/libwmapro/wmapro_mdct.h
+++ b/apps/codecs/libwmapro/wmapro_mdct.h
@ -0,0 +1,8 @@
+#ifndef _WMAPRO_MDCT_H_
+#define _WMAPRO_MDCT_H_
+
+#include <inttypes.h>
+
+/* Fixed point half IMDCT for a transform of size 1 << nbits: reads the
+ * coefficients from 'input' and writes the result to 'output'.
+ * Implemented in wmapro_mdct.c. */
+void imdct_half(unsigned int nbits, int32_t *output, const int32_t *input);
+
+#endif
--- a/apps/codecs/libwmapro/wmaprodec.c
+++ b/apps/codecs/libwmapro/wmaprodec.c
@ -94,6 +94,12 @@
 #include "dsputil.h"
 #include "wma.h"
 #include "wmaprodec.h"
+#include "wmapro_mdct.h"
+#include "mdct_tables.h"
+#include "quant.h"
+#include "types.h"
+#include "wmapro_math.h"
+#include "codecs.h"

 /* Some defines to make it compile */
 #define AVERROR_INVALIDDATA  -1
@ -148,7 +154,9 @@ typedef struct {
    int*     scale_factors;                           ///< pointer to the scale factor values used for decoding
    uint8_t  table_idx;                               ///< index in sf_offsets for the scale factor reference block
    float*   coeffs;                                  ///< pointer to the subframe decode buffer
+    FIXED*   fixcoeffs;
    DECLARE_ALIGNED(16, float, out)[WMAPRO_BLOCK_MAX_SIZE + WMAPRO_BLOCK_MAX_SIZE / 2]; ///< output buffer
+    DECLARE_ALIGNED(16, FIXED, fixout)[WMAPRO_BLOCK_MAX_SIZE + WMAPRO_BLOCK_MAX_SIZE / 2]; ///< output buffer
 } WMAProChannelCtx;

 /**
@ -174,6 +182,7 @@ typedef struct WMAProDecodeCtx {
    PutBitContext    pb;                            ///< context for filling the frame_data buffer
    FFTContext       mdct_ctx[WMAPRO_BLOCK_SIZES];  ///< MDCT context per block size
    DECLARE_ALIGNED(16, float, tmp)[WMAPRO_BLOCK_MAX_SIZE]; ///< IMDCT output buffer
+    DECLARE_ALIGNED(16, FIXED, fixtmp)[WMAPRO_BLOCK_MAX_SIZE]; ///< IMDCT output buffer
    float*           windows[WMAPRO_BLOCK_SIZES];   ///< windows for the different block sizes

    /* frame size dependent frame information (set during initialization) */
@ -208,8 +217,9 @@ typedef struct WMAProDecodeCtx {
    uint32_t         frame_num;                     ///< current frame number (not used for decoding)
    GetBitContext    gb;                            ///< bitstream reader context
    int              buf_bit_size;                  ///< buffer size in bits
-    float*           samples;                       ///< current samplebuffer pointer
-    float*           samples_end;                   ///< maximum samplebuffer pointer
+    float*           samplesf;                       ///< current samplebuffer pointer
+    FIXED*           samples;
+    FIXED*           samples_end;                   ///< maximum samplebuffer pointer
    uint8_t          drc_gain;                      ///< gain for the DRC tool
    int8_t           skip_frame;                    ///< skip output step
    int8_t           parsed_all_subframes;          ///< all subframes decoded?
@ -1023,22 +1033,30 @@ static void inverse_channel_transform(WMAProDecodeCtx *s)
 static void wmapro_window(WMAProDecodeCtx *s)
 {
    int i;
+
    for (i = 0; i < s->channels_for_cur_subframe; i++) {
        int c = s->channel_indexes_for_cur_subframe[i];
-        float* window;
+        FIXED* window;
+        float* win2;
        int winlen = s->channel[c].prev_block_len;
        float* start = s->channel[c].coeffs - (winlen >> 1);
+        FIXED *xstart= s->channel[c].fixcoeffs - (winlen >> 1);
+        int j;

        if (s->subframe_len < winlen) {
            start += (winlen - s->subframe_len) >> 1;
+            xstart += (winlen - s->subframe_len) >> 1;
            winlen = s->subframe_len;
        }

-        window = s->windows[av_log2(winlen) - BLOCK_MIN_BITS];
-
+        window = sine_windows[av_log2(winlen) - BLOCK_MIN_BITS];
+        win2 = s->windows[av_log2(winlen) - BLOCK_MIN_BITS];       
+            
        winlen >>= 1;

        s->dsp.vector_fmul_window(start, start, start + winlen,
+                                  win2, 0, winlen);
+        vector_fixmul_window(xstart, xstart, xstart + winlen,
                                  window, 0, winlen);

        s->channel[c].prev_block_len = s->subframe_len;
@ -1116,6 +1134,8 @@ static int decode_subframe(WMAProDecodeCtx *s)

        s->channel[c].coeffs = &s->channel[c].out[(s->samples_per_frame >> 1)
                                                  + offset];
+        s->channel[c].fixcoeffs = &s->channel[c].fixout[(s->samples_per_frame >> 1)
+                                                  + offset];
    }

    s->subframe_len = subframe_len;
@ -1228,10 +1248,12 @@ static int decode_subframe(WMAProDecodeCtx *s)
            const int* sf = s->channel[c].scale_factors;
            int b;

-            if (c == s->lfe_channel)
+            if (c == s->lfe_channel){
                memset(&s->tmp[cur_subwoofer_cutoff], 0, sizeof(*s->tmp) *
                       (subframe_len - cur_subwoofer_cutoff));
-
+                memset(&s->fixtmp[cur_subwoofer_cutoff], 0, sizeof(*s->fixtmp) *
+                       (subframe_len - cur_subwoofer_cutoff));
+            }
            /** inverse quantization and rescaling */
            for (b = 0; b < s->num_bands; b++) {
                const int end = FFMIN(s->cur_sfb_offsets[b+1], s->subframe_len);
@ -1239,21 +1261,42 @@ static int decode_subframe(WMAProDecodeCtx *s)
                            (s->channel[c].max_scale_factor - *sf++) *
                            s->channel[c].scale_factor_step;
                const float quant = pow(10.0, exp / 20.0);
+                
+                if(exp < EXP_MIN || exp > EXP_MAX) {
+                    LOGF("in wmaprodec.c : unhandled value for exp, please report sample.\n");
+                    return -1;
+                }
+                const FIXED fixquant = QUANT(exp);
                int start = s->cur_sfb_offsets[b];
+                
+                int j;            
+                for(j = 0; j < WMAPRO_BLOCK_MAX_SIZE + WMAPRO_BLOCK_MAX_SIZE/2; j++)
+                    s->channel[c].fixout[j] = ftofix16(s->channel[c].out[j]);
+            
                s->dsp.vector_fmul_scalar(s->tmp + start,
                                          s->channel[c].coeffs + start,
-                                          quant, end - start);
+                                          quant, end - start);                      
+                vector_fixmul_scalar(s->fixtmp+start, 
+                                     s->channel[c].fixcoeffs + start,
+                                     fixquant, end-start);
+               
            }
+            
+            int j;

            /** apply imdct (ff_imdct_half == DCTIV with reverse) */
-            ff_imdct_half(&s->mdct_ctx[av_log2(subframe_len) - BLOCK_MIN_BITS],
+            fff_imdct_half(&s->mdct_ctx[av_log2(subframe_len) - BLOCK_MIN_BITS],
                          s->channel[c].coeffs, s->tmp);
+            imdct_half((s->mdct_ctx[av_log2(subframe_len) - BLOCK_MIN_BITS]).mdct_bits,
+                          s->channel[c].fixcoeffs, s->fixtmp);
+                          
        }
    }

    /** window and overlapp-add */
    wmapro_window(s);

+
    /** handled one subframe */
    for (i = 0; i < s->channels_for_cur_subframe; i++) {
        int c = s->channel_indexes_for_cur_subframe[i];
@ -1354,13 +1397,17 @@ static int decode_frame(WMAProDecodeCtx *s)

    /** interleave samples and write them to the output buffer */
    for (i = 0; i < s->num_channels; i++) {
-        float* ptr  = s->samples + i;
+        FIXED* ptr  = s->samples + i;
+        float* fptr = s->samplesf + i;
        int incr = s->num_channels;
-        float* iptr = s->channel[i].out;
-        float* iend = iptr + s->samples_per_frame;
-
+        FIXED* iptr = s->channel[i].fixout;
+        float* fiptr = s->channel[i].out;
+        FIXED* iend = iptr + s->samples_per_frame;
+        float* fiend = fiptr + s->samples_per_frame;
+        int j;
+        
        while (iptr < iend) {
-            *ptr = av_clipf(*iptr++, -1.0, 32767.0 / 32768.0);
+            *ptr = *iptr++ << 1;
            ptr += incr;
        }

@ -1548,20 +1595,6 @@ int decode_packet(AVCodecContext *avctx,
    *data_size = (int8_t *)s->samples - (int8_t *)data;
    s->packet_offset = get_bits_count(gb) & 7;

-/* Convert the pcm samples to signed 16-bit integers. This is the format that 
- * the rockbox simulator works with. */
-#ifdef ROCKBOX    
-    float* fptr = data;
-    int32_t* ptr = data;
-    int x;
-    for(x = 0; x < *data_size; x++)
-    {
-        fptr[x] *= ((float)(INT32_MAX));
-        ptr[x] = (int32_t)fptr[x];
-        
-    }
-#endif
-
    return (s->packet_loss) ? AVERROR_INVALIDDATA : get_bits_count(gb) >> 3;
 }

--- a/apps/codecs/wmapro.c
+++ b/apps/codecs/wmapro.c
@ -66,7 +66,7 @@ enum codec_status codec_main(void)
    int pktcnt = 0;             /* Count of the packets played */

    /* Generic codec initialisation */
-    ci->configure(DSP_SET_SAMPLE_DEPTH, 32);
+    ci->configure(DSP_SET_SAMPLE_DEPTH, 17);
    

 next_track: