Correct DSP_SET_SAMPLE_DEPTH to 29 for mpc (18.14 fixed point samples). Higher precision for fast 32x32=32 multiplication in dewindowing part of synthesis filter.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@17708 a1c6a512-1295-4272-9138-f99709370657
2008-06-10 06:11:50 +00:00 · 2008-06-10 06:11:50 +00:00 · f52696ef8a
commit f52696ef8a
parent 50763d512c
3 changed files with 25 additions and 16 deletions
--- a/apps/codecs/libmusepack/synth_filter.c
+++ b/apps/codecs/libmusepack/synth_filter.c
@ -44,11 +44,15 @@

 #if defined(MPC_FIXED_POINT)
   #if defined(OPTIMIZE_FOR_SPEED)
-      // round to +/- 2^14 as pre-shift before 32=32x32-multiply
+      // round at compile time to +/- 2^14 as a pre-shift before 32=32x32-multiply
      #define D(value)  (MPC_SHR_RND(value, 3))
      
-      // round to +/- 2^17 as pre-shift before 32=32x32-multiply
-      #define MPC_V_PRESHIFT(X) MPC_SHR_RND(X, 14)
+      // round at runtime to +/- 2^17 as a pre-shift before 32=32x32-multiply
+      // samples are 18.14 fixed point; after this shift they are 30.2, of
+      // which 15.2 bits are significant (not including sign)
+      #define MPC_V_PRESHIFT(X) MPC_SHR_RND(X, 12)
+      
+      // in this configuration a post-shift by >>1 is needed after synthesis
   #else
      // saturate to +/- 2^31 (= value << (31-17)), D-values are +/- 2^17
      #define D(value)  (value << (14))
@ -65,7 +69,7 @@
   #define MPC_V_PRESHIFT(X) (X)
 #endif
    
-// Di_opt coefficients are +/- 2^17
+// Di_opt coefficients are +/- 2^17 (pre-shifted by <<16)
 static const MPC_SAMPLE_FORMAT  Di_opt [512] ICONST_ATTR = {
 /*           0        1        2         3         4         5          6          7         8         9       10        11       12       13      14     15  */
 /*  0 */  D( 0), -D( 29), D( 213), -D( 459),  D(2037), -D(5153),  D( 6574), -D(37489), D(75038),  D(37489), D(6574),  D(5153), D(2037),  D(459), D(213), D(29),
@ -462,6 +466,7 @@ mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data,
               + V[256]*D[ 4] + V[352]*D[ 5] + V[384]*D[ 6] + V[480]*D[ 7]
               + V[512]*D[ 8] + V[608]*D[ 9] + V[640]*D[10] + V[736]*D[11]
               + V[768]*D[12] + V[864]*D[13] + V[896]*D[14] + V[992]*D[15];
+         *Data >>= 1; // post shift to compensate for pre-shifting
         Data += 1;
         // total: 32 * (16 muls, 15 adds)
      }
@ -493,6 +498,7 @@ mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data,
               "mac.l %%d2, %%a5, (992*4, %[V]), %%a5, %%acc0\n\t"
               "mac.l %%d3, %%a5, %%acc0                     \n\t"
               "movclr.l %%acc0, %%d0                        \n\t"
+               "lsl.l #1, %%d0                               \n\t"
               "move.l %%d0, (%[Data])+                      \n"
               : [Data] "+a" (Data)
               : [V] "a" (V), [D] "a" (D)
@ -500,16 +506,16 @@ mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data,
         }
      #else
         // 64=64x64-multiply (FIXED_POINT) or float=float*float (!FIXED_POINT) in C
-         for ( k = 0; k < 32; k++, D += 16, V++ ) 
+         for ( k = 0; k < 32; k++, D += 16, V++ )
         {
-            *Data = MPC_MULTIPLY_EX(V[  0],D[ 0],31) + MPC_MULTIPLY_EX(V[ 96],D[ 1],31)
-                  + MPC_MULTIPLY_EX(V[128],D[ 2],31) + MPC_MULTIPLY_EX(V[224],D[ 3],31)
-                  + MPC_MULTIPLY_EX(V[256],D[ 4],31) + MPC_MULTIPLY_EX(V[352],D[ 5],31)
-                  + MPC_MULTIPLY_EX(V[384],D[ 6],31) + MPC_MULTIPLY_EX(V[480],D[ 7],31)
-                  + MPC_MULTIPLY_EX(V[512],D[ 8],31) + MPC_MULTIPLY_EX(V[608],D[ 9],31)
-                  + MPC_MULTIPLY_EX(V[640],D[10],31) + MPC_MULTIPLY_EX(V[736],D[11],31)
-                  + MPC_MULTIPLY_EX(V[768],D[12],31) + MPC_MULTIPLY_EX(V[864],D[13],31)
-                  + MPC_MULTIPLY_EX(V[896],D[14],31) + MPC_MULTIPLY_EX(V[992],D[15],31);
+            *Data = MPC_MULTIPLY_EX(V[  0],D[ 0],30) + MPC_MULTIPLY_EX(V[ 96],D[ 1],30)
+                  + MPC_MULTIPLY_EX(V[128],D[ 2],30) + MPC_MULTIPLY_EX(V[224],D[ 3],30)
+                  + MPC_MULTIPLY_EX(V[256],D[ 4],30) + MPC_MULTIPLY_EX(V[352],D[ 5],30)
+                  + MPC_MULTIPLY_EX(V[384],D[ 6],30) + MPC_MULTIPLY_EX(V[480],D[ 7],30)
+                  + MPC_MULTIPLY_EX(V[512],D[ 8],30) + MPC_MULTIPLY_EX(V[608],D[ 9],30)
+                  + MPC_MULTIPLY_EX(V[640],D[10],30) + MPC_MULTIPLY_EX(V[736],D[11],30)
+                  + MPC_MULTIPLY_EX(V[768],D[12],30) + MPC_MULTIPLY_EX(V[864],D[13],30)
+                  + MPC_MULTIPLY_EX(V[896],D[14],30) + MPC_MULTIPLY_EX(V[992],D[15],30);
            Data += 1;
            // total: 16 muls, 15 adds, 16 shifts
         }
--- a/apps/codecs/libmusepack/synth_filter_arm.S
+++ b/apps/codecs/libmusepack/synth_filter_arm.S
@ -75,7 +75,8 @@ mpc_decoder_windowing_D:
    ldr r11, [r1, #896*4]   /* 14 */
    mla r12, r9, r11, r12     
    ldr r11, [r1, #992*4]   /* 15 */
-    mla r12, r10, r11, r12     
+    mla r12, r10, r11, r12
+    mov r12, r12, asr #1    /* post shift to compensate for pre-shifting */
    str r12, [r0], #4       /* store Data */
    add r1, r1, #4          /* V++ */
    
@ -142,7 +143,7 @@ mpc_decoder_windowing_D:
    smlal r11, r12, r9, r11  
    ldr r11, [r1, #992*4]   /* 15 */
    smlal r11, r12, r10, r11
-    mov r4, r12, lsl #1     /* get result from hi-part */
+    mov r4, r12, lsl #2     /* get result from hi-part, lose 2 bits */
    str r4, [r0], #4        /* store Data */
    add r1, r1, #4          /* V++ */
    
--- a/apps/codecs/mpc.c
+++ b/apps/codecs/mpc.c
@ -76,7 +76,9 @@ enum codec_status codec_main(void)
    mpc_streaminfo info;
    int retval = CODEC_OK;
    
-    ci->configure(DSP_SET_SAMPLE_DEPTH, 28);
+    /* musepack's sample representation is 18.14
+     * DSP_SET_SAMPLE_DEPTH = 14 (FRACT) + 16 (NATIVE) - 1 (SIGN) = 29 */
+    ci->configure(DSP_SET_SAMPLE_DEPTH, 29);
    
    /* Create a decoder instance */
    reader.read = read_impl;