Commit optional code for high-precision EQ which will almost certainly not make a difference on 16 bit output targets.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@12451 a1c6a512-1295-4272-9138-f99709370657
2007-02-22 13:55:49 +00:00 · 2007-02-22 13:55:49 +00:00 · c4ccd9ee86
commit c4ccd9ee86
parent 6c3db6e65f
2 changed files with 72 additions and 32 deletions
--- a/apps/eq_arm.S
+++ b/apps/eq_arm.S
@ -7,7 +7,7 @@
 *                     \/            \/     \/    \/            \/
 * $Id$
 *
- * Copyright (C) 2006 Thom Johansen
+ * Copyright (C) 2006-2007 Thom Johansen
 *
 * All files in this archive are subject to the GNU General Public License.
 * See the file COPYING in the source tree root for full license agreement.
@ -17,6 +17,15 @@
 *
 ****************************************************************************/
 /* Uncomment this to make the filter also compute the low bits of the result
 * that the final left shift would otherwise zero-fill. Without it, the lowest
 * "shift" bits of every output sample are lost.
 */
 /* #define HIGH_PRECISION */
 /*
 * void eq_filter(int32_t **x, struct eqfilter *f, unsigned num,
 *                unsigned channels, unsigned shift)
 */
    .text
    .global eq_filter
 eq_filter:
@ -33,35 +42,40 @@ eq_filter:
    ldr r14, [sp, #8]       @ r14 = numsamples
    ldmia r10, { r0-r3 }    @ load history, r10 should be filter struct addr
    str r10, [sp, #4]       @ save it for loop end
-.loop:
+
    /* r0-r3 = history, r4-r8 = coefs, r9 = x[], r10..r11 = accumulator,
-       r12 = shift amount, r14 = number of samples.
+     * r12 = shift amount, r14 = number of samples.
       See eq_cf.S for explanation of what this loop does. Primary difference
       is the reordering of the equation we do here, which is done for register
       reuse reasons, we're pretty short on regs.
     */
-    smull r10, r11, r6, r1  @ acc = b2*x[i - 2]
+.loop:
-    mov r1, r0              @ fix input history
+    /* Direct form 1 filtering code.
-    smlal r10, r11, r5, r0  @ acc += b1*x[i - 1]
+     * y[i] = b0*x[i] + b1*x[i - 1] + b2*x[i - 2] + a1*y[i - 1] + a2*y[i - 2],
-    ldr r0, [r9]            @ load input and fix history in same operation
+     * where y[] is output and x[] is input. This is performed out of order to
-    smlal r10, r11, r4, r0  @ acc += b0*x[i]
+     * reuse registers, we're pretty short on regs.
-    smlal r10, r11, r7, r2  @ acc += a1*y[i - 1]
+     */
-    smlal r10, r11, r8, r3  @ acc += a2*y[i - 2]
+    smull r10, r11, r6, r1     @ acc = b2*x[i - 2]
-    mov r3, r2              @ fix output history
+    mov r1, r0                 @ fix input history
-    mov r2, r11, lsl r12    @ get result
+    smlal r10, r11, r5, r0     @ acc += b1*x[i - 1]
-    @ TODO: arm makes it easy to mix in lower bits from r10 for extended
+    ldr r0, [r9]               @ load input and fix history in same operation
-    @ precision here, but we don't have enough regs to save the shift factor
+    smlal r10, r11, r4, r0     @ acc += b0*x[i]
-    @ we would need (32 - r12).
+    smlal r10, r11, r7, r2     @ acc += a1*y[i - 1]
-    str r2, [r9], #4        @ save result
+    smlal r10, r11, r8, r3     @ acc += a2*y[i - 2]
-    subs r14, r14, #1       @ are we done with this channel?
+    mov r3, r2                 @ fix output history
    mov r2, r11, asl r12       @ get upper part of result and shift left
 #ifdef HIGH_PRECISION
    rsb r11, r12, #32          @ get shift amount for lower part
    orr r2, r2, r10, lsr r11   @ then mix in correctly shifted lower part
 #endif
    str r2, [r9], #4           @ save result
    subs r14, r14, #1          @ are we done with this channel?
    bne .loop
-    ldr r10, [sp, #4]       @ load filter struct pointer
+    ldr r10, [sp, #4]          @ load filter struct pointer
-    stmia r10!, { r0-r3 }   @ save back history
+    stmia r10!, { r0-r3 }      @ save back history
-    ldr r11, [sp, #12]      @ load number of channels
+    ldr r11, [sp, #12]         @ load number of channels
-    subs r11, r11, #1       @ all channels processed?
+    subs r11, r11, #1          @ all channels processed?
    strne r11, [sp, #12]
    bne .filterloop
-    add sp, sp, #16         @ compensate for temp storage
+    add sp, sp, #16            @ compensate for temp storage
    ldmia sp!, { r4-r11, pc }
--- a/apps/eq_cf.S
+++ b/apps/eq_cf.S
@ -7,7 +7,7 @@
 *                     \/            \/     \/    \/            \/
 * $Id$
 *
- * Copyright (C) 2006 Thom Johansen
+ * Copyright (C) 2006-2007 Thom Johansen
 *
 * All files in this archive are subject to the GNU General Public License.
 * See the file COPYING in the source tree root for full license agreement.
@ -17,14 +17,27 @@
 *
 ****************************************************************************/
 /* Uncomment this to make the filter also compute the low bits of the result
 * that the final left shift would otherwise zero-fill. Without it, the lowest
 * "shift" - 1 bits of every output sample are lost.
 */
 /* #define HIGH_PRECISION */
 /*
 * void eq_filter(int32_t **x, struct eqfilter *f, unsigned num,
 *                unsigned channels, unsigned shift)
 */
    .text
    .global eq_filter
 eq_filter:
    lea.l (-11*4, %sp), %sp 
    movem.l %d2-%d7/%a2-%a6, (%sp)    | save clobbered regs
    move.l (11*4+8, %sp), %a5         | fetch filter structure address
-    movem.l (11*4+16, %sp), %d6-%d7   | load num. channels and shift count
+    move.l (11*4+20, %sp), %d7        | load shift count
    subq.l #1, %d7                    | EMAC gives us one free shift
 #ifdef HIGH_PRECISION
    moveq.l #8, %d6
    sub.l %d7, %d6                    | shift for lower part of accumulator
 #endif
    movem.l (%a5), %a0-%a4            | load coefs
    lea.l (5*4, %a5), %a5             | point to filter history
@ -34,11 +47,16 @@ eq_filter:
    move.l (%a6), %a6
    move.l (11*4+12, %sp), %d5        | number of samples
    movem.l (%a5), %d0-%d3            | load filter history
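    /* d0-d3 = history, d4 = scratch for the lower accumulator bits
     * (HIGH_PRECISION only), d5 = sample count, d6 = lower shift amount,
     * d7 = upper shift amount, a0-a4 = coefs, a5 = history pointer, a6 = x[].
     * The remaining channel count is kept on the stack at (11*4+16, %sp).
     */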
 .loop:
    /* Direct form 1 filtering code. We assume DSP has put EMAC in frac mode.
-       y[n] = b0*x[i] + b1*x[i - 1] + b2*x[i - 2] + a1*y[i - 1] + a2*y[i - 2],
+     * y[i] = b0*x[i] + b1*x[i - 1] + b2*x[i - 2] + a1*y[i - 1] + a2*y[i - 2],
-       where y[] is output and x[] is input. This is performed out of order
+     * where y[] is output and x[] is input. This is performed out of order
-       to do parallel load of input value.
+     * to do parallel load of input value.
     */
    mac.l %a2, %d1, %acc0               | acc = b2*x[i - 2]
    move.l %d0, %d1                     | fix input history
@ -47,15 +65,23 @@ eq_filter:
    mac.l %a3, %d2, %acc0               | acc += a1*y[i - 1]
    mac.l %a4, %d3, %acc0               | acc += a2*y[i - 2]
    move.l %d2, %d3                     | fix output history
-    movclr.l %acc0, %d2                 | fetch and write result
+#ifdef HIGH_PRECISION
    /* Fetch the lower part of the accumulator ahead of the movclr.l below.
     * d4 is used as scratch and must end up holding only the low byte, so it
     * is masked explicitly: a move.b into d4 would leave whatever was in its
     * upper three bytes, and those stale bits would be shifted into the
     * result.
     */
    move.l %accext01, %d4               | fetch lower part of accumulator
    and.l #0xff, %d4                    | clear upper three bytes
    lsr.l %d6, %d4                      | shift lower bits
 #endif
    movclr.l %acc0, %d2                 | fetch upper part of result
    asl.l %d7, %d2                      | restore fixed point format
 #ifdef HIGH_PRECISION
    /* Destination is the second operand: the combined value has to land in
     * d2, because d2 is what gets stored to x[] and kept as y[i - 1].
     */
    or.l %d4, %d2                       | combine lower and upper parts
 #endif
    move.l %d2, (%a6)+                  | save result
    subq.l #1, %d5                      | are we done with this channel?
    jne .loop
    movem.l %d0-%d3, (%a5)              | save history back to struct
    lea.l (4*4, %a5), %a5               | point to next channel's history
-    subq.l #1, %d6                      | have we processed both channels?
+    subq.l #1, (11*4+16, %sp)           | have we processed both channels?
    jne .filterloop
    movem.l (%sp), %d2-%d7/%a2-%a6