1
0
Fork 0
forked from len0rd/rockbox

Commit optional code for high-precision EQ which will almost certainly not make a difference on 16 bit output targets.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@12451 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Thom Johansen 2007-02-22 13:55:49 +00:00
parent 6c3db6e65f
commit c4ccd9ee86
2 changed files with 72 additions and 32 deletions

View file

@ -7,7 +7,7 @@
* \/ \/ \/ \/ \/ * \/ \/ \/ \/ \/
* $Id$ * $Id$
* *
* Copyright (C) 2006 Thom Johansen * Copyright (C) 2006-2007 Thom Johansen
* *
* All files in this archive are subject to the GNU General Public License. * All files in this archive are subject to the GNU General Public License.
* See the file COPYING in the source tree root for full license agreement. * See the file COPYING in the source tree root for full license agreement.
@ -17,6 +17,15 @@
* *
****************************************************************************/ ****************************************************************************/
/* Uncomment this to make filtering calculate lower bits after shifting.
* Without this, the lowest "shift" bits of the result will be lost here.
*/
/* #define HIGH_PRECISION */
/*
* void eq_filter(int32_t **x, struct eqfilter *f, unsigned num,
* unsigned channels, unsigned shift)
*/
.text .text
.global eq_filter .global eq_filter
eq_filter: eq_filter:
@ -33,35 +42,40 @@ eq_filter:
ldr r14, [sp, #8] @ r14 = numsamples ldr r14, [sp, #8] @ r14 = numsamples
ldmia r10, { r0-r3 } @ load history, r10 should be filter struct addr ldmia r10, { r0-r3 } @ load history, r10 should be filter struct addr
str r10, [sp, #4] @ save it for loop end str r10, [sp, #4] @ save it for loop end
.loop:
/* r0-r3 = history, r4-r8 = coefs, r9 = x[], r10..r11 = accumulator, /* r0-r3 = history, r4-r8 = coefs, r9 = x[], r10..r11 = accumulator,
r12 = shift amount, r14 = number of samples. * r12 = shift amount, r14 = number of samples.
See eq_cf.S for explanation of what this loop does. Primary difference
is the reordering of the equation we do here, which is done for register
reuse reasons, we're pretty short on regs.
*/ */
smull r10, r11, r6, r1 @ acc = b2*x[i - 2] .loop:
mov r1, r0 @ fix input history /* Direct form 1 filtering code.
smlal r10, r11, r5, r0 @ acc += b1*x[i - 1] * y[n] = b0*x[i] + b1*x[i - 1] + b2*x[i - 2] + a1*y[i - 1] + a2*y[i - 2],
ldr r0, [r9] @ load input and fix history in same operation * where y[] is output and x[] is input. This is performed out of order to
smlal r10, r11, r4, r0 @ acc += b0*x[i] * reuse registers, we're pretty short on regs.
smlal r10, r11, r7, r2 @ acc += a1*y[i - 1] */
smlal r10, r11, r8, r3 @ acc += a2*y[i - 2] smull r10, r11, r6, r1 @ acc = b2*x[i - 2]
mov r3, r2 @ fix output history mov r1, r0 @ fix input history
mov r2, r11, lsl r12 @ get result smlal r10, r11, r5, r0 @ acc += b1*x[i - 1]
@ TODO: arm makes it easy to mix in lower bits from r10 for extended ldr r0, [r9] @ load input and fix history in same operation
@ precision here, but we don't have enough regs to save the shift factor smlal r10, r11, r4, r0 @ acc += b0*x[i]
@ we would need (32 - r12). smlal r10, r11, r7, r2 @ acc += a1*y[i - 1]
str r2, [r9], #4 @ save result smlal r10, r11, r8, r3 @ acc += a2*y[i - 2]
subs r14, r14, #1 @ are we done with this channel? mov r3, r2 @ fix output history
mov r2, r11, asl r12 @ get upper part of result and shift left
#ifdef HIGH_PRECISION
rsb r11, r12, #32 @ get shift amount for lower part
orr r2, r2, r10, lsr r11 @ then mix in correctly shifted lower part
#endif
str r2, [r9], #4 @ save result
subs r14, r14, #1 @ are we done with this channel?
bne .loop bne .loop
ldr r10, [sp, #4] @ load filter struct pointer ldr r10, [sp, #4] @ load filter struct pointer
stmia r10!, { r0-r3 } @ save back history stmia r10!, { r0-r3 } @ save back history
ldr r11, [sp, #12] @ load number of channels ldr r11, [sp, #12] @ load number of channels
subs r11, r11, #1 @ all channels processed? subs r11, r11, #1 @ all channels processed?
strne r11, [sp, #12] strne r11, [sp, #12]
bne .filterloop bne .filterloop
add sp, sp, #16 @ compensate for temp storage add sp, sp, #16 @ compensate for temp storage
ldmia sp!, { r4-r11, pc } ldmia sp!, { r4-r11, pc }

View file

@ -7,7 +7,7 @@
* \/ \/ \/ \/ \/ * \/ \/ \/ \/ \/
* $Id$ * $Id$
* *
* Copyright (C) 2006 Thom Johansen * Copyright (C) 2006-2007 Thom Johansen
* *
* All files in this archive are subject to the GNU General Public License. * All files in this archive are subject to the GNU General Public License.
* See the file COPYING in the source tree root for full license agreement. * See the file COPYING in the source tree root for full license agreement.
@ -17,14 +17,27 @@
* *
****************************************************************************/ ****************************************************************************/
/* Uncomment this to make filtering calculate lower bits after shifting.
* Without this, the lowest "shift" - 1 bits of the result will be lost here.
*/
/* #define HIGH_PRECISION */
/*
* void eq_filter(int32_t **x, struct eqfilter *f, unsigned num,
* unsigned channels, unsigned shift)
*/
.text .text
.global eq_filter .global eq_filter
eq_filter: eq_filter:
lea.l (-11*4, %sp), %sp lea.l (-11*4, %sp), %sp
movem.l %d2-%d7/%a2-%a6, (%sp) | save clobbered regs movem.l %d2-%d7/%a2-%a6, (%sp) | save clobbered regs
move.l (11*4+8, %sp), %a5 | fetch filter structure address move.l (11*4+8, %sp), %a5 | fetch filter structure address
movem.l (11*4+16, %sp), %d6-%d7 | load num. channels and shift count move.l (11*4+20, %sp), %d7 | load shift count
subq.l #1, %d7 | EMAC gives us one free shift subq.l #1, %d7 | EMAC gives us one free shift
#ifdef HIGH_PRECISION
moveq.l #8, %d6
sub.l %d7, %d6 | shift for lower part of accumulator
#endif
movem.l (%a5), %a0-%a4 | load coefs movem.l (%a5), %a0-%a4 | load coefs
lea.l (5*4, %a5), %a5 | point to filter history lea.l (5*4, %a5), %a5 | point to filter history
@ -34,11 +47,16 @@ eq_filter:
move.l (%a6), %a6 move.l (%a6), %a6
move.l (11*4+12, %sp), %d5 | number of samples move.l (11*4+12, %sp), %d5 | number of samples
movem.l (%a5), %d0-%d3 | load filter history movem.l (%a5), %d0-%d3 | load filter history
/* d0-d3 = history, d4 = number of channels, d5 = sample count,
* d6 = lower shift amount, d7 = upper shift amount, a0-a4 = coefs,
* a5 = history pointer, a6 = x[]
*/
.loop: .loop:
/* Direct form 1 filtering code. We assume DSP has put EMAC in frac mode. /* Direct form 1 filtering code. We assume DSP has put EMAC in frac mode.
y[n] = b0*x[i] + b1*x[i - 1] + b2*x[i - 2] + a1*y[i - 1] + a2*y[i - 2], * y[n] = b0*x[i] + b1*x[i - 1] + b2*x[i - 2] + a1*y[i - 1] + a2*y[i - 2],
where y[] is output and x[] is input. This is performed out of order * where y[] is output and x[] is input. This is performed out of order
to do parallel load of input value. * to do parallel load of input value.
*/ */
mac.l %a2, %d1, %acc0 | acc = b2*x[i - 2] mac.l %a2, %d1, %acc0 | acc = b2*x[i - 2]
move.l %d0, %d1 | fix input history move.l %d0, %d1 | fix input history
@ -47,15 +65,23 @@ eq_filter:
mac.l %a3, %d2, %acc0 | acc += a1*y[i - 1] mac.l %a3, %d2, %acc0 | acc += a1*y[i - 1]
mac.l %a4, %d3, %acc0 | acc += a2*y[i - 2] mac.l %a4, %d3, %acc0 | acc += a2*y[i - 2]
move.l %d2, %d3 | fix output history move.l %d2, %d3 | fix output history
movclr.l %acc0, %d2 | fetch and write result #ifdef HIGH_PRECISION
move.l %accext01, %d2 | fetch lower part of accumulator
move.b %d2, %d4 | clear upper three bytes
lsr.l %d6, %d4 | shift lower bits
#endif
movclr.l %acc0, %d2 | fetch upper part of result
asl.l %d7, %d2 | restore fixed point format asl.l %d7, %d2 | restore fixed point format
#ifdef HIGH_PRECISION
or.l %d2, %d4 | combine lower and upper parts
#endif
move.l %d2, (%a6)+ | save result move.l %d2, (%a6)+ | save result
subq.l #1, %d5 | are we done with this channel? subq.l #1, %d5 | are we done with this channel?
jne .loop jne .loop
movem.l %d0-%d3, (%a5) | save history back to struct movem.l %d0-%d3, (%a5) | save history back to struct
lea.l (4*4, %a5), %a5 | point to next channel's history lea.l (4*4, %a5), %a5 | point to next channel's history
subq.l #1, %d6 | have we processed both channels? subq.l #1, (11*4+16, %sp) | have we processed both channels?
jne .filterloop jne .filterloop
movem.l (%sp), %d2-%d7/%a2-%a6 movem.l (%sp), %d2-%d7/%a2-%a6