forked from len0rd/rockbox
ARMv5 optimized complex multiply function for libopus.
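Speeds up decoding of 128 kbps Opus files by 1.2 MHz on AMSv2. The result can be off by 1 bit of rounding error, because KissFFT uses a 15-bit shift while the packed multiplies shift by 16 bits; the result is therefore shifted left by one bit afterwards. Also, change an LDMIA in the ARMv4 code to LDM, as the pointer should not increment (without a `!` writeback suffix the two mnemonics assemble to the same instruction, so this only changes the spelling).
Change-Id: I626a207c6a056a1984e33cfe89415c35d0caed93
Reviewed-on: http://gerrit.rockbox.org/377
Reviewed-by: Michael Giacomelli <giac2000@hotmail.com>
Tested-by: Michael Giacomelli <giac2000@hotmail.com>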
This commit is contained in:
parent
0c87e02631
commit
a2ab22efbf
1 changed files with 25 additions and 1 deletions
|
|
@ -81,10 +81,13 @@
|
||||||
: "d0", "d1", "d2", "d3", "cc"); \
|
: "d0", "d1", "d2", "d3", "cc"); \
|
||||||
}
|
}
|
||||||
#elif defined(CPU_ARM)
|
#elif defined(CPU_ARM)
|
||||||
|
#if (ARM_ARCH < 5)
|
||||||
|
|
||||||
|
|
||||||
# define C_MULC(m,a,b) \
|
# define C_MULC(m,a,b) \
|
||||||
{ \
|
{ \
|
||||||
asm volatile( \
|
asm volatile( \
|
||||||
"ldmia %[ap], {r0,r1} \n\t" \
|
"ldm %[ap], {r0,r1} \n\t" \
|
||||||
"ldrsh r2, [%[bp], #0] \n\t" \
|
"ldrsh r2, [%[bp], #0] \n\t" \
|
||||||
"ldrsh r3, [%[bp], #2] \n\t" \
|
"ldrsh r3, [%[bp], #2] \n\t" \
|
||||||
\
|
\
|
||||||
|
|
@ -103,6 +106,27 @@
|
||||||
: "r0", "r1", "r2", "r3", "r4"); \
|
: "r0", "r1", "r2", "r3", "r4"); \
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
|
/*
 * C_MULC(m,a,b): m = a * conj(b), same as above but using armv5 packed
 * multiplies.
 *
 *   m.r = a.r*b.r + a.i*b.i
 *   m.i = a.i*b.r - a.r*b.i
 *
 * a is read as two 32-bit words (r, i); b is read as a single 32-bit word
 * holding two 16-bit halves.  SMULWx/SMLAWx keep the top 32 bits of the
 * 32x16 product (a 16 bit shift), so each result is shifted left by one to
 * get back to the Q15 scaling; the lowest result bit is therefore always 0.
 *
 * NOTE(review): the single LDR of b assumes b is 4-byte aligned and that
 * b.r sits in the bottom halfword (little-endian layout) - confirm for the
 * twiddle tables used by callers.
 *
 * The asm dereferences %[ap] and %[bp] itself, so a and b are also listed
 * as "m" inputs: this tells the compiler the statement reads those objects
 * and that pending stores to them must be completed first.  The "m"
 * operands are deliberately not referenced in the template.
 *
 * Scratch registers: r0, r1, r2, r4.
 */
# define C_MULC(m,a,b) \
    { \
      asm volatile( \
        "ldm %[ap], {r0,r1} \n\t"             /*r0=a.r, r1=a.i*/ \
        "ldr r2, [%[bp], #0] \n\t"            /*r2=b.r (bottom), b.i (top)*/ \
        \
        "smulwb r4, r0, r2 \n\t"              /*r4=a.r*b.r*/ \
        "smlawt %[mr], r1, r2, r4 \n\t"       /*m.r=r4+a.i*b.i*/ \
        "mov %[mr], %[mr], lsl #1 \n\t"       /*Q15 not Q16*/ \
        \
        "smulwb r1, r1, r2 \n\t"              /*r1=a.i*b.r*/ \
        "smulwt r4, r0, r2 \n\t"              /*r4=a.r*b.i*/ \
        "sub %[mi], r1, r4 \n\t"              /*m.i=a.i*b.r-a.r*b.i*/ \
        "mov %[mi], %[mi], lsl #1 \n\t"       /*Q15 not Q16*/ \
        : [mr] "=r" ((m).r), [mi] "=r" ((m).i) \
        : [ap] "r" (&(a)), [bp] "r" (&(b)), \
          "m" (a), "m" (b)                    /*memory read via ap/bp*/ \
        : "r0", "r1", "r2", "r4"); \
    }
|
||||||
|
#endif /*ARMv5 code*/
|
||||||
|
#else
|
||||||
# define C_MULC(m,a,b) \
|
# define C_MULC(m,a,b) \
|
||||||
do{ (m).r = ADD32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
|
do{ (m).r = ADD32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
|
||||||
(m).i = SUB32(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0)
|
(m).i = SUB32(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue