forked from len0rd/rockbox
Assemblerised CMUL() for ARM, giving ~20% speedup.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@13787 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
fe8ae10ab4
commit
e7cdd6cbc6
2 changed files with 23 additions and 25 deletions
|
|
@ -50,7 +50,29 @@ uint32_t bswap_32(uint32_t x)
|
|||
return (b1 >> 24) | (b2 >> 8) | (b3 << 8) | (b4 << 24);
|
||||
}
|
||||
|
||||
#ifdef CPU_COLDFIRE
|
||||
#ifdef CPU_ARM
|
||||
/*
 * CMUL (ARM version): complex multiply of (a + i*b) by (t + i*v).
 *
 * Each 64-bit signed product sum is accumulated in a register pair whose
 * low word is the scratch register l; only the high word is kept:
 *   *x = (high word of (a*t - b*v)) << 1
 *   *y = (high word of (b*t + a*v)) << 1
 * Because the low word is discarded before the shift, bit 0 of each result
 * is always zero (presumably the lost bit of precision noted below).
 * NOTE(review): the final << 1 suggests one operand pair is in 1.31 format
 * - confirm against the callers.
 */
static inline
|
||||
void CMUL(fixed32 *x, fixed32 *y,
|
||||
fixed32 a, fixed32 b,
|
||||
fixed32 t, fixed32 v)
|
||||
{
|
||||
/* This version loses one bit of precision. Could be solved at the cost
|
||||
* of 2 extra cycles if it becomes an issue. */
|
||||
int x1, y1, l;
|
||||
asm(
|
||||
"smull %[l], %[y1], %[b], %[t] \n" /* y1:l = b * t */
|
||||
"smlal %[l], %[y1], %[a], %[v] \n" /* y1:l += a * v */
|
||||
"rsb %[b], %[b], #0 \n" /* b = -b (local copy only) */
|
||||
"smull %[l], %[x1], %[a], %[t] \n" /* x1:l = a * t; l is reused, old low word dropped */
|
||||
"smlal %[l], %[x1], %[b], %[v] \n" /* x1:l += (-b) * v */
|
||||
: [l] "=&r" (l), [x1]"=&r" (x1), [y1]"=&r" (y1), [b] "+r" (b) /* early-clobber outputs; b is read-write because rsb negates it */
|
||||
: [a] "r" (a), [t] "r" (t), [v] "r" (v)
|
||||
: "cc"
|
||||
);
|
||||
*x = x1 << 1; /* keep only the high word, rescaled by one bit */
|
||||
*y = y1 << 1;
|
||||
}
|
||||
#elif defined CPU_COLDFIRE
|
||||
static inline
|
||||
void CMUL(fixed32 *x, fixed32 *y,
|
||||
fixed32 a, fixed32 b,
|
||||
|
|
|
|||
|
|
@ -61,30 +61,6 @@ long fsincos(unsigned long phase, fixed32 *cos);
|
|||
__result; \
|
||||
})
|
||||
|
||||
/*
 * fixmul32b(x, y): special fixmul32 that does a 16.16 x 1.31 multiply and
 * returns a 16.16 value. This is needed because the FFT constants are all
 * normalized to be less than 1 and can't fit into a 16 bit number without
 * excessive rounding.
 *
 * How the asm works:
 *   smull  forms the 64-bit signed product of x and y in __hi:__lo.
 *   movs   shifts __lo right by 31, leaving its old bit 31 in bit 0 and
 *          moving its old bit 30 into the carry flag.
 *   adc    computes (__hi << 1) + __lo + carry, i.e. the product shifted
 *          right by 31 with bit 30 added in as a rounding bit.
 * The shift counts 31 and 1 are passed as immediate operands (%5 and %6).
 * The "%" modifier on x tells the compiler that x and y may be swapped,
 * and "cc" is listed as clobbered because movs updates the flags.
 */
|
||||
|
||||
|
||||
# define fixmul32b(x, y) \
|
||||
({ int32_t __hi; \
|
||||
uint32_t __lo; \
|
||||
int32_t __result; \
|
||||
asm ("smull %0, %1, %3, %4\n\t" \
|
||||
"movs %0, %0, lsr %5\n\t" \
|
||||
"adc %2, %0, %1, lsl %6" \
|
||||
: "=&r" (__lo), "=&r" (__hi), "=r" (__result) \
|
||||
: "%r" (x), "r" (y), \
|
||||
"M" (31), "M" (1) \
|
||||
: "cc"); \
|
||||
__result; \
|
||||
})
|
||||
|
||||
|
||||
#elif defined(CPU_COLDFIRE)
|
||||
static inline int32_t fixmul32(int32_t x, int32_t y)
|
||||
{
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue