1
0
Fork 0
forked from len0rd/rockbox

Assemblerised CMUL() for ARM, giving ~20% speedup.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@13787 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Jens Arnold 2007-07-04 19:23:18 +00:00
parent fe8ae10ab4
commit e7cdd6cbc6
2 changed files with 23 additions and 25 deletions

View file

@ -50,7 +50,29 @@ uint32_t bswap_32(uint32_t x)
return (b1 >> 24) | (b2 >> 8) | (b3 << 8) | (b4 << 24);
}
#ifdef CPU_COLDFIRE
#ifdef CPU_ARM
/*
 * CMUL() for ARM: fixed-point complex multiply of (a + i*b) by (t + i*v).
 *
 *   *x receives the real part,      a*t - b*v
 *   *y receives the imaginary part, b*t + a*v
 *
 * Each sum of products is accumulated as a 64-bit value; only its upper
 * 32 bits are kept and then shifted left by one. That is equivalent to
 * taking (sum >> 31) with bit 0 always cleared.
 *
 * NOTE(review): the >> 31 scaling suggests t and v are 1.31 values (e.g.
 * twiddle factors) -- confirm against the callers.
 *
 * x, y: output pointers; both are written unconditionally.
 * a, b: real and imaginary part of the first operand.
 * t, v: real and imaginary part of the second operand.
 */
static inline
void CMUL(fixed32 *x, fixed32 *y,
fixed32 a, fixed32 b,
fixed32 t, fixed32 v)
{
/* This version loses one bit of precision. Could be solved at the cost
* of 2 extra cycles if it becomes an issue.
* (The lost bit is bit 0 of each result: the low word of the 64-bit sum
* is discarded and the high word is simply shifted left by one.) */
/* x1/y1 hold the high words of the two 64-bit sums; l is the shared low
* word, which is only scratch and never read back in C. */
int x1, y1, l;
asm(
/* y1:l = b * t */
"smull %[l], %[y1], %[b], %[t] \n"
/* y1:l += a * v */
"smlal %[l], %[y1], %[a], %[v] \n"
/* b = -b, so the following multiply-accumulate subtracts b * v */
"rsb %[b], %[b], #0 \n"
/* x1:l = a * t (l is reused; the previous low word is no longer needed) */
"smull %[l], %[x1], %[a], %[t] \n"
/* x1:l += (-b) * v */
"smlal %[l], %[x1], %[b], %[v] \n"
/* Outputs are early-clobber ("&") because they are written before all
* inputs have been consumed. b is "+r" because the asm negates it in
* place; it is a by-value parameter, so the caller's value is untouched. */
: [l] "=&r" (l), [x1]"=&r" (x1), [y1]"=&r" (y1), [b] "+r" (b)
: [a] "r" (a), [t] "r" (t), [v] "r" (v)
: "cc"
);
/* Keep only the high words, rescaled by one bit position. */
*x = x1 << 1;
*y = y1 << 1;
}
#elif defined CPU_COLDFIRE
static inline
void CMUL(fixed32 *x, fixed32 *y,
fixed32 a, fixed32 b,

View file

@ -61,30 +61,6 @@ long fsincos(unsigned long phase, fixed32 *cos);
__result; \
})
/*
Special fixmul32 that does a 16.16 x 1.31 multiply and returns a 16.16 value.
This is needed because the FFT constants are all normalized to be less than 1
and can't fit into a 16 bit number without excessive rounding.

ARM implementation, instruction by instruction:
  smull  - signed 32x32 -> 64 bit product, low word in __lo, high word in __hi
  movs   - __lo = __lo >> 31 (logical); the last bit shifted out (bit 30 of
           the product) lands in the carry flag
  adc    - __result = __lo + (__hi << 1) + carry
The result is therefore the 64-bit product shifted right by 31, rounded
using bit 30, and truncated to 32 bits.

The "%r" constraint marks x and y as commutative operands. The shift counts
31 and 1 are passed in as "M" immediate operands. "cc" is clobbered because
movs updates the condition flags.
*/
# define fixmul32b(x, y) \
({ int32_t __hi; \
uint32_t __lo; \
int32_t __result; \
asm ("smull %0, %1, %3, %4\n\t" \
"movs %0, %0, lsr %5\n\t" \
"adc %2, %0, %1, lsl %6" \
: "=&r" (__lo), "=&r" (__hi), "=r" (__result) \
: "%r" (x), "r" (y), \
"M" (31), "M" (1) \
: "cc"); \
__result; \
})
#elif defined(CPU_COLDFIRE)
static inline int32_t fixmul32(int32_t x, int32_t y)
{