forked from len0rd/rockbox
Add ARM assembler to libwmapro vector_fixmul_scalar(). Speeds up decoding by 1% on PP5022.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27603 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
2fefcdf31c
commit
3bb8020f78
1 changed file with 48 additions and 18 deletions
|
|
@ -180,7 +180,7 @@
|
||||||
}
|
}
|
||||||
#endif /* CPU_COLDFIRE, CPU_ARM */
|
#endif /* CPU_COLDFIRE, CPU_ARM */
|
||||||
|
|
||||||
#ifdef CPU_COLDFIRE
|
#if defined(CPU_COLDFIRE)
|
||||||
static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0,
|
static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0,
|
||||||
const int32_t *src1, const int32_t *win,
|
const int32_t *src1, const int32_t *win,
|
||||||
int len)
|
int len)
|
||||||
|
|
@ -194,8 +194,8 @@ static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0,
|
||||||
int32_t s1 = src1[j];
|
int32_t s1 = src1[j];
|
||||||
int32_t wi = -win[i];
|
int32_t wi = -win[i];
|
||||||
int32_t wj = -win[j];
|
int32_t wj = -win[j];
|
||||||
|
asm volatile (
|
||||||
asm volatile ("mac.l %[s0], %[wj], %%acc0\n\t"
|
"mac.l %[s0], %[wj], %%acc0\n\t"
|
||||||
"msac.l %[s1], %[wi], %%acc0\n\t"
|
"msac.l %[s1], %[wi], %%acc0\n\t"
|
||||||
"mac.l %[s0], %[wi], %%acc1\n\t"
|
"mac.l %[s0], %[wi], %%acc1\n\t"
|
||||||
"mac.l %[s1], %[wj], %%acc1\n\t"
|
"mac.l %[s1], %[wj], %%acc1\n\t"
|
||||||
|
|
@ -229,6 +229,35 @@ static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0,
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(CPU_ARM)
|
||||||
|
/*
 * ARM inline-assembly version of vector_fixmul_scalar().
 *
 * For each group of four 32-bit words read from src, multiplies every word
 * by mul as a signed 32x32->64 bit product and stores bits 24..55 of that
 * product (the product shifted right by 24, truncated to 32 bits) to dst.
 *
 * dst  - output buffer, written four words per iteration
 * src  - input buffer, read four words per iteration
 * mul  - scalar multiplier applied to every element
 * len  - number of elements to process
 *
 * The loop advances in steps of four, so a len that is not a multiple of 4
 * would read and write past element len-1 of both buffers.
 *
 * NOTE(review): presumably meant to produce the same results as the
 * fixmul24()-based version in the #else branch; fixmul24() is not visible
 * here - confirm.
 */
static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src,
|
||||||
|
int32_t mul, int len)
|
||||||
|
{
|
||||||
|
/* len is _always_ a multiple of 4, because len is the difference of sfb's
|
||||||
|
* which themselves are always a multiple of 4. */
|
||||||
|
int i;
|
||||||
|
/* i only counts groups of four; the asm advances src and dst itself. */
for (i=0; i<len; i+=4) {
|
||||||
|
asm volatile (
|
||||||
|
"ldmia %[src]!, {r1-r4} \n\t" /* r1..r4 = next four src words; src advances past them */
|
||||||
|
"smull r0, r5, r1, %[mul] \n\t" /* r5:r0 = signed 64-bit product of word 0 and mul */
|
||||||
|
"mov r0, r0, lsr #24 \n\t" /* top 8 bits of the low half become result bits 0..7 */
|
||||||
|
"orr r0, r0, r5, lsl #8 \n\t" /* low 24 bits of the high half become result bits 8..31 */
|
||||||
|
"smull r1, r5, r2, %[mul] \n\t" /* word 1: r1 was consumed above, so it now holds the result */
|
||||||
|
"mov r1, r1, lsr #24 \n\t"
|
||||||
|
"orr r1, r1, r5, lsl #8 \n\t"
|
||||||
|
"smull r2, r5, r3, %[mul] \n\t" /* word 2: same sequence, result in r2 */
|
||||||
|
"mov r2, r2, lsr #24 \n\t"
|
||||||
|
"orr r2, r2, r5, lsl #8 \n\t"
|
||||||
|
"smull r3, r5, r4, %[mul] \n\t" /* word 3: same sequence, result in r3 */
|
||||||
|
"mov r3, r3, lsr #24 \n\t"
|
||||||
|
"orr r3, r3, r5, lsl #8 \n\t"
|
||||||
|
"stmia %[dst]!, {r0-r3} \n" /* store the four results; dst advances past them */
|
||||||
|
: [dst]"+r"(dst), [src]"+r"(src) /* read-write: both pointers are updated by the '!' write-back */
|
||||||
|
: [mul]"r"(mul) /* read-only multiplier, kept in a register */
|
||||||
|
: "r0", "r1", "r2", "r3", "r4", "r5", "memory"); /* hard-coded scratch registers; "memory" since src[] is read and dst[] written */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#else
|
||||||
static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src,
|
static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src,
|
||||||
int32_t mul, int len)
|
int32_t mul, int len)
|
||||||
{
|
{
|
||||||
|
|
@ -242,6 +271,7 @@ static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src,
|
||||||
dst[i+3] = fixmul24(src[i+3], mul);
|
dst[i+3] = fixmul24(src[i+3], mul);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif /* CPU_ARM */
|
||||||
|
|
||||||
static inline int av_clip(int a, int amin, int amax)
|
static inline int av_clip(int a, int amin, int amax)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue