forked from len0rd/rockbox
libwmapro: add Coldfire asm for vector_fixmul_window, giving a speedup of ~13%. Drop the add_bias argument of vector_fixmul_window, since it was always 0.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27573 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
53b5abd93c
commit
025eed5c74
2 changed files with 42 additions and 9 deletions
|
|
@ -19,10 +19,10 @@ static inline int32_t fixmulshift(int32_t x, int32_t y, int shamt)
|
|||
return (int32_t)temp;
|
||||
}
|
||||
|
||||
|
||||
#ifdef CPU_COLDFIRE
|
||||
static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0,
|
||||
const int32_t *src1, const int32_t *win,
|
||||
int32_t add_bias, int len)
|
||||
int len)
|
||||
{
|
||||
int i, j;
|
||||
dst += len;
|
||||
|
|
@ -31,13 +31,42 @@ static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0,
|
|||
for(i=-len, j=len-1; i<0; i++, j--) {
|
||||
int32_t s0 = src0[i];
|
||||
int32_t s1 = src1[j];
|
||||
int32_t wi = win[i];
|
||||
int32_t wj = win[j];
|
||||
dst[i] = fixmulshift(s0,-1*wj,31) - fixmulshift(s1,-1*wi,31) + (add_bias<<16);
|
||||
dst[j] = fixmulshift(s0,-1*wi,31) + fixmulshift(s1,-1*wj,31) + (add_bias<<16);
|
||||
}
|
||||
|
||||
int32_t wi = -win[i];
|
||||
int32_t wj = -win[j];
|
||||
|
||||
asm volatile ("mac.l %[s0], %[wj], %%acc0\n\t"
|
||||
"msac.l %[s1], %[wi], %%acc0\n\t"
|
||||
"mac.l %[s0], %[wi], %%acc1\n\t"
|
||||
"mac.l %[s1], %[wj], %%acc1\n\t"
|
||||
"movclr.l %%acc0, %[s0]\n\t"
|
||||
"move.l %[s0], (%[dst_i])\n\t"
|
||||
"movclr.l %%acc1, %[s0]\n\t"
|
||||
"move.l %[s0], (%[dst_j])\n\t"
|
||||
: [s0] "+r" (s0) /* this register is overwritten by the asm, so declare it as a read-write ("+r") operand rather than a plain input */
|
||||
: [dst_i] "a" (&dst[i]), [dst_j] "a" (&dst[j]),
|
||||
[s1] "r" (s1), [wi] "r" (wi), [wj] "r" (wj)
|
||||
: "cc", "memory");
|
||||
}
|
||||
}
|
||||
#else
|
||||
static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0,
|
||||
const int32_t *src1, const int32_t *win,
|
||||
int len)
|
||||
{
|
||||
int i, j;
|
||||
dst += len;
|
||||
win += len;
|
||||
src0+= len;
|
||||
for(i=-len, j=len-1; i<0; i++, j--) {
|
||||
int32_t s0 = src0[i];
|
||||
int32_t s1 = src1[j];
|
||||
int32_t wi = -win[i];
|
||||
int32_t wj = -win[j];
|
||||
dst[i] = fixmulshift(s0,wj,31) - fixmulshift(s1,wi,31);
|
||||
dst[j] = fixmulshift(s0,wi,31) + fixmulshift(s1,wj,31);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src, int32_t mul,
|
||||
int len, int shift)
|
||||
|
|
|
|||
|
|
@ -288,6 +288,10 @@ int decode_init(asf_waveformatex_t *wfx)
|
|||
int log2_max_num_subframes;
|
||||
int num_possible_block_sizes;
|
||||
|
||||
#if defined(CPU_COLDFIRE)
|
||||
coldfire_set_macsr(EMAC_FRACTIONAL | EMAC_SATURATE);
|
||||
#endif
|
||||
|
||||
init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE);
|
||||
|
||||
if (wfx->datalen >= 18) {
|
||||
|
|
@ -1050,7 +1054,7 @@ static void wmapro_window(WMAProDecodeCtx *s)
|
|||
winlen >>= 1;
|
||||
|
||||
vector_fixmul_window(xstart, xstart, xstart + winlen,
|
||||
window, 0, winlen);
|
||||
window, winlen);
|
||||
|
||||
s->channel[c].prev_block_len = s->subframe_len;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue