1
0
Fork 0
forked from len0rd/rockbox

opus: full precision MULT32_32_Q31 (32*32=64>>31) multiplication

Replace the complicated macro that does three 16*16 multiplications with a
full 64-bit multiply, and add an inline asm implementation for ARM. This
speeds up decoding of a 64kbps test file by 0.5MHz on the c200 (PP) and
gives slightly better precision.

Change-Id: I6fc5b83c210f01bffdc38aec54cc5a8b646d8169
Signed-off-by: Nils Wallménius <nils@rockbox.org>
This commit is contained in:
Andree Buschmann 2012-10-06 23:35:19 +02:00 committed by Nils Wallménius
parent d2875fc773
commit 2119f75af3

View file

@ -71,9 +71,23 @@ static inline int32_t MULT16_32_Q15(int32_t a, int32_t b)
#define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15)) #define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15))
#endif #endif
/** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */ #if defined(CPU_ARM)
#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15)) static inline int32_t MULT32_32_Q31(int32_t a, int32_t b)
{
/* lo/hi receive the low and high 32-bit words of the 64-bit product. */
int32_t lo, hi;
/* smull: signed 32x32 -> 64-bit multiply; low word in lo, high word in hi.
 * mov .. lsr #31: move bit 31 of the low word down to bit 0.
 * orr .. lsl #1: merge that bit with the high word shifted left by one, so
 * hi ends up holding bits 62..31 of the product, i.e. (a * b) >> 31
 * truncated to 32 bits.
 * The "=&r" (early-clobber) constraints keep lo and hi in registers that
 * are distinct from the input registers holding a and b.
 * NOTE(review): the asm has outputs and no visible side effects, so
 * "volatile" looks unnecessary and may inhibit optimization -- confirm
 * before removing. */
asm volatile("smull %[lo], %[hi], %[a], %[b] \n\t"
"mov %[lo], %[lo], lsr #31 \n\t"
"orr %[hi], %[lo], %[hi], lsl #1 \n\t"
: [lo] "=&r" (lo), [hi] "=&r" (hi)
: [a] "r" (a), [b] "r" (b) );
/* hi now holds the Q31 result. */
return(hi);
}
#else
/** 32x32 multiplication, followed by a 31-bit shift right. Result fits in 32 bits.
 *  Generic fallback: forms the full 64-bit product, shifts it right by 31 and
 *  casts the result to opus_val32. */
/* Previous lower-precision variant built from three 16x16 multiplications,
 * kept commented out for reference: */
//#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
#define MULT32_32_Q31(a,b) (opus_val32)((((int64_t)(a)) * ((int64_t)(b)))>>31)
#endif
/** Compile-time conversion of float constant to 16-bit value */ /** Compile-time conversion of float constant to 16-bit value */
#define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits)))) #define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits))))