forked from len0rd/rockbox
Submit interim version of FS#10565. Performance optimization of atrac3 decoder for ARM. Introduce ASM routines for multiplications and two synthesis loops, refactored parts of synthesis and windowing. Speeds up decoding by a factor of 2.4 on PP502x.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@22548 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
eb3cb724e8
commit
f4774bf5bf
5 changed files with 343 additions and 146 deletions
|
@ -1,2 +1,5 @@
|
|||
atrac3.c
|
||||
#if defined(CPU_ARM)
|
||||
atrac3_arm.S
|
||||
#endif
|
||||
../lib/ffmpeg_bitstream.c
|
||||
|
|
|
@ -67,6 +67,101 @@ static inline int16_t av_clip_int16(int a)
|
|||
static int32_t qmf_window[48] IBSS_ATTR;
|
||||
static VLC spectral_coeff_tab[7];
|
||||
static channel_unit channel_units[2];
|
||||
|
||||
/**
|
||||
* Matrixing within quadrature mirror synthesis filter.
|
||||
*
|
||||
* @param p3 output buffer
|
||||
* @param inlo lower part of spectrum
|
||||
* @param inhi higher part of spectrum
|
||||
* @param nIn size of spectrum buffer
|
||||
*/
|
||||
|
||||
#if defined(CPU_ARM)
|
||||
extern void
|
||||
atrac3_iqmf_matrixing(int32_t *p3,
|
||||
int32_t *inlo,
|
||||
int32_t *inhi,
|
||||
unsigned int nIn);
|
||||
#else
|
||||
/*
 * Portable C fallback for the matrixing step of the inverse QMF.
 *
 * For every input index i it writes the sum inlo[i] + inhi[i] followed by
 * the difference inlo[i] - inhi[i], so 2*nIn words are stored to p3.
 * Inputs are consumed in pairs: with an odd nIn the last pass would read
 * inlo[nIn]/inhi[nIn] and write two extra words, so nIn must be even.
 */
static inline void
atrac3_iqmf_matrixing(int32_t *p3,
                      int32_t *inlo,
                      int32_t *inhi,
                      unsigned int nIn)
{
    unsigned int i; /* index was used without a declaration before */

    for (i = 0; i < nIn; i += 2) {
        p3[2*i+0] = inlo[i  ] + inhi[i  ];
        p3[2*i+1] = inlo[i  ] - inhi[i  ];
        p3[2*i+2] = inlo[i+1] + inhi[i+1];
        p3[2*i+3] = inlo[i+1] - inhi[i+1];
    }
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Dewindowing within quadrature mirror synthesis filter.
|
||||
*
|
||||
* @param out output buffer
|
||||
* @param in input buffer
|
||||
* @param win windowing coefficients
|
||||
* @param nIn size of spectrum buffer
|
||||
*/
|
||||
|
||||
#if defined(CPU_ARM)
|
||||
extern void
|
||||
atrac3_iqmf_dewindowing(int32_t *out,
|
||||
int32_t *in,
|
||||
int32_t *win,
|
||||
unsigned int nIn);
|
||||
#else
|
||||
/*
 * Portable C fallback for the dewindowing step of the inverse QMF.
 *
 * Produces nIn output pairs. For each pair, the 48 window coefficients are
 * multiplied (fixmul31) with 48 consecutive input words; products at even
 * positions are summed into one accumulator, products at odd positions into
 * another. The odd sum is stored first, the even sum second, then the input
 * window slides forward by two words.
 */
static inline void
atrac3_iqmf_dewindowing(int32_t *out,
                        int32_t *in,
                        int32_t *win,
                        unsigned int nIn)
{
    int32_t remaining;

    for (remaining = nIn; remaining != 0; remaining--, in += 2, out += 2) {
        /* taps 0 and 1 seed the two accumulators */
        int32_t even_acc = fixmul31(win[0], in[0]);
        int32_t odd_acc  = fixmul31(win[1], in[1]);
        int32_t tap      = 2;

        /* taps 2..47 */
        while (tap < 48) {
            even_acc += fixmul31(win[tap    ], in[tap    ]);
            odd_acc  += fixmul31(win[tap + 1], in[tap + 1]);
            tap += 2;
        }

        out[0] = odd_acc;
        out[1] = even_acc;
    }
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* IMDCT windowing.
|
||||
*
|
||||
* @param buffer sample buffer
|
||||
* @param win window coefficients
|
||||
*/
|
||||
|
||||
/*
 * Apply the IMDCT window in place to a 512-word buffer.
 *
 * Only the first and the last 128 words are scaled; win[k] is applied to
 * both buffer[k] and its mirror buffer[511-k]. Words 128..383 are left
 * untouched (the window is 1 there, so no multiply is needed).
 */
static inline void
atrac3_imdct_windowing(int32_t *buffer,
                       const int32_t *win)
{
    int32_t *head = buffer;          /* walks up from buffer[0]     */
    int32_t *tail = buffer + 511;    /* walks down from buffer[511] */
    const int32_t *const win_end = win + 128;

    for (; win != win_end; win++, head++, tail--) {
        *head = fixmul31(*win, *head);
        *tail = fixmul31(*win, *tail);
    }
}
|
||||
|
||||
/**
|
||||
* Quadrature mirror synthesis filter.
|
||||
*
|
||||
|
@ -77,42 +172,19 @@ static channel_unit channel_units[2];
|
|||
* @param delayBuf delayBuf buffer
|
||||
* @param temp temp buffer
|
||||
*/
|
||||
|
||||
static void iqmf (int32_t *inlo, int32_t *inhi, unsigned int nIn, int32_t *pOut, int32_t *delayBuf, int32_t *temp)
|
||||
{
|
||||
unsigned int i, j;
|
||||
int32_t *p1, *p3;
|
||||
|
||||
/* Restore the delay buffer */
|
||||
memcpy(temp, delayBuf, 46*sizeof(int32_t));
|
||||
|
||||
p3 = temp + 46;
|
||||
/* loop1: matrixing */
|
||||
atrac3_iqmf_matrixing(temp + 46, inlo, inhi, nIn);
|
||||
|
||||
/* loop1 */
|
||||
for(i=0; i<nIn; i+=2){
|
||||
p3[2*i+0] = inlo[i ] + inhi[i ];
|
||||
p3[2*i+1] = inlo[i ] - inhi[i ];
|
||||
p3[2*i+2] = inlo[i+1] + inhi[i+1];
|
||||
p3[2*i+3] = inlo[i+1] - inhi[i+1];
|
||||
}
|
||||
/* loop2: dewindowing */
|
||||
atrac3_iqmf_dewindowing(pOut, temp, qmf_window, nIn);
|
||||
|
||||
/* loop2 */
|
||||
p1 = temp;
|
||||
for (j = nIn; j != 0; j--) {
|
||||
int32_t s1 = 0;
|
||||
int32_t s2 = 0;
|
||||
|
||||
for (i = 0; i < 48; i += 2) {
|
||||
s1 += fixmul31(p1[i], qmf_window[i]);
|
||||
s2 += fixmul31(p1[i+1], qmf_window[i+1]);
|
||||
}
|
||||
|
||||
pOut[0] = s2;
|
||||
pOut[1] = s1;
|
||||
|
||||
p1 += 2;
|
||||
pOut += 2;
|
||||
}
|
||||
|
||||
/* Update the delay buffer. */
|
||||
/* Save the delay buffer */
|
||||
memcpy(delayBuf, temp + (nIn << 1), 46*sizeof(int32_t));
|
||||
}
|
||||
|
||||
|
@ -146,9 +218,7 @@ static void IMLT(int32_t *pInput, int32_t *pOutput, int odd_band)
|
|||
mdct_backward(512, pInput, pOutput);
|
||||
|
||||
/* Windowing. */
|
||||
for(i = 0; i<512; i++)
|
||||
pOutput[i] = fixmul31(pOutput[i], window_lookup[i]);
|
||||
|
||||
atrac3_imdct_windowing(pOutput, window_lookup);
|
||||
}
|
||||
|
||||
|
||||
|
|
137
apps/codecs/libatrac/atrac3_arm.S
Executable file
137
apps/codecs/libatrac/atrac3_arm.S
Executable file
|
@ -0,0 +1,137 @@
|
|||
/***************************************************************************
|
||||
* __________ __ ___.
|
||||
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
|
||||
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
|
||||
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
|
||||
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
|
||||
* \/ \/ \/ \/ \/
|
||||
* $Id:
|
||||
*
|
||||
* Copyright (C) 2009 by Andree Buschmann
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
|
||||
* KIND, either express or implied.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
.section .text, "ax", %progbits
|
||||
|
||||
/****************************************************************************
|
||||
* void atrac3_iqmf_matrixing(int32_t *dest,
|
||||
* int32_t *inlo,
|
||||
* int32_t *inhi,
|
||||
* unsigned int count);
|
||||
*
|
||||
* Matrixing step within iqmf of atrac3 synthesis. Reference implementation:
|
||||
*
|
||||
* for(i=0; i<count; i+=2){
|
||||
* dest[2*i+0] = inlo[i ] + inhi[i ];
|
||||
* dest[2*i+1] = inlo[i ] - inhi[i ];
|
||||
* dest[2*i+2] = inlo[i+1] + inhi[i+1];
|
||||
* dest[2*i+3] = inlo[i+1] - inhi[i+1];
|
||||
* }
|
||||
* Note: r12 is a scratch register and can be used without saving and restoring it.
|
||||
****************************************************************************/
|
||||
.align 2
|
||||
.global atrac3_iqmf_matrixing
|
||||
.type atrac3_iqmf_matrixing, %function
|
||||
|
||||
atrac3_iqmf_matrixing:
|
||||
/* Each pass of the loop below reads four words from inlo and four from
 * inhi and stores eight words (sum/difference pairs) to dest. The counter
 * is decremented by 4 and tested after the body, so the body always runs
 * at least once.
 * NOTE(review): this implies the counter must be a positive multiple of 4;
 * confirm that all callers guarantee it. */
/* r0 = dest */
|
||||
/* r1 = inlo */
|
||||
/* r2 = inhi */
|
||||
/* r3 = counter */
|
||||
stmfd sp!, {r4-r9, lr} /* save non-scratch registers */
|
||||
|
||||
.iqmf_matrixing_loop:
|
||||
ldmia r1!, { r4, r6, r8, r12} /* load inlo[0...3], post-increment r1 */
|
||||
ldmia r2!, { r5, r7, r9, lr } /* load inhi[0...3], post-increment r2 */
|
||||
add r4, r4, r5 /* r4 = inlo[0] + inhi[0] */
|
||||
/* The differences are derived from the sums: sum - 2*inhi = inlo - inhi
 * (asl #1 doubles the second operand). */
sub r5, r4, r5, asl #1 /* r5 = inlo[0] - inhi[0] */
|
||||
add r6, r6, r7 /* r6 = inlo[1] + inhi[1] */
|
||||
sub r7, r6, r7, asl #1 /* r7 = inlo[1] - inhi[1] */
|
||||
add r8, r8, r9 /* r8 = inlo[2] + inhi[2] */
|
||||
sub r9, r8, r9, asl #1 /* r9 = inlo[2] - inhi[2] */
|
||||
add r12, r12, lr /* r12 = inlo[3] + inhi[3] */
|
||||
sub lr , r12, lr, asl #1 /* lr = inlo[3] - inhi[3] */
|
||||
stmia r0!, {r4-r9, r12, lr} /* store 8 results to dest, post-increment r0 */
|
||||
subs r3, r3, #4 /* counter -= 4 */
|
||||
bgt .iqmf_matrixing_loop
|
||||
|
||||
ldmfd sp!, {r4-r9, pc} /* restore registers and return (lr popped into pc) */
|
||||
|
||||
.atrac3_iqmf_matrixing_end:
|
||||
.size atrac3_iqmf_matrixing,.atrac3_iqmf_matrixing_end-atrac3_iqmf_matrixing
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
* atrac3_iqmf_dewindowing(int32_t *out,
|
||||
* int32_t *in,
|
||||
* int32_t *win,
|
||||
* unsigned int nIn);
|
||||
*
|
||||
* Dewindowing step within iqmf of atrac3 synthesis. Reference implementation:
|
||||
*
|
||||
* for (j = nIn; j != 0; j--) {
|
||||
* s1 = fixmul32(in[0], win[0]);
|
||||
* s2 = fixmul32(in[1], win[1]);
|
||||
* for (i = 2; i < 48; i += 2) {
|
||||
* s1 += fixmul32(in[i ], win[i ]);
|
||||
* s2 += fixmul32(in[i+1], win[i+1]);
|
||||
* }
|
||||
* out[0] = s2 << 1;
|
||||
* out[1] = s1 << 1;
|
||||
* in += 2;
|
||||
* out += 2;
|
||||
* }
|
||||
* Note: r12 is a scratch register and can be used without saving and restoring it.
|
||||
****************************************************************************/
|
||||
.align 2
|
||||
.global atrac3_iqmf_dewindowing
|
||||
.type atrac3_iqmf_dewindowing, %function
|
||||
|
||||
atrac3_iqmf_dewindowing:
|
||||
/* For every output pair, two 64-bit sums are accumulated over 48 taps
 * (s1 in r10:lr over the even taps, s2 in r9:r12 over the odd taps) and
 * each sum is then reduced to 32 bits by a single shift right by 31.
 * Both loop counters are tested after the body, so the outer body runs
 * at least once. */
/* r0 = dest */
|
||||
/* r1 = input samples */
|
||||
/* r2 = window coefficients */
|
||||
/* r3 = counter */
|
||||
stmfd sp!, {r4-r10, lr} /* save non-scratch registers */
|
||||
|
||||
.iqmf_dewindow_outer_loop: /* outer loop 0...counter-1 */
|
||||
|
||||
ldmia r2!, {r5, r6} /* load win[0..1] */
|
||||
ldmia r1!, {r7, r8} /* load in[0..1] */
|
||||
smull lr , r10, r5, r7 /* s1 = win[0] * in[0] (r10:lr = hi:lo) */
|
||||
smull r12, r9 , r6, r8 /* s2 = win[1] * in[1] (r9:r12 = hi:lo) */
|
||||
|
||||
mov r4, #46 /* r4 = 46 remaining taps, consumed two per pass */
|
||||
.iqmf_dewindow_inner_loop: /* inner loop i=2...48 */
|
||||
ldmia r2!, {r5, r6} /* load win[i...i+1] */
|
||||
ldmia r1!, {r7, r8} /* load in[i...i+1] */
|
||||
smlal lr , r10, r5, r7 /* s1 += win[i ] * in[i ] */
|
||||
smlal r12, r9 , r6, r8 /* s2 += win[i+1] * in[i+1] */
|
||||
|
||||
subs r4, r4, #2 /* inner loop -= 2*/
|
||||
bgt .iqmf_dewindow_inner_loop
|
||||
|
||||
mov lr , lr , lsr #31
|
||||
orr r10, lr , r10, lsl #1 /* s1 = (lo >> 31) | (hi << 1), i.e. 64-bit sum >> 31 */
|
||||
mov r12, r12, lsr #31
|
||||
orr r9 , r12, r9 , lsl #1 /* s2 = (lo >> 31) | (hi << 1), i.e. 64-bit sum >> 31 */
|
||||
|
||||
stmia r0!, {r9, r10} /* store result out[0]=s2, out[1]=s1 */
|
||||
sub r1, r1, #184 /* roll back 46 entries = 184 bytes; 48 were read, so in advances by 2 */
|
||||
sub r2, r2, #192 /* roll back 48 entries = 192 bytes = win[0] */
|
||||
|
||||
subs r3, r3, #1 /* outer loop -= 1 */
|
||||
bgt .iqmf_dewindow_outer_loop
|
||||
|
||||
ldmfd sp!, {r4-r10, pc} /* restore registers and return (lr popped into pc) */
|
||||
|
||||
.atrac3_iqmf_dewindowing_end:
|
||||
.size atrac3_iqmf_dewindowing,.atrac3_iqmf_dewindowing_end-atrac3_iqmf_dewindowing
|
|
@ -22,102 +22,39 @@ static const int32_t SFTable_fixed[64] ICONST_ATTR = {
|
|||
};
|
||||
|
||||
/* transform data */
|
||||
/* floating point values scaled by 2^31 */
|
||||
/* floating point values scaled by 2^31 */
|
||||
static const int32_t qmf_48tap_half_fix[24] ICONST_ATTR = {
|
||||
0xffff855e, 0xfffcfbca, 0xfffe28eb, 0x9de6b, 0x7f028, 0xffe40d08,
|
||||
0xffeef140, 0x42a692, 0x19ab1f, 0xff75dec7, 0xffe738f5, 0x100e928,
|
||||
0xfffdfedf, 0xfe478b84, 0x50b279, 0x2c83f88, 0xff005ad7, 0xfba2ee80,
|
||||
0x2685970, 0x6f42798, 0xfa6b6f10, 0xf3475f80, 0x10e7f7c0, 0x3b6c44c0
|
||||
0xffff855e, 0xfffcfbca, 0xfffe28eb, 0x0009de6b, 0x0007f028, 0xffe40d08,
|
||||
0xffeef140, 0x0042a692, 0x0019ab1f, 0xff75dec7, 0xffe738f5, 0x0100e928,
|
||||
0xfffdfedf, 0xfe478b84, 0x0050b279, 0x02c83f88, 0xff005ad7, 0xfba2ee80,
|
||||
0x02685970, 0x06f42798, 0xfa6b6f10, 0xf3475f80, 0x10e7f7c0, 0x3b6c44c0
|
||||
};
|
||||
|
||||
/* mdct window scaled by 2^31 */
|
||||
static const int32_t window_lookup[512] ICONST_ATTR = {
|
||||
0xffffb10c, 0xfffd394b, 0xfff8494f, 0xfff0e025, 0xffe6fc5f, 0xffda9c15,
|
||||
0xffcbbce6, 0xffba5bf4, 0xffa675e8, 0xff9006f0, 0xff770aba, 0xff5b7c7e,
|
||||
0xff3d56f2, 0xff1c9452, 0xfef92e59, 0xfed31e45, 0xfeaa5cd5, 0xfe7ee247,
|
||||
0xfe50a657, 0xfe1fa041, 0xfdebc6c1, 0xfdb5100d, 0xfd7b71d5, 0xfd3ee149,
|
||||
0xfcff5311, 0xfcbcbb49, 0xfc770d99, 0xfc2e3d15, 0xfbe23c39, 0xfb92fd29,
|
||||
0xfb407141, 0xfaea8989, 0xfa913661, 0xfa3467b1, 0xf9d40cd9, 0xf9701499,
|
||||
0xf9086d41, 0xf89d04a9, 0xf82dc7f1, 0xf7baa3e1, 0xf74384b1, 0xf6c85611,
|
||||
0xf6490321, 0xf5c576b1, 0xf53d9b21, 0xf4b15a01, 0xf4209ce1, 0xf38b4c71,
|
||||
0xf2f15171, 0xf2529411, 0xf1aefbf1, 0xf10670a1, 0xf058d941, 0xefa61cc1,
|
||||
0xeeee21c1, 0xee30cec1, 0xed6e0a41, 0xeca5ba61, 0xebd7c5c1, 0xeb041241,
|
||||
0xea2a8601, 0xe94b0861, 0xe8657f61, 0xe779d241, 0xe687e861, 0xe58fa9e1,
|
||||
0xe490fec1, 0xe38bd101, 0xe28009c1, 0xe16d93e1, 0xe0545ba1, 0xdf344dc1,
|
||||
0xde0d5881, 0xdcdf6bc1, 0xdbaa7801, 0xda6e70c1, 0xd92b4ac1, 0xd7e0fc81,
|
||||
0xd68f7ec1, 0xd536cd41, 0xd3d6e5c1, 0xd26fc901, 0xd10179c1, 0xcf8bff41,
|
||||
0xce0f6301, 0xcc8bb241, 0xcb00fdc1, 0xc96f5b01, 0xc7d6e141, 0xc637af41,
|
||||
0xc491e4c1, 0xc2e5a801, 0xc1332401, 0xbf7a8701, 0xbdbc0681, 0xbbf7da01,
|
||||
0xba2e4181, 0xb85f7f81, 0xb68bde01, 0xb4b3a981, 0xb2d73781, 0xb0f6df01,
|
||||
0xaf12ff01, 0xad2bfa81, 0xab423981, 0xa9562981, 0xa7683c01, 0xa578e701,
|
||||
0xa388a681, 0xa197f801, 0x9fa75e81, 0x9db75f01, 0x9bc88201, 0x99db5301,
|
||||
0x97f06001, 0x96083601, 0x94236601, 0x92427f81, 0x90661481, 0x8e8eb481,
|
||||
0x8cbced01, 0x8af14d81, 0x892c5f81, 0x876eab01, 0x85b8b681, 0x840b0301,
|
||||
0x82660c01, 0x80ca4a01, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x80ca4a01, 0x82660c01, 0x840b0301, 0x85b8b681, 0x876eab01, 0x892c5f81,
|
||||
0x8af14d81, 0x8cbced01, 0x8e8eb481, 0x90661481, 0x92427f81, 0x94236601,
|
||||
0x96083601, 0x97f06001, 0x99db5301, 0x9bc88201, 0x9db75f01, 0x9fa75e81,
|
||||
0xa197f801, 0xa388a681, 0xa578e701, 0xa7683c01, 0xa9562981, 0xab423981,
|
||||
0xad2bfa81, 0xaf12ff01, 0xb0f6df01, 0xb2d73781, 0xb4b3a981, 0xb68bde01,
|
||||
0xb85f7f81, 0xba2e4181, 0xbbf7da01, 0xbdbc0681, 0xbf7a8701, 0xc1332401,
|
||||
0xc2e5a801, 0xc491e4c1, 0xc637af41, 0xc7d6e141, 0xc96f5b01, 0xcb00fdc1,
|
||||
0xcc8bb241, 0xce0f6301, 0xcf8bff41, 0xd10179c1, 0xd26fc901, 0xd3d6e5c1,
|
||||
0xd536cd41, 0xd68f7ec1, 0xd7e0fc81, 0xd92b4ac1, 0xda6e70c1, 0xdbaa7801,
|
||||
0xdcdf6bc1, 0xde0d5881, 0xdf344dc1, 0xe0545ba1, 0xe16d93e1, 0xe28009c1,
|
||||
0xe38bd101, 0xe490fec1, 0xe58fa9e1, 0xe687e861, 0xe779d241, 0xe8657f61,
|
||||
0xe94b0861, 0xea2a8601, 0xeb041241, 0xebd7c5c1, 0xeca5ba61, 0xed6e0a41,
|
||||
0xee30cec1, 0xeeee21c1, 0xefa61cc1, 0xf058d941, 0xf10670a1, 0xf1aefbf1,
|
||||
0xf2529411, 0xf2f15171, 0xf38b4c71, 0xf4209ce1, 0xf4b15a01, 0xf53d9b21,
|
||||
0xf5c576b1, 0xf6490321, 0xf6c85611, 0xf74384b1, 0xf7baa3e1, 0xf82dc7f1,
|
||||
0xf89d04a9, 0xf9086d41, 0xf9701499, 0xf9d40cd9, 0xfa3467b1, 0xfa913661,
|
||||
0xfaea8989, 0xfb407141, 0xfb92fd29, 0xfbe23c39, 0xfc2e3d15, 0xfc770d99,
|
||||
0xfcbcbb49, 0xfcff5311, 0xfd3ee149, 0xfd7b71d5, 0xfdb5100d, 0xfdebc6c1,
|
||||
0xfe1fa041, 0xfe50a657, 0xfe7ee247, 0xfeaa5cd5, 0xfed31e45, 0xfef92e59,
|
||||
0xff1c9452, 0xff3d56f2, 0xff5b7c7e, 0xff770aba, 0xff9006f0, 0xffa675e8,
|
||||
0xffba5bf4, 0xffcbbce6, 0xffda9c15, 0xffe6fc5f, 0xfff0e025, 0xfff8494f,
|
||||
0xfffd394b, 0xffffb10c,
|
||||
/* Remark: The leading minus sign negates each hexadecimal literal, so the stored coefficients have the opposite sign of the raw bit patterns. */
|
||||
static const int32_t window_lookup[128] ICONST_ATTR = {
|
||||
-0xffffb10c, -0xfffd394b, -0xfff8494f, -0xfff0e025, -0xffe6fc5f, -0xffda9c15,
|
||||
-0xffcbbce6, -0xffba5bf4, -0xffa675e8, -0xff9006f0, -0xff770aba, -0xff5b7c7e,
|
||||
-0xff3d56f2, -0xff1c9452, -0xfef92e59, -0xfed31e45, -0xfeaa5cd5, -0xfe7ee247,
|
||||
-0xfe50a657, -0xfe1fa041, -0xfdebc6c1, -0xfdb5100d, -0xfd7b71d5, -0xfd3ee149,
|
||||
-0xfcff5311, -0xfcbcbb49, -0xfc770d99, -0xfc2e3d15, -0xfbe23c39, -0xfb92fd29,
|
||||
-0xfb407141, -0xfaea8989, -0xfa913661, -0xfa3467b1, -0xf9d40cd9, -0xf9701499,
|
||||
-0xf9086d41, -0xf89d04a9, -0xf82dc7f1, -0xf7baa3e1, -0xf74384b1, -0xf6c85611,
|
||||
-0xf6490321, -0xf5c576b1, -0xf53d9b21, -0xf4b15a01, -0xf4209ce1, -0xf38b4c71,
|
||||
-0xf2f15171, -0xf2529411, -0xf1aefbf1, -0xf10670a1, -0xf058d941, -0xefa61cc1,
|
||||
-0xeeee21c1, -0xee30cec1, -0xed6e0a41, -0xeca5ba61, -0xebd7c5c1, -0xeb041241,
|
||||
-0xea2a8601, -0xe94b0861, -0xe8657f61, -0xe779d241, -0xe687e861, -0xe58fa9e1,
|
||||
-0xe490fec1, -0xe38bd101, -0xe28009c1, -0xe16d93e1, -0xe0545ba1, -0xdf344dc1,
|
||||
-0xde0d5881, -0xdcdf6bc1, -0xdbaa7801, -0xda6e70c1, -0xd92b4ac1, -0xd7e0fc81,
|
||||
-0xd68f7ec1, -0xd536cd41, -0xd3d6e5c1, -0xd26fc901, -0xd10179c1, -0xcf8bff41,
|
||||
-0xce0f6301, -0xcc8bb241, -0xcb00fdc1, -0xc96f5b01, -0xc7d6e141, -0xc637af41,
|
||||
-0xc491e4c1, -0xc2e5a801, -0xc1332401, -0xbf7a8701, -0xbdbc0681, -0xbbf7da01,
|
||||
-0xba2e4181, -0xb85f7f81, -0xb68bde01, -0xb4b3a981, -0xb2d73781, -0xb0f6df01,
|
||||
-0xaf12ff01, -0xad2bfa81, -0xab423981, -0xa9562981, -0xa7683c01, -0xa578e701,
|
||||
-0xa388a681, -0xa197f801, -0x9fa75e81, -0x9db75f01, -0x9bc88201, -0x99db5301,
|
||||
-0x97f06001, -0x96083601, -0x94236601, -0x92427f81, -0x90661481, -0x8e8eb481,
|
||||
-0x8cbced01, -0x8af14d81, -0x892c5f81, -0x876eab01, -0x85b8b681, -0x840b0301,
|
||||
-0x82660c01, -0x80ca4a01,
|
||||
};
|
||||
|
||||
/* Gain tables scaled by 2^16 */
|
||||
|
|
|
@ -10,27 +10,77 @@
|
|||
|
||||
/* Fixed point math routines for use in atrac3.c */
|
||||
|
||||
static inline int32_t fixmul16(int32_t x, int32_t y)
|
||||
{
|
||||
int64_t temp;
|
||||
temp = x;
|
||||
temp *= y;
|
||||
|
||||
temp >>= 16;
|
||||
|
||||
return (int32_t)temp;
|
||||
}
|
||||
|
||||
static inline int32_t fixmul31(int32_t x, int32_t y)
|
||||
{
|
||||
int64_t temp;
|
||||
temp = x;
|
||||
temp *= y;
|
||||
|
||||
temp >>= 31; //16+31-16 = 31 bits
|
||||
|
||||
return (int32_t)temp;
|
||||
}
|
||||
#if defined(CPU_ARM)
|
||||
/* ARM version of fixmul16: multiplies X and Y to a 64-bit product with
 * smull and yields bits 16..47 of it, i.e. (X*Y)>>16 truncated to 32 bits.
 * Implemented as a GCC statement expression whose value is `low`.
 * NOTE(review): the macro-local names `low` and `high` shadow any caller
 * variable of the same name that is passed as X or Y. */
#define fixmul16(X,Y) \
|
||||
({ \
|
||||
int32_t low; \
|
||||
int32_t high; \
|
||||
asm volatile ( /* calculates: result = (X*Y)>>16 */ \
|
||||
"smull %0,%1,%2,%3 \n\t" /* 64 = 32x32 multiply */ \
|
||||
"mov %0, %0, lsr #16 \n\t" /* %0 = %0 >> 16 */ \
|
||||
"orr %0, %0, %1, lsl #16 \n\t"/* result = %0 OR (%1 << 16) */ \
|
||||
: "=&r"(low), "=&r" (high) \
|
||||
: "r"(X),"r"(Y)); \
|
||||
low; \
|
||||
})
|
||||
|
||||
/* ARM version of fixmul31: multiplies X and Y to a 64-bit product with
 * smull and yields bits 31..62 of it, i.e. (X*Y)>>31 truncated to 32 bits.
 * Implemented as a GCC statement expression whose value is `low`.
 * NOTE(review): the macro-local names `low` and `high` shadow any caller
 * variable of the same name that is passed as X or Y. */
#define fixmul31(X,Y) \
|
||||
({ \
|
||||
int32_t low; \
|
||||
int32_t high; \
|
||||
asm volatile ( /* calculates: result = (X*Y)>>31 */ \
|
||||
"smull %0,%1,%2,%3 \n\t" /* 64 = 32x32 multiply */ \
|
||||
"mov %0, %0, lsr #31 \n\t" /* %0 = %0 >> 31 */ \
|
||||
"orr %0, %0, %1, lsl #1 \n\t" /* result = %0 OR (%1 << 1) */ \
|
||||
: "=&r"(low), "=&r" (high) \
|
||||
: "r"(X),"r"(Y)); \
|
||||
low; \
|
||||
})
|
||||
|
||||
/* ARM version of fixmul32: multiplies X and Y to a 64-bit product with
 * smull and yields its upper 32 bits (`high`), i.e. (X*Y)>>32. No shift
 * instructions are needed because smull already delivers the high word.
 * NOTE(review): the macro-local names `low` and `high` shadow any caller
 * variable of the same name that is passed as X or Y. */
#define fixmul32(X,Y) \
|
||||
({ \
|
||||
int32_t low; \
|
||||
int32_t high; \
|
||||
asm volatile ( /* calculates: result = (X*Y)>>32 */ \
|
||||
"smull %0,%1,%2,%3 \n\t" /* 64 = 32x32 multiply */ \
|
||||
: "=&r"(low), "=&r" (high) \
|
||||
: "r"(X),"r"(Y)); \
|
||||
high; \
|
||||
})
|
||||
#else
|
||||
/* Portable fixmul16: 64-bit product of x and y, shifted right by 16 bits
 * and truncated to 32 bits. */
static inline int32_t fixmul16(int32_t x, int32_t y)
{
    const int64_t product = (int64_t)x * y;

    return (int32_t)(product >> 16);
}
|
||||
|
||||
/* Portable fixmul31: 64-bit product of x and y, shifted right by 31 bits
 * and truncated to 32 bits. */
static inline int32_t fixmul31(int32_t x, int32_t y)
{
    const int64_t product = (int64_t)x * y;

    return (int32_t)(product >> 31);
}
|
||||
|
||||
/* Portable fixmul32: upper 32 bits of the 64-bit product of x and y,
 * i.e. the product shifted right by 32 bits. */
static inline int32_t fixmul32(int32_t x, int32_t y)
{
    const int64_t product = (int64_t)x * y;

    return (int32_t)(product >> 32);
}
|
||||
#endif
|
||||
|
||||
static inline int32_t fixdiv16(int32_t x, int32_t y)
|
||||
{
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue