forked from len0rd/rockbox
Speed up AAC-HE SBR by 2% on S5L8701. Use MEM_ALIGN_ATTR on critical arrays and avoid stalls in asm code.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@29209 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
62fb090ac4
commit
8b540fa9d9
3 changed files with 33 additions and 33 deletions
|
|
@ -1453,7 +1453,7 @@ void DCT2_32_unscaled(real_t *y, real_t *x)
|
|||
#else /* #ifdef SBR_LOW_POWER */
|
||||
|
||||
/* table for pre-processing within dct4_kernel() */
|
||||
static const real_t dct4_pre_tab[] ICONST_ATTR = {
|
||||
static const real_t dct4_pre_tab[] ICONST_ATTR MEM_ALIGN_ATTR = {
|
||||
COEF_CONST(0.999924719333649), COEF_CONST(-1.01219630241394), COEF_CONST(-0.987653195858002),
|
||||
COEF_CONST(0.998118102550507), COEF_CONST(-1.05943882465363), COEF_CONST(-0.936797380447388),
|
||||
COEF_CONST(0.993906974792480), COEF_CONST(-1.10412919521332), COEF_CONST(-0.883684754371643),
|
||||
|
|
@ -1489,7 +1489,7 @@ static const real_t dct4_pre_tab[] ICONST_ATTR = {
|
|||
};
|
||||
|
||||
/* table for post-processing within dct4_kernel() */
|
||||
static const real_t dct4_post_tab[] ICONST_ATTR = {
|
||||
static const real_t dct4_post_tab[] ICONST_ATTR MEM_ALIGN_ATTR = {
|
||||
COEF_CONST(1 ), COEF_CONST(-1 ), COEF_CONST(-1 ),
|
||||
COEF_CONST(0.998795449733734), COEF_CONST(-1.04786312580109), COEF_CONST(-0.949727773666382),
|
||||
COEF_CONST(0.995184719562531), COEF_CONST(-1.09320187568665), COEF_CONST(-0.897167563438416),
|
||||
|
|
@ -1525,7 +1525,7 @@ static const real_t dct4_post_tab[] ICONST_ATTR = {
|
|||
};
|
||||
|
||||
// Table adapted from codeclib to fit into IRAM
|
||||
const uint32_t dct4_revtab[32] ICONST_ATTR = {
|
||||
const uint32_t dct4_revtab[32] ICONST_ATTR MEM_ALIGN_ATTR = {
|
||||
0, 24, 12, 22, 6, 30, 11, 19, 3, 27, 15, 21, 5, 29, 9, 17,
|
||||
1, 25, 13, 23, 7, 31, 10, 18, 2, 26, 14, 20, 4, 28, 8, 16};
|
||||
|
||||
|
|
|
|||
|
|
@ -521,8 +521,8 @@ uint8_t sbrDecodeSingleFrame(sbr_info *sbr, real_t *channel,
|
|||
return 0;
|
||||
}
|
||||
|
||||
ALIGN qmf_t X_left[MAX_NTSRHFG][64];// = {{0}};
|
||||
ALIGN qmf_t X_right[MAX_NTSRHFG][64];// = {{0}}; /* must set this to 0 */
|
||||
qmf_t X_left [MAX_NTSRHFG][64] MEM_ALIGN_ATTR;// = {{0}};
|
||||
qmf_t X_right[MAX_NTSRHFG][64] MEM_ALIGN_ATTR;// = {{0}}; /* must set this to 0 */
|
||||
|
||||
#if (defined(PS_DEC) || defined(DRM_PS))
|
||||
uint8_t sbrDecodeSingleFramePS(sbr_info *sbr, real_t *left_channel, real_t *right_channel,
|
||||
|
|
|
|||
|
|
@ -78,12 +78,12 @@ void qmfa_end(qmfa_info *qmfa)
|
|||
void sbr_qmf_analysis_32(sbr_info *sbr, qmfa_info *qmfa, const real_t *input,
|
||||
qmf_t X[MAX_NTSRHFG][64], uint8_t offset, uint8_t kx)
|
||||
{
|
||||
ALIGN real_t u[64];
|
||||
real_t u[64] MEM_ALIGN_ATTR;
|
||||
#ifndef SBR_LOW_POWER
|
||||
ALIGN real_t real[32];
|
||||
ALIGN real_t imag[32];
|
||||
real_t real[32] MEM_ALIGN_ATTR;
|
||||
real_t imag[32] MEM_ALIGN_ATTR;
|
||||
#else
|
||||
ALIGN real_t y[32];
|
||||
real_t y[32] MEM_ALIGN_ATTR;
|
||||
#endif
|
||||
qmf_t *pX;
|
||||
uint32_t in = 0;
|
||||
|
|
@ -227,8 +227,8 @@ void qmfs_end(qmfs_info *qmfs)
|
|||
void sbr_qmf_synthesis_32(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][64],
|
||||
real_t *output)
|
||||
{
|
||||
ALIGN real_t x[16];
|
||||
ALIGN real_t y[16];
|
||||
real_t x[16] MEM_ALIGN_ATTR;
|
||||
real_t y[16] MEM_ALIGN_ATTR;
|
||||
int16_t n, k, out = 0;
|
||||
uint8_t l;
|
||||
|
||||
|
|
@ -291,8 +291,8 @@ void sbr_qmf_synthesis_32(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][6
|
|||
void sbr_qmf_synthesis_64(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][64],
|
||||
real_t *output)
|
||||
{
|
||||
ALIGN real_t x[64];
|
||||
ALIGN real_t y[64];
|
||||
real_t x[64] MEM_ALIGN_ATTR;
|
||||
real_t y[64] MEM_ALIGN_ATTR;
|
||||
int16_t n, k, out = 0;
|
||||
uint8_t l;
|
||||
|
||||
|
|
@ -401,8 +401,8 @@ static const complex_t qmf32_pre_twiddle[] =
|
|||
void sbr_qmf_synthesis_32(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][64],
|
||||
real_t *output)
|
||||
{
|
||||
ALIGN real_t x1[32];
|
||||
ALIGN real_t x2[32];
|
||||
real_t x1[32] MEM_ALIGN_ATTR;
|
||||
real_t x2[32] MEM_ALIGN_ATTR;
|
||||
int32_t n, k, idx0, idx1, out = 0;
|
||||
uint32_t l;
|
||||
|
||||
|
|
@ -464,10 +464,10 @@ void sbr_qmf_synthesis_32(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][6
|
|||
void sbr_qmf_synthesis_64(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][64],
|
||||
real_t *output)
|
||||
{
|
||||
ALIGN real_t real1[32];
|
||||
ALIGN real_t imag1[32];
|
||||
ALIGN real_t real2[32];
|
||||
ALIGN real_t imag2[32];
|
||||
real_t real1[32] MEM_ALIGN_ATTR;
|
||||
real_t imag1[32] MEM_ALIGN_ATTR;
|
||||
real_t real2[32] MEM_ALIGN_ATTR;
|
||||
real_t imag2[32] MEM_ALIGN_ATTR;
|
||||
qmf_t *pX;
|
||||
real_t *p_buf_1, *p_buf_3;
|
||||
int32_t n, k, idx0, idx1, out = 0;
|
||||
|
|
@ -517,36 +517,36 @@ void sbr_qmf_synthesis_64(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][6
|
|||
asm volatile (
|
||||
"ldmia %[qtab]!, { r0-r3 } \n\t"
|
||||
"ldr r4, [%[pbuf]] \n\t"
|
||||
"ldr r7, [%[pbuf], #192*4] \n\t"
|
||||
"smull r5, r6, r4, r0 \n\t"
|
||||
"ldr r4, [%[pbuf], #192*4] \n\t"
|
||||
"smlal r5, r6, r4, r1 \n\t"
|
||||
"ldr r4, [%[pbuf], #256*4] \n\t"
|
||||
"smlal r5, r6, r7, r1 \n\t"
|
||||
"ldr r7, [%[pbuf], #448*4] \n\t"
|
||||
"smlal r5, r6, r4, r2 \n\t"
|
||||
"ldr r4, [%[pbuf], #448*4] \n\t"
|
||||
"smlal r5, r6, r4, r3 \n\t"
|
||||
"ldr r4, [%[pbuf], #512*4] \n\t"
|
||||
"smlal r5, r6, r7, r3 \n\t"
|
||||
|
||||
"ldmia %[qtab]!, { r0-r3 } \n\t"
|
||||
"ldr r4, [%[pbuf], #512*4] \n\t"
|
||||
"ldr r7, [%[pbuf], #704*4] \n\t"
|
||||
"smlal r5, r6, r4, r0 \n\t"
|
||||
"ldr r4, [%[pbuf], #704*4] \n\t"
|
||||
"smlal r5, r6, r4, r1 \n\t"
|
||||
"ldr r4, [%[pbuf], #768*4] \n\t"
|
||||
"smlal r5, r6, r7, r1 \n\t"
|
||||
"ldr r7, [%[pbuf], #960*4] \n\t"
|
||||
"smlal r5, r6, r4, r2 \n\t"
|
||||
"ldr r4, [%[pbuf], #960*4] \n\t"
|
||||
"smlal r5, r6, r4, r3 \n\t"
|
||||
"mov r2, #1024*4 \n\t"
|
||||
|
||||
"ldmia %[qtab]!, { r0-r1 } \n\t"
|
||||
"mov r2, #1024*4 \n\t"
|
||||
"ldr r4, [%[pbuf], r2] \n\t"
|
||||
"smlal r5, r6, r4, r0 \n\t"
|
||||
"smlal r5, r6, r7, r3 \n\t"
|
||||
"mov r2, #1216*4 \n\t"
|
||||
"ldr r4, [%[pbuf], r2] \n\t"
|
||||
"smlal r5, r6, r4, r1 \n\t"
|
||||
"ldr r7, [%[pbuf], r2] \n\t"
|
||||
"smlal r5, r6, r4, r0 \n\t"
|
||||
"smlal r5, r6, r7, r1 \n\t"
|
||||
|
||||
"str r6, [%[pout]] \n"
|
||||
: [qtab] "+r" (qtab)
|
||||
: [pbuf] "r" (pbuf), [pout] "r" (pout)
|
||||
: "r0", "r1", "r2", "r3", "r4", "r5", "r6", "memory");
|
||||
: "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "memory");
|
||||
}
|
||||
#elif defined CPU_COLDFIRE
|
||||
const real_t *qtab = qmf_c;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue