forked from len0rd/rockbox
Revert r14786, which resulted in a substantial reduction in accuracy to save 7.6KB of RAM. Accuracy should be greatly improved now, and if we want to make that trade-off again, I can think of better ways to do it.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@17783 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
dab9fa1ef7
commit
ff9f3f8c04
2 changed files with 40 additions and 25 deletions
|
|
@ -20,7 +20,14 @@
|
||||||
#include "wmafixed.h"
|
#include "wmafixed.h"
|
||||||
#include "mdct.h"
|
#include "mdct.h"
|
||||||
|
|
||||||
fixed32 tcos0[1024], tsin0[1024]; //these are the sin and cos rotations used by the MDCT
|
/*these are the sin and cos rotations used by the MDCT*/
|
||||||
|
|
||||||
|
/*accessed too infrequently to give much speedup in IRAM*/
|
||||||
|
|
||||||
|
fixed32 *tcosarray[5], *tsinarray[5];
|
||||||
|
fixed32 tcos0[1024], tcos1[512], tcos2[256], tcos3[128], tcos4[64];
|
||||||
|
fixed32 tsin0[1024], tsin1[512], tsin2[256], tsin3[128], tsin4[64];
|
||||||
|
|
||||||
uint16_t revtab0[1024];
|
uint16_t revtab0[1024];
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -28,16 +35,28 @@ uint16_t revtab0[1024];
|
||||||
*/
|
*/
|
||||||
int ff_mdct_init(MDCTContext *s, int nbits, int inverse)
|
int ff_mdct_init(MDCTContext *s, int nbits, int inverse)
|
||||||
{
|
{
|
||||||
int n;
|
int n, n4, i;
|
||||||
// fixed32 alpha;
|
|
||||||
|
|
||||||
memset(s, 0, sizeof(*s));
|
memset(s, 0, sizeof(*s));
|
||||||
n = 1 << nbits; //nbits ranges from 12 to 8 inclusive
|
n = 1 << nbits; //nbits ranges from 12 to 8 inclusive
|
||||||
|
|
||||||
s->nbits = nbits;
|
s->nbits = nbits;
|
||||||
s->n = n;
|
s->n = n;
|
||||||
|
n4 = n >> 2;
|
||||||
|
s->tcos = tcosarray[12-nbits];
|
||||||
|
s->tsin = tsinarray[12-nbits];
|
||||||
|
for(i=0;i<n4;i++)
|
||||||
|
{
|
||||||
|
|
||||||
|
fixed32 ip = itofix32(i) + 0x2000;
|
||||||
|
ip = ip >> nbits;
|
||||||
|
|
||||||
|
/*I can't remember why this works, but it seems to agree for ~24 bits, maybe more!*/
|
||||||
|
s->tsin[i] = - fsincos(ip<<16, &(s->tcos[i]));
|
||||||
|
s->tcos[i] *=-1;
|
||||||
|
}
|
||||||
|
|
||||||
(&s->fft)->nbits = nbits-2;
|
(&s->fft)->nbits = nbits-2;
|
||||||
|
|
||||||
(&s->fft)->inverse = inverse;
|
(&s->fft)->inverse = inverse;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
@ -55,6 +74,8 @@ void ff_imdct_calc(MDCTContext *s,
|
||||||
fixed32 *input)
|
fixed32 *input)
|
||||||
{
|
{
|
||||||
int k, n8, n4, n2, n, j,scale;
|
int k, n8, n4, n2, n, j,scale;
|
||||||
|
const fixed32 *tcos = s->tcos;
|
||||||
|
const fixed32 *tsin = s->tsin;
|
||||||
const fixed32 *in1, *in2;
|
const fixed32 *in1, *in2;
|
||||||
FFTComplex *z1 = (FFTComplex *)output;
|
FFTComplex *z1 = (FFTComplex *)output;
|
||||||
FFTComplex *z2 = (FFTComplex *)input;
|
FFTComplex *z2 = (FFTComplex *)input;
|
||||||
|
|
@ -73,9 +94,8 @@ void ff_imdct_calc(MDCTContext *s,
|
||||||
|
|
||||||
for(k = 0; k < n4; k++)
|
for(k = 0; k < n4; k++)
|
||||||
{
|
{
|
||||||
int kshift = k<<revtabshift;
|
j=revtab0[k<<revtabshift];
|
||||||
j=revtab0[kshift];
|
CMUL(&z1[j].re, &z1[j].im, *in2, *in1, tcos[k], tsin[k]);
|
||||||
CMUL(&z1[j].re, &z1[j].im, *in2, *in1, tcos0[kshift], tsin0[kshift]);
|
|
||||||
in1 += 2;
|
in1 += 2;
|
||||||
in2 -= 2;
|
in2 -= 2;
|
||||||
}
|
}
|
||||||
|
|
@ -86,8 +106,7 @@ void ff_imdct_calc(MDCTContext *s,
|
||||||
|
|
||||||
for(k = 0; k < n4; k++)
|
for(k = 0; k < n4; k++)
|
||||||
{
|
{
|
||||||
int kshift = k<<revtabshift;
|
CMUL(&z2[k].re, &z2[k].im, (z1[k].re), (z1[k].im), tcos[k], tsin[k]);
|
||||||
CMUL(&z2[k].re, &z2[k].im, (z1[k].re), (z1[k].im), tcos0[kshift], tsin0[kshift]);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for(k = 0; k < n8; k++)
|
for(k = 0; k < n8; k++)
|
||||||
|
|
@ -116,9 +135,18 @@ void ff_imdct_calc(MDCTContext *s,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* init MDCT */
|
||||||
|
|
||||||
int mdct_init_global(void)
|
int mdct_init_global(void)
|
||||||
{
|
{
|
||||||
int i,j,m;
|
int i,j,m;
|
||||||
|
|
||||||
|
/* although seemingly degenerate, these cannot actually be merged together without
|
||||||
|
a substantial increase in error which is unjustified by the tiny memory savings*/
|
||||||
|
|
||||||
|
tcosarray[0] = tcos0; tcosarray[1] = tcos1; tcosarray[2] = tcos2; tcosarray[3] = tcos3;tcosarray[4] = tcos4;
|
||||||
|
tsinarray[0] = tsin0; tsinarray[1] = tsin1; tsinarray[2] = tsin2; tsinarray[3] = tsin3;tsinarray[4] = tsin4;
|
||||||
|
|
||||||
/* init the MDCT bit reverse table here rather then in fft_init */
|
/* init the MDCT bit reverse table here rather then in fft_init */
|
||||||
|
|
||||||
for(i=0;i<1024;i++) /*hard coded to a 2048 bit rotation*/
|
for(i=0;i<1024;i++) /*hard coded to a 2048 bit rotation*/
|
||||||
|
|
@ -132,21 +160,6 @@ int mdct_init_global(void)
|
||||||
revtab0[i]=m;
|
revtab0[i]=m;
|
||||||
}
|
}
|
||||||
|
|
||||||
for(i=0;i<1024;i++)
|
|
||||||
{
|
|
||||||
//fixed32 pi2 = fixmul32(0x20000, M_PI_F);
|
|
||||||
fixed32 ip = itofix32(i) + 0x2000;
|
|
||||||
ip = ip >> 12;
|
|
||||||
//ip = fixdiv32(ip,itofix32(n)); // PJJ optimize
|
|
||||||
//alpha = fixmul32(TWO_M_PI_F, ip);
|
|
||||||
//s->tcos[i] = -fixcos32(alpha); //alpha between 0 and pi/2
|
|
||||||
//s->tsin[i] = -fixsin32(alpha);
|
|
||||||
|
|
||||||
//I can't remember why this works, but it seems to agree for ~24 bits, maybe more!
|
|
||||||
tsin0[i] = - fsincos(ip<<16, &(tcos0[i]));
|
|
||||||
tcos0[i] *=-1;
|
|
||||||
}
|
|
||||||
|
|
||||||
fft_init_global();
|
fft_init_global();
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,8 @@ typedef struct MDCTContext
|
||||||
int n; /* size of MDCT (i.e. number of input data * 2) */
|
int n; /* size of MDCT (i.e. number of input data * 2) */
|
||||||
int nbits; /* n = 2^nbits */
|
int nbits; /* n = 2^nbits */
|
||||||
/* pre/post rotation tables */
|
/* pre/post rotation tables */
|
||||||
|
fixed32 *tcos;
|
||||||
|
fixed32 *tsin;
|
||||||
FFTContext fft;
|
FFTContext fft;
|
||||||
}
|
}
|
||||||
MDCTContext;
|
MDCTContext;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue