1
0
Fork 0
forked from len0rd/rockbox

Vorbis optimizations: allocation of PCM buffers in IRAM, avoiding copying.

Slight code improvements to the MDCT functions.


git-svn-id: svn://svn.rockbox.org/rockbox/trunk@6590 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Pedro Vasconcelos 2005-06-06 23:01:51 +00:00
parent 127f14210f
commit 0f10e898f0
11 changed files with 110 additions and 132 deletions

View file

@ -49,13 +49,12 @@ static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) {
} }
static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) { static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) {
ogg_int32_t r;
asm volatile ("mac.l %[x], %[y], %%acc0;" // multiply asm volatile ("mac.l %[x], %[y], %%acc0;" // multiply
"movclr.l %%acc0, %[r];" // move and clear "movclr.l %%acc0, %[x];" // move and clear
: [r] "=r" (r) : [x] "+&r" (x)
: [x] "r" (x), [y] "r" (y) : [y] "r" (y)
: "cc"); : "cc");
return r; return x;
} }
@ -64,11 +63,11 @@ static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
asm volatile ("mac.l %[x], %[y], %%acc0;" // multiply asm volatile ("mac.l %[x], %[y], %%acc0;" // multiply
"movclr.l %%acc0, %[r];" // get higher half "movclr.l %%acc0, %[r];" // get higher half
"mulu.l %[y], %[x];" // get lower half "mulu.l %[y], %[x];" // get lower half
"asl.l #8, %[r];" // hi << 17 "asl.l #8, %[r];" // hi << 17
"asl.l #8, %[r];" "asl.l #8, %[r];"
"lsr.l #8, %[x];" // (unsigned)lo >> 15 "lsr.l #8, %[x];" // (unsigned)lo >> 15
"lsr.l #7, %[x];" "lsr.l #7, %[x];"
"or.l %[x], %[r];" // or "or.l %[x], %[r];" // or
: [r] "=&d" (r), [x] "+d" (x) : [r] "=&d" (r), [x] "+d" (x)
: [y] "d" (y) : [y] "d" (y)
: "cc"); : "cc");
@ -81,18 +80,17 @@ void XPROD31(ogg_int32_t a, ogg_int32_t b,
ogg_int32_t t, ogg_int32_t v, ogg_int32_t t, ogg_int32_t v,
ogg_int32_t *x, ogg_int32_t *y) ogg_int32_t *x, ogg_int32_t *y)
{ {
ogg_int32_t r;
asm volatile ("mac.l %[a], %[t], %%acc0;" asm volatile ("mac.l %[a], %[t], %%acc0;"
"mac.l %[b], %[v], %%acc0;" "mac.l %[b], %[v], %%acc0;"
"mac.l %[b], %[t], %%acc1;" "mac.l %[b], %[t], %%acc1;"
"msac.l %[a], %[v], %%acc1;" "msac.l %[a], %[v], %%acc1;"
"movclr.l %%acc0, %[r];" "movclr.l %%acc0, %[a];"
"move.l %[r], (%[x]);" "move.l %[a], (%[x]);"
"movclr.l %%acc1, %[r];" "movclr.l %%acc1, %[a];"
"move.l %[r], (%[y]);" "move.l %[a], (%[y]);"
: [r] "=&r" (r) : [a] "+&r" (a)
: [x] "a" (x), [y] "a" (y), : [x] "a" (x), [y] "a" (y),
[a] "r" (a), [b] "r" (b), [t] "r" (t), [v] "r" (v) [b] "r" (b), [t] "r" (t), [v] "r" (v)
: "cc", "memory"); : "cc", "memory");
} }
@ -102,23 +100,23 @@ void XNPROD31(ogg_int32_t a, ogg_int32_t b,
ogg_int32_t t, ogg_int32_t v, ogg_int32_t t, ogg_int32_t v,
ogg_int32_t *x, ogg_int32_t *y) ogg_int32_t *x, ogg_int32_t *y)
{ {
ogg_int32_t r;
asm volatile ("mac.l %[a], %[t], %%acc0;" asm volatile ("mac.l %[a], %[t], %%acc0;"
"msac.l %[b], %[v], %%acc0;" "msac.l %[b], %[v], %%acc0;"
"mac.l %[b], %[t], %%acc1;" "mac.l %[b], %[t], %%acc1;"
"mac.l %[a], %[v], %%acc1;" "mac.l %[a], %[v], %%acc1;"
"movclr.l %%acc0, %[r];" "movclr.l %%acc0, %[a];"
"move.l %[r], (%[x]);" "move.l %[a], (%[x]);"
"movclr.l %%acc1, %[r];" "movclr.l %%acc1, %[a];"
"move.l %[r], (%[y]);" "move.l %[a], (%[y]);"
: [r] "=&r" (r) : [a] "+&r" (a)
: [x] "a" (x), [y] "a" (y), : [x] "a" (x), [y] "a" (y),
[a] "r" (a), [b] "r" (b), [t] "r" (t), [v] "r" (v) [b] "r" (b), [t] "r" (t), [v] "r" (v)
: "cc", "memory"); : "cc", "memory");
} }
/* no faster way of doing this using the MAC? */
/* is there no better way of doing this using the MAC? */
#define XPROD32(_a, _b, _t, _v, _x, _y) \ #define XPROD32(_a, _b, _t, _v, _x, _y) \
{ (_x)=MULT32(_a,_t)+MULT32(_b,_v); \ { (_x)=MULT32(_a,_t)+MULT32(_b,_v); \
(_y)=MULT32(_b,_t)-MULT32(_a,_v); } (_y)=MULT32(_b,_t)-MULT32(_a,_v); }

View file

@ -22,7 +22,7 @@
#include <string.h> #include <string.h>
#include "ogg.h" #include "ogg.h"
static const unsigned long mask[] IDATA_ATTR = static const unsigned long mask[] =
{0x00000000,0x00000001,0x00000003,0x00000007,0x0000000f, {0x00000000,0x00000001,0x00000003,0x00000007,0x0000000f,
0x0000001f,0x0000003f,0x0000007f,0x000000ff,0x000001ff, 0x0000001f,0x0000003f,0x0000007f,0x000000ff,0x000001ff,
0x000003ff,0x000007ff,0x00000fff,0x00001fff,0x00003fff, 0x000003ff,0x000007ff,0x00000fff,0x00001fff,0x00003fff,

View file

@ -140,7 +140,7 @@ int vorbis_staticbook_unpack(oggpack_buffer *opb,static_codebook *s){
be. The first-stage decode table catches most words so that be. The first-stage decode table catches most words so that
bitreverse is not in the main execution path. */ bitreverse is not in the main execution path. */
static ogg_uint32_t bitreverse(ogg_uint32_t x){ static inline ogg_uint32_t bitreverse(register ogg_uint32_t x){
x= ((x>>16)&0x0000ffff) | ((x<<16)&0xffff0000); x= ((x>>16)&0x0000ffff) | ((x<<16)&0xffff0000);
x= ((x>> 8)&0x00ff00ff) | ((x<< 8)&0xff00ff00); x= ((x>> 8)&0x00ff00ff) | ((x<< 8)&0xff00ff00);
x= ((x>> 4)&0x0f0f0f0f) | ((x<< 4)&0xf0f0f0f0); x= ((x>> 4)&0x0f0f0f0f) | ((x<< 4)&0xf0f0f0f0);
@ -265,12 +265,13 @@ long vorbis_book_decodev_add(codebook *book,ogg_int32_t *a,
a[i++]+=t[j++]>>shift; a[i++]+=t[j++]>>shift;
} }
}else{ }else{
shift = -shift;
for(i=0;i<n;){ for(i=0;i<n;){
entry = decode_packed_entry_number(book,b); entry = decode_packed_entry_number(book,b);
if(entry==-1)return(-1); if(entry==-1)return(-1);
t = book->valuelist+entry*book->dim; t = book->valuelist+entry*book->dim;
for (j=0;j<book->dim;) for (j=0;j<book->dim;)
a[i++]+=t[j++]<<-shift; a[i++]+=t[j++]<<shift;
} }
} }
return(0); return(0);
@ -293,13 +294,13 @@ long vorbis_book_decodev_set(codebook *book,ogg_int32_t *a,
} }
} }
}else{ }else{
shift = -shift;
for(i=0;i<n;){ for(i=0;i<n;){
entry = decode_packed_entry_number(book,b); entry = decode_packed_entry_number(book,b);
if(entry==-1)return(-1); if(entry==-1)return(-1);
t = book->valuelist+entry*book->dim; t = book->valuelist+entry*book->dim;
for (j=0;j<book->dim;){ for (j=0;j<book->dim;){
a[i++]=t[j++]<<-shift; a[i++]=t[j++]<<shift;
} }
} }
} }
@ -330,14 +331,14 @@ long vorbis_book_decodevv_add(codebook *book,ogg_int32_t **a,
} }
} }
}else{ }else{
shift = -shift;
for(i=offset;i<offset+n;){ for(i=offset;i<offset+n;){
entry = decode_packed_entry_number(book,b); entry = decode_packed_entry_number(book,b);
if(entry==-1)return(-1); if(entry==-1)return(-1);
{ {
const ogg_int32_t *t = book->valuelist+entry*book->dim; const ogg_int32_t *t = book->valuelist+entry*book->dim;
for (j=0;j<book->dim;j++){ for (j=0;j<book->dim;j++){
a[chptr++][i]+=t[j]<<-shift; a[chptr++][i]+=t[j]<<shift;
if(chptr==ch){ if(chptr==ch){
chptr=0; chptr=0;
i++; i++;

View file

@ -45,7 +45,7 @@ typedef struct {
static void floor1_free_info(vorbis_info_floor *i){ static void floor1_free_info(vorbis_info_floor *i){
vorbis_info_floor1 *info=(vorbis_info_floor1 *)i; vorbis_info_floor1 *info=(vorbis_info_floor1 *)i;
if(info){ if(info){
memset(info,0,sizeof(*info)); //memset(info,0,sizeof(*info));
_ogg_free(info); _ogg_free(info);
} }
} }
@ -53,13 +53,13 @@ static void floor1_free_info(vorbis_info_floor *i){
static void floor1_free_look(vorbis_look_floor *i){ static void floor1_free_look(vorbis_look_floor *i){
vorbis_look_floor1 *look=(vorbis_look_floor1 *)i; vorbis_look_floor1 *look=(vorbis_look_floor1 *)i;
if(look){ if(look){
memset(look,0,sizeof(*look)); //memset(look,0,sizeof(*look));
_ogg_free(look); _ogg_free(look);
} }
} }
static int ilog(unsigned int v){ static inline int ilog(register unsigned int v){
int ret=0; register int ret=0;
while(v){ while(v){
ret++; ret++;
v>>=1; v>>=1;
@ -124,7 +124,7 @@ static int icomp(const void *a,const void *b){
static vorbis_look_floor *floor1_look(vorbis_dsp_state *vd,vorbis_info_mode *mi, static vorbis_look_floor *floor1_look(vorbis_dsp_state *vd,vorbis_info_mode *mi,
vorbis_info_floor *in){ vorbis_info_floor *in){
static int *sortpointer[VIF_POSIT+2] IDATA_ATTR; int *sortpointer[VIF_POSIT+2];
vorbis_info_floor1 *info=(vorbis_info_floor1 *)in; vorbis_info_floor1 *info=(vorbis_info_floor1 *)in;
vorbis_look_floor1 *look=(vorbis_look_floor1 *)_ogg_calloc(1,sizeof(*look)); vorbis_look_floor1 *look=(vorbis_look_floor1 *)_ogg_calloc(1,sizeof(*look));
int i,j,n=0; int i,j,n=0;
@ -216,7 +216,7 @@ static int render_point(int x0,int x1,int y0,int y1,int x){
# define XdB(n) (n) # define XdB(n) (n)
#endif #endif
static ogg_int32_t FLOOR_fromdB_LOOKUP[256] IDATA_ATTR ={ static ogg_int32_t FLOOR_fromdB_LOOKUP[256] ={
XdB(0x000000e5), XdB(0x000000f4), XdB(0x00000103), XdB(0x00000114), XdB(0x000000e5), XdB(0x000000f4), XdB(0x00000103), XdB(0x00000114),
XdB(0x00000126), XdB(0x00000139), XdB(0x0000014e), XdB(0x00000163), XdB(0x00000126), XdB(0x00000139), XdB(0x0000014e), XdB(0x00000163),
XdB(0x0000017a), XdB(0x00000193), XdB(0x000001ad), XdB(0x000001c9), XdB(0x0000017a), XdB(0x00000193), XdB(0x000001ad), XdB(0x000001c9),
@ -313,16 +313,15 @@ static void *floor1_inverse1(vorbis_block *vb,vorbis_look_floor *in){
vorbis_look_floor1 *look=(vorbis_look_floor1 *)in; vorbis_look_floor1 *look=(vorbis_look_floor1 *)in;
vorbis_info_floor1 *info=look->vi; vorbis_info_floor1 *info=look->vi;
codec_setup_info *ci=(codec_setup_info *)vb->vd->vi->codec_setup; codec_setup_info *ci=(codec_setup_info *)vb->vd->vi->codec_setup;
int i,j,k; int i,j,k;
codebook *books=ci->fullbooks; codebook *books=ci->fullbooks;
/* unpack wrapped/predicted values from stream */ /* unpack wrapped/predicted values from stream */
if(oggpack_read(&vb->opb,1)==1){ if(oggpack_read(&vb->opb,1)==1){
int *fit_value=(int *)_vorbis_block_alloc(vb,(look->posts)*sizeof(*fit_value)); int *fit_value=(int *)_vorbis_block_alloc(vb,(look->posts)*sizeof(*fit_value));
int ilg = ilog(look->quant_q-1);
fit_value[0]=oggpack_read(&vb->opb,ilog(look->quant_q-1)); fit_value[0]=oggpack_read(&vb->opb,ilg);
fit_value[1]=oggpack_read(&vb->opb,ilog(look->quant_q-1)); fit_value[1]=oggpack_read(&vb->opb,ilg);
/* partition by partition */ /* partition by partition */
/* partition by partition */ /* partition by partition */

View file

@ -501,7 +501,7 @@ int ogg_page_packets(ogg_page *og){
/* Static CRC calculation table. See older code in CVS for dead /* Static CRC calculation table. See older code in CVS for dead
run-time initialization code. */ run-time initialization code. */
static ogg_uint32_t crc_lookup[256] IDATA_ATTR = { static ogg_uint32_t crc_lookup[256] = {
0x00000000,0x04c11db7,0x09823b6e,0x0d4326d9, 0x00000000,0x04c11db7,0x09823b6e,0x0d4326d9,
0x130476dc,0x17c56b6b,0x1a864db2,0x1e475005, 0x130476dc,0x17c56b6b,0x1a864db2,0x1e475005,
0x2608edb8,0x22c9f00f,0x2f8ad6d6,0x2b4bcb61, 0x2608edb8,0x22c9f00f,0x2f8ad6d6,0x2b4bcb61,

View file

@ -180,12 +180,6 @@ static vorbis_info_mapping *mapping0_unpack(vorbis_info *vi,oggpack_buffer *opb)
} }
/* IRAM buffer to keep the PCM data; only for window sizes up to 2048
due to space restrictions. No real compromise: larger window sizes
are only used for very low quality settings (q<0?) */
#define IRAM_PCM_SIZE 2048
static ogg_int32_t pcm_iram[IRAM_PCM_SIZE] IDATA_ATTR;
static int seq = 0; static int seq = 0;
#define CHANNELS 2 /* max 2 channels on the ihp-1xx (stereo) */ #define CHANNELS 2 /* max 2 channels on the ihp-1xx (stereo) */
@ -201,11 +195,12 @@ static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){
int i,j; int i,j;
long n=vb->pcmend=ci->blocksizes[vb->W]; long n=vb->pcmend=ci->blocksizes[vb->W];
/* statically allocate mapping structures in IRAM */ /* bounded mapping arrays instead of using alloca();
static ogg_int32_t *pcmbundle[CHANNELS] IDATA_ATTR; avoids memory leak; we can only deal with stereo anyway */
static int zerobundle[CHANNELS] IDATA_ATTR; ogg_int32_t *pcmbundle[CHANNELS];
static int nonzero[CHANNELS] IDATA_ATTR; int zerobundle[CHANNELS];
static void *floormemo[CHANNELS] IDATA_ATTR; int nonzero[CHANNELS];
void *floormemo[CHANNELS];
/* test for too many channels; /* test for too many channels;
(maybe this can be checked at the stream level?) */ (maybe this can be checked at the stream level?) */
@ -249,7 +244,7 @@ static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){
pcmbundle[ch_in_bundle++]=vb->pcm[j]; pcmbundle[ch_in_bundle++]=vb->pcm[j];
} }
} }
look->residue_func[i]->inverse(vb,look->residue_look[i], look->residue_func[i]->inverse(vb,look->residue_look[i],
pcmbundle,zerobundle,ch_in_bundle); pcmbundle,zerobundle,ch_in_bundle);
} }
@ -286,13 +281,10 @@ static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){
} }
} }
//for(j=0;j<vi->channels;j++) //for(j=0;j<vi->channels;j++)
//_analysis_output("residue",seq+j,vb->pcm[j],-8,n/2,0,0); //_analysis_output("residue",seq+j,vb->pcm[j],-8,n/2,0,0);
/* pbv: removed this loop by fusion with the following one
to avoid recopying data to/from the IRAM */
#if 0
/* compute and apply spectral envelope */ /* compute and apply spectral envelope */
for(i=0;i<vi->channels;i++){ for(i=0;i<vi->channels;i++){
ogg_int32_t *pcm=vb->pcm[i]; ogg_int32_t *pcm=vb->pcm[i];
@ -300,7 +292,6 @@ static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){
look->floor_func[submap]-> look->floor_func[submap]->
inverse2(vb,look->floor_look[submap],floormemo[i],pcm); inverse2(vb,look->floor_look[submap],floormemo[i],pcm);
} }
#endif
//for(j=0;j<vi->channels;j++) //for(j=0;j<vi->channels;j++)
//_analysis_output("mdct",seq+j,vb->pcm[j],-24,n/2,0,1); //_analysis_output("mdct",seq+j,vb->pcm[j],-24,n/2,0,1);
@ -308,32 +299,9 @@ static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){
/* transform the PCM data; takes PCM vector, vb; modifies PCM vector */ /* transform the PCM data; takes PCM vector, vb; modifies PCM vector */
/* only MDCT right now.... */ /* only MDCT right now.... */
/* check if we can do this in IRAM */ for(i=0;i<vi->channels;i++){
if(n <= IRAM_PCM_SIZE) { /* normal window size: yes */ ogg_int32_t *pcm=vb->pcm[i];
for(i=0;i<vi->channels;i++){
ogg_int32_t *pcm=vb->pcm[i];
int submap=info->chmuxlist[i];
if(nonzero[i]) {
memcpy(pcm_iram, pcm, sizeof(ogg_int32_t)*n);
look->floor_func[submap]->
inverse2(vb,look->floor_look[submap],floormemo[i],pcm_iram);
mdct_backward(n, pcm_iram, pcm_iram);
/* window the data */
_vorbis_apply_window(pcm_iram,b->window,ci->blocksizes,vb->lW,vb->W,vb->nW);
memcpy(pcm, pcm_iram, sizeof(ogg_int32_t)*n);
}
else
memset(pcm, 0, sizeof(ogg_int32_t)*n);
}
}
else { /* large window: no, do it in the normal memory */
for(i=0;i<vi->channels;i++){
ogg_int32_t *pcm=vb->pcm[i];
int submap=info->chmuxlist[i];
look->floor_func[submap]->
inverse2(vb,look->floor_look[submap],floormemo[i],pcm);
if(nonzero[i]) { if(nonzero[i]) {
mdct_backward(n, pcm, pcm); mdct_backward(n, pcm, pcm);
/* window the data */ /* window the data */
@ -341,7 +309,6 @@ static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){
} }
else else
memset(pcm, 0, sizeof(ogg_int32_t)*n); memset(pcm, 0, sizeof(ogg_int32_t)*n);
}
} }
//for(j=0;j<vi->channels;j++) //for(j=0;j<vi->channels;j++)

View file

@ -41,7 +41,6 @@
/* 8 point butterfly (in place) */ /* 8 point butterfly (in place) */
STIN void mdct_butterfly_8(DATA_TYPE *x){ STIN void mdct_butterfly_8(DATA_TYPE *x){
REG_TYPE r0 = x[4] + x[0]; REG_TYPE r0 = x[4] + x[0];
REG_TYPE r1 = x[4] - x[0]; REG_TYPE r1 = x[4] - x[0];
REG_TYPE r2 = x[5] + x[1]; REG_TYPE r2 = x[5] + x[1];
@ -144,88 +143,81 @@ STIN void mdct_butterfly_32(DATA_TYPE *x){
mdct_butterfly_16(x+16); mdct_butterfly_16(x+16);
} }
/* N/stage point generic N stage butterfly (in place, 2 register) */ /* N/stage point generic N stage butterfly (in place, 4 register) */
STIN void mdct_butterfly_generic(DATA_TYPE *x,int points,int step){ void mdct_butterfly_generic(DATA_TYPE *x,int points, int step){
LOOKUP_T *T = sincos_lookup0; LOOKUP_T *T = sincos_lookup0;
DATA_TYPE *x1 = x + points - 8; DATA_TYPE *x1 = x + points - 8;
DATA_TYPE *x2 = x + (points>>1) - 8; DATA_TYPE *x2 = x + (points>>1) - 8;
REG_TYPE r0; REG_TYPE r0;
REG_TYPE r1; REG_TYPE r1;
REG_TYPE r2;
REG_TYPE r3;
do{ do{
r0 = x1[6] - x2[6]; x1[6] += x2[6]; r0 = x1[6] - x2[6]; x1[6] += x2[6];
r1 = x2[7] - x1[7]; x1[7] += x2[7]; r1 = x2[7] - x1[7]; x1[7] += x2[7];
r2 = x1[4] - x2[4]; x1[4] += x2[4];
r3 = x2[5] - x1[5]; x1[5] += x2[5];
XPROD31( r1, r0, T[0], T[1], &x2[6], &x2[7] ); T+=step; XPROD31( r1, r0, T[0], T[1], &x2[6], &x2[7] ); T+=step;
XPROD31( r3, r2, T[0], T[1], &x2[4], &x2[5] ); T+=step;
r0 = x1[4] - x2[4]; x1[4] += x2[4];
r1 = x2[5] - x1[5]; x1[5] += x2[5];
XPROD31( r1, r0, T[0], T[1], &x2[4], &x2[5] ); T+=step;
r0 = x1[2] - x2[2]; x1[2] += x2[2]; r0 = x1[2] - x2[2]; x1[2] += x2[2];
r1 = x2[3] - x1[3]; x1[3] += x2[3]; r1 = x2[3] - x1[3]; x1[3] += x2[3];
r2 = x1[0] - x2[0]; x1[0] += x2[0];
r3 = x2[1] - x1[1]; x1[1] += x2[1];
XPROD31( r1, r0, T[0], T[1], &x2[2], &x2[3] ); T+=step; XPROD31( r1, r0, T[0], T[1], &x2[2], &x2[3] ); T+=step;
XPROD31( r3, r2, T[0], T[1], &x2[0], &x2[1] ); T+=step;
r0 = x1[0] - x2[0]; x1[0] += x2[0];
r1 = x2[1] - x1[1]; x1[1] += x2[1];
XPROD31( r1, r0, T[0], T[1], &x2[0], &x2[1] ); T+=step;
x1-=8; x2-=8; x1-=8; x2-=8;
}while(T<sincos_lookup0+1024); }while(T<sincos_lookup0+1024);
do{ do{
r0 = x1[6] - x2[6]; x1[6] += x2[6]; r0 = x1[6] - x2[6]; x1[6] += x2[6];
r1 = x1[7] - x2[7]; x1[7] += x2[7]; r1 = x1[7] - x2[7]; x1[7] += x2[7];
r2 = x1[4] - x2[4]; x1[4] += x2[4];
r3 = x1[5] - x2[5]; x1[5] += x2[5];
XNPROD31( r0, r1, T[0], T[1], &x2[6], &x2[7] ); T-=step; XNPROD31( r0, r1, T[0], T[1], &x2[6], &x2[7] ); T-=step;
XNPROD31( r2, r3, T[0], T[1], &x2[4], &x2[5] ); T-=step;
r0 = x1[4] - x2[4]; x1[4] += x2[4];
r1 = x1[5] - x2[5]; x1[5] += x2[5];
XNPROD31( r0, r1, T[0], T[1], &x2[4], &x2[5] ); T-=step;
r0 = x1[2] - x2[2]; x1[2] += x2[2]; r0 = x1[2] - x2[2]; x1[2] += x2[2];
r1 = x1[3] - x2[3]; x1[3] += x2[3]; r1 = x1[3] - x2[3]; x1[3] += x2[3];
r2 = x1[0] - x2[0]; x1[0] += x2[0];
r3 = x1[1] - x2[1]; x1[1] += x2[1];
XNPROD31( r0, r1, T[0], T[1], &x2[2], &x2[3] ); T-=step; XNPROD31( r0, r1, T[0], T[1], &x2[2], &x2[3] ); T-=step;
XNPROD31( r2, r3, T[0], T[1], &x2[0], &x2[1] ); T-=step;
r0 = x1[0] - x2[0]; x1[0] += x2[0];
r1 = x1[1] - x2[1]; x1[1] += x2[1];
XNPROD31( r0, r1, T[0], T[1], &x2[0], &x2[1] ); T-=step;
x1-=8; x2-=8; x1-=8; x2-=8;
}while(T>sincos_lookup0); }while(T>sincos_lookup0);
do{ do{
r0 = x2[6] - x1[6]; x1[6] += x2[6]; r0 = x2[6] - x1[6]; x1[6] += x2[6];
r1 = x2[7] - x1[7]; x1[7] += x2[7]; r1 = x2[7] - x1[7]; x1[7] += x2[7];
r2 = x2[4] - x1[4]; x1[4] += x2[4];
r3 = x2[5] - x1[5]; x1[5] += x2[5];
XPROD31( r0, r1, T[0], T[1], &x2[6], &x2[7] ); T+=step; XPROD31( r0, r1, T[0], T[1], &x2[6], &x2[7] ); T+=step;
XPROD31( r2, r3, T[0], T[1], &x2[4], &x2[5] ); T+=step;
r0 = x2[4] - x1[4]; x1[4] += x2[4];
r1 = x2[5] - x1[5]; x1[5] += x2[5];
XPROD31( r0, r1, T[0], T[1], &x2[4], &x2[5] ); T+=step;
r0 = x2[2] - x1[2]; x1[2] += x2[2]; r0 = x2[2] - x1[2]; x1[2] += x2[2];
r1 = x2[3] - x1[3]; x1[3] += x2[3]; r1 = x2[3] - x1[3]; x1[3] += x2[3];
r2 = x2[0] - x1[0]; x1[0] += x2[0];
r3 = x2[1] - x1[1]; x1[1] += x2[1];
XPROD31( r0, r1, T[0], T[1], &x2[2], &x2[3] ); T+=step; XPROD31( r0, r1, T[0], T[1], &x2[2], &x2[3] ); T+=step;
XPROD31( r2, r3, T[0], T[1], &x2[0], &x2[1] ); T+=step;
r0 = x2[0] - x1[0]; x1[0] += x2[0];
r1 = x2[1] - x1[1]; x1[1] += x2[1];
XPROD31( r0, r1, T[0], T[1], &x2[0], &x2[1] ); T+=step;
x1-=8; x2-=8; x1-=8; x2-=8;
}while(T<sincos_lookup0+1024); }while(T<sincos_lookup0+1024);
do{ do{
r0 = x1[6] - x2[6]; x1[6] += x2[6]; r0 = x1[6] - x2[6]; x1[6] += x2[6];
r1 = x2[7] - x1[7]; x1[7] += x2[7]; r1 = x2[7] - x1[7]; x1[7] += x2[7];
r2 = x1[4] - x2[4]; x1[4] += x2[4];
r3 = x2[5] - x1[5]; x1[5] += x2[5];
XNPROD31( r1, r0, T[0], T[1], &x2[6], &x2[7] ); T-=step; XNPROD31( r1, r0, T[0], T[1], &x2[6], &x2[7] ); T-=step;
XNPROD31( r3, r2, T[0], T[1], &x2[4], &x2[5] ); T-=step;
r0 = x1[4] - x2[4]; x1[4] += x2[4];
r1 = x2[5] - x1[5]; x1[5] += x2[5];
XNPROD31( r1, r0, T[0], T[1], &x2[4], &x2[5] ); T-=step;
r0 = x1[2] - x2[2]; x1[2] += x2[2]; r0 = x1[2] - x2[2]; x1[2] += x2[2];
r1 = x2[3] - x1[3]; x1[3] += x2[3]; r1 = x2[3] - x1[3]; x1[3] += x2[3];
r2 = x1[0] - x2[0]; x1[0] += x2[0];
r3 = x2[1] - x1[1]; x1[1] += x2[1];
XNPROD31( r1, r0, T[0], T[1], &x2[2], &x2[3] ); T-=step; XNPROD31( r1, r0, T[0], T[1], &x2[2], &x2[3] ); T-=step;
XNPROD31( r3, r2, T[0], T[1], &x2[0], &x2[1] ); T-=step;
r0 = x1[0] - x2[0]; x1[0] += x2[0];
r1 = x2[1] - x1[1]; x1[1] += x2[1];
XNPROD31( r1, r0, T[0], T[1], &x2[0], &x2[1] ); T-=step;
x1-=8; x2-=8; x1-=8; x2-=8;
}while(T>sincos_lookup0); }while(T>sincos_lookup0);
@ -246,8 +238,8 @@ STIN void mdct_butterflies(DATA_TYPE *x,int points,int shift) {
} }
static const unsigned char bitrev[16] static const unsigned char bitrev[16] IDATA_ATTR =
IDATA_ATTR = {0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15}; {0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15};
STIN int bitrev12(int x){ STIN int bitrev12(int x){
return bitrev[x>>8]|(bitrev[(x&0x0f0)>>4]<<4)|(((int)bitrev[x&0x00f])<<8); return bitrev[x>>8]|(bitrev[(x&0x0f0)>>4]<<4)|(((int)bitrev[x&0x00f])<<8);

View file

@ -281,7 +281,7 @@ static LOOKUP_T sincos_lookup0[1026] IDATA_ATTR = {
}; };
/* {sin((2*i+1)*PI/4096), cos((2*i+1)*PI/4096)}, with i = 0 to 511 */ /* {sin((2*i+1)*PI/4096), cos((2*i+1)*PI/4096)}, with i = 0 to 511 */
static LOOKUP_T sincos_lookup1[1024] IDATA_ATTR = { static LOOKUP_T sincos_lookup1[1024] = {
X(0x001921fb), X(0x7ffffd88), X(0x004b65ee), X(0x7fffe9cb), X(0x001921fb), X(0x7ffffd88), X(0x004b65ee), X(0x7fffe9cb),
X(0x007da9d4), X(0x7fffc251), X(0x00afeda8), X(0x7fff8719), X(0x007da9d4), X(0x7fffc251), X(0x00afeda8), X(0x7fff8719),
X(0x00e23160), X(0x7fff3824), X(0x011474f6), X(0x7ffed572), X(0x00e23160), X(0x7fff3824), X(0x011474f6), X(0x7ffed572),

View file

@ -172,6 +172,7 @@ vorbis_look_residue *res0_look(vorbis_dsp_state *vd,vorbis_info_mode *vm,
return(look); return(look);
} }
#define CHANNELS 2
/* a truncated packet here just means 'stop working'; it's not an error */ /* a truncated packet here just means 'stop working'; it's not an error */
static int _01inverse(vorbis_block *vb,vorbis_look_residue *vl, static int _01inverse(vorbis_block *vb,vorbis_look_residue *vl,
@ -254,6 +255,8 @@ int res1_inverse(vorbis_block *vb,vorbis_look_residue *vl,
return(0); return(0);
} }
/* duplicate code here as speed is somewhat more important */ /* duplicate code here as speed is somewhat more important */
int res2_inverse(vorbis_block *vb,vorbis_look_residue *vl, int res2_inverse(vorbis_block *vb,vorbis_look_residue *vl,
ogg_int32_t **in,int *nonzero,int ch){ ogg_int32_t **in,int *nonzero,int ch){
@ -291,7 +294,6 @@ int res2_inverse(vorbis_block *vb,vorbis_look_residue *vl,
for(k=0;k<partitions_per_word && i<partvals;k++,i++) for(k=0;k<partitions_per_word && i<partvals;k++,i++)
if(info->secondstages[partword[l][k]]&(1<<s)){ if(info->secondstages[partword[l][k]]&(1<<s)){
codebook *stagebook=look->partbooks[partword[l][k]][s]; codebook *stagebook=look->partbooks[partword[l][k]][s];
if(stagebook){ if(stagebook){
if(vorbis_book_decodevv_add(stagebook,in, if(vorbis_book_decodevv_add(stagebook,in,
i*samples_per_partition+beginoff,ch, i*samples_per_partition+beginoff,ch,

View file

@ -24,6 +24,17 @@
#include "misc.h" #include "misc.h"
#include "os.h" #include "os.h"
/* IRAM buffer to keep the block PCM data; only for window sizes up to 2048
due to space restrictions. No real compromise: larger window sizes
are only used for very low quality settings (q<0?) */
/* max 2 channels on the ihp-1xx (stereo), 2048 samples (2*2048*4 bytes = 16 KB) */
#define IRAM_PCM_END 2048
#define CHANNELS 2
static ogg_int32_t *ipcm_vect[CHANNELS] IDATA_ATTR;
static ogg_int32_t ipcm_buff[CHANNELS*IRAM_PCM_END] IDATA_ATTR;
int vorbis_synthesis(vorbis_block *vb,ogg_packet *op,int decodep){ int vorbis_synthesis(vorbis_block *vb,ogg_packet *op,int decodep){
vorbis_dsp_state *vd=vb->vd; vorbis_dsp_state *vd=vb->vd;
private_state *b=(private_state *)vd->backend_state; private_state *b=(private_state *)vd->backend_state;
@ -65,10 +76,18 @@ int vorbis_synthesis(vorbis_block *vb,ogg_packet *op,int decodep){
if(decodep){ if(decodep){
/* alloc pcm passback storage */ /* alloc pcm passback storage */
vb->pcmend=ci->blocksizes[vb->W]; vb->pcmend=ci->blocksizes[vb->W];
vb->pcm=(ogg_int32_t **)_vorbis_block_alloc(vb,sizeof(*vb->pcm)*vi->channels); if (vi->channels <= CHANNELS && vb->pcmend<=IRAM_PCM_END) {
for(i=0;i<vi->channels;i++) /* use statically allocated iram buffer */
vb->pcm[i]=(ogg_int32_t *)_vorbis_block_alloc(vb,vb->pcmend*sizeof(*vb->pcm[i])); vb->pcm = ipcm_vect;
for(i=0; i<CHANNELS; i++)
vb->pcm[i] = &ipcm_buff[i*IRAM_PCM_END];
} else {
/* dynamic allocation (slower) */
vb->pcm=(ogg_int32_t **)_vorbis_block_alloc(vb,sizeof(*vb->pcm)*vi->channels);
for(i=0;i<vi->channels;i++)
vb->pcm[i]=(ogg_int32_t *)_vorbis_block_alloc(vb,vb->pcmend*sizeof(*vb->pcm[i]));
}
/* unpack_header enforces range checking */ /* unpack_header enforces range checking */
type=ci->map_type[ci->mode_param[mode]->mapping]; type=ci->map_type[ci->mode_param[mode]->mapping];

View file

@ -18,7 +18,7 @@
#include "os_types.h" #include "os_types.h"
/* keep small window table in fast IRAM */ /* keep small window tables in fast IRAM */
static LOOKUP_T vwin64[32] IDATA_ATTR = { static LOOKUP_T vwin64[32] IDATA_ATTR = {
X(0x001f0003), X(0x01168c98), X(0x030333c8), X(0x05dfe3a4), X(0x001f0003), X(0x01168c98), X(0x030333c8), X(0x05dfe3a4),
X(0x09a49562), X(0x0e45df18), X(0x13b47ef2), X(0x19dcf676), X(0x09a49562), X(0x0e45df18), X(0x13b47ef2), X(0x19dcf676),