forked from len0rd/rockbox
Vorbis optimizations: allocation of PCM buffers in IRAM, avoiding copying.
Slight code improvements to the MDCT functions. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@6590 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
127f14210f
commit
0f10e898f0
11 changed files with 110 additions and 132 deletions
|
@ -49,13 +49,12 @@ static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) {
|
static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) {
|
||||||
ogg_int32_t r;
|
|
||||||
asm volatile ("mac.l %[x], %[y], %%acc0;" // multiply
|
asm volatile ("mac.l %[x], %[y], %%acc0;" // multiply
|
||||||
"movclr.l %%acc0, %[r];" // move and clear
|
"movclr.l %%acc0, %[x];" // move and clear
|
||||||
: [r] "=r" (r)
|
: [x] "+&r" (x)
|
||||||
: [x] "r" (x), [y] "r" (y)
|
: [y] "r" (y)
|
||||||
: "cc");
|
: "cc");
|
||||||
return r;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -64,11 +63,11 @@ static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
|
||||||
asm volatile ("mac.l %[x], %[y], %%acc0;" // multiply
|
asm volatile ("mac.l %[x], %[y], %%acc0;" // multiply
|
||||||
"movclr.l %%acc0, %[r];" // get higher half
|
"movclr.l %%acc0, %[r];" // get higher half
|
||||||
"mulu.l %[y], %[x];" // get lower half
|
"mulu.l %[y], %[x];" // get lower half
|
||||||
"asl.l #8, %[r];" // hi << 17
|
"asl.l #8, %[r];" // hi << 17
|
||||||
"asl.l #8, %[r];"
|
"asl.l #8, %[r];"
|
||||||
"lsr.l #8, %[x];" // (unsigned)lo >> 15
|
"lsr.l #8, %[x];" // (unsigned)lo >> 15
|
||||||
"lsr.l #7, %[x];"
|
"lsr.l #7, %[x];"
|
||||||
"or.l %[x], %[r];" // or
|
"or.l %[x], %[r];" // or
|
||||||
: [r] "=&d" (r), [x] "+d" (x)
|
: [r] "=&d" (r), [x] "+d" (x)
|
||||||
: [y] "d" (y)
|
: [y] "d" (y)
|
||||||
: "cc");
|
: "cc");
|
||||||
|
@ -81,18 +80,17 @@ void XPROD31(ogg_int32_t a, ogg_int32_t b,
|
||||||
ogg_int32_t t, ogg_int32_t v,
|
ogg_int32_t t, ogg_int32_t v,
|
||||||
ogg_int32_t *x, ogg_int32_t *y)
|
ogg_int32_t *x, ogg_int32_t *y)
|
||||||
{
|
{
|
||||||
ogg_int32_t r;
|
|
||||||
asm volatile ("mac.l %[a], %[t], %%acc0;"
|
asm volatile ("mac.l %[a], %[t], %%acc0;"
|
||||||
"mac.l %[b], %[v], %%acc0;"
|
"mac.l %[b], %[v], %%acc0;"
|
||||||
"mac.l %[b], %[t], %%acc1;"
|
"mac.l %[b], %[t], %%acc1;"
|
||||||
"msac.l %[a], %[v], %%acc1;"
|
"msac.l %[a], %[v], %%acc1;"
|
||||||
"movclr.l %%acc0, %[r];"
|
"movclr.l %%acc0, %[a];"
|
||||||
"move.l %[r], (%[x]);"
|
"move.l %[a], (%[x]);"
|
||||||
"movclr.l %%acc1, %[r];"
|
"movclr.l %%acc1, %[a];"
|
||||||
"move.l %[r], (%[y]);"
|
"move.l %[a], (%[y]);"
|
||||||
: [r] "=&r" (r)
|
: [a] "+&r" (a)
|
||||||
: [x] "a" (x), [y] "a" (y),
|
: [x] "a" (x), [y] "a" (y),
|
||||||
[a] "r" (a), [b] "r" (b), [t] "r" (t), [v] "r" (v)
|
[b] "r" (b), [t] "r" (t), [v] "r" (v)
|
||||||
: "cc", "memory");
|
: "cc", "memory");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -102,23 +100,23 @@ void XNPROD31(ogg_int32_t a, ogg_int32_t b,
|
||||||
ogg_int32_t t, ogg_int32_t v,
|
ogg_int32_t t, ogg_int32_t v,
|
||||||
ogg_int32_t *x, ogg_int32_t *y)
|
ogg_int32_t *x, ogg_int32_t *y)
|
||||||
{
|
{
|
||||||
ogg_int32_t r;
|
|
||||||
asm volatile ("mac.l %[a], %[t], %%acc0;"
|
asm volatile ("mac.l %[a], %[t], %%acc0;"
|
||||||
"msac.l %[b], %[v], %%acc0;"
|
"msac.l %[b], %[v], %%acc0;"
|
||||||
"mac.l %[b], %[t], %%acc1;"
|
"mac.l %[b], %[t], %%acc1;"
|
||||||
"mac.l %[a], %[v], %%acc1;"
|
"mac.l %[a], %[v], %%acc1;"
|
||||||
"movclr.l %%acc0, %[r];"
|
"movclr.l %%acc0, %[a];"
|
||||||
"move.l %[r], (%[x]);"
|
"move.l %[a], (%[x]);"
|
||||||
"movclr.l %%acc1, %[r];"
|
"movclr.l %%acc1, %[a];"
|
||||||
"move.l %[r], (%[y]);"
|
"move.l %[a], (%[y]);"
|
||||||
: [r] "=&r" (r)
|
: [a] "+&r" (a)
|
||||||
: [x] "a" (x), [y] "a" (y),
|
: [x] "a" (x), [y] "a" (y),
|
||||||
[a] "r" (a), [b] "r" (b), [t] "r" (t), [v] "r" (v)
|
[b] "r" (b), [t] "r" (t), [v] "r" (v)
|
||||||
: "cc", "memory");
|
: "cc", "memory");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* no faster way of doing this using the MAC? */
|
|
||||||
|
/* is there no better way of doing this using the MAC? */
|
||||||
#define XPROD32(_a, _b, _t, _v, _x, _y) \
|
#define XPROD32(_a, _b, _t, _v, _x, _y) \
|
||||||
{ (_x)=MULT32(_a,_t)+MULT32(_b,_v); \
|
{ (_x)=MULT32(_a,_t)+MULT32(_b,_v); \
|
||||||
(_y)=MULT32(_b,_t)-MULT32(_a,_v); }
|
(_y)=MULT32(_b,_t)-MULT32(_a,_v); }
|
||||||
|
|
|
@ -22,7 +22,7 @@
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include "ogg.h"
|
#include "ogg.h"
|
||||||
|
|
||||||
static const unsigned long mask[] IDATA_ATTR =
|
static const unsigned long mask[] =
|
||||||
{0x00000000,0x00000001,0x00000003,0x00000007,0x0000000f,
|
{0x00000000,0x00000001,0x00000003,0x00000007,0x0000000f,
|
||||||
0x0000001f,0x0000003f,0x0000007f,0x000000ff,0x000001ff,
|
0x0000001f,0x0000003f,0x0000007f,0x000000ff,0x000001ff,
|
||||||
0x000003ff,0x000007ff,0x00000fff,0x00001fff,0x00003fff,
|
0x000003ff,0x000007ff,0x00000fff,0x00001fff,0x00003fff,
|
||||||
|
|
|
@ -140,7 +140,7 @@ int vorbis_staticbook_unpack(oggpack_buffer *opb,static_codebook *s){
|
||||||
be. The first-stage decode table catches most words so that
|
be. The first-stage decode table catches most words so that
|
||||||
bitreverse is not in the main execution path. */
|
bitreverse is not in the main execution path. */
|
||||||
|
|
||||||
static ogg_uint32_t bitreverse(ogg_uint32_t x){
|
static inline ogg_uint32_t bitreverse(register ogg_uint32_t x){
|
||||||
x= ((x>>16)&0x0000ffff) | ((x<<16)&0xffff0000);
|
x= ((x>>16)&0x0000ffff) | ((x<<16)&0xffff0000);
|
||||||
x= ((x>> 8)&0x00ff00ff) | ((x<< 8)&0xff00ff00);
|
x= ((x>> 8)&0x00ff00ff) | ((x<< 8)&0xff00ff00);
|
||||||
x= ((x>> 4)&0x0f0f0f0f) | ((x<< 4)&0xf0f0f0f0);
|
x= ((x>> 4)&0x0f0f0f0f) | ((x<< 4)&0xf0f0f0f0);
|
||||||
|
@ -265,12 +265,13 @@ long vorbis_book_decodev_add(codebook *book,ogg_int32_t *a,
|
||||||
a[i++]+=t[j++]>>shift;
|
a[i++]+=t[j++]>>shift;
|
||||||
}
|
}
|
||||||
}else{
|
}else{
|
||||||
|
shift = -shift;
|
||||||
for(i=0;i<n;){
|
for(i=0;i<n;){
|
||||||
entry = decode_packed_entry_number(book,b);
|
entry = decode_packed_entry_number(book,b);
|
||||||
if(entry==-1)return(-1);
|
if(entry==-1)return(-1);
|
||||||
t = book->valuelist+entry*book->dim;
|
t = book->valuelist+entry*book->dim;
|
||||||
for (j=0;j<book->dim;)
|
for (j=0;j<book->dim;)
|
||||||
a[i++]+=t[j++]<<-shift;
|
a[i++]+=t[j++]<<shift;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return(0);
|
return(0);
|
||||||
|
@ -293,13 +294,13 @@ long vorbis_book_decodev_set(codebook *book,ogg_int32_t *a,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}else{
|
}else{
|
||||||
|
shift = -shift;
|
||||||
for(i=0;i<n;){
|
for(i=0;i<n;){
|
||||||
entry = decode_packed_entry_number(book,b);
|
entry = decode_packed_entry_number(book,b);
|
||||||
if(entry==-1)return(-1);
|
if(entry==-1)return(-1);
|
||||||
t = book->valuelist+entry*book->dim;
|
t = book->valuelist+entry*book->dim;
|
||||||
for (j=0;j<book->dim;){
|
for (j=0;j<book->dim;){
|
||||||
a[i++]=t[j++]<<-shift;
|
a[i++]=t[j++]<<shift;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -330,14 +331,14 @@ long vorbis_book_decodevv_add(codebook *book,ogg_int32_t **a,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}else{
|
}else{
|
||||||
|
shift = -shift;
|
||||||
for(i=offset;i<offset+n;){
|
for(i=offset;i<offset+n;){
|
||||||
entry = decode_packed_entry_number(book,b);
|
entry = decode_packed_entry_number(book,b);
|
||||||
if(entry==-1)return(-1);
|
if(entry==-1)return(-1);
|
||||||
{
|
{
|
||||||
const ogg_int32_t *t = book->valuelist+entry*book->dim;
|
const ogg_int32_t *t = book->valuelist+entry*book->dim;
|
||||||
for (j=0;j<book->dim;j++){
|
for (j=0;j<book->dim;j++){
|
||||||
a[chptr++][i]+=t[j]<<-shift;
|
a[chptr++][i]+=t[j]<<shift;
|
||||||
if(chptr==ch){
|
if(chptr==ch){
|
||||||
chptr=0;
|
chptr=0;
|
||||||
i++;
|
i++;
|
||||||
|
|
|
@ -45,7 +45,7 @@ typedef struct {
|
||||||
static void floor1_free_info(vorbis_info_floor *i){
|
static void floor1_free_info(vorbis_info_floor *i){
|
||||||
vorbis_info_floor1 *info=(vorbis_info_floor1 *)i;
|
vorbis_info_floor1 *info=(vorbis_info_floor1 *)i;
|
||||||
if(info){
|
if(info){
|
||||||
memset(info,0,sizeof(*info));
|
//memset(info,0,sizeof(*info));
|
||||||
_ogg_free(info);
|
_ogg_free(info);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -53,13 +53,13 @@ static void floor1_free_info(vorbis_info_floor *i){
|
||||||
static void floor1_free_look(vorbis_look_floor *i){
|
static void floor1_free_look(vorbis_look_floor *i){
|
||||||
vorbis_look_floor1 *look=(vorbis_look_floor1 *)i;
|
vorbis_look_floor1 *look=(vorbis_look_floor1 *)i;
|
||||||
if(look){
|
if(look){
|
||||||
memset(look,0,sizeof(*look));
|
//memset(look,0,sizeof(*look));
|
||||||
_ogg_free(look);
|
_ogg_free(look);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int ilog(unsigned int v){
|
static inline int ilog(register unsigned int v){
|
||||||
int ret=0;
|
register int ret=0;
|
||||||
while(v){
|
while(v){
|
||||||
ret++;
|
ret++;
|
||||||
v>>=1;
|
v>>=1;
|
||||||
|
@ -124,7 +124,7 @@ static int icomp(const void *a,const void *b){
|
||||||
static vorbis_look_floor *floor1_look(vorbis_dsp_state *vd,vorbis_info_mode *mi,
|
static vorbis_look_floor *floor1_look(vorbis_dsp_state *vd,vorbis_info_mode *mi,
|
||||||
vorbis_info_floor *in){
|
vorbis_info_floor *in){
|
||||||
|
|
||||||
static int *sortpointer[VIF_POSIT+2] IDATA_ATTR;
|
int *sortpointer[VIF_POSIT+2];
|
||||||
vorbis_info_floor1 *info=(vorbis_info_floor1 *)in;
|
vorbis_info_floor1 *info=(vorbis_info_floor1 *)in;
|
||||||
vorbis_look_floor1 *look=(vorbis_look_floor1 *)_ogg_calloc(1,sizeof(*look));
|
vorbis_look_floor1 *look=(vorbis_look_floor1 *)_ogg_calloc(1,sizeof(*look));
|
||||||
int i,j,n=0;
|
int i,j,n=0;
|
||||||
|
@ -216,7 +216,7 @@ static int render_point(int x0,int x1,int y0,int y1,int x){
|
||||||
# define XdB(n) (n)
|
# define XdB(n) (n)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static ogg_int32_t FLOOR_fromdB_LOOKUP[256] IDATA_ATTR ={
|
static ogg_int32_t FLOOR_fromdB_LOOKUP[256] ={
|
||||||
XdB(0x000000e5), XdB(0x000000f4), XdB(0x00000103), XdB(0x00000114),
|
XdB(0x000000e5), XdB(0x000000f4), XdB(0x00000103), XdB(0x00000114),
|
||||||
XdB(0x00000126), XdB(0x00000139), XdB(0x0000014e), XdB(0x00000163),
|
XdB(0x00000126), XdB(0x00000139), XdB(0x0000014e), XdB(0x00000163),
|
||||||
XdB(0x0000017a), XdB(0x00000193), XdB(0x000001ad), XdB(0x000001c9),
|
XdB(0x0000017a), XdB(0x00000193), XdB(0x000001ad), XdB(0x000001c9),
|
||||||
|
@ -313,16 +313,15 @@ static void *floor1_inverse1(vorbis_block *vb,vorbis_look_floor *in){
|
||||||
vorbis_look_floor1 *look=(vorbis_look_floor1 *)in;
|
vorbis_look_floor1 *look=(vorbis_look_floor1 *)in;
|
||||||
vorbis_info_floor1 *info=look->vi;
|
vorbis_info_floor1 *info=look->vi;
|
||||||
codec_setup_info *ci=(codec_setup_info *)vb->vd->vi->codec_setup;
|
codec_setup_info *ci=(codec_setup_info *)vb->vd->vi->codec_setup;
|
||||||
|
|
||||||
int i,j,k;
|
int i,j,k;
|
||||||
codebook *books=ci->fullbooks;
|
codebook *books=ci->fullbooks;
|
||||||
|
|
||||||
/* unpack wrapped/predicted values from stream */
|
/* unpack wrapped/predicted values from stream */
|
||||||
if(oggpack_read(&vb->opb,1)==1){
|
if(oggpack_read(&vb->opb,1)==1){
|
||||||
int *fit_value=(int *)_vorbis_block_alloc(vb,(look->posts)*sizeof(*fit_value));
|
int *fit_value=(int *)_vorbis_block_alloc(vb,(look->posts)*sizeof(*fit_value));
|
||||||
|
int ilg = ilog(look->quant_q-1);
|
||||||
fit_value[0]=oggpack_read(&vb->opb,ilog(look->quant_q-1));
|
fit_value[0]=oggpack_read(&vb->opb,ilg);
|
||||||
fit_value[1]=oggpack_read(&vb->opb,ilog(look->quant_q-1));
|
fit_value[1]=oggpack_read(&vb->opb,ilg);
|
||||||
|
|
||||||
/* partition by partition */
|
/* partition by partition */
|
||||||
/* partition by partition */
|
/* partition by partition */
|
||||||
|
|
|
@ -501,7 +501,7 @@ int ogg_page_packets(ogg_page *og){
|
||||||
/* Static CRC calculation table. See older code in CVS for dead
|
/* Static CRC calculation table. See older code in CVS for dead
|
||||||
run-time initialization code. */
|
run-time initialization code. */
|
||||||
|
|
||||||
static ogg_uint32_t crc_lookup[256] IDATA_ATTR = {
|
static ogg_uint32_t crc_lookup[256] = {
|
||||||
0x00000000,0x04c11db7,0x09823b6e,0x0d4326d9,
|
0x00000000,0x04c11db7,0x09823b6e,0x0d4326d9,
|
||||||
0x130476dc,0x17c56b6b,0x1a864db2,0x1e475005,
|
0x130476dc,0x17c56b6b,0x1a864db2,0x1e475005,
|
||||||
0x2608edb8,0x22c9f00f,0x2f8ad6d6,0x2b4bcb61,
|
0x2608edb8,0x22c9f00f,0x2f8ad6d6,0x2b4bcb61,
|
||||||
|
|
|
@ -180,12 +180,6 @@ static vorbis_info_mapping *mapping0_unpack(vorbis_info *vi,oggpack_buffer *opb)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* IRAM buffer keep the pcm data; only for windows size upto 2048
|
|
||||||
for space restrictions. No real compromise, larger window sizes
|
|
||||||
are only used for very low quality settings (q<0?) */
|
|
||||||
#define IRAM_PCM_SIZE 2048
|
|
||||||
static ogg_int32_t pcm_iram[IRAM_PCM_SIZE] IDATA_ATTR;
|
|
||||||
|
|
||||||
static int seq = 0;
|
static int seq = 0;
|
||||||
|
|
||||||
#define CHANNELS 2 /* max 2 channels on the ihp-1xx (stereo) */
|
#define CHANNELS 2 /* max 2 channels on the ihp-1xx (stereo) */
|
||||||
|
@ -201,11 +195,12 @@ static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){
|
||||||
int i,j;
|
int i,j;
|
||||||
long n=vb->pcmend=ci->blocksizes[vb->W];
|
long n=vb->pcmend=ci->blocksizes[vb->W];
|
||||||
|
|
||||||
/* statically allocate mapping structures in IRAM */
|
/* bounded mapping arrays instead of using alloca();
|
||||||
static ogg_int32_t *pcmbundle[CHANNELS] IDATA_ATTR;
|
avoids memory leak; we can only deal with stereo anyway */
|
||||||
static int zerobundle[CHANNELS] IDATA_ATTR;
|
ogg_int32_t *pcmbundle[CHANNELS];
|
||||||
static int nonzero[CHANNELS] IDATA_ATTR;
|
int zerobundle[CHANNELS];
|
||||||
static void *floormemo[CHANNELS] IDATA_ATTR;
|
int nonzero[CHANNELS];
|
||||||
|
void *floormemo[CHANNELS];
|
||||||
|
|
||||||
/* test for too many channels;
|
/* test for too many channels;
|
||||||
(maybe this can be checked at the stream level?) */
|
(maybe this can be checked at the stream level?) */
|
||||||
|
@ -249,7 +244,7 @@ static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){
|
||||||
pcmbundle[ch_in_bundle++]=vb->pcm[j];
|
pcmbundle[ch_in_bundle++]=vb->pcm[j];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
look->residue_func[i]->inverse(vb,look->residue_look[i],
|
look->residue_func[i]->inverse(vb,look->residue_look[i],
|
||||||
pcmbundle,zerobundle,ch_in_bundle);
|
pcmbundle,zerobundle,ch_in_bundle);
|
||||||
}
|
}
|
||||||
|
@ -286,13 +281,10 @@ static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
//for(j=0;j<vi->channels;j++)
|
//for(j=0;j<vi->channels;j++)
|
||||||
//_analysis_output("residue",seq+j,vb->pcm[j],-8,n/2,0,0);
|
//_analysis_output("residue",seq+j,vb->pcm[j],-8,n/2,0,0);
|
||||||
|
|
||||||
|
|
||||||
/* pbv: removed this loop by fusion with the following one
|
|
||||||
to avoid recopying data to/from the IRAM */
|
|
||||||
#if 0
|
|
||||||
/* compute and apply spectral envelope */
|
/* compute and apply spectral envelope */
|
||||||
for(i=0;i<vi->channels;i++){
|
for(i=0;i<vi->channels;i++){
|
||||||
ogg_int32_t *pcm=vb->pcm[i];
|
ogg_int32_t *pcm=vb->pcm[i];
|
||||||
|
@ -300,7 +292,6 @@ static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){
|
||||||
look->floor_func[submap]->
|
look->floor_func[submap]->
|
||||||
inverse2(vb,look->floor_look[submap],floormemo[i],pcm);
|
inverse2(vb,look->floor_look[submap],floormemo[i],pcm);
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
//for(j=0;j<vi->channels;j++)
|
//for(j=0;j<vi->channels;j++)
|
||||||
//_analysis_output("mdct",seq+j,vb->pcm[j],-24,n/2,0,1);
|
//_analysis_output("mdct",seq+j,vb->pcm[j],-24,n/2,0,1);
|
||||||
|
@ -308,32 +299,9 @@ static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){
|
||||||
/* transform the PCM data; takes PCM vector, vb; modifies PCM vector */
|
/* transform the PCM data; takes PCM vector, vb; modifies PCM vector */
|
||||||
/* only MDCT right now.... */
|
/* only MDCT right now.... */
|
||||||
|
|
||||||
/* check if we can do this in IRAM */
|
for(i=0;i<vi->channels;i++){
|
||||||
if(n <= IRAM_PCM_SIZE) { /* normal window size: yes */
|
ogg_int32_t *pcm=vb->pcm[i];
|
||||||
for(i=0;i<vi->channels;i++){
|
|
||||||
ogg_int32_t *pcm=vb->pcm[i];
|
|
||||||
int submap=info->chmuxlist[i];
|
|
||||||
|
|
||||||
if(nonzero[i]) {
|
|
||||||
memcpy(pcm_iram, pcm, sizeof(ogg_int32_t)*n);
|
|
||||||
look->floor_func[submap]->
|
|
||||||
inverse2(vb,look->floor_look[submap],floormemo[i],pcm_iram);
|
|
||||||
mdct_backward(n, pcm_iram, pcm_iram);
|
|
||||||
/* window the data */
|
|
||||||
_vorbis_apply_window(pcm_iram,b->window,ci->blocksizes,vb->lW,vb->W,vb->nW);
|
|
||||||
memcpy(pcm, pcm_iram, sizeof(ogg_int32_t)*n);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
memset(pcm, 0, sizeof(ogg_int32_t)*n);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else { /* large window: no, do it in the normal memory */
|
|
||||||
for(i=0;i<vi->channels;i++){
|
|
||||||
ogg_int32_t *pcm=vb->pcm[i];
|
|
||||||
int submap=info->chmuxlist[i];
|
|
||||||
|
|
||||||
look->floor_func[submap]->
|
|
||||||
inverse2(vb,look->floor_look[submap],floormemo[i],pcm);
|
|
||||||
if(nonzero[i]) {
|
if(nonzero[i]) {
|
||||||
mdct_backward(n, pcm, pcm);
|
mdct_backward(n, pcm, pcm);
|
||||||
/* window the data */
|
/* window the data */
|
||||||
|
@ -341,7 +309,6 @@ static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
memset(pcm, 0, sizeof(ogg_int32_t)*n);
|
memset(pcm, 0, sizeof(ogg_int32_t)*n);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//for(j=0;j<vi->channels;j++)
|
//for(j=0;j<vi->channels;j++)
|
||||||
|
|
|
@ -41,7 +41,6 @@
|
||||||
|
|
||||||
/* 8 point butterfly (in place) */
|
/* 8 point butterfly (in place) */
|
||||||
STIN void mdct_butterfly_8(DATA_TYPE *x){
|
STIN void mdct_butterfly_8(DATA_TYPE *x){
|
||||||
|
|
||||||
REG_TYPE r0 = x[4] + x[0];
|
REG_TYPE r0 = x[4] + x[0];
|
||||||
REG_TYPE r1 = x[4] - x[0];
|
REG_TYPE r1 = x[4] - x[0];
|
||||||
REG_TYPE r2 = x[5] + x[1];
|
REG_TYPE r2 = x[5] + x[1];
|
||||||
|
@ -144,88 +143,81 @@ STIN void mdct_butterfly_32(DATA_TYPE *x){
|
||||||
mdct_butterfly_16(x+16);
|
mdct_butterfly_16(x+16);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* N/stage point generic N stage butterfly (in place, 2 register) */
|
/* N/stage point generic N stage butterfly (in place, 4 register) */
|
||||||
STIN void mdct_butterfly_generic(DATA_TYPE *x,int points,int step){
|
void mdct_butterfly_generic(DATA_TYPE *x,int points, int step){
|
||||||
|
|
||||||
LOOKUP_T *T = sincos_lookup0;
|
LOOKUP_T *T = sincos_lookup0;
|
||||||
DATA_TYPE *x1 = x + points - 8;
|
DATA_TYPE *x1 = x + points - 8;
|
||||||
DATA_TYPE *x2 = x + (points>>1) - 8;
|
DATA_TYPE *x2 = x + (points>>1) - 8;
|
||||||
REG_TYPE r0;
|
REG_TYPE r0;
|
||||||
REG_TYPE r1;
|
REG_TYPE r1;
|
||||||
|
REG_TYPE r2;
|
||||||
|
REG_TYPE r3;
|
||||||
|
|
||||||
do{
|
do{
|
||||||
r0 = x1[6] - x2[6]; x1[6] += x2[6];
|
r0 = x1[6] - x2[6]; x1[6] += x2[6];
|
||||||
r1 = x2[7] - x1[7]; x1[7] += x2[7];
|
r1 = x2[7] - x1[7]; x1[7] += x2[7];
|
||||||
|
r2 = x1[4] - x2[4]; x1[4] += x2[4];
|
||||||
|
r3 = x2[5] - x1[5]; x1[5] += x2[5];
|
||||||
XPROD31( r1, r0, T[0], T[1], &x2[6], &x2[7] ); T+=step;
|
XPROD31( r1, r0, T[0], T[1], &x2[6], &x2[7] ); T+=step;
|
||||||
|
XPROD31( r3, r2, T[0], T[1], &x2[4], &x2[5] ); T+=step;
|
||||||
r0 = x1[4] - x2[4]; x1[4] += x2[4];
|
|
||||||
r1 = x2[5] - x1[5]; x1[5] += x2[5];
|
|
||||||
XPROD31( r1, r0, T[0], T[1], &x2[4], &x2[5] ); T+=step;
|
|
||||||
|
|
||||||
r0 = x1[2] - x2[2]; x1[2] += x2[2];
|
r0 = x1[2] - x2[2]; x1[2] += x2[2];
|
||||||
r1 = x2[3] - x1[3]; x1[3] += x2[3];
|
r1 = x2[3] - x1[3]; x1[3] += x2[3];
|
||||||
|
r2 = x1[0] - x2[0]; x1[0] += x2[0];
|
||||||
|
r3 = x2[1] - x1[1]; x1[1] += x2[1];
|
||||||
XPROD31( r1, r0, T[0], T[1], &x2[2], &x2[3] ); T+=step;
|
XPROD31( r1, r0, T[0], T[1], &x2[2], &x2[3] ); T+=step;
|
||||||
|
XPROD31( r3, r2, T[0], T[1], &x2[0], &x2[1] ); T+=step;
|
||||||
r0 = x1[0] - x2[0]; x1[0] += x2[0];
|
|
||||||
r1 = x2[1] - x1[1]; x1[1] += x2[1];
|
|
||||||
XPROD31( r1, r0, T[0], T[1], &x2[0], &x2[1] ); T+=step;
|
|
||||||
|
|
||||||
x1-=8; x2-=8;
|
x1-=8; x2-=8;
|
||||||
}while(T<sincos_lookup0+1024);
|
}while(T<sincos_lookup0+1024);
|
||||||
do{
|
do{
|
||||||
r0 = x1[6] - x2[6]; x1[6] += x2[6];
|
r0 = x1[6] - x2[6]; x1[6] += x2[6];
|
||||||
r1 = x1[7] - x2[7]; x1[7] += x2[7];
|
r1 = x1[7] - x2[7]; x1[7] += x2[7];
|
||||||
|
r2 = x1[4] - x2[4]; x1[4] += x2[4];
|
||||||
|
r3 = x1[5] - x2[5]; x1[5] += x2[5];
|
||||||
XNPROD31( r0, r1, T[0], T[1], &x2[6], &x2[7] ); T-=step;
|
XNPROD31( r0, r1, T[0], T[1], &x2[6], &x2[7] ); T-=step;
|
||||||
|
XNPROD31( r2, r3, T[0], T[1], &x2[4], &x2[5] ); T-=step;
|
||||||
|
|
||||||
r0 = x1[4] - x2[4]; x1[4] += x2[4];
|
|
||||||
r1 = x1[5] - x2[5]; x1[5] += x2[5];
|
|
||||||
XNPROD31( r0, r1, T[0], T[1], &x2[4], &x2[5] ); T-=step;
|
|
||||||
|
|
||||||
r0 = x1[2] - x2[2]; x1[2] += x2[2];
|
r0 = x1[2] - x2[2]; x1[2] += x2[2];
|
||||||
r1 = x1[3] - x2[3]; x1[3] += x2[3];
|
r1 = x1[3] - x2[3]; x1[3] += x2[3];
|
||||||
|
r2 = x1[0] - x2[0]; x1[0] += x2[0];
|
||||||
|
r3 = x1[1] - x2[1]; x1[1] += x2[1];
|
||||||
XNPROD31( r0, r1, T[0], T[1], &x2[2], &x2[3] ); T-=step;
|
XNPROD31( r0, r1, T[0], T[1], &x2[2], &x2[3] ); T-=step;
|
||||||
|
XNPROD31( r2, r3, T[0], T[1], &x2[0], &x2[1] ); T-=step;
|
||||||
r0 = x1[0] - x2[0]; x1[0] += x2[0];
|
|
||||||
r1 = x1[1] - x2[1]; x1[1] += x2[1];
|
|
||||||
XNPROD31( r0, r1, T[0], T[1], &x2[0], &x2[1] ); T-=step;
|
|
||||||
|
|
||||||
x1-=8; x2-=8;
|
x1-=8; x2-=8;
|
||||||
}while(T>sincos_lookup0);
|
}while(T>sincos_lookup0);
|
||||||
do{
|
do{
|
||||||
r0 = x2[6] - x1[6]; x1[6] += x2[6];
|
r0 = x2[6] - x1[6]; x1[6] += x2[6];
|
||||||
r1 = x2[7] - x1[7]; x1[7] += x2[7];
|
r1 = x2[7] - x1[7]; x1[7] += x2[7];
|
||||||
|
r2 = x2[4] - x1[4]; x1[4] += x2[4];
|
||||||
|
r3 = x2[5] - x1[5]; x1[5] += x2[5];
|
||||||
XPROD31( r0, r1, T[0], T[1], &x2[6], &x2[7] ); T+=step;
|
XPROD31( r0, r1, T[0], T[1], &x2[6], &x2[7] ); T+=step;
|
||||||
|
XPROD31( r2, r3, T[0], T[1], &x2[4], &x2[5] ); T+=step;
|
||||||
r0 = x2[4] - x1[4]; x1[4] += x2[4];
|
|
||||||
r1 = x2[5] - x1[5]; x1[5] += x2[5];
|
|
||||||
XPROD31( r0, r1, T[0], T[1], &x2[4], &x2[5] ); T+=step;
|
|
||||||
|
|
||||||
r0 = x2[2] - x1[2]; x1[2] += x2[2];
|
r0 = x2[2] - x1[2]; x1[2] += x2[2];
|
||||||
r1 = x2[3] - x1[3]; x1[3] += x2[3];
|
r1 = x2[3] - x1[3]; x1[3] += x2[3];
|
||||||
|
r2 = x2[0] - x1[0]; x1[0] += x2[0];
|
||||||
|
r3 = x2[1] - x1[1]; x1[1] += x2[1];
|
||||||
XPROD31( r0, r1, T[0], T[1], &x2[2], &x2[3] ); T+=step;
|
XPROD31( r0, r1, T[0], T[1], &x2[2], &x2[3] ); T+=step;
|
||||||
|
XPROD31( r2, r3, T[0], T[1], &x2[0], &x2[1] ); T+=step;
|
||||||
r0 = x2[0] - x1[0]; x1[0] += x2[0];
|
|
||||||
r1 = x2[1] - x1[1]; x1[1] += x2[1];
|
|
||||||
XPROD31( r0, r1, T[0], T[1], &x2[0], &x2[1] ); T+=step;
|
|
||||||
|
|
||||||
x1-=8; x2-=8;
|
x1-=8; x2-=8;
|
||||||
}while(T<sincos_lookup0+1024);
|
}while(T<sincos_lookup0+1024);
|
||||||
do{
|
do{
|
||||||
r0 = x1[6] - x2[6]; x1[6] += x2[6];
|
r0 = x1[6] - x2[6]; x1[6] += x2[6];
|
||||||
r1 = x2[7] - x1[7]; x1[7] += x2[7];
|
r1 = x2[7] - x1[7]; x1[7] += x2[7];
|
||||||
|
r2 = x1[4] - x2[4]; x1[4] += x2[4];
|
||||||
|
r3 = x2[5] - x1[5]; x1[5] += x2[5];
|
||||||
XNPROD31( r1, r0, T[0], T[1], &x2[6], &x2[7] ); T-=step;
|
XNPROD31( r1, r0, T[0], T[1], &x2[6], &x2[7] ); T-=step;
|
||||||
|
XNPROD31( r3, r2, T[0], T[1], &x2[4], &x2[5] ); T-=step;
|
||||||
r0 = x1[4] - x2[4]; x1[4] += x2[4];
|
|
||||||
r1 = x2[5] - x1[5]; x1[5] += x2[5];
|
|
||||||
XNPROD31( r1, r0, T[0], T[1], &x2[4], &x2[5] ); T-=step;
|
|
||||||
|
|
||||||
r0 = x1[2] - x2[2]; x1[2] += x2[2];
|
r0 = x1[2] - x2[2]; x1[2] += x2[2];
|
||||||
r1 = x2[3] - x1[3]; x1[3] += x2[3];
|
r1 = x2[3] - x1[3]; x1[3] += x2[3];
|
||||||
|
r2 = x1[0] - x2[0]; x1[0] += x2[0];
|
||||||
|
r3 = x2[1] - x1[1]; x1[1] += x2[1];
|
||||||
XNPROD31( r1, r0, T[0], T[1], &x2[2], &x2[3] ); T-=step;
|
XNPROD31( r1, r0, T[0], T[1], &x2[2], &x2[3] ); T-=step;
|
||||||
|
XNPROD31( r3, r2, T[0], T[1], &x2[0], &x2[1] ); T-=step;
|
||||||
r0 = x1[0] - x2[0]; x1[0] += x2[0];
|
|
||||||
r1 = x2[1] - x1[1]; x1[1] += x2[1];
|
|
||||||
XNPROD31( r1, r0, T[0], T[1], &x2[0], &x2[1] ); T-=step;
|
|
||||||
|
|
||||||
x1-=8; x2-=8;
|
x1-=8; x2-=8;
|
||||||
}while(T>sincos_lookup0);
|
}while(T>sincos_lookup0);
|
||||||
|
@ -246,8 +238,8 @@ STIN void mdct_butterflies(DATA_TYPE *x,int points,int shift) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static const unsigned char bitrev[16]
|
static const unsigned char bitrev[16] IDATA_ATTR =
|
||||||
IDATA_ATTR = {0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15};
|
{0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15};
|
||||||
|
|
||||||
STIN int bitrev12(int x){
|
STIN int bitrev12(int x){
|
||||||
return bitrev[x>>8]|(bitrev[(x&0x0f0)>>4]<<4)|(((int)bitrev[x&0x00f])<<8);
|
return bitrev[x>>8]|(bitrev[(x&0x0f0)>>4]<<4)|(((int)bitrev[x&0x00f])<<8);
|
||||||
|
|
|
@ -281,7 +281,7 @@ static LOOKUP_T sincos_lookup0[1026] IDATA_ATTR = {
|
||||||
};
|
};
|
||||||
|
|
||||||
/* {sin((2*i+1)*PI/4096), cos((2*i+1)*PI/4096)}, with i = 0 to 511 */
|
/* {sin((2*i+1)*PI/4096), cos((2*i+1)*PI/4096)}, with i = 0 to 511 */
|
||||||
static LOOKUP_T sincos_lookup1[1024] IDATA_ATTR = {
|
static LOOKUP_T sincos_lookup1[1024] = {
|
||||||
X(0x001921fb), X(0x7ffffd88), X(0x004b65ee), X(0x7fffe9cb),
|
X(0x001921fb), X(0x7ffffd88), X(0x004b65ee), X(0x7fffe9cb),
|
||||||
X(0x007da9d4), X(0x7fffc251), X(0x00afeda8), X(0x7fff8719),
|
X(0x007da9d4), X(0x7fffc251), X(0x00afeda8), X(0x7fff8719),
|
||||||
X(0x00e23160), X(0x7fff3824), X(0x011474f6), X(0x7ffed572),
|
X(0x00e23160), X(0x7fff3824), X(0x011474f6), X(0x7ffed572),
|
||||||
|
|
|
@ -172,6 +172,7 @@ vorbis_look_residue *res0_look(vorbis_dsp_state *vd,vorbis_info_mode *vm,
|
||||||
return(look);
|
return(look);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define CHANNELS 2
|
||||||
|
|
||||||
/* a truncated packet here just means 'stop working'; it's not an error */
|
/* a truncated packet here just means 'stop working'; it's not an error */
|
||||||
static int _01inverse(vorbis_block *vb,vorbis_look_residue *vl,
|
static int _01inverse(vorbis_block *vb,vorbis_look_residue *vl,
|
||||||
|
@ -254,6 +255,8 @@ int res1_inverse(vorbis_block *vb,vorbis_look_residue *vl,
|
||||||
return(0);
|
return(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* duplicate code here as speed is somewhat more important */
|
/* duplicate code here as speed is somewhat more important */
|
||||||
int res2_inverse(vorbis_block *vb,vorbis_look_residue *vl,
|
int res2_inverse(vorbis_block *vb,vorbis_look_residue *vl,
|
||||||
ogg_int32_t **in,int *nonzero,int ch){
|
ogg_int32_t **in,int *nonzero,int ch){
|
||||||
|
@ -291,7 +294,6 @@ int res2_inverse(vorbis_block *vb,vorbis_look_residue *vl,
|
||||||
for(k=0;k<partitions_per_word && i<partvals;k++,i++)
|
for(k=0;k<partitions_per_word && i<partvals;k++,i++)
|
||||||
if(info->secondstages[partword[l][k]]&(1<<s)){
|
if(info->secondstages[partword[l][k]]&(1<<s)){
|
||||||
codebook *stagebook=look->partbooks[partword[l][k]][s];
|
codebook *stagebook=look->partbooks[partword[l][k]][s];
|
||||||
|
|
||||||
if(stagebook){
|
if(stagebook){
|
||||||
if(vorbis_book_decodevv_add(stagebook,in,
|
if(vorbis_book_decodevv_add(stagebook,in,
|
||||||
i*samples_per_partition+beginoff,ch,
|
i*samples_per_partition+beginoff,ch,
|
||||||
|
|
|
@ -24,6 +24,17 @@
|
||||||
#include "misc.h"
|
#include "misc.h"
|
||||||
#include "os.h"
|
#include "os.h"
|
||||||
|
|
||||||
|
|
||||||
|
/* IRAM buffer to keep the block pcm data; only for window sizes up to 2048
|
||||||
|
for space restrictions. No real compromise, larger window sizes
|
||||||
|
are only used for very low quality settings (q<0?) */
|
||||||
|
/* max 2 channels on the ihp-1xx (stereo), 2048 samples (2*2048*4 bytes = 16 KB) */
|
||||||
|
#define IRAM_PCM_END 2048
|
||||||
|
#define CHANNELS 2
|
||||||
|
|
||||||
|
static ogg_int32_t *ipcm_vect[CHANNELS] IDATA_ATTR;
|
||||||
|
static ogg_int32_t ipcm_buff[CHANNELS*IRAM_PCM_END] IDATA_ATTR;
|
||||||
|
|
||||||
int vorbis_synthesis(vorbis_block *vb,ogg_packet *op,int decodep){
|
int vorbis_synthesis(vorbis_block *vb,ogg_packet *op,int decodep){
|
||||||
vorbis_dsp_state *vd=vb->vd;
|
vorbis_dsp_state *vd=vb->vd;
|
||||||
private_state *b=(private_state *)vd->backend_state;
|
private_state *b=(private_state *)vd->backend_state;
|
||||||
|
@ -65,10 +76,18 @@ int vorbis_synthesis(vorbis_block *vb,ogg_packet *op,int decodep){
|
||||||
if(decodep){
|
if(decodep){
|
||||||
/* alloc pcm passback storage */
|
/* alloc pcm passback storage */
|
||||||
vb->pcmend=ci->blocksizes[vb->W];
|
vb->pcmend=ci->blocksizes[vb->W];
|
||||||
vb->pcm=(ogg_int32_t **)_vorbis_block_alloc(vb,sizeof(*vb->pcm)*vi->channels);
|
if (vi->channels <= CHANNELS && vb->pcmend<=IRAM_PCM_END) {
|
||||||
for(i=0;i<vi->channels;i++)
|
/* use statically allocated iram buffer */
|
||||||
vb->pcm[i]=(ogg_int32_t *)_vorbis_block_alloc(vb,vb->pcmend*sizeof(*vb->pcm[i]));
|
vb->pcm = ipcm_vect;
|
||||||
|
for(i=0; i<CHANNELS; i++)
|
||||||
|
vb->pcm[i] = &ipcm_buff[i*IRAM_PCM_END];
|
||||||
|
} else {
|
||||||
|
/* dynamic allocation (slower) */
|
||||||
|
vb->pcm=(ogg_int32_t **)_vorbis_block_alloc(vb,sizeof(*vb->pcm)*vi->channels);
|
||||||
|
for(i=0;i<vi->channels;i++)
|
||||||
|
vb->pcm[i]=(ogg_int32_t *)_vorbis_block_alloc(vb,vb->pcmend*sizeof(*vb->pcm[i]));
|
||||||
|
}
|
||||||
|
|
||||||
/* unpack_header enforces range checking */
|
/* unpack_header enforces range checking */
|
||||||
type=ci->map_type[ci->mode_param[mode]->mapping];
|
type=ci->map_type[ci->mode_param[mode]->mapping];
|
||||||
|
|
||||||
|
|
|
@ -18,7 +18,7 @@
|
||||||
|
|
||||||
#include "os_types.h"
|
#include "os_types.h"
|
||||||
|
|
||||||
/* keep small window table in fast IRAM */
|
/* keep small window tables in fast IRAM */
|
||||||
static LOOKUP_T vwin64[32] IDATA_ATTR = {
|
static LOOKUP_T vwin64[32] IDATA_ATTR = {
|
||||||
X(0x001f0003), X(0x01168c98), X(0x030333c8), X(0x05dfe3a4),
|
X(0x001f0003), X(0x01168c98), X(0x030333c8), X(0x05dfe3a4),
|
||||||
X(0x09a49562), X(0x0e45df18), X(0x13b47ef2), X(0x19dcf676),
|
X(0x09a49562), X(0x0e45df18), X(0x13b47ef2), X(0x19dcf676),
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue