1
0
Fork 0
forked from len0rd/rockbox

A little improvement on Vorbis block synthesis.

Added myself to the list of contributors.


git-svn-id: svn://svn.rockbox.org/rockbox/trunk@6664 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Pedro Vasconcelos 2005-06-10 22:34:57 +00:00
parent cd1d87c8cf
commit 6b9350b4d3
7 changed files with 204 additions and 107 deletions

View file

@ -21,6 +21,9 @@
#if CONFIG_CPU == MCF5249 && !defined(SIMULATOR)
/* attribute for 16-byte alignment */
#define LINE_ATTR __attribute__ ((aligned (16)))
#ifndef _V_WIDE_MATH
#define _V_WIDE_MATH
@ -107,15 +110,14 @@ void XNPROD31(ogg_int32_t a, ogg_int32_t b,
}
#if 1 /* Canonical definition */
#if 1
/* canonical definition */
#define XPROD32(_a, _b, _t, _v, _x, _y) \
{ (_x)=MULT32(_a,_t)+MULT32(_b,_v); \
(_y)=MULT32(_b,_t)-MULT32(_a,_v); }
#else
/* Thom Johansen suggestion; this could lose the lsb by overflow
but does it matter in practice? */
/* Thom Johansen's suggestion; this could lose the LSB by overflow;
Does it matter in practice? */
#define XPROD32(_a, _b, _t, _v, _x, _y) \
asm volatile ("mac.l %[a], %[t], %%acc0;" \
"mac.l %[b], %[v], %%acc0;" \
@ -132,11 +134,79 @@ void XNPROD31(ogg_int32_t a, ogg_int32_t b,
#endif
/* asm versions of vector multiplication for window.c */
/* asm versions of vector operations for block.c, window.c */
/* assumes MAC is initialized & accumulators cleared */
/*
 * Element-wise in-place vector addition: x[i] += y[i] for 0 <= i < n.
 *
 *   x  destination vector, updated in place
 *   y  source vector, read only
 *   n  number of elements; nothing is done when n <= 0
 *
 * Work is split in three phases:
 *   1. a scalar head loop that advances until x is on a 16-byte boundary,
 *   2. an asm loop that handles four elements per iteration via movem.l,
 *   3. a scalar tail loop for the remaining (fewer than four) elements.
 * Only x is brought to a 16-byte boundary; y is used as it comes.
 */
static inline
void mcf5249_vect_add(ogg_int32_t *x, ogg_int32_t *y, int n)
{
    /* Align x to 16 bytes.  All four low address bits must be checked:
       testing "& 16" would look at bit 4 only and would neither detect
       a misaligned pointer nor stop on an aligned one reliably. */
    while (n > 0 && ((int)x & 15)) {
        *x++ += *y++;
        n--;
    }

    asm volatile ("bra 1f;"
                  "0:"                          /* loop start */
                  "movem.l (%[x]), %%d0-%%d3;"  /* fetch values */
                  "movem.l (%[y]), %%a0-%%a3;"
                  /* add */
                  "add.l %%a0, %%d0;"
                  "add.l %%a1, %%d1;"
                  "add.l %%a2, %%d2;"
                  "add.l %%a3, %%d3;"
                  /* store and advance */
                  "movem.l %%d0-%%d3, (%[x]);"
                  "lea.l (4*4, %[x]), %[x];"
                  "lea.l (4*4, %[y]), %[y];"
                  "subq.l #4, %[n];"            /* done 4 elements */
                  "1: cmpi.l #4, %[n];"         /* at least 4 left? */
                  "bge 0b;"
                  : [n] "+d" (n), [x] "+a" (x), [y] "+a" (y)
                  : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
                  "cc", "memory");

    /* add final elements (n is now below 4) */
    while (n > 0) {
        *x++ += *y++;
        n--;
    }
}
/*
 * Vector copy: x[i] = y[i] for 0 <= i < n.
 *
 *   x  destination vector
 *   y  source vector, read only
 *   n  number of elements; nothing is done when n <= 0
 *
 * Work is split in three phases:
 *   1. a scalar head loop that advances until x is on a 16-byte boundary,
 *   2. an asm loop that moves four elements per iteration via movem.l,
 *   3. a scalar tail loop for the remaining (fewer than four) elements.
 * Only x is brought to a 16-byte boundary; y is used as it comes.
 */
static inline
void mcf5249_vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n)
{
    /* Align x to 16 bytes.  All four low address bits must be checked:
       testing "& 16" would look at bit 4 only and would neither detect
       a misaligned pointer nor stop on an aligned one reliably. */
    while (n > 0 && ((int)x & 15)) {
        *x++ = *y++;
        n--;
    }

    asm volatile ("bra 1f;"
                  "0:"                          /* loop start */
                  "movem.l (%[y]), %%d0-%%d3;"  /* fetch values */
                  "movem.l %%d0-%%d3, (%[x]);"  /* store */
                  "lea.l (4*4, %[x]), %[x];"    /* advance */
                  "lea.l (4*4, %[y]), %[y];"
                  "subq.l #4, %[n];"            /* done 4 elements */
                  "1: cmpi.l #4, %[n];"         /* at least 4 left? */
                  "bge 0b;"
                  : [n] "+d" (n), [x] "+a" (x), [y] "+a" (y)
                  : : "%d0", "%d1", "%d2", "%d3", "cc", "memory");

    /* copy final elements (n is now below 4) */
    while (n > 0) {
        *x++ = *y++;
        n--;
    }
}
static inline
void mcf5249_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
{
/* ensure data is aligned to 16-bytes */
while(n>0 && (int)data%16) {
*data = MULT31(*data, *window);
data++;
window++;
n--;
}
asm volatile ("movem.l (%[d]), %%d0-%%d3;" /* loop start */
"movem.l (%[w]), %%a0-%%a3;" /* pre-fetch registers */
"lea.l (4*4, %[w]), %[w];"
@ -184,6 +254,13 @@ void mcf5249_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
static inline
void mcf5249_vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
{
/* ensure at least data is aligned to 16-bytes */
while(n>0 && (int)data%16) {
*data = MULT31(*data, *window);
data++;
window--;
n--;
}
asm volatile ("lea.l (-3*4, %[w]), %[w];" /* loop start */
"movem.l (%[d]), %%d0-%%d3;" /* pre-fetch registers */
"movem.l (%[w]), %%a0-%%a3;"
@ -232,6 +309,11 @@ void mcf5249_vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
static inline
void mcf5249_vect_zero(ogg_int32_t *ptr, int n)
{
/* ensure ptr is aligned to 16-bytes */
while(n>0 && (int)ptr%16) {
*ptr++ = 0;
n--;
}
asm volatile ("clr.l %%d0;"
"clr.l %%d1;"
"clr.l %%d2;"
@ -241,23 +323,16 @@ void mcf5249_vect_zero(ogg_int32_t *ptr, int n)
"bra 1f;"
"0: movem.l %%d0-%%d3, (%[ptr]);"
"lea (4*4, %[ptr]), %[ptr];"
"subq.l #4, %[n];"
"subq.l #4, %[n];" /* done 4 elements */
"1: bgt 0b;"
/* remaing elements */
"tst.l %[n];"
"beq 1f;" /* n=0 */
"clr.l (%[ptr])+;"
"subq.l #1, %[n];"
"beq 1f;" /* n=1 */
"clr.l (%[ptr])+;"
"subq.l #1, %[n];"
"beq 1f;" /* n=2 */
/* otherwise n = 3 */
"clr.l (%[ptr])+;"
"1:"
: [n] "+d" (n), [ptr] "+a" (ptr)
:
: "%d0","%d1","%d2","%d3","cc","memory");
/* clear remaining elements */
while(n>0) {
*ptr++ = 0;
n--;
}
}
#endif
@ -272,4 +347,6 @@ static inline ogg_int32_t CLIP_TO_15(register ogg_int32_t x) {
}
#endif
#else
#define LINE_ATTR
#endif

View file

@ -70,8 +70,8 @@ static int ilog(unsigned int v){
| | |endSr
| |beginSr
| |endSl
|beginSl
|beginW
|beginSl
|beginW
*/
/* block abstraction setup *********************************************/
@ -173,10 +173,8 @@ static int _vds_init(vorbis_dsp_state *v,vorbis_info *vi){
v->pcm=(ogg_int32_t **)_ogg_malloc(vi->channels*sizeof(*v->pcm));
v->pcmret=(ogg_int32_t **)_ogg_malloc(vi->channels*sizeof(*v->pcmret));
// pbv: allow for extra padding for windowing
for(i=0;i<vi->channels;i++)
v->pcm[i]=(ogg_int32_t *)_ogg_calloc(v->pcm_storage,sizeof(*v->pcm[i]));
// v->pcm[i]=(ogg_int32_t *)_ogg_calloc(v->pcm_storage,sizeof(*v->pcm[i]));
/* all 1 (large block) or 0 (small block) */
@ -190,7 +188,7 @@ static int _vds_init(vorbis_dsp_state *v,vorbis_info *vi){
int mapnum=ci->mode_param[i]->mapping;
int maptype=ci->map_type[mapnum];
b->mode[i]=_mapping_P[maptype]->look(v,ci->mode_param[i],
ci->map_param[mapnum]);
ci->map_param[mapnum]);
}
return(0);
}
@ -231,7 +229,7 @@ void vorbis_dsp_clear(vorbis_dsp_state *v){
if(v->pcm){
for(i=0;i<vi->channels;i++)
if(v->pcm[i])_ogg_free(v->pcm[i]);
if(v->pcm[i])_ogg_free(v->pcm[i]);
_ogg_free(v->pcm);
if(v->pcmret)_ogg_free(v->pcmret);
}
@ -239,9 +237,9 @@ void vorbis_dsp_clear(vorbis_dsp_state *v){
/* free mode lookups; these are actually vorbis_look_mapping structs */
if(ci){
for(i=0;i<ci->modes;i++){
int mapnum=ci->mode_param[i]->mapping;
int maptype=ci->map_type[mapnum];
if(b && b->mode)_mapping_P[maptype]->free_look(b->mode[i]);
int mapnum=ci->mode_param[i]->mapping;
int maptype=ci->map_type[mapnum];
if(b && b->mode)_mapping_P[maptype]->free_look(b->mode[i]);
}
}
@ -262,7 +260,11 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
vorbis_info *vi=v->vi;
codec_setup_info *ci=(codec_setup_info *)vi->codec_setup;
private_state *b=v->backend_state;
#if CONFIG_CPU == MCF5249
int j;
#else
int i,j;
#endif
if(v->pcm_current>v->pcm_returned && v->pcm_returned!=-1)return(OV_EINVAL);
@ -304,43 +306,64 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
for(j=0;j<vi->channels;j++){
/* the overlap/add section */
if(v->lW){
if(v->W){
/* large/large */
ogg_int32_t *pcm=v->pcm[j]+prevCenter;
ogg_int32_t *p=vb->pcm[j];
for(i=0;i<n1;i++)
pcm[i]+=p[i];
}else{
/* large/small */
ogg_int32_t *pcm=v->pcm[j]+prevCenter+n1/2-n0/2;
ogg_int32_t *p=vb->pcm[j];
for(i=0;i<n0;i++)
pcm[i]+=p[i];
}
if(v->W){
/* large/large */
ogg_int32_t *pcm=v->pcm[j]+prevCenter;
ogg_int32_t *p=vb->pcm[j];
#if CONFIG_CPU == MCF5249
mcf5249_vect_add(pcm, p, n1);
#else
for(i=0;i<n1;i++)
pcm[i]+=p[i];
#endif
}else{
/* large/small */
ogg_int32_t *pcm=v->pcm[j]+prevCenter+n1/2-n0/2;
ogg_int32_t *p=vb->pcm[j];
#if CONFIG_CPU == MCF5249
mcf5249_vect_add(pcm, p, n0);
#else
for(i=0;i<n0;i++)
pcm[i]+=p[i];
#endif
}
}else{
if(v->W){
/* small/large */
ogg_int32_t *pcm=v->pcm[j]+prevCenter;
ogg_int32_t *p=vb->pcm[j]+n1/2-n0/2;
for(i=0;i<n0;i++)
pcm[i]+=p[i];
for(;i<n1/2+n0/2;i++)
pcm[i]=p[i];
}else{
/* small/small */
ogg_int32_t *pcm=v->pcm[j]+prevCenter;
ogg_int32_t *p=vb->pcm[j];
for(i=0;i<n0;i++)
pcm[i]+=p[i];
}
if(v->W){
/* small/large */
ogg_int32_t *pcm=v->pcm[j]+prevCenter;
ogg_int32_t *p=vb->pcm[j]+n1/2-n0/2;
#if CONFIG_CPU == MCF5249
mcf5249_vect_add(pcm, p, n0);
mcf5249_vect_copy(&pcm[n0], &p[n0], n1/2-n0/2);
#else
for(i=0;i<n0;i++)
pcm[i]+=p[i];
for(;i<n1/2+n0/2;i++)
pcm[i]=p[i];
#endif
}else{
/* small/small */
ogg_int32_t *pcm=v->pcm[j]+prevCenter;
ogg_int32_t *p=vb->pcm[j];
#if CONFIG_CPU == MCF5249
mcf5249_vect_add(pcm, p, n0);
#else
for(i=0;i<n0;i++)
pcm[i]+=p[i];
#endif
}
}
/* the copy section */
{
ogg_int32_t *pcm=v->pcm[j]+thisCenter;
ogg_int32_t *p=vb->pcm[j]+n;
for(i=0;i<n;i++)
pcm[i]=p[i];
ogg_int32_t *pcm=v->pcm[j]+thisCenter;
ogg_int32_t *p=vb->pcm[j]+n;
#if CONFIG_CPU == MCF5249
mcf5249_vect_copy(pcm, p, n);
#else
for(i=0;i<n;i++)
pcm[i]=p[i];
#endif
}
}
@ -359,8 +382,8 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
}else{
v->pcm_returned=prevCenter;
v->pcm_current=prevCenter+
ci->blocksizes[v->lW]/4+
ci->blocksizes[v->W]/4;
ci->blocksizes[v->lW]/4+
ci->blocksizes[v->W]/4;
}
}
@ -389,22 +412,22 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
/* is this a short page? */
if(b->sample_count>v->granulepos){
/* corner case; if this is both the first and last audio page,
then spec says the end is cut, not beginning */
if(vb->eofflag){
/* trim the end */
/* no preceding granulepos; assume we started at zero (we'd
have to in a short single-page stream) */
/* granulepos could be -1 due to a seek, but that would result
in a long count, not short count */
/* corner case; if this is both the first and last audio page,
then spec says the end is cut, not beginning */
if(vb->eofflag){
/* trim the end */
/* no preceding granulepos; assume we started at zero (we'd
have to in a short single-page stream) */
/* granulepos could be -1 due to a seek, but that would result
in a long count, not short count */
v->pcm_current-=(b->sample_count-v->granulepos);
}else{
/* trim the beginning */
v->pcm_returned+=(b->sample_count-v->granulepos);
if(v->pcm_returned>v->pcm_current)
v->pcm_returned=v->pcm_current;
}
v->pcm_current-=(b->sample_count-v->granulepos);
}else{
/* trim the beginning */
v->pcm_returned+=(b->sample_count-v->granulepos);
if(v->pcm_returned>v->pcm_current)
v->pcm_returned=v->pcm_current;
}
}
@ -414,16 +437,16 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
if(vb->granulepos!=-1 && v->granulepos!=vb->granulepos){
if(v->granulepos>vb->granulepos){
long extra=v->granulepos-vb->granulepos;
long extra=v->granulepos-vb->granulepos;
if(extra)
if(vb->eofflag){
/* partial last frame. Strip the extra samples off */
v->pcm_current-=extra;
} /* else {Shouldn't happen *unless* the bitstream is out of
spec. Either way, believe the bitstream } */
if(extra)
if(vb->eofflag){
/* partial last frame. Strip the extra samples off */
v->pcm_current-=extra;
} /* else {Shouldn't happen *unless* the bitstream is out of
spec. Either way, believe the bitstream } */
} /* else {Shouldn't happen *unless* the bitstream is out of
spec. Either way, believe the bitstream } */
spec. Either way, believe the bitstream } */
v->granulepos=vb->granulepos;
}
}
@ -441,7 +464,7 @@ int vorbis_synthesis_pcmout(vorbis_dsp_state *v,ogg_int32_t ***pcm){
if(pcm){
int i;
for(i=0;i<vi->channels;i++)
v->pcmret[i]=v->pcm[i]+v->pcm_returned;
v->pcmret[i]=v->pcm[i]+v->pcm_returned;
*pcm=v->pcmret;
}
return(v->pcm_current-v->pcm_returned);

View file

@ -202,10 +202,6 @@ static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){
int nonzero[CHANNELS];
void *floormemo[CHANNELS];
/* test for too many channels;
(maybe this is can be checked at the stream level?) */
if (vi->channels > CHANNELS) return (-1);
/* time domain information decode (note that applying the
information would have to happen later; we'll probably add a
function entry to the harness for that later */
@ -286,13 +282,14 @@ static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){
//_analysis_output("residue",seq+j,vb->pcm[j],-8,n/2,0,0);
/* compute and apply spectral envelope */
#if 0
for(i=0;i<vi->channels;i++){
ogg_int32_t *pcm=vb->pcm[i];
int submap=info->chmuxlist[i];
look->floor_func[submap]->
inverse2(vb,look->floor_look[submap],floormemo[i],pcm);
}
#endif
//for(j=0;j<vi->channels;j++)
//_analysis_output("mdct",seq+j,vb->pcm[j],-24,n/2,0,1);
@ -301,8 +298,11 @@ static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){
for(i=0;i<vi->channels;i++){
ogg_int32_t *pcm=vb->pcm[i];
int submap=info->chmuxlist[i];
if(nonzero[i]) {
if(nonzero[i]) {
look->floor_func[submap]->
inverse2(vb,look->floor_look[submap],floormemo[i],pcm);
mdct_backward(n, pcm, pcm);
/* window the data */
_vorbis_apply_window(pcm,b->window,ci->blocksizes,vb->lW,vb->W,vb->nW);

View file

@ -341,10 +341,6 @@ void mdct_backward(int n, DATA_TYPE *in, DATA_TYPE *out) {
int shift;
int step;
#if CONFIG_CPU == MCF5249
/* mcf5249_init_mac(); */ /* should be redundant */
#endif
for (shift=6;!(n&(1<<shift));shift++);
shift=13-shift;
step=2<<shift;

View file

@ -33,7 +33,7 @@
#define CHANNELS 2
static ogg_int32_t *ipcm_vect[CHANNELS] IDATA_ATTR;
static ogg_int32_t ipcm_buff[CHANNELS*IRAM_PCM_END] IDATA_ATTR;
static ogg_int32_t ipcm_buff[CHANNELS*IRAM_PCM_END] IDATA_ATTR LINE_ATTR;
int vorbis_synthesis(vorbis_block *vb,ogg_packet *op,int decodep){
vorbis_dsp_state *vd=vb->vd;
@ -73,10 +73,10 @@ int vorbis_synthesis(vorbis_block *vb,ogg_packet *op,int decodep){
vb->sequence=op->packetno-3; /* first block is third packet */
vb->eofflag=op->e_o_s;
if(decodep){
if(decodep && vi->channels<=CHANNELS){
/* alloc pcm passback storage */
vb->pcmend=ci->blocksizes[vb->W];
if (vi->channels <= CHANNELS && vb->pcmend<=IRAM_PCM_END) {
if (vb->pcmend<=IRAM_PCM_END) {
/* use statically allocated iram buffer */
vb->pcm = ipcm_vect;
for(i=0; i<CHANNELS; i++)

View file

@ -32,7 +32,7 @@ static LOOKUP_T vwin64[32] = {
X(0x7fdd78a5), X(0x7ff6ec6d), X(0x7ffed0e9), X(0x7ffffc3f),
};
static LOOKUP_T vwin128[64] IDATA_ATTR = {
static LOOKUP_T vwin128[64] IDATA_ATTR LINE_ATTR = {
X(0x0007c04d), X(0x0045bb89), X(0x00c18b87), X(0x017ae294),
X(0x02714a4e), X(0x03a4217a), X(0x05129952), X(0x06bbb24f),
X(0x089e38a1), X(0x0ab8c073), X(0x0d09a228), X(0x0f8ef6bd),
@ -51,7 +51,7 @@ static LOOKUP_T vwin128[64] IDATA_ATTR = {
X(0x7ffdcf39), X(0x7fff6dac), X(0x7fffed01), X(0x7fffffc4),
};
static LOOKUP_T vwin256[128] IDATA_ATTR = {
static LOOKUP_T vwin256[128] IDATA_ATTR LINE_ATTR = {
X(0x0001f018), X(0x00117066), X(0x00306e9e), X(0x005ee5f1),
X(0x009ccf26), X(0x00ea208b), X(0x0146cdea), X(0x01b2c87f),
X(0x022dfedf), X(0x02b85ced), X(0x0351cbbd), X(0x03fa317f),
@ -86,7 +86,7 @@ static LOOKUP_T vwin256[128] IDATA_ATTR = {
X(0x7fffdcd2), X(0x7ffff6d6), X(0x7ffffed0), X(0x7ffffffc),
};
static LOOKUP_T vwin512[256] IDATA_ATTR = {
static LOOKUP_T vwin512[256] IDATA_ATTR LINE_ATTR = {
X(0x00007c06), X(0x00045c32), X(0x000c1c62), X(0x0017bc4c),
X(0x00273b7a), X(0x003a9955), X(0x0051d51c), X(0x006cede7),
X(0x008be2a9), X(0x00aeb22a), X(0x00d55b0d), X(0x00ffdbcc),
@ -284,7 +284,7 @@ static LOOKUP_T vwin1024[512] = {
X(0x7fffffdd), X(0x7ffffff7), X(0x7fffffff), X(0x7fffffff),
};
static LOOKUP_T vwin2048[1024] IDATA_ATTR = {
static LOOKUP_T vwin2048[1024] IDATA_ATTR LINE_ATTR = {
X(0x000007c0), X(0x000045c4), X(0x0000c1ca), X(0x00017bd3),
X(0x000273de), X(0x0003a9eb), X(0x00051df9), X(0x0006d007),
X(0x0008c014), X(0x000aee1e), X(0x000d5a25), X(0x00100428),

View file

@ -115,3 +115,4 @@ Alessio Lenzi
David Bryant
Martin Arver
Alexander Spyridakis
Pedro Baltazar Vasconcelos