1
0
Fork 0
forked from len0rd/rockbox

SPC codec: enable echo on ColdFire CPU. Do a couple general small optimizations. Preswap some data when running DSP for big endian.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@12410 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Michael Sevakis 2007-02-20 10:27:39 +00:00
parent 8336eb2390
commit d31162a9d3
2 changed files with 191 additions and 31 deletions

View file

@ -51,8 +51,13 @@ CODEC_HEADER
/* Disable gaussian interpolation */
#define SPC_NOINTERP 1
/* Echo processing switch: SPC_NOECHO is 0 (echo enabled) when building
   for a ColdFire CPU and 1 (echo disabled) otherwise. */
#ifdef CPU_COLDFIRE
#define SPC_NOECHO 0
#else
#define SPC_NOECHO 1
#endif
#else
/* Don't cache BRR waves */
#define SPC_BRRCACHE 0
@ -100,6 +105,8 @@ static inline void set_le16( void* p, unsigned n )
#define GET_LE16( addr ) get_le16( addr )
#define SET_LE16( addr, data ) set_le16( addr, data )
#define INT16A( addr ) (*(uint16_t*) (addr))
#define INT16SA( addr ) (*(int16_t*) (addr))
#ifdef ROCKBOX_LITTLE_ENDIAN
#define GET_LE16A( addr ) (*(uint16_t*) (addr))
@ -794,6 +801,10 @@ enum codec_status codec_main(void)
{
memcpy( spc_emu.cycle_table, cycle_table, sizeof cycle_table );
#ifdef CPU_COLDFIRE
coldfire_set_macsr(EMAC_SATURATE);
#endif
do
{
DEBUGF("SPC: next_track\n");

View file

@ -107,6 +107,19 @@ static int16_t BRRcache [0x20000 + 32];
enum { fir_buf_half = 8 };
#ifdef CPU_COLDFIRE
/* Echo FIR history buffer for the ColdFire build.
 *
 * global because of the large alignment requirement for hardware masking -
 * L-R interleaved 16-bit samples for easy loading and mac.w use.
 *
 * fir_buf_size is the size of fir_buf in bytes (fir_buf_half entries of
 * int32_t). The buffer is aligned to twice that size, so the fir_buf_size
 * bit is clear in the address of every element. fir_buf_mask clears exactly
 * that bit, so ANDing an address that has stepped past the end of the
 * buffer (by less than fir_buf_size bytes) with the mask brings it back
 * inside the buffer.
 * NOTE(review): the mask is loaded into the %mask register in DSP_run_;
 * presumably the hardware applies it on the masked "&" operand loads -
 * confirm against the ColdFire EMAC documentation.
 */
enum
{
/* buffer size in bytes */
fir_buf_size = fir_buf_half * sizeof ( int32_t ),
/* all ones except the fir_buf_size bit */
fir_buf_mask = ~fir_buf_size
};
/* one int32_t per history entry, aligned to 2 * fir_buf_size bytes */
int32_t fir_buf[fir_buf_half]
__attribute__ ((aligned (fir_buf_size*2))) IBSS_ATTR;
#endif /* CPU_COLDFIRE */
struct Spc_Dsp
{
union
@ -122,11 +135,21 @@ struct Spc_Dsp
int noise_count;
uint16_t noise; /* also read as int16_t */
#ifdef CPU_COLDFIRE
/* circularly hardware masked address */
int32_t *fir_ptr;
/* wrapped address just behind current position -
allows mac.w to increment and mask fir_ptr */
int32_t *last_fir_ptr;
/* copy of echo FIR constants as int16_t for use with mac.w */
int16_t fir_coeff[voice_count];
#else
/* fir_buf [i + 8] == fir_buf [i], to avoid wrap checking in FIR code */
int fir_pos; /* (0 to 7) */
int fir_buf [fir_buf_half * 2] [2];
/* copy of echo FIR constants as int, for faster access */
int fir_coeff [voice_count];
#endif
struct voice_t voice_state [voice_count];
@ -149,7 +172,6 @@ static void DSP_reset( struct Spc_Dsp* this )
this->echo_pos = 0;
this->noise_count = 0;
this->noise = 2;
this->fir_pos = 0;
this->r.g.flags = 0xE0; /* reset, mute, echo off */
this->r.g.key_ons = 0;
@ -169,8 +191,16 @@ static void DSP_reset( struct Spc_Dsp* this )
for ( i = 0; i < 256; i++ )
this->wave_entry [i].start_addr = -1;
#endif
#ifdef CPU_COLDFIRE
this->fir_ptr = fir_buf;
this->last_fir_ptr = &fir_buf [7];
memset( fir_buf, 0, sizeof fir_buf );
#else
this->fir_pos = 0;
memset( this->fir_buf, 0, sizeof this->fir_buf );
#endif
assert( offsetof (struct globals_t,unused9 [2]) == register_count );
assert( sizeof (this->r.voice) == register_count );
}
@ -394,7 +424,7 @@ static void key_on(struct Spc_Dsp* const this, struct voice_t* const voice,
voice->envx = 0;
voice->env_mode = state_attack;
voice->env_timer = env_rate_init; /* TODO: inaccurate? */
unsigned start_addr = GET_LE16A( sd [raw_voice->waveform].start );
unsigned start_addr = GET_LE16A(sd [raw_voice->waveform].start);
#if !SPC_BRRCACHE
{
voice->addr = RAM + start_addr;
@ -442,7 +472,7 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
EXIT_TIMER(cpu);
ENTER_TIMER(dsp);
#endif
/* Here we check for keys on/off. Docs say that successive writes
to KON/KOF must be separated by at least 2 Ts periods or risk
being neglected. Therefore DSP only looks at these during an
@ -479,16 +509,42 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
struct src_dir const* const sd =
(struct src_dir*) &RAM [this->r.g.wave_page * 0x100];
#ifdef ROCKBOX_BIG_ENDIAN
/* Convert endianness before entering the loops - these values
get used a lot */
const uint32_t rates[voice_count] =
{
GET_LE16A( this->r.voice[0].rate ) & 0x3FFF,
GET_LE16A( this->r.voice[1].rate ) & 0x3FFF,
GET_LE16A( this->r.voice[2].rate ) & 0x3FFF,
GET_LE16A( this->r.voice[3].rate ) & 0x3FFF,
GET_LE16A( this->r.voice[4].rate ) & 0x3FFF,
GET_LE16A( this->r.voice[5].rate ) & 0x3FFF,
GET_LE16A( this->r.voice[6].rate ) & 0x3FFF,
GET_LE16A( this->r.voice[7].rate ) & 0x3FFF,
};
#define VOICE_RATE(x) *(x)
#define IF_RBE(...) __VA_ARGS__
#ifdef CPU_COLDFIRE
/* Initialize mask register with the buffer address mask */
asm ("move.l %[m], %%mask" : : [m]"i"(fir_buf_mask));
const int echo_delay_mask = (this->r.g.echo_delay & 15) * 0x800 - 1;
const int echo_page = this->r.g.echo_page * 0x100;
#endif /* CPU_COLDFIRE */
#else
#define VOICE_RATE(x) (INT16A(raw_voice->rate) & 0x3FFF)
#define IF_RBE(...)
#endif /* ROCKBOX_BIG_ENDIAN */
#if !SPC_NOINTERP
int const slow_gaussian = (this->r.g.pitch_mods >> 1) |
this->r.g.noise_enables;
#endif
/* (g.flags & 0x40) ? 30 : 14 */
int const global_muting = ((this->r.g.flags & 0x40) >> 2) + 14;
int const global_vol_0 = this->r.g.volume_0;
int const global_vol_1 = this->r.g.volume_1;
int const global_muting = ((this->r.g.flags & 0x40) >> 2) + 14 - 8;
int const global_vol_0 = this->r.g.volume_0;
int const global_vol_1 = this->r.g.volume_1;
/* each rate divides exactly into 0x7800 without remainder */
int const env_rate_init = 0x7800;
@ -525,7 +581,8 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
struct raw_voice_t * raw_voice = this->r.voice;
struct voice_t* voice = this->voice_state;
int vbit = 1;
for ( ; vbit < 0x100; vbit <<= 1, ++voice, ++raw_voice )
IF_RBE( const uint32_t* vr = rates; )
for ( ; vbit < 0x100; vbit <<= 1, ++voice, ++raw_voice IF_RBE( , ++vr ) )
{
/* pregen involves checking keyon, etc */
#if 0
@ -816,7 +873,7 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
#endif
/* Get rate (with possible modulation) */
int rate = GET_LE16A( raw_voice->rate ) & 0x3FFF;
int rate = VOICE_RATE(vr);
if ( this->r.g.pitch_mods & vbit )
rate = (rate * (prev_outx + 32768)) >> 15;
@ -918,19 +975,20 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
{
uint32_t f = voice->position;
int32_t y1;
asm (
"move.l %[f], %[y0] \n" /* separate fraction */
"and.l #0xfff, %[f] \n" /* and whole parts */
"lsr.l %[sh], %[y0] \n"
"move.l 2(%[s], %[y0].l*2), %[y1] \n" /* load two samples */
"move.l %[y1], %[y0] \n" /* separate samples */
"ext.l %[y1] \n" /* y0=s[1], y1=s[2] */
"swap %[y0] \n"
"ext.l %[y0] \n"
"sub.l %[y0], %[y1] \n" /* diff = y1 - y0 */
"muls.l %[f], %[y1] \n" /* y0 += f*diff */
"asr.l %[sh], %[y1] \n"
"add.l %[y1], %[y0] \n"
"move.l %[f], %[y0] \r\n" /* separate fraction */
"and.l #0xfff, %[f] \r\n" /* and whole parts */
"lsr.l %[sh], %[y0] \r\n"
"move.l 2(%[s], %[y0].l*2), %[y1] \r\n" /* load two samples */
"move.l %[y1], %[y0] \r\n" /* separate samples */
"ext.l %[y1] \r\n" /* y0=s[1], y1=s[2] */
"swap %[y0] \r\n"
"ext.l %[y0] \r\n"
"sub.l %[y0], %[y1] \r\n" /* diff = y1 - y0 */
"muls.l %[f], %[y1] \r\n" /* y0 += f*diff */
"asr.l %[sh], %[y1] \r\n"
"add.l %[y1], %[y0] \r\n"
: [f]"+&d"(f), [y0]"=&d"(output), [y1]"=&d"(y1)
: [s]"a"(voice->samples), [sh]"r"(12)
);
@ -1020,6 +1078,100 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
/* end of voice loop */
#if !SPC_NOECHO
#ifdef CPU_COLDFIRE
/* Read feedback from echo buffer */
int echo_pos = this->echo_pos;
uint8_t* const echo_ptr = RAM + ((echo_page + echo_pos) & 0xFFFF);
echo_pos = (echo_pos + 4) & echo_delay_mask;
this->echo_pos = echo_pos;
int fb = swap_odd_even32(*(int32_t *)echo_ptr);
int out_0, out_1;
/* Keep last 8 samples */
*this->last_fir_ptr = fb;
this->last_fir_ptr = this->fir_ptr;
/* Apply echo FIR filter to output - circular buffer is hardware
incremented and masked; FIR coefficients and buffer history are
loaded in parallel with multiply accumulate operations. Apply
scale factor to do hardware clipping later. */
int _0, _1, _2;
asm (
"move.l (%[fir_c]) , %[_2] \r\n"
"mac.w %[fb]u, %[_2]u, <<, (%[fir_p])+&, %[_0], %%acc0 \r\n"
"mac.w %[fb]l, %[_2]u, <<, (%[fir_p])& , %[_1], %%acc1 \r\n"
"mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
"mac.w %[_0]l, %[_2]l, <<, 4(%[fir_c]) , %[_2], %%acc1 \r\n"
"mac.w %[_1]u, %[_2]u, <<, 4(%[fir_p])& , %[_0], %%acc0 \r\n"
"mac.w %[_1]l, %[_2]u, <<, 8(%[fir_p])& , %[_1], %%acc1 \r\n"
"mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
"mac.w %[_0]l, %[_2]l, <<, 8(%[fir_c]) , %[_2], %%acc1 \r\n"
"mac.w %[_1]u, %[_2]u, <<, 12(%[fir_p])& , %[_0], %%acc0 \r\n"
"mac.w %[_1]l, %[_2]u, <<, 16(%[fir_p])& , %[_1], %%acc1 \r\n"
"mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
"mac.w %[_0]l, %[_2]l, <<, 12(%[fir_c]) , %[_2], %%acc1 \r\n"
"mac.w %[_1]u, %[_2]u, <<, 20(%[fir_p])& , %[_0], %%acc0 \r\n"
"mac.w %[_1]l, %[_2]u, << , %%acc1 \r\n"
"mac.w %[_0]u, %[_2]l, << , %%acc0 \r\n"
"mac.w %[_0]l, %[_2]l, << , %%acc1 \r\n"
"movclr.l %%acc0, %[out_0] \r\n"
"movclr.l %%acc1, %[out_1] \r\n"
: [_0]"=&r"(_0), [_1]"=&r"(_1), [_2]"=&r"(_2),
[fir_p]"+a"(this->fir_ptr),
[out_0]"=r"(out_0), [out_1]"=r"(out_1)
: [fir_c]"a"(this->fir_coeff), [fb]"r"(fb)
);
/* Generate output */
asm (
"mac.l %[chans_0], %[gv_0] , %%acc2 \r\n"
"mac.l %[chans_1], %[gv_1] , %%acc3 \r\n"
"mac.l %[ev_0], %[out_0], >>, %%acc2 \r\n"
"mac.l %[ev_1], %[out_1], >>, %%acc3 \r\n"
:
: [chans_0]"r"(chans_0), [gv_0]"r"(global_vol_0),
[ev_0]"r"((int)this->r.g.echo_volume_0),
[chans_1]"r"(chans_1), [gv_1]"r"(global_vol_1),
[ev_1]"r"((int)this->r.g.echo_volume_1),
[out_0]"r"(out_0), [out_1]"r"(out_1)
);
/* Feedback into echo buffer */
if ( !(this->r.g.flags & 0x20) )
{
asm (
"lsl.l %[sh], %[e0] \r\n"
"move.l %[e0], %%acc0 \r\n"
"mac.l %[out_0], %[ef], <<, %%acc0 \r\n"
"lsl.l %[sh], %[e1] \r\n"
"move.l %[e1], %%acc1 \r\n"
"mac.l %[out_1], %[ef], <<, %%acc1 \r\n"
"movclr.l %%acc0, %[e0] \r\n"
"movclr.l %%acc1, %[e1] \r\n"
"swap %[e1] \r\n"
"move.w %[e1], %[e0] \r\n"
: [e0]"+&d"(echo_0), [e1]"+&d"(echo_1)
: [out_0]"r"(out_0), [out_1]"r"(out_1),
[ef]"r"((int)this->r.g.echo_feedback),
[sh]"d"(9)
);
*(int32_t *)echo_ptr = swap_odd_even32(echo_0);
}
/* Output final samples */
asm (
"movclr.l %%acc2, %[out_0] \r\n"
"movclr.l %%acc3, %[out_1] \r\n"
"asr.l %[gm], %[out_0] \r\n"
"asr.l %[gm], %[out_1] \r\n"
: [out_0]"=&d"(out_0), [out_1]"=&d"(out_1)
: [gm]"d"(global_muting)
);
out_buf [ 0] = out_0;
out_buf [WAV_CHUNK_SIZE] = out_1;
out_buf ++;
#else /* !CPU_COLDFIRE */
/* Read feedback from echo buffer */
int echo_pos = this->echo_pos;
uint8_t* const echo_ptr = RAM +
@ -1061,10 +1213,8 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
>> global_muting;
int amp_1 = (chans_1 * global_vol_1 + fb_1 * this->r.g.echo_volume_1)
>> global_muting;
CLAMP16( amp_0, amp_0 );
out_buf [0] = amp_0 * (1 << 8);
CLAMP16( amp_1, amp_1 );
out_buf [WAV_CHUNK_SIZE] = amp_1 * (1 << 8);
out_buf [ 0] = amp_0;
out_buf [WAV_CHUNK_SIZE] = amp_1;
out_buf ++;
/* Feedback into echo buffer */
@ -1077,14 +1227,13 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf )
CLAMP16( e1, e1 );
SET_LE16A( echo_ptr + 2, e1 );
}
#endif /* CPU_COLDFIRE */
#else
/* Generate output */
/* Generate output */
int amp_0 = (chans_0 * global_vol_0) >> global_muting;
int amp_1 = (chans_1 * global_vol_1) >> global_muting;
CLAMP16( amp_0, amp_0 );
out_buf [0] = amp_0 * (1 << 8);
CLAMP16( amp_1, amp_1 );
out_buf [WAV_CHUNK_SIZE] = amp_1 * (1 << 8);
out_buf [ 0] = amp_0;
out_buf [WAV_CHUNK_SIZE] = amp_1;
out_buf ++;
#endif
}