forked from len0rd/rockbox
		
	SPC codec: enable echo on ColdFire CPU. Do a couple general small optimizations. Preswap some data when running DSP for big endian.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@12410 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
		
							parent
							
								
									8336eb2390
								
							
						
					
					
						commit
						d31162a9d3
					
				
					 2 changed files with 191 additions and 31 deletions
				
			
		|  | @ -51,8 +51,13 @@ CODEC_HEADER | |||
|     /* Disable gaussian interpolation */ | ||||
|     #define SPC_NOINTERP 1 | ||||
| 
 | ||||
| #ifndef CPU_COLDFIRE | ||||
|     /* Disable echo processing */ | ||||
|     #define SPC_NOECHO 1 | ||||
| #else | ||||
|     /* Enable echo processing */ | ||||
|     #define SPC_NOECHO 0 | ||||
| #endif | ||||
| #else | ||||
|     /* Don't cache BRR waves */ | ||||
|     #define SPC_BRRCACHE 0  | ||||
|  | @ -100,6 +105,8 @@ static inline void set_le16( void* p, unsigned n ) | |||
| 
 | ||||
| #define GET_LE16( addr )        get_le16( addr ) | ||||
| #define SET_LE16( addr, data )  set_le16( addr, data ) | ||||
| #define INT16A( addr ) (*(uint16_t*) (addr)) | ||||
| #define INT16SA( addr ) (*(int16_t*) (addr)) | ||||
| 
 | ||||
| #ifdef ROCKBOX_LITTLE_ENDIAN | ||||
|     #define GET_LE16A( addr )       (*(uint16_t*) (addr)) | ||||
|  | @ -794,6 +801,10 @@ enum codec_status codec_main(void) | |||
| { | ||||
|     memcpy( spc_emu.cycle_table, cycle_table, sizeof cycle_table ); | ||||
| 
 | ||||
| #ifdef CPU_COLDFIRE | ||||
|     coldfire_set_macsr(EMAC_SATURATE); | ||||
| #endif | ||||
| 
 | ||||
|     do | ||||
|     { | ||||
|         DEBUGF("SPC: next_track\n"); | ||||
|  |  | |||
|  | @ -107,6 +107,19 @@ static int16_t BRRcache [0x20000 + 32]; | |||
| 
 | ||||
| enum { fir_buf_half = 8 }; | ||||
| 
 | ||||
| #ifdef CPU_COLDFIRE | ||||
| /* global because of the large aligment requirement for hardware masking -
 | ||||
|  * L-R interleaved 16-bit samples for easy loading and mac.w use. | ||||
|  */ | ||||
| enum | ||||
| { | ||||
|     fir_buf_size = fir_buf_half * sizeof ( int32_t ), | ||||
|     fir_buf_mask = ~fir_buf_size | ||||
| }; | ||||
| int32_t fir_buf[fir_buf_half] | ||||
|     __attribute__ ((aligned (fir_buf_size*2))) IBSS_ATTR; | ||||
| #endif /* CPU_COLDFIRE */ | ||||
| 
 | ||||
| struct Spc_Dsp | ||||
| { | ||||
|     union | ||||
|  | @ -122,11 +135,21 @@ struct Spc_Dsp | |||
|     int noise_count; | ||||
|     uint16_t noise; /* also read as int16_t */ | ||||
|      | ||||
| #ifdef CPU_COLDFIRE | ||||
|     /* circularly hardware masked address */ | ||||
|     int32_t *fir_ptr; | ||||
|     /* wrapped address just behind current position -
 | ||||
|        allows mac.w to increment and mask fir_ptr */ | ||||
|     int32_t *last_fir_ptr; | ||||
|     /* copy of echo FIR constants as int16_t for use with mac.w */ | ||||
|     int16_t fir_coeff[voice_count]; | ||||
| #else | ||||
|     /* fir_buf [i + 8] == fir_buf [i], to avoid wrap checking in FIR code */ | ||||
|     int fir_pos; /* (0 to 7) */ | ||||
|     int fir_buf [fir_buf_half * 2] [2]; | ||||
|     /* copy of echo FIR constants as int, for faster access */ | ||||
|     int fir_coeff [voice_count];  | ||||
| #endif | ||||
|      | ||||
|     struct voice_t voice_state [voice_count]; | ||||
|      | ||||
|  | @ -149,7 +172,6 @@ static void DSP_reset( struct Spc_Dsp* this ) | |||
|     this->echo_pos    = 0; | ||||
|     this->noise_count = 0; | ||||
|     this->noise       = 2; | ||||
|     this->fir_pos     = 0; | ||||
|      | ||||
|     this->r.g.flags   = 0xE0; /* reset, mute, echo off */ | ||||
|     this->r.g.key_ons = 0; | ||||
|  | @ -169,8 +191,16 @@ static void DSP_reset( struct Spc_Dsp* this ) | |||
|         for ( i = 0; i < 256; i++ ) | ||||
|             this->wave_entry [i].start_addr = -1; | ||||
|     #endif | ||||
|      | ||||
| 
 | ||||
| #ifdef CPU_COLDFIRE | ||||
|     this->fir_ptr      = fir_buf; | ||||
|     this->last_fir_ptr = &fir_buf [7]; | ||||
|     memset( fir_buf, 0, sizeof fir_buf ); | ||||
| #else | ||||
|     this->fir_pos = 0; | ||||
|     memset( this->fir_buf, 0, sizeof this->fir_buf ); | ||||
| #endif | ||||
| 
 | ||||
|     assert( offsetof (struct globals_t,unused9 [2]) == register_count ); | ||||
|     assert( sizeof (this->r.voice) == register_count ); | ||||
| } | ||||
|  | @ -394,7 +424,7 @@ static void key_on(struct Spc_Dsp* const this, struct voice_t* const voice, | |||
|         voice->envx         = 0; | ||||
|         voice->env_mode     = state_attack; | ||||
|         voice->env_timer    = env_rate_init; /* TODO: inaccurate? */ | ||||
|         unsigned start_addr = GET_LE16A( sd [raw_voice->waveform].start ); | ||||
|         unsigned start_addr = GET_LE16A(sd [raw_voice->waveform].start); | ||||
|         #if !SPC_BRRCACHE | ||||
|         { | ||||
|             voice->addr = RAM + start_addr; | ||||
|  | @ -442,7 +472,7 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) | |||
|     EXIT_TIMER(cpu); | ||||
|     ENTER_TIMER(dsp); | ||||
| #endif | ||||
|      | ||||
| 
 | ||||
|     /* Here we check for keys on/off.  Docs say that successive writes
 | ||||
|        to KON/KOF must be separated by at least 2 Ts periods or risk | ||||
|        being neglected.  Therefore DSP only looks at these during an | ||||
|  | @ -479,16 +509,42 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) | |||
|      | ||||
|     struct src_dir const* const sd = | ||||
|         (struct src_dir*) &RAM [this->r.g.wave_page * 0x100]; | ||||
| 
 | ||||
|     #ifdef ROCKBOX_BIG_ENDIAN | ||||
|         /* Convert endiannesses before entering loops - these
 | ||||
|            get used alot */ | ||||
|         const uint32_t rates[voice_count] = | ||||
|         { | ||||
|             GET_LE16A( this->r.voice[0].rate ) & 0x3FFF, | ||||
|             GET_LE16A( this->r.voice[1].rate ) & 0x3FFF, | ||||
|             GET_LE16A( this->r.voice[2].rate ) & 0x3FFF, | ||||
|             GET_LE16A( this->r.voice[3].rate ) & 0x3FFF, | ||||
|             GET_LE16A( this->r.voice[4].rate ) & 0x3FFF, | ||||
|             GET_LE16A( this->r.voice[5].rate ) & 0x3FFF, | ||||
|             GET_LE16A( this->r.voice[6].rate ) & 0x3FFF, | ||||
|             GET_LE16A( this->r.voice[7].rate ) & 0x3FFF, | ||||
|         }; | ||||
|         #define VOICE_RATE(x) *(x) | ||||
|         #define IF_RBE(...) __VA_ARGS__ | ||||
|     #ifdef CPU_COLDFIRE | ||||
|         /* Initialize mask register with the buffer address mask */ | ||||
|         asm ("move.l %[m], %%mask" : : [m]"i"(fir_buf_mask)); | ||||
|         const int echo_delay_mask = (this->r.g.echo_delay & 15) * 0x800 - 1; | ||||
|         const int echo_page       = this->r.g.echo_page * 0x100; | ||||
|     #endif /* CPU_COLDFIRE */ | ||||
|     #else | ||||
|         #define VOICE_RATE(x) (INT16A(raw_voice->rate) & 0x3FFF) | ||||
|         #define IF_RBE(...) | ||||
|     #endif /* ROCKBOX_BIG_ENDIAN */ | ||||
|      | ||||
| #if !SPC_NOINTERP | ||||
|     int const slow_gaussian = (this->r.g.pitch_mods >> 1) | | ||||
|         this->r.g.noise_enables; | ||||
| #endif | ||||
|     /* (g.flags & 0x40) ? 30 : 14 */ | ||||
|     int const global_muting = ((this->r.g.flags & 0x40) >> 2) + 14;  | ||||
|      | ||||
|     int const global_vol_0 = this->r.g.volume_0; | ||||
|     int const global_vol_1 = this->r.g.volume_1; | ||||
|     int const global_muting = ((this->r.g.flags & 0x40) >> 2) + 14 - 8;  | ||||
|     int const global_vol_0  = this->r.g.volume_0; | ||||
|     int const global_vol_1  = this->r.g.volume_1; | ||||
|      | ||||
|     /* each rate divides exactly into 0x7800 without remainder */ | ||||
|     int const env_rate_init = 0x7800; | ||||
|  | @ -525,7 +581,8 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) | |||
|         struct raw_voice_t * raw_voice = this->r.voice; | ||||
|         struct voice_t* voice = this->voice_state; | ||||
|         int vbit = 1; | ||||
|         for ( ; vbit < 0x100; vbit <<= 1, ++voice, ++raw_voice ) | ||||
|         IF_RBE( const uint32_t* vr = rates; ) | ||||
|         for ( ; vbit < 0x100; vbit <<= 1, ++voice, ++raw_voice IF_RBE( , ++vr ) ) | ||||
|         { | ||||
|             /* pregen involves checking keyon, etc */ | ||||
| #if 0 | ||||
|  | @ -816,7 +873,7 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) | |||
|             #endif | ||||
| 
 | ||||
|             /* Get rate (with possible modulation) */ | ||||
|             int rate = GET_LE16A( raw_voice->rate ) & 0x3FFF; | ||||
|             int rate = VOICE_RATE(vr); | ||||
|             if ( this->r.g.pitch_mods & vbit ) | ||||
|                 rate = (rate * (prev_outx + 32768)) >> 15; | ||||
| 
 | ||||
|  | @ -918,19 +975,20 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) | |||
|             { | ||||
|                 uint32_t f = voice->position; | ||||
|                 int32_t y1; | ||||
| 
 | ||||
|                 asm ( | ||||
|               "move.l     %[f], %[y0]               \n" /* separate fraction */ | ||||
|               "and.l      #0xfff, %[f]              \n" /* and whole parts   */ | ||||
|               "lsr.l      %[sh], %[y0]              \n" | ||||
|               "move.l     2(%[s], %[y0].l*2), %[y1] \n" /* load two samples  */ | ||||
|               "move.l     %[y1], %[y0]              \n" /* separate samples  */ | ||||
|               "ext.l      %[y1]                     \n" /* y0=s[1], y1=s[2]  */ | ||||
|               "swap       %[y0]                     \n" | ||||
|               "ext.l      %[y0]                     \n" | ||||
|               "sub.l      %[y0], %[y1]              \n" /* diff = y1 - y0    */ | ||||
|               "muls.l     %[f], %[y1]               \n" /* y0 += f*diff      */ | ||||
|               "asr.l      %[sh], %[y1]              \n" | ||||
|               "add.l      %[y1], %[y0]              \n" | ||||
|               "move.l     %[f], %[y0]               \r\n" /* separate fraction */ | ||||
|               "and.l      #0xfff, %[f]              \r\n" /* and whole parts   */ | ||||
|               "lsr.l      %[sh], %[y0]              \r\n" | ||||
|               "move.l     2(%[s], %[y0].l*2), %[y1] \r\n" /* load two samples  */ | ||||
|               "move.l     %[y1], %[y0]              \r\n" /* separate samples  */ | ||||
|               "ext.l      %[y1]                     \r\n" /* y0=s[1], y1=s[2]  */ | ||||
|               "swap       %[y0]                     \r\n" | ||||
|               "ext.l      %[y0]                     \r\n" | ||||
|               "sub.l      %[y0], %[y1]              \r\n" /* diff = y1 - y0    */ | ||||
|               "muls.l     %[f], %[y1]               \r\n" /* y0 += f*diff      */ | ||||
|               "asr.l      %[sh], %[y1]              \r\n" | ||||
|               "add.l      %[y1], %[y0]              \r\n" | ||||
|               : [f]"+&d"(f), [y0]"=&d"(output), [y1]"=&d"(y1) | ||||
|               : [s]"a"(voice->samples), [sh]"r"(12) | ||||
|                     ); | ||||
|  | @ -1020,6 +1078,100 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) | |||
|         /* end of voice loop */ | ||||
|          | ||||
|     #if !SPC_NOECHO | ||||
|     #ifdef CPU_COLDFIRE | ||||
|         /* Read feedback from echo buffer */ | ||||
|         int echo_pos = this->echo_pos; | ||||
|         uint8_t* const echo_ptr = RAM + ((echo_page + echo_pos) & 0xFFFF); | ||||
|         echo_pos = (echo_pos + 4) & echo_delay_mask; | ||||
|         this->echo_pos = echo_pos; | ||||
|         int fb = swap_odd_even32(*(int32_t *)echo_ptr); | ||||
|         int out_0, out_1; | ||||
| 
 | ||||
|         /* Keep last 8 samples */ | ||||
|         *this->last_fir_ptr = fb; | ||||
|         this->last_fir_ptr  = this->fir_ptr; | ||||
| 
 | ||||
|         /* Apply echo FIR filter to output - circular buffer is hardware
 | ||||
|            incremented and masked; FIR coefficients and buffer history are | ||||
|            loaded in parallel with multiply accumulate operations. Apply | ||||
|            scale factor to do hardware clipping later. */ | ||||
|         int _0, _1, _2; | ||||
|         asm ( | ||||
|         "move.l                           (%[fir_c])  , %[_2]         \r\n" | ||||
|         "mac.w      %[fb]u, %[_2]u, <<,   (%[fir_p])+&, %[_0], %%acc0 \r\n" | ||||
|         "mac.w      %[fb]l, %[_2]u, <<,   (%[fir_p])& , %[_1], %%acc1 \r\n" | ||||
|         "mac.w      %[_0]u, %[_2]l, <<                       , %%acc0 \r\n" | ||||
|         "mac.w      %[_0]l, %[_2]l, <<,  4(%[fir_c])  , %[_2], %%acc1 \r\n" | ||||
|         "mac.w      %[_1]u, %[_2]u, <<,  4(%[fir_p])& , %[_0], %%acc0 \r\n" | ||||
|         "mac.w      %[_1]l, %[_2]u, <<,  8(%[fir_p])& , %[_1], %%acc1 \r\n" | ||||
|         "mac.w      %[_0]u, %[_2]l, <<                       , %%acc0 \r\n" | ||||
|         "mac.w      %[_0]l, %[_2]l, <<,  8(%[fir_c])  , %[_2], %%acc1 \r\n" | ||||
|         "mac.w      %[_1]u, %[_2]u, <<, 12(%[fir_p])& , %[_0], %%acc0 \r\n" | ||||
|         "mac.w      %[_1]l, %[_2]u, <<, 16(%[fir_p])& , %[_1], %%acc1 \r\n" | ||||
|         "mac.w      %[_0]u, %[_2]l, <<                       , %%acc0 \r\n" | ||||
|         "mac.w      %[_0]l, %[_2]l, <<, 12(%[fir_c])  , %[_2], %%acc1 \r\n" | ||||
|         "mac.w      %[_1]u, %[_2]u, <<, 20(%[fir_p])& , %[_0], %%acc0 \r\n" | ||||
|         "mac.w      %[_1]l, %[_2]u, <<                       , %%acc1 \r\n" | ||||
|         "mac.w      %[_0]u, %[_2]l, <<                       , %%acc0 \r\n" | ||||
|         "mac.w      %[_0]l, %[_2]l, <<                       , %%acc1 \r\n" | ||||
|         "movclr.l   %%acc0, %[out_0]                                  \r\n" | ||||
|         "movclr.l   %%acc1, %[out_1]                                  \r\n" | ||||
|         : [_0]"=&r"(_0), [_1]"=&r"(_1), [_2]"=&r"(_2), | ||||
|           [fir_p]"+a"(this->fir_ptr), | ||||
|           [out_0]"=r"(out_0), [out_1]"=r"(out_1) | ||||
|         : [fir_c]"a"(this->fir_coeff), [fb]"r"(fb) | ||||
|         ); | ||||
| 
 | ||||
|         /* Generate output */ | ||||
|         asm ( | ||||
|         "mac.l      %[chans_0], %[gv_0]    , %%acc2 \r\n" | ||||
|         "mac.l      %[chans_1], %[gv_1]    , %%acc3 \r\n" | ||||
|         "mac.l      %[ev_0],   %[out_0], >>, %%acc2 \r\n" | ||||
|         "mac.l      %[ev_1],   %[out_1], >>, %%acc3 \r\n" | ||||
|         : | ||||
|         : [chans_0]"r"(chans_0), [gv_0]"r"(global_vol_0), | ||||
|           [ev_0]"r"((int)this->r.g.echo_volume_0), | ||||
|           [chans_1]"r"(chans_1), [gv_1]"r"(global_vol_1), | ||||
|           [ev_1]"r"((int)this->r.g.echo_volume_1), | ||||
|           [out_0]"r"(out_0), [out_1]"r"(out_1) | ||||
|         ); | ||||
| 
 | ||||
|         /* Feedback into echo buffer */ | ||||
|         if ( !(this->r.g.flags & 0x20) ) | ||||
|         { | ||||
|             asm ( | ||||
|             "lsl.l      %[sh], %[e0]                \r\n" | ||||
|             "move.l     %[e0], %%acc0               \r\n" | ||||
|             "mac.l      %[out_0], %[ef], <<, %%acc0 \r\n" | ||||
|             "lsl.l      %[sh], %[e1]                \r\n" | ||||
|             "move.l     %[e1], %%acc1               \r\n" | ||||
|             "mac.l      %[out_1], %[ef], <<, %%acc1 \r\n" | ||||
|             "movclr.l   %%acc0, %[e0]               \r\n" | ||||
|             "movclr.l   %%acc1, %[e1]               \r\n" | ||||
|             "swap       %[e1]                       \r\n" | ||||
|             "move.w     %[e1], %[e0]                \r\n" | ||||
|             : [e0]"+&d"(echo_0), [e1]"+&d"(echo_1) | ||||
|             : [out_0]"r"(out_0), [out_1]"r"(out_1), | ||||
|               [ef]"r"((int)this->r.g.echo_feedback), | ||||
|               [sh]"d"(9) | ||||
|             ); | ||||
|             *(int32_t *)echo_ptr = swap_odd_even32(echo_0); | ||||
|         } | ||||
| 
 | ||||
|         /* Output final samples */ | ||||
|         asm ( | ||||
|         "movclr.l   %%acc2, %[out_0] \r\n" | ||||
|         "movclr.l   %%acc3, %[out_1] \r\n" | ||||
|         "asr.l      %[gm],  %[out_0] \r\n" | ||||
|         "asr.l      %[gm],  %[out_1] \r\n" | ||||
|         : [out_0]"=&d"(out_0), [out_1]"=&d"(out_1) | ||||
|         : [gm]"d"(global_muting) | ||||
|         ); | ||||
| 
 | ||||
|         out_buf [             0] = out_0; | ||||
|         out_buf [WAV_CHUNK_SIZE] = out_1; | ||||
|         out_buf ++; | ||||
|     #else /* !CPU_COLDFIRE */ | ||||
|         /* Read feedback from echo buffer */ | ||||
|         int echo_pos = this->echo_pos; | ||||
|         uint8_t* const echo_ptr = RAM + | ||||
|  | @ -1061,10 +1213,8 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) | |||
|                     >> global_muting; | ||||
|         int amp_1 = (chans_1 * global_vol_1 + fb_1 * this->r.g.echo_volume_1) | ||||
|                     >> global_muting; | ||||
|         CLAMP16( amp_0, amp_0 ); | ||||
|         out_buf [0] = amp_0 * (1 << 8); | ||||
|         CLAMP16( amp_1, amp_1 ); | ||||
|         out_buf [WAV_CHUNK_SIZE] = amp_1 * (1 << 8); | ||||
|         out_buf [             0] = amp_0; | ||||
|         out_buf [WAV_CHUNK_SIZE] = amp_1; | ||||
|         out_buf ++; | ||||
|          | ||||
|         /* Feedback into echo buffer */ | ||||
|  | @ -1077,14 +1227,13 @@ static void DSP_run_( struct Spc_Dsp* this, long count, int32_t* out_buf ) | |||
|             CLAMP16( e1, e1 ); | ||||
|             SET_LE16A( echo_ptr + 2, e1 ); | ||||
|         } | ||||
|     #endif /* CPU_COLDFIRE */ | ||||
|     #else | ||||
|         /* Generate output */ | ||||
|         /* Generate output  */ | ||||
|         int amp_0 = (chans_0 * global_vol_0) >> global_muting; | ||||
|         int amp_1 = (chans_1 * global_vol_1) >> global_muting; | ||||
|         CLAMP16( amp_0, amp_0 ); | ||||
|         out_buf [0] = amp_0 * (1 << 8); | ||||
|         CLAMP16( amp_1, amp_1 ); | ||||
|         out_buf [WAV_CHUNK_SIZE] = amp_1 * (1 << 8); | ||||
|         out_buf [             0] = amp_0; | ||||
|         out_buf [WAV_CHUNK_SIZE] = amp_1; | ||||
|         out_buf ++; | ||||
|     #endif | ||||
|     } | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue