SWCODEC & Coldfire: Do some more DSP straightening out. Do as much Coldfire optimizing as seems reasonably possible by jumping through some hoops to avoid stalls. Further boost reduction will just be fractional points if taken to extremes -- not worth it. Wrap up the ASM for a while.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@12905 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Michael Sevakis 2007-03-25 04:03:44 +00:00
parent cd630c9e0a
commit 369c2a37b7
3 changed files with 469 additions and 370 deletions

View file

@ -38,9 +38,14 @@
#define WORD_FRACBITS 27 #define WORD_FRACBITS 27
#define NATIVE_DEPTH 16 #define NATIVE_DEPTH 16
/* If the buffer sizes change, check the assembly code! */
#define SAMPLE_BUF_COUNT 256 #define SAMPLE_BUF_COUNT 256
#define RESAMPLE_BUF_COUNT (256 * 4) /* Enough for 11,025 Hz -> 44,100 Hz*/ #define RESAMPLE_BUF_COUNT (256 * 4) /* Enough for 11,025 Hz -> 44,100 Hz*/
#define DEFAULT_GAIN 0x01000000 #define DEFAULT_GAIN 0x01000000
#define SAMPLE_BUF_LEFT_CHANNEL 0
#define SAMPLE_BUF_RIGHT_CHANNEL (SAMPLE_BUF_COUNT/2)
#define RESAMPLE_BUF_LEFT_CHANNEL 0
#define RESAMPLE_BUF_RIGHT_CHANNEL (RESAMPLE_BUF_COUNT/2)
/* enums to index conversion properly with stereo mode and other settings */ /* enums to index conversion properly with stereo mode and other settings */
enum enum
@ -66,11 +71,10 @@ enum
* NOTE: Any assembly routines that use these structures must be updated * NOTE: Any assembly routines that use these structures must be updated
* if current data members are moved or changed. * if current data members are moved or changed.
*/ */
/* 32-bit architecture offset */
struct resample_data struct resample_data
{ {
long delta; /* 00h */ uint32_t delta; /* 00h */
long phase; /* 04h */ uint32_t phase; /* 04h */
int32_t last_sample[2]; /* 08h */ int32_t last_sample[2]; /* 08h */
/* 10h */ /* 10h */
}; };
@ -93,9 +97,10 @@ struct dsp_data
int output_scale; /* 00h */ int output_scale; /* 00h */
int num_channels; /* 04h */ int num_channels; /* 04h */
struct resample_data resample_data; /* 08h */ struct resample_data resample_data; /* 08h */
int clip_min; /* 18h */ int32_t clip_min; /* 18h */
int clip_max; /* 2ch */ int32_t clip_max; /* 1ch */
/* 30h */ int32_t gain; /* 20h - Note that this is in S8.23 format. */
/* 24h */
}; };
/* No asm...yet */ /* No asm...yet */
@ -132,13 +137,18 @@ struct eq_state
#include <dsp_asm.h> #include <dsp_asm.h>
/* Typedefs keep things much neater in this case */ /* Typedefs keep things much neater in this case */
typedef int (*sample_input_fn_type)(int count, const char *src[], typedef void (*sample_input_fn_type)(int count, const char *src[],
int32_t *dst[]); int32_t *dst[]);
typedef int (*resample_fn_type)(int count, struct dsp_data *data, typedef int (*resample_fn_type)(int count, struct dsp_data *data,
int32_t *src[], int32_t *dst[]); int32_t *src[], int32_t *dst[]);
typedef void (*sample_output_fn_type)(int count, struct dsp_data *data, typedef void (*sample_output_fn_type)(int count, struct dsp_data *data,
int32_t *src[], int16_t *dst); int32_t *src[], int16_t *dst);
/* Single-DSP channel processing in place */
typedef void (*channels_process_fn_type)(int count, int32_t *buf[]); typedef void (*channels_process_fn_type)(int count, int32_t *buf[]);
/* DSP local channel processing in place */
typedef void (*channels_process_dsp_fn_type)(int count, struct dsp_data *data,
int32_t *buf[]);
/* /*
***************************************************************************/ ***************************************************************************/
@ -152,16 +162,16 @@ struct dsp_config
int sample_bytes; int sample_bytes;
int stereo_mode; int stereo_mode;
int frac_bits; int frac_bits;
long gain; /* Note that this is in S8.23 format. */
/* Functions that change depending upon settings - NULL if stage is /* Functions that change depending upon settings - NULL if stage is
disabled */ disabled */
sample_input_fn_type input_samples; sample_input_fn_type input_samples;
resample_fn_type resample; resample_fn_type resample;
sample_output_fn_type output_samples; sample_output_fn_type output_samples;
/* These will be NULL for the voice codec and is more economical that /* These will be NULL for the voice codec and is more economical that
way */ way */
channels_process_fn_type apply_crossfeed; channels_process_dsp_fn_type apply_gain;
channels_process_fn_type channels_process; channels_process_fn_type apply_crossfeed;
channels_process_fn_type channels_process;
}; };
/* General DSP config */ /* General DSP config */
@ -211,7 +221,7 @@ static struct dsp_config *dsp IDATA_ATTR = audio_dsp;
* of copying needed is minimized for that case. * of copying needed is minimized for that case.
*/ */
static int32_t sample_buf[SAMPLE_BUF_COUNT] IBSS_ATTR; int32_t sample_buf[SAMPLE_BUF_COUNT] IBSS_ATTR;
static int32_t resample_buf[RESAMPLE_BUF_COUNT] IBSS_ATTR; static int32_t resample_buf[RESAMPLE_BUF_COUNT] IBSS_ATTR;
/* set a new dsp and return old one */ /* set a new dsp and return old one */
@ -258,23 +268,20 @@ void sound_set_pitch(int permille)
dsp_configure(DSP_SWITCH_FREQUENCY, dsp->codec_frequency); dsp_configure(DSP_SWITCH_FREQUENCY, dsp->codec_frequency);
} }
/* Convert at most count samples to the internal format, if needed. Returns /* Convert count samples to the internal format, if needed. Updates src
* number of samples ready for further processing. Updates src to point * to point past the samples "consumed" and dst is set to point to the
* past the samples "consumed" and dst is set to point to the samples to * samples to consume. Note that for mono, dst[0] equals dst[1], as there
* consume. Note that for mono, dst[0] equals dst[1], as there is no point * is no point in processing the same data twice.
* in processing the same data twice.
*/ */
/* convert count 16-bit mono to 32-bit mono */ /* convert count 16-bit mono to 32-bit mono */
static int sample_input_lte_native_mono( static void sample_input_lte_native_mono(
int count, const char *src[], int32_t *dst[]) int count, const char *src[], int32_t *dst[])
{ {
count = MIN(SAMPLE_BUF_COUNT/2, count);
const int16_t *s = (int16_t *) src[0]; const int16_t *s = (int16_t *) src[0];
const int16_t * const send = s + count; const int16_t * const send = s + count;
int32_t *d = dst[0] = dst[1] = sample_buf; int32_t *d = dst[0] = dst[1] = &sample_buf[SAMPLE_BUF_LEFT_CHANNEL];
const int scale = WORD_SHIFT; int scale = WORD_SHIFT;
do do
{ {
@ -283,21 +290,17 @@ static int sample_input_lte_native_mono(
while (s < send); while (s < send);
src[0] = (char *)s; src[0] = (char *)s;
return count;
} }
/* convert count 16-bit interleaved stereo to 32-bit noninterleaved */ /* convert count 16-bit interleaved stereo to 32-bit noninterleaved */
static int sample_input_lte_native_i_stereo( static void sample_input_lte_native_i_stereo(
int count, const char *src[], int32_t *dst[]) int count, const char *src[], int32_t *dst[])
{ {
count = MIN(SAMPLE_BUF_COUNT/2, count);
const int32_t *s = (int32_t *) src[0]; const int32_t *s = (int32_t *) src[0];
const int32_t * const send = s + count; const int32_t * const send = s + count;
int32_t *dl = dst[0] = sample_buf; int32_t *dl = dst[0] = &sample_buf[SAMPLE_BUF_LEFT_CHANNEL];
int32_t *dr = dst[1] = sample_buf + SAMPLE_BUF_COUNT/2; int32_t *dr = dst[1] = &sample_buf[SAMPLE_BUF_RIGHT_CHANNEL];
const int scale = WORD_SHIFT; int scale = WORD_SHIFT;
do do
{ {
@ -313,22 +316,18 @@ static int sample_input_lte_native_i_stereo(
while (s < send); while (s < send);
src[0] = (char *)s; src[0] = (char *)s;
return count;
} }
/* convert count 16-bit noninterleaved stereo to 32-bit noninterleaved */ /* convert count 16-bit noninterleaved stereo to 32-bit noninterleaved */
static int sample_input_lte_native_ni_stereo( static void sample_input_lte_native_ni_stereo(
int count, const char *src[], int32_t *dst[]) int count, const char *src[], int32_t *dst[])
{ {
count = MIN(SAMPLE_BUF_COUNT/2, count);
const int16_t *sl = (int16_t *) src[0]; const int16_t *sl = (int16_t *) src[0];
const int16_t *sr = (int16_t *) src[1]; const int16_t *sr = (int16_t *) src[1];
const int16_t * const slend = sl + count; const int16_t * const slend = sl + count;
int32_t *dl = dst[0] = sample_buf; int32_t *dl = dst[0] = &sample_buf[SAMPLE_BUF_LEFT_CHANNEL];
int32_t *dr = dst[1] = sample_buf + SAMPLE_BUF_COUNT/2; int32_t *dr = dst[1] = &sample_buf[SAMPLE_BUF_RIGHT_CHANNEL];
const int scale = WORD_SHIFT; int scale = WORD_SHIFT;
do do
{ {
@ -339,35 +338,24 @@ static int sample_input_lte_native_ni_stereo(
src[0] = (char *)sl; src[0] = (char *)sl;
src[1] = (char *)sr; src[1] = (char *)sr;
return count;
} }
/* convert count 32-bit mono to 32-bit mono */ /* convert count 32-bit mono to 32-bit mono */
static int sample_input_gt_native_mono( static void sample_input_gt_native_mono(
int count, const char *src[], int32_t *dst[]) int count, const char *src[], int32_t *dst[])
{ {
count = MIN(SAMPLE_BUF_COUNT/2, count);
dst[0] = dst[1] = (int32_t *)src[0]; dst[0] = dst[1] = (int32_t *)src[0];
src[0] = (char *)(dst[0] + count); src[0] = (char *)(dst[0] + count);
return count;
} }
/* convert count 32-bit interleaved stereo to 32-bit noninterleaved stereo */ /* convert count 32-bit interleaved stereo to 32-bit noninterleaved stereo */
static int sample_input_gt_native_i_stereo( static void sample_input_gt_native_i_stereo(
int count, const char *src[], int32_t *dst[]) int count, const char *src[], int32_t *dst[])
{ {
count = MIN(SAMPLE_BUF_COUNT/2, count);
const int32_t *s = (int32_t *)src[0]; const int32_t *s = (int32_t *)src[0];
const int32_t * const send = s + 2*count; const int32_t * const send = s + 2*count;
int32_t *dl = sample_buf; int32_t *dl = dst[0] = &sample_buf[SAMPLE_BUF_LEFT_CHANNEL];
int32_t *dr = sample_buf + SAMPLE_BUF_COUNT/2; int32_t *dr = dst[1] = &sample_buf[SAMPLE_BUF_RIGHT_CHANNEL];
dst[0] = dl;
dst[1] = dr;
do do
{ {
@ -377,22 +365,16 @@ static int sample_input_gt_native_i_stereo(
while (s < send); while (s < send);
src[0] = (char *)send; src[0] = (char *)send;
return count;
} }
/* convert 32 bit-noninterleaved stereo to 32-bit noninterleaved stereo */ /* convert 32 bit-noninterleaved stereo to 32-bit noninterleaved stereo */
static int sample_input_gt_native_ni_stereo( static void sample_input_gt_native_ni_stereo(
int count, const char *src[], int32_t *dst[]) int count, const char *src[], int32_t *dst[])
{ {
count = MIN(SAMPLE_BUF_COUNT/2, count);
dst[0] = (int32_t *)src[0]; dst[0] = (int32_t *)src[0];
dst[1] = (int32_t *)src[1]; dst[1] = (int32_t *)src[1];
src[0] = (char *)(dst[0] + count); src[0] = (char *)(dst[0] + count);
src[1] = (char *)(dst[1] + count); src[1] = (char *)(dst[1] + count);
return count;
} }
/** /**
@ -573,12 +555,6 @@ static void sample_output_new_format(void)
dsp->output_samples = sample_output_functions[out]; dsp->output_samples = sample_output_functions[out];
} }
static void resampler_set_delta(int frequency)
{
dsp->data.resample_data.delta = (unsigned long)
frequency * 65536LL / NATIVE_FREQUENCY;
}
/** /**
* Linear interpolation resampling that introduces a one sample delay because * Linear interpolation resampling that introduces a one sample delay because
* of our inability to look into the future at the end of a frame. * of our inability to look into the future at the end of a frame.
@ -587,9 +563,9 @@ static void resampler_set_delta(int frequency)
static int dsp_downsample(int count, struct dsp_data *data, static int dsp_downsample(int count, struct dsp_data *data,
int32_t *src[], int32_t *dst[]) int32_t *src[], int32_t *dst[])
{ {
int ch = data->num_channels - 1; int ch = data->num_channels - 1;
long delta = data->resample_data.delta; uint32_t delta = data->resample_data.delta;
long phase, pos; uint32_t phase, pos;
int32_t *d; int32_t *d;
/* Rolled channel loop actually showed slightly faster. */ /* Rolled channel loop actually showed slightly faster. */
@ -610,7 +586,7 @@ static int dsp_downsample(int count, struct dsp_data *data,
if (pos > 0) if (pos > 0)
last = s[pos - 1]; last = s[pos - 1];
while (pos < count) while (pos < (uint32_t)count)
{ {
*d++ = last + FRACMUL((phase & 0xffff) << 15, s[pos] - last); *d++ = last + FRACMUL((phase & 0xffff) << 15, s[pos] - last);
phase += delta; phase += delta;
@ -625,12 +601,12 @@ static int dsp_downsample(int count, struct dsp_data *data,
return d - dst[0]; return d - dst[0];
} }
static int dsp_upsample(int count, struct dsp_data *data, static int dsp_upsample(int count, struct dsp_data *data,
int32_t *src[], int32_t *dst[]) int32_t *src[], int32_t *dst[])
{ {
int ch = data->num_channels - 1; int ch = data->num_channels - 1;
long delta = data->resample_data.delta; uint32_t delta = data->resample_data.delta;
long phase, pos; uint32_t phase, pos;
int32_t *d; int32_t *d;
/* Rolled channel loop actually showed slightly faster. */ /* Rolled channel loop actually showed slightly faster. */
@ -653,7 +629,7 @@ static int dsp_upsample(int count, struct dsp_data *data,
pos = phase >> 16; pos = phase >> 16;
} }
while (pos < count) while (pos < (uint32_t)count)
{ {
last = s[pos - 1]; last = s[pos - 1];
*d++ = last + FRACMUL((phase & 0xffff) << 15, s[pos] - last); *d++ = last + FRACMUL((phase & 0xffff) << 15, s[pos] - last);
@ -669,24 +645,43 @@ static int dsp_upsample(int count, struct dsp_data *data,
} }
#endif /* DSP_HAVE_ASM_RESAMPLING */ #endif /* DSP_HAVE_ASM_RESAMPLING */
/* Recompute the resampling step for the current DSP from its input
 * frequency and pick the resampling routine that matches: none when the
 * input is already at NATIVE_FREQUENCY, the upsampler when it is lower and
 * the downsampler when it is higher.
 */
static void resampler_new_delta(void)
{
    /* Step per output sample, scaled by 65536 (16 fractional bits) */
    dsp->data.resample_data.delta =
        (unsigned long) dsp->frequency * 65536LL / NATIVE_FREQUENCY;

    if (dsp->frequency > NATIVE_FREQUENCY)
    {
        dsp->resample = dsp_downsample;
        return;
    }

    if (dsp->frequency < NATIVE_FREQUENCY)
    {
        dsp->resample = dsp_upsample;
        return;
    }

    /* Input already runs at the native rate: disable the stage and clear
       the resampler state.
       NOTE: For fully glitch-free transitions from no resampling to
       resampling, the last_sample history would have to be maintained even
       while not resampling. */
    dsp->resample = NULL;
    dsp->data.resample_data.last_sample[1] = 0;
    dsp->data.resample_data.last_sample[0] = 0;
    dsp->data.resample_data.phase = 0;
}
/* Resample count stereo samples. Updates the src array, if resampling is /* Resample count stereo samples. Updates the src array, if resampling is
* done, to refer to the resampled data. Returns number of stereo samples * done, to refer to the resampled data. Returns number of stereo samples
* for further processing. * for further processing.
*/ */
static inline int resample(int count, int32_t *src[]) static inline int resample(int count, int32_t *src[])
{ {
if (dsp->resample) int32_t *dst[2] =
{ {
int32_t *dst[2] = &resample_buf[RESAMPLE_BUF_LEFT_CHANNEL],
{ &resample_buf[RESAMPLE_BUF_RIGHT_CHANNEL],
resample_buf, };
resample_buf + RESAMPLE_BUF_COUNT/2,
};
count = dsp->resample(count, &dsp->data, src, dst); count = dsp->resample(count, &dsp->data, src, dst);
src[0] = dst[0];
src[1] = dst[dsp->data.num_channels - 1]; src[0] = dst[0];
} src[1] = dst[dsp->data.num_channels - 1];
return count; return count;
} }
@ -810,30 +805,59 @@ void dsp_set_crossfeed_cross_params(long lf_gain, long hf_gain, long cutoff)
c[2] <<= 4; c[2] <<= 4;
} }
/* Apply a constant gain to the samples (e.g., for ReplayGain).
* Note that this must be called before the resampler.
*/
#ifndef DSP_HAVE_ASM_APPLY_GAIN
static void dsp_apply_gain(int count, struct dsp_data *data, int32_t *buf[])
{
    const int32_t gain = data->gain;
    int ch = data->num_channels;

    /* Channels are visited from the highest index down to 0 and each one
       is scaled in place (source and destination start at the same spot). */
    do
    {
        int32_t *in, *out;
        int32_t samp;
        int done = 0;

        ch--;
        in = out = buf[ch];

        /* Prime the pipeline with the first sample; the macro is handed the
           already-advanced source pointer.
           NOTE(review): FRACMUL_8_LOOP is defined elsewhere - presumably it
           stores samp*gain through out and fetches the next sample through
           in; confirm against its definition. */
        samp = *in++;

        do
        {
            FRACMUL_8_LOOP(samp, gain, in, out);
            done++;
        }
        while (done < count);
    }
    while (ch > 0);
}
#endif /* DSP_HAVE_ASM_APPLY_GAIN */
/* Combine all gains to a global gain. */ /* Combine all gains to a global gain. */
static void set_gain(struct dsp_config *dsp) static void set_gain(struct dsp_config *dsp)
{ {
dsp->gain = DEFAULT_GAIN; dsp->data.gain = DEFAULT_GAIN;
/* Replay gain not relevant to voice */ /* Replay gain not relevant to voice */
if (dsp == audio_dsp && replaygain) if (dsp == audio_dsp && replaygain)
{ {
dsp->gain = replaygain; dsp->data.gain = replaygain;
} }
if (eq_enabled && eq_precut) if (eq_enabled && eq_precut)
{ {
dsp->gain = (long) (((int64_t) dsp->gain * eq_precut) >> 24); dsp->data.gain =
(long) (((int64_t) dsp->data.gain * eq_precut) >> 24);
} }
if (dsp->gain == DEFAULT_GAIN) if (dsp->data.gain == DEFAULT_GAIN)
{ {
dsp->gain = 0; dsp->data.gain = 0;
} }
else else
{ {
dsp->gain >>= 1; dsp->data.gain >>= 1;
} }
dsp->apply_gain = dsp->data.gain != 0 ? dsp_apply_gain : NULL;
} }
/** /**
@ -927,50 +951,6 @@ static void eq_process(int count, int32_t *buf[])
} }
} }
/* Apply a constant gain to the samples (e.g., for ReplayGain). May update
* the src array if gain was applied.
* Note that this must be called before the resampler.
*/
static void apply_gain(int count, int32_t *buf[])
{
int32_t *sl, *sr;
int32_t s, *d;
long gain;
int i;
/* A pending gain change is folded in here, on the first call after it was
   flagged, rather than by the caller. */
if (new_gain)
{
/* Gain has changed */
dsp_set_replaygain();
if (dsp->gain == 0)
return; /* No gain to apply now */
}
sl = buf[0], sr = buf[1];
gain = dsp->gain;
/* Distinct buffers for buf[0] and buf[1]: scale the buf[1] data first, into
   the upper half of sample_buf, and repoint buf[1] at the result. */
if (sl != sr)
{
d = &sample_buf[SAMPLE_BUF_COUNT / 2];
buf[1] = d;
/* Preload the first sample; the macro receives the advanced pointer.
   NOTE(review): FRACMUL_8_LOOP is defined elsewhere - confirm that it
   stores the scaled sample via d and fetches the next one via sr. */
s = *sr++;
for (i = 0; i < count; i++)
FRACMUL_8_LOOP(s, gain, sr, d);
}
else
{
/* buf[0] and buf[1] are the same buffer: no second pass, buf[1] simply
   aliases the output written below. */
buf[1] = &sample_buf[0];
}
/* Scale the buf[0] data into the start of sample_buf and repoint buf[0]. */
d = &sample_buf[0];
buf[0] = d;
s = *sl++;
for (i = 0; i < count; i++)
FRACMUL_8_LOOP(s, gain, sl, d);
}
void dsp_set_stereo_width(int value) void dsp_set_stereo_width(int value)
{ {
long width, straight, cross; long width, straight, cross;
@ -993,35 +973,6 @@ void dsp_set_stereo_width(int value)
dsp_sw_cross = cross << 8; dsp_sw_cross = cross << 8;
} }
/**
* Implements the different channel configurations and stereo width.
*/
/* SOUND_CHAN_STEREO mode is a noop so has no function - just outline one for
* completeness. */
#if 0
static void channels_process_sound_chan_stereo(int count, int32_t *buf[])
{
/* The channels are each just themselves */
(void)count; (void)buf;
}
#endif
#ifndef DSP_HAVE_ASM_SOUND_CHAN_MONO
/* Mix both channels down to mono in place: every sample pair is replaced by
 * the sum of its two halved inputs, written back to both channels. Each
 * input is halved before the addition. At least one sample pair is always
 * processed, even when count is not positive.
 */
static void channels_process_sound_chan_mono(int count, int32_t *buf[])
{
    int32_t *left = buf[0];
    int32_t *right = buf[1];
    int n = 0;

    for (;;)
    {
        /* Read both inputs before storing; the buffers may be the same */
        const int32_t mixed = left[n] / 2 + right[n] / 2;

        left[n] = mixed;
        right[n] = mixed;

        if (++n >= count)
            break;
    }
}
#endif /* DSP_HAVE_ASM_SOUND_CHAN_MONO */
#if CONFIG_CODEC == SWCODEC #if CONFIG_CODEC == SWCODEC
#ifdef HAVE_SW_TONE_CONTROLS #ifdef HAVE_SW_TONE_CONTROLS
@ -1063,6 +1014,35 @@ int dsp_callback(int msg, intptr_t param)
} }
#endif #endif
/**
* Implements the different channel configurations and stereo width.
*/
/* SOUND_CHAN_STEREO mode is a noop so has no function - just outline one for
* completeness. */
#if 0
static void channels_process_sound_chan_stereo(int count, int32_t *buf[])
{
/* The channels are each just themselves */
(void)count; (void)buf;
}
#endif
#ifndef DSP_HAVE_ASM_SOUND_CHAN_MONO
/* Collapse the two channels to mono in place. For every position the two
 * inputs are halved individually, summed, and the sum is stored to both
 * channels. The loop body runs once before count is checked.
 */
static void channels_process_sound_chan_mono(int count, int32_t *buf[])
{
    int32_t *left = buf[0];
    int32_t *right = buf[1];
    int n = 0;

    for (;;)
    {
        /* Both loads happen before either store, so shared buffers work */
        const int32_t mixed = left[n] / 2 + right[n] / 2;

        left[n] = mixed;
        right[n] = mixed;

        if (++n >= count)
            break;
    }
}
#endif /* DSP_HAVE_ASM_SOUND_CHAN_MONO */
#ifndef DSP_HAVE_ASM_SOUND_CHAN_CUSTOM #ifndef DSP_HAVE_ASM_SOUND_CHAN_CUSTOM
static void channels_process_sound_chan_custom(int count, int32_t *buf[]) static void channels_process_sound_chan_custom(int count, int32_t *buf[])
{ {
@ -1151,30 +1131,47 @@ int dsp_process(char *dst, const char *src[], int count)
coldfire_set_macsr(EMAC_FRACTIONAL | EMAC_SATURATE); coldfire_set_macsr(EMAC_FRACTIONAL | EMAC_SATURATE);
#endif #endif
if (new_gain)
dsp_set_replaygain(); /* Gain has changed */
/* Testing function pointers for NULL is preferred since the pointer
will be preloaded to be used for the call if not. */
while (count > 0) while (count > 0)
{ {
samples = dsp->input_samples(count, src, tmp); samples = MIN(SAMPLE_BUF_COUNT/2, count);
count -= samples; count -= samples;
if (dsp->gain != 0)
apply_gain(samples, tmp); dsp->input_samples(samples, src, tmp);
if ((samples = resample(samples, tmp)) <= 0)
if (dsp->apply_gain)
dsp->apply_gain(samples, &dsp->data, tmp);
if (dsp->resample && (samples = resample(samples, tmp)) <= 0)
break; /* I'm pretty sure we're downsampling here */ break; /* I'm pretty sure we're downsampling here */
if (dsp->apply_crossfeed) if (dsp->apply_crossfeed)
dsp->apply_crossfeed(samples, tmp); dsp->apply_crossfeed(samples, tmp);
/* TODO: EQ and tone controls need separate structs for audio and voice /* TODO: EQ and tone controls need separate structs for audio and voice
* DSP processing thanks to filter history. isn't really audible now, but * DSP processing thanks to filter history. isn't really audible now, but
* might be the day we start handling voice more delicately. * might be the day we start handling voice more delicately. Planned
* changes may well run all relevant channels through the same EQ so
* perhaps not.
*/ */
if (eq_enabled) if (eq_enabled)
eq_process(samples, tmp); eq_process(samples, tmp);
#ifdef HAVE_SW_TONE_CONTROLS #ifdef HAVE_SW_TONE_CONTROLS
if ((bass | treble) != 0) if ((bass | treble) != 0)
eq_filter(tmp, &tone_filter, samples, dsp->data.num_channels, eq_filter(tmp, &tone_filter, samples, dsp->data.num_channels,
FILTER_BISHELF_SHIFT); FILTER_BISHELF_SHIFT);
#endif #endif
if (dsp->channels_process) if (dsp->channels_process)
dsp->channels_process(samples, tmp); dsp->channels_process(samples, tmp);
dsp->output_samples(samples, &dsp->data, tmp, (int16_t *)dst); dsp->output_samples(samples, &dsp->data, tmp, (int16_t *)dst);
written += samples; written += samples;
dst += samples * sizeof (int16_t) * 2; dst += samples * sizeof (int16_t) * 2;
yield(); yield();
@ -1245,9 +1242,6 @@ bool dsp_configure(int setting, intptr_t value)
if (dsp == audio_dsp) if (dsp == audio_dsp)
{ {
*var = value; *var = value;
/* In case current gain is zero, force at least one call
to apply_gain or apply_gain won't pick up on new_gain */
audio_dsp->gain = -1;
new_gain = true; new_gain = true;
} }
} }
@ -1282,15 +1276,7 @@ bool dsp_configure(int setting, intptr_t value)
else else
dsp->frequency = dsp->codec_frequency; dsp->frequency = dsp->codec_frequency;
resampler_set_delta(dsp->frequency); resampler_new_delta();
if (dsp->frequency == NATIVE_FREQUENCY)
dsp->resample = NULL;
else if (dsp->frequency < NATIVE_FREQUENCY)
dsp->resample = dsp_upsample;
else
dsp->resample = dsp_downsample;
break; break;
case DSP_SET_SAMPLE_DEPTH: case DSP_SET_SAMPLE_DEPTH:
@ -1348,7 +1334,7 @@ bool dsp_configure(int setting, intptr_t value)
case DSP_FLUSH: case DSP_FLUSH:
memset(&dsp->data.resample_data, 0, memset(&dsp->data.resample_data, 0,
sizeof (dsp->data.resample_data)); sizeof (dsp->data.resample_data));
resampler_set_delta(dsp->frequency); resampler_new_delta();
dither_init(); dither_init();
break; break;

View file

@ -22,32 +22,61 @@
#ifndef _DSP_ASM_H #ifndef _DSP_ASM_H
#define _DSP_ASM_H #define _DSP_ASM_H
/* Set the appropriate #defines based on CPU or whatever matters */
#ifndef SIMULATOR #ifndef SIMULATOR
#if defined(CPU_COLDFIRE) || defined(CPU_ARM) #if defined(CPU_ARM)
#define DSP_HAVE_ASM_CROSSFEED
void apply_crossfeed(int count, int32_t *buf[]);
#define DSP_HAVE_ASM_RESAMPLING #define DSP_HAVE_ASM_RESAMPLING
int dsp_downsample(int count, struct dsp_data *data, int32_t *src[], int32_t *dst[]); #define DSP_HAVE_ASM_CROSSFEED
int dsp_upsample(int count, struct dsp_data *data, int32_t *src[], int32_t *dst[]); #elif defined (CPU_COLDFIRE)
#endif /* defined(CPU_COLDFIRE) || defined(CPU_ARM) */ #define DSP_HAVE_ASM_APPLY_GAIN
#define DSP_HAVE_ASM_RESAMPLING
#if defined (CPU_COLDFIRE) #define DSP_HAVE_ASM_CROSSFEED
#define DSP_HAVE_ASM_SOUND_CHAN_MONO #define DSP_HAVE_ASM_SOUND_CHAN_MONO
void channels_process_sound_chan_mono(int count, int32_t *buf[]);
#define DSP_HAVE_ASM_SOUND_CHAN_CUSTOM #define DSP_HAVE_ASM_SOUND_CHAN_CUSTOM
void channels_process_sound_chan_custom(int count, int32_t *buf[]);
#define DSP_HAVE_ASM_SOUND_CHAN_KARAOKE #define DSP_HAVE_ASM_SOUND_CHAN_KARAOKE
void channels_process_sound_chan_karaoke(int count, int32_t *buf[]);
#define DSP_HAVE_ASM_SAMPLE_OUTPUT_MONO #define DSP_HAVE_ASM_SAMPLE_OUTPUT_MONO
void sample_output_mono(int count, struct dsp_data *data,
int32_t *src[], int16_t *dst);
#define DSP_HAVE_ASM_SAMPLE_OUTPUT_STEREO #define DSP_HAVE_ASM_SAMPLE_OUTPUT_STEREO
void sample_output_stereo(int count, struct dsp_data *data,
int32_t *src[], int16_t *dst);
#endif /* CPU_COLDFIRE */ #endif /* CPU_COLDFIRE */
#endif /* SIMULATOR */ #endif /* SIMULATOR */
/* Declare prototypes based upon what's #defined above */
#ifdef DSP_HAVE_ASM_CROSSFEED
void apply_crossfeed(int count, int32_t *buf[]);
#endif
#ifdef DSP_HAVE_ASM_APPLY_GAIN
void dsp_apply_gain(int count, struct dsp_data *data, int32_t *buf[]);
#endif /* DSP_HAVE_ASM_APPLY_GAIN */
#ifdef DSP_HAVE_ASM_RESAMPLING
int dsp_upsample(int count, struct dsp_data *data,
int32_t *src[], int32_t *dst[]);
int dsp_downsample(int count, struct dsp_data *data,
int32_t *src[], int32_t *dst[]);
#endif /* DSP_HAVE_ASM_RESAMPLING */
#ifdef DSP_HAVE_ASM_SOUND_CHAN_MONO
void channels_process_sound_chan_mono(int count, int32_t *buf[]);
#endif
#ifdef DSP_HAVE_ASM_SOUND_CHAN_CUSTOM
void channels_process_sound_chan_custom(int count, int32_t *buf[]);
#endif
#ifdef DSP_HAVE_ASM_SOUND_CHAN_KARAOKE
void channels_process_sound_chan_karaoke(int count, int32_t *buf[]);
#endif
#ifdef DSP_HAVE_ASM_SAMPLE_OUTPUT_STEREO
void sample_output_stereo(int count, struct dsp_data *data,
int32_t *src[], int16_t *dst);
#endif
#ifdef DSP_HAVE_ASM_SAMPLE_OUTPUT_MONO
void sample_output_mono(int count, struct dsp_data *data,
int32_t *src[], int16_t *dst);
#endif
#endif /* _DSP_ASM_H */ #endif /* _DSP_ASM_H */

View file

@ -19,68 +19,117 @@
****************************************************************************/ ****************************************************************************/
/**************************************************************************** /****************************************************************************
* void apply_crossfeed(int count, int32_t *src[]) * void dsp_apply_gain(int count, struct dsp_data *data, int32_t *buf[])
*/ */
.section .text .section .text
.align 2
.global dsp_apply_gain
dsp_apply_gain:
lea.l -20(%sp), %sp | save registers
movem.l %d2-%d4/%a2-%a3, (%sp) |
movem.l 28(%sp), %a0-%a1 | %a0 = data,
| %a1 = buf
move.l 4(%a0), %d1 | %d1 = data->num_channels
move.l 32(%a0), %a0 | %a0 = data->gain (in s8.23)
10: | channel loop |
move.l 24(%sp), %d0 | %d0 = count
move.l -4(%a1, %d1.l*4), %a2 | %a2 = s = buf[ch-1]
move.l %a2, %a3 | %a3 = d = s
move.l (%a2)+, %d2 | %d2 = *s++,
mac.l %a0, %d2, (%a2)+, %d2, %acc0 | %acc0 = S(n)*gain, load S(n+1)
subq.l #1, %d0 | --count > 0 ? : effectively n++
ble.b 30f | loop done | no? finish up
20: | loop |
move.l %accext01, %d4 | fetch S(n-1)[7:0]
movclr.l %acc0, %d3 | fetch S(n-1)[40:8] in %d3[31:0]
asl.l #8, %d3 | *s++ = (S(n-1)[40:8] << 8) | S(n-1)[7:0]
mac.l %a0, %d2, (%a2)+, %d2, %acc0 | %acc0 = S(n)*gain, load S(n+1)
move.b %d4, %d3 |
move.l %d3, (%a3)+ |
subq.l #1, %d0 | --count > 0 ? : effectively n++
bgt.b 20b | loop | yes? do more samples
30: | loop done |
move.l %accext01, %d4 | fetch S(n-1)[7:0]
movclr.l %acc0, %d3 | fetch S(n-1)[40:8] in %d3[31:0]
asl.l #8, %d3 | *s = (S(n-1)[40:8] << 8) | S(n-1)[7:0]
move.b %d4, %d3 |
move.l %d3, (%a3) |
subq.l #1, %d1 | next channel
bgt.b 10b | channel loop |
movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers
lea.l 20(%sp), %sp | cleanup stack
rts |
.size dsp_apply_gain,.-dsp_apply_gain
/****************************************************************************
* void apply_crossfeed(int count, int32_t *buf[])
*/
.section .text
.align 2
.global apply_crossfeed .global apply_crossfeed
apply_crossfeed: apply_crossfeed:
lea.l -44(%sp), %sp lea.l -44(%sp), %sp |
movem.l %d2-%d7/%a2-%a6, (%sp) | save all regs movem.l %d2-%d7/%a2-%a6, (%sp) | save all regs
movem.l 48(%sp), %d7/%a4 | %d7 = count, %a4 = src movem.l 48(%sp), %d7/%a4 | %d7 = count, %a4 = src
movem.l (%a4), %a4-%a5 | %a4 = src[0], %a5 = src[1] movem.l (%a4), %a4-%a5 | %a4 = src[0], %a5 = src[1]
lea.l crossfeed_data, %a1 lea.l crossfeed_data, %a1 | %a1 = &crossfeed_data
move.l (%a1)+, %a6 | a6 = direct gain move.l (%a1)+, %d6 | %d6 = direct gain
movem.l 12(%a1), %d0-%d3 | fetch filter history samples movem.l 12(%a1), %d0-%d3 | fetch filter history samples
move.l 132(%a1), %a0 | fetch delay line address move.l 132(%a1), %a0 | fetch delay line address
movem.l (%a1), %a1-%a3 | load filter coefs movem.l (%a1), %a1-%a3 | load filter coefs
lea.l crossfeed_data+136, %a6 | %a6 = delay line wrap limit
bra.b 20f | loop start | go to loop start point
/* Register usage in loop: /* Register usage in loop:
* %a0 = delay_p, %a1..%a3 = b0, b1, a1 (filter coefs), * %a0 = delay_p, %a1..%a3 = b0, b1, a1 (filter coefs),
* %a4 = src[0], %a5 = src[1], %a6 = direct gain, * %a4 = buf[0], %a5 = buf[1],
* %a6 = delay line pointer wrap limit,
* %d0..%d3 = history * %d0..%d3 = history
* %d4..%d6 = temp. * %d4..%d5 = temp.
* %d6 = direct gain,
* %d7 = count * %d7 = count
*/ */
.cfloop: 10: | loop |
mac.l %a2, %d0, 4(%a0), %d0, %acc0 | acc = b1*dr[n - 1] d0 = dr[n] movclr.l %acc0, %d4 | write outputs
mac.l %a1, %d0 , %acc0 | acc += b0*dr[n] move.l %d4, (%a4)+ | .
mac.l %a3, %d1, (%a4), %d4, %acc0 | acc += a1*y_l[n - 1], load L movclr.l %acc1, %d5 | .
move.l %acc0, %d1 | get filtered delayed sample move.l %d5, (%a5)+ | .
mac.l %a6, %d4, %acc0 | acc += gain*x_l[n] 20: | loop start |
movclr.l %acc0, %d6 | mac.l %a2, %d0, (%a0)+, %d0, %acc0 | %acc0 = b1*dl[n - 1], %d0 = dl[n]
move.l %d6, (%a4)+ | write result mac.l %a1, %d0 , %acc0 | %acc0 += b0*dl[n]
mac.l %a3, %d1, (%a5), %d5, %acc0 | %acc0 += a1*y_r[n - 1], load R
mac.l %a2, %d2, (%a0), %d2, %acc0 | acc = b1*dl[n - 1], d2 = dl[n] mac.l %a2, %d2, (%a0)+, %d2, %acc1 | %acc1 = b1*dr[n - 1], %d2 = dr[n]
mac.l %a1, %d2 , %acc0 | acc += b0*dl[n] mac.l %a1, %d2 , %acc1 | %acc1 += b0*dr[n]
mac.l %a3, %d3, (%a5), %d5, %acc0 | acc += a1*y_r[n - 1], load R mac.l %a3, %d3, (%a4), %d4, %acc1 | %acc1 += a1*y_l[n - 1], load L
movem.l %d4-%d5, (%a0) | save left & right inputs to delay line movem.l %d4-%d5, -8(%a0) | save left & right inputs to delay line
move.l %acc0, %d3 | get filtered delayed sample move.l %acc0, %d3 | get filtered delayed left sample (y_l[n])
mac.l %a6, %d5, %acc0 | acc += gain*x_r[n] move.l %acc1, %d1 | get filtered delayed right sample (y_r[n])
lea.l 8(%a0), %a0 | increment delay pointer mac.l %d6, %d4, %acc0 | %acc0 += gain*x_l[n]
movclr.l %acc0, %d6 | mac.l %d6, %d5, %acc1 | %acc1 += gain*x_r[n]
move.l %d6, (%a5)+ | write result cmp.l %a6, %a0 | wrap %a0 if passed end
bhs.b 30f | wrap buffer |
cmpa.l #crossfeed_data+136, %a0| wrap a0 if passed end .word 0x51fb | tpf.l | trap the buffer wrap
bge.b .cfwrap | 30: | wrap buffer | ...fwd taken branches more costly
.word 0x51fb | tpf.l - trap the buffer wrap lea.l -104(%a0), %a0 | wrap it up
.cfwrap: subq.l #1, %d7 | --count > 0 ?
lea.l -104(%a0), %a0 | wrap bgt.b 10b | loop | yes? do more
subq.l #1, %d7 | --count < 0 ? movclr.l %acc0, %d4 | write last outputs
bgt.b .cfloop | move.l %d4, (%a4) | .
movclr.l %acc1, %d5 | .
move.l %d5, (%a5) | .
lea.l crossfeed_data+16, %a1 | save data back to struct lea.l crossfeed_data+16, %a1 | save data back to struct
movem.l %d0-%d3, (%a1) | ...history movem.l %d0-%d3, (%a1) | ...history
move.l %a0, 120(%a1) | ...delay_p move.l %a0, 120(%a1) | ...delay_p
movem.l (%sp), %d2-%d7/%a2-%a6 | restore all regs movem.l (%sp), %d2-%d7/%a2-%a6 | restore all regs
lea.l 44(%sp), %sp lea.l 44(%sp), %sp |
rts rts |
.cfend: .size apply_crossfeed,.-apply_crossfeed
.size apply_crossfeed,.cfend-apply_crossfeed
/**************************************************************************** /****************************************************************************
* int dsp_downsample(int count, struct dsp_data *data, * int dsp_downsample(int count, struct dsp_data *data,
* in32_t *src[], int32_t *dst[]) * in32_t *src[], int32_t *dst[])
*/ */
.section .text .section .text
.align 2
.global dsp_downsample .global dsp_downsample
dsp_downsample: dsp_downsample:
lea.l -40(%sp), %sp | save non-clobberables lea.l -40(%sp), %sp | save non-clobberables
@ -92,7 +141,7 @@ dsp_downsample:
movem.l 4(%a0), %d3-%d4 | %d3 = ch = data->num_channels movem.l 4(%a0), %d3-%d4 | %d3 = ch = data->num_channels
| %d4 = delta = data->resample_data.delta | %d4 = delta = data->resample_data.delta
moveq.l #16, %d7 | %d7 = shift moveq.l #16, %d7 | %d7 = shift
.dschannel_loop: 10: | channel loop |
move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase
move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1] move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1]
move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1] move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1]
@ -102,15 +151,15 @@ dsp_downsample:
move.l %d5, %d6 | %d6 = pos = phase >> 16 move.l %d5, %d6 | %d6 = pos = phase >> 16
lsr.l %d7, %d6 | lsr.l %d7, %d6 |
cmp.l %d2, %d6 | past end of samples? cmp.l %d2, %d6 | past end of samples?
bge.b .dsloop_skip | yes? skip loop bge.b 40f | skip resample loop| yes? skip loop
tst.l %d6 | need last sample of prev. frame? tst.l %d6 | need last sample of prev. frame?
bne.b .dsloop | no? start main loop bne.b 20f | resample loop | no? start main loop
move.l (%a3, %d6.l*4), %d1 | %d1 = s[pos] move.l (%a3, %d6.l*4), %d1 | %d1 = s[pos]
bra.b .dsuse_last_start | start with last (last in %d0) bra.b 30f | resample start last | start with last (last in %d0)
.dsloop: 20: | resample loop |
lea.l -4(%a3, %d6.l*4), %a5 | load s[pos-1] and s[pos] lea.l -4(%a3, %d6.l*4), %a5 | load s[pos-1] and s[pos]
movem.l (%a5), %d0-%d1 | movem.l (%a5), %d0-%d1 |
.dsuse_last_start: 30: | resample start last |
sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1] sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1]
move.l %d0, %acc0 | %acc0 = previous sample move.l %d0, %acc0 | %acc0 = previous sample
move.l %d5, %d0 | frac = (phase << 16) >> 1 move.l %d5, %d0 | frac = (phase << 16) >> 1
@ -123,11 +172,11 @@ dsp_downsample:
movclr.l %acc0, %d0 | movclr.l %acc0, %d0 |
move.l %d0, (%a4)+ | *d++ = %d0 move.l %d0, (%a4)+ | *d++ = %d0
cmp.l %d2, %d6 | pos < count? cmp.l %d2, %d6 | pos < count?
blt.b .dsloop | yes? continue resampling blt.b 20b | resample loop | yes? continue resampling
.dsloop_skip: 40: | skip resample loop |
subq.l #1, %d3 | ch > 0? subq.l #1, %d3 | ch > 0?
bgt.b .dschannel_loop | yes? process next channel bgt.b 10b | channel loop | yes? process next channel
asl.l %d7, %d2 | wrap phase to start of next frame lsl.l %d7, %d2 | wrap phase to start of next frame
sub.l %d2, %d5 | data->resample_data.phase = sub.l %d2, %d5 | data->resample_data.phase =
move.l %d5, 12(%a0) | ... phase - (count << 16) move.l %d5, 12(%a0) | ... phase - (count << 16)
move.l %a4, %d0 | return d - d[0] move.l %a4, %d0 | return d - d[0]
@ -136,14 +185,14 @@ dsp_downsample:
movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables
lea.l 40(%sp), %sp | cleanup stack lea.l 40(%sp), %sp | cleanup stack
rts | buh-bye rts | buh-bye
.dsend: .size dsp_downsample,.-dsp_downsample
.size dsp_downsample,.dsend-dsp_downsample
/**************************************************************************** /****************************************************************************
* int dsp_upsample(int count, struct dsp_data *dsp, * int dsp_upsample(int count, struct dsp_data *dsp,
* in32_t *src[], int32_t *dst[]) * int32_t *src[], int32_t *dst[])
*/ */
.section .text .section .text
.align 2
.global dsp_upsample .global dsp_upsample
dsp_upsample: dsp_upsample:
lea.l -40(%sp), %sp | save non-clobberables lea.l -40(%sp), %sp | save non-clobberables
@ -154,47 +203,55 @@ dsp_upsample:
| %a2 = dst | %a2 = dst
movem.l 4(%a0), %d3-%d4 | %d3 = ch = channels movem.l 4(%a0), %d3-%d4 | %d3 = ch = channels
| %d4 = delta = data->resample_data.delta | %d4 = delta = data->resample_data.delta
swap %d4 | swap delta to high word to use swap %d4 | swap delta to high word to use...
| carries to increment position | ...carries to increment position
.uschannel_loop: 10: | channel loop |
move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase
move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1] move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1]
lea.l 12(%a0, %d3.l*4), %a4 | %a4 = &data->resample_data.last_sample[ch-1] lea.l 12(%a0, %d3.l*4), %a4 | %a4 = &data->resample_data.last_sample[ch-1]
lea.l (%a3, %d2.l*4), %a5 | %a5 = src_end = &src[count] lea.l -4(%a3, %d2.l*4), %a5 | %a5 = src_end = &src[count-1]
move.l (%a4), %d0 | %d0 = last = data->resample_data.last_sample[ch-1] move.l (%a4), %d0 | %d0 = last = data->resample_data.last_sample[ch-1]
move.l -(%a5), (%a4) | data->resample_data.last_sample[ch-1] = s[count-1] move.l (%a5), (%a4) | data->resample_data.last_sample[ch-1] = s[count-1]
move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1] move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1]
move.l (%a3)+, %d1 | fetch first sample - might throw this...
| ...away later but we'll be preincremented
move.l %d1, %d6 | save sample value
sub.l %d0, %d1 | %d1 = diff = s[0] - last
swap %d5 | swap phase to high word to use swap %d5 | swap phase to high word to use
| carries to increment position | carries to increment position
move.l %d5, %d6 | %d6 = pos = phase >> 16 move.l %d5, %d7 | %d7 = pos = phase >> 16
clr.w %d5 | clr.w %d5 |
eor.l %d5, %d6 | pos == 0? eor.l %d5, %d7 | pos == 0?
beq.b .usstart_0 | no? transistion from down beq.b 40f | loop start | yes? start loop
cmp.l %d2, %d6 | past end of samples? cmp.l %d2, %d7 | past end of samples?
bge.b .usloop_skip | yes? skip loop bge.b 50f | skip resample loop| yes? go to next channel and collect info
lea.l -4(%a3, %d6.l*4), %a3 | %a3 = s = &s[pos-1] (previous) lea.l (%a3, %d7.l*4), %a3 | %a3 = s = &s[pos+1]
move.l (%a3)+, %d0 | %d0 = *s++ movem.l -8(%a3), %d0-%d1 | %d0 = s[pos-1], %d1 = s[pos]
.word 0x51fa | tpf.w - trap next instruction move.l %d1, %d6 | save sample value
.usloop_1: sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1]
bra.b 40f | loop start |
20: | next sample loop |
move.l %d6, %d0 | move previous sample to %d0 move.l %d6, %d0 | move previous sample to %d0
.usstart_0:
move.l (%a3)+, %d1 | fetch next sample move.l (%a3)+, %d1 | fetch next sample
move.l %d1, %d6 | save sample value move.l %d1, %d6 | save sample value
sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1] sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1]
.usloop_0: 30: | same sample loop |
movclr.l %acc0, %d7 | %d7 = result
move.l %d7, (%a4)+ | *d++ = %d7
40: | loop start |
lsr.l #1, %d5 | make phase into frac lsr.l #1, %d5 | make phase into frac
move.l %d0, %acc0 | %acc0 = s[pos-1]
mac.l %d1, %d5, %acc0 | %acc0 = diff * frac mac.l %d1, %d5, %acc0 | %acc0 = diff * frac
lsl.l #1, %d5 | restore frac to phase lsl.l #1, %d5 | restore frac to phase
movclr.l %acc0, %d7 | %d7 = product
add.l %d0, %d7 | %d7 = last + product
move.l %d7, (%a4)+ | *d++ = %d7
add.l %d4, %d5 | phase += delta add.l %d4, %d5 | phase += delta
bcc.b .usloop_0 | load next values? bcc.b 30b | same sample loop | load next values?
cmp.l %a5, %a3 | src <= src_end? cmp.l %a5, %a3 | src <= src_end?
ble.b .usloop_1 | yes? continue resampling bls.b 20b | next sample loop | yes? continue resampling
.usloop_skip: movclr.l %acc0, %d7 | %d7 = result
move.l %d7, (%a4)+ | *d++ = %d7
50: | skip resample loop |
subq.l #1, %d3 | ch > 0? subq.l #1, %d3 | ch > 0?
bgt.b .uschannel_loop | yes? process next channel bgt.b 10b | channel loop | yes? process next channel
swap %d5 | wrap phase to start of next frame swap %d5 | wrap phase to start of next frame
move.l %d5, 12(%a0) | ...and save in data->resample_data.phase move.l %d5, 12(%a0) | ...and save in data->resample_data.phase
move.l %a4, %d0 | return d - d[0] move.l %a4, %d0 | return d - d[0]
@ -203,12 +260,7 @@ dsp_upsample:
asr.l #2, %d0 | convert bytes->samples asr.l #2, %d0 | convert bytes->samples
lea.l 40(%sp), %sp | cleanup stack lea.l 40(%sp), %sp | cleanup stack
rts | buh-bye rts | buh-bye
.usend: .size dsp_upsample,.-dsp_upsample
.size dsp_upsample,.usend-dsp_upsample
/* These routines might benefit from burst transfers but we'll keep them
* small for now since they're rather light weight
*/
/**************************************************************************** /****************************************************************************
* void channels_process_sound_chan_mono(int count, int32_t *buf[]) * void channels_process_sound_chan_mono(int count, int32_t *buf[])
@ -216,31 +268,39 @@ dsp_upsample:
* Mix left and right channels 50/50 into a center channel. * Mix left and right channels 50/50 into a center channel.
*/ */
.section .text .section .text
.align 2
.global channels_process_sound_chan_mono .global channels_process_sound_chan_mono
channels_process_sound_chan_mono: channels_process_sound_chan_mono:
/* Review notes. Each row below holds the pre-change instruction followed by
 * the post-change instruction; these notes describe the post-change
 * (right-hand) routine.
 *
 * Arguments are read from the stack at 4(%sp): %d0 = count, %a0 = buf,
 * where buf[0] and buf[1] are the two channel pointers. The mixed result
 * is written back over both channel buffers.
 *
 * Register use: %a0/%a1 = read pointers, %a2/%a3 = write pointers,
 * %d1/%d2 = current input samples, %d3 = 0x40000000 (0.5), %d4 = output.
 * %d2-%d4/%a2-%a3 are saved on entry and restored on exit (20 bytes).
 *
 * The loop is pipelined: each mac.l multiplies the sample already held in
 * %d1/%d2 while its parallel load fetches the next one, and the previous
 * result (moved out of %acc0 into %d4) is stored afterwards. Label 20:
 * stores the final result still held in %acc0 when the loop ends.
 *
 * NOTE(review): for count >= 1 the parallel loads read count+1 longwords
 * from each channel, i.e. one element past index count-1; that extra value
 * is never multiplied. With count <= 0 one sample is still computed and
 * stored.
 * NOTE(review): unlike the pre-change code, %macsr is neither saved nor
 * set here, so the EMAC mode is presumably configured by the caller --
 * TODO confirm.
 */
movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf
lea.l -12(%sp), %sp | save registers lea.l -20(%sp), %sp | save registers
move.l %macsr, %d1 | movem.l %d2-%d4/%a2-%a3, (%sp) |
movem.l %d1-%d3, (%sp) |
move.l #0xb0, %macsr | put emac in rounding fractional mode
movem.l (%a0), %a0-%a1 | get channel pointers movem.l (%a0), %a0-%a1 | get channel pointers
move.l %a0, %a2 | use separate dst pointers since read
move.l %a1, %a3 | pointers run one ahead of write
move.l #0x40000000, %d3 | %d3 = 0.5 move.l #0x40000000, %d3 | %d3 = 0.5
1: move.l (%a0)+, %d1 | prime the input registers
move.l (%a0), %d1 | L = R = l/2 + r/2 move.l (%a1)+, %d2 |
mac.l %d1, %d3, (%a1), %d2, %acc0 | mac.l %d1, %d3, (%a0)+, %d1, %acc0 |
mac.l %d2, %d3, %acc0 | mac.l %d2, %d3, (%a1)+, %d2, %acc0 |
movclr.l %acc0, %d1 | subq.l #1, %d0 |
move.l %d1, (%a0)+ | output to original buffer ble.s 20f | loop done |
move.l %d1, (%a1)+ | 10: | loop |
subq.l #1, %d0 | movclr.l %acc0, %d4 | L = R = l/2 + r/2
bgt.s 1b | mac.l %d1, %d3, (%a0)+, %d1, %acc0 |
movem.l (%sp), %d1-%d3 | restore registers mac.l %d2, %d3, (%a1)+, %d2, %acc0 |
move.l %d1, %macsr | move.l %d4, (%a2)+ | output to original buffer
lea.l 12(%sp), %sp | cleanup move.l %d4, (%a3)+ |
rts subq.l #1, %d0 |
.cpmono_end: bgt.s 10b | loop |
.size channels_process_sound_chan_mono, .cpmono_end-channels_process_sound_chan_mono 20: | loop done |
movclr.l %acc0, %d4 | output last sample
move.l %d4, (%a2) |
move.l %d4, (%a3) |
movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers
lea.l 20(%sp), %sp | cleanup
rts |
.size channels_process_sound_chan_mono, \
.-channels_process_sound_chan_mono
/**************************************************************************** /****************************************************************************
* void channels_process_sound_chan_custom(int count, int32_t *buf[]) * void channels_process_sound_chan_custom(int count, int32_t *buf[])
@ -248,34 +308,47 @@ channels_process_sound_chan_mono:
* Apply stereo width (narrowing/expanding) effect. * Apply stereo width (narrowing/expanding) effect.
*/ */
.section .text .section .text
.align 2
.global channels_process_sound_chan_custom .global channels_process_sound_chan_custom
channels_process_sound_chan_custom: channels_process_sound_chan_custom:
/* Review notes. Each row below holds the pre-change instruction followed by
 * the post-change instruction; these notes describe the post-change
 * (right-hand) routine.
 *
 * Arguments are read from the stack at 4(%sp): %d0 = count, %a0 = buf,
 * where buf[0] and buf[1] are the two channel pointers. Both channels are
 * rewritten in place as
 *     L = l*dsp_sw_gain + r*dsp_sw_cross   (accumulated in %acc0)
 *     R = r*dsp_sw_gain + l*dsp_sw_cross   (accumulated in %acc1)
 *
 * Register use: %a0/%a1 = read pointers, %a2/%a3 = write pointers,
 * %d1/%d2 = current l/r inputs, %d3 = dsp_sw_gain, %d4 = dsp_sw_cross,
 * %d5/%d6 = L/R outputs. %d2-%d6/%a2-%a3 are saved on entry and restored
 * on exit (28 bytes).
 *
 * The loop is pipelined: the second use of each input register carries the
 * parallel load of the next sample, and the previous results (moved out of
 * the accumulators into %d5/%d6) are stored after the new multiplies have
 * been issued. Label 20: stores the final pair still held in %acc0/%acc1.
 * Label 15: is not the target of any branch in this routine.
 *
 * NOTE(review): for count >= 1 the parallel loads read count+1 longwords
 * from each channel, i.e. one element past index count-1; that extra value
 * is never multiplied. With count <= 0 one sample pair is still computed
 * and stored.
 * NOTE(review): unlike the pre-change code, %macsr is neither saved nor
 * set here, so the EMAC mode is presumably configured by the caller --
 * TODO confirm.
 */
movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf
lea.l -16(%sp), %sp | save registers lea.l -28(%sp), %sp | save registers
move.l %macsr, %d1 | movem.l %d2-%d6/%a2-%a3, (%sp) |
movem.l %d1-%d4, (%sp) |
move.l #0xb0, %macsr | put emac in rounding fractional mode
movem.l (%a0), %a0-%a1 | get channel pointers movem.l (%a0), %a0-%a1 | get channel pointers
move.l %a0, %a2 | use separate dst pointers since read
move.l %a1, %a3 | pointers run one ahead of write
move.l dsp_sw_gain, %d3 | load straight (mid) gain move.l dsp_sw_gain, %d3 | load straight (mid) gain
move.l dsp_sw_cross, %d4 | load cross (side) gain move.l dsp_sw_cross, %d4 | load cross (side) gain
1: move.l (%a0)+, %d1 | prime the input registers
move.l (%a0), %d1 | move.l (%a1)+, %d2 |
mac.l %d1, %d3, (%a1), %d2, %acc0 | L = l*gain + r*cross mac.l %d1, %d3 , %acc0 | L = l*gain + r*cross
mac.l %d1, %d4 , %acc1 | R = r*gain + l*cross mac.l %d1, %d4, (%a0)+, %d1, %acc1 | R = r*gain + l*cross
mac.l %d2, %d4 , %acc0 | mac.l %d2, %d4 , %acc0 |
mac.l %d2, %d3 , %acc1 | mac.l %d2, %d3, (%a1)+, %d2, %acc1 |
movclr.l %acc0, %d1 |
movclr.l %acc1, %d2 |
move.l %d1, (%a0)+ |
move.l %d2, (%a1)+ |
subq.l #1, %d0 | subq.l #1, %d0 |
bgt.s 1b | ble.b 20f | loop done |
movem.l (%sp), %d1-%d4 | restore registers 10: | loop |
move.l %d1, %macsr | movclr.l %acc0, %d5 |
lea.l 16(%sp), %sp | cleanup movclr.l %acc1, %d6 |
rts 15: | loop start |
.cpcustom_end: mac.l %d1, %d3 , %acc0 | L = l*gain + r*cross
.size channels_process_sound_chan_custom, .cpcustom_end-channels_process_sound_chan_custom mac.l %d1, %d4, (%a0)+, %d1, %acc1 | R = r*gain + l*cross
mac.l %d2, %d4 , %acc0 |
mac.l %d2, %d3, (%a1)+, %d2, %acc1 |
move.l %d5, (%a2)+ |
move.l %d6, (%a3)+ |
subq.l #1, %d0 |
bgt.s 10b | loop |
20: | loop done |
movclr.l %acc0, %d5 | output last sample
movclr.l %acc1, %d6 |
move.l %d5, (%a2) |
move.l %d6, (%a3) |
movem.l (%sp), %d2-%d6/%a2-%a3 | restore registers
lea.l 28(%sp), %sp | cleanup
rts |
.size channels_process_sound_chan_custom, \
.-channels_process_sound_chan_custom
/**************************************************************************** /****************************************************************************
* void channels_process_sound_chan_karaoke(int count, int32_t *buf[]) * void channels_process_sound_chan_karaoke(int count, int32_t *buf[])
@ -283,31 +356,42 @@ channels_process_sound_chan_custom:
* Separate channels into side channels. * Separate channels into side channels.
*/ */
.section .text .section .text
.align 2
.global channels_process_sound_chan_karaoke .global channels_process_sound_chan_karaoke
channels_process_sound_chan_karaoke: channels_process_sound_chan_karaoke:
/* Review notes. Each row below holds the pre-change instruction followed by
 * the post-change instruction; these notes describe the post-change
 * (right-hand) routine.
 *
 * Arguments are read from the stack at 4(%sp): %d0 = count, %a0 = buf,
 * where buf[0] and buf[1] are the two channel pointers. Both channels are
 * rewritten in place: L = l/2 - r/2 is accumulated in %acc0 (mac.l adds
 * l*0.5, msac.l subtracts r*0.5) and R is produced by negating L.
 *
 * Register use: %a0/%a1 = read pointers, %a2/%a3 = write pointers,
 * %d1/%d2 = current l/r inputs, %d3 = 0x40000000 (0.5), %d4 = output
 * (holds L, then -L after neg.l). %d2-%d4/%a2-%a3 are saved on entry and
 * restored on exit (20 bytes).
 *
 * The loop is pipelined: each mac.l/msac.l multiplies the sample already
 * held in %d1/%d2 while its parallel load fetches the next one, and the
 * previous result (moved out of %acc0 into %d4) is stored afterwards.
 * Label 20: stores the final result still held in %acc0.
 *
 * NOTE(review): for count >= 1 the parallel loads read count+1 longwords
 * from each channel, i.e. one element past index count-1; that extra value
 * is never multiplied. With count <= 0 one sample pair is still computed
 * and stored.
 * NOTE(review): unlike the pre-change code, %macsr is neither saved nor
 * set here, so the EMAC mode is presumably configured by the caller --
 * TODO confirm.
 */
movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf
lea.l -16(%sp), %sp | save registers lea.l -20(%sp), %sp | save registers
move.l %macsr, %d1 | movem.l %d2-%d4/%a2-%a3, (%sp) |
movem.l %d1-%d4, (%sp) | movem.l (%a0), %a0-%a1 | get channel src pointers
move.l #0xb0, %macsr | put emac in rounding fractional mode move.l %a0, %a2 | use separate dst pointers since read
movem.l (%a0), %a0-%a1 | get channel pointers move.l %a1, %a3 | pointers run one ahead of write
move.l #0x40000000, %d4 | %d3 = 0.5 move.l #0x40000000, %d3 | %d3 = 0.5
1: move.l (%a0)+, %d1 | prime the input registers
move.l (%a0), %d1 | move.l (%a1)+, %d2 |
msac.l %d1, %d4, (%a1), %d2, %acc0 | R = r/2 - l/2 mac.l %d1, %d3, (%a0)+, %d1, %acc0 | L = l/2 - r/2
mac.l %d2, %d4 , %acc0 | msac.l %d2, %d3, (%a1)+, %d2, %acc0 |
movclr.l %acc0, %d1 | subq.l #1, %d0 |
move.l %d1, (%a1)+ | ble.b 20f | loop done |
neg.l %d1 | L = -R = -(r/2 - l/2) = l/2 - r/2 10: | loop |
move.l %d1, (%a0)+ | movclr.l %acc0, %d4 |
subq.l #1, %d0 | mac.l %d1, %d3, (%a0)+, %d1, %acc0 | L = l/2 - r/2
bgt.s 1b | msac.l %d2, %d3, (%a1)+, %d2, %acc0 |
movem.l (%sp), %d1-%d4 | restore registers move.l %d4, (%a2)+ |
move.l %d1, %macsr | neg.l %d4 | R = -L = -(l/2 - r/2) = r/2 - l/2
lea.l 16(%sp), %sp | cleanup move.l %d4, (%a3)+ |
rts subq.l #1, %d0 |
.cpkaraoke_end: bgt.s 10b | loop |
.size channels_process_sound_chan_karaoke, .cpkaraoke_end-channels_process_sound_chan_karaoke 20: | loop done |
movclr.l %acc0, %d4 | output last sample
move.l %d4, (%a2) |
neg.l %d4 | R = -L = -(l/2 - r/2) = r/2 - l/2
move.l %d4, (%a3) |
movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers
lea.l 20(%sp), %sp | cleanup
rts |
.size channels_process_sound_chan_karaoke, \
.-channels_process_sound_chan_karaoke
/**************************************************************************** /****************************************************************************
* void sample_output_stereo(int count, struct dsp_data *data, * void sample_output_stereo(int count, struct dsp_data *data,
* int32_t *src[], int16_t *dst) * int32_t *src[], int16_t *dst)
@ -329,6 +413,7 @@ channels_process_sound_chan_karaoke:
* *
*/ */
.section .text .section .text
.align 2
.global sample_output_stereo .global sample_output_stereo
sample_output_stereo: sample_output_stereo:
lea.l -44(%sp), %sp | save registers lea.l -44(%sp), %sp | save registers
@ -348,11 +433,11 @@ sample_output_stereo:
add.l %a4, %d0 | add.l %a4, %d0 |
and.l #0xfffffff0, %d0 | and.l #0xfffffff0, %d0 |
cmp.l %a0, %d0 | at least a full line? cmp.l %a0, %d0 | at least a full line?
bhi.w .sos_longloop_1_start | no? jump to trailing longword bhi.w 40f | long loop 1 start | no? do as trailing longwords
sub.l #16, %d0 | %d1 = first line bound sub.l #16, %d0 | %d1 = first line bound
cmp.l %a4, %d0 | any leading longwords? cmp.l %a4, %d0 | any leading longwords?
bls.b .sos_lineloop_start | no? jump to line loop bls.b 20f | line loop start | no? start line loop
.sos_longloop_0: 10: | long loop 0 |
move.l (%a2)+, %d1 | read longword from L and R move.l (%a2)+, %d1 | read longword from L and R
mac.l %d1, %a1, (%a3)+, %d2, %acc0 | shift L to high word mac.l %d1, %a1, (%a3)+, %d2, %acc0 | shift L to high word
mac.l %d2, %a1, %acc1 | shift R to high word mac.l %d2, %a1, %acc1 | shift R to high word
@ -362,10 +447,10 @@ sample_output_stereo:
move.w %d2, %d1 | interleave MS 16 bits of each move.w %d2, %d1 | interleave MS 16 bits of each
move.l %d1, (%a4)+ | ...and write both move.l %d1, (%a4)+ | ...and write both
cmp.l %a4, %d0 | cmp.l %a4, %d0 |
bhi.b .sos_longloop_0 | bhi.b 10b | long loop 0 |
.sos_lineloop_start: 20: | line loop start |
lea.l -12(%a0), %a5 | %a5 = at or just before last line bound lea.l -12(%a0), %a5 | %a5 = at or just before last line bound
.sos_lineloop: 30: | line loop |
move.l (%a3)+, %d4 | get next 4 R samples and scale move.l (%a3)+, %d4 | get next 4 R samples and scale
mac.l %d4, %a1, (%a3)+, %d5, %acc0 | with saturation mac.l %d4, %a1, (%a3)+, %d5, %acc0 | with saturation
mac.l %d5, %a1, (%a3)+, %d6, %acc1 | mac.l %d5, %a1, (%a3)+, %d6, %acc1 |
@ -394,11 +479,11 @@ sample_output_stereo:
move.w %d7, %d3 | move.w %d7, %d3 |
movem.l %d0-%d3, -16(%a4) | write four stereo samples movem.l %d0-%d3, -16(%a4) | write four stereo samples
cmp.l %a4, %a5 | cmp.l %a4, %a5 |
bhi.b .sos_lineloop | bhi.b 30b | line loop |
.sos_longloop_1_start: 40: | long loop 1 start |
cmp.l %a4, %a0 | any longwords left? cmp.l %a4, %a0 | any longwords left?
bls.b .sos_done | no? finished. bls.b 60f | output end | no? stop
.sos_longloop_1: 50: | long loop 1 |
move.l (%a2)+, %d1 | handle trailing longwords move.l (%a2)+, %d1 | handle trailing longwords
mac.l %d1, %a1, (%a3)+, %d2, %acc0 | the same way as leading ones mac.l %d1, %a1, (%a3)+, %d2, %acc0 | the same way as leading ones
mac.l %d2, %a1, %acc1 | mac.l %d2, %a1, %acc1 |
@ -408,14 +493,13 @@ sample_output_stereo:
move.w %d2, %d1 | move.w %d2, %d1 |
move.l %d1, (%a4)+ | move.l %d1, (%a4)+ |
cmp.l %a4, %a0 | cmp.l %a4, %a0 |
bhi.b .sos_longloop_1 | bhi.b 50b | long loop 1
.sos_done: 60: | output end |
movem.l (%sp), %d1-%d7/%a2-%a5 | restore registers movem.l (%sp), %d1-%d7/%a2-%a5 | restore registers
move.l %d1, %macsr | move.l %d1, %macsr |
lea.l 44(%sp), %sp | cleanup lea.l 44(%sp), %sp | cleanup
rts | rts |
.sos_end: .size sample_output_stereo, .-sample_output_stereo
.size sample_output_stereo, .sos_end-sample_output_stereo
/**************************************************************************** /****************************************************************************
* void sample_output_mono(int count, struct dsp_data *data, * void sample_output_mono(int count, struct dsp_data *data,
@ -424,6 +508,7 @@ sample_output_stereo:
* Same treatment as sample_output_stereo but for one channel. * Same treatment as sample_output_stereo but for one channel.
*/ */
.section .text .section .text
.align 2
.global sample_output_mono .global sample_output_mono
sample_output_mono: sample_output_mono:
lea.l -28(%sp), %sp | save registers lea.l -28(%sp), %sp | save registers
@ -442,11 +527,11 @@ sample_output_mono:
add.l %a3, %d0 | add.l %a3, %d0 |
and.l #0xfffffff0, %d0 | and.l #0xfffffff0, %d0 |
cmp.l %a0, %d0 | at least a full line? cmp.l %a0, %d0 | at least a full line?
bhi.w .som_longloop_1_start | no? jump to trailing longword bhi.w 40f | long loop 1 start | no? do as trailing longwords
sub.l #16, %d0 | %d1 = first line bound sub.l #16, %d0 | %d1 = first line bound
cmp.l %a3, %d0 | any leading longwords? cmp.l %a3, %d0 | any leading longwords?
bls.b .som_lineloop_start | no? jump to line loop bls.b 20f | line loop start | no? start line loop
.som_longloop_0: 10: | long loop 0 |
move.l (%a2)+, %d1 | read longword from L and R move.l (%a2)+, %d1 | read longword from L and R
mac.l %d1, %d5, %acc0 | shift L to high word mac.l %d1, %d5, %acc0 | shift L to high word
movclr.l %acc0, %d1 | get possibly saturated results movclr.l %acc0, %d1 | get possibly saturated results
@ -455,10 +540,10 @@ sample_output_mono:
move.w %d2, %d1 | duplicate single channel into move.w %d2, %d1 | duplicate single channel into
move.l %d1, (%a3)+ | L and R move.l %d1, (%a3)+ | L and R
cmp.l %a3, %d0 | cmp.l %a3, %d0 |
bhi.b .som_longloop_0 | bhi.b 10b | long loop 0 |
.som_lineloop_start: 20: | line loop start |
lea.l -12(%a0), %a1 | %a1 = at or just before last line bound lea.l -12(%a0), %a1 | %a1 = at or just before last line bound
.som_lineloop: 30: | line loop |
move.l (%a2)+, %d0 | get next 4 L samples and scale move.l (%a2)+, %d0 | get next 4 L samples and scale
mac.l %d0, %d5, (%a2)+, %d1, %acc0 | with saturation mac.l %d0, %d5, (%a2)+, %d1, %acc0 | with saturation
mac.l %d1, %d5, (%a2)+, %d2, %acc1 | mac.l %d1, %d5, (%a2)+, %d2, %acc1 |
@ -483,11 +568,11 @@ sample_output_mono:
move.w %d4, %d3 | move.w %d4, %d3 |
movem.l %d0-%d3, -16(%a3) | write four stereo samples movem.l %d0-%d3, -16(%a3) | write four stereo samples
cmp.l %a3, %a1 | cmp.l %a3, %a1 |
bhi.b .som_lineloop | bhi.b 30b | line loop |
.som_longloop_1_start: 40: | long loop 1 start |
cmp.l %a3, %a0 | any longwords left? cmp.l %a3, %a0 | any longwords left?
bls.b .som_done | no? finished. bls.b 60f | output end | no? stop
.som_longloop_1: 50: | loop loop 1 |
move.l (%a2)+, %d1 | handle trailing longwords move.l (%a2)+, %d1 | handle trailing longwords
mac.l %d1, %d5, %acc0 | the same way as leading ones mac.l %d1, %d5, %acc0 | the same way as leading ones
movclr.l %acc0, %d1 | movclr.l %acc0, %d1 |
@ -496,11 +581,10 @@ sample_output_mono:
move.w %d2, %d1 | move.w %d2, %d1 |
move.l %d1, (%a3)+ | move.l %d1, (%a3)+ |
cmp.l %a3, %a0 | cmp.l %a3, %a0 |
bhi.b .som_longloop_1 | bhi.b 50b | long loop 1 |
.som_done: 60: | output end |
movem.l (%sp), %d1-%d5/%a2-%a3 | restore registers movem.l (%sp), %d1-%d5/%a2-%a3 | restore registers
move.l %d1, %macsr | move.l %d1, %macsr |
lea.l 28(%sp), %sp | cleanup lea.l 28(%sp), %sp | cleanup
rts | rts |
.som_end: .size sample_output_mono, .-sample_output_mono
.size sample_output_mono, .som_end-sample_output_mono