1
0
Fork 0
forked from len0rd/rockbox

Commit FS#8750. Add ARM assembler for the dsp-functions channels_process_sound_chan_mono(), channels_process_sound_chan_karaoke(), sample_output_mono() and sample_output_stereo(). By measurement the speed-up is ~75% for the first three functions and ~40% for sample_output_stereo(). Additionally avoid calling yield() too often in dsp.c -- it is now limited to once per tick.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@16717 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Andree Buschmann 2008-03-19 13:55:53 +00:00
parent 178df1cfcf
commit fd052ec753
3 changed files with 189 additions and 1 deletions

View file

@ -1112,6 +1112,7 @@ int dsp_callback(int msg, intptr_t param)
int dsp_process(struct dsp_config *dsp, char *dst, const char *src[], int count)
{
int32_t *tmp[2];
long last_yield = current_tick;
int written = 0;
int samples;
@ -1159,7 +1160,13 @@ int dsp_process(struct dsp_config *dsp, char *dst, const char *src[], int count)
written += samples;
dst += samples * sizeof (int16_t) * 2;
yield();
/* yield at least once each tick */
if (current_tick > last_yield)
{
yield();
last_yield = current_tick;
}
}
#if defined(CPU_COLDFIRE)

View file

@ -17,6 +17,183 @@
*
****************************************************************************/
/****************************************************************************
* void channels_process_sound_chan_mono(int count, int32_t *buf[])
*
* Overwrites every sample of both channel buffers with the mix of the two
* channels: buf[0][i] = buf[1][i] = (buf[0][i] >> 1) + (buf[1][i] >> 1).
* Each operand is halved (arithmetic shift) before the addition, so the sum
* always fits into 32 bits.
*
* NOTE: The following code processes two samples at once. When count is odd,
* there is an additional obsolete sample processed, which will not be
* used by the calling functions.
* The loop body is executed before count is tested, so two samples are
* always read and written, even if count <= 0.
*/
.section .icode, "ax", %progbits
.align 2
.global channels_process_sound_chan_mono
.type channels_process_sound_chan_mono, %function
channels_process_sound_chan_mono:
@ input: r0 = count, r1 = buf
stmfd sp!, {r4-r6, lr} @ save work registers; lr is used as a data register below
ldmia r1, {r2-r3} @ r2 = buf[0], r3 = buf[1]
.monoloop:
ldmia r2, {r4-r5} @ r4, r5 = next two samples of buf[0] (pointer not advanced yet)
ldmia r3, {r6,lr} @ r6, lr = next two samples of buf[1] (pointer not advanced yet)
mov r4, r4, asr #1 @ r4 = r4/2
add r4, r4, r6, asr #1 @ r4 = r4 + r6/2 = buf[0]/2 + buf[1]/2 (first sample)
mov r5, r5, asr #1 @ r5 = r5/2
add r5, r5, lr, asr #1 @ r5 = r5 + lr/2 = buf[0]/2 + buf[1]/2 (second sample)
stmia r2!, {r4-r5} @ write both mixed samples to buf[0], advance r2 by 8 bytes
stmia r3!, {r4-r5} @ write the same two samples to buf[1], advance r3 by 8 bytes
subs r0, r0, #2 @ count -= 2, sets flags for the branch
bgt .monoloop @ repeat while count > 0
ldmfd sp!, {r4-r6, pc} @ restore registers and return (saved lr is popped into pc)
.monoend:
.size channels_process_sound_chan_mono,.monoend-channels_process_sound_chan_mono
/****************************************************************************
* void channels_process_sound_chan_karaoke(int count, int32_t *buf[])
*
* Replaces the two channels by their halved difference:
*   buf[0][i] =   (buf[0][i] >> 1) - (buf[1][i] >> 1)
*   buf[1][i] = -((buf[0][i] >> 1) - (buf[1][i] >> 1))
* i.e. buf[1] receives the negated value that is written to buf[0].
*
* NOTE: The following code processes two samples at once. When count is odd,
* there is an additional obsolete sample processed, which will not be
* used by the calling functions.
* The loop body is executed before count is tested, so two samples are
* always read and written, even if count <= 0.
*/
.section .icode, "ax", %progbits
.align 2
.global channels_process_sound_chan_karaoke
.type channels_process_sound_chan_karaoke, %function
channels_process_sound_chan_karaoke:
@ input: r0 = count, r1 = buf
stmfd sp!, {r4-r6, lr} @ save work registers; lr is used as a data register below
ldmia r1, {r2-r3} @ r2 = buf[0], r3 = buf[1]
.karaokeloop:
ldmia r2, {r4-r5} @ r4, r5 = next two samples of buf[0] (pointer not advanced yet)
ldmia r3, {r6,lr} @ r6, lr = next two samples of buf[1] (pointer not advanced yet)
mov r6, r6, asr #1 @ r6 = r6/2
rsb r4, r6, r4, asr #1 @ r4 = -r6 + r4/2 = (buf[0]-buf[1])/2
rsb r6, r4, #0 @ r6 = -r4
mov lr, lr, asr #1 @ lr = lr/2
rsb r5, lr, r5, asr #1 @ r5 = -lr + r5/2 = (buf[0]-buf[1])/2
rsb lr, r5, #0 @ lr = -r5
stmia r2!, {r4-r5} @ write the two differences to buf[0], advance r2 by 8 bytes
stmia r3!, {r6,lr} @ write the two negated differences to buf[1], advance r3 by 8 bytes
subs r0, r0, #2 @ count -= 2, sets flags for the branch
bgt .karaokeloop @ repeat while count > 0
ldmfd sp!, {r4-r6, pc} @ restore registers and return (saved lr is popped into pc)
.karaokeend:
.size channels_process_sound_chan_karaoke,.karaokeend-channels_process_sound_chan_karaoke
/****************************************************************************
* void sample_output_mono(int count, struct dsp_data *data,
int32_t *src[], int16_t *dst)
*
* Converts the samples of src[0] to 16 bit: each sample is rounded
* ((x + (1 << (scale-1))) >> scale, with scale = data->output_scale),
* clipped to -32768...+32767 and written twice to dst, so that both
* halfwords of every output word carry the same value.
*
* Only src[0] is read. The word at offset 0 of *data is taken as
* output_scale.
*
* NOTE: The following code processes two samples at once. When count is odd,
* there is an additional obsolete sample processed, which will not be
* used by the calling functions.
* The loop body is executed before count is tested, so two samples are
* always converted, even if count <= 0.
*/
.section .icode, "ax", %progbits
.align 2
.global sample_output_mono
.type sample_output_mono, %function
sample_output_mono:
@ input: r0 = count, r1 = data, r2 = src, r3 = dst
stmfd sp!, {r4-r8, lr} @ only r4-r8 and lr are modified below
ldr r4, [r2] @ r4 = src[0]
ldr r5, [r1] @ r5 = data->output_scale
sub r1, r5, #1 @ r1 = r5-1
mov r2, #1
mov r2, r2, asl r1 @ r2 = 1<<r1 = 1 << (scale-1), the rounding offset
mvn r1, #0x8000 @ r1 = 0xffff7fff, needed for clipping
mov r8, #0xff00
orr r8, r8, #0xff @ r8 = 0x0000ffff, needed for masking
.somloop:
ldmia r4!, {r6-r7} @ r6, r7 = next two samples, advance r4 by 8 bytes
add r6, r6, r2
mov r6, r6, asr r5 @ r6 = (r6 + 1<<(scale-1)) >> scale
mov lr, r6, asr #15 @ lr is 0 or -1 if r6 fits into 16 bit signed
teq lr, lr, asr #31 @ Z set if lr equals its own sign extension (no overflow)
eorne r6, r1, lr, asr #31 @ Clip (-32768...+32767): low halfword becomes 0x7fff or 0x8000
add r7, r7, r2
mov r7, r7, asr r5 @ r7 = (r7 + 1<<(scale-1)) >> scale
mov lr, r7, asr #15 @ same overflow check for the second sample
teq lr, lr, asr #31
eorne r7, r1, lr, asr #31 @ Clip (-32768...+32767)
and r6, r6, r8 @ keep the low halfword only
orr r6, r6, r6, asl #16 @ pack first 2 halfwords into 1 word (same sample twice)
and r7, r7, r8 @ keep the low halfword only
orr r7, r7, r7, asl #16 @ pack last 2 halfwords into 1 word (same sample twice)
stmia r3!, {r6-r7} @ store two output words, advance dst by 8 bytes
subs r0, r0, #2 @ count -= 2, sets flags for the branch
bgt .somloop @ repeat while count > 0
ldmfd sp!, {r4-r8, pc} @ restore registers and return (saved lr is popped into pc)
.somend:
.size sample_output_mono,.somend-sample_output_mono
/****************************************************************************
* void sample_output_stereo(int count, struct dsp_data *data,
int32_t *src[], int16_t *dst)
*
* Converts the samples of src[0] and src[1] to 16 bit: each sample is
* rounded ((x + (1 << (scale-1))) >> scale, with scale = data->output_scale)
* and clipped to -32768...+32767. Every output word holds the src[0] sample
* in its low halfword and the matching src[1] sample in its high halfword.
*
* The word at offset 0 of *data is taken as output_scale.
*
* NOTE: The following code processes two samples at once. When count is odd,
* there is an additional obsolete sample processed, which will not be
* used by the calling functions.
* The loop body is executed before count is tested, so two samples per
* channel are always converted, even if count <= 0.
*/
.section .icode, "ax", %progbits
.align 2
.global sample_output_stereo
.type sample_output_stereo, %function
sample_output_stereo:
@ input: r0 = count, r1 = data, r2 = src, r3 = dst
stmfd sp!, {r4-r11, lr} @ r4-r11 and lr are all used as work registers below
ldmia r2, {r4-r5} @ r4 = src[0], r5 = src[1]
ldr r6, [r1] @ r6 = data->output_scale
sub r1, r6, #1 @ r1 = r6-1
mov r2, #1
mov r2, r2, asl r1 @ r2 = 1<<r1 = 1 << (scale-1), the rounding offset
mvn r1, #0x8000 @ r1 = 0xffff7fff, needed for clipping
mov r11, #0xff00
orr r11, r11, #0xff @ r11 = 0x0000ffff, needed for masking
.sosloop:
ldmia r4!, {r7-r8} @ r7, r8 = next two samples of src[0], advance r4 by 8 bytes
add r7, r7, r2
mov r7, r7, asr r6 @ r7 = (r7 + 1<<(scale-1)) >> scale
mov lr, r7, asr #15 @ lr is 0 or -1 if r7 fits into 16 bit signed
teq lr, lr, asr #31 @ Z set if lr equals its own sign extension (no overflow)
eorne r7, r1, lr, asr #31 @ Clip (-32768...+32767): low halfword becomes 0x7fff or 0x8000
add r8, r8, r2
mov r8, r8, asr r6 @ r8 = (r8 + 1<<(scale-1)) >> scale
mov lr, r8, asr #15
teq lr, lr, asr #31
eorne r8, r1, lr, asr #31 @ Clip (-32768...+32767)
ldmia r5!, {r9-r10} @ r9, r10 = next two samples of src[1], advance r5 by 8 bytes
add r9, r9, r2
mov r9, r9, asr r6 @ r9 = (r9 + 1<<(scale-1)) >> scale
mov lr, r9, asr #15
teq lr, lr, asr #31
eorne r9, r1, lr, asr #31 @ Clip (-32768...+32767)
add r10, r10, r2
mov r10, r10, asr r6 @ r10 = (r10 + 1<<(scale-1)) >> scale
mov lr, r10, asr #15
teq lr, lr, asr #31
eorne r10, r1, lr, asr #31 @ Clip (-32768...+32767)
and r7, r7, r11 @ keep the low halfword of the first src[0] sample
orr r9, r7, r9, asl #16 @ pack first 2 halfwords into 1 word (src[0] low, src[1] high)
and r8, r8, r11 @ keep the low halfword of the second src[0] sample
orr r10, r8, r10, asl #16 @ pack last 2 halfwords into 1 word (src[0] low, src[1] high)
stmia r3!, {r9-r10} @ store two output words, advance dst by 8 bytes
subs r0, r0, #2 @ count -= 2, sets flags for the branch
bgt .sosloop @ repeat while count > 0
ldmfd sp!, {r4-r11, pc} @ restore registers and return (saved lr is popped into pc)
.sosend:
.size sample_output_stereo,.sosend-sample_output_stereo
/****************************************************************************
* void apply_crossfeed(int count, int32_t* src[])
*/

View file

@ -26,6 +26,10 @@
#if defined(CPU_ARM)
#define DSP_HAVE_ASM_RESAMPLING
#define DSP_HAVE_ASM_CROSSFEED
#define DSP_HAVE_ASM_SOUND_CHAN_MONO
#define DSP_HAVE_ASM_SOUND_CHAN_KARAOKE
#define DSP_HAVE_ASM_SAMPLE_OUTPUT_MONO
#define DSP_HAVE_ASM_SAMPLE_OUTPUT_STEREO
#elif defined (CPU_COLDFIRE)
#define DSP_HAVE_ASM_APPLY_GAIN
#define DSP_HAVE_ASM_RESAMPLING