forked from len0rd/rockbox
Commit FS#8750. Add ARM assembler for the dsp-functions channels_process_sound_chan_mono(), channels_process_sound_chan_karaoke(), sample_output_mono() and sample_output_stereo(). By measurement the speed up is ~75% for the first three functions and ~40% for sample_output_stereo(). Additionally avoid calling yield() too often in dsp.c -- it is now limited to once per tick.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@16717 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
178df1cfcf
commit
fd052ec753
3 changed files with 189 additions and 1 deletions
|
@ -1112,6 +1112,7 @@ int dsp_callback(int msg, intptr_t param)
|
|||
int dsp_process(struct dsp_config *dsp, char *dst, const char *src[], int count)
|
||||
{
|
||||
int32_t *tmp[2];
|
||||
long last_yield = current_tick;
|
||||
int written = 0;
|
||||
int samples;
|
||||
|
||||
|
@ -1159,7 +1160,13 @@ int dsp_process(struct dsp_config *dsp, char *dst, const char *src[], int count)
|
|||
|
||||
written += samples;
|
||||
dst += samples * sizeof (int16_t) * 2;
|
||||
yield();
|
||||
|
||||
/* yield at most once per tick */
|
||||
if (current_tick > last_yield)
|
||||
{
|
||||
yield();
|
||||
last_yield = current_tick;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(CPU_COLDFIRE)
|
||||
|
|
177
apps/dsp_arm.S
177
apps/dsp_arm.S
|
@ -17,6 +17,183 @@
|
|||
*
|
||||
****************************************************************************/
|
||||
|
||||
/****************************************************************************
 * void channels_process_sound_chan_mono(int count, int32_t *buf[])
 *
 * Mixes the two channels down to mono in place: each sample of buf[0] and
 * buf[1] is replaced by (buf[0][i] >> 1) + (buf[1][i] >> 1).
 *
 * In:       r0 = count (number of samples), r1 = buf
 * Out:      nothing
 * Clobbers: r0, r2, r3, flags (r4-r6 and lr are saved and restored)
 *
 * NOTE: The loop processes two samples per iteration. When count is odd,
 *       one superfluous sample is read and written in both buffers; the
 *       calling functions do not use it. A count of zero or less returns
 *       without touching the buffers.
 */
    .section .icode, "ax", %progbits
    .align  2
    .global channels_process_sound_chan_mono
    .type   channels_process_sound_chan_mono, %function
channels_process_sound_chan_mono:
    @ input: r0 = count, r1 = buf
    stmfd   sp!, {r4-r6, lr}
    cmp     r0, #0                  @ nothing to do for count <= 0
    ble     .monoexit
    ldmia   r1, {r2-r3}             @ r2 = buf[0], r3 = buf[1]

.monoloop:
    ldmia   r2, {r4-r5}             @ r4, r5 = next two samples of buf[0]
    ldmia   r3, {r6, lr}            @ r6, lr = next two samples of buf[1]
    mov     r4, r4, asr #1          @ r4 = r4 >> 1
    add     r4, r4, r6, asr #1      @ r4 = (buf[0] >> 1) + (buf[1] >> 1)
    mov     r5, r5, asr #1          @ r5 = r5 >> 1
    add     r5, r5, lr, asr #1      @ r5 = (buf[0] >> 1) + (buf[1] >> 1)
    stmia   r2!, {r4-r5}            @ same mono pair goes to both channels
    stmia   r3!, {r4-r5}
    subs    r0, r0, #2              @ two samples done
    bgt     .monoloop

.monoexit:
    ldmfd   sp!, {r4-r6, pc}
.monoend:
    .size   channels_process_sound_chan_mono,.monoend-channels_process_sound_chan_mono
|
||||
|
||||
/****************************************************************************
 * void channels_process_sound_chan_karaoke(int count, int32_t *buf[])
 *
 * Karaoke (centre-cancel) mode, in place: for each sample
 *     d = (buf[0][i] >> 1) - (buf[1][i] >> 1)
 * is stored to buf[0][i] and -d is stored to buf[1][i].
 *
 * In:       r0 = count (number of samples), r1 = buf
 * Out:      nothing
 * Clobbers: r0, r2, r3, flags (r4-r6 and lr are saved and restored)
 *
 * NOTE: The loop processes two samples per iteration. When count is odd,
 *       one superfluous sample is read and written in both buffers; the
 *       calling functions do not use it. A count of zero or less returns
 *       without touching the buffers.
 */
    .section .icode, "ax", %progbits
    .align  2
    .global channels_process_sound_chan_karaoke
    .type   channels_process_sound_chan_karaoke, %function
channels_process_sound_chan_karaoke:
    @ input: r0 = count, r1 = buf
    stmfd   sp!, {r4-r6, lr}
    cmp     r0, #0                  @ nothing to do for count <= 0
    ble     .karaokeexit
    ldmia   r1, {r2-r3}             @ r2 = buf[0], r3 = buf[1]

.karaokeloop:
    ldmia   r2, {r4-r5}             @ r4, r5 = next two samples of buf[0]
    ldmia   r3, {r6, lr}            @ r6, lr = next two samples of buf[1]
    mov     r6, r6, asr #1          @ r6 = r6 >> 1
    rsb     r4, r6, r4, asr #1      @ r4 = (buf[0] >> 1) - (buf[1] >> 1)
    rsb     r6, r4, #0              @ r6 = -r4
    mov     lr, lr, asr #1          @ lr = lr >> 1
    rsb     r5, lr, r5, asr #1      @ r5 = (buf[0] >> 1) - (buf[1] >> 1)
    rsb     lr, r5, #0              @ lr = -r5
    stmia   r2!, {r4-r5}            @ buf[0] gets the difference
    stmia   r3!, {r6, lr}           @ buf[1] gets the negated difference
    subs    r0, r0, #2              @ two samples done
    bgt     .karaokeloop

.karaokeexit:
    ldmfd   sp!, {r4-r6, pc}
.karaokeend:
    .size   channels_process_sound_chan_karaoke,.karaokeend-channels_process_sound_chan_karaoke
|
||||
|
||||
/****************************************************************************
 * void sample_output_mono(int count, struct dsp_data *data,
 *                         int32_t *src[], int16_t *dst)
 *
 * Converts the samples of src[0] to 16 bit: each sample is rounded,
 * shifted right by the output scale, clipped to -32768...+32767 and written
 * twice (once per output halfword of a 32-bit word) to dst.
 *
 * In:       r0 = count (number of samples)
 *           r1 = data; the word at offset 0 is read as the output scale
 *                (data->output_scale)
 *           r2 = src; only src[0] is read
 *           r3 = dst; written with word stores, two words per iteration
 * Out:      nothing
 * Clobbers: r0-r3, flags (r4-r8 and lr are saved and restored)
 *
 * NOTE: The loop processes two samples per iteration. When count is odd,
 *       one superfluous sample is read and one extra output word is
 *       written; the calling functions do not use it. A count of zero or
 *       less returns without reading src or writing dst.
 */
    .section .icode, "ax", %progbits
    .align  2
    .global sample_output_mono
    .type   sample_output_mono, %function
sample_output_mono:
    @ input: r0 = count, r1 = data, r2 = src, r3 = dst
    stmfd   sp!, {r4-r8, lr}
    cmp     r0, #0                  @ nothing to do for count <= 0
    ble     .somexit

    ldr     r4, [r2]                @ r4 = src[0]
    ldr     r5, [r1]                @ r5 = data->output_scale
    sub     r1, r5, #1              @ r1 = scale-1
    mov     r2, #1
    mov     r2, r2, asl r1          @ r2 = 1 << (scale-1), rounding term
    mvn     r1, #0x8000             @ r1 = 0xffff7fff, needed for clipping
    mov     r8, #0xff00
    orr     r8, r8, #0xff           @ r8 = 0xffff, needed for masking

.somloop:
    ldmia   r4!, {r6-r7}            @ r6, r7 = next two samples
    add     r6, r6, r2
    mov     r6, r6, asr r5          @ r6 = (r6 + 1<<(scale-1)) >> scale
    mov     lr, r6, asr #15         @ lr is 0 or -1 iff r6 fits in 16 bits
    teq     lr, lr, asr #31
    eorne   r6, r1, lr, asr #31     @ Clip (-32768...+32767)
    add     r7, r7, r2
    mov     r7, r7, asr r5          @ r7 = (r7 + 1<<(scale-1)) >> scale
    mov     lr, r7, asr #15
    teq     lr, lr, asr #31
    eorne   r7, r1, lr, asr #31     @ Clip (-32768...+32767)

    and     r6, r6, r8              @ drop sign-extension / clip residue
    orr     r6, r6, r6, asl #16     @ duplicate 1st sample into both halfwords
    and     r7, r7, r8
    orr     r7, r7, r7, asl #16     @ duplicate 2nd sample into both halfwords
    stmia   r3!, {r6-r7}

    subs    r0, r0, #2              @ two samples done
    bgt     .somloop

.somexit:
    ldmfd   sp!, {r4-r8, pc}
.somend:
    .size   sample_output_mono,.somend-sample_output_mono
|
||||
|
||||
/****************************************************************************
 * void sample_output_stereo(int count, struct dsp_data *data,
 *                           int32_t *src[], int16_t *dst)
 *
 * Converts the samples of src[0] and src[1] to 16 bit: each sample is
 * rounded, shifted right by the output scale and clipped to
 * -32768...+32767. Every output word holds the src[0] sample in its low
 * halfword and the src[1] sample in its high halfword.
 *
 * In:       r0 = count (number of samples per channel)
 *           r1 = data; the word at offset 0 is read as the output scale
 *                (data->output_scale)
 *           r2 = src
 *           r3 = dst; written with word stores, two words per iteration
 * Out:      nothing
 * Clobbers: r0-r3, flags (r4-r11 and lr are saved and restored)
 *
 * NOTE: The loop processes two samples per iteration. When count is odd,
 *       one superfluous sample per channel is read and one extra output
 *       word is written; the calling functions do not use it. A count of
 *       zero or less returns without reading src or writing dst.
 */
    .section .icode, "ax", %progbits
    .align  2
    .global sample_output_stereo
    .type   sample_output_stereo, %function
sample_output_stereo:
    @ input: r0 = count, r1 = data, r2 = src, r3 = dst
    stmfd   sp!, {r4-r11, lr}
    cmp     r0, #0                  @ nothing to do for count <= 0
    ble     .sosexit

    ldmia   r2, {r4-r5}             @ r4 = src[0], r5 = src[1]
    ldr     r6, [r1]                @ r6 = data->output_scale
    sub     r1, r6, #1              @ r1 = scale-1
    mov     r2, #1
    mov     r2, r2, asl r1          @ r2 = 1 << (scale-1), rounding term
    mvn     r1, #0x8000             @ r1 = 0xffff7fff, needed for clipping
    mov     r11, #0xff00
    orr     r11, r11, #0xff         @ r11 = 0xffff, needed for masking

.sosloop:
    ldmia   r4!, {r7-r8}            @ r7, r8 = next two samples of src[0]
    add     r7, r7, r2
    mov     r7, r7, asr r6          @ r7 = (r7 + 1<<(scale-1)) >> scale
    mov     lr, r7, asr #15         @ lr is 0 or -1 iff r7 fits in 16 bits
    teq     lr, lr, asr #31
    eorne   r7, r1, lr, asr #31     @ Clip (-32768...+32767)
    add     r8, r8, r2
    mov     r8, r8, asr r6          @ r8 = (r8 + 1<<(scale-1)) >> scale
    mov     lr, r8, asr #15
    teq     lr, lr, asr #31
    eorne   r8, r1, lr, asr #31     @ Clip (-32768...+32767)

    ldmia   r5!, {r9-r10}           @ r9, r10 = next two samples of src[1]
    add     r9, r9, r2
    mov     r9, r9, asr r6          @ r9 = (r9 + 1<<(scale-1)) >> scale
    mov     lr, r9, asr #15
    teq     lr, lr, asr #31
    eorne   r9, r1, lr, asr #31     @ Clip (-32768...+32767)
    add     r10, r10, r2
    mov     r10, r10, asr r6        @ r10 = (r10 + 1<<(scale-1)) >> scale
    mov     lr, r10, asr #15
    teq     lr, lr, asr #31
    eorne   r10, r1, lr, asr #31    @ Clip (-32768...+32767)

    and     r7, r7, r11             @ keep only the low halfword of src[0]
    orr     r9, r7, r9, asl #16     @ pack first 2 halfwords into 1 word
    and     r8, r8, r11
    orr     r10, r8, r10, asl #16   @ pack last 2 halfwords into 1 word
    stmia   r3!, {r9-r10}

    subs    r0, r0, #2              @ two samples per channel done
    bgt     .sosloop

.sosexit:
    ldmfd   sp!, {r4-r11, pc}
.sosend:
    .size   sample_output_stereo,.sosend-sample_output_stereo
|
||||
|
||||
/****************************************************************************
|
||||
* void apply_crossfeed(int count, int32_t* src[])
|
||||
*/
|
||||
|
|
|
@ -26,6 +26,10 @@
|
|||
#if defined(CPU_ARM)
|
||||
#define DSP_HAVE_ASM_RESAMPLING
|
||||
#define DSP_HAVE_ASM_CROSSFEED
|
||||
#define DSP_HAVE_ASM_SOUND_CHAN_MONO
|
||||
#define DSP_HAVE_ASM_SOUND_CHAN_KARAOKE
|
||||
#define DSP_HAVE_ASM_SAMPLE_OUTPUT_MONO
|
||||
#define DSP_HAVE_ASM_SAMPLE_OUTPUT_STEREO
|
||||
#elif defined (CPU_COLDFIRE)
|
||||
#define DSP_HAVE_ASM_APPLY_GAIN
|
||||
#define DSP_HAVE_ASM_RESAMPLING
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue