forked from len0rd/rockbox
ASM optimization for fiq_playback(). Saves about 0.4MHz of CPU during playback on PP502x/PP5002.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@17097 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
2bf4178018
commit
b90a766d01
1 changed files with 61 additions and 23 deletions
|
|
@ -107,11 +107,17 @@ void pcm_apply_settings(void)
|
||||||
pcm_curr_sampr = pcm_freq;
|
pcm_curr_sampr = pcm_freq;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ASM optimised FIQ handler. GCC fails to make use of the fact that FIQ mode
|
/* ASM optimised FIQ handler. Checks for the minimum allowed loop cycles by evaluation of
|
||||||
has registers r8-r14 banked, and so does not need to be saved. This routine
|
* free IISFIFO-slots against available source buffer words. Through this it is possible to
|
||||||
uses only these registers, and so will never touch the stack unless it
|
* move the check for IIS_TX_FREE_COUNT outside the loop and do some further optimization.
|
||||||
actually needs to do so when calling pcm_callback_for_more. C version is
|
* Right after the loops (source buffer -> IISFIFO) are done we need to check whether we
|
||||||
still included below for reference and testing.
|
* have to exit FIQ handler (this must be done, if all free FIFO slots were filled) or
|
||||||
|
* we will have to get some new source data.
|
||||||
|
* Important information kept from former ASM implementation (not used anymore): GCC fails
|
||||||
|
* to make use of the fact that FIQ mode has registers r8-r14 banked, and so does not need
|
||||||
|
* to be saved. This routine uses only these registers, and so will never touch the stack
|
||||||
|
* unless it actually needs to do so when calling pcm_callback_for_more. C version is still
|
||||||
|
* included below for reference and testing.
|
||||||
*/
|
*/
|
||||||
#if 1
|
#if 1
|
||||||
void fiq_playback(void) ICODE_ATTR __attribute__((naked));
|
void fiq_playback(void) ICODE_ATTR __attribute__((naked));
|
||||||
|
|
@ -122,9 +128,11 @@ void fiq_playback(void)
|
||||||
* addresses we need are generated by using offsets with these two.
|
* addresses we need are generated by using offsets with these two.
|
||||||
* r10 + 0x40 is IISFIFO_WR, and r10 + 0x0c is IISFIFO_CFG.
|
* r10 + 0x40 is IISFIFO_WR, and r10 + 0x0c is IISFIFO_CFG.
|
||||||
* r8 and r9 contain local copies of p and size respectively.
|
* r8 and r9 contain local copies of p and size respectively.
|
||||||
* r12 is a working register.
|
* r0-r3 and r12 are working registers.
|
||||||
*/
|
*/
|
||||||
asm volatile (
|
asm volatile (
|
||||||
|
"stmfd sp!, { r0-r3, lr } \n" /* stack scratch regs and lr */
|
||||||
|
|
||||||
#if CONFIG_CPU == PP5002
|
#if CONFIG_CPU == PP5002
|
||||||
"ldr r12, =0xcf001040 \n" /* Some magic from iPodLinux */
|
"ldr r12, =0xcf001040 \n" /* Some magic from iPodLinux */
|
||||||
"ldr r12, [r12] \n"
|
"ldr r12, [r12] \n"
|
||||||
|
|
@ -132,24 +140,54 @@ void fiq_playback(void)
|
||||||
"ldmia r11, { r8-r9 } \n" /* r8 = p, r9 = size */
|
"ldmia r11, { r8-r9 } \n" /* r8 = p, r9 = size */
|
||||||
"cmp r9, #0 \n" /* is size 0? */
|
"cmp r9, #0 \n" /* is size 0? */
|
||||||
"beq .more_data \n" /* if so, ask pcmbuf for more data */
|
"beq .more_data \n" /* if so, ask pcmbuf for more data */
|
||||||
".fifo_loop: \n"
|
|
||||||
"ldr r12, [r10, %[cfg]] \n" /* read IISFIFO_CFG to check FIFO status */
|
|
||||||
"ands r12, r12, %[mask] \n"
|
|
||||||
"beq .exit \n" /* FIFO full, exit */
|
|
||||||
#if SAMPLE_SIZE == 16
|
#if SAMPLE_SIZE == 16
|
||||||
"ldr r12, [r8], #4 \n" /* load two samples */
|
".check_fifo: \n"
|
||||||
"str r12, [r10, %[wr]] \n" /* write them */
|
"ldr r0, [r10, %[cfg]] \n" /* read IISFIFO_CFG to check FIFO status */
|
||||||
|
"and r0, r0, %[mask] \n" /* r0 = IIS_TX_FREE_COUNT << 16 (PP502x) */
|
||||||
|
|
||||||
|
"mov r1, r0, lsr #16 \n" /* number of free FIFO slots */
|
||||||
|
"cmp r1, r9, lsr #2 \n" /* number of words from source */
|
||||||
|
"movgt r1, r9, lsr #2 \n" /* r1 = amount of allowed loops */
|
||||||
|
"sub r9, r9, r1, lsl #2 \n" /* r1 words will be written in following loop */
|
||||||
|
|
||||||
|
"subs r1, r1, #2 \n"
|
||||||
|
".fifo_loop_2: \n"
|
||||||
|
"ldmgeia r8!, {r2, r12} \n" /* load four samples */
|
||||||
|
"strge r2 , [r10, %[wr]] \n" /* write sample 0-1 to IISFIFO_WR */
|
||||||
|
"strge r12, [r10, %[wr]] \n" /* write sample 2-3 to IISFIFO_WR */
|
||||||
|
"subges r1, r1, #2 \n" /* one more loop? */
|
||||||
|
"bge .fifo_loop_2 \n" /* yes, continue */
|
||||||
|
|
||||||
|
"tst r1, #1 \n" /* two samples (one word) left? */
|
||||||
|
"ldrne r12, [r8], #4 \n" /* load two samples */
|
||||||
|
"strne r12, [r10, %[wr]] \n" /* write sample 0-1 to IISFIFO_WR */
|
||||||
|
|
||||||
|
"cmp r9, #0 \n" /* either FIFO is full or source buffer is empty */
|
||||||
|
"bgt .exit \n" /* if source buffer is not empty, FIFO must be full */
|
||||||
#elif SAMPLE_SIZE == 32
|
#elif SAMPLE_SIZE == 32
|
||||||
|
".check_fifo: \n"
|
||||||
|
"ldr r0, [r10, %[cfg]] \n" /* read IISFIFO_CFG to check FIFO status */
|
||||||
|
"and r0, r0, %[mask] \n" /* r0 = IIS_TX_FREE_COUNT << 23 (PP5002) */
|
||||||
|
|
||||||
|
"mov r1, r0, lsr #24 \n" /* number of free pairs of FIFO slots */
|
||||||
|
"cmp r1, r9, lsr #2 \n" /* number of words from source */
|
||||||
|
"movgt r1, r9, lsr #2 \n" /* r1 = amount of allowed loops */
|
||||||
|
"sub r9, r9, r1, lsl #2 \n" /* r1 words will be written in following loop */
|
||||||
|
|
||||||
|
".fifo_loop: \n"
|
||||||
"ldr r12, [r8], #4 \n" /* load two samples */
|
"ldr r12, [r8], #4 \n" /* load two samples */
|
||||||
"mov r12, r12, ror #16 \n" /* put left sample at the top bits */
|
"mov r2 , r12, lsl #16 \n" /* put left sample at the top bits */
|
||||||
"str r12, [r10, %[wr]] \n" /* write top sample, lower sample ignored */
|
"str r2 , [r10, %[wr]] \n" /* write top sample to IISFIFO_WR */
|
||||||
"mov r12, r12, lsl #16 \n" /* shift lower sample up */
|
"str r12, [r10, %[wr]] \n" /* write low sample to IISFIFO_WR*/
|
||||||
"str r12, [r10, %[wr]] \n" /* then write it */
|
"subs r1, r1, #1 \n" /* one more loop? */
|
||||||
|
"bgt .fifo_loop \n" /* yes, continue */
|
||||||
|
|
||||||
|
"cmp r9, #0 \n" /* either FIFO is full or source buffer is empty */
|
||||||
|
"bgt .exit \n" /* if source buffer is not empty, FIFO must be full */
|
||||||
#endif
|
#endif
|
||||||
"subs r9, r9, #4 \n" /* check if we have more samples */
|
|
||||||
"bne .fifo_loop \n" /* yes, continue */
|
|
||||||
".more_data: \n"
|
".more_data: \n"
|
||||||
"stmfd sp!, { r0-r3, lr } \n" /* stack scratch regs and lr */
|
|
||||||
"ldr r2, =pcm_callback_for_more \n"
|
"ldr r2, =pcm_callback_for_more \n"
|
||||||
"ldr r2, [r2] \n" /* get callback address */
|
"ldr r2, [r2] \n" /* get callback address */
|
||||||
"cmp r2, #0 \n" /* check for null pointer */
|
"cmp r2, #0 \n" /* check for null pointer */
|
||||||
|
|
@ -160,21 +198,21 @@ void fiq_playback(void)
|
||||||
"bxne r2 \n"
|
"bxne r2 \n"
|
||||||
"ldmia r11, { r8-r9 } \n" /* reload p and size */
|
"ldmia r11, { r8-r9 } \n" /* reload p and size */
|
||||||
"cmp r9, #0 \n" /* did we actually get more data? */
|
"cmp r9, #0 \n" /* did we actually get more data? */
|
||||||
"ldmnefd sp!, { r0-r3, lr } \n"
|
"bne .check_fifo \n"
|
||||||
"bne .fifo_loop \n" /* yes, continue to try feeding FIFO */
|
|
||||||
"ldr r12, =pcm_play_dma_stop \n"
|
"ldr r12, =pcm_play_dma_stop \n"
|
||||||
"mov lr, pc \n"
|
"mov lr, pc \n"
|
||||||
"bx r12 \n"
|
"bx r12 \n"
|
||||||
"ldr r12, =pcm_play_dma_stopped_callback \n"
|
"ldr r12, =pcm_play_dma_stopped_callback \n"
|
||||||
"mov lr, pc \n"
|
"mov lr, pc \n"
|
||||||
"bx r12 \n"
|
"bx r12 \n"
|
||||||
"ldmfd sp!, { r0-r3, lr } \n"
|
|
||||||
".exit: \n" /* (r8=0 if stopping, look above) */
|
".exit: \n" /* (r8=0 if stopping, look above) */
|
||||||
"stmia r11, { r8-r9 } \n" /* save p and size */
|
"stmia r11, { r8-r9 } \n" /* save p and size */
|
||||||
|
"ldmfd sp!, { r0-r3, lr } \n"
|
||||||
"subs pc, lr, #4 \n" /* FIQ specific return sequence */
|
"subs pc, lr, #4 \n" /* FIQ specific return sequence */
|
||||||
".ltorg \n"
|
".ltorg \n"
|
||||||
: /* These must only be integers! No regs */
|
: /* These must only be integers! No regs */
|
||||||
: [mask]"i"(IIS_TX_FREE_MASK & (IIS_TX_FREE_MASK-1)),
|
: [mask]"i"(IIS_TX_FREE_MASK),
|
||||||
[cfg]"i"((int)&IISFIFO_CFG - (int)&IISCONFIG),
|
[cfg]"i"((int)&IISFIFO_CFG - (int)&IISCONFIG),
|
||||||
[wr]"i"((int)&IISFIFO_WR - (int)&IISCONFIG)
|
[wr]"i"((int)&IISFIFO_WR - (int)&IISCONFIG)
|
||||||
);
|
);
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue