1
0
Fork 0
forked from len0rd/rockbox

ASM optimization for fiq_playback(). Saves about 0.4MHz of CPU during playback on PP502x/PP5002.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@17097 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Andree Buschmann 2008-04-13 17:03:24 +00:00
parent 2bf4178018
commit b90a766d01

View file

@ -107,11 +107,17 @@ void pcm_apply_settings(void)
pcm_curr_sampr = pcm_freq;
}
/* ASM optimised FIQ handler. GCC fails to make use of the fact that FIQ mode
has registers r8-r14 banked, and so does not need to be saved. This routine
uses only these registers, and so will never touch the stack unless it
actually needs to do so when calling pcm_callback_for_more. C version is
still included below for reference and testing.
/* ASM optimised FIQ handler. Computes the allowed number of loop cycles as the minimum of
* the free IISFIFO slots and the available source buffer words. This makes it possible to
* move the check for IIS_TX_FREE_COUNT outside the loop and do some further optimization.
* Right after the loops (source buffer -> IISFIFO) are done we need to check whether we
* have to exit the FIQ handler (this must be done if all free FIFO slots were filled) or
* whether we have to get some new source data.
* Important information kept from former ASM implementation (not used anymore): GCC fails
* to make use of the fact that FIQ mode has registers r8-r14 banked, and so does not need
* to be saved. This routine uses only these registers, and so will never touch the stack
* unless it actually needs to do so when calling pcm_callback_for_more. C version is still
* included below for reference and testing.
*/
#if 1
void fiq_playback(void) ICODE_ATTR __attribute__((naked));
@ -122,9 +128,11 @@ void fiq_playback(void)
* addresses we need are generated by using offsets with these two.
* r10 + 0x40 is IISFIFO_WR, and r10 + 0x0c is IISFIFO_CFG.
* r8 and r9 contains local copies of p and size respectively.
* r12 is a working register.
* r0-r3 and r12 are working registers.
*/
asm volatile (
"stmfd sp!, { r0-r3, lr } \n" /* stack scratch regs and lr */
#if CONFIG_CPU == PP5002
"ldr r12, =0xcf001040 \n" /* Some magic from iPodLinux */
"ldr r12, [r12] \n"
@ -132,24 +140,54 @@ void fiq_playback(void)
"ldmia r11, { r8-r9 } \n" /* r8 = p, r9 = size */
"cmp r9, #0 \n" /* is size 0? */
"beq .more_data \n" /* if so, ask pcmbuf for more data */
".fifo_loop: \n"
"ldr r12, [r10, %[cfg]] \n" /* read IISFIFO_CFG to check FIFO status */
"ands r12, r12, %[mask] \n"
"beq .exit \n" /* FIFO full, exit */
#if SAMPLE_SIZE == 16
"ldr r12, [r8], #4 \n" /* load two samples */
"str r12, [r10, %[wr]] \n" /* write them */
".check_fifo: \n"
"ldr r0, [r10, %[cfg]] \n" /* read IISFIFO_CFG to check FIFO status */
"and r0, r0, %[mask] \n" /* r0 = IIS_TX_FREE_COUNT << 16 (PP502x) */
"mov r1, r0, lsr #16 \n" /* number of free FIFO slots */
"cmp r1, r9, lsr #2 \n" /* number of words from source */
"movgt r1, r9, lsr #2 \n" /* r1 = amount of allowed loops */
"sub r9, r9, r1, lsl #2 \n" /* r1 words will be written in following loop */
"subs r1, r1, #2 \n"
".fifo_loop_2: \n"
"ldmgeia r8!, {r2, r12} \n" /* load four samples */
"strge r2 , [r10, %[wr]] \n" /* write sample 0-1 to IISFIFO_WR */
"strge r12, [r10, %[wr]] \n" /* write sample 2-3 to IISFIFO_WR */
"subges r1, r1, #2 \n" /* one more loop? */
"bge .fifo_loop_2 \n" /* yes, continue */
"tst r1, #1 \n" /* two samples (one word) left? */
"ldrne r12, [r8], #4 \n" /* load two samples */
"strne r12, [r10, %[wr]] \n" /* write sample 0-1 to IISFIFO_WR */
"cmp r9, #0 \n" /* either FIFO is full or source buffer is empty */
"bgt .exit \n" /* if source buffer is not empty, FIFO must be full */
#elif SAMPLE_SIZE == 32
".check_fifo: \n"
"ldr r0, [r10, %[cfg]] \n" /* read IISFIFO_CFG to check FIFO status */
"and r0, r0, %[mask] \n" /* r0 = IIS_TX_FREE_COUNT << 23 (PP5002) */
"mov r1, r0, lsr #24 \n" /* number of free pairs of FIFO slots */
"cmp r1, r9, lsr #2 \n" /* number of words from source */
"movgt r1, r9, lsr #2 \n" /* r1 = amount of allowed loops */
"sub r9, r9, r1, lsl #2 \n" /* r1 words will be written in following loop */
".fifo_loop: \n"
"ldr r12, [r8], #4 \n" /* load two samples */
"mov r12, r12, ror #16 \n" /* put left sample at the top bits */
"str r12, [r10, %[wr]] \n" /* write top sample, lower sample ignored */
"mov r12, r12, lsl #16 \n" /* shift lower sample up */
"str r12, [r10, %[wr]] \n" /* then write it */
"mov r2 , r12, lsl #16 \n" /* put left sample at the top bits */
"str r2 , [r10, %[wr]] \n" /* write top sample to IISFIFO_WR */
"str r12, [r10, %[wr]] \n" /* write low sample to IISFIFO_WR*/
"subs r1, r1, #1 \n" /* one more loop? */
"bgt .fifo_loop \n" /* yes, continue */
"cmp r9, #0 \n" /* either FIFO is full or source buffer is empty */
"bgt .exit \n" /* if source buffer is not empty, FIFO must be full */
#endif
"subs r9, r9, #4 \n" /* check if we have more samples */
"bne .fifo_loop \n" /* yes, continue */
".more_data: \n"
"stmfd sp!, { r0-r3, lr } \n" /* stack scratch regs and lr */
"ldr r2, =pcm_callback_for_more \n"
"ldr r2, [r2] \n" /* get callback address */
"cmp r2, #0 \n" /* check for null pointer */
@ -160,21 +198,21 @@ void fiq_playback(void)
"bxne r2 \n"
"ldmia r11, { r8-r9 } \n" /* reload p and size */
"cmp r9, #0 \n" /* did we actually get more data? */
"ldmnefd sp!, { r0-r3, lr } \n"
"bne .fifo_loop \n" /* yes, continue to try feeding FIFO */
"bne .check_fifo \n"
"ldr r12, =pcm_play_dma_stop \n"
"mov lr, pc \n"
"bx r12 \n"
"ldr r12, =pcm_play_dma_stopped_callback \n"
"mov lr, pc \n"
"bx r12 \n"
"ldmfd sp!, { r0-r3, lr } \n"
".exit: \n" /* (r8=0 if stopping, look above) */
"stmia r11, { r8-r9 } \n" /* save p and size */
"ldmfd sp!, { r0-r3, lr } \n"
"subs pc, lr, #4 \n" /* FIQ specific return sequence */
".ltorg \n"
: /* These must only be integers! No regs */
: [mask]"i"(IIS_TX_FREE_MASK & (IIS_TX_FREE_MASK-1)),
: [mask]"i"(IIS_TX_FREE_MASK),
[cfg]"i"((int)&IISFIFO_CFG - (int)&IISCONFIG),
[wr]"i"((int)&IISFIFO_WR - (int)&IISCONFIG)
);