1
0
Fork 0
forked from len0rd/rockbox

14% faster bitswap, thanks Jens

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@4337 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Jörg Hohensohn 2004-03-03 07:18:26 +00:00
parent 860586d992
commit 239a91c28c

View file

@@ -18,7 +18,7 @@
****************************************************************************/ ****************************************************************************/
.section .icode,"ax",@progbits .section .icode,"ax",@progbits
.align 4 .align 2
.global _bitswap .global _bitswap
.type _bitswap,@function .type _bitswap,@function
@@ -26,68 +26,69 @@
* *
* r0 Temporary (required by some instructions) * r0 Temporary (required by some instructions)
* r1 Low byte * r1 Low byte
* r2 High byte * r2 High byte / final result
* r3 Result after flip * r4 &Data
* r4 Data
* r5 Length * r5 Length
* r6 1
* r7 Flip table * r7 Flip table
*/ */
/* The instruction order below is a bit strange, because:
* 1) Keeping load/stores on longword boundaries means the instruction fetch
* won't compete with the memory access (because instructions are fetched
* in pairs).
* 2) Using the result of a fetch in the next instruction causes a stall
* (except in certain circumstances).
* See the SH-1 programming manual for details.
*/
_bitswap: _bitswap:
mov.l .fliptable,r7 mov.l .fliptable,r7
mov #1,r6 add #-2,r4 /* ptr is used shifted by 2 */
add r4,r5 /* r5 = end_address - 2 */
add #-1,r5 /* r5 = &last_byte - 2 */
mov r4,r0 mov r4,r0
tst #1,r0 /* odd address? */ tst #1,r0 /* even address? */
bt .init /* no, address is even */ bt .init /* yes */
mov.b @r4,r0 /* swap first byte */ add #1,r4 /* r4 now even */
mov.b @(1,r4),r0 /* no, swap first byte */
extu.b r0,r0 extu.b r0,r0
mov.b @(r0,r7),r0 mov.b @(r0,r7),r0
mov.b r0,@r4 mov.b r0,@(1,r4)
add #1,r4
add #-1,r5
bra .init
/* The instruction order below is a bit strange, because: .init:
* 1) Keeping load/stores on longword boundaries means the instruction cmp/hi r4,r5 /* at least 2 bytes to swap? */
* fetch won't compete with the memory access (because instructions bf .last /* no, skip main loop */
* are fetched in pairs).
* 2) Using the result of a fetch in the next instruction causes a
* stall (except in certain circumstances).
* See the SH-1 programming manual for details.
*/
.loop: .loop:
mov.w @r4,r1 /* data to flip */ mov.w @(2,r4),r0 /* data to flip */
add #-2,r5 add #2,r4 /* early increment */
swap.b r1,r2 /* get high byte */ swap.b r0,r2 /* get high byte */
extu.b r0,r0 /* prepare low byte */
mov.b @(r0,r7),r1 /* swap low byte */
extu.b r2,r0 /* prepare high byte */ extu.b r2,r0 /* prepare high byte */
mov.b @(r0,r7),r2 /* swap high byte */ mov.b @(r0,r7),r2 /* swap high byte */
extu.b r1,r0 /* prepare low byte */ extu.b r1,r1 /* zero extend low byte */
mov.b @(r0,r7),r1 /* swap low byte */ shll8 r2 /* shift high byte, low byte zeroed */
extu.b r2,r2 /* zero extend high byte */ or r1,r2 /* put low byte in result */
swap.b r2,r3 /* put high byte in result */ mov.w r2,@r4 /* store result, ptr already incr'd */
extu.b r1,r0 /* zero extend low byte */ cmp/hi r4,r5 /* while &last_byte > data */
or r0,r3 /* put low byte in result */ bt .loop
mov.w r3,@r4 /* store result */
add #2,r4
.init:
cmp/gt r6,r5 /* while [bytes remaining] > 1 */
bt .loop /* (at least 2 bytes left) */
cmp/eq r6,r5 .last:
bf .exit /* if not 1 byte left, exit */ cmp/eq r4,r5 /* if behind (&last_byte - 2), exit */
bf .exit
mov.b @r4,r0 /* swap last byte */ mov.b @(2,r4),r0 /* swap last byte */
extu.b r0,r0 extu.b r0,r0
mov.b @(r0,r7),r0 mov.b @(r0,r7),r0
mov.b r0,@r4 mov.b r0,@(2,r4)
.exit: .exit:
rts rts
nop nop
.align 4 .align 2
.fliptable: .fliptable:
.long _fliptable .long _fliptable