forked from len0rd/rockbox
14% faster bitswap, thanks Jens
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@4337 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
860586d992
commit
239a91c28c
1 changed file with 41 additions and 40 deletions
|
@ -18,7 +18,7 @@
|
||||||
****************************************************************************/
|
****************************************************************************/
|
||||||
|
|
||||||
.section .icode,"ax",@progbits
|
.section .icode,"ax",@progbits
|
||||||
.align 4
|
.align 2
|
||||||
.global _bitswap
|
.global _bitswap
|
||||||
.type _bitswap,@function
|
.type _bitswap,@function
|
||||||
|
|
||||||
|
@ -26,68 +26,69 @@
|
||||||
*
|
*
|
||||||
* r0 Temporary (required by some instructions)
|
* r0 Temporary (required by some instructions)
|
||||||
* r1 Low byte
|
* r1 Low byte
|
||||||
* r2 High byte
|
* r2 High byte / final result
|
||||||
* r3 Result after flip
|
* r4 &Data
|
||||||
* r4 Data
|
|
||||||
* r5 Length
|
* r5 Length
|
||||||
* r6 1
|
|
||||||
* r7 Flip table
|
* r7 Flip table
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/* The instruction order below is a bit strange, because:
|
||||||
|
* 1) Keeping load/stores on longword boundaries means the instruction fetch
|
||||||
|
* won't compete with the memory access (because instructions are fetched
|
||||||
|
* in pairs).
|
||||||
|
* 2) Using the result of a fetch in the next instruction causes a stall
|
||||||
|
* (except in certain circumstances).
|
||||||
|
* See the SH-1 programming manual for details.
|
||||||
|
*/
|
||||||
|
|
||||||
_bitswap:
|
_bitswap:
|
||||||
mov.l .fliptable,r7
|
mov.l .fliptable,r7
|
||||||
mov #1,r6
|
add #-2,r4 /* ptr is used shifted by 2 */
|
||||||
|
add r4,r5 /* r5 = end_address - 2 */
|
||||||
|
add #-1,r5 /* r5 = &last_byte - 2 */
|
||||||
mov r4,r0
|
mov r4,r0
|
||||||
tst #1,r0 /* odd address? */
|
tst #1,r0 /* even address? */
|
||||||
bt .init /* no, address is even */
|
bt .init /* yes */
|
||||||
|
|
||||||
mov.b @r4,r0 /* swap first byte */
|
add #1,r4 /* r4 now even */
|
||||||
|
mov.b @(1,r4),r0 /* no, swap first byte */
|
||||||
extu.b r0,r0
|
extu.b r0,r0
|
||||||
mov.b @(r0,r7),r0
|
mov.b @(r0,r7),r0
|
||||||
mov.b r0,@r4
|
mov.b r0,@(1,r4)
|
||||||
add #1,r4
|
|
||||||
add #-1,r5
|
|
||||||
bra .init
|
|
||||||
|
|
||||||
/* The instruction order below is a bit strange, because:
|
.init:
|
||||||
* 1) Keeping load/stores on longword boundaries means the instruction
|
cmp/hi r4,r5 /* at least 2 bytes to swap? */
|
||||||
* fetch won't compete with the memory access (because instructions
|
bf .last /* no, skip main loop */
|
||||||
* are fetched in pairs).
|
|
||||||
* 2) Using the result of a fetch in the next instruction causes a
|
|
||||||
* stall (except in certain circumstances).
|
|
||||||
* See the SH-1 programming manual for details.
|
|
||||||
*/
|
|
||||||
|
|
||||||
.loop:
|
.loop:
|
||||||
mov.w @r4,r1 /* data to flip */
|
mov.w @(2,r4),r0 /* data to flip */
|
||||||
add #-2,r5
|
add #2,r4 /* early increment */
|
||||||
swap.b r1,r2 /* get high byte */
|
swap.b r0,r2 /* get high byte */
|
||||||
|
extu.b r0,r0 /* prepare low byte */
|
||||||
|
mov.b @(r0,r7),r1 /* swap low byte */
|
||||||
extu.b r2,r0 /* prepare high byte */
|
extu.b r2,r0 /* prepare high byte */
|
||||||
mov.b @(r0,r7),r2 /* swap high byte */
|
mov.b @(r0,r7),r2 /* swap high byte */
|
||||||
extu.b r1,r0 /* prepare low byte */
|
extu.b r1,r1 /* zero extend low byte */
|
||||||
mov.b @(r0,r7),r1 /* swap low byte */
|
shll8 r2 /* shift high byte, low byte zeroed */
|
||||||
extu.b r2,r2 /* zero extend high byte */
|
or r1,r2 /* put low byte in result */
|
||||||
swap.b r2,r3 /* put high byte in result */
|
mov.w r2,@r4 /* store result, ptr already incr'd */
|
||||||
extu.b r1,r0 /* zero extend low byte */
|
cmp/hi r4,r5 /* while &last_byte > data */
|
||||||
or r0,r3 /* put low byte in result */
|
bt .loop
|
||||||
mov.w r3,@r4 /* store result */
|
|
||||||
add #2,r4
|
|
||||||
.init:
|
|
||||||
cmp/gt r6,r5 /* while [bytes remaining] > 1 */
|
|
||||||
bt .loop /* (at least 2 bytes left) */
|
|
||||||
|
|
||||||
cmp/eq r6,r5
|
.last:
|
||||||
bf .exit /* if not 1 byte left, exit */
|
cmp/eq r4,r5 /* if behind (&last_byte - 2), exit */
|
||||||
|
bf .exit
|
||||||
|
|
||||||
mov.b @r4,r0 /* swap last byte */
|
mov.b @(2,r4),r0 /* swap last byte */
|
||||||
extu.b r0,r0
|
extu.b r0,r0
|
||||||
mov.b @(r0,r7),r0
|
mov.b @(r0,r7),r0
|
||||||
mov.b r0,@r4
|
mov.b r0,@(2,r4)
|
||||||
|
|
||||||
.exit:
|
.exit:
|
||||||
rts
|
rts
|
||||||
nop
|
nop
|
||||||
|
|
||||||
.align 4
|
.align 2
|
||||||
|
|
||||||
.fliptable:
|
.fliptable:
|
||||||
.long _fliptable
|
.long _fliptable
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue