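ARM asm LCD and ATA driver functions: Don't save r12, as it is a scratch register that a callee is allowed to clobber. Where it helps (e.g. lcd_write_yuv420_lines), lr is saved instead of r11 and used as the working register in its place, so the function returns by loading pc directly in the final ldmfd rather than with a separate bx lr. Saves a bit of stack and execution time.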

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@21795 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Jens Arnold 2009-07-11 23:43:44 +00:00
parent fa59ed6ae7
commit 47d4c4739b
12 changed files with 256 additions and 261 deletions

View file

@ -48,7 +48,7 @@ lcd_write_yuv420_lines:
@ r0 = yuv_src
@ r1 = width
@ r2 = stride
stmfd sp!, { r4-r12 } @ save non-scratch
stmfd sp!, { r4-r10, lr } @ save non-scratch
ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
@ r5 = yuv_src[1] = Cb_p
@ r6 = yuv_src[2] = Cr_p
@ -73,9 +73,9 @@ lcd_write_yuv420_lines:
add r10, r10, r8, asl #3 @
add r10, r10, r8, asl #4 @
@
add r11, r9, r9, asl #2 @ r9 = Cr*101
add r11, r11, r9, asl #5 @
add r9, r11, r9, asl #6 @
add lr, r9, r9, asl #2 @ r9 = Cr*101
add lr, lr, r9, asl #5 @
add r9, lr, r9, asl #6 @
@
add r8, r8, #2 @ r8 = bu = (Cb*128 + 128) >> 8
mov r8, r8, asr #2 @
@ -85,19 +85,19 @@ lcd_write_yuv420_lines:
mov r10, r10, asr #8 @
@ compute R, G, and B
add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
orr r12, r0, r11 @ check if clamping is needed...
orr r12, r0, lr @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r0, #31 @ clamp b
mvnhi r0, r0, asr #31 @
andhi r0, r0, #31 @
cmp r11, #31 @ clamp r
mvnhi r11, r11, asr #31 @
andhi r11, r11, #31 @
cmp lr, #31 @ clamp r
mvnhi lr, lr, asr #31 @
andhi lr, lr, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
@ -105,14 +105,14 @@ lcd_write_yuv420_lines:
@
ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
@
mov r11, r11, lsl #3 @
orr r11, r11, r7, lsr #3 @ r11 = (r << 3) | (g >> 3)
mov lr, lr, lsl #3 @
orr lr, lr, r7, lsr #3 @ lr = (r << 3) | (g >> 3)
orr r0, r0, r7, lsl #5 @ r0 = (g << 5) | b
1: @ busy @
ldr r7, [r3] @ r7 = LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
bne 1b @
str r11, [r3, #0x10] @ send MSB
str lr, [r3, #0x10] @ send MSB
1: @busy @
ldr r7, [r3] @ r7 = LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
@ -124,19 +124,19 @@ lcd_write_yuv420_lines:
add r7, r12, r7, asl #5 @
@ compute R, G, and B
add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
orr r12, r0, r11 @ check if clamping is needed...
orr r12, r0, lr @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r0, #31 @ clamp b
mvnhi r0, r0, asr #31 @
andhi r0, r0, #31 @
cmp r11, #31 @ clamp r
mvnhi r11, r11, asr #31 @
andhi r11, r11, #31 @
cmp lr, #31 @ clamp r
mvnhi lr, lr, asr #31 @
andhi lr, lr, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
@ -144,14 +144,14 @@ lcd_write_yuv420_lines:
@
ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
@
mov r11, r11, lsl #3 @
orr r11, r11, r7, lsr #3 @ r11 = (r << 3) | (g >> 3)
mov lr, lr, lsl #3 @
orr lr, lr, r7, lsr #3 @ lr = (r << 3) | (g >> 3)
orr r0, r0, r7, lsl #5 @ r0 = (g << 5) | b
1: @ busy @
ldr r7, [r3] @ r7 = LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
bne 1b @
str r11, [r3, #0x10] @ send MSB
str lr, [r3, #0x10] @ send MSB
1: @ busy @
ldr r7, [r3] @ r7 = LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
@ -163,19 +163,19 @@ lcd_write_yuv420_lines:
add r7, r12, r7, asl #5 @
@ compute R, G, and B
add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
orr r12, r0, r11 @ check if clamping is needed...
orr r12, r0, lr @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r0, #31 @ clamp b
mvnhi r0, r0, asr #31 @
andhi r0, r0, #31 @
cmp r11, #31 @ clamp r
mvnhi r11, r11, asr #31 @
andhi r11, r11, #31 @
cmp lr, #31 @ clamp r
mvnhi lr, lr, asr #31 @
andhi lr, lr, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
@ -184,14 +184,14 @@ lcd_write_yuv420_lines:
ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
@
@
mov r11, r11, lsl #3 @
orr r11, r11, r7, lsr #3 @ r11 = (r << 3) | (g >> 3)
mov lr, lr, lsl #3 @
orr lr, lr, r7, lsr #3 @ lr = (r << 3) | (g >> 3)
orr r0, r0, r7, lsl #5 @ r0 = (g << 5) | b
1: @ busy @
ldr r7, [r3] @ r7 = LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
bne 1b @
str r11, [r3, #0x10] @ send MSB
str lr, [r3, #0x10] @ send MSB
1: @ busy @
ldr r7, [r3] @ r7 = LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
@ -203,32 +203,32 @@ lcd_write_yuv420_lines:
add r7, r12, r7, asl #5 @
@ compute R, G, and B
add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
orr r12, r0, r11 @ check if clamping is needed...
orr r12, r0, lr @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r0, #31 @ clamp b
mvnhi r0, r0, asr #31 @
andhi r0, r0, #31 @
cmp r11, #31 @ clamp r
mvnhi r11, r11, asr #31 @
andhi r11, r11, #31 @
cmp lr, #31 @ clamp r
mvnhi lr, lr, asr #31 @
andhi lr, lr, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
@
mov r11, r11, lsl #3 @
orr r11, r11, r7, lsr #3 @ r11 = (r << 3) | (g >> 3)
mov lr, lr, lsl #3 @
orr lr, lr, r7, lsr #3 @ lr = (r << 3) | (g >> 3)
orr r0, r0, r7, lsl #5 @ r0 = (g << 5) | b
1: @ busy @
ldr r7, [r3] @ r7 = LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
bne 1b @
str r11, [r3, #0x10] @ send MSB
str lr, [r3, #0x10] @ send MSB
1: @ busy @
ldr r7, [r3] @ r7 = LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
@ -238,8 +238,7 @@ lcd_write_yuv420_lines:
subs r1, r1, #2 @ subtract block from width
bgt 10b @ loop line @
@
ldmfd sp!, { r4-r12 } @ restore registers and return
bx lr @
ldmfd sp!, { r4-r10, pc } @ restore registers and return
.ltorg @ dump constant pool
.size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
@ -279,7 +278,7 @@ lcd_write_yuv420_lines_odither:
@ r2 = stride
@ r3 = x_screen
@ [sp] = y_screen
stmfd sp!, { r4-r12, lr } @ save non-scratch
stmfd sp!, { r4-r11, lr } @ save non-scratch
ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
@ r5 = yuv_src[1] = Cb_p
@ r6 = yuv_src[2] = Cr_p
@ -546,6 +545,6 @@ lcd_write_yuv420_lines_odither:
subs r1, r1, #2 @ subtract block from width
bgt 10b @ loop line @
@
ldmfd sp!, { r4-r12, pc } @ restore registers and return
ldmfd sp!, { r4-r11, pc } @ restore registers and return
.ltorg @ dump constant pool
.size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither