mirror of
https://github.com/Rockbox/rockbox.git
synced 2025-11-09 21:22:39 -05:00
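Speed up iPod nano 2G YUV blitting by 3%: point lr at the LCD data port (LCD_BASE + 0x40) and write each pixel pair with a single stmia instead of two separate str instructions.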
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@28825 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
298bbe8d3c
commit
abf28a9586
1 changed files with 35 additions and 37 deletions
|
|
@ -100,7 +100,7 @@ lcd_write_yuv420_lines:
|
|||
|
||||
mov r7, r2 /* r7 = loop count */
|
||||
add r8, sp, #16 /* chroma buffer */
|
||||
mov lr, r1 /* LCD data port = LCD_BASE */
|
||||
add lr, r1, #0x40 /* LCD data port = LCD_BASE + 0x40 */
|
||||
|
||||
/* 1st loop start */
|
||||
10: /* loop start */
|
||||
|
|
@ -153,21 +153,21 @@ lcd_write_yuv420_lines:
|
|||
andhi r4, r4, #31
|
||||
15: /* no clamp */
|
||||
|
||||
/* calculate pixel_1 and save to r5 for later pixel packing */
|
||||
/* calculate pixel_1 and save to r4 for later pixel packing */
|
||||
orr r4, r4, r3, lsl #5 /* pixel_1 = r<<11 | g<<5 | b */
|
||||
orr r5, r4, r6, lsl #11 /* r5 = pixel_1 */
|
||||
orr r4, r4, r6, lsl #11 /* r4 = pixel_1 */
|
||||
|
||||
/* 1st loop, second pixel */
|
||||
ldrb r4, [r9], #1 /* r4 = *ysrc++ = *Y'_p++ */
|
||||
sub r4, r4, #16 /* r4 = (Y'-16) * 74 */
|
||||
add r3, r4, r4, asl #2
|
||||
add r4, r3, r4, asl #5
|
||||
ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */
|
||||
sub r5, r5, #16 /* r5 = (Y'-16) * 74 */
|
||||
add r3, r5, r5, asl #2
|
||||
add r5, r3, r5, asl #5
|
||||
|
||||
add r6, r1, r4, asr #8 /* r6 = r = (Y >> 9) + rv */
|
||||
add r3, r2, r4, asr #7 /* r3 = g = (Y >> 8) + guv */
|
||||
add r4, r0, r4, asr #8 /* r4 = b = (Y >> 9) + bu */
|
||||
add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */
|
||||
add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */
|
||||
add r5, r0, r5, asr #8 /* r5 = b = (Y >> 9) + bu */
|
||||
|
||||
orr r0, r6, r4 /* check if clamping is needed... */
|
||||
orr r0, r6, r5 /* check if clamping is needed... */
|
||||
orr r0, r0, r3, asr #1 /* ...at all */
|
||||
cmp r0, #31
|
||||
bls 15f /* -> no clamp */
|
||||
|
|
@ -177,23 +177,22 @@ lcd_write_yuv420_lines:
|
|||
cmp r3, #63 /* clamp g */
|
||||
mvnhi r3, r3, asr #31
|
||||
andhi r3, r3, #63
|
||||
cmp r4, #31 /* clamp b */
|
||||
mvnhi r4, r4, asr #31
|
||||
andhi r4, r4, #31
|
||||
cmp r5, #31 /* clamp b */
|
||||
mvnhi r5, r5, asr #31
|
||||
andhi r5, r5, #31
|
||||
15: /* no clamp */
|
||||
|
||||
/* calculate pixel_2 and pack with pixel_1 before writing */
|
||||
orr r4, r4, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */
|
||||
orr r4, r4, r6, lsl #11 /* r4 = pixel_2 */
|
||||
orr r5, r5, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */
|
||||
orr r5, r5, r6, lsl #11 /* r5 = pixel_2 */
|
||||
|
||||
/* wait for FIFO half full */
|
||||
.fifo_wait1:
|
||||
ldr r3, [lr, #0x1C] /* while (LCD_STATUS & 0x08); */
|
||||
ldr r3, [lr, #-0x24] /* while (LCD_STATUS & 0x08); */
|
||||
tst r3, #0x8
|
||||
bgt .fifo_wait1
|
||||
|
||||
str r5, [lr, #0x40] /* write pixel_1 */
|
||||
str r4, [lr, #0x40] /* write pixel_2 */
|
||||
stmia lr, {r4,r5} /* write pixel_1 and pixel_2 */
|
||||
|
||||
subs r7, r7, #2 /* check for loop end */
|
||||
bgt 10b /* back to beginning */
|
||||
|
|
@ -234,21 +233,21 @@ lcd_write_yuv420_lines:
|
|||
mvnhi r4, r4, asr #31
|
||||
andhi r4, r4, #31
|
||||
15: /* no clamp */
|
||||
/* calculate pixel_1 and save to r5 for later pixel packing */
|
||||
/* calculate pixel_1 and save to r4 for later pixel packing */
|
||||
orr r4, r4, r3, lsl #5 /* pixel_1 = r<<11 | g<<5 | b */
|
||||
orr r5, r4, r6, lsl #11 /* r5 = pixel_1 */
|
||||
orr r4, r4, r6, lsl #11 /* r4 = pixel_1 */
|
||||
|
||||
/* 2nd loop, second pixel */
|
||||
ldrb r4, [r9], #1 /* r4 = *ysrc++ = *Y'_p++ */
|
||||
sub r4, r4, #16 /* r4 = (Y'-16) * 74 */
|
||||
add r3, r4, r4, asl #2
|
||||
add r4, r3, r4, asl #5
|
||||
ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */
|
||||
sub r5, r5, #16 /* r5 = (Y'-16) * 74 */
|
||||
add r3, r5, r5, asl #2
|
||||
add r5, r3, r5, asl #5
|
||||
|
||||
add r6, r1, r4, asr #8 /* r6 = r = (Y >> 9) + rv */
|
||||
add r3, r2, r4, asr #7 /* r3 = g = (Y >> 8) + guv */
|
||||
add r4, r0, r4, asr #8 /* r4 = b = (Y >> 9) + bu */
|
||||
add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */
|
||||
add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */
|
||||
add r5, r0, r5, asr #8 /* r5 = b = (Y >> 9) + bu */
|
||||
|
||||
orr r0, r6, r4 /* check if clamping is needed... */
|
||||
orr r0, r6, r5 /* check if clamping is needed... */
|
||||
orr r0, r0, r3, asr #1 /* ...at all */
|
||||
cmp r0, #31
|
||||
bls 15f /* -> no clamp */
|
||||
|
|
@ -258,23 +257,22 @@ lcd_write_yuv420_lines:
|
|||
cmp r3, #63 /* clamp g */
|
||||
mvnhi r3, r3, asr #31
|
||||
andhi r3, r3, #63
|
||||
cmp r4, #31 /* clamp b */
|
||||
mvnhi r4, r4, asr #31
|
||||
andhi r4, r4, #31
|
||||
cmp r5, #31 /* clamp b */
|
||||
mvnhi r5, r5, asr #31
|
||||
andhi r5, r5, #31
|
||||
15: /* no clamp */
|
||||
|
||||
/* calculate pixel_2 and pack with pixel_1 before writing */
|
||||
orr r4, r4, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */
|
||||
orr r4, r4, r6, lsl #11 /* r4 = pixel_2 */
|
||||
orr r5, r5, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */
|
||||
orr r5, r5, r6, lsl #11 /* r5 = pixel_2 */
|
||||
|
||||
/* wait for FIFO half full */
|
||||
.fifo_wait2:
|
||||
ldr r3, [lr, #0x1C] /* while (LCD_STATUS & 0x08); */
|
||||
ldr r3, [lr, #-0x24] /* while (LCD_STATUS & 0x08); */
|
||||
tst r3, #0x8
|
||||
bgt .fifo_wait2
|
||||
|
||||
str r5, [lr, #0x40] /* write pixel_1 */
|
||||
str r4, [lr, #0x40] /* write pixel_2 */
|
||||
stmia lr, {r4,r5} /* write pixel_1 and pixel_2 */
|
||||
|
||||
subs r7, r7, #2 /* check for loop end */
|
||||
bgt 20b /* back to beginning */
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue