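Speed up iPod nano 2G YUV blitting by 3%.

Each pixel pair is now written to the LCD data port with a single stmia instead of two separate str instructions.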

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@28825 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Andree Buschmann 2010-12-13 20:56:53 +00:00
parent 298bbe8d3c
commit abf28a9586

View file

@ -100,7 +100,7 @@ lcd_write_yuv420_lines:
mov r7, r2 /* r7 = loop count */
add r8, sp, #16 /* chroma buffer */
mov lr, r1 /* LCD data port = LCD_BASE */
add lr, r1, #0x40 /* LCD data port = LCD_BASE + 0x40 */
/* 1st loop start */
10: /* loop start */
@ -153,21 +153,21 @@ lcd_write_yuv420_lines:
andhi r4, r4, #31
15: /* no clamp */
/* calculate pixel_1 and save to r5 for later pixel packing */
/* calculate pixel_1 and save to r4 for later pixel packing */
orr r4, r4, r3, lsl #5 /* pixel_1 = r<<11 | g<<5 | b */
orr r5, r4, r6, lsl #11 /* r5 = pixel_1 */
orr r4, r4, r6, lsl #11 /* r4 = pixel_1 */
/* 1st loop, second pixel */
ldrb r4, [r9], #1 /* r4 = *ysrc++ = *Y'_p++ */
sub r4, r4, #16 /* r4 = (Y'-16) * 74 */
add r3, r4, r4, asl #2
add r4, r3, r4, asl #5
ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */
sub r5, r5, #16 /* r5 = (Y'-16) * 74 */
add r3, r5, r5, asl #2
add r5, r3, r5, asl #5
add r6, r1, r4, asr #8 /* r6 = r = (Y >> 9) + rv */
add r3, r2, r4, asr #7 /* r3 = g = (Y >> 8) + guv */
add r4, r0, r4, asr #8 /* r4 = b = (Y >> 9) + bu */
add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */
add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */
add r5, r0, r5, asr #8 /* r5 = b = (Y >> 9) + bu */
orr r0, r6, r4 /* check if clamping is needed... */
orr r0, r6, r5 /* check if clamping is needed... */
orr r0, r0, r3, asr #1 /* ...at all */
cmp r0, #31
bls 15f /* -> no clamp */
@ -177,23 +177,22 @@ lcd_write_yuv420_lines:
cmp r3, #63 /* clamp g */
mvnhi r3, r3, asr #31
andhi r3, r3, #63
cmp r4, #31 /* clamp b */
mvnhi r4, r4, asr #31
andhi r4, r4, #31
cmp r5, #31 /* clamp b */
mvnhi r5, r5, asr #31
andhi r5, r5, #31
15: /* no clamp */
/* calculate pixel_2 and pack with pixel_1 before writing */
orr r4, r4, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */
orr r4, r4, r6, lsl #11 /* r4 = pixel_2 */
orr r5, r5, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */
orr r5, r5, r6, lsl #11 /* r5 = pixel_2 */
/* wait for FIFO half full */
.fifo_wait1:
ldr r3, [lr, #0x1C] /* while (LCD_STATUS & 0x08); */
ldr r3, [lr, #-0x24] /* while (LCD_STATUS & 0x08); */
tst r3, #0x8
bgt .fifo_wait1
str r5, [lr, #0x40] /* write pixel_1 */
str r4, [lr, #0x40] /* write pixel_2 */
stmia lr, {r4,r5} /* write pixel_1 and pixel_2 */
subs r7, r7, #2 /* check for loop end */
bgt 10b /* back to beginning */
@ -234,21 +233,21 @@ lcd_write_yuv420_lines:
mvnhi r4, r4, asr #31
andhi r4, r4, #31
15: /* no clamp */
/* calculate pixel_1 and save to r5 for later pixel packing */
/* calculate pixel_1 and save to r4 for later pixel packing */
orr r4, r4, r3, lsl #5 /* pixel_1 = r<<11 | g<<5 | b */
orr r5, r4, r6, lsl #11 /* r5 = pixel_1 */
orr r4, r4, r6, lsl #11 /* r4 = pixel_1 */
/* 2nd loop, second pixel */
ldrb r4, [r9], #1 /* r4 = *ysrc++ = *Y'_p++ */
sub r4, r4, #16 /* r4 = (Y'-16) * 74 */
add r3, r4, r4, asl #2
add r4, r3, r4, asl #5
ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */
sub r5, r5, #16 /* r5 = (Y'-16) * 74 */
add r3, r5, r5, asl #2
add r5, r3, r5, asl #5
add r6, r1, r4, asr #8 /* r6 = r = (Y >> 9) + rv */
add r3, r2, r4, asr #7 /* r3 = g = (Y >> 8) + guv */
add r4, r0, r4, asr #8 /* r4 = b = (Y >> 9) + bu */
add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */
add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */
add r5, r0, r5, asr #8 /* r5 = b = (Y >> 9) + bu */
orr r0, r6, r4 /* check if clamping is needed... */
orr r0, r6, r5 /* check if clamping is needed... */
orr r0, r0, r3, asr #1 /* ...at all */
cmp r0, #31
bls 15f /* -> no clamp */
@ -258,23 +257,22 @@ lcd_write_yuv420_lines:
cmp r3, #63 /* clamp g */
mvnhi r3, r3, asr #31
andhi r3, r3, #63
cmp r4, #31 /* clamp b */
mvnhi r4, r4, asr #31
andhi r4, r4, #31
cmp r5, #31 /* clamp b */
mvnhi r5, r5, asr #31
andhi r5, r5, #31
15: /* no clamp */
/* calculate pixel_2 and pack with pixel_1 before writing */
orr r4, r4, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */
orr r4, r4, r6, lsl #11 /* r4 = pixel_2 */
orr r5, r5, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */
orr r5, r5, r6, lsl #11 /* r5 = pixel_2 */
/* wait for FIFO half full */
.fifo_wait2:
ldr r3, [lr, #0x1C] /* while (LCD_STATUS & 0x08); */
ldr r3, [lr, #-0x24] /* while (LCD_STATUS & 0x08); */
tst r3, #0x8
bgt .fifo_wait2
str r5, [lr, #0x40] /* write pixel_1 */
str r4, [lr, #0x40] /* write pixel_2 */
stmia lr, {r4,r5} /* write pixel_1 and pixel_2 */
subs r7, r7, #2 /* check for loop end */
bgt 20b /* back to beginning */