mirror of
https://github.com/Rockbox/rockbox.git
synced 2025-12-09 05:05:20 -05:00
Apply some ARMv6 optimisations to YUV blitting. Speeds up mpegplayer on Gigabeat S by ~2% in undithered and ~7.5% in dithered mode.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@21889 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
27f5cdaad8
commit
989021ed3c
1 changed file with 67 additions and 1 deletion
|
|
@ -170,6 +170,11 @@ lcd_write_yuv420_lines:
|
||||||
add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
|
add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
|
||||||
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
|
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
|
||||||
@
|
@
|
||||||
|
#if ARM_ARCH >= 6
|
||||||
|
usat r1, #5, r1 @ clamp b
|
||||||
|
usat lr, #5, lr @ clamp r
|
||||||
|
usat r7, #6, r7 @ clamp g
|
||||||
|
#else
|
||||||
orr r12, r1, lr @ check if clamping is needed...
|
orr r12, r1, lr @ check if clamping is needed...
|
||||||
orr r12, r12, r7, asr #1 @ ...at all
|
orr r12, r12, r7, asr #1 @ ...at all
|
||||||
cmp r12, #31 @
|
cmp r12, #31 @
|
||||||
|
|
@ -184,6 +189,7 @@ lcd_write_yuv420_lines:
|
||||||
mvnhi r7, r7, asr #31 @
|
mvnhi r7, r7, asr #31 @
|
||||||
andhi r7, r7, #63 @
|
andhi r7, r7, #63 @
|
||||||
15: @ no clamp @
|
15: @ no clamp @
|
||||||
|
#endif
|
||||||
@
|
@
|
||||||
ldrb r12, [r4, r3] @ r12 = Y' = *(Y'_p + stride)
|
ldrb r12, [r4, r3] @ r12 = Y' = *(Y'_p + stride)
|
||||||
@
|
@
|
||||||
|
|
@ -206,6 +212,11 @@ lcd_write_yuv420_lines:
|
||||||
add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
|
add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
|
||||||
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
|
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
|
||||||
@
|
@
|
||||||
|
#if ARM_ARCH >= 6
|
||||||
|
usat r1, #5, r1 @ clamp b
|
||||||
|
usat lr, #5, lr @ clamp r
|
||||||
|
usat r7, #6, r7 @ clamp g
|
||||||
|
#else
|
||||||
orr r12, r1, lr @ check if clamping is needed...
|
orr r12, r1, lr @ check if clamping is needed...
|
||||||
orr r12, r12, r7, asr #1 @ ...at all
|
orr r12, r12, r7, asr #1 @ ...at all
|
||||||
cmp r12, #31 @
|
cmp r12, #31 @
|
||||||
|
|
@ -220,6 +231,7 @@ lcd_write_yuv420_lines:
|
||||||
mvnhi r7, r7, asr #31 @
|
mvnhi r7, r7, asr #31 @
|
||||||
andhi r7, r7, #63 @
|
andhi r7, r7, #63 @
|
||||||
15: @ no clamp @
|
15: @ no clamp @
|
||||||
|
#endif
|
||||||
@
|
@
|
||||||
ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
|
ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
|
||||||
@
|
@
|
||||||
|
|
@ -245,6 +257,11 @@ lcd_write_yuv420_lines:
|
||||||
add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
|
add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
|
||||||
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
|
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
|
||||||
@
|
@
|
||||||
|
#if ARM_ARCH >= 6
|
||||||
|
usat r1, #5, r1 @ clamp b
|
||||||
|
usat lr, #5, lr @ clamp r
|
||||||
|
usat r7, #6, r7 @ clamp g
|
||||||
|
#else
|
||||||
orr r12, r1, lr @ check if clamping is needed...
|
orr r12, r1, lr @ check if clamping is needed...
|
||||||
orr r12, r12, r7, asr #1 @ ...at all
|
orr r12, r12, r7, asr #1 @ ...at all
|
||||||
cmp r12, #31 @
|
cmp r12, #31 @
|
||||||
|
|
@ -259,6 +276,7 @@ lcd_write_yuv420_lines:
|
||||||
mvnhi r7, r7, asr #31 @
|
mvnhi r7, r7, asr #31 @
|
||||||
andhi r7, r7, #63 @
|
andhi r7, r7, #63 @
|
||||||
15: @ no clamp @
|
15: @ no clamp @
|
||||||
|
#endif
|
||||||
@
|
@
|
||||||
ldrb r12, [r4, r3] @ r12 = Y' = *(Y'_p + stride)
|
ldrb r12, [r4, r3] @ r12 = Y' = *(Y'_p + stride)
|
||||||
@
|
@
|
||||||
|
|
@ -281,6 +299,11 @@ lcd_write_yuv420_lines:
|
||||||
add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
|
add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
|
||||||
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
|
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
|
||||||
@
|
@
|
||||||
|
#if ARM_ARCH >= 6
|
||||||
|
usat r1, #5, r1 @ clamp b
|
||||||
|
usat lr, #5, lr @ clamp r
|
||||||
|
usat r7, #6, r7 @ clamp g
|
||||||
|
#else
|
||||||
orr r12, r1, lr @ check if clamping is needed...
|
orr r12, r1, lr @ check if clamping is needed...
|
||||||
orr r12, r12, r7, asr #1 @ ...at all
|
orr r12, r12, r7, asr #1 @ ...at all
|
||||||
cmp r12, #31 @
|
cmp r12, #31 @
|
||||||
|
|
@ -295,6 +318,7 @@ lcd_write_yuv420_lines:
|
||||||
mvnhi r7, r7, asr #31 @
|
mvnhi r7, r7, asr #31 @
|
||||||
andhi r7, r7, #63 @
|
andhi r7, r7, #63 @
|
||||||
15: @ no clamp @
|
15: @ no clamp @
|
||||||
|
#endif
|
||||||
@
|
@
|
||||||
orr r12, r1, lr, lsl #11 @ r12 = b | (r << 11)
|
orr r12, r1, lr, lsl #11 @ r12 = b | (r << 11)
|
||||||
orr r12, r12, r7, lsl #5 @ r12 |= (g << 5)
|
orr r12, r12, r7, lsl #5 @ r12 |= (g << 5)
|
||||||
|
|
@ -425,6 +449,16 @@ lcd_write_yuv420_lines_odither:
|
||||||
add r11, r11, r12, lsl #1 @ r = r11 + delta*2
|
add r11, r11, r12, lsl #1 @ r = r11 + delta*2
|
||||||
add r7, r7, r12, lsr #1 @ g = r7 + delta/2
|
add r7, r7, r12, lsr #1 @ g = r7 + delta/2
|
||||||
@
|
@
|
||||||
|
#if ARM_ARCH >= 6
|
||||||
|
usat r11, #5, r11, asr #11 @ clamp r
|
||||||
|
usat r7, #6, r7, asr #9 @ clamp g
|
||||||
|
usat r1, #5, r1, asr #10 @ clamp b
|
||||||
|
@
|
||||||
|
ldrb r12, [r4, r3] @ r12 = Y' = *(Y'_p + stride)
|
||||||
|
@
|
||||||
|
orr r1, r1, r11, lsl #11 @ r1 = b | (r << 11)
|
||||||
|
orr r1, r1, r7, lsl #5 @ r1 |= (g << 5)
|
||||||
|
#else
|
||||||
orr r12, r1, r11, asr #1 @ check if clamping is needed...
|
orr r12, r1, r11, asr #1 @ check if clamping is needed...
|
||||||
orr r12, r12, r7 @ ...at all
|
orr r12, r12, r7 @ ...at all
|
||||||
movs r12, r12, asr #15 @
|
movs r12, r12, asr #15 @
|
||||||
|
|
@ -444,6 +478,7 @@ lcd_write_yuv420_lines_odither:
|
||||||
and r7, r7, #0x7e00 @ r1 = pixel = (r & 0xf800) |
|
and r7, r7, #0x7e00 @ r1 = pixel = (r & 0xf800) |
|
||||||
orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
|
orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
|
||||||
orr r1, r11, r1, lsr #10 @ (b >> 10)
|
orr r1, r11, r1, lsr #10 @ (b >> 10)
|
||||||
|
#endif
|
||||||
@
|
@
|
||||||
#if LCD_WIDTH >= LCD_HEIGHT
|
#if LCD_WIDTH >= LCD_HEIGHT
|
||||||
strh r1, [r0] @
|
strh r1, [r0] @
|
||||||
|
|
@ -477,6 +512,16 @@ lcd_write_yuv420_lines_odither:
|
||||||
add r11, r11, r12, lsl #1 @ r = r11 + delta*2
|
add r11, r11, r12, lsl #1 @ r = r11 + delta*2
|
||||||
add r7, r7, r12, lsr #1 @ g = r7 + delta/2
|
add r7, r7, r12, lsr #1 @ g = r7 + delta/2
|
||||||
@
|
@
|
||||||
|
#if ARM_ARCH >= 6
|
||||||
|
usat r11, #5, r11, asr #11 @ clamp r
|
||||||
|
usat r7, #6, r7, asr #9 @ clamp g
|
||||||
|
usat r1, #5, r1, asr #10 @ clamp b
|
||||||
|
@
|
||||||
|
ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
|
||||||
|
@
|
||||||
|
orr r1, r1, r11, lsl #11 @ r1 = b | (r << 11)
|
||||||
|
orr r1, r1, r7, lsl #5 @ r1 |= (g << 5)
|
||||||
|
#else
|
||||||
orr r12, r1, r11, asr #1 @ check if clamping is needed...
|
orr r12, r1, r11, asr #1 @ check if clamping is needed...
|
||||||
orr r12, r12, r7 @ ...at all
|
orr r12, r12, r7 @ ...at all
|
||||||
movs r12, r12, asr #15 @
|
movs r12, r12, asr #15 @
|
||||||
|
|
@ -496,6 +541,7 @@ lcd_write_yuv420_lines_odither:
|
||||||
and r7, r7, #0x7e00 @ r1 = pixel = (r & 0xf800) |
|
and r7, r7, #0x7e00 @ r1 = pixel = (r & 0xf800) |
|
||||||
orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
|
orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
|
||||||
orr r1, r11, r1, lsr #10 @ (b >> 10)
|
orr r1, r11, r1, lsr #10 @ (b >> 10)
|
||||||
|
#endif
|
||||||
@
|
@
|
||||||
#if LCD_WIDTH >= LCD_HEIGHT
|
#if LCD_WIDTH >= LCD_HEIGHT
|
||||||
add r0, r0, #2*LCD_WIDTH @
|
add r0, r0, #2*LCD_WIDTH @
|
||||||
|
|
@ -534,6 +580,16 @@ lcd_write_yuv420_lines_odither:
|
||||||
add r11, r11, r12, lsl #1 @ r = r11 + delta*2
|
add r11, r11, r12, lsl #1 @ r = r11 + delta*2
|
||||||
add r7, r7, r12, lsr #1 @ g = r7 + delta/2
|
add r7, r7, r12, lsr #1 @ g = r7 + delta/2
|
||||||
@
|
@
|
||||||
|
#if ARM_ARCH >= 6
|
||||||
|
usat r11, #5, r11, asr #11 @ clamp r
|
||||||
|
usat r7, #6, r7, asr #9 @ clamp g
|
||||||
|
usat r1, #5, r1, asr #10 @ clamp b
|
||||||
|
@
|
||||||
|
ldrb r12, [r4, r3] @ r12 = Y' = *(Y'_p + stride)
|
||||||
|
@
|
||||||
|
orr r1, r1, r11, lsl #11 @ r1 = b | (r << 11)
|
||||||
|
orr r1, r1, r7, lsl #5 @ r1 |= (g << 5)
|
||||||
|
#else
|
||||||
orr r12, r1, r11, asr #1 @ check if clamping is needed...
|
orr r12, r1, r11, asr #1 @ check if clamping is needed...
|
||||||
orr r12, r12, r7 @ ...at all
|
orr r12, r12, r7 @ ...at all
|
||||||
movs r12, r12, asr #15 @
|
movs r12, r12, asr #15 @
|
||||||
|
|
@ -547,12 +603,13 @@ lcd_write_yuv420_lines_odither:
|
||||||
mvnne r7, r12, lsr #15 @
|
mvnne r7, r12, lsr #15 @
|
||||||
15: @ no clamp @
|
15: @ no clamp @
|
||||||
@
|
@
|
||||||
ldrb r12, [r4, r3] @ r12 = Y' = *(Y'_p + stride)
|
ldrb r12, [r4, r3] @ r12 = Y' = *(Y'_p + stride)
|
||||||
@
|
@
|
||||||
and r11, r11, #0xf800 @ pack pixel
|
and r11, r11, #0xf800 @ pack pixel
|
||||||
and r7, r7, #0x7e00 @ r1 = pixel = (r & 0xf800) |
|
and r7, r7, #0x7e00 @ r1 = pixel = (r & 0xf800) |
|
||||||
orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
|
orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
|
||||||
orr r1, r11, r1, lsr #10 @ (b >> 10)
|
orr r1, r11, r1, lsr #10 @ (b >> 10)
|
||||||
|
#endif
|
||||||
@
|
@
|
||||||
#if LCD_WIDTH >= LCD_HEIGHT
|
#if LCD_WIDTH >= LCD_HEIGHT
|
||||||
strh r1, [r0, #2]
|
strh r1, [r0, #2]
|
||||||
|
|
@ -586,6 +643,14 @@ lcd_write_yuv420_lines_odither:
|
||||||
add r11, r11, r14, lsl #1 @ r = r11 + delta*2
|
add r11, r11, r14, lsl #1 @ r = r11 + delta*2
|
||||||
add r7, r7, r14, lsr #1 @ g = r7 + delta/2
|
add r7, r7, r14, lsr #1 @ g = r7 + delta/2
|
||||||
@
|
@
|
||||||
|
#if ARM_ARCH >= 6
|
||||||
|
usat r11, #5, r11, asr #11 @ clamp r
|
||||||
|
usat r7, #6, r7, asr #9 @ clamp g
|
||||||
|
usat r1, #5, r1, asr #10 @ clamp b
|
||||||
|
@
|
||||||
|
orr r1, r1, r11, lsl #11 @ r1 = b | (r << 11)
|
||||||
|
orr r1, r1, r7, lsl #5 @ r1 |= (g << 5)
|
||||||
|
#else
|
||||||
orr r12, r1, r11, asr #1 @ check if clamping is needed...
|
orr r12, r1, r11, asr #1 @ check if clamping is needed...
|
||||||
orr r12, r12, r7 @ ...at all
|
orr r12, r12, r7 @ ...at all
|
||||||
movs r12, r12, asr #15 @
|
movs r12, r12, asr #15 @
|
||||||
|
|
@ -603,6 +668,7 @@ lcd_write_yuv420_lines_odither:
|
||||||
and r7, r7, #0x7e00 @ r1 = pixel = (r & 0xf800) |
|
and r7, r7, #0x7e00 @ r1 = pixel = (r & 0xf800) |
|
||||||
orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
|
orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
|
||||||
orr r1, r11, r1, lsr #10 @ (b >> 10)
|
orr r1, r11, r1, lsr #10 @ (b >> 10)
|
||||||
|
#endif
|
||||||
@
|
@
|
||||||
#if LCD_WIDTH >= LCD_HEIGHT
|
#if LCD_WIDTH >= LCD_HEIGHT
|
||||||
add r0, r0, #2*LCD_WIDTH
|
add r0, r0, #2*LCD_WIDTH
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue