Revert "Remove YUV blitting functions and LCD modes"

This reverts commit fe6aa21e9e.

Change-Id: I8bb1e5d6c52ed1478002d2140ef494ec5d62b8e3
This commit is contained in:
Solomon Peachy 2022-10-13 11:03:53 -04:00
parent f9ea1fc79d
commit 418169aff8
54 changed files with 9638 additions and 3 deletions

View file

@ -222,6 +222,15 @@ static const struct plugin_api rockbox_api = {
#if LCD_DEPTH >= 16
lcd_bitmap_transparent_part,
lcd_bitmap_transparent,
#if MEMORYSIZE > 2
lcd_blit_yuv,
#if defined(TOSHIBA_GIGABEAT_F) || defined(SANSA_E200) || defined(SANSA_C200) \
|| defined(IRIVER_H10) || defined(COWON_D2) || defined(PHILIPS_HDD1630) \
|| defined(SANSA_FUZE) || defined(SANSA_E200V2) || defined(SANSA_FUZEV2) \
|| defined(TOSHIBA_GIGABEAT_S) || defined(PHILIPS_SA9200)
lcd_yuv_set_options,
#endif
#endif /* MEMORYSIZE > 2 */
#elif (LCD_DEPTH < 4) && (CONFIG_PLATFORM & PLATFORM_NATIVE)
lcd_blit_mono,
lcd_blit_grey_phase,

View file

@ -242,6 +242,17 @@ struct plugin_api {
int x, int y, int width, int height);
void (*lcd_bitmap_transparent)(const fb_data *src, int x, int y,
int width, int height);
#if MEMORYSIZE > 2
void (*lcd_blit_yuv)(unsigned char * const src[3],
int src_x, int src_y, int stride,
int x, int y, int width, int height);
#if defined(TOSHIBA_GIGABEAT_F) || defined(SANSA_E200) || defined(SANSA_C200) \
|| defined(IRIVER_H10) || defined(COWON_D2) || defined(PHILIPS_HDD1630) \
|| defined(SANSA_FUZE) || defined(SANSA_E200V2) || defined(SANSA_FUZEV2) \
|| defined(TOSHIBA_GIGABEAT_S) || defined(PHILIPS_SA9200)
void (*lcd_yuv_set_options)(unsigned options);
#endif
#endif /* MEMORYSIZE > 2 */
#elif (LCD_DEPTH < 4) && (CONFIG_PLATFORM & PLATFORM_NATIVE)
void (*lcd_blit_mono)(const unsigned char *data, int x, int by, int width,
int bheight, int stride);

View file

@ -123,6 +123,94 @@ static void time_main_update(void)
log_text(str);
}
#if defined(HAVE_LCD_COLOR) && (MEMORYSIZE > 2)
/* The test picture is always laid out in landscape proportions. On portrait
 * LCDs the blitter is assumed to rotate the picture, so the two screen
 * dimensions are swapped there. */
#if LCD_HEIGHT > LCD_WIDTH
#define YUV_WIDTH  LCD_HEIGHT
#define YUV_HEIGHT LCD_WIDTH
#else
#define YUV_WIDTH  LCD_WIDTH
#define YUV_HEIGHT LCD_HEIGHT
#endif

/* Planar test image: one full-resolution luma plane plus two chroma planes
 * that are half the size in each direction. */
static unsigned char ydata[YUV_HEIGHT][YUV_WIDTH];
static unsigned char udata[YUV_HEIGHT/2][YUV_WIDTH/2];
static unsigned char vdata[YUV_HEIGHT/2][YUV_WIDTH/2];

/* Plane pointers in the order handed to lcd_blit_yuv(): Y, U, V */
static unsigned char * const yuvbuf[3] = {
    &ydata[0][0],
    &udata[0][0],
    &vdata[0][0],
};
/* Fill the top-left part of both chroma planes with a gradient.
 *
 * width and height are given in luma pixels and are halved to get the
 * chroma extent. U ramps from row to row, V ramps from column to column.
 * The luma plane is left untouched.
 */
static void make_gradient_rect(int width, int height)
{
    unsigned char v_ramp[YUV_WIDTH/2];
    const int chroma_w = width / 2;
    const int chroma_h = height / 2;
    int col, row;

    /* Every chroma row gets the same horizontal V ramp, so build it once */
    for (col = 0; col < chroma_w; col++)
        v_ramp[col] = (col << 8) / chroma_w;

    for (row = 0; row < chroma_h; row++)
    {
        /* U is constant along a row and grows with the row number */
        rb->memset(udata[row], (row << 8) / chroma_h, chroma_w);
        rb->memcpy(vdata[row], v_ramp, chroma_w);
    }
}
/* Benchmark rb->lcd_blit_yuv() on the main LCD.
 *
 * Two measurements are logged, each running for DURATION ticks:
 *   "1/1" - the whole picture is blitted every frame
 *   "1/4" - only a centred rectangle covering a quarter of the area
 */
static void time_main_yuv(void)
{
    /* Geometry of the rectangle used by the 1/4 update test */
    const int quarter_x = YUV_WIDTH/4;
    const int quarter_w = YUV_WIDTH/2;
    const int quarter_y = YUV_HEIGHT/4;
    const int quarter_h = YUV_HEIGHT/2;

    char msg[32];      /* log line buffer */
    long t_begin;      /* tick count when the measurement started */
    long t_now;        /* most recently sampled tick count */
    int frames;
    int tenth_fps;

    log_text("Main LCD YUV");

    rb->memset(ydata, 128, sizeof(ydata)); /* medium grey */

    /* Test 1: full LCD update */
    make_gradient_rect(YUV_WIDTH, YUV_HEIGHT);

    frames = 0;
    rb->sleep(0); /* sync to tick */
    t_begin = *rb->current_tick;
    for (;;)
    {
        t_now = *rb->current_tick;
        if (t_now - t_begin >= DURATION)
            break;

        rb->lcd_blit_yuv(yuvbuf, 0, 0, YUV_WIDTH,
                         0, 0, YUV_WIDTH, YUV_HEIGHT);
        frames++;
    }
    tenth_fps = calc_tenth_fps(frames, t_now - t_begin);
    rb->snprintf(msg, sizeof(msg), "1/1: %d.%d fps",
                 tenth_fps / 10, tenth_fps % 10);
    log_text(msg);

    /* Test 2: quarter LCD update */
    make_gradient_rect(YUV_WIDTH/2, YUV_HEIGHT/2);

    frames = 0;
    rb->sleep(0); /* sync to tick */
    t_begin = *rb->current_tick;
    for (;;)
    {
        t_now = *rb->current_tick;
        if (t_now - t_begin >= DURATION)
            break;

        rb->lcd_blit_yuv(yuvbuf, 0, 0, YUV_WIDTH,
                         quarter_x, quarter_y, quarter_w, quarter_h);
        frames++;
    }
    tenth_fps = calc_tenth_fps(frames, t_now - t_begin);
    rb->snprintf(msg, sizeof(msg), "1/4: %d.%d fps",
                 tenth_fps / 10, tenth_fps % 10);
    log_text(msg);
}
#endif
#ifdef HAVE_REMOTE_LCD
static void time_remote_update(void)
{
@ -318,6 +406,9 @@ enum plugin_status plugin_start(const void* parameter)
#endif
time_main_update();
rb->sleep(HZ);
#if defined(HAVE_LCD_COLOR) && (MEMORYSIZE > 2)
time_main_yuv();
#endif
#if LCD_DEPTH < 4
time_greyscale();
#endif

View file

@ -832,6 +832,19 @@ void lcd_blit_mono(const unsigned char *data, int x, int by, int width, int bhei
\param stride
\description
void lcd_blit_yuv(unsigned char * const src[3], int src_x, int src_y, int stride, int x, int y, int width, int height)
\group lcd
\conditions (LCD_DEPTH >= 16) && (MEMORYSIZE > 2)
\param src[3]
\param src_x
\param src_y
\param stride
\param x
\param y
\param width
\param height
\description
void lcd_clear_display(void)
\group lcd
\description Clears the LCD and the framebuffer
@ -1226,6 +1239,12 @@ void lcd_vline(int x, int y1, int y2)
\param y2 Y end coordinate
\description Draws a vertical line at (=x=, =y1=) -> (=x=, =y2=) within current drawing mode
void lcd_yuv_set_options(unsigned options)
\group lcd
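\conditions (LCD_DEPTH >= 16) && (MEMORYSIZE > 2) && (defined(TOSHIBA_GIGABEAT_F) || defined(SANSA_E200) || defined(SANSA_C200) || defined(IRIVER_H10) || defined(COWON_D2) || defined(PHILIPS_HDD1630) || defined(SANSA_FUZE) || defined(SANSA_E200V2) || defined(SANSA_FUZEV2) || defined(TOSHIBA_GIGABEAT_S) || defined(PHILIPS_SA9200))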
\param options
\description
void led(bool on)
\param on
\description

View file

@ -947,6 +947,7 @@ target/arm/sandisk/sansa-e200/powermgmt-e200.c
#endif /* SANSA_E200 */
#ifdef SANSA_C200
target/arm/sandisk/sansa-c200/lcd-as-c200.S
target/arm/sandisk/sansa-c200/button-c200.c
target/arm/sandisk/sansa-c200/powermgmt-c200.c
#endif /* SANSA_C200 */
@ -971,6 +972,7 @@ target/arm/philips/piezo.c
target/arm/philips/sa9200/backlight-sa9200.c
target/arm/philips/sa9200/button-sa9200.c
target/arm/philips/sa9200/lcd-sa9200.c
target/arm/philips/sa9200/lcd-as-sa9200.S
target/arm/philips/sa9200/power-sa9200.c
target/arm/philips/sa9200/powermgmt-sa9200.c
#endif /* PHILIPS_SA9200 */
@ -988,6 +990,7 @@ target/arm/philips/fmradio_i2c-hdd.c
target/arm/philips/hdd1630/backlight-hdd1630.c
target/arm/philips/hdd1630/button-hdd1630.c
target/arm/philips/hdd1630/lcd-hdd1630.c
target/arm/philips/hdd1630/lcd-as-hdd1630.S
target/arm/philips/hdd1630/powermgmt-hdd1630.c
#endif /* PHILIPS_HDD1630 */
@ -995,6 +998,7 @@ target/arm/philips/hdd1630/powermgmt-hdd1630.c
target/arm/philips/hdd6330/backlight-hdd6330.c
target/arm/philips/hdd6330/button-hdd6330.c
target/arm/philips/hdd6330/lcd-hdd6330.c
target/arm/philips/hdd6330/lcd-as-hdd6330.S
target/arm/philips/hdd6330/powermgmt-hdd6330.c
#endif /* PHILIPS_HDD6330 */
@ -1073,6 +1077,7 @@ target/coldfire/iriver/h300/adc-h300.c
target/coldfire/iriver/h300/backlight-h300.c
target/coldfire/iriver/h300/button-h300.c
target/coldfire/iriver/h300/pcf50606-h300.c
target/coldfire/iriver/h300/lcd-as-h300.S
target/coldfire/iriver/h300/lcd-h300.c
target/coldfire/iriver/h300/power-h300.c
target/coldfire/iriver/h300/powermgmt-h300.c
@ -1105,6 +1110,7 @@ target/arm/iriver/h10/powermgmt-h10.c
#ifdef IRIVER_H10
target/arm/iriver/h10/lcd-h10_20gb.c
target/arm/iriver/h10/lcd-as-h10.S
#endif /* IRIVER_H10 */
#ifdef IRIVER_H10_5GB
@ -1308,11 +1314,13 @@ target/arm/ipod/button-clickwheel.c
#ifdef IPOD_COLOR
target/arm/ipod/backlight-4g_color.c
target/arm/ipod/button-clickwheel.c
target/arm/ipod/lcd-as-color-nano.S
#endif /* IPOD_COLOR */
#ifdef IPOD_NANO
target/arm/ipod/backlight-nano_video.c
target/arm/ipod/button-clickwheel.c
target/arm/ipod/lcd-as-color-nano.S
#endif /* IPOD_NANO */
#ifdef IPOD_VIDEO
@ -1384,6 +1392,7 @@ target/arm/as3525/backlight-e200v2-fuze.c
target/arm/as3525/dbop-as3525.c
#ifndef BOOTLOADER
target/arm/as3525/sansa-e200v2/powermgmt-e200v2.c
target/arm/as3525/lcd-as-e200v2-fuze-fuzev2.S
#endif /* !BOOTLOADER */
#endif /* SANSA_E200V2 */
@ -1410,6 +1419,7 @@ target/arm/as3525/backlight-e200v2-fuze.c
target/arm/as3525/dbop-as3525.c
#ifndef BOOTLOADER
target/arm/as3525/sansa-fuze/powermgmt-fuze.c
target/arm/as3525/lcd-as-e200v2-fuze-fuzev2.S
#endif /* !BOOTLOADER */
#endif /* SANSA_FUZE */
@ -1420,6 +1430,7 @@ target/arm/as3525/sansa-fuzev2/button-fuzev2.c
target/arm/as3525/dbop-as3525.c
#ifndef BOOTLOADER
target/arm/as3525/sansa-fuzev2/powermgmt-fuzev2.c
target/arm/as3525/lcd-as-e200v2-fuze-fuzev2.S
#endif /* !BOOTLOADER */
#endif /* SANSA_FUZEV2 */
@ -1540,6 +1551,7 @@ target/arm/s5l8702/pl080.c
target/arm/s5l8702/dma-s5l8702.c
target/arm/s5l8702/clocking-s5l8702.c
target/arm/s5l8702/ipod6g/lcd-6g.c
target/arm/s5l8702/ipod6g/lcd-asm-6g.S
target/arm/s5l8702/ipod6g/piezo-6g.c
#if 0 //TODO
target/arm/s5l8702/postmortemstub.S
@ -1756,6 +1768,7 @@ target/arm/samsung/power-yh82x_yh92x.c
#ifdef SAMSUNG_YH820
target/arm/samsung/yh820/backlight-yh820.c
target/arm/samsung/yh820/lcd-yh820.c
target/arm/samsung/yh820/lcd-as-yh820.S
target/arm/samsung/yh820/powermgmt-yh820.c
#endif /* SAMSUNG_YH820 */
@ -1770,6 +1783,7 @@ target/arm/samsung/fmradio-yh92x.c
#ifdef SAMSUNG_YH925
target/arm/samsung/yh925/backlight-yh925.c
target/arm/samsung/yh925/lcd-yh925.c
target/arm/samsung/yh925/lcd-as-yh925.S
target/arm/samsung/yh925/powermgmt-yh925.c
#endif /* SAMSUNG_YH925 */
@ -1788,6 +1802,7 @@ target/arm/pbell/vibe500/lcd-vibe500.c
target/arm/pbell/vibe500/button-vibe500.c
target/arm/pbell/vibe500/power-vibe500.c
target/arm/pbell/vibe500/backlight-vibe500.c
target/arm/pbell/vibe500/lcd-as-vibe500.S
target/arm/pbell/vibe500/powermgmt-vibe500.c
#endif

View file

@ -99,3 +99,594 @@ lcd_copy_buffer_rect: @
bgt 10b @ copy line @
ldmpc regs=r4-r11 @ restore regs and return
.size lcd_copy_buffer_rect, .-lcd_copy_buffer_rect
/****************************************************************************
* void lcd_write_yuv420_lines(fb_data *dst,
* unsigned char const * const src[3],
* int width,
* int stride);
*
* Converts two adjacent luma lines (which share one chroma line) to RGB565
* and stores them through dst.
*
* dst - first destination pixel (r0)
* src - three plane pointers: src[0] = Y', src[1] = Cb, src[2] = Cr
* width - decremented by 2 per loop pass; each pass handles two columns
* stride - byte distance between the two luma lines that are read
*
* |R| |1.000000 -0.000001 1.402000| |Y'|
* |G| = |1.000000 -0.334136 -0.714136| |Pb|
* |B| |1.000000 1.772000 0.000000| |Pr|
* Scaled, normalized, rounded and tweaked to yield RGB 565:
* |R| |74 0 101| |Y' - 16| >> 9
* |G| = |74 -24 -51| |Cb - 128| >> 8
* |B| |74 128 0| |Cr - 128| >> 9
*
* Write four RGB565 pixels in the following order on each loop:
* 1 3 + > down
* 2 4 \/ left
*
* NOTE(review): in the portrait build with LCD_WIDTH >= 256 the third pixel
* is stored after dst has advanced by only LCD_WIDTH bytes, while the
* LCD_WIDTH < 256 build has advanced by 2*LCD_WIDTH bytes at the same point.
* Confirm that the >= 256 portrait path is correct and actually used.
*/
.section .icode.lcd_write_yuv420_lines, "ax", %progbits
.align 2
.global lcd_write_yuv420_lines
.type lcd_write_yuv420_lines, %function
lcd_write_yuv420_lines:
@ r0 = dst
@ r1 = yuv_src
@ r2 = width
@ r3 = stride
stmfd sp!, { r4-r10, lr } @ save non-scratch
ldmia r1, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
@ r5 = yuv_src[1] = Cb_p
@ r6 = yuv_src[2] = Cr_p
@ r1 = scratch
sub r3, r3, #1 @ r3 = stride - 1: Y'_p is post-incremented before [r4, r3] is read
10: @ loop line @
ldrb r7, [r4], #1 @ r7 = *Y'_p++;
ldrb r8, [r5], #1 @ r8 = *Cb_p++;
ldrb r9, [r6], #1 @ r9 = *Cr_p++;
@
sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74
add r12, r7, r7, asl #2 @ actually (Y' - 16)*37 and shift right
add r7, r12, r7, asl #5 @ by one less when adding - same for all
@
sub r8, r8, #128 @ Cb -= 128
sub r9, r9, #128 @ Cr -= 128
@
add r10, r9, r9, asl #1 @ r10 = Cr*51 + Cb*24
add r10, r10, r10, asl #4 @
add r10, r10, r8, asl #3 @
add r10, r10, r8, asl #4 @
@
add lr, r9, r9, asl #2 @ r9 = Cr*101
add lr, lr, r9, asl #5 @
add r9, lr, r9, asl #6 @
@
add r8, r8, #2 @ r8 = bu = (Cb + 2) >> 2, i.e. (Cb*128 + 256) >> 9
mov r8, r8, asr #2 @
add r9, r9, #256 @ r9 = rv = (r9 + 256) >> 9
mov r9, r9, asr #9 @
rsb r10, r10, #128 @ r10 = guv = (-r10 + 128) >> 8
mov r10, r10, asr #8 @
@ compute R, G, and B
add r1, r8, r7, asr #8 @ r1 = b = (Y >> 9) + bu
add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
#if ARM_ARCH >= 6
usat r1, #5, r1 @ clamp b
usat lr, #5, lr @ clamp r
usat r7, #6, r7 @ clamp g
#else
orr r12, r1, lr @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r1, #31 @ clamp b
mvnhi r1, r1, asr #31 @
andhi r1, r1, #31 @
cmp lr, #31 @ clamp r
mvnhi lr, lr, asr #31 @
andhi lr, lr, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
#endif
@
ldrb r12, [r4, r3] @ r12 = Y' = *(Y'_p + stride)
@
orr r1, r1, r7, lsl #5 @ r1 = b | (g << 5)
orr r1, r1, lr, lsl #11 @ r1 |= (r << 11)
#if LCD_WIDTH >= LCD_HEIGHT
strh r1, [r0] @
#elif LCD_WIDTH < 256
strh r1, [r0], #LCD_WIDTH @ store pixel
#else
strh r1, [r0] @
#endif
@
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
add r12, r7, r7, asl #2 @
add r7, r12, r7, asl #5 @
@ compute R, G, and B
add r1, r8, r7, asr #8 @ r1 = b = (Y >> 9) + bu
add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
#if ARM_ARCH >= 6
usat r1, #5, r1 @ clamp b
usat lr, #5, lr @ clamp r
usat r7, #6, r7 @ clamp g
#else
orr r12, r1, lr @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r1, #31 @ clamp b
mvnhi r1, r1, asr #31 @
andhi r1, r1, #31 @
cmp lr, #31 @ clamp r
mvnhi lr, lr, asr #31 @
andhi lr, lr, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
#endif
@
ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
@
orr r1, r1, lr, lsl #11 @ r1 = b | (r << 11)
orr r1, r1, r7, lsl #5 @ r1 |= (g << 5)
#if LCD_WIDTH >= LCD_HEIGHT
add r0, r0, #2*LCD_WIDTH @
strh r1, [r0] @ store pixel
sub r0, r0, #2*LCD_WIDTH @
#elif LCD_WIDTH < 256
strh r1, [r0, #-LCD_WIDTH-2] @ store pixel
#else
strh r1, [r0, #-2] @
add r0, r0, #LCD_WIDTH @
#endif
@
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
add r12, r7, r7, asl #2 @
add r7, r12, r7, asl #5 @
@ compute R, G, and B
add r1, r8, r7, asr #8 @ r1 = b = (Y >> 9) + bu
add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
#if ARM_ARCH >= 6
usat r1, #5, r1 @ clamp b
usat lr, #5, lr @ clamp r
usat r7, #6, r7 @ clamp g
#else
orr r12, r1, lr @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r1, #31 @ clamp b
mvnhi r1, r1, asr #31 @
andhi r1, r1, #31 @
cmp lr, #31 @ clamp r
mvnhi lr, lr, asr #31 @
andhi lr, lr, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
#endif
@
ldrb r12, [r4, r3] @ r12 = Y' = *(Y'_p + stride)
@
orr r1, r1, r7, lsl #5 @ r1 = b | (g << 5)
orr r1, r1, lr, lsl #11 @ r1 |= (r << 11)
#if LCD_WIDTH >= LCD_HEIGHT
strh r1, [r0, #2]
#elif LCD_WIDTH < 256
strh r1, [r0, #LCD_WIDTH]! @ store pixel
#else
strh r1, [r0] @
#endif
@
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
add r12, r7, r7, asl #2 @
add r7, r12, r7, asl #5 @
@ compute R, G, and B
add r1, r8, r7, asr #8 @ r1 = b = (Y >> 9) + bu
add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
#if ARM_ARCH >= 6
usat r1, #5, r1 @ clamp b
usat lr, #5, lr @ clamp r
usat r7, #6, r7 @ clamp g
#else
orr r12, r1, lr @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r1, #31 @ clamp b
mvnhi r1, r1, asr #31 @
andhi r1, r1, #31 @
cmp lr, #31 @ clamp r
mvnhi lr, lr, asr #31 @
andhi lr, lr, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
#endif
@
orr r12, r1, lr, lsl #11 @ r12 = b | (r << 11)
orr r12, r12, r7, lsl #5 @ r12 |= (g << 5)
#if LCD_WIDTH >= LCD_HEIGHT
add r0, r0, #2*LCD_WIDTH
strh r12, [r0, #2]
#if LCD_WIDTH <= 512
sub r0, r0, #(2*LCD_WIDTH)-4
#else
sub r0, r0, #(2*LCD_WIDTH)
add r0, r0, #4
#endif
#else
strh r12, [r0, #-2] @ store pixel
#if LCD_WIDTH < 256
add r0, r0, #2*LCD_WIDTH @
#else
add r0, r0, #LCD_WIDTH @
#endif
#endif
@
subs r2, r2, #2 @ subtract block from width
bgt 10b @ loop line @
@
ldmpc regs=r4-r10 @ restore registers and return
.ltorg @ dump constant pool
.size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
/****************************************************************************
* void lcd_write_yuv420_lines_odither(fb_data *dst,
* unsigned char const * const src[3],
* int width,
* int stride,
* int x_screen,
* int y_screen);
*
* Same job as lcd_write_yuv420_lines, but a per-pixel dither offset is added
* before the components are reduced to RGB565.
*
* dst - first destination pixel (r0)
* src - three plane pointers: src[0] = Y', src[1] = Cb, src[2] = Cr
* width - decremented by 2 per loop pass; each pass handles two columns
* stride - byte distance between the two luma lines that are read
* x_screen, y_screen - passed on the stack; only bit 1 of (x_screen ^ y_screen)
* is used, to pick the starting pattern quadrant
*
* |R| |1.000000 -0.000001 1.402000| |Y'|
* |G| = |1.000000 -0.334136 -0.714136| |Pb|
* |B| |1.000000 1.772000 0.000000| |Pr|
* Red scaled at twice g & b but at same precision to place it in correct
* bit position after multiply and leave instruction count lower.
* |R| |258 0 408| |Y' - 16|
* |G| = |149 -49 -104| |Cb - 128|
* |B| |149 258 0| |Cr - 128|
*
* Write four RGB565 pixels in the following order on each loop:
* 1 3 + > down
* 2 4 \/ left
*
* Kernel pattern (raw|rotated|use order):
* 5 3 4 2 2 6 3 7 row0 row2 > down
* 1 7 0 6 | 4 0 5 1 | 2 4 6 0 3 5 7 1 col0 left
* 4 2 5 3 | 3 7 2 6 | 3 5 7 1 2 4 6 0 col2 \/
* 0 6 1 7 5 1 4 0
*/
.section .icode.lcd_write_yuv420_lines_odither, "ax", %progbits
.align 2
.global lcd_write_yuv420_lines_odither
.type lcd_write_yuv420_lines_odither, %function
lcd_write_yuv420_lines_odither:
@ r0 = dst
@ r1 = yuv_src
@ r2 = width
@ r3 = stride
@ [sp] = x_screen
@ [sp+4] = y_screen
stmfd sp!, { r4-r11, lr } @ save non-scratch
ldmia r1, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
@ r5 = yuv_src[1] = Cb_p
@ r6 = yuv_src[2] = Cr_p
@
sub r3, r3, #1 @ r3 = stride - 1: Y'_p is post-incremented before [r4, r3] is read
add r1, sp, #36 @ Line up pattern and kernel quadrant
ldmia r1, { r12, r14 } @ r12 = x_screen, r14 = y_screen (above the 9 saved regs)
eor r14, r14, r12 @
and r14, r14, #0x2 @
mov r14, r14, lsl #6 @ 0x00 or 0x80
10: @ loop line @
@
ldrb r7, [r4], #1 @ r7 = *Y'_p++;
ldrb r8, [r5], #1 @ r8 = *Cb_p++;
ldrb r9, [r6], #1 @ r9 = *Cr_p++;
@
eor r14, r14, #0x80 @ flip pattern quadrant
@
sub r7, r7, #16 @ r7 = Y = (Y' - 16)*149
add r12, r7, r7, asl #2 @
add r12, r12, r12, asl #4 @
add r7, r12, r7, asl #6 @
@
sub r8, r8, #128 @ Cb -= 128
sub r9, r9, #128 @ Cr -= 128
@
add r10, r8, r8, asl #4 @ r10 = guv = Cr*104 + Cb*49
add r10, r10, r8, asl #5 @
add r10, r10, r9, asl #3 @
add r10, r10, r9, asl #5 @
add r10, r10, r9, asl #6 @
@
mov r8, r8, asl #1 @ r8 = bu = Cb*258
add r8, r8, r8, asl #7 @
@
add r9, r9, r9, asl #1 @ r9 = rv = Cr*408
add r9, r9, r9, asl #4 @
mov r9, r9, asl #3 @
@
@ compute R, G, and B
add r1, r8, r7 @ r1 = b' = Y + bu
add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
rsb r7, r10, r7 @ r7 = g' = Y - guv (r10 is subtracted)
@
@ r8 = bu, r9 = rv, r10 = guv
@
sub r12, r1, r1, lsr #5 @ r1 = 31/32*b + b/256
add r1, r12, r1, lsr #8 @
@
sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
add r11, r12, r11, lsr #8 @
@
sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
add r7, r12, r7, lsr #8 @
@
add r12, r14, #0x100 @ r12 = delta for pixel 1
@
add r1, r1, r12 @ b = r1 + delta
add r11, r11, r12, lsl #1 @ r = r11 + delta*2
add r7, r7, r12, lsr #1 @ g = r7 + delta/2
@
#if ARM_ARCH >= 6
usat r11, #5, r11, asr #11 @ clamp r
usat r7, #6, r7, asr #9 @ clamp g
usat r1, #5, r1, asr #10 @ clamp b
@
ldrb r12, [r4, r3] @ r12 = Y' = *(Y'_p + stride)
@
orr r1, r1, r11, lsl #11 @ r1 = b | (r << 11)
orr r1, r1, r7, lsl #5 @ r1 |= (g << 5)
#else
orr r12, r1, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
beq 15f @ no clamp @
movs r12, r1, asr #15 @ clamp b
mvnne r1, r12, lsr #15 @
andne r1, r1, #0x7c00 @ mask b only if clamped
movs r12, r11, asr #16 @ clamp r
mvnne r11, r12, lsr #16 @
movs r12, r7, asr #15 @ clamp g
mvnne r7, r12, lsr #15 @
15: @ no clamp @
@
ldrb r12, [r4, r3] @ r12 = Y' = *(Y'_p + stride)
@
and r11, r11, #0xf800 @ pack pixel
and r7, r7, #0x7e00 @ r1 = pixel = (r & 0xf800) |
orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
orr r1, r11, r1, lsr #10 @ (b >> 10)
#endif
@
#if LCD_WIDTH >= LCD_HEIGHT
strh r1, [r0] @
#elif LCD_WIDTH < 256
strh r1, [r0], #LCD_WIDTH @ store pixel
#else
strh r1, [r0] @
#endif
@
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
add r12, r7, r7, asl #2 @
add r12, r12, r12, asl #4 @
add r7, r12, r7, asl #6 @
@ compute R, G, and B
add r1, r8, r7 @ r1 = b' = Y + bu
add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
rsb r7, r10, r7 @ r7 = g' = Y - guv (r10 is subtracted)
@
sub r12, r1, r1, lsr #5 @ r1 = 31/32*b' + b'/256
add r1, r12, r1, lsr #8 @
@
sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
add r11, r12, r11, lsr #8 @
@
sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
add r7, r12, r7, lsr #8 @
@
add r12, r14, #0x200 @ r12 = delta for pixel 2
@
add r1, r1, r12 @ b = r1 + delta
add r11, r11, r12, lsl #1 @ r = r11 + delta*2
add r7, r7, r12, lsr #1 @ g = r7 + delta/2
@
#if ARM_ARCH >= 6
usat r11, #5, r11, asr #11 @ clamp r
usat r7, #6, r7, asr #9 @ clamp g
usat r1, #5, r1, asr #10 @ clamp b
@
ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
@
orr r1, r1, r11, lsl #11 @ r1 = b | (r << 11)
orr r1, r1, r7, lsl #5 @ r1 |= (g << 5)
#else
orr r12, r1, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
beq 15f @ no clamp @
movs r12, r1, asr #15 @ clamp b
mvnne r1, r12, lsr #15 @
andne r1, r1, #0x7c00 @ mask b only if clamped
movs r12, r11, asr #16 @ clamp r
mvnne r11, r12, lsr #16 @
movs r12, r7, asr #15 @ clamp g
mvnne r7, r12, lsr #15 @
15: @ no clamp @
@
ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
@
and r11, r11, #0xf800 @ pack pixel
and r7, r7, #0x7e00 @ r1 = pixel = (r & 0xf800) |
orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
orr r1, r11, r1, lsr #10 @ (b >> 10)
#endif
@
#if LCD_WIDTH >= LCD_HEIGHT
add r0, r0, #2*LCD_WIDTH @
strh r1, [r0] @ store pixel
sub r0, r0, #2*LCD_WIDTH @
#elif LCD_WIDTH < 256
strh r1, [r0, #-LCD_WIDTH-2] @ store pixel
#else
strh r1, [r0, #-2] @ store pixel
add r0, r0, #LCD_WIDTH @
#endif
@
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
add r12, r7, r7, asl #2 @
add r12, r12, r12, asl #4 @
add r7, r12, r7, asl #6 @
@ compute R, G, and B
add r1, r8, r7 @ r1 = b' = Y + bu
add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
rsb r7, r10, r7 @ r7 = g' = Y - guv (r10 is subtracted)
@
@ r8 = bu, r9 = rv, r10 = guv
@
sub r12, r1, r1, lsr #5 @ r1 = 31/32*b' + b'/256
add r1, r12, r1, lsr #8 @
@
sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
add r11, r12, r11, lsr #8 @
@
sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
add r7, r12, r7, lsr #8 @
@
add r12, r14, #0x300 @ r12 = delta for pixel 3
@
add r1, r1, r12 @ b = r1 + delta
add r11, r11, r12, lsl #1 @ r = r11 + delta*2
add r7, r7, r12, lsr #1 @ g = r7 + delta/2
@
#if ARM_ARCH >= 6
usat r11, #5, r11, asr #11 @ clamp r
usat r7, #6, r7, asr #9 @ clamp g
usat r1, #5, r1, asr #10 @ clamp b
@
ldrb r12, [r4, r3] @ r12 = Y' = *(Y'_p + stride)
@
orr r1, r1, r11, lsl #11 @ r1 = b | (r << 11)
orr r1, r1, r7, lsl #5 @ r1 |= (g << 5)
#else
orr r12, r1, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
beq 15f @ no clamp @
movs r12, r1, asr #15 @ clamp b
mvnne r1, r12, lsr #15 @
andne r1, r1, #0x7c00 @ mask b only if clamped
movs r12, r11, asr #16 @ clamp r
mvnne r11, r12, lsr #16 @
movs r12, r7, asr #15 @ clamp g
mvnne r7, r12, lsr #15 @
15: @ no clamp @
@
ldrb r12, [r4, r3] @ r12 = Y' = *(Y'_p + stride)
@
and r11, r11, #0xf800 @ pack pixel
and r7, r7, #0x7e00 @ r1 = pixel = (r & 0xf800) |
orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
orr r1, r11, r1, lsr #10 @ (b >> 10)
#endif
@
#if LCD_WIDTH >= LCD_HEIGHT
strh r1, [r0, #2]
#elif LCD_WIDTH < 256
strh r1, [r0, #LCD_WIDTH]! @ store pixel
#else
strh r1, [r0] @
#endif
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
add r12, r7, r7, asl #2 @
add r12, r12, r12, asl #4 @
add r7, r12, r7, asl #6 @
@ compute R, G, and B
add r1, r8, r7 @ r1 = b' = Y + bu
add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
rsb r7, r10, r7 @ r7 = g' = Y - guv (r10 is subtracted)
@
sub r12, r1, r1, lsr #5 @ r1 = 31/32*b + b/256
add r1, r12, r1, lsr #8 @
@
sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
add r11, r12, r11, lsr #8 @
@
sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
add r7, r12, r7, lsr #8 @
@
@ This element is zero - use r14 @
@
add r1, r1, r14 @ b = r1 + delta
add r11, r11, r14, lsl #1 @ r = r11 + delta*2
add r7, r7, r14, lsr #1 @ g = r7 + delta/2
@
#if ARM_ARCH >= 6
usat r11, #5, r11, asr #11 @ clamp r
usat r7, #6, r7, asr #9 @ clamp g
usat r1, #5, r1, asr #10 @ clamp b
@
orr r1, r1, r11, lsl #11 @ r1 = b | (r << 11)
orr r1, r1, r7, lsl #5 @ r1 |= (g << 5)
#else
orr r12, r1, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
beq 15f @ no clamp @
movs r12, r1, asr #15 @ clamp b
mvnne r1, r12, lsr #15 @
andne r1, r1, #0x7c00 @ mask b only if clamped
movs r12, r11, asr #16 @ clamp r
mvnne r11, r12, lsr #16 @
movs r12, r7, asr #15 @ clamp g
mvnne r7, r12, lsr #15 @
15: @ no clamp @
@
and r11, r11, #0xf800 @ pack pixel
and r7, r7, #0x7e00 @ r1 = pixel = (r & 0xf800) |
orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
orr r1, r11, r1, lsr #10 @ (b >> 10)
#endif
@
#if LCD_WIDTH >= LCD_HEIGHT
add r0, r0, #2*LCD_WIDTH
strh r1, [r0, #2] @ store pixel
#if LCD_WIDTH <= 512
sub r0, r0, #(2*LCD_WIDTH)-4
#else
sub r0, r0, #(2*LCD_WIDTH)
add r0, r0, #4
#endif
#else
strh r1, [r0, #-2] @ store pixel
#if LCD_WIDTH < 256
add r0, r0, #2*LCD_WIDTH @
#else
add r0, r0, #LCD_WIDTH @
#endif
#endif
@
subs r2, r2, #2 @ subtract block from width
bgt 10b @ loop line @
@
ldmpc regs=r4-r11 @ restore registers and return
.ltorg @ dump constant pool
.size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither

View file

@ -9,3 +9,171 @@ void lcd_copy_buffer_rect(fb_data *dst, fb_data *src, int width, int height)
dst += LCD_WIDTH;
} while (--height);
}
/* Integer coefficients for the YUV -> RGB conversion below. Each result
 * component is built as YFAC*(Y - 16) plus the chroma terms and is then
 * shifted right by 6 before being packed. */
#define YFAC (74) /* luma weight, applied to (Y - 16) */
#define RVFAC (101) /* Cr contribution to red */
#define GUFAC (-24) /* Cb contribution to green */
#define GVFAC (-51) /* Cr contribution to green */
#define BUFAC (128) /* Cb contribution to blue */
/* Limit val to the range [min, max]. The lower bound is checked first, so
 * min is returned whenever val < min. */
static inline int clamp(int val, int min, int max)
{
    if (val < min)
        return min;

    if (val > max)
        return max;

    return val;
}
/* C version of the YUV 4:2:0 line writer.
 *
 * Converts two consecutive luma lines, which share a single chroma line,
 * and stores the resulting pixels through dst - similar behaviour to
 * lcd_blit_yuv in the core.
 *
 * dst    - first destination pixel
 * src    - plane pointers: src[0] = Y, src[1] = U (Cb), src[2] = V (Cr)
 * width  - number of pixels per line; must be >= 2, odd values are
 *          rounded down
 * stride - distance in bytes between the starts of two luma lines
 *
 * On landscape LCDs a source line runs along a framebuffer row. On portrait
 * LCDs it runs down a framebuffer column and the second line is written one
 * column to the left.
 */
extern void lcd_write_yuv420_lines(fb_data *dst,
                                   unsigned char const * const src[3],
                                   int width,
                                   int stride)
{
    const unsigned char *luma_p = src[0];
    const unsigned char *cb_p = src[1];
    const unsigned char *cr_p = src[2];
    fb_data *line_end;
    int line;

    width &= ~1;

#if LCD_WIDTH >= LCD_HEIGHT
    line_end = dst + width;
#else
    line_end = dst + LCD_WIDTH * width;
#endif

    /* stride => amount to jump from end of last row to start of next */
    stride -= width;

    /* upsampling, YUV->RGB conversion and reduction to RGB in one go */
    for (line = 0; ; line++)
    {
        do
        {
            /* One chroma sample serves two horizontally adjacent pixels */
            const int cb = *cb_p++ - 128;
            const int cr = *cr_p++ - 128;
            const int rv = RVFAC*cr;
            const int guv = GUFAC*cb + GVFAC*cr;
            const int bu = BUFAC*cb;
            int px;

            for (px = 0; px < 2; px++)
            {
                const int luma = YFAC*(*luma_p++ - 16);
                int r = luma + rv;
                int g = luma + guv;
                int b = luma + bu;

                /* A single test catches any component out of range */
                if ((unsigned)(r | g | b) > 64*256-1)
                {
                    r = clamp(r, 0, 64*256-1);
                    g = clamp(g, 0, 64*256-1);
                    b = clamp(b, 0, 64*256-1);
                }

                *dst = FB_RGBPACK(r >> 6, g >> 6, b >> 6);
#if LCD_WIDTH >= LCD_HEIGHT
                dst++;
#else
                dst += LCD_WIDTH;
#endif
            }
        }
        while (dst < line_end);

        if (line == 1)
            break; /* both lines are done */

        /* Move to the second luma line and rewind chroma to reuse it */
        luma_p += stride;
        cb_p -= width >> 1;
        cr_p -= width >> 1;
#if LCD_WIDTH >= LCD_HEIGHT
        line_end += LCD_WIDTH;
        dst += LCD_WIDTH - width;
#else
        line_end -= 1;
        dst -= LCD_WIDTH*width + 1;
#endif
    }
}
/* The C implementation has no separate dithering path: the dithered entry
 * point is declared as an alias of lcd_write_yuv420_lines, so both symbols
 * refer to the same function. */
void lcd_write_yuv420_lines_odither(fb_data *dst,
unsigned char const * const src[3],
int width, int stride,
int x_screen, int y_screen)
__attribute__((alias("lcd_write_yuv420_lines")));

View file

@ -220,6 +220,195 @@ static inline int clamp(int val, int min, int max)
return val;
}
/* Default lcd_yuv_set_options(): accepts the option word and ignores it.
 * Outside of win32 builds the symbol is weak. */
#ifndef _WIN32
/*
 * weak attribute doesn't work for win32 as of gcc 4.6.2 and binutils 2.21.52
 * When building win32 simulators, we won't be using an optimized version of
 * lcd_blit_yuv(), so just don't use the weak attribute.
 */
__attribute__((weak))
#endif
void lcd_yuv_set_options(unsigned options)
{
/* nothing to configure here; the cast silences the unused warning */
(void)options;
}
/* Draw a partial YUV colour bitmap.
 *
 * src            - plane pointers: src[0] = Y, src[1] = U (Cb), src[2] = V (Cr)
 * src_x, src_y   - position of the first source pixel inside the planes
 * stride         - distance in bytes between the starts of two luma lines
 * x, y           - destination position of the picture
 * width, height  - size of the area to draw; both must be >= 2 and even
 *
 * Chroma is subsampled, so the picture is processed in pairs of lines that
 * share one chroma line. On portrait LCDs the picture is written rotated:
 * a source line runs down a framebuffer column.
 */
#ifndef _WIN32
__attribute__((weak))
#endif
void lcd_blit_yuv(unsigned char * const src[3],
                  int src_x, int src_y, int stride,
                  int x, int y, int width, int height)
{
    const unsigned char *luma_p, *cb_p, *cr_p;
    fb_data *dst, *line_end;
    int pairs_left;
    int line;
    long first;

    width &= ~1;
    pairs_left = height >> 1;

#if LCD_WIDTH >= LCD_HEIGHT
    dst = FBADDR(x, y);
    line_end = dst + width;
#else
    dst = FBADDR(LCD_WIDTH - y - 1, x);
    line_end = dst + LCD_WIDTH * width;
#endif

    /* Locate the first sample in each plane */
    first = stride * src_y;
    luma_p = src[0] + first + src_x;
    cb_p = src[1] + (first >> 2) + (src_x >> 1);
    cr_p = src[2] + (cb_p - src[1]);

    /* stride => amount to jump from end of last row to start of next */
    stride -= width;

    /* upsampling, YUV->RGB conversion and reduction to RGB565 in one go */
    do
    {
        for (line = 0; line < 2; line++)
        {
            do
            {
                /* One chroma sample serves two adjacent pixels */
                const int cb = *cb_p++ - 128;
                const int cr = *cr_p++ - 128;
                const int rv = RVFAC*cr;
                const int guv = GUFAC*cb + GVFAC*cr;
                const int bu = BUFAC*cb;
                int px;

                for (px = 0; px < 2; px++)
                {
                    const int luma = YFAC*(*luma_p++ - 16);
                    int r = luma + rv;
                    int g = luma + guv;
                    int b = luma + bu;

                    /* A single test catches any component out of range */
                    if ((unsigned)(r | g | b) > 64*256-1)
                    {
                        r = clamp(r, 0, 64*256-1);
                        g = clamp(g, 0, 64*256-1);
                        b = clamp(b, 0, 64*256-1);
                    }

                    *dst = FB_RGBPACK(r >> 6, g >> 6, b >> 6);
#if LCD_WIDTH >= LCD_HEIGHT
                    dst++;
#else
                    dst += LCD_WIDTH;
#endif
                }
            }
            while (dst < line_end);

            luma_p += stride;
            if (line == 0)
            {
                /* The second line of the pair reuses the same chroma line */
                cb_p -= width >> 1;
                cr_p -= width >> 1;
            }
            else
            {
                /* Pair finished: step to the start of the next chroma line */
                cb_p += stride >> 1;
                cr_p += stride >> 1;
            }

#if LCD_WIDTH >= LCD_HEIGHT
            line_end += LCD_WIDTH;
            dst += LCD_WIDTH - width;
#else
            line_end -= 1;
            dst -= LCD_WIDTH*width + 1;
#endif
        }
    }
    while (--pairs_left > 0);

#if LCD_WIDTH >= LCD_HEIGHT
    lcd_update_rect(x, y, width, height);
#else
    lcd_update_rect(LCD_WIDTH - y - height, x, height, width);
#endif
}
/* Fill a rectangle with a gradient. This function draws only the partial
* gradient. It assumes the original gradient is src_height high and skips
* the first few rows. This is useful for drawing only the bottom half of

View file

@ -110,3 +110,101 @@ void lcd_update_rect(int x, int y, int width, int height)
}
}
#endif /* LCD_OPTIMIZED_UPDATE_RECT */
/*** YUV functions ***/
/* Option bits last passed to lcd_yuv_set_options(); lcd_blit_yuv() tests
 * LCD_YUV_DITHER in this word to choose the line writer. */
static unsigned lcd_yuv_options SHAREDBSS_ATTR = 0;
/* Line write helper function for lcd_blit_yuv. Write two lines of yuv420. */
extern void lcd_write_yuv420_lines(fb_data *dst,
unsigned char const * const src[3],
int width,
int stride);
/* Same as above, with two extra screen coordinates for the dither pattern */
extern void lcd_write_yuv420_lines_odither(fb_data *dst,
unsigned char const * const src[3],
int width,
int stride,
int x_screen, /* To align dither pattern */
int y_screen);
/* Store the YUV blit option bits; they are read by lcd_blit_yuv() on its
 * next call. */
void lcd_yuv_set_options(unsigned options)
{
lcd_yuv_options = options;
}
#ifndef LCD_OPTIMIZED_BLIT_YUV
/* Performance function to blit a YUV bitmap directly to the LCD.
 *
 * src            - plane pointers: src[0] = Y, src[1] = U (Cb), src[2] = V (Cr)
 * src_x, src_y   - position of the first source pixel inside the planes
 * stride         - distance in bytes between the starts of two luma lines
 * x, y           - destination position of the picture
 * width, height  - size of the area to draw
 *
 * src_x, src_y, width and height should be even and within the LCD's
 * boundaries. Nothing is drawn while lcd_write_enabled() reports false.
 *
 * For portrait LCDs, show it rotated counterclockwise by 90 degrees
 */
void lcd_blit_yuv(unsigned char * const src[3],
                  int src_x, int src_y, int stride,
                  int x, int y, int width, int height)
{
    /* Macrofy the bits that change between orientations */
#if CONFIG_ORIENTATION == SCREEN_PORTRAIT
    #define LCD_FRAMEBUF_ADDR_ORIENTED(col, row) \
        LCD_FRAMEBUF_ADDR(row, col)
    #define lcd_write_yuv420_lines_odither_oriented(dst, src, w, s, col, row) \
        lcd_write_yuv420_lines_odither(dst, src, w, s, row, col)
    #define YUV_NEXTLINE() dst -= 2
    #define YUV_DITHER_NEXTLINE() dst -= 2, y -= 2
#else
    #define LCD_FRAMEBUF_ADDR_ORIENTED(col, row) \
        LCD_FRAMEBUF_ADDR(col, row)
    #define lcd_write_yuv420_lines_odither_oriented(dst, src, w, s, col, row) \
        lcd_write_yuv420_lines_odither(dst, src, w, s, col, row)
    #define YUV_NEXTLINE() dst += 2*LCD_FBWIDTH
    #define YUV_DITHER_NEXTLINE() dst += 2*LCD_FBWIDTH, y += 2
#endif

    if (!lcd_write_enabled())
        return;

    /* Sorry, but width and height must be >= 2 or else */
    width &= ~1;
    height >>= 1; /* from here on: number of line pairs */

#if CONFIG_ORIENTATION == SCREEN_PORTRAIT
    /* Adjust portrait coordinates to make (0, 0) the upper right corner */
    y = LCD_WIDTH - 1 - y;
#endif

    fb_data *dst = LCD_FRAMEBUF_ADDR_ORIENTED(x, y);

    /* Locate the first sample in each plane */
    const int first = stride*src_y;
    unsigned char const *planes[3];

    planes[0] = src[0] + first + src_x;
    planes[1] = src[1] + (first >> 2) + (src_x >> 1);
    planes[2] = src[2] + (planes[1] - src[1]);

    /* The mode is sampled once and applies to the whole blit */
    const int dither = (lcd_yuv_options & LCD_YUV_DITHER) != 0;

    do
    {
        if (dither)
        {
            lcd_write_yuv420_lines_odither_oriented(dst, planes, width,
                                                    stride, x, y);
        }
        else
        {
            lcd_write_yuv420_lines(dst, planes, width, stride);
        }

        planes[0] += stride << 1; /* Skip down two luma lines */
        planes[1] += stride >> 1; /* Skip down one chroma line */
        planes[2] += stride >> 1;

        if (dither)
        {
            /* also keeps y in step for the dither pattern alignment */
            YUV_DITHER_NEXTLINE();
        }
        else
        {
            YUV_NEXTLINE();
        }
    }
    while (--height > 0);
}
#endif /* LCD_OPTIMIZED_BLIT_YUV */

View file

@ -220,7 +220,7 @@
#define HAVE_USB_HID_MOUSE
/* Define this if hardware supports alternate blitting */
#define HAVE_LCD_MODES (LCD_MODE_RGB565 | LCD_MODE_PAL256)
#define HAVE_LCD_MODES (LCD_MODE_RGB565 | LCD_MODE_YUV | LCD_MODE_PAL256)
#define CONFIG_CPU DM320

View file

@ -145,6 +145,7 @@ struct scrollinfo;
#if defined(HAVE_LCD_MODES)
void lcd_set_mode(int mode);
#define LCD_MODE_RGB565 0x00000001
#define LCD_MODE_YUV 0x00000002
#define LCD_MODE_PAL256 0x00000004
#if HAVE_LCD_MODES & LCD_MODE_PAL256
@ -235,7 +236,15 @@ extern bool lcd_putsxy_scroll_func(int x, int y, const unsigned char *string,
void *data, int x_offset);
/* performance function */
#if !defined(HAVE_LCD_COLOR)
#if defined(HAVE_LCD_COLOR)
#if MEMORYSIZE > 2
#define LCD_YUV_DITHER 0x1
extern void lcd_yuv_set_options(unsigned options);
extern void lcd_blit_yuv(unsigned char * const src[3],
int src_x, int src_y, int stride,
int x, int y, int width, int height);
#endif /* MEMORYSIZE > 2 */
#else
extern void lcd_blit_mono(const unsigned char *data, int x, int by, int width,
int bheight, int stride);
extern void lcd_blit_grey_phase(unsigned char *values, unsigned char *phases,

View file

@ -0,0 +1,550 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2007 by Jens Arnold
* Heavily based on lcd-as-memframe.c by Michael Sevakis
* Adapted for Sansa Fuze/e200v2 by Rafaël Carré
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include "config.h"
#include "cpu.h"
#define DBOP_BUSY (1<<10)
/****************************************************************************
* void lcd_write_yuv420_lines(unsigned char const * const src[3],
* int width,
* int stride);
*
* |R| |1.000000 -0.000001 1.402000| |Y'|
* |G| = |1.000000 -0.334136 -0.714136| |Pb|
* |B| |1.000000 1.772000 0.000000| |Pr|
* Scaled, normalized, rounded and tweaked to yield RGB 565:
* |R| |74 0 101| |Y' - 16| >> 9
* |G| = |74 -24 -51| |Cb - 128| >> 8
* |B| |74 128 0| |Cr - 128| >> 9
*
* Write four RGB565 pixels in the following order on each loop:
* 1 3 + > down
* 2 4 \/ left
*/
.section .icode, "ax", %progbits
.align 2
.global lcd_write_yuv420_lines
.type lcd_write_yuv420_lines, %function
lcd_write_yuv420_lines:
@ r0 = yuv_src
@ r1 = width
@ r2 = stride
stmfd sp!, { r4-r11, lr } @ save non-scratch
mov r3, #0xC8000000 @
orr r3, r3, #0x120000 @ r3 = DBOP_BASE
ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
@ r5 = yuv_src[1] = Cb_p
@ r6 = yuv_src[2] = Cr_p
@ r0 = scratch
ldr r12, [r3, #8] @
sub r2, r2, #1 @ stride -= 1
orr r12, r12, #3<<13 @ DBOP_CTRL |= (1<<13|1<<14) (32bit mode)
#ifdef SANSA_FUZEV2
bic r12, r12, #1<<13 @ DBOP_CTRL &= ~(1<<13),still 32bit mode
#endif
str r12, [r3, #8] @
10: @ loop line @
ldrb r7, [r4], #1 @ r7 = *Y'_p++;
ldrb r8, [r5], #1 @ r8 = *Cb_p++;
ldrb r9, [r6], #1 @ r9 = *Cr_p++;
@
sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74
add r12, r7, r7, asl #2 @ actually (Y' - 16)*37 and shift right
add r7, r12, r7, asl #5 @ by one less when adding - same for all
@
sub r8, r8, #128 @ Cb -= 128
sub r9, r9, #128 @ Cr -= 128
@
add r10, r9, r9, asl #1 @ r10 = Cr*51 + Cb*24
add r10, r10, r10, asl #4 @
add r10, r10, r8, asl #3 @
add r10, r10, r8, asl #4 @
@
add lr, r9, r9, asl #2 @ r9 = Cr*101
add lr, lr, r9, asl #5 @
add r9, lr, r9, asl #6 @
@
add r8, r8, #2 @ r8 = bu = (Cb*128 + 128) >> 8
mov r8, r8, asr #2 @
add r9, r9, #256 @ r9 = rv = (r9 + 256) >> 9
mov r9, r9, asr #9 @
rsb r10, r10, #128 @ r10 = guv = (-r10 + 128) >> 8
mov r10, r10, asr #8 @
@ compute R, G, and B
add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
orr r12, r0, lr @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r0, #31 @ clamp b
mvnhi r0, r0, asr #31 @
andhi r0, r0, #31 @
cmp lr, #31 @ clamp r
mvnhi lr, lr, asr #31 @
andhi lr, lr, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
@
ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
@
orr r0, r0, lr, lsl #11 @ r0 = (r << 11) | b
orr r11, r0, r7, lsl #5 @ r11 = (r << 11) | (g << 5) | b
orr r11, r0, r7, lsl #5 @ r11 = (r << 11) | (g << 5) | b
#ifdef SANSA_FUZEV2
mov r0, r11, lsr #8 @
bic r11, r11, #0xff00 @
orr r11, r0, r11, lsl #8 @ swap bytes
#endif
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
add r12, r7, r7, asl #2 @
add r7, r12, r7, asl #5 @
@ compute R, G, and B
add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
orr r12, r0, lr @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r0, #31 @ clamp b
mvnhi r0, r0, asr #31 @
andhi r0, r0, #31 @
cmp lr, #31 @ clamp r
mvnhi lr, lr, asr #31 @
andhi lr, lr, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
@
ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
@
orr r0, r0, lr, lsl #11 @ r0 = (r << 11) | b
orr r0, r0, r7, lsl #5 @ r0 = (r << 11) | (g << 5) | b
#ifdef SANSA_FUZEV2
mov r7, r0, lsr #8 @
bic r7, r7, #0xff00 @
orr r0, r7, r0, lsl #8 @ swap bytes
#endif
orr r0, r11, r0, lsl#16 @ pack with 2nd pixel
str r0, [r3, #0x10] @ write pixel
@
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
add r12, r7, r7, asl #2 @
add r7, r12, r7, asl #5 @
@ compute R, G, and B
add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
orr r12, r0, lr @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r0, #31 @ clamp b
mvnhi r0, r0, asr #31 @
andhi r0, r0, #31 @
cmp lr, #31 @ clamp r
mvnhi lr, lr, asr #31 @
andhi lr, lr, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
@
ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
@
@
orr r0, r0, lr, lsl #11 @ r0 = (r << 11) | b
orr r11, r0, r7, lsl #5 @ r0 = (r << 11) | (g << 5) | b
#ifdef SANSA_FUZEV2
mov r0, r11, lsr #8 @
bic r11, r11, #0xff00 @
orr r11, r0, r11, lsl #8 @ swap byte
#endif
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
add r12, r7, r7, asl #2 @
add r7, r12, r7, asl #5 @
@ compute R, G, and B
add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
orr r12, r0, lr @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r0, #31 @ clamp b
mvnhi r0, r0, asr #31 @
andhi r0, r0, #31 @
cmp lr, #31 @ clamp r
mvnhi lr, lr, asr #31 @
andhi lr, lr, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
@
orr r0, r0, lr, lsl #11 @ r0 = (r << 11) | b
orr r0, r0, r7, lsl #5 @ r0 = (r << 11) | (g << 5) | b
#ifdef SANSA_FUZEV2
mov r7, r0, lsr #8 @
bic r7, r7, #0xff00 @
orr r0, r7, r0, lsl #8 @ swap bytes
#endif
orr r0, r11, r0, lsl#16 @ pack with 2nd pixel
str r0, [r3, #0x10] @ write pixel
@
subs r1, r1, #2 @ subtract block from width
bgt 10b @ loop line @
@
1: @ busy
@ writing at max 110*32 (LCD_WIDTH/2), the fifo is bigger
@ so polling fifo empty only after each line is save
ldr r7, [r3,#0xc] @ r7 = DBOP_STATUS
tst r7, #DBOP_BUSY @ fifo not empty?
beq 1b @
ldmpc regs=r4-r11 @ restore registers and return
.ltorg @ dump constant pool
.size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
/****************************************************************************
* void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
* int width,
* int stride,
* int x_screen,
* int y_screen);
*
* |R| |1.000000 -0.000001 1.402000| |Y'|
* |G| = |1.000000 -0.334136 -0.714136| |Pb|
* |B| |1.000000 1.772000 0.000000| |Pr|
* Red scaled at twice g & b but at same precision to place it in correct
* bit position after multiply and leave instruction count lower.
* |R| |258 0 408| |Y' - 16|
* |G| = |149 -49 -104| |Cb - 128|
* |B| |149 258 0| |Cr - 128|
*
* Write four RGB565 pixels in the following order on each loop:
* 1 3 + > down
* 2 4 \/ left
*
* Kernel pattern (raw|rotated|use order):
* 5 3 4 2 2 6 3 7 row0 row2 > down
* 1 7 0 6 | 4 0 5 1 | 2 4 6 0 3 5 7 1 col0 left
* 4 2 5 3 | 3 7 2 6 | 3 5 7 1 2 4 6 0 col2 \/
* 0 6 1 7 5 1 4 0
*/
.section .icode, "ax", %progbits
.align 2
.global lcd_write_yuv420_lines_odither
.type lcd_write_yuv420_lines_odither, %function
lcd_write_yuv420_lines_odither:
@ Dithered variant of the two-line yuv420 -> RGB565 writer. Each loop
@ iteration consumes one Cb/Cr pair and a 2x2 block of luma, adds a
@ per-pixel dither offset (derived from r14) before truncating to 5/6/5
@ bits, and stores two packed pixels per 32-bit write to DBOP_BASE + 0x10.
@ r3 is reused as the pixel accumulator inside the loop, which is why
@ DBOP_BASE is rebuilt in r0 before every store.
@
@ r0 = yuv_src
@ r1 = width
@ r2 = stride
@ r3 = x_screen
@ [sp] = y_screen
stmfd sp!, { r4-r11, lr } @ save non-scratch
ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
@ r5 = yuv_src[1] = Cb_p
@ r6 = yuv_src[2] = Cr_p
@
ldr r14, [sp, #36] @ Line up pattern and kernel quadrant
@ (y_screen sits above the 9 saved words)
sub r2, r2, #1 @ stride -= 1
eor r14, r14, r3 @
and r14, r14, #0x2 @
mov r14, r14, lsl #6 @ 0x00 or 0x80
mov r3, #0xC8000000 @
orr r3, r3, #0x120000 @ r3 = DBOP_BASE, need to be redone
@ due to lack of registers
ldr r12, [r3, #8] @
orr r12, r12, #3<<13 @ DBOP_CTRL |= (1<<13|1<<14)
#ifdef SANSA_FUZEV2
bic r12, r12, #1<<13 @ DBOP_CTRL &= ~(1<<13), still 32bit mode
#endif
str r12, [r3, #8] @ (32bit mode)
10: @ loop line @
@
ldrb r7, [r4], #1 @ r7 = *Y'_p++;
ldrb r8, [r5], #1 @ r8 = *Cb_p++;
ldrb r9, [r6], #1 @ r9 = *Cr_p++;
@
eor r14, r14, #0x80 @ flip pattern quadrant
@
sub r7, r7, #16 @ r7 = Y = (Y' - 16)*149
add r12, r7, r7, asl #2 @
add r12, r12, r12, asl #4 @
add r7, r12, r7, asl #6 @
@
sub r8, r8, #128 @ Cb -= 128
sub r9, r9, #128 @ Cr -= 128
@
add r10, r8, r8, asl #4 @ r10 = guv = Cr*104 + Cb*49
add r10, r10, r8, asl #5 @
add r10, r10, r9, asl #3 @
add r10, r10, r9, asl #5 @
add r10, r10, r9, asl #6 @
@
mov r8, r8, asl #1 @ r8 = bu = Cb*258
add r8, r8, r8, asl #7 @
@
add r9, r9, r9, asl #1 @ r9 = rv = Cr*408
add r9, r9, r9, asl #4 @
mov r9, r9, asl #3 @
@
@ compute R, G, and B
add r0, r8, r7 @ r0 = b' = Y + bu
add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
rsb r7, r10, r7 @ r7 = g' = Y - guv
@
@ r8 = bu, r9 = rv, r10 = guv
@
sub r12, r0, r0, lsr #5 @ r0 = 31/32*b + b/256
add r0, r12, r0, lsr #8 @
@
sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
add r11, r12, r11, lsr #8 @
@
sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
add r7, r12, r7, lsr #8 @
@
add r12, r14, #0x100 @ r12 = dither delta for this pixel
@
add r0, r0, r12 @ b = r0 + delta
add r11, r11, r12, lsl #1 @ r = r11 + delta*2
add r7, r7, r12, lsr #1 @ g = r7 + delta/2
@
orr r12, r0, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
beq 15f @ no clamp @
movs r12, r0, asr #15 @ clamp b
mvnne r0, r12, lsr #15 @
andne r0, r0, #0x7c00 @ mask b only if clamped
movs r12, r11, asr #16 @ clamp r
mvnne r11, r12, lsr #16 @
movs r12, r7, asr #15 @ clamp g
mvnne r7, r12, lsr #15 @
15: @ no clamp @
@
ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
@
and r11, r11, #0xf800 @ pack pixel
and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
orr r3, r11, r0, lsr #10 @ (b >> 10)
#ifdef SANSA_FUZEV2
mov r7, r3, lsr #8 @
bic r3, r3, #0xff00 @
orr r3, r7, r3, lsl #8 @ swap pixel
#endif
@ save pixel
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
add r12, r7, r7, asl #2 @
add r12, r12, r12, asl #4 @
add r7, r12, r7, asl #6 @
@ compute R, G, and B
add r0, r8, r7 @ r0 = b' = Y + bu
add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
rsb r7, r10, r7 @ r7 = g' = Y - guv
@
sub r12, r0, r0, lsr #5 @ r0 = 31/32*b' + b'/256
add r0, r12, r0, lsr #8 @
@
sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
add r11, r12, r11, lsr #8 @
@
sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
add r7, r12, r7, lsr #8 @
@
add r12, r14, #0x200 @ r12 = dither delta for this pixel
@
add r0, r0, r12 @ b = r0 + delta
add r11, r11, r12, lsl #1 @ r = r11 + delta*2
add r7, r7, r12, lsr #1 @ g = r7 + delta/2
@
orr r12, r0, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
beq 15f @ no clamp @
movs r12, r0, asr #15 @ clamp b
mvnne r0, r12, lsr #15 @
andne r0, r0, #0x7c00 @ mask b only if clamped
movs r12, r11, asr #16 @ clamp r
mvnne r11, r12, lsr #16 @
movs r12, r7, asr #15 @ clamp g
mvnne r7, r12, lsr #15 @
15: @ no clamp @
@
ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
and r11, r11, #0xf800 @ pack pixel
and r7, r7, #0x7e00 @ r0 = pixel = (r & 0xf800) |
orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
orr r0, r11, r0, lsr #10 @ (b >> 10)
#ifdef SANSA_FUZEV2
mov r7, r0, lsr #8 @
bic r0, r0, #0xff00 @
orr r0, r7, r0, lsl #8 @ swap pixel
#endif
orr r3, r3, r0, lsl#16 @ pack with 2nd pixel
mov r0, #0xC8000000 @
orr r0, r0, #0x120000 @ r0 = DBOP_BASE
str r3, [r0, #0x10] @ write pixel
@
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
add r12, r7, r7, asl #2 @
add r12, r12, r12, asl #4 @
add r7, r12, r7, asl #6 @
@ compute R, G, and B
add r0, r8, r7 @ r0 = b' = Y + bu
add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
rsb r7, r10, r7 @ r7 = g' = Y - guv
@
@ r8 = bu, r9 = rv, r10 = guv
@
sub r12, r0, r0, lsr #5 @ r0 = 31/32*b' + b'/256
add r0, r12, r0, lsr #8 @
@
sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
add r11, r12, r11, lsr #8 @
@
sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
add r7, r12, r7, lsr #8 @
@
add r12, r14, #0x300 @ r12 = dither delta for this pixel
@
add r0, r0, r12 @ b = r0 + delta
add r11, r11, r12, lsl #1 @ r = r11 + delta*2
add r7, r7, r12, lsr #1 @ g = r7 + delta/2
@
orr r12, r0, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
beq 15f @ no clamp @
movs r12, r0, asr #15 @ clamp b
mvnne r0, r12, lsr #15 @
andne r0, r0, #0x7c00 @ mask b only if clamped
movs r12, r11, asr #16 @ clamp r
mvnne r11, r12, lsr #16 @
movs r12, r7, asr #15 @ clamp g
mvnne r7, r12, lsr #15 @
15: @ no clamp @
@
ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
@
and r11, r11, #0xf800 @ pack pixel
and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
orr r3, r11, r0, lsr #10 @ (b >> 10)
#ifdef SANSA_FUZEV2
mov r7, r3, lsr #8 @
bic r3, r3, #0xff00 @
orr r3, r7, r3, lsl #8 @ swap pixel
#endif
@ save pixel
@
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
add r12, r7, r7, asl #2 @
add r12, r12, r12, asl #4 @
add r7, r12, r7, asl #6 @
@ compute R, G, and B
add r0, r8, r7 @ r0 = b' = Y + bu
add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
rsb r7, r10, r7 @ r7 = g' = Y - guv
@
sub r12, r0, r0, lsr #5 @ r0 = 31/32*b + b/256
add r0, r12, r0, lsr #8 @
@
sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
add r11, r12, r11, lsr #8 @
@
sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
add r7, r12, r7, lsr #8 @
@
@ This element is zero - use r14 @
@
add r0, r0, r14 @ b = r0 + delta
add r11, r11, r14, lsl #1 @ r = r11 + delta*2
add r7, r7, r14, lsr #1 @ g = r7 + delta/2
@
orr r12, r0, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
beq 15f @ no clamp @
movs r12, r0, asr #15 @ clamp b
mvnne r0, r12, lsr #15 @
andne r0, r0, #0x7c00 @ mask b only if clamped
movs r12, r11, asr #16 @ clamp r
mvnne r11, r12, lsr #16 @
movs r12, r7, asr #15 @ clamp g
mvnne r7, r12, lsr #15 @
15: @ no clamp @
@
and r11, r11, #0xf800 @ pack pixel
and r7, r7, #0x7e00 @ r0 = pixel = (r & 0xf800) |
orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
orr r0, r11, r0, lsr #10 @ (b >> 10)
#ifdef SANSA_FUZEV2
mov r7, r0, lsr #8 @
bic r0, r0, #0xff00 @
orr r0, r7, r0, lsl #8 @ swap pixel
#endif
orr r3, r3, r0, lsl#16 @ pack with 2nd pixel
mov r0, #0xC8000000 @
orr r0, r0, #0x120000 @ r0 = DBOP_BASE
str r3, [r0, #0x10] @ write pixel
@
subs r1, r1, #2 @ subtract block from width
bgt 10b @ loop line @
@
1: @ busy @
@ writing at max 110*32 (LCD_WIDTH/2), the fifo is bigger (128*32)
@ so polling fifo empty only after each line is safe
@ (r0 still holds DBOP_BASE from the last pixel store)
ldr r7, [r0,#0xc] @ r7 = DBOP_STATUS
tst r7, #DBOP_BUSY @ fifo not empty?
beq 1b @
ldmpc regs=r4-r11 @ restore registers and return
.ltorg @ dump constant pool
.size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither

View file

@ -197,6 +197,86 @@ static void lcd_window_y(int ymin, int ymax)
lcd_write_reg(R_RAM_ADDR_SET, ymin);
}
/* Option flags for YUV blitting; lcd_blit_yuv() tests LCD_YUV_DITHER in it. */
static unsigned lcd_yuv_options = 0;

/* Replace the current YUV blit option flags with 'options'. */
void lcd_yuv_set_options(unsigned options)
{
    lcd_yuv_options = options;
}
#ifndef BOOTLOADER
/* Line write helper functions for lcd_blit_yuv. Each call writes two lines
 * of yuv420. The _odither variant additionally takes the screen position so
 * that the dither pattern can be aligned. */
extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
                                   int width,
                                   int stride);
extern void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
                                           int width,
                                           int stride,
                                           int x_screen, /* To align dither pattern */
                                           int y_screen);

/* Performance function to blit a YUV 4:2:0 bitmap directly to the LCD.
 *
 * src           - plane pointers: src[0] is luma, src[1] and src[2] are the
 *                 two chroma planes (one chroma line per two luma lines)
 * src_x, src_y  - top-left corner of the region inside the source planes
 * stride        - length of one luma line of the source, in bytes
 * x, y          - destination position on the LCD
 * width, height - size of the region to draw
 *
 * src_x, src_y, width and height should be even
 * x, y, width and height have to be within LCD bounds
 *
 * The helpers always convert two lines and pixel pairs at a time, so odd
 * sizes are rounded down and a region smaller than 2x2 draws nothing and
 * leaves the LCD registers untouched.
 */
void lcd_blit_yuv(unsigned char * const src[3],
                  int src_x, int src_y, int stride,
                  int x, int y, int width, int height)
{
    unsigned char const * yuv_src[3];
    off_t z;

    /* The loops below are do/while and would otherwise program a window and
     * convert two lines even for an empty or one-line/one-column request. */
    if (width < 2 || height < 2)
        return;

    width &= ~1;    /* pixels are converted in horizontal pairs */
    height >>= 1;   /* from here on: number of two-line strips */

    z = stride*src_y;
    yuv_src[0] = src[0] + z + src_x;
    /* Chroma is subsampled by two in both directions */
    yuv_src[1] = src[1] + (z >> 2) + (src_x >> 1);
    /* Both chroma planes use the same offset */
    yuv_src[2] = src[2] + (yuv_src[1] - src[1]);

    lcd_write_reg(R_ENTRY_MODE, R_ENTRY_MODE_VIDEO);

    /* The horizontal window is fixed; the vertical one moves per strip */
    lcd_window_x(x, x + width - 1);

    if (lcd_yuv_options & LCD_YUV_DITHER)
    {
        do
        {
            lcd_window_y(y, y + 1);
            lcd_write_cmd(R_WRITE_DATA_2_GRAM);
            lcd_write_yuv420_lines_odither(yuv_src, width, stride, x, y);
            yuv_src[0] += stride << 1; /* Skip down two luma lines */
            yuv_src[1] += stride >> 1; /* Skip down one chroma line */
            yuv_src[2] += stride >> 1;
            y += 2;
        }
        while (--height > 0);
    }
    else
    {
        do
        {
            lcd_window_y(y, y + 1);
            lcd_write_cmd(R_WRITE_DATA_2_GRAM);
            lcd_write_yuv420_lines(yuv_src, width, stride);
            yuv_src[0] += stride << 1; /* Skip down two luma lines */
            yuv_src[1] += stride >> 1; /* Skip down one chroma line */
            yuv_src[2] += stride >> 1;
            y += 2;
        }
        while (--height > 0);
    }
}
#endif
/* Update the display.
This must be called after all other LCD functions that change the display. */
void lcd_update(void)

View file

@ -336,6 +336,104 @@ bool lcd_active(void)
/*** update functions ***/
/* Option flags for YUV blitting; lcd_blit_yuv() tests LCD_YUV_DITHER in it. */
static unsigned lcd_yuv_options = 0;

/* Replace the current YUV blit option flags with 'options'. */
void lcd_yuv_set_options(unsigned options)
{
    lcd_yuv_options = options;
}
#ifndef BOOTLOADER
/* Set up the LCD controller for a blit into the rectangle
 * (xmin, ymin)-(xmax, ymax) by writing R_HORIZ_RAM_ADDR_POS,
 * R_VERT_RAM_ADDR_POS and R_RAM_ADDR_SET, in that order.
 *
 * When the display is not flipped the horizontal coordinates are mirrored
 * (LCD_WIDTH-1 - x); when it is flipped they are used as given and the
 * address is set to ymax instead of ymin.
 */
static void lcd_window_blit(int xmin, int ymin, int xmax, int ymax)
{
    if (display_flipped)
    {
        lcd_write_reg(R_HORIZ_RAM_ADDR_POS, (xmax << 8) | xmin);
        lcd_write_reg(R_VERT_RAM_ADDR_POS, (ymax << 8) | ymin);
        lcd_write_reg(R_RAM_ADDR_SET, (ymax << 8) | xmin);
        return;
    }

    /* Normal orientation: mirror the horizontal coordinates */
    lcd_write_reg(R_HORIZ_RAM_ADDR_POS,
                  ((LCD_WIDTH-1 - xmin) << 8) | (LCD_WIDTH-1 - xmax));
    lcd_write_reg(R_VERT_RAM_ADDR_POS, (ymax << 8) | ymin);
    lcd_write_reg(R_RAM_ADDR_SET,
                  (ymin << 8) | (LCD_WIDTH-1 - xmin));
}
/* Line write helper functions for lcd_blit_yuv. Each call writes two lines
 * of yuv420. The _odither variant additionally takes the screen position so
 * that the dither pattern can be aligned. */
extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
                                   int width,
                                   int stride);
extern void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
                                           int width,
                                           int stride,
                                           int x_screen, /* To align dither pattern */
                                           int y_screen);

/* Performance function to blit a YUV 4:2:0 bitmap directly to the LCD.
 *
 * src           - plane pointers: src[0] is luma, src[1] and src[2] are the
 *                 two chroma planes (one chroma line per two luma lines)
 * src_x, src_y  - top-left corner of the region inside the source planes
 * stride        - length of one luma line of the source, in bytes
 * x, y          - destination position on the LCD
 * width, height - size of the region to draw
 *
 * src_x, src_y, width and height should be even
 * x, y, width and height have to be within LCD bounds
 *
 * The helpers always convert two lines and pixel pairs at a time, so odd
 * sizes are rounded down and a region smaller than 2x2 draws nothing and
 * leaves the LCD registers untouched.
 */
void lcd_blit_yuv(unsigned char * const src[3],
                  int src_x, int src_y, int stride,
                  int x, int y, int width, int height)
{
    unsigned char const * yuv_src[3];
    off_t z;

    /* The loops below are do/while and would otherwise program a window and
     * convert two lines even for an empty or one-line/one-column request. */
    if (width < 2 || height < 2)
        return;

    width &= ~1;    /* pixels are converted in horizontal pairs */
    height >>= 1;   /* from here on: number of two-line strips */

    z = stride*src_y;
    yuv_src[0] = src[0] + z + src_x;
    /* Chroma is subsampled by two in both directions */
    yuv_src[1] = src[1] + (z >> 2) + (src_x >> 1);
    /* Both chroma planes use the same offset */
    yuv_src[2] = src[2] + (yuv_src[1] - src[1]);

    lcd_write_reg(R_ENTRY_MODE,
        display_flipped ? R_ENTRY_MODE_VIDEO_FLIPPED : R_ENTRY_MODE_VIDEO_NORMAL
    );

    if (lcd_yuv_options & LCD_YUV_DITHER)
    {
        do
        {
            /* x and y are passed swapped to the window helper */
            lcd_window_blit(y, x, y+1, x+width-1);
            lcd_write_cmd(R_WRITE_DATA_2_GRAM);
            lcd_write_yuv420_lines_odither(yuv_src, width, stride, x, y);
            yuv_src[0] += stride << 1; /* Skip down two luma lines */
            yuv_src[1] += stride >> 1; /* Skip down one chroma line */
            yuv_src[2] += stride >> 1;
            y += 2;
        }
        while (--height > 0);
    }
    else
    {
        do
        {
            /* x and y are passed swapped to the window helper */
            lcd_window_blit(y, x, y+1, x+width-1);
            lcd_write_cmd(R_WRITE_DATA_2_GRAM);
            lcd_write_yuv420_lines(yuv_src, width, stride);
            yuv_src[0] += stride << 1; /* Skip down two luma lines */
            yuv_src[1] += stride >> 1; /* Skip down one chroma line */
            yuv_src[2] += stride >> 1;
            y += 2;
        }
        while (--height > 0);
    }
}
#endif
/* Update the display.
This must be called after all other LCD functions that change the display. */
void lcd_update(void)

View file

@ -0,0 +1,287 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id:$
*
* Copyright (C) 2010-2011 by Andree Buschmann
*
* Generic asm helper function used by YUV blitting.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include "config.h"
#include "cpu.h"
/****************************************************************************
* #define FORCE_FIFO_WAIT
*
* This is not needed in YUV blitting when the LCD IF is fast enough. In this
* case YUV-to-RGB conversion per pixel needs longer than the transfer of a
* pixel via the LCD IF.
****************************************************************************/
#include "config.h"
/* Set FIFO wait for both iPod Color and iPod nano1G until we know for which
* devices we can switch this off. */
#define FORCE_FIFO_WAIT
.section .icode, "ax", %progbits
/****************************************************************************
* extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
* const unsigned LCD_BASE,
* int width,
* int stride);
*
* Conversion from Motion JPEG and MPEG Y'PbPr to RGB is:
* |R| |1.164 0.000 1.596| |Y' - 16|
* |G| = |1.164 -0.391 -0.813| |Pb - 128|
* |B| |1.164 2.018 0.000| |Pr - 128|
*
* Scaled, normalized, rounded and tweaked to yield RGB 565:
* |R| |74 0 101| |Y' - 16| >> 9
* |G| = |74 -24 -51| |Cb - 128| >> 8
* |B| |74 128 0| |Cr - 128| >> 9
*
* Converts two lines from YUV to RGB565 and writes to LCD at once. First loop
* loads Cb/Cr, calculates the chroma offset and saves them to buffer. Within
* the second loop these chroma offset are reloaded from buffer. Within each
* loop two pixels are calculated and written to LCD.
*/
.align 2
.global lcd_write_yuv420_lines
.type lcd_write_yuv420_lines, %function
lcd_write_yuv420_lines:
/* Two passes over 'width' pixels. The first pass converts the luma line at
 * yuv_src[0] and caches bu/rv/guv for every pixel pair in a buffer
 * allocated on the stack; the second pass converts the line at
 * yuv_src[0] + stride using the cached values. In both passes two pixels
 * are byte-swapped, packed into one word and stored to LCD_BASE + 0x100. */
/* r0 = src = yuv_src */
/* r1 = dst = LCD_BASE */
/* r2 = width */
/* r3 = stride */
stmfd sp!, { r4-r10, lr } /* save non-scratch */
ldmia r0, { r9, r10, r12 } /* r9 = yuv_src[0] = Y'_p */
/* r10 = yuv_src[1] = Cb_p */
/* r12 = yuv_src[2] = Cr_p */
add r3, r9, r3 /* r3 = &ysrc[stride] */
add r4, r2, r2, asr #1 /* chroma buffer length = width/2 *3 */
mov r4, r4, asl #2 /* use words for str/ldm possibility */
add r4, r4, #19 /* plus room for 4 additional words, */
bic r4, r4, #3 /* rounded up to multiples of 4 byte */
sub sp, sp, r4 /* and allocate on stack */
stmia sp, {r1-r4} /* LCD_BASE, width, &ysrc[stride], stack_alloc */
mov r7, r2 /* r7 = loop count */
add r8, sp, #16 /* chroma buffer */
add lr, r1, #0x100 /* LCD data port = LCD2_BASE + 0x100 */
/* 1st loop start */
10: /* loop start */
ldrb r0, [r10], #1 /* r0 = *usrc++ = *Cb_p++ */
ldrb r1, [r12], #1 /* r1 = *vsrc++ = *Cr_p++ */
sub r0, r0, #128 /* r0 = Cb-128 */
sub r1, r1, #128 /* r1 = Cr-128 */
add r2, r1, r1, asl #1 /* r2 = Cr*51 + Cb*24 */
add r2, r2, r2, asl #4
add r2, r2, r0, asl #3
add r2, r2, r0, asl #4
add r4, r1, r1, asl #2 /* r1 = Cr*101 */
add r4, r4, r1, asl #5
add r1, r4, r1, asl #6
add r1, r1, #256 /* r1 = rv = (r1 + 256) >> 9 */
mov r1, r1, asr #9
rsb r2, r2, #128 /* r2 = guv = (-r2 + 128) >> 8 */
mov r2, r2, asr #8
add r0, r0, #2 /* r0 = bu = (Cb*128 + 256) >> 9 */
mov r0, r0, asr #2
stmia r8!, {r0-r2} /* store r0, r1 and r2 to chroma buffer */
/* 1st loop, first pixel */
ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */
sub r5, r5, #16 /* r5 = (Y'-16) * 74 */
add r3, r5, r5, asl #2 /* (actually *37, shifts below are one less) */
add r5, r3, r5, asl #5
add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */
add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */
add r4, r0, r5, asr #8 /* r4 = b = (Y >> 9) + bu */
orr r5, r6, r4 /* check if clamping is needed... */
orr r5, r5, r3, asr #1 /* ...at all */
cmp r5, #31
bls 15f /* -> no clamp */
cmp r6, #31 /* clamp r */
mvnhi r6, r6, asr #31
andhi r6, r6, #31
cmp r3, #63 /* clamp g */
mvnhi r3, r3, asr #31
andhi r3, r3, #63
cmp r4, #31 /* clamp b */
mvnhi r4, r4, asr #31
andhi r4, r4, #31
15: /* no clamp */
/* calculate pixel_1 and save to r4 for later pixel packing */
orr r4, r4, r3, lsl #5 /* pixel_1 = r<<11 | g<<5 | b */
orr r4, r4, r6, lsl #11 /* r4 = pixel_1 */
/* 1st loop, second pixel */
ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */
sub r5, r5, #16 /* r5 = (Y'-16) * 74 */
add r3, r5, r5, asl #2
add r5, r3, r5, asl #5
add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */
add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */
add r5, r0, r5, asr #8 /* r5 = b = (Y >> 9) + bu */
orr r0, r6, r5 /* check if clamping is needed... */
orr r0, r0, r3, asr #1 /* ...at all */
cmp r0, #31
bls 15f /* -> no clamp */
cmp r6, #31 /* clamp r */
mvnhi r6, r6, asr #31
andhi r6, r6, #31
cmp r3, #63 /* clamp g */
mvnhi r3, r3, asr #31
andhi r3, r3, #63
cmp r5, #31 /* clamp b */
mvnhi r5, r5, asr #31
andhi r5, r5, #31
15: /* no clamp */
/* calculate pixel_2 and pack with pixel_1 before writing */
orr r5, r5, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */
orr r5, r5, r6, lsl #11 /* r5 = pixel_2 */
#ifdef FORCE_FIFO_WAIT
/* wait for FIFO half full */
.fifo_wait1:
ldr r3, [lr, #-0xE0] /* while !(LCD2_BLOCK_CTRL & 0x1000000); */
tst r3, #0x1000000
beq .fifo_wait1
#endif
mov r3, r4, lsl #8 /* swap pixel_1 */
and r3, r3, #0xff00
add r4, r3, r4, lsr #8
orr r4, r4, r5, lsl #24 /* swap pixel_2 and pack with pixel_1 */
mov r5, r5, lsr #8
orr r4, r4, r5, lsl #16
str r4, [lr] /* write pixel_1 and pixel_2 */
subs r7, r7, #2 /* check for loop end */
bgt 10b /* back to beginning */
/* 1st loop end */
/* Reload several registers for pointer rewinding for next loop */
add r8, sp, #16 /* chroma buffer */
ldmia sp, { r1, r7, r9} /* r1 = LCD_BASE */
/* r7 = loop count */
/* r9 = &ysrc[stride] */
/* 2nd loop start */
20: /* loop start */
/* restore r0 (bu), r1 (rv) and r2 (guv) from chroma buffer */
ldmia r8!, {r0-r2}
/* 2nd loop, first pixel */
ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */
sub r5, r5, #16 /* r5 = (Y'-16) * 74 */
add r3, r5, r5, asl #2
add r5, r3, r5, asl #5
add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */
add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */
add r4, r0, r5, asr #8 /* r4 = b = (Y >> 9) + bu */
orr r5, r6, r4 /* check if clamping is needed... */
orr r5, r5, r3, asr #1 /* ...at all */
cmp r5, #31
bls 15f /* -> no clamp */
cmp r6, #31 /* clamp r */
mvnhi r6, r6, asr #31
andhi r6, r6, #31
cmp r3, #63 /* clamp g */
mvnhi r3, r3, asr #31
andhi r3, r3, #63
cmp r4, #31 /* clamp b */
mvnhi r4, r4, asr #31
andhi r4, r4, #31
15: /* no clamp */
/* calculate pixel_1 and save to r4 for later pixel packing */
orr r4, r4, r3, lsl #5 /* pixel_1 = r<<11 | g<<5 | b */
orr r4, r4, r6, lsl #11 /* r4 = pixel_1 */
/* 2nd loop, second pixel */
ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */
sub r5, r5, #16 /* r5 = (Y'-16) * 74 */
add r3, r5, r5, asl #2
add r5, r3, r5, asl #5
add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */
add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */
add r5, r0, r5, asr #8 /* r5 = b = (Y >> 9) + bu */
orr r0, r6, r5 /* check if clamping is needed... */
orr r0, r0, r3, asr #1 /* ...at all */
cmp r0, #31
bls 15f /* -> no clamp */
cmp r6, #31 /* clamp r */
mvnhi r6, r6, asr #31
andhi r6, r6, #31
cmp r3, #63 /* clamp g */
mvnhi r3, r3, asr #31
andhi r3, r3, #63
cmp r5, #31 /* clamp b */
mvnhi r5, r5, asr #31
andhi r5, r5, #31
15: /* no clamp */
/* calculate pixel_2 and pack with pixel_1 before writing */
orr r5, r5, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */
orr r5, r5, r6, lsl #11 /* r5 = pixel_2 */
#ifdef FORCE_FIFO_WAIT
/* wait for FIFO half full */
.fifo_wait2:
ldr r3, [lr, #-0xE0] /* while !(LCD2_BLOCK_CTRL & 0x1000000); */
tst r3, #0x1000000
beq .fifo_wait2
#endif
mov r3, r4, lsl #8 /* swap pixel_1 */
and r3, r3, #0xff00
add r4, r3, r4, lsr #8
orr r4, r4, r5, lsl #24 /* swap pixel_2 and pack with pixel_1 */
mov r5, r5, lsr #8
orr r4, r4, r5, lsl #16
str r4, [lr] /* write pixel_1 and pixel_2 */
subs r7, r7, #2 /* check for loop end */
bgt 20b /* back to beginning */
/* 2nd loop end */
ldr r3, [sp, #12] /* r3 = stack_alloc saved at function entry */
add sp, sp, r3 /* deallocate buffer */
ldmpc regs=r4-r10 /* restore registers */
.ltorg
.size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines

View file

@ -202,6 +202,62 @@ static void lcd_setup_drawing_region(int x, int y, int width, int height)
}
}
/* Line write helper function for lcd_blit_yuv. Writes two lines of yuv420. */
extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
                                   const unsigned int lcd_baseadress,
                                   int width,
                                   int stride);

/* Performance function to blit a YUV 4:2:0 bitmap directly to the LCD.
 *
 * src           - plane pointers: src[0] is luma, src[1] and src[2] are the
 *                 two chroma planes (one chroma line per two luma lines)
 * src_x, src_y  - top-left corner of the region inside the source planes
 * stride        - length of one luma line of the source, in bytes
 * x, y          - destination position on the LCD
 * width, height - size of the region to draw; odd values are rounded up to
 *                 the next even value
 *
 * The region is sent in chunks of at most 0x10000 bytes (two bytes per
 * pixel). An empty or negative size draws nothing and leaves the LCD
 * registers untouched.
 */
void lcd_blit_yuv(unsigned char * const src[3],
                  int src_x, int src_y, int stride,
                  int x, int y, int width, int height)
{
    int z;
    unsigned char const * yuv_src[3];

    /* Without this a non-positive size would still program the drawing
     * region and, for width <= 0, a bogus transfer count in
     * LCD2_BLOCK_CONFIG. */
    if (width <= 0 || height <= 0)
        return;

    width  = (width  + 1) & ~1; /* ensure width is even */
    height = (height + 1) & ~1; /* ensure height is even */

    lcd_setup_drawing_region(x, y, width, height);

    z = stride * src_y;
    yuv_src[0] = src[0] + z + src_x;
    /* Chroma is subsampled by two in both directions */
    yuv_src[1] = src[1] + (z >> 2) + (src_x >> 1);
    /* Both chroma planes use the same offset */
    yuv_src[2] = src[2] + (yuv_src[1] - src[1]);

    while (height > 0) {
        int r, h, pixels_to_write;

        /* Despite its name this is a byte count: two bytes per pixel */
        pixels_to_write = (width * height) * 2;
        h = height;

        /* calculate how much we can do in one go */
        if (pixels_to_write > 0x10000) {
            h = ((0x10000/2) / width) & ~1; /* ensure h is even */
            pixels_to_write = (width * h) * 2;
        }

        LCD2_BLOCK_CTRL = 0x10000080;
        LCD2_BLOCK_CONFIG = 0xc0010000 | (pixels_to_write - 1);
        LCD2_BLOCK_CTRL = 0x34000000;

        r = h>>1; /* lcd_write_yuv420_lines writes two lines at once */
        do {
            lcd_write_yuv420_lines(yuv_src, LCD2_BASE, width, stride);
            yuv_src[0] += stride << 1; /* Skip down two luma lines */
            yuv_src[1] += stride >> 1; /* Skip down one chroma line */
            yuv_src[2] += stride >> 1;
        } while (--r > 0);

        /* transfer of pixels_to_write bytes finished */
        while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_READY));
        LCD2_BLOCK_CONFIG = 0;

        height -= h;
    }
}
/* Helper function writes 'count' consecutive pixels from src to LCD IF */
static void lcd_write_line(int count, unsigned long *src)
{

View file

@ -63,3 +63,240 @@ lcd_write_data: /* r1 = pixel count, must be even */
strne r3, [lr]
ldmpc regs=r4
/****************************************************************************
 * extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
 *                                    unsigned bcmaddr,
 *                                    int width,
 *                                    int stride);
 *
 * Conversion from Motion JPEG and MPEG Y'PbPr to RGB is:
 * |R|   |1.164  0.000  1.596| |Y' -  16|
 * |G| = |1.164 -0.391 -0.813| |Pb - 128|
 * |B|   |1.164  2.018  0.000| |Pr - 128|
 *
 * Scaled, normalized, rounded and tweaked to yield RGB 565:
 * |R|   |74   0 101| |Y' -  16| >> 9
 * |G| = |74 -24 -51| |Cb - 128| >> 8
 * |B|   |74 128   0| |Cr - 128| >> 9
 *
 * Converts two lines from YUV to RGB565 and writes them to the BCM at once.
 * The first loop loads Cb/Cr, calculates the chroma offsets and saves them
 * to a buffer on the stack. The second loop reloads these chroma offsets
 * from that buffer instead of recomputing them.
 * Each loop iteration calculates two pixels and writes them to the BCM as
 * one packed 32-bit word. Before each loop the desired destination address
 * is transmitted to the BCM.
 *
 * Stack frame after the allocation below:
 *   [sp, #0]  = bcmaddr
 *   [sp, #4]  = width
 *   [sp, #8]  = &ysrc[stride] (start of the second luma line)
 *   [sp, #12] = size of the allocation in bytes (used to free it again)
 *   [sp, #16] = chroma buffer, three words (bu, rv, guv) per pixel pair
 */
.align 2
.global lcd_write_yuv420_lines
.type lcd_write_yuv420_lines, %function
lcd_write_yuv420_lines:
/* r0 = src = yuv_src */
/* r1 = dst = bcmaddr */
/* r2 = width */
/* r3 = stride */
stmfd sp!, { r4-r10, lr } /* save non-scratch */
ldmia r0, { r9, r10, r12 } /* r9 = yuv_src[0] = Y'_p */
/* r10 = yuv_src[1] = Cb_p */
/* r12 = yuv_src[2] = Cr_p */
add r3, r9, r3 /* r3 = &ysrc[stride] */
add r4, r2, r2, asr #1 /* chroma buffer length = width/2 * 3 words */
mov r4, r4, asl #2 /* use words for str/ldm possibility */
add r4, r4, #19 /* plus room for 4 additional words, */
bic r4, r4, #3 /* rounded up to multiples of 4 byte */
sub sp, sp, r4 /* and allocate on stack */
stmia sp, {r1-r4} /* bcmaddr, width, &ysrc[stride], stack_alloc */
mov r7, r2 /* r7 = loop count */
add r8, sp, #16 /* chroma buffer */
mov lr, #0x30000000 /* LCD data port */
/* The following writes dest address to BCM and waits for write ready */
orr r2, lr, #0x00010000 /* r2 = BCM_WR_ADDR32 */
orr r6, lr, #0x00030000 /* r6 = BCM_CONTROL */
str r1, [r2] /* BCM_WR_ADDR32 = bcmaddr */
.busy_1:
ldrh r1, [r6] /* while (!(BCM_CONTROL & 0x2)) */
tst r1, #0x2
beq .busy_1
/* 1st loop start */
10: /* loop start */
ldrb r0, [r10], #1 /* r0 = *usrc++ = *Cb_p++ */
ldrb r1, [r12], #1 /* r1 = *vsrc++ = *Cr_p++ */
sub r0, r0, #128 /* r0 = Cb-128 */
sub r1, r1, #128 /* r1 = Cr-128 */
add r2, r1, r1, asl #1 /* r2 = Cr*51 + Cb*24 */
add r2, r2, r2, asl #4
add r2, r2, r0, asl #3
add r2, r2, r0, asl #4
add r4, r1, r1, asl #2 /* r1 = Cr*101 (r4 is a temporary) */
add r4, r4, r1, asl #5
add r1, r4, r1, asl #6
add r1, r1, #256 /* r1 = rv = (r1 + 256) >> 9 */
mov r1, r1, asr #9
rsb r2, r2, #128 /* r2 = guv = (-r2 + 128) >> 8 */
mov r2, r2, asr #8
add r0, r0, #2 /* r0 = bu = (Cb*128 + 256) >> 9 */
mov r0, r0, asr #2
stmia r8!, {r0-r2} /* store r0, r1 and r2 to chroma buffer */
/* 1st loop, first pixel */
ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */
sub r5, r5, #16 /* r5 = (Y'-16) * 37, i.e. (Y'-16) * 74 >> 1 */
add r3, r5, r5, asl #2
add r5, r3, r5, asl #5
add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */
add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */
add r4, r0, r5, asr #8 /* r4 = b = (Y >> 9) + bu */
orr r5, r6, r4 /* check if clamping is needed... */
orr r5, r5, r3, asr #1 /* ...at all */
cmp r5, #31
bls 15f /* -> no clamp */
/* mvnhi/andhi pair: a negative value becomes 0, a too large one the maximum */
cmp r6, #31 /* clamp r */
mvnhi r6, r6, asr #31
andhi r6, r6, #31
cmp r3, #63 /* clamp g */
mvnhi r3, r3, asr #31
andhi r3, r3, #63
cmp r4, #31 /* clamp b */
mvnhi r4, r4, asr #31
andhi r4, r4, #31
15: /* no clamp */
/* calculate pixel_1 and save to r5 for later pixel packing */
orr r4, r4, r3, lsl #5 /* pixel_1 = r<<11 | g<<5 | b */
orr r5, r4, r6, lsl #11 /* r5 = pixel_1 */
/* 1st loop, second pixel */
ldrb r4, [r9], #1 /* r4 = *ysrc++ = *Y'_p++ */
sub r4, r4, #16 /* r4 = (Y'-16) * 37, i.e. (Y'-16) * 74 >> 1 */
add r3, r4, r4, asl #2
add r4, r3, r4, asl #5
add r6, r1, r4, asr #8 /* r6 = r = (Y >> 9) + rv */
add r3, r2, r4, asr #7 /* r3 = g = (Y >> 8) + guv */
add r4, r0, r4, asr #8 /* r4 = b = (Y >> 9) + bu */
orr r0, r6, r4 /* check if clamping is needed... (r0 = bu is dead now) */
orr r0, r0, r3, asr #1 /* ...at all */
cmp r0, #31
bls 15f /* -> no clamp */
cmp r6, #31 /* clamp r */
mvnhi r6, r6, asr #31
andhi r6, r6, #31
cmp r3, #63 /* clamp g */
mvnhi r3, r3, asr #31
andhi r3, r3, #63
cmp r4, #31 /* clamp b */
mvnhi r4, r4, asr #31
andhi r4, r4, #31
15: /* no clamp */
/* calculate pixel_2 and pack with pixel_1 before writing */
orr r4, r4, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */
orr r4, r4, r6, lsl #11 /* r4 = pixel_2 */
orr r4, r5, r4, lsl #16 /* r4 = pixel_2<<16 | pixel_1 */
str r4, [lr] /* write packed pixels */
subs r7, r7, #2 /* check for loop end */
bgt 10b /* back to beginning */
/* 1st loop end */
/* Reload several registers for pointer rewinding for next loop */
add r8, sp, #16 /* chroma buffer */
ldmia sp, { r1, r7, r9} /* r1 = bcmaddr */
/* r7 = loop count */
/* r9 = &ysrc[stride] */
/* The following writes dest address to BCM and waits for write ready */
orr r2, lr, #0x00010000 /* r2 = BCM_WR_ADDR32 */
orr r6, lr, #0x00030000 /* r6 = BCM_CONTROL */
add r1, r1, #640 /* dst += (LCD_WIDTH*2); hard-coded, assumes LCD_WIDTH == 320 */
str r1, [r2] /* BCM_WR_ADDR32 = dst */
.busy_2:
ldrh r1, [r6] /* while (!(BCM_CONTROL & 0x2)) */
tst r1, #0x2
beq .busy_2
/* 2nd loop start */
20: /* loop start */
/* restore r0 (bu), r1 (rv) and r2 (guv) from chroma buffer */
ldmia r8!, {r0-r2}
/* 2nd loop, first pixel */
ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */
sub r5, r5, #16 /* r5 = (Y'-16) * 37, i.e. (Y'-16) * 74 >> 1 */
add r3, r5, r5, asl #2
add r5, r3, r5, asl #5
add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */
add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */
add r4, r0, r5, asr #8 /* r4 = b = (Y >> 9) + bu */
orr r5, r6, r4 /* check if clamping is needed... */
orr r5, r5, r3, asr #1 /* ...at all */
cmp r5, #31
bls 15f /* -> no clamp */
cmp r6, #31 /* clamp r */
mvnhi r6, r6, asr #31
andhi r6, r6, #31
cmp r3, #63 /* clamp g */
mvnhi r3, r3, asr #31
andhi r3, r3, #63
cmp r4, #31 /* clamp b */
mvnhi r4, r4, asr #31
andhi r4, r4, #31
15: /* no clamp */
/* calculate pixel_1 and save to r5 for later pixel packing */
orr r4, r4, r3, lsl #5 /* pixel_1 = r<<11 | g<<5 | b */
orr r5, r4, r6, lsl #11 /* r5 = pixel_1 */
/* 2nd loop, second pixel */
ldrb r4, [r9], #1 /* r4 = *ysrc++ = *Y'_p++ */
sub r4, r4, #16 /* r4 = (Y'-16) * 37, i.e. (Y'-16) * 74 >> 1 */
add r3, r4, r4, asl #2
add r4, r3, r4, asl #5
add r6, r1, r4, asr #8 /* r6 = r = (Y >> 9) + rv */
add r3, r2, r4, asr #7 /* r3 = g = (Y >> 8) + guv */
add r4, r0, r4, asr #8 /* r4 = b = (Y >> 9) + bu */
orr r0, r6, r4 /* check if clamping is needed... (r0 is reloaded at 20:) */
orr r0, r0, r3, asr #1 /* ...at all */
cmp r0, #31
bls 15f /* -> no clamp */
cmp r6, #31 /* clamp r */
mvnhi r6, r6, asr #31
andhi r6, r6, #31
cmp r3, #63 /* clamp g */
mvnhi r3, r3, asr #31
andhi r3, r3, #63
cmp r4, #31 /* clamp b */
mvnhi r4, r4, asr #31
andhi r4, r4, #31
15: /* no clamp */
/* calculate pixel_2 and pack with pixel_1 before writing */
orr r4, r4, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */
orr r4, r4, r6, lsl #11 /* r4 = pixel_2 */
orr r4, r5, r4, lsl #16 /* r4 = pixel_2<<16 | pixel_1 */
str r4, [lr] /* write packed pixels */
subs r7, r7, #2 /* check for loop end */
bgt 20b /* back to beginning */
/* 2nd loop end */
ldr r3, [sp, #12] /* r3 = allocation size saved at function entry */
add sp, sp, r3 /* deallocate buffer */
ldmpc regs=r4-r10 /* restore registers and return */
.ltorg
.size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines

View file

@ -439,6 +439,53 @@ void lcd_update(void)
lcd_update_rect(0, 0, LCD_WIDTH, LCD_HEIGHT);
}
/* Assembly helper used by lcd_blit_yuv: converts two lines of yuv420 to
 * RGB565 and writes them to the BCM, starting at address bcmaddr. */
extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
                                   unsigned bcmaddr,
                                   int width,
                                   int stride);

/* Performance function to blit a YUV bitmap directly to the LCD.
 *
 * src           - three planes: src[0] is luma (Y'), src[1] is Cb and
 *                 src[2] is Cr, both chroma planes subsampled by two
 * src_x, src_y  - top-left corner of the source rectangle inside the planes
 * stride        - distance between two luma lines, in bytes
 * x, y          - top-left corner of the destination on the LCD
 * width, height - size of the rectangle; width is rounded down to an even
 *                 number and height/2 line pairs are written. Both must be
 *                 at least 2: the loop always writes one line pair.
 */
void lcd_blit_yuv(unsigned char * const src[3],
                  int src_x, int src_y, int stride,
                  int x, int y, int width, int height)
{
    unsigned char const * planes[3];
    unsigned dst;
    off_t luma_off, chroma_off;
    int line_pairs;

#ifdef HAVE_LCD_SLEEP
    /* nothing to draw on while the display is off */
    if (!lcd_state.display_on)
        return;
#endif

    width &= ~1;

    luma_off   = stride * src_y;
    chroma_off = (luma_off >> 2) + (src_x >> 1);
    planes[0] = src[0] + luma_off + src_x;
    planes[1] = src[1] + chroma_off;
    planes[2] = src[2] + chroma_off;

    /* Prevent the tick from triggering BCM updates while we're writing. */
    lcd_block_tick();

    /* two bytes per pixel in the BCM buffer */
    dst = BCMA_CMDPARAM + (LCD_WIDTH*2) * y + (x << 1);
    line_pairs = height >> 1;

    do
    {
        lcd_write_yuv420_lines(planes, dst, width, stride);
        dst += (LCD_WIDTH*4);        /* advance two display lines */
        planes[0] += stride << 1;    /* advance two luma lines */
        planes[1] += stride >> 1;    /* advance one chroma line */
        planes[2] += stride >> 1;
    }
    while (--line_pairs > 0);

    lcd_unblock_and_update();
}
#ifdef HAVE_LCD_SLEEP
/* Executes a BCM command immediately and waits for it to complete.
Other BCM commands (eg. LCD updates or lcd_tick) must not interfere.

View file

@ -0,0 +1,538 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2007-2008 by Michael Sevakis
*
* H10 20GB LCD assembly routines
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include "config.h"
#include "cpu.h"
/****************************************************************************
 * void lcd_write_yuv420_lines(unsigned char const * const src[3],
 *                             int width,
 *                             int stride);
 *
 * |R|   |1.000000 -0.000001  1.402000| |Y'|
 * |G| = |1.000000 -0.344136 -0.714136| |Pb|
 * |B|   |1.000000  1.772000  0.000000| |Pr|
 * Scaled, normalized, rounded and tweaked to yield RGB 565:
 * |R|   |74   0 101| |Y' -  16| >> 9
 * |G| = |74 -24 -51| |Cb - 128| >> 8
 * |B|   |74 128   0| |Cr - 128| >> 9
 *
 * Write four RGB565 pixels in the following order on each loop:
 * 1 3 + > down
 * 2 4 \/ left
 *
 * Pixels 1 and 3 come from the luma line at Y'_p, pixels 2 and 4 from the
 * line one stride further. All four share one Cb/Cr sample.
 * Every pixel is sent as two stores to the LCD port (pixel >> 8 first, then
 * pixel), each ORed with LCD2_DATA_MASK, after polling LCD2_BUSY_MASK.
 *
 * Register use inside the loop:
 *   r0  = LCD port address        r1  = remaining width
 *   r2  = stride - 1              r4  = Y'_p, r5 = Cb_p, r6 = Cr_p
 *   r8  = bu, r9 = rv, r10 = guv  r14 = LCD2_DATA_MASK
 *   r3, r7, r11, r12 = temporaries
 */
.section .icode, "ax", %progbits
.align 2
.global lcd_write_yuv420_lines
.type lcd_write_yuv420_lines, %function
lcd_write_yuv420_lines:
@ r0 = yuv_src
@ r1 = width
@ r2 = stride
stmfd sp!, { r4-r11, lr } @ save non-scratch
ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
@ r5 = yuv_src[1] = Cb_p
@ r6 = yuv_src[2] = Cr_p
@
mov r0, #0x7000000c @ r0 = &LCD2_PORT = 0x70008a0c
add r0, r0, #0x8a00 @
mov r14, #LCD2_DATA_MASK @
@
sub r2, r2, #1 @ Adjust stride because of increment
10: @ loop line @
ldrb r7, [r4], #1 @ r7 = *Y'_p++;
ldrb r8, [r5], #1 @ r8 = *Cb_p++;
ldrb r9, [r6], #1 @ r9 = *Cr_p++;
@
sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74
add r12, r7, r7, asl #2 @ actually (Y' - 16)*37 and shift right
add r7, r12, r7, asl #5 @ by one less when adding - same for all
@
sub r8, r8, #128 @ Cb -= 128
sub r9, r9, #128 @ Cr -= 128
@
add r10, r9, r9, asl #1 @ r10 = Cr*51 + Cb*24
add r10, r10, r10, asl #4 @
add r10, r10, r8, asl #3 @
add r10, r10, r8, asl #4 @
@
add r11, r9, r9, asl #2 @ r9 = Cr*101 (r11 is a temporary)
add r11, r11, r9, asl #5 @
add r9, r11, r9, asl #6 @
@
add r8, r8, #2 @ r8 = bu = (Cb*128 + 256) >> 9
mov r8, r8, asr #2 @
add r9, r9, #256 @ r9 = rv = (r9 + 256) >> 9
mov r9, r9, asr #9 @
rsb r10, r10, #128 @ r10 = guv = (-r10 + 128) >> 8
mov r10, r10, asr #8 @
@ compute R, G, and B
add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
orr r12, r3, r11 @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
@ mvnhi/andhi pair: negative becomes 0, too large becomes the maximum
cmp r3, #31 @ clamp b
mvnhi r3, r3, asr #31 @
andhi r3, r3, #31 @
cmp r11, #31 @ clamp r
mvnhi r11, r11, asr #31 @
andhi r11, r11, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
@
ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
@
orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11)
orr r3, r3, r7, lsl #5 @ r3 |= (g << 5)
@
orr r7, r14, r3, lsr #8 @ store pixel
orr r11, r14, r3 @
20: @ wait while the LCD is busy
ldr r3, [r0] @
tst r3, #LCD2_BUSY_MASK @
bne 20b @
str r7, [r0] @
str r11, [r0] @
@
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
add r12, r7, r7, asl #2 @
add r7, r12, r7, asl #5 @
@ compute R, G, and B
add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
orr r12, r3, r11 @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r3, #31 @ clamp b
mvnhi r3, r3, asr #31 @
andhi r3, r3, #31 @
cmp r11, #31 @ clamp r
mvnhi r11, r11, asr #31 @
andhi r11, r11, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
@
ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
@
orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11)
orr r3, r3, r7, lsl #5 @ r3 |= (g << 5)
@
orr r7, r14, r3, lsr #8 @ store pixel
orr r11, r14, r3 @
20: @ wait while the LCD is busy
ldr r3, [r0] @
tst r3, #LCD2_BUSY_MASK @
bne 20b @
str r7, [r0] @
str r11, [r0] @
@
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
add r12, r7, r7, asl #2 @
add r7, r12, r7, asl #5 @
@ compute R, G, and B
add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
orr r12, r3, r11 @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r3, #31 @ clamp b
mvnhi r3, r3, asr #31 @
andhi r3, r3, #31 @
cmp r11, #31 @ clamp r
mvnhi r11, r11, asr #31 @
andhi r11, r11, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
@
ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
@
orr r3, r3, r7, lsl #5 @ r3 = b | (g << 5)
orr r3, r3, r11, lsl #11 @ r3 |= (r << 11)
@
orr r7, r14, r3, lsr #8 @ store pixel
orr r11, r14, r3 @
20: @ wait while the LCD is busy
ldr r3, [r0] @
tst r3, #LCD2_BUSY_MASK @
bne 20b @
str r7, [r0] @
str r11, [r0] @
@
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
add r12, r7, r7, asl #2 @
add r7, r12, r7, asl #5 @
@ compute R, G, and B
add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
orr r12, r3, r11 @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r3, #31 @ clamp b
mvnhi r3, r3, asr #31 @
andhi r3, r3, #31 @
cmp r11, #31 @ clamp r
mvnhi r11, r11, asr #31 @
andhi r11, r11, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
@
orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11)
orr r3, r3, r7, lsl #5 @ r3 |= (g << 5)
@
orr r7, r14, r3, lsr #8 @ store pixel
orr r11, r14, r3 @
20: @ wait while the LCD is busy
ldr r3, [r0] @
tst r3, #LCD2_BUSY_MASK @
bne 20b @
str r7, [r0] @
str r11, [r0] @
@
subs r1, r1, #2 @ subtract block from width
bgt 10b @ loop line @
@
ldmpc regs=r4-r11 @ restore registers and return
.ltorg @ dump constant pool
.size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
/****************************************************************************
 * void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
 *                                     int width,
 *                                     int stride,
 *                                     int x_screen,
 *                                     int y_screen);
 *
 * |R|   |1.000000 -0.000001  1.402000| |Y'|
 * |G| = |1.000000 -0.344136 -0.714136| |Pb|
 * |B|   |1.000000  1.772000  0.000000| |Pr|
 * Red scaled at twice g & b but at same precision to place it in correct
 * bit position after multiply and leave instruction count lower.
 * |R|   |258   0  408| |Y' -  16|
 * |G| = |149 -49 -104| |Cb - 128|
 * |B|   |149 258    0| |Cr - 128|
 *
 * Write four RGB565 pixels in the following order on each loop:
 * 1 3 + > down
 * 2 4 \/ left
 *
 * Kernel pattern (raw|use order):
 * 5 3 4 2 row0 row2 > down
 * 1 7 0 6 | 5 1 3 7 4 0 2 6 col0 left
 * 4 2 5 3 | 4 0 2 6 5 1 3 7 col2 \/
 * 0 6 1 7
 *
 * The dither delta added to each of the four pixels is r14 plus 0x200, 0x000,
 * 0x100 and 0x300 respectively. r14 is 0x00 or 0x80: it is initialised from
 * bit 1 of (x_screen ^ y_screen) and flipped at the start of every loop.
 *
 * Register use inside the loop:
 *   r0  = LCD port address        r1  = remaining width
 *   r2  = stride - 1              r4  = Y'_p, r5 = Cb_p, r6 = Cr_p
 *   r8  = bu, r9 = rv, r10 = guv  r14 = pattern quadrant (0x00 or 0x80)
 *   r3, r7, r11, r12 = temporaries
 */
.section .icode, "ax", %progbits
.align 2
.global lcd_write_yuv420_lines_odither
.type lcd_write_yuv420_lines_odither, %function
lcd_write_yuv420_lines_odither:
@ r0 = yuv_src
@ r1 = width
@ r2 = stride
@ r3 = x_screen
@ [sp] = y_screen
stmfd sp!, { r4-r11, lr } @ save non-scratch
ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
@ r5 = yuv_src[1] = Cb_p
@ r6 = yuv_src[2] = Cr_p
@
ldr r0, [sp, #36] @ Line up pattern and kernel quadrant
eor r14, r3, r0 @ (y_screen sits above the 9 saved words)
and r14, r14, #0x2 @
mov r14, r14, lsl #6 @ 0x00 or 0x80
@
mov r0, #0x7000000c @ r0 = &LCD2_PORT = 0x70008a0c
add r0, r0, #0x8a00 @
@
sub r2, r2, #1 @ Adjust stride because of increment
10: @ loop line @
@
ldrb r7, [r4], #1 @ r7 = *Y'_p++;
ldrb r8, [r5], #1 @ r8 = *Cb_p++;
ldrb r9, [r6], #1 @ r9 = *Cr_p++;
@
eor r14, r14, #0x80 @ flip pattern quadrant
@
sub r7, r7, #16 @ r7 = Y = (Y' - 16)*149
add r12, r7, r7, asl #2 @
add r12, r12, r12, asl #4 @
add r7, r12, r7, asl #6 @
@
sub r8, r8, #128 @ Cb -= 128
sub r9, r9, #128 @ Cr -= 128
@
add r10, r8, r8, asl #4 @ r10 = guv = Cr*104 + Cb*49
add r10, r10, r8, asl #5 @
add r10, r10, r9, asl #3 @
add r10, r10, r9, asl #5 @
add r10, r10, r9, asl #6 @
@
mov r8, r8, asl #1 @ r8 = bu = Cb*258
add r8, r8, r8, asl #7 @
@
add r9, r9, r9, asl #1 @ r9 = rv = Cr*408
add r9, r9, r9, asl #4 @
mov r9, r9, asl #3 @
@
@ compute R, G, and B
add r3, r8, r7 @ r3 = b' = Y + bu
add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
rsb r7, r10, r7 @ r7 = g' = Y - guv (guv kept positive)
@
@ r8 = bu, r9 = rv, r10 = guv
@
sub r12, r3, r3, lsr #5 @ r3 = 31/32*b + b/256
add r3, r12, r3, lsr #8 @
@
sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
add r11, r12, r11, lsr #8 @
@
sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
add r7, r12, r7, lsr #8 @
@
add r12, r14, #0x200 @ r12 = delta for pixel 1
@
add r3, r3, r12 @ b = r3 + delta
add r11, r11, r12, lsl #1 @ r = r11 + delta*2
add r7, r7, r12, lsr #1 @ g = r7 + delta/2
@
orr r12, r3, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
beq 15f @ no clamp @
movs r12, r3, asr #15 @ clamp b
mvnne r3, r12, lsr #15 @
andne r3, r3, #0x7c00 @ mask b only if clamped
movs r12, r11, asr #16 @ clamp r
mvnne r11, r12, lsr #16 @
movs r12, r7, asr #15 @ clamp g
mvnne r7, r12, lsr #15 @
15: @ no clamp @
@
ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
@
and r11, r11, #0xf800 @ pack pixel
and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
orr r3, r11, r3, lsr #10 @ (b >> 10)
@
mov r11, #LCD2_DATA_MASK @ store pixel
orr r7, r11, r3, lsr #8 @
orr r11, r11, r3 @
20: @ wait while the LCD is busy
ldr r3, [r0] @
tst r3, #LCD2_BUSY_MASK @
bne 20b @
str r7, [r0] @
str r11, [r0] @
@
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
add r12, r7, r7, asl #2 @
add r12, r12, r12, asl #4 @
add r7, r12, r7, asl #6 @
@ compute R, G, and B
add r3, r8, r7 @ r3 = b' = Y + bu
add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
rsb r7, r10, r7 @ r7 = g' = Y - guv
@
sub r12, r3, r3, lsr #5 @ r3 = 31/32*b' + b'/256
add r3, r12, r3, lsr #8 @
@
sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
add r11, r12, r11, lsr #8 @
@
sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
add r7, r12, r7, lsr #8 @
@
@ This element is zero - use r14 @
@
add r3, r3, r14 @ b = r3 + delta
add r11, r11, r14, lsl #1 @ r = r11 + delta*2
add r7, r7, r14, lsr #1 @ g = r7 + delta/2
@
orr r12, r3, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
beq 15f @ no clamp @
movs r12, r3, asr #15 @ clamp b
mvnne r3, r12, lsr #15 @
andne r3, r3, #0x7c00 @ mask b only if clamped
movs r12, r11, asr #16 @ clamp r
mvnne r11, r12, lsr #16 @
movs r12, r7, asr #15 @ clamp g
mvnne r7, r12, lsr #15 @
15: @ no clamp @
@
ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
@
and r11, r11, #0xf800 @ pack pixel
and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
orr r3, r11, r3, lsr #10 @ (b >> 10)
@
mov r11, #LCD2_DATA_MASK @ store pixel
orr r7, r11, r3, lsr #8 @
orr r11, r11, r3 @
20: @ wait while the LCD is busy
ldr r3, [r0] @
tst r3, #LCD2_BUSY_MASK @
bne 20b @
str r7, [r0] @
str r11, [r0] @
@
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
add r12, r7, r7, asl #2 @
add r12, r12, r12, asl #4 @
add r7, r12, r7, asl #6 @
@ compute R, G, and B
add r3, r8, r7 @ r3 = b' = Y + bu
add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
rsb r7, r10, r7 @ r7 = g' = Y - guv
@
@ r8 = bu, r9 = rv, r10 = guv
@
sub r12, r3, r3, lsr #5 @ r3 = 31/32*b' + b'/256
add r3, r12, r3, lsr #8 @
@
sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
add r11, r12, r11, lsr #8 @
@
sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
add r7, r12, r7, lsr #8 @
@
add r12, r14, #0x100 @ r12 = delta for pixel 3
@
add r3, r3, r12 @ b = r3 + delta
add r11, r11, r12, lsl #1 @ r = r11 + delta*2
add r7, r7, r12, lsr #1 @ g = r7 + delta/2
@
orr r12, r3, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
beq 15f @ no clamp @
movs r12, r3, asr #15 @ clamp b
mvnne r3, r12, lsr #15 @
andne r3, r3, #0x7c00 @ mask b only if clamped
movs r12, r11, asr #16 @ clamp r
mvnne r11, r12, lsr #16 @
movs r12, r7, asr #15 @ clamp g
mvnne r7, r12, lsr #15 @
15: @ no clamp @
@
ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
@
and r11, r11, #0xf800 @ pack pixel
and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
orr r3, r11, r3, lsr #10 @ (b >> 10)
@
mov r11, #LCD2_DATA_MASK @ store pixel
orr r7, r11, r3, lsr #8 @
orr r11, r11, r3 @
20: @ wait while the LCD is busy
ldr r3, [r0] @
tst r3, #LCD2_BUSY_MASK @
bne 20b @
str r7, [r0] @
str r11, [r0] @
@
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
add r12, r7, r7, asl #2 @
add r12, r12, r12, asl #4 @
add r7, r12, r7, asl #6 @
@ compute R, G, and B
add r3, r8, r7 @ r3 = b' = Y + bu
add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
rsb r7, r10, r7 @ r7 = g' = Y - guv
@
sub r12, r3, r3, lsr #5 @ r3 = 31/32*b + b/256
add r3, r12, r3, lsr #8 @
@
sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
add r11, r12, r11, lsr #8 @
@
sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
add r7, r12, r7, lsr #8 @
@
add r12, r14, #0x300 @ r12 = delta for pixel 4
@
add r3, r3, r12 @ b = r3 + delta
add r11, r11, r12, lsl #1 @ r = r11 + delta*2
add r7, r7, r12, lsr #1 @ g = r7 + delta/2
@
orr r12, r3, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
beq 15f @ no clamp @
movs r12, r3, asr #15 @ clamp b
mvnne r3, r12, lsr #15 @
andne r3, r3, #0x7c00 @ mask b only if clamped
movs r12, r11, asr #16 @ clamp r
mvnne r11, r12, lsr #16 @
movs r12, r7, asr #15 @ clamp g
mvnne r7, r12, lsr #15 @
15: @ no clamp @
@
and r11, r11, #0xf800 @ pack pixel
and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
orr r3, r11, r3, lsr #10 @ (b >> 10)
@
mov r11, #LCD2_DATA_MASK @ store pixel
orr r7, r11, r3, lsr #8 @
orr r11, r11, r3 @
20: @ wait while the LCD is busy
ldr r3, [r0] @
tst r3, #LCD2_BUSY_MASK @
bne 20b @
str r7, [r0] @
str r11, [r0] @
@
subs r1, r1, #2 @ subtract block from width
bgt 10b @ loop line @
@
ldmpc regs=r4-r11 @ restore registers and return
.ltorg @ dump constant pool
.size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither

View file

@ -36,6 +36,8 @@ static unsigned short disp_control_rev;
/* Contrast setting << 8 */
static int lcd_contrast;
static unsigned lcd_yuv_options SHAREDBSS_ATTR = 0;
/* Forward declarations */
#if defined(HAVE_LCD_ENABLE) || defined(HAVE_LCD_SLEEP)
static void lcd_display_off(void);
@ -396,6 +398,94 @@ bool lcd_active(void)
/*** update functions ***/
/* Remember the option flags for later YUV blits. lcd_blit_yuv tests the
 * stored value for LCD_YUV_DITHER to pick the dithering line writer. */
void lcd_yuv_set_options(unsigned options)
{
    lcd_yuv_options = options;
}
/* Assembly helpers used by lcd_blit_yuv. Each call converts two lines of
 * yuv420 and writes them to the LCD. */
extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
                                   int width,
                                   int stride);
extern void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
                                           int width,
                                           int stride,
                                           int x_screen, /* To align dither pattern */
                                           int y_screen);

/* Performance function to blit a YUV bitmap directly to the LCD.
 *
 * src           - three planes: src[0] is luma (Y'), src[1] is Cb and
 *                 src[2] is Cr, both chroma planes subsampled by two
 * src_x, src_y  - top-left corner of the source rectangle inside the planes
 * stride        - distance between two luma lines, in bytes
 * x, y          - top-left corner of the destination on the LCD
 * width, height - size of the rectangle; both are rounded down to an even
 *                 number. The loop always writes at least one line pair.
 *
 * Does nothing while the display is off. The dithering writer is used when
 * LCD_YUV_DITHER is set in the options stored by lcd_yuv_set_options.
 */
void lcd_blit_yuv(unsigned char * const src[3],
                  int src_x, int src_y, int stride,
                  int x, int y, int width, int height)
{
    const unsigned char *planes[3];
    const unsigned char *luma_end;
    int chroma_off;
    int lcd_col;
    int options;

    if (!display_on)
        return;

    width  &= ~1;
    height &= ~1;

    /* calculate the drawing region */

    /* The 20GB LCD is actually 128x160 but rotated 90 degrees so the origin
     * is actually the bottom left and horizontal and vertical are swapped.
     * Rockbox expects the origin to be the top left so we need to use
     * LCD_HEIGHT - 1 - y instead of just y */

    /* max vert << 8 | start vert */
    lcd_write_reg(R_VERT_RAM_ADDR_POS, ((x + width - 1) << 8) | x);

    lcd_col = LCD_HEIGHT - 1 - y + y_offset;

    /* DIT=0, BGR=1, HWM=0, I/D1-0=10, AM=0, LG2-0=000 */
    lcd_write_reg(R_ENTRY_MODE, 0x1020);

    chroma_off = (src_y * stride >> 2) + (src_x >> 1);
    planes[0] = src[0] + src_y * stride + src_x;
    planes[1] = src[1] + chroma_off;
    planes[2] = src[2] + chroma_off;
    luma_end  = planes[0] + height * stride;

    options = lcd_yuv_options;

    do
    {
        /* two-pixel-wide window: max horiz << 8 | start horiz */
        lcd_write_reg(R_HORIZ_RAM_ADDR_POS, (lcd_col << 8) | (lcd_col - 1));

        /* position cursor (set AD0-AD15) */
        /* start vert << 8 | start horiz */
        lcd_write_reg(R_RAM_ADDR_SET, (x << 8) | lcd_col);

        /* start drawing */
        lcd_send_cmd(R_WRITE_DATA_2_GRAM);

        if (options & LCD_YUV_DITHER)
        {
            lcd_write_yuv420_lines_odither(planes, width, stride, x, y);
            y -= 2;
        }
        else
        {
            lcd_write_yuv420_lines(planes, width, stride);
        }

        lcd_col -= 2;
        planes[0] += stride << 1;   /* advance two luma lines */
        planes[1] += stride >> 1;   /* advance one chroma line */
        planes[2] += stride >> 1;
    }
    while (planes[0] < luma_end);

    /* DIT=0, BGR=1, HWM=0, I/D1-0=10, AM=1, LG2-0=000 */
    lcd_write_reg(R_ENTRY_MODE, 0x1028);
}
/* Update a fraction of the display. */
void lcd_update_rect(int x0, int y0, int width, int height)
{

View file

@ -118,6 +118,168 @@ void lcd_init_device(void)
/*** update functions ***/
/* Chroma subsampling factors: one Cb/Cr sample covers CSUB_X luma pixels
 * horizontally and CSUB_Y luma lines vertically. */
#define CSUB_X 2
#define CSUB_Y 2
/* Fixed-point YUV->RGB factors. lcd_blit_yuv shifts red and blue right by 16
 * and green right by 15 to get the 5/6/5 bit components. */
#define RYFAC (31*257)
#define GYFAC (31*257)
#define BYFAC (31*257)
#define RVFAC 11170 /* 31 * 257 * 1.402 */
#define GVFAC (-5690) /* 31 * 257 * -0.714136 */
#define GUFAC (-2742) /* 31 * 257 * -0.344136 */
#define BUFAC 14118 /* 31 * 257 * 1.772 */
/* Rounding offsets added before the shift (red/blue and green) */
#define ROUNDOFFS (127*257)
#define ROUNDOFFSG (63*257)
/* Limit one scaled colour component to the range [0, max]. The unsigned
 * compare catches negative and too large values with a single test. */
static inline int yuv_clamp_component(int value, int max)
{
    if ((unsigned)value > (unsigned)max)
        value = (value < 0) ? 0 : max;
    return value;
}

/* Performance function to blit a YUV bitmap directly to the LCD.
 *
 * src           - three planes: src[0] is luma, src[1] is U and src[2] is V,
 *                 the chroma planes subsampled by CSUB_X / CSUB_Y
 * src_x, src_y  - top-left corner of the source rectangle inside the planes
 * stride        - distance between two luma lines, in bytes
 * x, y          - top-left corner of the destination on the LCD
 * width, height - size of the rectangle; width is rounded up to an even
 *                 number because pixels are converted in pairs
 *
 * Upsampling, YUV->RGB conversion and reduction to RGB565 happen in one go;
 * every pixel is sent to the LCD with lcd_send_data.
 */
void lcd_blit_yuv(unsigned char * const src[3],
                  int src_x, int src_y, int stride,
                  int x, int y, int width, int height)
{
    /* upper limits of the scaled components */
    const int red_max   = RYFAC*255 + ROUNDOFFS;
    const int green_max = GYFAC*255 + ROUNDOFFSG;
    const int blue_max  = BYFAC*255 + ROUNDOFFS;
    const int chroma_stride = stride/CSUB_X;
    int right, bottom;
    int row;

    width = (width + 1) & ~1;

    /* calculate the drawing region */
    right  = x + width - 1;
    bottom = y + height - 1;

    /* max horiz << 8 | start horiz */
    lcd_write_reg(R_HORIZ_RAM_ADDR_POS, (right << 8) | x);

    /* max vert << 8 | start vert */
    lcd_write_reg(R_VERT_RAM_ADDR_POS, (bottom << 8) | y);

    /* start vert << 8 | start horiz */
    lcd_write_reg(R_RAM_ADDR_SET, (y << 8) | x);

    /* start drawing */
    lcd_send_cmd(R_WRITE_DATA_2_GRAM);

    for (row = 0; row < height; row++, src_y++)
    {
        const unsigned char *luma = src[0] + stride * src_y + src_x;
        const unsigned char * const luma_end = luma + width;
        const int chroma_off = chroma_stride * (src_y/CSUB_Y) +
                               (src_x/CSUB_X);
        const unsigned char *u_p = src[1] + chroma_off;
        const unsigned char *v_p = src[2] + chroma_off;

        do
        {
            /* one chroma sample is shared by two neighbouring pixels */
            const int u = *u_p++ - 128;
            const int v = *v_p++ - 128;
            const int rc = RVFAC * v + ROUNDOFFS;
            const int gc = GVFAC * v + GUFAC * u + ROUNDOFFSG;
            const int bc = BUFAC * u + ROUNDOFFS;
            int lum;
            int red1, green1, blue1;
            int red2, green2, blue2;

            /* Pixel 1 */
            lum = *luma++;
            red1   = RYFAC * lum + rc;
            green1 = GYFAC * lum + gc;
            blue1  = BYFAC * lum + bc;

            /* Pixel 2 */
            lum = *luma++;
            red2   = RYFAC * lum + rc;
            green2 = GYFAC * lum + gc;
            blue2  = BYFAC * lum + bc;

            /* Out of bounds components are relatively rare, so both pixels
               are tested together first. Negative and too large values both
               exceed the cutoff when treated as unsigned, and ORing the
               components tests all of them at once. Only then each pixel,
               and finally each component, is looked at. */
            if ((unsigned)(red1 | green1 | blue1 |
                           red2 | green2 | blue2) > (unsigned)red_max)
            {
                if ((unsigned)(red1 | green1 | blue1) > (unsigned)red_max)
                {
                    red1   = yuv_clamp_component(red1, red_max);
                    green1 = yuv_clamp_component(green1, green_max);
                    blue1  = yuv_clamp_component(blue1, blue_max);
                }

                if ((unsigned)(red2 | green2 | blue2) > (unsigned)red_max)
                {
                    red2   = yuv_clamp_component(red2, red_max);
                    green2 = yuv_clamp_component(green2, green_max);
                    blue2  = yuv_clamp_component(blue2, blue_max);
                }
            }

            /* reduce to 5/6/5 bits and send as RGB565 */
            lcd_send_data(((unsigned)(red1 >> 16) << 11) |
                          ((unsigned)(green1 >> 15) << 5) |
                          (unsigned)(blue1 >> 16));

            lcd_send_data(((unsigned)(red2 >> 16) << 11) |
                          ((unsigned)(green2 >> 15) << 5) |
                          (unsigned)(blue2 >> 16));
        }
        while (luma < luma_end);
    }
}
/* Update a fraction of the display. */
void lcd_update_rect(int x0, int y0, int width, int height)
{

View file

@ -30,6 +30,9 @@
#endif
/* Display status */
#if MEMORYSIZE > 2
static unsigned lcd_yuv_options SHAREDBSS_ATTR = 0;
#endif
static bool is_lcd_enabled = true;
/* LCD command set for Samsung S6B33B2 */
@ -298,6 +301,80 @@ void lcd_set_flip(bool yesno)
/*** update functions ***/
#if MEMORYSIZE > 2 /* not for C200V2 */
/* Remember the option flags for later YUV blits. lcd_blit_yuv tests the
 * stored value for LCD_YUV_DITHER to pick the dithering line writer. */
void lcd_yuv_set_options(unsigned options)
{
    lcd_yuv_options = options;
}
/* Assembly helpers used by lcd_blit_yuv. Each call converts two lines of
 * yuv420 and writes them to the LCD. */
extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
                                   int width,
                                   int stride);
extern void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
                                           int width,
                                           int stride,
                                           int x_screen, /* To align dither pattern */
                                           int y_screen);

/* Performance function to blit a YUV bitmap directly to the LCD.
 *
 * src           - three planes: src[0] is luma, src[1] and src[2] are the
 *                 chroma planes, subsampled by two
 * src_x, src_y  - top-left corner of the source rectangle inside the planes
 * stride        - distance between two luma lines, in bytes
 * x, y          - top-left corner of the destination on the LCD
 * width, height - size of the rectangle; width is rounded down to an even
 *                 number and height/2 line pairs are written. Both must be
 *                 at least 2: the loop always writes one line pair.
 *
 * The dithering writer is used when LCD_YUV_DITHER is set in the options
 * stored by lcd_yuv_set_options; the flag is sampled once per blit.
 */
void lcd_blit_yuv(unsigned char * const src[3],
                  int src_x, int src_y, int stride,
                  int x, int y, int width, int height)
{
    unsigned char const * planes[3];
    off_t luma_off, chroma_off;
    int line_pairs;
    int dither;

    width &= ~1;
    line_pairs = height >> 1;

    /* NOTE(review): 0x1a looks like the row offset of the visible area in
     * controller RAM - confirm against the panel setup code */
    y += 0x1a;

    luma_off   = stride*src_y;
    chroma_off = (luma_off >> 2) + (src_x >> 1);
    planes[0] = src[0] + luma_off + src_x;
    planes[1] = src[1] + chroma_off;
    planes[2] = src[2] + chroma_off;

    lcd_send_command(R_ENTRY_MODE, 0x80);

    /* horizontal window: start column, then end column */
    lcd_send_command(R_X_ADDR_AREA, x);
    lcd_send_command(x + width - 1, 0);

    dither = (lcd_yuv_options & LCD_YUV_DITHER) != 0;

    do
    {
        /* vertical window: the two lines written by this pass */
        lcd_send_command(R_Y_ADDR_AREA, y);
        lcd_send_command(y + 1, 0);

        if (dither)
            lcd_write_yuv420_lines_odither(planes, width, stride, x, y);
        else
            lcd_write_yuv420_lines(planes, width, stride);

        planes[0] += stride << 1; /* Skip down two luma lines */
        planes[1] += stride >> 1; /* Skip down one chroma line */
        planes[2] += stride >> 1;
        y += 2;
    }
    while (--line_pairs > 0);
}
#endif /* MEMORYSIZE > 2 */
/* Update the display.
This must be called after all other LCD functions that change the display. */
void lcd_update(void)

View file

@ -0,0 +1,556 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id:$
*
* Copyright (C) 2007-2008 by Michael Sevakis
* Adapted for the Packard Bell Vibe 500 by Szymon Dziok
*
* Packard Bell Vibe 500 LCD assembly routines
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include "config.h"
#include "cpu.h"
/****************************************************************************
* void lcd_write_yuv420_lines(unsigned char const * const src[3],
* int width,
* int stride);
*
* |R| |1.000000 -0.000001 1.402000| |Y'|
* |G| = |1.000000 -0.334136 -0.714136| |Pb|
* |B| |1.000000 1.772000 0.000000| |Pr|
* Scaled, normalized, rounded and tweaked to yield RGB 565:
* |R| |74 0 101| |Y' - 16| >> 9
* |G| = |74 -24 -51| |Cb - 128| >> 8
* |B| |74 128 0| |Cr - 128| >> 9
*
* Write four RGB565 pixels in the following order on each loop:
* 1 3 + > down
* 2 4 \/ left
*/
.section .icode, "ax", %progbits
.align 2
.global lcd_write_yuv420_lines
.type lcd_write_yuv420_lines, %function
lcd_write_yuv420_lines:
@ Converts a two-row strip of planar YUV 4:2:0 to RGB565 and writes it
@ to the LCD. Every pass of the loop handles a 2x2 pixel group: two luma
@ columns on two rows that share one Cb and one Cr sample. Each pixel
@ goes out as two stores to LCD1_BASE + 0x10 (r3 >> 8 first, then r3),
@ each preceded by a poll of [LCD1_BASE] until LCD1_BUSY_MASK is clear.
@ r0 = yuv_src
@ r1 = width
@ r2 = stride
stmfd sp!, { r4-r11, lr } @ save non-scratch
ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
@ r5 = yuv_src[1] = Cb_p
@ r6 = yuv_src[2] = Cr_p
@
ldr r0, =LCD1_BASE @
@
sub r2, r2, #1 @ Adjust stride because of increment
10: @ loop line @
@ ---- pixel 1: first column, upper row; also derives bu, rv, guv ----
ldrb r7, [r4], #1 @ r7 = *Y'_p++;
ldrb r8, [r5], #1 @ r8 = *Cb_p++;
ldrb r9, [r6], #1 @ r9 = *Cr_p++;
@
sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74
add r12, r7, r7, asl #2 @ actually (Y' - 16)*37 and shift right
add r7, r12, r7, asl #5 @ by one less when adding - same for all
@
sub r8, r8, #128 @ Cb -= 128
sub r9, r9, #128 @ Cr -= 128
@
add r10, r9, r9, asl #1 @ r10 = Cr*51 + Cb*24
add r10, r10, r10, asl #4 @
add r10, r10, r8, asl #3 @
add r10, r10, r8, asl #4 @
@
add r11, r9, r9, asl #2 @ r9 = Cr*101
add r11, r11, r9, asl #5 @
add r9, r11, r9, asl #6 @
@
add r8, r8, #2 @ r8 = bu = (Cb + 2) >> 2, i.e. (Cb*128 + 256) >> 9
mov r8, r8, asr #2 @
add r9, r9, #256 @ r9 = rv = (r9 + 256) >> 9
mov r9, r9, asr #9 @
rsb r10, r10, #128 @ r10 = guv = (-r10 + 128) >> 8
mov r10, r10, asr #8 @
@ compute R, G, and B
add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
@ Unsigned compare: a negative channel also takes the clamp path.
@ mvn of the sign mask gives 0 for negative values and all ones for
@ overflow, which the and then reduces to the channel maximum.
orr r12, r3, r11 @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r3, #31 @ clamp b
mvnhi r3, r3, asr #31 @
andhi r3, r3, #31 @
cmp r11, #31 @ clamp r
mvnhi r11, r11, asr #31 @
andhi r11, r11, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
@
ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
@
orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11)
orr r3, r3, r7, lsl #5 @ r3 |= (g << 5)
@
movs r7, r3, lsr #8 @ store pixel (upper byte in r7 goes first)
20: @
ldr r11, [r0] @
tst r11, #LCD1_BUSY_MASK @
bne 20b @
str r7, [r0, #0x10] @
25: @
ldr r11, [r0] @
tst r11, #LCD1_BUSY_MASK @
bne 25b @
str r3, [r0, #0x10] @
@
@ ---- pixel 2: first column, lower row (reuses bu, rv, guv) ----
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
add r12, r7, r7, asl #2 @
add r7, r12, r7, asl #5 @
@ compute R, G, and B
add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
orr r12, r3, r11 @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r3, #31 @ clamp b
mvnhi r3, r3, asr #31 @
andhi r3, r3, #31 @
cmp r11, #31 @ clamp r
mvnhi r11, r11, asr #31 @
andhi r11, r11, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
@
ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
@
orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11)
orr r3, r3, r7, lsl #5 @ r3 |= (g << 5)
@
movs r7, r3, lsr #8 @ store pixel
20: @
ldr r11, [r0] @
tst r11, #LCD1_BUSY_MASK @
bne 20b @
str r7, [r0, #0x10] @
25: @
ldr r11, [r0] @
tst r11, #LCD1_BUSY_MASK @
bne 25b @
str r3, [r0, #0x10] @
@
@ ---- pixel 3: second column, upper row ----
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
add r12, r7, r7, asl #2 @
add r7, r12, r7, asl #5 @
@ compute R, G, and B
add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
orr r12, r3, r11 @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r3, #31 @ clamp b
mvnhi r3, r3, asr #31 @
andhi r3, r3, #31 @
cmp r11, #31 @ clamp r
mvnhi r11, r11, asr #31 @
andhi r11, r11, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
@
ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
@
orr r3, r3, r7, lsl #5 @ r3 = b | (g << 5)
orr r3, r3, r11, lsl #11 @ r3 |= (r << 11)
@
movs r7, r3, lsr #8 @ store pixel
20: @
ldr r11, [r0] @
tst r11, #LCD1_BUSY_MASK @
bne 20b @
str r7, [r0, #0x10] @
25: @
ldr r11, [r0] @
tst r11, #LCD1_BUSY_MASK @
bne 25b @
str r3, [r0, #0x10] @
@
@ ---- pixel 4: second column, lower row ----
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
add r12, r7, r7, asl #2 @
add r7, r12, r7, asl #5 @
@ compute R, G, and B
add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
orr r12, r3, r11 @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r3, #31 @ clamp b
mvnhi r3, r3, asr #31 @
andhi r3, r3, #31 @
cmp r11, #31 @ clamp r
mvnhi r11, r11, asr #31 @
andhi r11, r11, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
@
orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11)
orr r3, r3, r7, lsl #5 @ r3 |= (g << 5)
@
movs r7, r3, lsr #8 @ store pixel
20: @
ldr r11, [r0] @
tst r11, #LCD1_BUSY_MASK @
bne 20b @
str r7, [r0, #0x10] @
25: @
ldr r11, [r0] @
tst r11, #LCD1_BUSY_MASK @
bne 25b @
str r3, [r0, #0x10] @
@
@ The width test sits at the bottom, so one 2x2 group is always written.
subs r1, r1, #2 @ subtract block from width
bgt 10b @ loop line @
@
@ NOTE(review): ldmpc is a macro defined outside this chunk.
ldmpc regs=r4-r11 @ restore registers and return
.ltorg @ dump constant pool
.size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
/****************************************************************************
* void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
* int width,
* int stride,
* int x_screen,
* int y_screen);
*
* |R| |1.000000 -0.000001 1.402000| |Y'|
* |G| = |1.000000 -0.334136 -0.714136| |Pb|
* |B| |1.000000 1.772000 0.000000| |Pr|
* Red scaled at twice g & b but at same precision to place it in correct
* bit position after multiply and leave instruction count lower.
* |R| |258 0 408| |Y' - 16|
* |G| = |149 -49 -104| |Cb - 128|
* |B| |149 258 0| |Cr - 128|
*
* Write four RGB565 pixels in the following order on each loop:
* 1 3 + > down
* 2 4 \/ left
*
* Kernel pattern (raw|use order):
* 5 3 4 2 row0 row2 > down
* 1 7 0 6 | 5 1 3 7 4 0 2 6 col0 left
* 4 2 5 3 | 4 0 2 6 5 1 3 7 col2 \/
* 0 6 1 7
*/
.section .icode, "ax", %progbits
.align 2
.global lcd_write_yuv420_lines_odither
.type lcd_write_yuv420_lines_odither, %function
lcd_write_yuv420_lines_odither:
@ Dithering variant of lcd_write_yuv420_lines. The channels are kept at
@ higher precision (b and g in bits 14..0, r in bits 15..0), a per-pixel
@ delta is added before truncation, and the result is packed to RGB565.
@ The four pixels of a 2x2 group use the deltas 0x200, 0x000, 0x100 and
@ 0x300, each offset by r14, which alternates between 0x00 and 0x80 on
@ every loop pass. Each pixel goes out as two stores to
@ LCD1_BASE + 0x10 (r3 >> 8 first, then r3).
@ r0 = yuv_src
@ r1 = width
@ r2 = stride
@ r3 = x_screen
@ [sp] = y_screen
stmfd sp!, { r4-r11, lr } @ save non-scratch
ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
@ r5 = yuv_src[1] = Cb_p
@ r6 = yuv_src[2] = Cr_p
@
@ Nine registers were pushed above, so the fifth argument is at sp + 36.
ldr r0, [sp, #36] @ Line up pattern and kernel quadrant
eor r14, r3, r0 @
and r14, r14, #0x2 @
mov r14, r14, lsl #6 @ 0x00 or 0x80
@
ldr r0, =LCD1_BASE @
@
sub r2, r2, #1 @ Adjust stride because of increment
10: @ loop line @
@
@ ---- pixel 1: first column, upper row; also derives bu, rv, guv ----
ldrb r7, [r4], #1 @ r7 = *Y'_p++;
ldrb r8, [r5], #1 @ r8 = *Cb_p++;
ldrb r9, [r6], #1 @ r9 = *Cr_p++;
@
eor r14, r14, #0x80 @ flip pattern quadrant
@
sub r7, r7, #16 @ r7 = Y = (Y' - 16)*149
add r12, r7, r7, asl #2 @
add r12, r12, r12, asl #4 @
add r7, r12, r7, asl #6 @
@
sub r8, r8, #128 @ Cb -= 128
sub r9, r9, #128 @ Cr -= 128
@
add r10, r8, r8, asl #4 @ r10 = guv = Cr*104 + Cb*49
add r10, r10, r8, asl #5 @
add r10, r10, r9, asl #3 @
add r10, r10, r9, asl #5 @
add r10, r10, r9, asl #6 @
@
mov r8, r8, asl #1 @ r8 = bu = Cb*258
add r8, r8, r8, asl #7 @
@
add r9, r9, r9, asl #1 @ r9 = rv = Cr*408
add r9, r9, r9, asl #4 @
mov r9, r9, asl #3 @
@
@ compute R, G, and B
add r3, r8, r7 @ r3 = b' = Y + bu
add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
rsb r7, r10, r7 @ r7 = g' = Y - guv
@
@ r8 = bu, r9 = rv, r10 = guv
@
sub r12, r3, r3, lsr #5 @ r3 = 31/32*b + b/256
add r3, r12, r3, lsr #8 @
@
sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
add r11, r12, r11, lsr #8 @
@
sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
add r7, r12, r7, lsr #8 @
@
add r12, r14, #0x200 @ delta for this pixel
@
add r3, r3, r12 @ b = r3 + delta
add r11, r11, r12, lsl #1 @ r = r11 + delta*2
add r7, r7, r12, lsr #1 @ g = r7 + delta/2
@
@ Any bit at or above bit 15 (bit 16 for r) means out of range. The mvn
@ of the shifted value gives 0 for negatives and all ones for overflow.
orr r12, r3, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
beq 15f @ no clamp @
movs r12, r3, asr #15 @ clamp b
mvnne r3, r12, lsr #15 @
andne r3, r3, #0x7c00 @ mask b only if clamped
movs r12, r11, asr #16 @ clamp r
mvnne r11, r12, lsr #16 @
movs r12, r7, asr #15 @ clamp g
mvnne r7, r12, lsr #15 @
15: @ no clamp @
@
ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
@
and r11, r11, #0xf800 @ pack pixel
and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
orr r3, r11, r3, lsr #10 @ (b >> 10)
@
movs r7, r3, lsr #8 @ store pixel (upper byte in r7 goes first)
20: @
ldr r11, [r0] @
tst r11, #LCD1_BUSY_MASK @
bne 20b @
str r7, [r0, #0x10] @
25: @
ldr r11, [r0] @
tst r11, #LCD1_BUSY_MASK @
bne 25b @
str r3, [r0, #0x10] @
@
@ ---- pixel 2: first column, lower row ----
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
add r12, r7, r7, asl #2 @
add r12, r12, r12, asl #4 @
add r7, r12, r7, asl #6 @
@ compute R, G, and B
add r3, r8, r7 @ r3 = b' = Y + bu
add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
rsb r7, r10, r7 @ r7 = g' = Y - guv
@
sub r12, r3, r3, lsr #5 @ r3 = 31/32*b' + b'/256
add r3, r12, r3, lsr #8 @
@
sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
add r11, r12, r11, lsr #8 @
@
sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
add r7, r12, r7, lsr #8 @
@
@ This element is zero - use r14 @
@
add r3, r3, r14 @ b = r3 + delta
add r11, r11, r14, lsl #1 @ r = r11 + delta*2
add r7, r7, r14, lsr #1 @ g = r7 + delta/2
@
orr r12, r3, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
beq 15f @ no clamp @
movs r12, r3, asr #15 @ clamp b
mvnne r3, r12, lsr #15 @
andne r3, r3, #0x7c00 @ mask b only if clamped
movs r12, r11, asr #16 @ clamp r
mvnne r11, r12, lsr #16 @
movs r12, r7, asr #15 @ clamp g
mvnne r7, r12, lsr #15 @
15: @ no clamp @
@
ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
@
and r11, r11, #0xf800 @ pack pixel
and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
orr r3, r11, r3, lsr #10 @ (b >> 10)
@
movs r7, r3, lsr #8 @ store pixel
20: @
ldr r11, [r0] @
tst r11, #LCD1_BUSY_MASK @
bne 20b @
str r7, [r0, #0x10] @
25: @
ldr r11, [r0] @
tst r11, #LCD1_BUSY_MASK @
bne 25b @
str r3, [r0, #0x10] @
@
@ ---- pixel 3: second column, upper row ----
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
add r12, r7, r7, asl #2 @
add r12, r12, r12, asl #4 @
add r7, r12, r7, asl #6 @
@ compute R, G, and B
add r3, r8, r7 @ r3 = b' = Y + bu
add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
rsb r7, r10, r7 @ r7 = g' = Y - guv
@
@ r8 = bu, r9 = rv, r10 = guv
@
sub r12, r3, r3, lsr #5 @ r3 = 31/32*b' + b'/256
add r3, r12, r3, lsr #8 @
@
sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
add r11, r12, r11, lsr #8 @
@
sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
add r7, r12, r7, lsr #8 @
@
add r12, r14, #0x100 @ delta for this pixel
@
add r3, r3, r12 @ b = r3 + delta
add r11, r11, r12, lsl #1 @ r = r11 + delta*2
add r7, r7, r12, lsr #1 @ g = r7 + delta/2
@
orr r12, r3, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
beq 15f @ no clamp @
movs r12, r3, asr #15 @ clamp b
mvnne r3, r12, lsr #15 @
andne r3, r3, #0x7c00 @ mask b only if clamped
movs r12, r11, asr #16 @ clamp r
mvnne r11, r12, lsr #16 @
movs r12, r7, asr #15 @ clamp g
mvnne r7, r12, lsr #15 @
15: @ no clamp @
@
ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
@
and r11, r11, #0xf800 @ pack pixel
and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
orr r3, r11, r3, lsr #10 @ (b >> 10)
@
movs r7, r3, lsr #8 @ store pixel
20: @
ldr r11, [r0] @
tst r11, #LCD1_BUSY_MASK @
bne 20b @
str r7, [r0, #0x10] @
25: @
ldr r11, [r0] @
tst r11, #LCD1_BUSY_MASK @
bne 25b @
str r3, [r0, #0x10] @
@
@ ---- pixel 4: second column, lower row ----
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
add r12, r7, r7, asl #2 @
add r12, r12, r12, asl #4 @
add r7, r12, r7, asl #6 @
@ compute R, G, and B
add r3, r8, r7 @ r3 = b' = Y + bu
add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
rsb r7, r10, r7 @ r7 = g' = Y - guv
@
sub r12, r3, r3, lsr #5 @ r3 = 31/32*b + b/256
add r3, r12, r3, lsr #8 @
@
sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
add r11, r12, r11, lsr #8 @
@
sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
add r7, r12, r7, lsr #8 @
@
add r12, r14, #0x300 @ delta for this pixel
@
add r3, r3, r12 @ b = r3 + delta
add r11, r11, r12, lsl #1 @ r = r11 + delta*2
add r7, r7, r12, lsr #1 @ g = r7 + delta/2
@
orr r12, r3, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
beq 15f @ no clamp @
movs r12, r3, asr #15 @ clamp b
mvnne r3, r12, lsr #15 @
andne r3, r3, #0x7c00 @ mask b only if clamped
movs r12, r11, asr #16 @ clamp r
mvnne r11, r12, lsr #16 @
movs r12, r7, asr #15 @ clamp g
mvnne r7, r12, lsr #15 @
15: @ no clamp @
@
and r11, r11, #0xf800 @ pack pixel
and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
orr r3, r11, r3, lsr #10 @ (b >> 10)
@
movs r7, r3, lsr #8 @ store pixel
20: @
ldr r11, [r0] @
tst r11, #LCD1_BUSY_MASK @
bne 20b @
str r7, [r0, #0x10] @
25: @
ldr r11, [r0] @
tst r11, #LCD1_BUSY_MASK @
bne 25b @
str r3, [r0, #0x10] @
@
@ The width test sits at the bottom, so one 2x2 group is always written.
subs r1, r1, #2 @ subtract block from width
bgt 10b @ loop line @
@
@ NOTE(review): ldmpc is a macro defined outside this chunk.
ldmpc regs=r4-r11 @ restore registers and return
.ltorg @ dump constant pool
.size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither

View file

@ -35,6 +35,8 @@ static unsigned short disp_control_rev;
/* Contrast setting << 8 */
static int lcd_contrast;
/* Option bits for lcd_blit_yuv(); written by lcd_yuv_set_options().
   lcd_blit_yuv() tests the LCD_YUV_DITHER bit to pick the line writer. */
static unsigned lcd_yuv_options SHAREDBSS_ATTR = 0;
/* Forward declarations */
#if defined(HAVE_LCD_ENABLE) || defined(HAVE_LCD_SLEEP)
static void lcd_display_off(void);
@ -375,6 +377,79 @@ bool lcd_active(void)
/*** update functions ***/
/* Remember the option bits that lcd_blit_yuv() consults on its next call.
 *
 * options - bit mask; lcd_blit_yuv() checks it against LCD_YUV_DITHER.
 */
void lcd_yuv_set_options(unsigned options)
{
    lcd_yuv_options = options;
}
/* Line write helper functions for lcd_blit_yuv, defined outside this file.
   Each call writes two lines of yuv420. The _odither variant is the one
   lcd_blit_yuv calls when LCD_YUV_DITHER is set; it additionally receives the
   screen position of the strip being written. */
extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
int width,
int stride);
extern void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
int width,
int stride,
int x_screen, /* To align dither pattern */
int y_screen);
/* Performance function to blit a YUV bitmap directly to the LCD.
 *
 * src          - three plane pointers; [0] is addressed at full resolution,
 *                [1] and [2] at half resolution in both directions
 * src_x, src_y - top-left corner of the region inside the source planes
 * stride       - distance in bytes between two rows of plane [0]
 * x, y         - destination position on the screen
 * width        - region width in pixels, rounded down to an even number
 * height       - region height in pixels, rounded down to an even number
 *
 * Does nothing while the display is off. Both coordinates are mirrored
 * (LCD_WIDTH - 1 - x, LCD_HEIGHT - 1 - y) before being handed to the
 * controller, and the strip position counts downwards.
 *
 * NOTE: one two-row strip is always written, even when height rounds down
 * to 0, because the loop tests its end condition at the bottom.
 */
void lcd_blit_yuv(unsigned char * const src[3],
                  int src_x, int src_y, int stride,
                  int x, int y, int width, int height)
{
    const unsigned char *planes[3];
    const unsigned char *luma_end;
    unsigned dither;
    int ram_x;
    int ram_y;

    if (!display_on)
        return;

    width &= ~1;
    height &= ~1;

    ram_x = LCD_WIDTH - 1 - x;
    lcd_write_reg(R_VERT_RAM_ADDR_POS,
                  (ram_x << 8) | ((LCD_WIDTH-1) - (x + width - 1)));

    ram_y = LCD_HEIGHT - 1 - y;

    lcd_write_reg(R_ENTRY_MODE, 0x1000);

    /* Chroma planes are subsampled 2x2, hence the /4 row offset and the
       halved column offset; both chroma planes share the same offset. */
    planes[0] = src[0] + src_y * stride + src_x;
    planes[1] = src[1] + (src_y * stride >> 2) + (src_x >> 1);
    planes[2] = src[2] + (planes[1] - src[1]);
    luma_end = planes[0] + height * stride;

    /* The option word is sampled once per blit. */
    dither = lcd_yuv_options & LCD_YUV_DITHER;

    for (;;)
    {
        /* Window and write pointer for the two rows of this strip */
        lcd_write_reg(R_HORIZ_RAM_ADDR_POS, (ram_y << 8) | (ram_y - 1));
        lcd_write_reg(R_RAM_ADDR_SET, (ram_x << 8) | ram_y);

        /* start drawing */
        lcd_send_cmd(R_WRITE_DATA_2_GRAM);

        if (dither)
        {
            lcd_write_yuv420_lines_odither(planes, width, stride, x, y);
            y -= 2; /* y only feeds the dither pattern alignment */
        }
        else
        {
            lcd_write_yuv420_lines(planes, width, stride);
        }

        ram_y -= 2;
        planes[0] += stride << 1; /* two luma rows down */
        planes[1] += stride >> 1; /* one chroma row down */
        planes[2] += stride >> 1;

        if (!(planes[0] < luma_end))
            break;
    }

    lcd_write_reg(R_ENTRY_MODE, 0x1008);
}
/* Update a fraction of the display. */
void lcd_update_rect(int x0, int y0, int width, int height)
{

View file

@ -0,0 +1,570 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2007-2008 by Michael Sevakis
*
* H10 20GB LCD assembly routines
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include "config.h"
#include "cpu.h"
/****************************************************************************
* void lcd_write_yuv420_lines(unsigned char const * const src[3],
* int width,
* int stride);
*
* |R| |1.000000 -0.000001 1.402000| |Y'|
* |G| = |1.000000 -0.334136 -0.714136| |Pb|
* |B| |1.000000 1.772000 0.000000| |Pr|
* Scaled, normalized, rounded and tweaked to yield RGB 565:
* |R| |74 0 101| |Y' - 16| >> 9
* |G| = |74 -24 -51| |Cb - 128| >> 8
* |B| |74 128 0| |Cr - 128| >> 9
*
* Write four RGB565 pixels in the following order on each loop:
* 1 3 + > down
* 2 4 \/ left
*/
.section .icode, "ax", %progbits
.align 2
.global lcd_write_yuv420_lines
.type lcd_write_yuv420_lines, %function
lcd_write_yuv420_lines:
@ Converts a two-row strip of planar YUV 4:2:0 to RGB565 and writes it
@ to the LCD. Every pass of the loop handles a 2x2 pixel group: two luma
@ columns on two rows that share one Cb and one Cr sample. Each pixel
@ goes out as two stores to the port at 0x70008a0c (pixel >> 8 first,
@ then pixel), both ORed with LCD2_DATA_MASK and each preceded by a poll
@ of the same address until LCD2_BUSY_MASK is clear.
@ r0 = yuv_src
@ r1 = width
@ r2 = stride
stmfd sp!, { r4-r11, lr } @ save non-scratch
ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
@ r5 = yuv_src[1] = Cb_p
@ r6 = yuv_src[2] = Cr_p
@
mov r0, #0x7000000c @ r0 = &LCD2_PORT = 0x70008a0c
add r0, r0, #0x8a00 @
mov r14, #LCD2_DATA_MASK @ kept in r14 for the whole function
@
sub r2, r2, #1 @ Adjust stride because of increment
10: @ loop line @
@ ---- pixel 1: first column, upper row; also derives bu, rv, guv ----
ldrb r7, [r4], #1 @ r7 = *Y'_p++;
ldrb r8, [r5], #1 @ r8 = *Cb_p++;
ldrb r9, [r6], #1 @ r9 = *Cr_p++;
@
sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74
add r12, r7, r7, asl #2 @ actually (Y' - 16)*37 and shift right
add r7, r12, r7, asl #5 @ by one less when adding - same for all
@
sub r8, r8, #128 @ Cb -= 128
sub r9, r9, #128 @ Cr -= 128
@
add r10, r9, r9, asl #1 @ r10 = Cr*51 + Cb*24
add r10, r10, r10, asl #4 @
add r10, r10, r8, asl #3 @
add r10, r10, r8, asl #4 @
@
add r11, r9, r9, asl #2 @ r9 = Cr*101
add r11, r11, r9, asl #5 @
add r9, r11, r9, asl #6 @
@
add r8, r8, #2 @ r8 = bu = (Cb + 2) >> 2, i.e. (Cb*128 + 256) >> 9
mov r8, r8, asr #2 @
add r9, r9, #256 @ r9 = rv = (r9 + 256) >> 9
mov r9, r9, asr #9 @
rsb r10, r10, #128 @ r10 = guv = (-r10 + 128) >> 8
mov r10, r10, asr #8 @
@ compute R, G, and B
add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
@ Unsigned compare: a negative channel also takes the clamp path.
@ mvn of the sign mask gives 0 for negative values and all ones for
@ overflow, which the and then reduces to the channel maximum.
orr r12, r3, r11 @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r3, #31 @ clamp b
mvnhi r3, r3, asr #31 @
andhi r3, r3, #31 @
cmp r11, #31 @ clamp r
mvnhi r11, r11, asr #31 @
andhi r11, r11, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
@
ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
@
orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11)
orr r3, r3, r7, lsl #5 @ r3 |= (g << 5)
@
orr r7, r14, r3, lsr #8 @ store pixel (r7 = first word, r11 = second)
orr r11, r14, r3 @
20: @
ldr r3, [r0] @
tst r3, #LCD2_BUSY_MASK @
bne 20b @
str r7, [r0] @
20: @
ldr r3, [r0] @
tst r3, #LCD2_BUSY_MASK @
bne 20b @
str r11, [r0] @
@
@ ---- pixel 2: first column, lower row (reuses bu, rv, guv) ----
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
add r12, r7, r7, asl #2 @
add r7, r12, r7, asl #5 @
@ compute R, G, and B
add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
orr r12, r3, r11 @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r3, #31 @ clamp b
mvnhi r3, r3, asr #31 @
andhi r3, r3, #31 @
cmp r11, #31 @ clamp r
mvnhi r11, r11, asr #31 @
andhi r11, r11, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
@
ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
@
orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11)
orr r3, r3, r7, lsl #5 @ r3 |= (g << 5)
@
orr r7, r14, r3, lsr #8 @ store pixel
orr r11, r14, r3 @
20: @
ldr r3, [r0] @
tst r3, #LCD2_BUSY_MASK @
bne 20b @
str r7, [r0] @
20: @
ldr r3, [r0] @
tst r3, #LCD2_BUSY_MASK @
bne 20b @
str r11, [r0] @
@
@ ---- pixel 3: second column, upper row ----
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
add r12, r7, r7, asl #2 @
add r7, r12, r7, asl #5 @
@ compute R, G, and B
add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
orr r12, r3, r11 @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r3, #31 @ clamp b
mvnhi r3, r3, asr #31 @
andhi r3, r3, #31 @
cmp r11, #31 @ clamp r
mvnhi r11, r11, asr #31 @
andhi r11, r11, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
@
ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
@
orr r3, r3, r7, lsl #5 @ r3 = b | (g << 5)
orr r3, r3, r11, lsl #11 @ r3 |= (r << 11)
@
orr r7, r14, r3, lsr #8 @ store pixel
orr r11, r14, r3 @
20: @
ldr r3, [r0] @
tst r3, #LCD2_BUSY_MASK @
bne 20b @
str r7, [r0] @
20: @
ldr r3, [r0] @
tst r3, #LCD2_BUSY_MASK @
bne 20b @
str r11, [r0] @
@
@ ---- pixel 4: second column, lower row ----
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
add r12, r7, r7, asl #2 @
add r7, r12, r7, asl #5 @
@ compute R, G, and B
add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
orr r12, r3, r11 @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r3, #31 @ clamp b
mvnhi r3, r3, asr #31 @
andhi r3, r3, #31 @
cmp r11, #31 @ clamp r
mvnhi r11, r11, asr #31 @
andhi r11, r11, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
@
orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11)
orr r3, r3, r7, lsl #5 @ r3 |= (g << 5)
@
orr r7, r14, r3, lsr #8 @ store pixel
orr r11, r14, r3 @
20: @
ldr r3, [r0] @
tst r3, #LCD2_BUSY_MASK @
bne 20b @
str r7, [r0] @
20: @
ldr r3, [r0] @
tst r3, #LCD2_BUSY_MASK @
bne 20b @
str r11, [r0] @
@
@ The width test sits at the bottom, so one 2x2 group is always written.
subs r1, r1, #2 @ subtract block from width
bgt 10b @ loop line @
@
@ NOTE(review): ldmpc is a macro defined outside this chunk.
ldmpc regs=r4-r11 @ restore registers and return
.ltorg @ dump constant pool
.size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
/****************************************************************************
* void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
* int width,
* int stride,
* int x_screen,
* int y_screen);
*
* |R| |1.000000 -0.000001 1.402000| |Y'|
* |G| = |1.000000 -0.334136 -0.714136| |Pb|
* |B| |1.000000 1.772000 0.000000| |Pr|
* Red scaled at twice g & b but at same precision to place it in correct
* bit position after multiply and leave instruction count lower.
* |R| |258 0 408| |Y' - 16|
* |G| = |149 -49 -104| |Cb - 128|
* |B| |149 258 0| |Cr - 128|
*
* Write four RGB565 pixels in the following order on each loop:
* 1 3 + > down
* 2 4 \/ left
*
* Kernel pattern (raw|use order):
* 5 3 4 2 row0 row2 > down
* 1 7 0 6 | 5 1 3 7 4 0 2 6 col0 left
* 4 2 5 3 | 4 0 2 6 5 1 3 7 col2 \/
* 0 6 1 7
*/
.section .icode, "ax", %progbits
.align 2
.global lcd_write_yuv420_lines_odither
.type lcd_write_yuv420_lines_odither, %function
lcd_write_yuv420_lines_odither:
@ Dithering variant of lcd_write_yuv420_lines. The channels are kept at
@ higher precision (b and g in bits 14..0, r in bits 15..0), a per-pixel
@ delta is added before truncation, and the result is packed to RGB565.
@ The four pixels of a 2x2 group use the deltas 0x200, 0x000, 0x100 and
@ 0x300, each offset by r14, which alternates between 0x00 and 0x80 on
@ every loop pass. r14 is taken by the pattern here, so LCD2_DATA_MASK
@ is reloaded into r11 before every pixel store.
@ r0 = yuv_src
@ r1 = width
@ r2 = stride
@ r3 = x_screen
@ [sp] = y_screen
stmfd sp!, { r4-r11, lr } @ save non-scratch
ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
@ r5 = yuv_src[1] = Cb_p
@ r6 = yuv_src[2] = Cr_p
@
@ Nine registers were pushed above, so the fifth argument is at sp + 36.
ldr r0, [sp, #36] @ Line up pattern and kernel quadrant
eor r14, r3, r0 @
and r14, r14, #0x2 @
mov r14, r14, lsl #6 @ 0x00 or 0x80
@
mov r0, #0x7000000c @ r0 = &LCD2_PORT = 0x70008a0c
add r0, r0, #0x8a00 @
@
sub r2, r2, #1 @ Adjust stride because of increment
10: @ loop line @
@
@ ---- pixel 1: first column, upper row; also derives bu, rv, guv ----
ldrb r7, [r4], #1 @ r7 = *Y'_p++;
ldrb r8, [r5], #1 @ r8 = *Cb_p++;
ldrb r9, [r6], #1 @ r9 = *Cr_p++;
@
eor r14, r14, #0x80 @ flip pattern quadrant
@
sub r7, r7, #16 @ r7 = Y = (Y' - 16)*149
add r12, r7, r7, asl #2 @
add r12, r12, r12, asl #4 @
add r7, r12, r7, asl #6 @
@
sub r8, r8, #128 @ Cb -= 128
sub r9, r9, #128 @ Cr -= 128
@
add r10, r8, r8, asl #4 @ r10 = guv = Cr*104 + Cb*49
add r10, r10, r8, asl #5 @
add r10, r10, r9, asl #3 @
add r10, r10, r9, asl #5 @
add r10, r10, r9, asl #6 @
@
mov r8, r8, asl #1 @ r8 = bu = Cb*258
add r8, r8, r8, asl #7 @
@
add r9, r9, r9, asl #1 @ r9 = rv = Cr*408
add r9, r9, r9, asl #4 @
mov r9, r9, asl #3 @
@
@ compute R, G, and B
add r3, r8, r7 @ r3 = b' = Y + bu
add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
rsb r7, r10, r7 @ r7 = g' = Y - guv
@
@ r8 = bu, r9 = rv, r10 = guv
@
sub r12, r3, r3, lsr #5 @ r3 = 31/32*b + b/256
add r3, r12, r3, lsr #8 @
@
sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
add r11, r12, r11, lsr #8 @
@
sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
add r7, r12, r7, lsr #8 @
@
add r12, r14, #0x200 @ delta for this pixel
@
add r3, r3, r12 @ b = r3 + delta
add r11, r11, r12, lsl #1 @ r = r11 + delta*2
add r7, r7, r12, lsr #1 @ g = r7 + delta/2
@
@ Any bit at or above bit 15 (bit 16 for r) means out of range. The mvn
@ of the shifted value gives 0 for negatives and all ones for overflow.
orr r12, r3, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
beq 15f @ no clamp @
movs r12, r3, asr #15 @ clamp b
mvnne r3, r12, lsr #15 @
andne r3, r3, #0x7c00 @ mask b only if clamped
movs r12, r11, asr #16 @ clamp r
mvnne r11, r12, lsr #16 @
movs r12, r7, asr #15 @ clamp g
mvnne r7, r12, lsr #15 @
15: @ no clamp @
@
ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
@
and r11, r11, #0xf800 @ pack pixel
and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
orr r3, r11, r3, lsr #10 @ (b >> 10)
@
mov r11, #LCD2_DATA_MASK @ store pixel
orr r7, r11, r3, lsr #8 @
orr r11, r11, r3 @
20: @
ldr r3, [r0] @
tst r3, #LCD2_BUSY_MASK @
bne 20b @
str r7, [r0] @
20: @
ldr r3, [r0] @
tst r3, #LCD2_BUSY_MASK @
bne 20b @
str r11, [r0] @
@
@ ---- pixel 2: first column, lower row ----
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
add r12, r7, r7, asl #2 @
add r12, r12, r12, asl #4 @
add r7, r12, r7, asl #6 @
@ compute R, G, and B
add r3, r8, r7 @ r3 = b' = Y + bu
add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
rsb r7, r10, r7 @ r7 = g' = Y - guv
@
sub r12, r3, r3, lsr #5 @ r3 = 31/32*b' + b'/256
add r3, r12, r3, lsr #8 @
@
sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
add r11, r12, r11, lsr #8 @
@
sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
add r7, r12, r7, lsr #8 @
@
@ This element is zero - use r14 @
@
add r3, r3, r14 @ b = r3 + delta
add r11, r11, r14, lsl #1 @ r = r11 + delta*2
add r7, r7, r14, lsr #1 @ g = r7 + delta/2
@
orr r12, r3, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
beq 15f @ no clamp @
movs r12, r3, asr #15 @ clamp b
mvnne r3, r12, lsr #15 @
andne r3, r3, #0x7c00 @ mask b only if clamped
movs r12, r11, asr #16 @ clamp r
mvnne r11, r12, lsr #16 @
movs r12, r7, asr #15 @ clamp g
mvnne r7, r12, lsr #15 @
15: @ no clamp @
@
ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
@
and r11, r11, #0xf800 @ pack pixel
and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
orr r3, r11, r3, lsr #10 @ (b >> 10)
@
mov r11, #LCD2_DATA_MASK @ store pixel
orr r7, r11, r3, lsr #8 @
orr r11, r11, r3 @
20: @
ldr r3, [r0] @
tst r3, #LCD2_BUSY_MASK @
bne 20b @
str r7, [r0] @
20: @
ldr r3, [r0] @
tst r3, #LCD2_BUSY_MASK @
bne 20b @
str r11, [r0] @
@
@ ---- pixel 3: second column, upper row ----
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
add r12, r7, r7, asl #2 @
add r12, r12, r12, asl #4 @
add r7, r12, r7, asl #6 @
@ compute R, G, and B
add r3, r8, r7 @ r3 = b' = Y + bu
add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
rsb r7, r10, r7 @ r7 = g' = Y - guv
@
@ r8 = bu, r9 = rv, r10 = guv
@
sub r12, r3, r3, lsr #5 @ r3 = 31/32*b' + b'/256
add r3, r12, r3, lsr #8 @
@
sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
add r11, r12, r11, lsr #8 @
@
sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
add r7, r12, r7, lsr #8 @
@
add r12, r14, #0x100 @ delta for this pixel
@
add r3, r3, r12 @ b = r3 + delta
add r11, r11, r12, lsl #1 @ r = r11 + delta*2
add r7, r7, r12, lsr #1 @ g = r7 + delta/2
@
orr r12, r3, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
beq 15f @ no clamp @
movs r12, r3, asr #15 @ clamp b
mvnne r3, r12, lsr #15 @
andne r3, r3, #0x7c00 @ mask b only if clamped
movs r12, r11, asr #16 @ clamp r
mvnne r11, r12, lsr #16 @
movs r12, r7, asr #15 @ clamp g
mvnne r7, r12, lsr #15 @
15: @ no clamp @
@
ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
@
and r11, r11, #0xf800 @ pack pixel
and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
orr r3, r11, r3, lsr #10 @ (b >> 10)
@
mov r11, #LCD2_DATA_MASK @ store pixel
orr r7, r11, r3, lsr #8 @
orr r11, r11, r3 @
20: @
ldr r3, [r0] @
tst r3, #LCD2_BUSY_MASK @
bne 20b @
str r7, [r0] @
20: @
ldr r3, [r0] @
tst r3, #LCD2_BUSY_MASK @
bne 20b @
str r11, [r0] @
@
@ ---- pixel 4: second column, lower row ----
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
add r12, r7, r7, asl #2 @
add r12, r12, r12, asl #4 @
add r7, r12, r7, asl #6 @
@ compute R, G, and B
add r3, r8, r7 @ r3 = b' = Y + bu
add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
rsb r7, r10, r7 @ r7 = g' = Y - guv
@
sub r12, r3, r3, lsr #5 @ r3 = 31/32*b + b/256
add r3, r12, r3, lsr #8 @
@
sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
add r11, r12, r11, lsr #8 @
@
sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
add r7, r12, r7, lsr #8 @
@
add r12, r14, #0x300 @ delta for this pixel
@
add r3, r3, r12 @ b = r3 + delta
add r11, r11, r12, lsl #1 @ r = r11 + delta*2
add r7, r7, r12, lsr #1 @ g = r7 + delta/2
@
orr r12, r3, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
beq 15f @ no clamp @
movs r12, r3, asr #15 @ clamp b
mvnne r3, r12, lsr #15 @
andne r3, r3, #0x7c00 @ mask b only if clamped
movs r12, r11, asr #16 @ clamp r
mvnne r11, r12, lsr #16 @
movs r12, r7, asr #15 @ clamp g
mvnne r7, r12, lsr #15 @
15: @ no clamp @
@
and r11, r11, #0xf800 @ pack pixel
and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
orr r3, r11, r3, lsr #10 @ (b >> 10)
@
mov r11, #LCD2_DATA_MASK @ store pixel
orr r7, r11, r3, lsr #8 @
orr r11, r11, r3 @
20: @
ldr r3, [r0] @
tst r3, #LCD2_BUSY_MASK @
bne 20b @
str r7, [r0] @
20: @
ldr r3, [r0] @
tst r3, #LCD2_BUSY_MASK @
bne 20b @
str r11, [r0] @
@
@ The width test sits at the bottom, so one 2x2 group is always written.
subs r1, r1, #2 @ subtract block from width
bgt 10b @ loop line @
@
@ NOTE(review): ldmpc is a macro defined outside this chunk.
ldmpc regs=r4-r11 @ restore registers and return
.ltorg @ dump constant pool
.size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither

View file

@ -81,6 +81,7 @@
static bool lcd_enabled;
/* Display status */
/* Option bits for lcd_blit_yuv(); written by lcd_yuv_set_options().
   lcd_blit_yuv() tests the LCD_YUV_DITHER bit to pick the line writer. */
static unsigned lcd_yuv_options SHAREDBSS_ATTR = 0;
static unsigned mad_ctrl = 0;
/* wait for LCD */
@ -312,6 +313,86 @@ void lcd_set_flip(bool yesno)
lcd_send_data(mad_ctrl);
}
/* Remember the option bits that lcd_blit_yuv() consults on its next call.
 *
 * options - bit mask; lcd_blit_yuv() checks it against LCD_YUV_DITHER.
 */
void lcd_yuv_set_options(unsigned options)
{
    lcd_yuv_options = options;
}
/* Line write helper functions for lcd_blit_yuv, defined outside this file.
   Each call writes two lines of yuv420. The _odither variant is the one
   lcd_blit_yuv calls when LCD_YUV_DITHER is set; it additionally receives the
   screen position of the strip being written. */
extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
int width, int stride);
extern void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
int width, int stride,
int x_screen, int y_screen);
/* Performance function to blit a YUV bitmap directly to the LCD */
void lcd_blit_yuv(unsigned char * const src[3],
int src_x, int src_y, int stride,
int x, int y, int width, int height)
{
unsigned char const * yuv_src[3];
off_t z;
/* Sorry, but width and height must be >= 2 or else */
width &= ~1;
height >>= 1;
z = stride*src_y;
yuv_src[0] = src[0] + z + src_x;
yuv_src[1] = src[1] + (z >> 2) + (src_x >> 1);
yuv_src[2] = src[2] + (yuv_src[1] - src[1]);
/* Set vertical address mode */
lcd_send_cmd(MADCTR);
lcd_send_data(mad_ctrl | (1<<5));
lcd_send_cmd(RASET);
lcd_send_data(x);
lcd_send_data(x + width - 1);
if (lcd_yuv_options & LCD_YUV_DITHER)
{
do
{
lcd_send_cmd(CASET);
lcd_send_data(y);
lcd_send_data(y + 1);
lcd_send_cmd(RAMWR);
lcd_write_yuv420_lines_odither(yuv_src, width, stride, x, y);
yuv_src[0] += stride << 1; /* Skip down two luma lines */
yuv_src[1] += stride >> 1; /* Skip down one chroma line */
yuv_src[2] += stride >> 1;
y += 2;
}
while (--height > 0);
}
else
{
do
{
lcd_send_cmd(CASET);
lcd_send_data(y);
lcd_send_data(y + 1);
lcd_send_cmd(RAMWR);
lcd_write_yuv420_lines(yuv_src, width, stride);
yuv_src[0] += stride << 1; /* Skip down two luma lines */
yuv_src[1] += stride >> 1; /* Skip down one chroma line */
yuv_src[2] += stride >> 1;
y += 2;
}
while (--height > 0);
}
/* Restore the address mode */
lcd_send_cmd(MADCTR);
lcd_send_data(mad_ctrl);
}
/* Update the display.
This must be called after all other LCD functions that change the display. */
void lcd_update(void)

View file

@ -0,0 +1,140 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id:$
*
* Copyright (C) 2010 by Szymon Dziok
*
* Philips Gogear HDD6330 LCD assembly routine
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include "config.h"
#include "cpu.h"
/****************************************************************************
 * void lcd_yuv_write_inner_loop(unsigned char const * const ysrc,
 *                               unsigned char const * const usrc,
 *                               unsigned char const * const vsrc,
 *                               int width);
 *
 * Converts one line of Y'CbCr samples to RGB565 and stores it to the LCD2
 * block data register, two pixels per loop iteration.
 *
 * ysrc  - luma samples, one byte per pixel
 * usrc  - Cb samples, one byte per pair of pixels
 * vsrc  - Cr samples, one byte per pair of pixels
 * width - number of pixels; the counter is decremented by two and tested
 *         only after a pair has been sent, so at least one pair is always
 *         written and an odd width behaves like the next even value
 *
 * Each pixel is byte-swapped and the pair is packed into one 32-bit word
 * (first pixel in the low half-word) before it is stored.
 */
.section .icode, "ax", %progbits
.align 2
.global lcd_yuv_write_inner_loop
.type lcd_yuv_write_inner_loop, %function
lcd_yuv_write_inner_loop:
@ r0 = ysrc
@ r1 = usrc
@ r2 = vsrc
@ r3 = width
stmfd sp!, { r4-r11, lr } @ save regs
mov r4, #0x70000000 @ r4 = LCD2_BLOCK_CTRL - 0x20
add r4, r4, #0x8a00 @
add r5, r4, #0x100 @ r5 = LCD2_BLOCK_DATA
10: @ loop
@ chroma terms are computed once and shared by both pixels of the pair
ldrb r7, [r1], #1 @ *usrc++
ldrb r8, [r2], #1 @ *vsrc++
sub r7, r7, #128 @ Cb -= 128
sub r8, r8, #128 @ Cr -= 128
add r10, r8, r8, asl #2 @ Cr*101
add r10, r10, r8, asl #5
add r10, r10, r8, asl #6
add r11, r8, r8, asl #1 @ Cr*51 + Cb*24
add r11, r11, r11, asl #4
add r11, r11, r7, asl #3
add r11, r11, r7, asl #4
add r12, r7, #2 @ r12 = bu = (Cb*128 + 256) >> 9
mov r12, r12, asr #2
add r10, r10, #256 @ r10 = rv = (Cr*101 + 256) >> 9
mov r10, r10, asr #9
rsb r11, r11, #128 @ r11 = guv = (-r11 + 128) >> 8
mov r11, r11, asr #8
@ pixel_1
ldrb r7, [r0], #1 @ *ysrc++
sub r7, r7, #16 @ Y = (Y' - 16) * 37
add r8, r7, r7, asl #2
add r7, r8, r7, asl #5
add r9, r10, r7, asr #8 @ R = (Y >> 8) + rv
add r8, r11, r7, asr #7 @ G = (Y >> 7) + guv
add r7, r12, r7, asr #8 @ B = (Y >> 8) + bu
@ "hi" is an unsigned compare, so it is taken for negative values as well
@ as for values above the maximum; mvn of the sign mask then yields 0 for a
@ negative value and all ones (masked to the maximum) for a too large one
cmp r9, #31 @ clamp R
mvnhi r9, r9, asr #31
andhi r9, r9, #31
cmp r8, #63 @ clamp G
mvnhi r8, r8, asr #31
andhi r8, r8, #63
cmp r7, #31 @ clamp B
mvnhi r7, r7, asr #31
andhi r7, r7, #31
orr r6, r7, r8, lsl #5 @ pack pixel
orr r6, r6, r9, lsl #11
mov r7, r6, lsl #8 @ swap bytes
and r7, r7, #0xff00
add r6, r7, r6, lsr #8
@ r6 = byte-swapped pixel_1 in bits 0-15
@ pixel_2
ldrb r7, [r0], #1 @ *ysrc++
sub r7, r7, #16 @ Y = (Y' - 16) * 37
add r8, r7, r7, asl #2
add r7, r8, r7, asl #5
add r9, r10, r7, asr #8 @ R = (Y >> 8) + rv
add r8, r11, r7, asr #7 @ G = (Y >> 7) + guv
add r7, r12, r7, asr #8 @ B = (Y >> 8) + bu
cmp r9, #31 @ clamp R
mvnhi r9, r9, asr #31
andhi r9, r9, #31
cmp r8, #63 @ clamp G
mvnhi r8, r8, asr #31
andhi r8, r8, #63
cmp r7, #31 @ clamp B
mvnhi r7, r7, asr #31
andhi r7, r7, #31
orr r7, r7, r8, lsl #5 @ pack pixel
orr r7, r7, r9, lsl #11
@ low byte of pixel_2 goes to bits 24-31, high byte to bits 16-23
orr r6, r6, r7, lsl #24 @ swap bytes and add pixels simultaneously
mov r7, r7, lsr #8
orr r6, r6, r7, lsl #16
11: @ while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_TXOK));
ldr r11, [r4, #0x20] @
tst r11, #0x1000000 @
beq 11b @
@ r11 (guv) was overwritten by the poll; it is recomputed at label 10
str r6, [r5] @ send two pixels
subs r3, r3, #2 @ decrease width
bgt 10b @ loop
@ NOTE(review): ldmpc is a macro defined outside this file - presumably it
@ also loads pc from the saved lr, i.e. returns; confirm
ldmpc regs=r4-r11 @ restore regs
.ltorg @ dump constant pool
.size lcd_yuv_write_inner_loop, .-lcd_yuv_write_inner_loop

View file

@ -37,6 +37,9 @@
/* whether the lcd is currently enabled or not */
static bool lcd_enabled;
/* Display status */
/* Option bits written by lcd_yuv_set_options(). NOTE(review): the
   lcd_blit_yuv() implementation further down does not test this word -
   confirm whether anything else in the file reads it */
static unsigned lcd_yuv_options SHAREDBSS_ATTR = 0;
/* Value used for flipping. Must be remembered when display is turned off. */
static unsigned short flip;
@ -144,6 +147,101 @@ void lcd_set_flip(bool yesno)
lcd_send_data(0x08 | flip);
}
/* Remember the option bits for YUV blitting.
 *
 * options - stored unchanged in lcd_yuv_options
 */
void lcd_yuv_set_options(unsigned options)
{
    lcd_yuv_options = options;
}
/* Chroma subsampling factors: lcd_blit_yuv() divides the stride and the
 * horizontal source offset by CSUB_X and the source line number by CSUB_Y
 * to locate the chroma samples that belong to a luma line */
#define CSUB_X 2
#define CSUB_Y 2
/* YUV -> RGB565 conversion
* |R| |1.000000 -0.000001 1.402000| |Y'|
* |G| = |1.000000 -0.334136 -0.714136| |Pb|
* |B| |1.000000 1.772000 0.000000| |Pr|
* Scaled, normalized, rounded and tweaked to yield RGB 565:
* |R| |74 0 101| |Y' - 16| >> 9
* |G| = |74 -24 -51| |Cb - 128| >> 8
* |B| |74 128 0| |Cr - 128| >> 9
*/
/* Assembly line converter; lcd_blit_yuv() calls it once per source line with
 * pointers to the first luma and chroma samples of that line */
extern void lcd_yuv_write_inner_loop(unsigned char const * const ysrc,
unsigned char const * const usrc,
unsigned char const * const vsrc,
int width);
/* Performance function to blit a YUV bitmap directly to the LCD.
 *
 * src           - source planes: [0] luma, [1] and [2] chroma
 * src_x, src_y  - top-left corner of the region inside the source image
 * stride        - distance in bytes between two luma lines
 * x, y          - destination position on the panel
 * width, height - size of the region; width is rounded up to even
 *
 * The image is streamed through the LCD2 block transfer unit. One block is
 * limited to 0x10000 bytes (two bytes per RGB565 pixel), so a large image is
 * sent as several blocks of whole lines.
 */
void lcd_blit_yuv(unsigned char * const src[3],
                  int src_x, int src_y, int stride,
                  int x, int y, int width, int height)
{
    int rows_left;

    width = (width + 1) & ~1;

    /* Program the drawing window */
    lcd_send_reg(LCD_REG_HORIZ_ADDR_START);
    lcd_send_data(y);
    lcd_send_reg(LCD_REG_HORIZ_ADDR_END);
    lcd_send_data(y + height - 1);
    lcd_send_reg(LCD_REG_VERT_ADDR_START);
    lcd_send_data(x + x_offset);
    lcd_send_reg(LCD_REG_VERT_ADDR_END);
    lcd_send_data(x + width - 1 + x_offset);
    lcd_send_reg(LCD_REG_WRITE_DATA_2_GRAM);

    const int chroma_stride = stride/CSUB_X;

    for (;;)
    {
        int block_bytes;

        /* Let the previous block drain before the unit is reconfigured */
        while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_READY));
        LCD2_BLOCK_CONFIG = 0;

        if (height == 0)
            break;

        /* calculate how much we can do in one go */
        rows_left = height;
        block_bytes = (width * height) * 2;
        if (block_bytes > 0x10000)
        {
            rows_left = (0x10000/2) / width;
            block_bytes = (width * rows_left) * 2;
        }
        height -= rows_left;

        LCD2_BLOCK_CTRL = 0x10000080;
        LCD2_BLOCK_CONFIG = 0xc0010000 | (block_bytes - 1);
        LCD2_BLOCK_CTRL = 0x34000000;

        /* upsampling, YUV->RGB conversion and reduction to RGB565 in one
         * go, one source line per call */
        do
        {
            const int chroma_off = chroma_stride * (src_y/CSUB_Y) +
                                   (src_x/CSUB_X);

            lcd_yuv_write_inner_loop(src[0] + stride * src_y + src_x,
                                     src[1] + chroma_off,
                                     src[2] + chroma_off,
                                     width);
            src_y++;
        }
        while (--rows_left != 0);
    }

    while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_READY));
    LCD2_BLOCK_CONFIG = 0;
}
/* Update the display.
This must be called after all other LCD functions that change the display. */
void lcd_update(void)

View file

@ -0,0 +1,590 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2007-2011 by Michael Sevakis
*
* Philips GoGear SA9200 LCD assembly routines
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
/* This code should work in general for a Renesas type LCD interface
* connected to the "mono" bridge. TODO: Share it where possible.
*
* Dither is already prepared to be built for upright and rotated
* orientations. */
#include "config.h"
#include "cpu.h"
/****************************************************************************
 * void lcd_write_yuv420_lines(unsigned char const * const src[3],
 * int width,
 * int stride);
 *
 * Converts a strip of two yuv420 lines to RGB565 and writes it to the LCD.
 *
 * src    - src[0] = Y' plane pointer, src[1] = Cb, src[2] = Cr
 * width  - number of columns; the counter is decremented by two and tested
 *          only after a 2x2 block has been written, so at least one block
 *          is always written
 * stride - distance in bytes from a luma sample to the one directly below
 *
 * One Cb and one Cr sample are read per loop iteration and shared by the
 * four pixels of the 2x2 block. Every pixel is sent as two byte stores
 * (high byte first) to offset 0x10 from LCD1_BASE_ADDR, each preceded by a
 * poll of the word at LCD1_BASE_ADDR until LCD1_BUSY_MASK reads clear.
 *
 * |R| |1.000000 -0.000001 1.402000| |Y'|
 * |G| = |1.000000 -0.334136 -0.714136| |Pb|
 * |B| |1.000000 1.772000 0.000000| |Pr|
 * Scaled, normalized, rounded and tweaked to yield RGB 565:
 * |R| |74 0 101| |Y' - 16| >> 9
 * |G| = |74 -24 -51| |Cb - 128| >> 8
 * |B| |74 128 0| |Cr - 128| >> 9
 *
 * Write four RGB565 pixels in the following order on each loop:
 * 1 3 + > down
 * 2 4 \/ left
 */
.section .icode, "ax", %progbits
.align 2
.global lcd_write_yuv420_lines
.type lcd_write_yuv420_lines, %function
lcd_write_yuv420_lines:
@ r0 = yuv_src
@ r1 = width
@ r2 = stride
stmfd sp!, { r4-r10, lr } @ save non-scratch
ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
@ r5 = yuv_src[1] = Cb_p
@ r6 = yuv_src[2] = Cr_p
@
mov r0, #0x70000000 @ r0 = LCD1_BASE_ADDR = 0x70003000
orr r0, r0, #0x3000 @
@
sub r2, r2, #1 @ Adjust stride because of increment
10: @ loop line @
ldrb r7, [r4], #1 @ r7 = *Y'_p++;
ldrb r8, [r5], #1 @ r8 = *Cb_p++;
ldrb r9, [r6], #1 @ r9 = *Cr_p++;
@
sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74
add r12, r7, r7, asl #2 @ actually (Y' - 16)*37 and shift right
add r7, r12, r7, asl #5 @ by one less when adding - same for all
@
sub r8, r8, #128 @ Cb -= 128
sub r9, r9, #128 @ Cr -= 128
@
add r10, r9, r9, asl #1 @ r10 = Cr*51 + Cb*24
add r10, r10, r10, asl #4 @
add r10, r10, r8, asl #3 @
add r10, r10, r8, asl #4 @
@
add r14, r9, r9, asl #2 @ r9 = Cr*101
add r14, r14, r9, asl #5 @
add r9, r14, r9, asl #6 @
@
add r8, r8, #2 @ r8 = bu = (Cb*128 + 256) >> 9
mov r8, r8, asr #2 @
add r9, r9, #256 @ r9 = rv = (r9 + 256) >> 9
mov r9, r9, asr #9 @
rsb r10, r10, #128 @ r10 = guv = (-r10 + 128) >> 8
mov r10, r10, asr #8 @
@ r8 = bu, r9 = rv, r10 = guv stay live for all four pixels of the block
@ compute R, G, and B
add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
add r14, r9, r7, asr #8 @ r14 = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
orr r12, r3, r14 @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r3, #31 @ clamp b
mvnhi r3, r3, asr #31 @
andhi r3, r3, #31 @
cmp r14, #31 @ clamp r
mvnhi r14, r14, asr #31 @
andhi r14, r14, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
@
@ r4 was already advanced by one and r2 holds stride - 1
ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
@
orr r7, r3, r7, lsl #5 @ r7 = |00000000|00000000|00000ggg|gggbbbbb|
orr r7, r7, r14, lsl #11 @ r7 = |00000000|00000000|rrrrrggg|gggbbbbb|
mov r14, r7, lsr #8 @ r14 = |00000000|00000000|00000000|rrrrrggg|
@
@ pixel 1: high byte, then low byte
20: @
ldr r3, [r0] @
tst r3, #LCD1_BUSY_MASK @
bne 20b @
strb r14, [r0, #0x10] @
20: @
ldr r3, [r0] @
tst r3, #LCD1_BUSY_MASK @
bne 20b @
strb r7, [r0, #0x10] @
@
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
add r12, r7, r7, asl #2 @
add r7, r12, r7, asl #5 @
@ compute R, G, and B
add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
add r14, r9, r7, asr #8 @ r14 = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
orr r12, r3, r14 @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r3, #31 @ clamp b
mvnhi r3, r3, asr #31 @
andhi r3, r3, #31 @
cmp r14, #31 @ clamp r
mvnhi r14, r14, asr #31 @
andhi r14, r14, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
@
ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
@
orr r7, r3, r7, lsl #5 @ r7 = |00000000|00000000|00000ggg|gggbbbbb|
orr r7, r7, r14, lsl #11 @ r7 = |00000000|00000000|rrrrrggg|gggbbbbb|
mov r14, r7, lsr #8 @ r14 = |00000000|00000000|00000000|rrrrrggg|
@ pixel 2: high byte, then low byte
20: @
ldr r3, [r0] @
tst r3, #LCD1_BUSY_MASK @
bne 20b @
strb r14, [r0, #0x10] @
20: @
ldr r3, [r0] @
tst r3, #LCD1_BUSY_MASK @
bne 20b @
strb r7, [r0, #0x10] @
@
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
add r12, r7, r7, asl #2 @
add r7, r12, r7, asl #5 @
@ compute R, G, and B
add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
add r14, r9, r7, asr #8 @ r14 = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
orr r12, r3, r14 @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r3, #31 @ clamp b
mvnhi r3, r3, asr #31 @
andhi r3, r3, #31 @
cmp r14, #31 @ clamp r
mvnhi r14, r14, asr #31 @
andhi r14, r14, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
@
ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
@
orr r7, r3, r7, lsl #5 @ r7 = |00000000|00000000|00000ggg|gggbbbbb|
orr r7, r7, r14, lsl #11 @ r7 = |00000000|00000000|rrrrrggg|gggbbbbb|
mov r14, r7, lsr #8 @ r14 = |00000000|00000000|00000000|rrrrrggg|
@ pixel 3: high byte, then low byte
20: @
ldr r3, [r0] @
tst r3, #LCD1_BUSY_MASK @
bne 20b @
strb r14, [r0, #0x10] @
20: @
ldr r3, [r0] @
tst r3, #LCD1_BUSY_MASK @
bne 20b @
strb r7, [r0, #0x10] @
@
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
add r12, r7, r7, asl #2 @
add r7, r12, r7, asl #5 @
@ compute R, G, and B
add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
add r14, r9, r7, asr #8 @ r14 = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
orr r12, r3, r14 @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r3, #31 @ clamp b
mvnhi r3, r3, asr #31 @
andhi r3, r3, #31 @
cmp r14, #31 @ clamp r
mvnhi r14, r14, asr #31 @
andhi r14, r14, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
@
orr r7, r3, r7, lsl #5 @ r7 = |00000000|00000000|00000ggg|gggbbbbb|
orr r7, r7, r14, lsl #11 @ r7 = |00000000|00000000|rrrrrggg|gggbbbbb|
mov r14, r7, lsr #8 @ r14 = |00000000|00000000|00000000|rrrrrggg|
@ pixel 4: high byte, then low byte
20: @
ldr r3, [r0] @
tst r3, #LCD1_BUSY_MASK @
bne 20b @
strb r14, [r0, #0x10] @
20: @
ldr r3, [r0] @
tst r3, #LCD1_BUSY_MASK @
bne 20b @
strb r7, [r0, #0x10] @
@
subs r1, r1, #2 @ subtract block from width
bgt 10b @ loop line @
@
ldmpc regs=r4-r10 @ restore registers and return
.ltorg @ dump constant pool
.size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
/****************************************************************************
 * void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
 * int width,
 * int stride,
 * int x_screen,
 * int y_screen);
 *
 * Same job as lcd_write_yuv420_lines (a strip of two yuv420 lines converted
 * to RGB565 and written to the LCD), with an ordered dither offset added to
 * every pixel before the components are truncated.
 *
 * src      - src[0] = Y' plane pointer, src[1] = Cb, src[2] = Cr
 * width    - number of columns; counted down by two and tested only after a
 *            2x2 block has been written, so at least one block is written
 * stride   - distance in bytes from a luma sample to the one directly below
 * x_screen, y_screen - only bit 1 of (x_screen ^ y_screen) is used; it
 *            selects the starting kernel quadrant (0x00 or 0x80), which is
 *            then flipped at the start of every loop iteration
 *
 * |R| |1.000000 -0.000001 1.402000| |Y'|
 * |G| = |1.000000 -0.334136 -0.714136| |Pb|
 * |B| |1.000000 1.772000 0.000000| |Pr|
 * Red scaled at twice g & b but at same precision to place it in correct
 * bit position after multiply and leave instruction count lower.
 * |R| |298 0 408| |Y' - 16|
 * |G| = |149 -49 -104| |Cb - 128|
 * |B| |149 258 0| |Cr - 128|
 *
 * Write four RGB565 pixels in the following order on each loop:
 * 1 3 + > right/down
 * 2 4 \/ down/left
 *
 * Kernel pattern for upright display:
 * 5 3 4 2 +-> right
 * 1 7 0 6 | down
 * 4 2 5 3 \/
 * 0 6 1 7
 *
 * Kernel pattern for clockwise rotated display:
 * 2 6 3 7 +-> down
 * 4 0 5 1 | left
 * 3 7 2 6 \/
 * 5 1 4 0
 */
.section .icode, "ax", %progbits
.align 2
.global lcd_write_yuv420_lines_odither
.type lcd_write_yuv420_lines_odither, %function
lcd_write_yuv420_lines_odither:
@ r0 = yuv_src
@ r1 = width
@ r2 = stride
@ r3 = x_screen
@ [sp] = y_screen
stmfd sp!, { r4-r11, lr } @ save non-scratch
ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
@ r5 = yuv_src[1] = Cb_p
@ r6 = yuv_src[2] = Cr_p
@
@ nine registers were pushed above, so y_screen is now at sp + 36
ldr r0, [sp, #36] @ Line up pattern and kernel quadrant
eor r14, r3, r0 @
and r14, r14, #0x2 @
mov r14, r14, lsl #6 @ 0x00 or 0x80
@
mov r0, #0x70000000 @ r0 = LCD1_BASE_ADDR = 0x70003000
orr r0, r0, #0x3000 @
@
sub r2, r2, #1 @ Adjust stride because of increment
10: @ loop line @
@
ldrb r7, [r4], #1 @ r7 = *Y'_p++;
ldrb r8, [r5], #1 @ r8 = *Cb_p++;
ldrb r9, [r6], #1 @ r9 = *Cr_p++;
@
eor r14, r14, #0x80 @ flip pattern quadrant
@
sub r7, r7, #16 @ r7 = Y = (Y' - 16)*149
add r12, r7, r7, asl #2 @
add r12, r12, r12, asl #4 @
add r7, r12, r7, asl #6 @
@
sub r8, r8, #128 @ Cb -= 128
sub r9, r9, #128 @ Cr -= 128
@
add r10, r8, r8, asl #4 @ r10 = guv = Cr*104 + Cb*49
add r10, r10, r8, asl #5 @
add r10, r10, r9, asl #3 @
add r10, r10, r9, asl #5 @
add r10, r10, r9, asl #6 @
@
mov r8, r8, asl #1 @ r8 = bu = Cb*258
add r8, r8, r8, asl #7 @
@
add r9, r9, r9, asl #1 @ r9 = rv = Cr*408
add r9, r9, r9, asl #4 @
mov r9, r9, asl #3 @
@
@ compute R, G, and B
add r3, r8, r7 @ r3 = b' = Y + bu
add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
rsb r7, r10, r7 @ r7 = g' = Y - guv
@
@ r8 = bu, r9 = rv, r10 = guv
@
sub r12, r3, r3, lsr #5 @ r3 = 31/32*b + b/256
add r3, r12, r3, lsr #8 @
@
sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
add r11, r12, r11, lsr #8 @
@
sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
add r7, r12, r7, lsr #8 @
@
@ pixel 1: r12 = delta = kernel quadrant (r14) + per-pixel kernel element
#if LCD_WIDTH >= LCD_HEIGHT
add r12, r14, #0x200 @
#else
add r12, r14, #0x100 @
#endif
@
add r3, r3, r12 @ b = r3 + delta
add r11, r11, r12, lsl #1 @ r = r11 + delta*2
add r7, r7, r12, lsr #1 @ g = r7 + delta/2
@
orr r12, r3, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
beq 15f @ no clamp @
movs r12, r3, asr #15 @ clamp b
mvnne r3, r12, lsr #15 @
andne r3, r3, #0x7c00 @ mask b only if clamped
movs r12, r11, asr #16 @ clamp r
mvnne r11, r12, lsr #16 @
movs r12, r7, asr #15 @ clamp g
mvnne r7, r12, lsr #15 @
15: @ no clamp @
@
@ r4 was already advanced by one and r2 holds stride - 1
ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
@
and r11, r11, #0xf800 @ r11 = |00000000|00000000|rrrrrggg|gggbbbbb|
and r7, r7, #0x7e00 @
orr r11, r11, r7, lsr #4 @
orr r11, r11, r3, lsr #10 @
mov r7, r11, lsr #8 @ r7 = |00000000|00000000|00000000|rrrrrggg|
@
@ high byte, then low byte, each after the busy flag reads clear
20: @
ldr r3, [r0] @
tst r3, #LCD1_BUSY_MASK @
bne 20b @
strb r7, [r0, #0x10] @
20: @
ldr r3, [r0] @
tst r3, #LCD1_BUSY_MASK @
bne 20b @
strb r11, [r0, #0x10] @
@
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
add r12, r7, r7, asl #2 @
add r12, r12, r12, asl #4 @
add r7, r12, r7, asl #6 @
@ compute R, G, and B
add r3, r8, r7 @ r3 = b' = Y + bu
add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
rsb r7, r10, r7 @ r7 = g' = Y - guv
@
sub r12, r3, r3, lsr #5 @ r3 = 31/32*b' + b'/256
add r3, r12, r3, lsr #8 @
@
sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
add r11, r12, r11, lsr #8 @
@
sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
add r7, r12, r7, lsr #8 @
@
@ pixel 2
#if LCD_WIDTH >= LCD_HEIGHT
@ This element is zero - use r14 @
@
add r3, r3, r14 @ b = r3 + delta
add r11, r11, r14, lsl #1 @ r = r11 + delta*2
add r7, r7, r14, lsr #1 @ g = r7 + delta/2
#else
add r12, r14, #0x200 @
@
add r3, r3, r12 @ b = r3 + delta
add r11, r11, r12, lsl #1 @ r = r11 + delta*2
add r7, r7, r12, lsr #1 @ g = r7 + delta/2
#endif
@
orr r12, r3, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
beq 15f @ no clamp @
movs r12, r3, asr #15 @ clamp b
mvnne r3, r12, lsr #15 @
andne r3, r3, #0x7c00 @ mask b only if clamped
movs r12, r11, asr #16 @ clamp r
mvnne r11, r12, lsr #16 @
movs r12, r7, asr #15 @ clamp g
mvnne r7, r12, lsr #15 @
15: @ no clamp @
@
ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
@
and r11, r11, #0xf800 @ r11 = |00000000|00000000|rrrrrggg|gggbbbbb|
and r7, r7, #0x7e00 @
orr r11, r11, r7, lsr #4 @
orr r11, r11, r3, lsr #10 @
mov r7, r11, lsr #8 @ r7 = |00000000|00000000|00000000|rrrrrggg|
@
20: @
ldr r3, [r0] @
tst r3, #LCD1_BUSY_MASK @
bne 20b @
strb r7, [r0, #0x10] @
20: @
ldr r3, [r0] @
tst r3, #LCD1_BUSY_MASK @
bne 20b @
strb r11, [r0, #0x10] @
@
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
add r12, r7, r7, asl #2 @
add r12, r12, r12, asl #4 @
add r7, r12, r7, asl #6 @
@ compute R, G, and B
add r3, r8, r7 @ r3 = b' = Y + bu
add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
rsb r7, r10, r7 @ r7 = g' = Y - guv
@
@ r8 = bu, r9 = rv, r10 = guv
@
sub r12, r3, r3, lsr #5 @ r3 = 31/32*b' + b'/256
add r3, r12, r3, lsr #8 @
@
sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
add r11, r12, r11, lsr #8 @
@
sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
add r7, r12, r7, lsr #8 @
@
@ pixel 3
#if LCD_WIDTH >= LCD_HEIGHT
add r12, r14, #0x100 @
#else
add r12, r14, #0x300 @
#endif
@
add r3, r3, r12 @ b = r3 + delta
add r11, r11, r12, lsl #1 @ r = r11 + delta*2
add r7, r7, r12, lsr #1 @ g = r7 + delta/2
@
orr r12, r3, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
beq 15f @ no clamp @
movs r12, r3, asr #15 @ clamp b
mvnne r3, r12, lsr #15 @
andne r3, r3, #0x7c00 @ mask b only if clamped
movs r12, r11, asr #16 @ clamp r
mvnne r11, r12, lsr #16 @
movs r12, r7, asr #15 @ clamp g
mvnne r7, r12, lsr #15 @
15: @ no clamp @
@
ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
@
and r11, r11, #0xf800 @ r11 = |00000000|00000000|rrrrrggg|gggbbbbb|
and r7, r7, #0x7e00 @
orr r11, r11, r7, lsr #4 @
orr r11, r11, r3, lsr #10 @
mov r7, r11, lsr #8 @ r7 = |00000000|00000000|00000000|rrrrrggg|
@
20: @
ldr r3, [r0] @
tst r3, #LCD1_BUSY_MASK @
bne 20b @
strb r7, [r0, #0x10] @
20: @
ldr r3, [r0] @
tst r3, #LCD1_BUSY_MASK @
bne 20b @
strb r11, [r0, #0x10] @
@
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
add r12, r7, r7, asl #2 @
add r12, r12, r12, asl #4 @
add r7, r12, r7, asl #6 @
@ compute R, G, and B
add r3, r8, r7 @ r3 = b' = Y + bu
add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
rsb r7, r10, r7 @ r7 = g' = Y - guv
@
sub r12, r3, r3, lsr #5 @ r3 = 31/32*b + b/256
add r3, r12, r3, lsr #8 @
@
sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
add r11, r12, r11, lsr #8 @
@
sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
add r7, r12, r7, lsr #8 @
@
@ pixel 4
#if LCD_WIDTH >= LCD_HEIGHT
add r12, r14, #0x300 @
@
add r3, r3, r12 @ b = r3 + delta
add r11, r11, r12, lsl #1 @ r = r11 + delta*2
add r7, r7, r12, lsr #1 @ g = r7 + delta/2
#else
@ This element is zero - use r14 @
@
add r3, r3, r14 @ b = r3 + delta
add r11, r11, r14, lsl #1 @ r = r11 + delta*2
add r7, r7, r14, lsr #1 @ g = r7 + delta/2
#endif
@
orr r12, r3, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
beq 15f @ no clamp @
movs r12, r3, asr #15 @ clamp b
mvnne r3, r12, lsr #15 @
andne r3, r3, #0x7c00 @ mask b only if clamped
movs r12, r11, asr #16 @ clamp r
mvnne r11, r12, lsr #16 @
movs r12, r7, asr #15 @ clamp g
mvnne r7, r12, lsr #15 @
15: @ no clamp @
@
and r11, r11, #0xf800 @ r11 = |00000000|00000000|rrrrrggg|gggbbbbb|
and r7, r7, #0x7e00 @
orr r11, r11, r7, lsr #4 @
orr r11, r11, r3, lsr #10 @
mov r7, r11, lsr #8 @ r7 = |00000000|00000000|00000000|rrrrrggg|
@
20: @
ldr r3, [r0] @
tst r3, #LCD1_BUSY_MASK @
bne 20b @
strb r7, [r0, #0x10] @
20: @
ldr r3, [r0] @
tst r3, #LCD1_BUSY_MASK @
bne 20b @
strb r11, [r0, #0x10] @
@
subs r1, r1, #2 @ subtract block from width
bgt 10b @ loop line @
@
ldmpc regs=r4-r11 @ restore registers and return
.ltorg @ dump constant pool
.size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither

View file

@ -75,6 +75,9 @@ static void lcd_display_off(void);
#define R_GATE_OUT_PERIOD_CTRL 0x71
#define R_SOFTWARE_RESET 0x72
/* Display status */
/* Option bits for lcd_blit_yuv(); written by lcd_yuv_set_options() and
   tested against LCD_YUV_DITHER to choose the dithering line writer */
static unsigned lcd_yuv_options SHAREDBSS_ATTR = 0;
/* wait for LCD */
static inline void lcd_wait_write(void)
{
@ -404,6 +407,85 @@ void lcd_set_flip(bool yesno)
lcd_write_reg(R_DRV_OUTPUT_CONTROL, flip ? 0x090c : 0x0a0c);
}
/* Remember the option bits used by the YUV blitter.
 *
 * options - stored unchanged in lcd_yuv_options; lcd_blit_yuv() copies that
 *           word at the start of a blit and tests its LCD_YUV_DITHER bit
 */
void lcd_yuv_set_options(unsigned options)
{
    lcd_yuv_options = options;
}
/* Line write helpers: each call converts and writes one strip of two
 * yuv420 lines. */
void lcd_write_yuv420_lines(unsigned char const * const src[3],
                            int width,
                            int stride);
void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
                                    int width,
                                    int stride,
                                    int x_screen,
                                    int y_screen);

/* Performance function to blit a YUV bitmap directly to the LCD.
 *
 * src           - the three source planes: [0] is luma, [1] and [2] are the
 *                 chroma planes
 * src_x, src_y  - top-left corner of the region inside the source image
 * stride        - distance in bytes between two luma lines; a chroma line is
 *                 stride/2 bytes long
 * x, y          - destination position; y is converted to LCD_WIDTH - 1 - y
 *                 before it is sent to the controller
 * width, height - size of the region; both are rounded down to even
 *
 * Nothing is drawn while the display is off, or when the region is narrower
 * or shorter than two pixels (or has a negative size).
 */
void lcd_blit_yuv(unsigned char * const src[3],
                  int src_x, int src_y, int stride,
                  int x, int y, int width, int height)
{
    const unsigned char *yuv_src[3];
    const unsigned char *ysrc_max;
    int options;

    if (!display_on)
        return;

    width &= ~1;
    height &= ~1;

    /* The strip loop below is a do/while and therefore runs at least once;
     * without this check a degenerate request would still program the
     * window, read the source and write a strip to the panel. */
    if (width <= 0 || height <= 0)
        return;

    /* calculate the drawing region */
    lcd_write_reg(R_VERT_RAM_ADDR_POS, ((x + width - 1) << 8) | x);

    /* convert YUV coordinates to screen coordinates */
    y = LCD_WIDTH - 1 - y;

    /* 2px strip: cursor moves left, then down in gram */
    /* BGR=1, MDT1-0=00, I/D1-0=10, AM=0 */
    lcd_write_reg(R_ENTRY_MODE, 0x1020);

    yuv_src[0] = src[0] + src_y * stride + src_x;
    yuv_src[1] = src[1] + (src_y * stride >> 2) + (src_x >> 1);
    yuv_src[2] = src[2] + (yuv_src[1] - src[1]); /* same offset as plane 1 */
    ysrc_max = yuv_src[0] + height * stride;

    /* cache options setting */
    options = lcd_yuv_options;

    do
    {
        /* max horiz << 8 | start horiz */
        lcd_write_reg(R_HORIZ_RAM_ADDR_POS, (y << 8) | (y - 1));

        /* position cursor (set AD0-AD15) */
        lcd_write_reg(R_RAM_ADDR_SET, (x << 8) | y);

        /* start drawing */
        lcd_send_command(R_WRITE_DATA_2_GRAM);

        if (options & LCD_YUV_DITHER)
        {
            /* NOTE(review): the screen coordinates are passed as (y, x),
             * presumably because the strip is addressed rotated relative to
             * the source image - confirm against the dither kernel */
            lcd_write_yuv420_lines_odither(yuv_src, width, stride,
                                           y, x);
        }
        else
        {
            lcd_write_yuv420_lines(yuv_src, width, stride);
        }

        y -= 2;                    /* move strip by "down" 2 px */
        yuv_src[0] += stride << 1; /* two luma lines */
        yuv_src[1] += stride >> 1; /* one chroma line */
        yuv_src[2] += stride >> 1;
    }
    while (yuv_src[0] < ysrc_max);

    /* back to normal right, then down cursor in gram */
    /* BGR=1, MDT1-0=00, I/D1-0=11, AM=0 */
    lcd_write_reg(R_ENTRY_MODE, 0x1030);
}
/* Update the display.
This must be called after all other LCD functions that change the display. */
void lcd_update(void)

View file

@ -207,3 +207,18 @@ bool lcd_active()
{
return display_on;
}
/* Blit a YUV bitmap directly to the LCD.
 *
 * Placeholder only: every argument is accepted and ignored, nothing is read
 * from the source planes and nothing is drawn.
 */
void lcd_blit_yuv(unsigned char * const src[3],
                  int src_x, int src_y, int stride,
                  int x, int y, int width, int height)
{
    /* source description */
    (void)src; (void)src_x; (void)src_y; (void)stride;
    /* destination rectangle */
    (void)x; (void)y; (void)width; (void)height;
}

View file

@ -268,3 +268,18 @@ bool lcd_active()
{
return display_on;
}
/* Blit a YUV bitmap directly to the LCD.
 *
 * Placeholder only: every argument is accepted and ignored, nothing is read
 * from the source planes and nothing is drawn.
 */
void lcd_blit_yuv(unsigned char * const src[3],
                  int src_x, int src_y, int stride,
                  int x, int y, int width, int height)
{
    /* source description */
    (void)src; (void)src_x; (void)src_y; (void)stride;
    /* destination rectangle */
    (void)x; (void)y; (void)width; (void)height;
}

View file

@ -231,3 +231,18 @@ bool lcd_active()
{
return display_on;
}
/* Blit a YUV bitmap directly to the LCD.
 *
 * Placeholder only: every argument is accepted and ignored, nothing is read
 * from the source planes and nothing is drawn.
 */
void lcd_blit_yuv(unsigned char * const src[3],
                  int src_x, int src_y, int stride,
                  int x, int y, int width, int height)
{
    /* source description */
    (void)src; (void)src_x; (void)src_y; (void)stride;
    /* destination rectangle */
    (void)x; (void)y; (void)width; (void)height;
}

View file

@ -211,3 +211,18 @@ bool lcd_active()
{
return display_on;
}
/* Blit a YUV bitmap directly to the LCD.
 *
 * Placeholder only: every argument is accepted and ignored, nothing is read
 * from the source planes and nothing is drawn.
 */
void lcd_blit_yuv(unsigned char * const src[3],
                  int src_x, int src_y, int stride,
                  int x, int y, int width, int height)
{
    /* source description */
    (void)src; (void)src_x; (void)src_y; (void)stride;
    /* destination rectangle */
    (void)x; (void)y; (void)width; (void)height;
}

View file

@ -350,3 +350,22 @@ bool lcd_active()
{
return display_on;
}
/* Blit a YUV bitmap directly to the LCD
* provided by generic fallback in lcd-16bit-common.c
*
* The stub below is compiled out by the #if 0 guard, so this file does not
* define lcd_blit_yuv() itself; the code only records the expected
* signature.
*/
#if 0
void lcd_blit_yuv(unsigned char * const src[3],
int src_x, int src_y, int stride,
int x, int y, int width, int height)
{
(void)src;
(void)src_x;
(void)src_y;
(void)stride;
(void)x;
(void)y;
(void)width;
(void)height;
}
#endif

View file

@ -253,3 +253,18 @@ bool lcd_active()
{
return display_on;
}
/* Blit a YUV bitmap directly to the LCD.
 *
 * Placeholder only: every argument is accepted and ignored, nothing is read
 * from the source planes and nothing is drawn.
 */
void lcd_blit_yuv(unsigned char * const src[3],
                  int src_x, int src_y, int stride,
                  int x, int y, int width, int height)
{
    /* source description */
    (void)src; (void)src_x; (void)src_y; (void)stride;
    /* destination rectangle */
    (void)x; (void)y; (void)width; (void)height;
}

View file

@ -178,3 +178,22 @@ void lcd_set_gram_area(int x_start, int y_start,
lcd_cmd(GRAM_WRITE);
LCDC_CTRL &= ~RGB24B;
}
/* Blit a YUV bitmap directly to the LCD
* provided by generic fallback in lcd-16bit-common.c
*
* The stub below is compiled out by the #if 0 guard, so this file does not
* define lcd_blit_yuv() itself; the code only records the expected
* signature.
*/
#if 0
void lcd_blit_yuv(unsigned char * const src[3],
int src_x, int src_y, int stride,
int x, int y, int width, int height)
{
(void)src;
(void)src_x;
(void)src_y;
(void)stride;
(void)x;
(void)y;
(void)width;
(void)height;
}
#endif

View file

@ -65,3 +65,231 @@ lcd_write_line: /* r2 = LCD_BASE */
bgt .loop
ldmpc regs=r4-r6
/****************************************************************************
* extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
* const unsigned LCD_BASE,
* int width,
* int stride);
*
* Conversion from Motion JPEG and MPEG Y'PbPr to RGB is:
* |R| |1.164 0.000 1.596| |Y' - 16|
* |G| = |1.164 -0.391 -0.813| |Pb - 128|
* |B| |1.164 2.018 0.000| |Pr - 128|
*
* Scaled, normalized, rounded and tweaked to yield RGB 565:
* |R| |74 0 101| |Y' - 16| >> 9
* |G| = |74 -24 -51| |Cb - 128| >> 8
* |B| |74 128 0| |Cr - 128| >> 9
*
* Converts two lines from YUV to RGB565 and writes to LCD at once. First loop
* loads Cb/Cr, calculates the chroma offset and saves them to buffer. Within
* the second loop these chroma offset are reloaded from buffer. Within each
* loop two pixels are calculated and written to LCD.
*/
.align 2
.global lcd_write_yuv420_lines
.type lcd_write_yuv420_lines, %function
lcd_write_yuv420_lines:
/* r0 = src = yuv_src */
/* r1 = dst = LCD_BASE */
/* r2 = width */
/* r3 = stride */
stmfd sp!, { r4-r10, lr } /* save non-scratch */
ldmia r0, { r9, r10, r12 } /* r9 = yuv_src[0] = Y'_p */
/* r10 = yuv_src[1] = Cb_p */
/* r12 = yuv_src[2] = Cr_p */
add r3, r9, r3 /* r3 = &ysrc[stride] */
add r4, r2, r2, asr #1 /* chroma buffer lenght = width/2 *3 */
mov r4, r4, asl #2 /* use words for str/ldm possibility */
add r4, r4, #19 /* plus room for 4 additional words, */
bic r4, r4, #3 /* rounded up to multiples of 4 byte */
sub sp, sp, r4 /* and allocate on stack */
stmia sp, {r1-r4} /* LCD_BASE, width, &ysrc[stride], stack_alloc */
mov r7, r2 /* r7 = loop count */
add r8, sp, #16 /* chroma buffer */
add lr, r1, #0x40 /* LCD data port = LCD_BASE + 0x40 */
/* 1st loop start */
10: /* loop start */
ldrb r0, [r10], #1 /* r0 = *usrc++ = *Cb_p++ */
ldrb r1, [r12], #1 /* r1 = *vsrc++ = *Cr_p++ */
sub r0, r0, #128 /* r0 = Cb-128 */
sub r1, r1, #128 /* r1 = Cr-128 */
add r2, r1, r1, asl #1 /* r2 = Cr*51 + Cb*24 */
add r2, r2, r2, asl #4
add r2, r2, r0, asl #3
add r2, r2, r0, asl #4
add r4, r1, r1, asl #2 /* r1 = Cr*101 */
add r4, r4, r1, asl #5
add r1, r4, r1, asl #6
add r1, r1, #256 /* r1 = rv = (r1 + 256) >> 9 */
mov r1, r1, asr #9
rsb r2, r2, #128 /* r2 = guv = (-r2 + 128) >> 8 */
mov r2, r2, asr #8
add r0, r0, #2 /* r0 = bu = (Cb*128 + 256) >> 9 */
mov r0, r0, asr #2
stmia r8!, {r0-r2} /* store r0, r1 and r2 to chroma buffer */
/* 1st loop, first pixel */
ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */
sub r5, r5, #16 /* r5 = (Y'-16) * 74 */
add r3, r5, r5, asl #2
add r5, r3, r5, asl #5
add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */
add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */
add r4, r0, r5, asr #8 /* r4 = b = (Y >> 9) + bu */
orr r5, r6, r4 /* check if clamping is needed... */
orr r5, r5, r3, asr #1 /* ...at all */
cmp r5, #31
bls 15f /* -> no clamp */
cmp r6, #31 /* clamp r */
mvnhi r6, r6, asr #31
andhi r6, r6, #31
cmp r3, #63 /* clamp g */
mvnhi r3, r3, asr #31
andhi r3, r3, #63
cmp r4, #31 /* clamp b */
mvnhi r4, r4, asr #31
andhi r4, r4, #31
15: /* no clamp */
/* calculate pixel_1 and save to r4 for later pixel packing */
orr r4, r4, r3, lsl #5 /* pixel_1 = r<<11 | g<<5 | b */
orr r4, r4, r6, lsl #11 /* r4 = pixel_1 */
/* 1st loop, second pixel */
ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */
sub r5, r5, #16 /* r5 = (Y'-16) * 74 */
add r3, r5, r5, asl #2
add r5, r3, r5, asl #5
add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */
add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */
add r5, r0, r5, asr #8 /* r5 = b = (Y >> 9) + bu */
orr r0, r6, r5 /* check if clamping is needed... */
orr r0, r0, r3, asr #1 /* ...at all */
cmp r0, #31
bls 15f /* -> no clamp */
cmp r6, #31 /* clamp r */
mvnhi r6, r6, asr #31
andhi r6, r6, #31
cmp r3, #63 /* clamp g */
mvnhi r3, r3, asr #31
andhi r3, r3, #63
cmp r5, #31 /* clamp b */
mvnhi r5, r5, asr #31
andhi r5, r5, #31
15: /* no clamp */
/* calculate pixel_2 and pack with pixel_1 before writing */
orr r5, r5, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */
orr r5, r5, r6, lsl #11 /* r5 = pixel_2 */
#ifdef FORCE_FIFO_WAIT
/* wait for FIFO half full */
.fifo_wait1:
ldr r3, [lr, #-0x24] /* while (LCD_STATUS & 0x08); */
tst r3, #0x8
bgt .fifo_wait1
#endif
stmia lr, {r4,r5} /* write pixel_1 and pixel_2 */
subs r7, r7, #2 /* check for loop end */
bgt 10b /* back to beginning */
/* 1st loop end */
/* Reload several registers for pointer rewinding for next loop */
add r8, sp, #16 /* chroma buffer */
ldmia sp, { r1, r7, r9} /* r1 = LCD_BASE */
/* r7 = loop count */
/* r9 = &ysrc[stride] */
/* 2nd loop start */
20: /* loop start */
/* restore r0 (bu), r1 (rv) and r2 (guv) from chroma buffer */
ldmia r8!, {r0-r2}
/* 2nd loop, first pixel */
ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */
sub r5, r5, #16 /* r5 = (Y'-16) * 74 */
add r3, r5, r5, asl #2
add r5, r3, r5, asl #5
add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */
add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */
add r4, r0, r5, asr #8 /* r4 = b = (Y >> 9) + bu */
orr r5, r6, r4 /* check if clamping is needed... */
orr r5, r5, r3, asr #1 /* ...at all */
cmp r5, #31
bls 15f /* -> no clamp */
cmp r6, #31 /* clamp r */
mvnhi r6, r6, asr #31
andhi r6, r6, #31
cmp r3, #63 /* clamp g */
mvnhi r3, r3, asr #31
andhi r3, r3, #63
cmp r4, #31 /* clamp b */
mvnhi r4, r4, asr #31
andhi r4, r4, #31
15: /* no clamp */
/* calculate pixel_1 and save to r4 for later pixel packing */
orr r4, r4, r3, lsl #5 /* pixel_1 = r<<11 | g<<5 | b */
orr r4, r4, r6, lsl #11 /* r4 = pixel_1 */
/* 2nd loop, second pixel */
ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */
sub r5, r5, #16 /* r5 = (Y'-16) * 74 */
add r3, r5, r5, asl #2
add r5, r3, r5, asl #5
add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */
add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */
add r5, r0, r5, asr #8 /* r5 = b = (Y >> 9) + bu */
orr r0, r6, r5 /* check if clamping is needed... */
orr r0, r0, r3, asr #1 /* ...at all */
cmp r0, #31
bls 15f /* -> no clamp */
cmp r6, #31 /* clamp r */
mvnhi r6, r6, asr #31
andhi r6, r6, #31
cmp r3, #63 /* clamp g */
mvnhi r3, r3, asr #31
andhi r3, r3, #63
cmp r5, #31 /* clamp b */
mvnhi r5, r5, asr #31
andhi r5, r5, #31
15: /* no clamp */
/* calculate pixel_2 and pack with pixel_1 before writing */
orr r5, r5, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */
orr r5, r5, r6, lsl #11 /* r5 = pixel_2 */
#ifdef FORCE_FIFO_WAIT
/* wait for FIFO half full */
.fifo_wait2:
ldr r3, [lr, #-0x24] /* while (LCD_STATUS & 0x08); */
tst r3, #0x8
bgt .fifo_wait2
#endif
stmia lr, {r4,r5} /* write pixel_1 and pixel_2 */
subs r7, r7, #2 /* check for loop end */
bgt 20b /* back to beginning */
/* 2nd loop end */
ldr r3, [sp, #12]
add sp, sp, r3 /* deallocate buffer */
ldmpc regs=r4-r10 /* restore registers */
.ltorg
.size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines

View file

@ -406,3 +406,36 @@ void lcd_update_rect(int x, int y, int width, int height)
} while (--height > 0 );
}
}
/* Line write helper function for lcd_blit_yuv. Writes two lines of yuv420. */
extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
                                   const unsigned int lcd_baseadress,
                                   int width,
                                   int stride);

/* Blit a YUV bitmap directly to the LCD.
 *
 * src     - the three source planes: src[0] luma, src[1] and src[2] chroma
 * src_x,
 * src_y   - position of the first pixel to read inside the source bitmap
 * stride  - length of one luma line in bytes; the chroma offsets are
 *           derived from it as (stride * src_y) / 4 + src_x / 2
 * x, y    - destination position handed to lcd_setup_drawing_region()
 * width   - width in pixels, rounded up to the next even value
 * height  - height in pixels; lines are converted in pairs, so an odd last
 *           line is not converted
 *
 * Requests that are narrower than one pixel or shorter than two lines are
 * ignored.
 */
void lcd_blit_yuv(unsigned char * const src[3],
                  int src_x, int src_y, int stride,
                  int x, int y, int width, int height)
{
    unsigned int z;
    unsigned char const * yuv_src[3];

    width = (width + 1) & ~1; /* ensure width is even */

    /* The do/while below always runs at least once and each pass converts
     * two full lines, so an empty or single-line request must be rejected
     * here instead of reading and writing lines that were never asked for. */
    if (width <= 0 || height < 2)
        return;

    lcd_setup_drawing_region(x, y, width, height);

    z = stride * src_y;
    yuv_src[0] = src[0] + z + src_x;
    yuv_src[1] = src[1] + (z >> 2) + (src_x >> 1);
    yuv_src[2] = src[2] + (yuv_src[1] - src[1]); /* same offset as Cb plane */

    height >>= 1; /* number of line pairs */

    do {
        lcd_write_yuv420_lines(yuv_src, LCD_BASE, width, stride);
        yuv_src[0] += stride << 1; /* Skip down two luma lines */
        yuv_src[1] += stride >> 1; /* Skip down one chroma line */
        yuv_src[2] += stride >> 1;
    } while (--height > 0);
}

View file

@ -311,3 +311,11 @@ void lcd_update_rect(int x, int y, int width, int height)
{
lcd_update();
}
/* YUV blitting is not implemented in this file: the call draws nothing and
 * leaves the source planes untouched. */
void lcd_blit_yuv(unsigned char * const src[3],
                  int src_x, int src_y, int stride,
                  int x, int y, int width, int height)
{
    /* stub - every argument is deliberately ignored */
    (void)src;
    (void)src_x; (void)src_y; (void)stride;
    (void)x;     (void)y;
    (void)width; (void)height;
}

View file

@ -476,3 +476,19 @@ void lcd_update(void)
{
lcd_update_rect(0, 0, LCD_WIDTH, LCD_HEIGHT);
}
/* Placeholder for the YUV blitter: nothing is drawn and the source planes
 * are not modified. */
void lcd_blit_yuv(unsigned char * const src[3],
                  int src_x, int src_y, int stride,
                  int x, int y, int width, int height)
{
    /* TODO: not implemented yet */

    /* Mark every parameter as intentionally unused. */
    (void)src, (void)src_x, (void)src_y, (void)stride;
    (void)x, (void)y, (void)width, (void)height;
}

View file

@ -530,3 +530,49 @@ void lcd_update_rect(int x, int y, int width, int height)
displaylcd_dma(pixels);
}
/* Line write helper function for lcd_blit_yuv. Writes two lines of yuv420. */
extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
                                   uint16_t* outbuf,
                                   int width,
                                   int stride);

/* Blit a YUV bitmap directly to the LCD.
 *
 * The bitmap is converted two lines at a time into lcd_dblbuf[0] and the
 * converted pixels are then handed to displaylcd_dma().
 *
 * src     - the three source planes: src[0] luma, src[1] and src[2] chroma
 * src_x,
 * src_y   - position of the first pixel to read inside the source bitmap
 * stride  - length of one luma line in bytes
 * x, y    - destination position passed to displaylcd_setup()
 * width   - width in pixels, rounded up to the next even value
 * height  - height in pixels, rounded down to the next even value because
 *           lines are only converted in pairs
 *
 * Nothing is drawn when the resulting area is empty or, with
 * HAVE_LCD_SLEEP, when lcd_active() reports false.
 */
void lcd_blit_yuv(unsigned char * const src[3],
                  int src_x, int src_y, int stride,
                  int x, int y, int width, int height) ICODE_ATTR;
void lcd_blit_yuv(unsigned char * const src[3],
                  int src_x, int src_y, int stride,
                  int x, int y, int width, int height)
{
    unsigned int z;
    unsigned char const * yuv_src[3];

#ifdef HAVE_LCD_SLEEP
    if (!lcd_active()) return;
#endif

    width = (width + 1) & ~1; /* ensure width is even */

    /* Only whole line pairs are converted. Dropping an odd last line keeps
     * the pixel count below equal to what is actually written to the
     * buffer, so no unconverted buffer contents are transferred. */
    height &= ~1;

    /* The conversion loop runs at least once; without this guard a height
     * below 2 would drive the line-pair counter negative and the loop would
     * keep writing far past the end of the buffer. */
    if (width <= 0 || height <= 0)
        return;

    int pixels = width * height;
    uint16_t* out = lcd_dblbuf[0];

    z = stride * src_y;
    yuv_src[0] = src[0] + z + src_x;
    yuv_src[1] = src[1] + (z >> 2) + (src_x >> 1);
    yuv_src[2] = src[2] + (yuv_src[1] - src[1]); /* same offset as Cb plane */

    displaylcd_setup(x, y, width, height);

    height >>= 1; /* number of line pairs */

    do {
        lcd_write_yuv420_lines(yuv_src, out, width, stride);
        yuv_src[0] += stride << 1; /* Skip down two luma lines */
        yuv_src[1] += stride >> 1; /* Skip down one chroma line */
        yuv_src[2] += stride >> 1;
        out += width << 1;         /* two output lines were produced */
    } while (--height > 0);

    displaylcd_dma(pixels);
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,550 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2007 by Jens Arnold
* Heavily based on lcd-as-memframe.c by Michael Sevakis
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include "config.h"
#include "cpu.h"
/****************************************************************************
* void lcd_write_yuv420_lines(unsigned char const * const src[3],
* int width,
* int stride);
*
* Converts a strip of YUV 4:2:0 data that is two luma lines high into RGB565
* and sends it to the LCD. Every pixel is sent as two stores (MSB, then LSB)
* to the register at LCD1_BASE + 0x10; before each store the word at
* LCD1_BASE is polled until LCD1_BUSY_MASK is clear.
*
* Each pass of the main loop reads one Cb byte, one Cr byte and four Y'
* bytes (two from the current luma line and two from the line `stride`
* bytes further on) and lowers the remaining width by 2. The loop body
* always runs at least once.
*
* |R| |1.000000 -0.000001 1.402000| |Y'|
* |G| = |1.000000 -0.334136 -0.714136| |Pb|
* |B| |1.000000 1.772000 0.000000| |Pr|
* Scaled, normalized, rounded and tweaked to yield RGB 565:
* |R| |74 0 101| |Y' - 16| >> 9
* |G| = |74 -24 -51| |Cb - 128| >> 8
* |B| |74 128 0| |Cr - 128| >> 9
*
* Write four RGB565 pixels in the following order on each loop:
* 1 3 + > down
* 2 4 \/ left
*/
.section .icode, "ax", %progbits
.align 2
.global lcd_write_yuv420_lines
.type lcd_write_yuv420_lines, %function
lcd_write_yuv420_lines:
@ r0 = yuv_src
@ r1 = width
@ r2 = stride
stmfd sp!, { r4-r10, lr } @ save non-scratch
ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
@ r5 = yuv_src[1] = Cb_p
@ r6 = yuv_src[2] = Cr_p
@ r0 = scratch
sub r2, r2, #1 @ r2 = stride - 1 (Y'_p is post-incremented)
mov r3, #0x70000000 @
orr r3, r3, #0x3000 @ r3 = LCD1_BASE
10: @ loop line @
@ ---- pixel 1: first column, current line; chroma for all 4 pixels ----
ldrb r7, [r4], #1 @ r7 = *Y'_p++;
ldrb r8, [r5], #1 @ r8 = *Cb_p++;
ldrb r9, [r6], #1 @ r9 = *Cr_p++;
@
sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74
add r12, r7, r7, asl #2 @ actually (Y' - 16)*37 and shift right
add r7, r12, r7, asl #5 @ by one less when adding - same for all
@
sub r8, r8, #128 @ Cb -= 128
sub r9, r9, #128 @ Cr -= 128
@
add r10, r9, r9, asl #1 @ r10 = Cr*51 + Cb*24
add r10, r10, r10, asl #4 @
add r10, r10, r8, asl #3 @
add r10, r10, r8, asl #4 @
@
add lr, r9, r9, asl #2 @ r9 = Cr*101
add lr, lr, r9, asl #5 @
add r9, lr, r9, asl #6 @
@
add r8, r8, #2 @ r8 = bu = (Cb*128 + 256) >> 9
mov r8, r8, asr #2 @      i.e. (Cb + 2) >> 2
add r9, r9, #256 @ r9 = rv = (r9 + 256) >> 9
mov r9, r9, asr #9 @
rsb r10, r10, #128 @ r10 = guv = (-r10 + 128) >> 8
mov r10, r10, asr #8 @
@ compute R, G, and B
add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
orr r12, r0, lr @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r0, #31 @ clamp b
mvnhi r0, r0, asr #31 @
andhi r0, r0, #31 @
cmp lr, #31 @ clamp r
mvnhi lr, lr, asr #31 @
andhi lr, lr, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
@
ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
@
mov lr, lr, lsl #3 @
orr lr, lr, r7, lsr #3 @ lr = (r << 3) | (g >> 3)
orr r0, r0, r7, lsl #5 @ r0 = (g << 5) | b
1: @ busy @
ldr r7, [r3] @ r7 = *LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
bne 1b @
str lr, [r3, #0x10] @ send MSB
1: @busy @
ldr r7, [r3] @ r7 = *LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
bne 1b @
str r0, [r3, #0x10] @ send LSB
@
@ ---- pixel 2: first column, line below (Y' loaded above into r12) ----
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
add r12, r7, r7, asl #2 @
add r7, r12, r7, asl #5 @
@ compute R, G, and B
add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
orr r12, r0, lr @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r0, #31 @ clamp b
mvnhi r0, r0, asr #31 @
andhi r0, r0, #31 @
cmp lr, #31 @ clamp r
mvnhi lr, lr, asr #31 @
andhi lr, lr, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
@
ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
@
mov lr, lr, lsl #3 @
orr lr, lr, r7, lsr #3 @ lr = (r << 3) | (g >> 3)
orr r0, r0, r7, lsl #5 @ r0 = (g << 5) | b
1: @ busy @
ldr r7, [r3] @ r7 = *LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
bne 1b @
str lr, [r3, #0x10] @ send MSB
1: @ busy @
ldr r7, [r3] @ r7 = *LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
bne 1b @
str r0, [r3, #0x10] @ send LSB
@
@ ---- pixel 3: second column, current line ----
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
add r12, r7, r7, asl #2 @
add r7, r12, r7, asl #5 @
@ compute R, G, and B
add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
orr r12, r0, lr @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r0, #31 @ clamp b
mvnhi r0, r0, asr #31 @
andhi r0, r0, #31 @
cmp lr, #31 @ clamp r
mvnhi lr, lr, asr #31 @
andhi lr, lr, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
@
ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
@
@
mov lr, lr, lsl #3 @
orr lr, lr, r7, lsr #3 @ lr = (r << 3) | (g >> 3)
orr r0, r0, r7, lsl #5 @ r0 = (g << 5) | b
1: @ busy @
ldr r7, [r3] @ r7 = *LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
bne 1b @
str lr, [r3, #0x10] @ send MSB
1: @ busy @
ldr r7, [r3] @ r7 = *LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
bne 1b @
str r0, [r3, #0x10] @ send LSB
@
@ ---- pixel 4: second column, line below ----
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
add r12, r7, r7, asl #2 @
add r7, r12, r7, asl #5 @
@ compute R, G, and B
add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
orr r12, r0, lr @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r0, #31 @ clamp b
mvnhi r0, r0, asr #31 @
andhi r0, r0, #31 @
cmp lr, #31 @ clamp r
mvnhi lr, lr, asr #31 @
andhi lr, lr, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
@
mov lr, lr, lsl #3 @
orr lr, lr, r7, lsr #3 @ lr = (r << 3) | (g >> 3)
orr r0, r0, r7, lsl #5 @ r0 = (g << 5) | b
1: @ busy @
ldr r7, [r3] @ r7 = *LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
bne 1b @
str lr, [r3, #0x10] @ send MSB
1: @ busy @
ldr r7, [r3] @ r7 = *LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
bne 1b @
str r0, [r3, #0x10] @ send LSB
@
subs r1, r1, #2 @ subtract block from width
bgt 10b @ loop line @
@
ldmpc regs=r4-r10 @ restore registers and return
.ltorg @ dump constant pool
.size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
/****************************************************************************
* void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
* int width,
* int stride,
* int x_screen,
* int y_screen);
*
* Dithering variant of lcd_write_yuv420_lines: converts a strip of YUV 4:2:0
* data that is two luma lines high into RGB565, adding a per-pixel offset
* (delta) to each colour component before it is clamped and truncated.
* Bit 1 of (x_screen ^ y_screen) picks the starting pattern quadrant, which
* is then flipped on every pass of the main loop.
*
* Every pixel is sent as two stores (MSB, then LSB) to the register at
* LCD1_BASE + 0x10; before each store the word at LCD1_BASE is polled until
* LCD1_BUSY_MASK is clear. The loop body always runs at least once and
* lowers the remaining width by 2 per pass.
*
* |R| |1.000000 -0.000001 1.402000| |Y'|
* |G| = |1.000000 -0.334136 -0.714136| |Pb|
* |B| |1.000000 1.772000 0.000000| |Pr|
* Red scaled at twice g & b but at same precision to place it in correct
* bit position after multiply and leave instruction count lower.
* |R| |298 0 408| |Y' - 16|
* |G| = |149 -49 -104| |Cb - 128|
* |B| |149 258 0| |Cr - 128|
*
* Write four RGB565 pixels in the following order on each loop:
* 1 3 + > down
* 2 4 \/ left
*
* Kernel pattern (raw|rotated|use order):
* 5 3 4 2 2 6 3 7 row0 row2 > down
* 1 7 0 6 | 4 0 5 1 | 2 4 6 0 3 5 7 1 col0 left
* 4 2 5 3 | 3 7 2 6 | 3 5 7 1 2 4 6 0 col2 \/
* 0 6 1 7 5 1 4 0
*/
.section .icode, "ax", %progbits
.align 2
.global lcd_write_yuv420_lines_odither
.type lcd_write_yuv420_lines_odither, %function
lcd_write_yuv420_lines_odither:
@ r0 = yuv_src
@ r1 = width
@ r2 = stride
@ r3 = x_screen
@ [sp] = y_screen
stmfd sp!, { r4-r11, lr } @ save non-scratch
ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
@ r5 = yuv_src[1] = Cb_p
@ r6 = yuv_src[2] = Cr_p
@
sub r2, r2, #1 @ r2 = stride - 1 (Y'_p is post-incremented)
ldr r14, [sp, #36] @ Line up pattern and kernel quadrant
eor r14, r14, r3 @ (y_screen is 36 bytes up: 9 regs pushed)
and r14, r14, #0x2 @
mov r14, r14, lsl #6 @ 0x00 or 0x80
mov r3, #0x70000000 @
orr r3, r3, #0x3000 @ r3 = LCD1_BASE
10: @ loop line @
@
@ ---- pixel 1: first column, current line; chroma for all 4 pixels ----
ldrb r7, [r4], #1 @ r7 = *Y'_p++;
ldrb r8, [r5], #1 @ r8 = *Cb_p++;
ldrb r9, [r6], #1 @ r9 = *Cr_p++;
@
eor r14, r14, #0x80 @ flip pattern quadrant
@
sub r7, r7, #16 @ r7 = Y = (Y' - 16)*149
add r12, r7, r7, asl #2 @
add r12, r12, r12, asl #4 @
add r7, r12, r7, asl #6 @
@
sub r8, r8, #128 @ Cb -= 128
sub r9, r9, #128 @ Cr -= 128
@
add r10, r8, r8, asl #4 @ r10 = guv = Cr*104 + Cb*49
add r10, r10, r8, asl #5 @
add r10, r10, r9, asl #3 @
add r10, r10, r9, asl #5 @
add r10, r10, r9, asl #6 @
@
mov r8, r8, asl #1 @ r8 = bu = Cb*258
add r8, r8, r8, asl #7 @
@
add r9, r9, r9, asl #1 @ r9 = rv = Cr*408
add r9, r9, r9, asl #4 @
mov r9, r9, asl #3 @
@
@ compute R, G, and B
add r0, r8, r7 @ r0 = b' = Y + bu
add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
rsb r7, r10, r7 @ r7 = g' = Y - guv
@
@ r8 = bu, r9 = rv, r10 = guv
@
sub r12, r0, r0, lsr #5 @ r0 = 31/32*b + b/256
add r0, r12, r0, lsr #8 @
@
sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
add r11, r12, r11, lsr #8 @
@
sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
add r7, r12, r7, lsr #8 @
@
add r12, r14, #0x100 @ r12 = delta = 0x100 + quadrant offset
@
add r0, r0, r12 @ b = r0 + delta
add r11, r11, r12, lsl #1 @ r = r11 + delta*2
add r7, r7, r12, lsr #1 @ g = r7 + delta/2
@
orr r12, r0, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
beq 15f @ no clamp @
movs r12, r0, asr #15 @ clamp b
mvnne r0, r12, lsr #15 @
andne r0, r0, #0x7c00 @ mask b only if clamped
movs r12, r11, asr #16 @ clamp r
mvnne r11, r12, lsr #16 @
movs r12, r7, asr #15 @ clamp g
mvnne r7, r12, lsr #15 @
15: @ no clamp @
@
ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
@
and r11, r11, #0xf800 @ pack pixel
mov r11, r11, lsr #8 @ r11 = MSB: red in bits 7..3
and r7, r7, #0x7e00 @ keep the 6 green bits
orr r11, r11, r7, lsr #12 @ MSB |= upper 3 green bits
mov r7, r7, lsr#4 @ green moved to bits 10..5
orr r0, r7, r0, lsr #10 @ r0 = LSB: green | blue in bits 4..0
1: @ busy @
ldr r7, [r3] @ r7 = *LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
bne 1b @
str r11, [r3, #0x10] @ send MSB
1: @ busy @
ldr r7, [r3] @ r7 = *LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
bne 1b @
str r0, [r3, #0x10] @ send LSB
@
@ ---- pixel 2: first column, line below (Y' loaded above into r12) ----
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
add r12, r7, r7, asl #2 @
add r12, r12, r12, asl #4 @
add r7, r12, r7, asl #6 @
@ compute R, G, and B
add r0, r8, r7 @ r0 = b' = Y + bu
add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
rsb r7, r10, r7 @ r7 = g' = Y - guv
@
sub r12, r0, r0, lsr #5 @ r0 = 31/32*b' + b'/256
add r0, r12, r0, lsr #8 @
@
sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
add r11, r12, r11, lsr #8 @
@
sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
add r7, r12, r7, lsr #8 @
@
add r12, r14, #0x200 @ r12 = delta = 0x200 + quadrant offset
@
add r0, r0, r12 @ b = r0 + delta
add r11, r11, r12, lsl #1 @ r = r11 + delta*2
add r7, r7, r12, lsr #1 @ g = r7 + delta/2
@
orr r12, r0, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
beq 15f @ no clamp @
movs r12, r0, asr #15 @ clamp b
mvnne r0, r12, lsr #15 @
andne r0, r0, #0x7c00 @ mask b only if clamped
movs r12, r11, asr #16 @ clamp r
mvnne r11, r12, lsr #16 @
movs r12, r7, asr #15 @ clamp g
mvnne r7, r12, lsr #15 @
15: @ no clamp @
@
ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
and r11, r11, #0xf800 @ pack pixel
mov r11, r11, lsr #8
and r7, r7, #0x7e00
orr r11, r11, r7, lsr #12
mov r7, r7, lsr#4
orr r0, r7, r0, lsr #10
1: @ busy @
ldr r7, [r3] @ r7 = *LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
bne 1b @
str r11, [r3, #0x10] @ send MSB
1: @ busy @
ldr r7, [r3] @ r7 = *LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
bne 1b @
str r0, [r3, #0x10] @ send LSB
@ ---- pixel 3: second column, current line ----
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
add r12, r7, r7, asl #2 @
add r12, r12, r12, asl #4 @
add r7, r12, r7, asl #6 @
@ compute R, G, and B
add r0, r8, r7 @ r0 = b' = Y + bu
add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
rsb r7, r10, r7 @ r7 = g' = Y - guv
@
@ r8 = bu, r9 = rv, r10 = guv
@
sub r12, r0, r0, lsr #5 @ r0 = 31/32*b' + b'/256
add r0, r12, r0, lsr #8 @
@
sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
add r11, r12, r11, lsr #8 @
@
sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
add r7, r12, r7, lsr #8 @
@
add r12, r14, #0x300 @ r12 = delta = 0x300 + quadrant offset
@
add r0, r0, r12 @ b = r0 + delta
add r11, r11, r12, lsl #1 @ r = r11 + delta*2
add r7, r7, r12, lsr #1 @ g = r7 + delta/2
@
orr r12, r0, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
beq 15f @ no clamp @
movs r12, r0, asr #15 @ clamp b
mvnne r0, r12, lsr #15 @
andne r0, r0, #0x7c00 @ mask b only if clamped
movs r12, r11, asr #16 @ clamp r
mvnne r11, r12, lsr #16 @
movs r12, r7, asr #15 @ clamp g
mvnne r7, r12, lsr #15 @
15: @ no clamp @
@
ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
and r11, r11, #0xf800 @ pack pixel
mov r11, r11, lsr #8
and r7, r7, #0x7e00
orr r11, r11, r7, lsr #12
mov r7, r7, lsr#4
orr r0, r7, r0, lsr #10
1: @ busy @
ldr r7, [r3] @ r7 = *LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
bne 1b @
str r11, [r3, #0x10] @ send MSB
1: @ busy @
ldr r7, [r3] @ r7 = *LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
bne 1b @
str r0, [r3, #0x10] @ send LSB
@ ---- pixel 4: second column, line below ----
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
add r12, r7, r7, asl #2 @
add r12, r12, r12, asl #4 @
add r7, r12, r7, asl #6 @
@ compute R, G, and B
add r0, r8, r7 @ r0 = b' = Y + bu
add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
rsb r7, r10, r7 @ r7 = g' = Y - guv
@
sub r12, r0, r0, lsr #5 @ r0 = 31/32*b + b/256
add r0, r12, r0, lsr #8 @
@
sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
add r11, r12, r11, lsr #8 @
@
sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
add r7, r12, r7, lsr #8 @
@
@ This element is zero - use r14 @
@
add r0, r0, r14 @ b = r0 + delta
add r11, r11, r14, lsl #1 @ r = r11 + delta*2
add r7, r7, r14, lsr #1 @ g = r7 + delta/2
@
orr r12, r0, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
beq 15f @ no clamp @
movs r12, r0, asr #15 @ clamp b
mvnne r0, r12, lsr #15 @
andne r0, r0, #0x7c00 @ mask b only if clamped
movs r12, r11, asr #16 @ clamp r
mvnne r11, r12, lsr #16 @
movs r12, r7, asr #15 @ clamp g
mvnne r7, r12, lsr #15 @
15: @ no clamp @
and r11, r11, #0xf800 @ pack pixel
mov r11, r11, lsr #8
and r7, r7, #0x7e00
orr r11, r11, r7, lsr #12
mov r7, r7, lsr#4
orr r0, r7, r0, lsr #10
1: @ busy @
ldr r7, [r3] @ r7 = *LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
bne 1b @
str r11, [r3, #0x10] @ send MSB
1: @ busy @
ldr r7, [r3] @ r7 = *LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
bne 1b @
str r0, [r3, #0x10] @ send LSB
subs r1, r1, #2 @ subtract block from width
bgt 10b @ loop line @
@
ldmpc regs=r4-r11 @ restore registers and return
.ltorg @ dump constant pool
.size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither

View file

@ -30,6 +30,8 @@
#endif
/* Display status */
static unsigned lcd_yuv_options SHAREDBSS_ATTR = 0;
#if defined(HAVE_LCD_ENABLE) || defined(HAVE_LCD_SLEEP)
static bool is_lcd_enabled = true;
#endif
@ -289,6 +291,78 @@ void lcd_set_flip(bool yesno)
/*** update functions ***/
/* Store the option flags used by later lcd_blit_yuv() calls.
 * options - bit mask saved verbatim in lcd_yuv_options; the blitter tests
 *           it against LCD_YUV_DITHER to choose the dithering line writer. */
void lcd_yuv_set_options(unsigned options)
{
lcd_yuv_options = options;
}
/* Line write helper function for lcd_blit_yuv. Write two lines of yuv420. */
extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
                                   int width,
                                   int stride);
extern void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
                                           int width,
                                           int stride,
                                           int x_screen, /* To align dither pattern */
                                           int y_screen);

/* Performance function to blit a YUV bitmap directly to the LCD.
 *
 * src     - the three source planes: src[0] luma, src[1] and src[2] chroma
 * src_x,
 * src_y   - position of the first pixel to read inside the source bitmap
 * stride  - length of one luma line in bytes
 * x, y    - destination position on the screen
 * width   - width in pixels, rounded down to an even value
 * height  - height in pixels; lines are written in pairs, so an odd last
 *           line is dropped
 *
 * The dithering line writer is used when LCD_YUV_DITHER is set in the
 * options stored by lcd_yuv_set_options(). Requests smaller than 2x2
 * pixels are ignored.
 */
void lcd_blit_yuv(unsigned char * const src[3],
                  int src_x, int src_y, int stride,
                  int x, int y, int width, int height)
{
    unsigned char const * yuv_src[3];
    off_t z;
    int dither;

    width &= ~1;
    height >>= 1; /* number of line pairs */

    /* The loop below runs at least once and each pass writes two lines of
     * at least two pixels, so anything smaller than 2x2 has to be rejected
     * before the address window is programmed with an inverted range. */
    if (width <= 0 || height <= 0)
        return;

    z = stride*src_y;
    yuv_src[0] = src[0] + z + src_x;
    yuv_src[1] = src[1] + (z >> 2) + (src_x >> 1);
    yuv_src[2] = src[2] + (yuv_src[1] - src[1]); /* same offset as Cb plane */

    lcd_send_command(R_ENTRY_MODE);
    lcd_send_command(0x03);

    lcd_send_command(R_Y_ADDR_AREA);
    lcd_send_command(x + 4);
    lcd_send_command(x + width - 1 + 4);

    /* Sample the option once so the whole blit uses a single mode. */
    dither = (lcd_yuv_options & LCD_YUV_DITHER) != 0;

    do
    {
        /* Restrict the write window to the two lines sent in this pass. */
        lcd_send_command(R_X_ADDR_AREA);
        lcd_send_command(y);
        lcd_send_command(y + 1);

        if (dither)
            lcd_write_yuv420_lines_odither(yuv_src, width, stride, x, y);
        else
            lcd_write_yuv420_lines(yuv_src, width, stride);

        yuv_src[0] += stride << 1; /* Skip down two luma lines */
        yuv_src[1] += stride >> 1; /* Skip down one chroma line */
        yuv_src[2] += stride >> 1;
        y += 2;
    }
    while (--height > 0);
}
/* Update the display.
This must be called after all other LCD functions that change the display. */
void lcd_update(void)

View file

@ -0,0 +1,538 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2007-2008 by Michael Sevakis
*
* H10 20GB LCD assembly routines
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include "config.h"
#include "cpu.h"
/****************************************************************************
* void lcd_write_yuv420_lines(unsigned char const * const src[3],
* int width,
* int stride);
*
* Converts a strip of YUV 4:2:0 data that is two luma lines high into RGB565
* and writes it to the LCD2 port. For every pixel the port is polled until
* LCD2_BUSY_MASK is clear, then two words are stored back to back:
* LCD2_DATA_MASK | (pixel >> 8) followed by LCD2_DATA_MASK | pixel.
*
* Each pass of the main loop reads one Cb byte, one Cr byte and four Y'
* bytes (two from the current luma line and two from the line `stride`
* bytes further on) and lowers the remaining width by 2. The loop body
* always runs at least once.
*
* |R| |1.000000 -0.000001 1.402000| |Y'|
* |G| = |1.000000 -0.334136 -0.714136| |Pb|
* |B| |1.000000 1.772000 0.000000| |Pr|
* Scaled, normalized, rounded and tweaked to yield RGB 565:
* |R| |74 0 101| |Y' - 16| >> 9
* |G| = |74 -24 -51| |Cb - 128| >> 8
* |B| |74 128 0| |Cr - 128| >> 9
*
* Write four RGB565 pixels in the following order on each loop:
* 1 3 + > down
* 2 4 \/ left
*/
.section .icode, "ax", %progbits
.align 2
.global lcd_write_yuv420_lines
.type lcd_write_yuv420_lines, %function
lcd_write_yuv420_lines:
@ r0 = yuv_src
@ r1 = width
@ r2 = stride
stmfd sp!, { r4-r11, lr } @ save non-scratch
ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
@ r5 = yuv_src[1] = Cb_p
@ r6 = yuv_src[2] = Cr_p
@
mov r0, #0x7000000c @ r0 = &LCD2_PORT = 0x70008a0c
add r0, r0, #0x8a00 @
mov r14, #LCD2_DATA_MASK @ r14 is OR-ed into every word written
@
sub r2, r2, #1 @ Adjust stride because of increment
10: @ loop line @
@ ---- pixel 1: first column, current line; chroma for all 4 pixels ----
ldrb r7, [r4], #1 @ r7 = *Y'_p++;
ldrb r8, [r5], #1 @ r8 = *Cb_p++;
ldrb r9, [r6], #1 @ r9 = *Cr_p++;
@
sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74
add r12, r7, r7, asl #2 @ actually (Y' - 16)*37 and shift right
add r7, r12, r7, asl #5 @ by one less when adding - same for all
@
sub r8, r8, #128 @ Cb -= 128
sub r9, r9, #128 @ Cr -= 128
@
add r10, r9, r9, asl #1 @ r10 = Cr*51 + Cb*24
add r10, r10, r10, asl #4 @
add r10, r10, r8, asl #3 @
add r10, r10, r8, asl #4 @
@
add r11, r9, r9, asl #2 @ r9 = Cr*101
add r11, r11, r9, asl #5 @
add r9, r11, r9, asl #6 @
@
add r8, r8, #2 @ r8 = bu = (Cb*128 + 256) >> 9
mov r8, r8, asr #2 @      i.e. (Cb + 2) >> 2
add r9, r9, #256 @ r9 = rv = (r9 + 256) >> 9
mov r9, r9, asr #9 @
rsb r10, r10, #128 @ r10 = guv = (-r10 + 128) >> 8
mov r10, r10, asr #8 @
@ compute R, G, and B
add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
orr r12, r3, r11 @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r3, #31 @ clamp b
mvnhi r3, r3, asr #31 @
andhi r3, r3, #31 @
cmp r11, #31 @ clamp r
mvnhi r11, r11, asr #31 @
andhi r11, r11, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
@
ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
@
orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11)
orr r3, r3, r7, lsl #5 @ r3 |= (g << 5)
@
orr r7, r14, r3, lsr #8 @ store pixel
orr r11, r14, r3 @
20: @ wait until the port is not busy
ldr r3, [r0] @
tst r3, #LCD2_BUSY_MASK @
bne 20b @
str r7, [r0] @ word built from pixel >> 8
str r11, [r0] @ word built from the whole pixel
@
@ ---- pixel 2: first column, line below (Y' loaded above into r12) ----
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
add r12, r7, r7, asl #2 @
add r7, r12, r7, asl #5 @
@ compute R, G, and B
add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
orr r12, r3, r11 @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r3, #31 @ clamp b
mvnhi r3, r3, asr #31 @
andhi r3, r3, #31 @
cmp r11, #31 @ clamp r
mvnhi r11, r11, asr #31 @
andhi r11, r11, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
@
ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
@
orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11)
orr r3, r3, r7, lsl #5 @ r3 |= (g << 5)
@
orr r7, r14, r3, lsr #8 @ store pixel
orr r11, r14, r3 @
20: @
ldr r3, [r0] @
tst r3, #LCD2_BUSY_MASK @
bne 20b @
str r7, [r0] @
str r11, [r0] @
@
@ ---- pixel 3: second column, current line ----
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
add r12, r7, r7, asl #2 @
add r7, r12, r7, asl #5 @
@ compute R, G, and B
add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
orr r12, r3, r11 @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r3, #31 @ clamp b
mvnhi r3, r3, asr #31 @
andhi r3, r3, #31 @
cmp r11, #31 @ clamp r
mvnhi r11, r11, asr #31 @
andhi r11, r11, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
@
ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
@
orr r3, r3, r7, lsl #5 @ r3 = b | (g << 5)
orr r3, r3, r11, lsl #11 @ r3 |= (r << 11)
@
orr r7, r14, r3, lsr #8 @ store pixel
orr r11, r14, r3 @
20: @
ldr r3, [r0] @
tst r3, #LCD2_BUSY_MASK @
bne 20b @
str r7, [r0] @
str r11, [r0] @
@
@ ---- pixel 4: second column, line below ----
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
add r12, r7, r7, asl #2 @
add r7, r12, r7, asl #5 @
@ compute R, G, and B
add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
orr r12, r3, r11 @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r3, #31 @ clamp b
mvnhi r3, r3, asr #31 @
andhi r3, r3, #31 @
cmp r11, #31 @ clamp r
mvnhi r11, r11, asr #31 @
andhi r11, r11, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
@
orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11)
orr r3, r3, r7, lsl #5 @ r3 |= (g << 5)
@
orr r7, r14, r3, lsr #8 @ store pixel
orr r11, r14, r3 @
20: @
ldr r3, [r0] @
tst r3, #LCD2_BUSY_MASK @
bne 20b @
str r7, [r0] @
str r11, [r0] @
@
subs r1, r1, #2 @ subtract block from width
bgt 10b @ loop line @
@
ldmpc regs=r4-r11 @ restore registers and return
.ltorg @ dump constant pool
.size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
/****************************************************************************
* void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
* int width,
* int stride,
* int x_screen,
* int y_screen);
*
* |R| |1.000000 -0.000001 1.402000| |Y'|
* |G| = |1.000000 -0.334136 -0.714136| |Pb|
* |B| |1.000000 1.772000 0.000000| |Pr|
* Red scaled at twice g & b but at same precision to place it in correct
* bit position after multiply and leave instruction count lower.
* |R| |258 0 408| |Y' - 16|
* |G| = |149 -49 -104| |Cb - 128|
* |B| |149 258 0| |Cr - 128|
*
* Write four RGB565 pixels in the following order on each loop:
* 1 3 + > down
* 2 4 \/ left
*
* Kernel pattern (raw|use order):
* 5 3 4 2 row0 row2 > down
* 1 7 0 6 | 5 1 3 7 4 0 2 6 col0 left
* 4 2 5 3 | 4 0 2 6 5 1 3 7 col2 \/
* 0 6 1 7
*/
.section .icode, "ax", %progbits
.align 2
.global lcd_write_yuv420_lines_odither
.type lcd_write_yuv420_lines_odither, %function
lcd_write_yuv420_lines_odither:
@ r0 = yuv_src
@ r1 = width
@ r2 = stride
@ r3 = x_screen
@ [sp] = y_screen
stmfd sp!, { r4-r11, lr } @ save non-scratch
ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
@ r5 = yuv_src[1] = Cb_p
@ r6 = yuv_src[2] = Cr_p
@
ldr r0, [sp, #36] @ Line up pattern and kernel quadrant
eor r14, r3, r0 @
and r14, r14, #0x2 @
mov r14, r14, lsl #6 @ 0x00 or 0x80
@
mov r0, #0x7000000c @ r0 = &LCD2_PORT = 0x70008a0c
add r0, r0, #0x8a00 @
@
sub r2, r2, #1 @ Adjust stride because of increment
10: @ loop line @
@
ldrb r7, [r4], #1 @ r7 = *Y'_p++;
ldrb r8, [r5], #1 @ r8 = *Cb_p++;
ldrb r9, [r6], #1 @ r9 = *Cr_p++;
@
eor r14, r14, #0x80 @ flip pattern quadrant
@
sub r7, r7, #16 @ r7 = Y = (Y' - 16)*149
add r12, r7, r7, asl #2 @
add r12, r12, r12, asl #4 @
add r7, r12, r7, asl #6 @
@
sub r8, r8, #128 @ Cb -= 128
sub r9, r9, #128 @ Cr -= 128
@
add r10, r8, r8, asl #4 @ r10 = guv = Cr*104 + Cb*49
add r10, r10, r8, asl #5 @
add r10, r10, r9, asl #3 @
add r10, r10, r9, asl #5 @
add r10, r10, r9, asl #6 @
@
mov r8, r8, asl #1 @ r8 = bu = Cb*258
add r8, r8, r8, asl #7 @
@
add r9, r9, r9, asl #1 @ r9 = rv = Cr*408
add r9, r9, r9, asl #4 @
mov r9, r9, asl #3 @
@
@ compute R, G, and B
add r3, r8, r7 @ r3 = b' = Y + bu
add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
rsb r7, r10, r7 @ r7 = g' = Y + guv
@
@ r8 = bu, r9 = rv, r10 = guv
@
sub r12, r3, r3, lsr #5 @ r3 = 31/32*b + b/256
add r3, r12, r3, lsr #8 @
@
sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
add r11, r12, r11, lsr #8 @
@
sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
add r7, r12, r7, lsr #8 @
@
add r12, r14, #0x200 @
@
add r3, r3, r12 @ b = r3 + delta
add r11, r11, r12, lsl #1 @ r = r11 + delta*2
add r7, r7, r12, lsr #1 @ g = r7 + delta/2
@
orr r12, r3, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
beq 15f @ no clamp @
movs r12, r3, asr #15 @ clamp b
mvnne r3, r12, lsr #15 @
andne r3, r3, #0x7c00 @ mask b only if clamped
movs r12, r11, asr #16 @ clamp r
mvnne r11, r12, lsr #16 @
movs r12, r7, asr #15 @ clamp g
mvnne r7, r12, lsr #15 @
15: @ no clamp @
@
ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
@
and r11, r11, #0xf800 @ pack pixel
and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
orr r3, r11, r3, lsr #10 @ (b >> 10)
@
mov r11, #LCD2_DATA_MASK @ store pixel
orr r7, r11, r3, lsr #8 @
orr r11, r11, r3 @
20: @
ldr r3, [r0] @
tst r3, #LCD2_BUSY_MASK @
bne 20b @
str r7, [r0] @
str r11, [r0] @
@
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
add r12, r7, r7, asl #2 @
add r12, r12, r12, asl #4 @
add r7, r12, r7, asl #6 @
@ compute R, G, and B
add r3, r8, r7 @ r3 = b' = Y + bu
add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
rsb r7, r10, r7 @ r7 = g' = Y + guv
@
sub r12, r3, r3, lsr #5 @ r3 = 31/32*b' + b'/256
add r3, r12, r3, lsr #8 @
@
sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
add r11, r12, r11, lsr #8 @
@
sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
add r7, r12, r7, lsr #8 @
@
@ This element is zero - use r14 @
@
add r3, r3, r14 @ b = r3 + delta
add r11, r11, r14, lsl #1 @ r = r11 + delta*2
add r7, r7, r14, lsr #1 @ g = r7 + delta/2
@
orr r12, r3, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
beq 15f @ no clamp @
movs r12, r3, asr #15 @ clamp b
mvnne r3, r12, lsr #15 @
andne r3, r3, #0x7c00 @ mask b only if clamped
movs r12, r11, asr #16 @ clamp r
mvnne r11, r12, lsr #16 @
movs r12, r7, asr #15 @ clamp g
mvnne r7, r12, lsr #15 @
15: @ no clamp @
@
ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
@
and r11, r11, #0xf800 @ pack pixel
and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
orr r3, r11, r3, lsr #10 @ (b >> 10)
@
mov r11, #LCD2_DATA_MASK @ store pixel
orr r7, r11, r3, lsr #8 @
orr r11, r11, r3 @
20: @
ldr r3, [r0] @
tst r3, #LCD2_BUSY_MASK @
bne 20b @
str r7, [r0] @
str r11, [r0] @
@
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
add r12, r7, r7, asl #2 @
add r12, r12, r12, asl #4 @
add r7, r12, r7, asl #6 @
@ compute R, G, and B
add r3, r8, r7 @ r3 = b' = Y + bu
add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
rsb r7, r10, r7 @ r7 = g' = Y + guv
@
@ r8 = bu, r9 = rv, r10 = guv
@
sub r12, r3, r3, lsr #5 @ r3 = 31/32*b' + b'/256
add r3, r12, r3, lsr #8 @
@
sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
add r11, r12, r11, lsr #8 @
@
sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
add r7, r12, r7, lsr #8 @
@
add r12, r14, #0x100 @
@
add r3, r3, r12 @ b = r3 + delta
add r11, r11, r12, lsl #1 @ r = r11 + delta*2
add r7, r7, r12, lsr #1 @ g = r7 + delta/2
@
orr r12, r3, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
beq 15f @ no clamp @
movs r12, r3, asr #15 @ clamp b
mvnne r3, r12, lsr #15 @
andne r3, r3, #0x7c00 @ mask b only if clamped
movs r12, r11, asr #16 @ clamp r
mvnne r11, r12, lsr #16 @
movs r12, r7, asr #15 @ clamp g
mvnne r7, r12, lsr #15 @
15: @ no clamp @
@
ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
@
and r11, r11, #0xf800 @ pack pixel
and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
orr r3, r11, r3, lsr #10 @ (b >> 10)
@
mov r11, #LCD2_DATA_MASK @ store pixel
orr r7, r11, r3, lsr #8 @
orr r11, r11, r3 @
20: @
ldr r3, [r0] @
tst r3, #LCD2_BUSY_MASK @
bne 20b @
str r7, [r0] @
str r11, [r0] @
@
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
add r12, r7, r7, asl #2 @
add r12, r12, r12, asl #4 @
add r7, r12, r7, asl #6 @
@ compute R, G, and B
add r3, r8, r7 @ r3 = b' = Y + bu
add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
rsb r7, r10, r7 @ r7 = g' = Y + guv
@
sub r12, r3, r3, lsr #5 @ r3 = 31/32*b + b/256
add r3, r12, r3, lsr #8 @
@
sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
add r11, r12, r11, lsr #8 @
@
sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
add r7, r12, r7, lsr #8 @
@
add r12, r14, #0x300 @
@
add r3, r3, r12 @ b = r3 + delta
add r11, r11, r12, lsl #1 @ r = r11 + delta*2
add r7, r7, r12, lsr #1 @ g = r7 + delta/2
@
orr r12, r3, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
beq 15f @ no clamp @
movs r12, r3, asr #15 @ clamp b
mvnne r3, r12, lsr #15 @
andne r3, r3, #0x7c00 @ mask b only if clamped
movs r12, r11, asr #16 @ clamp r
mvnne r11, r12, lsr #16 @
movs r12, r7, asr #15 @ clamp g
mvnne r7, r12, lsr #15 @
15: @ no clamp @
@
and r11, r11, #0xf800 @ pack pixel
and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
orr r3, r11, r3, lsr #10 @ (b >> 10)
@
mov r11, #LCD2_DATA_MASK @ store pixel
orr r7, r11, r3, lsr #8 @
orr r11, r11, r3 @
20: @
ldr r3, [r0] @
tst r3, #LCD2_BUSY_MASK @
bne 20b @
str r7, [r0] @
str r11, [r0] @
@
subs r1, r1, #2 @ subtract block from width
bgt 10b @ loop line @
@
ldmpc regs=r4-r11 @ restore registers and return
.ltorg @ dump constant pool
.size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither

View file

@ -37,6 +37,8 @@ static unsigned short disp_control_rev;
/* Contrast setting << 8 */
static int lcd_contrast;
/* Option flags for the YUV blitter. Written by lcd_yuv_set_options() and
 * read by lcd_blit_yuv(), which tests LCD_YUV_DITHER to pick the dithering
 * line writer. */
static unsigned lcd_yuv_options SHAREDBSS_ATTR = 0;
/* Forward declarations */
#if defined(HAVE_LCD_ENABLE) || defined(HAVE_LCD_SLEEP)
static void lcd_display_off(void);
@ -508,6 +510,98 @@ bool lcd_active(void)
/*** update functions ***/
/* Remember the option flags that lcd_blit_yuv() consults on its next call.
 * The value is stored unchanged. */
void lcd_yuv_set_options(unsigned options)
{
    lcd_yuv_options = options;
}
/* Line write helper functions for lcd_blit_yuv. Each call writes two lines
 * of yuv420 taken from the three plane pointers in src[] (Y', Cb, Cr).
 * width is the number of pixel columns to write and stride the distance in
 * bytes between two luma lines. */
extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
int width,
int stride);
/* Same as above, with an ordered dither applied. x_screen and y_screen are
 * the screen position of the block and only select the dither pattern. */
extern void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
int width,
int stride,
int x_screen, /* To align dither pattern */
int y_screen);
/* Performance function to blit a YUV bitmap directly to the LCD.
 *
 * src[0..2]     - Y', Cb and Cr planes (one chroma sample per 2x2 luma block)
 * src_x, src_y  - top-left corner of the region inside the source planes
 * stride        - length of one luma line in bytes
 * x, y          - destination position on screen
 * width, height - size of the region; both are rounded down to an even value
 *
 * Nothing is drawn when the display is off or when the rounded width or
 * height is not positive.
 */
void lcd_blit_yuv(unsigned char * const src[3],
                  int src_x, int src_y, int stride,
                  int x, int y, int width, int height)
{
    const unsigned char *yuv_src[3];
    const unsigned char *ysrc_max;
    int y0;
    int options;

    /* NOT MODIFIED FOR THE YH-925 */

    if (!display_on)
        return;

    width &= ~1;
    height &= ~1;

    /* The loop below is a do/while and the line writers always emit at
     * least one 2x2 block, so an empty region has to be rejected here;
     * otherwise data past the source planes would be read and drawn. */
    if (width <= 0 || height <= 0)
        return;

    x += x_offset;

    /* calculate the drawing region */

    /* The 20GB LCD is actually 128x160 but rotated 90 degrees so the origin
     * is actually the bottom left and horizontal and vertical are swapped.
     * Rockbox expects the origin to be the top left so we need to use
     * 127 - y instead of just y */

    /* max vert << 8 | start vert */
    lcd_write_reg(R_VERT_RAM_ADDR_POS, ((x + width - 1) << 8) | x);

    y0 = LCD_HEIGHT - 1 - y + y_offset;

    /* DIT=0, BGR=1, HWM=0, I/D1-0=10, AM=0, LG2-0=000 */
    lcd_write_reg(R_ENTRY_MODE, 0x1020);

    /* Chroma planes are subsampled by two in both directions, hence the
     * quartered line offset and halved column offset. Cr uses the same
     * offset into its plane as Cb. */
    yuv_src[0] = src[0] + src_y * stride + src_x;
    yuv_src[1] = src[1] + (src_y * stride >> 2) + (src_x >> 1);
    yuv_src[2] = src[2] + (yuv_src[1] - src[1]);
    ysrc_max = yuv_src[0] + height * stride;

    options = lcd_yuv_options;

    do
    {
        /* Each pass draws two source lines into a two pixel wide window. */

        /* max horiz << 8 | start horiz */
        lcd_write_reg(R_HORIZ_RAM_ADDR_POS, (y0 << 8) | (y0 - 1));

        /* position cursor (set AD0-AD15) */
        /* start vert << 8 | start horiz */
        lcd_write_reg(R_RAM_ADDR_SET, (x << 8) | y0);

        /* start drawing */
        lcd_send_cmd(R_WRITE_DATA_2_GRAM);

        if (options & LCD_YUV_DITHER)
        {
            lcd_write_yuv420_lines_odither(yuv_src, width, stride,
                                           x, y);
            /* y is only used to align the dither pattern */
            y -= 2;
        }
        else
        {
            lcd_write_yuv420_lines(yuv_src, width, stride);
        }

        y0 -= 2;
        yuv_src[0] += stride << 1; /* two luma lines */
        yuv_src[1] += stride >> 1; /* one chroma line */
        yuv_src[2] += stride >> 1;
    }
    while (yuv_src[0] < ysrc_max);

    /* DIT=0, BGR=1, HWM=0, I/D1-0=10, AM=1, LG2-0=000 */
    lcd_write_reg(R_ENTRY_MODE, 0x1028);
}
/* Update a fraction of the display. */
void lcd_update_rect(int x0, int y0, int width, int height)
{

View file

@ -0,0 +1,550 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2007 by Jens Arnold
* Heavily based on lcd-as-memframe.c by Michael Sevakis
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include "config.h"
#include "cpu.h"
/****************************************************************************
* void lcd_write_yuv420_lines(unsigned char const * const src[3],
* int width,
* int stride);
*
* Convert two lines of YUV420 data to RGB565 and send the pixels to the LCD
* one byte at a time through the register at LCD1_BASE + 0x10. Before each
* byte the word at LCD1_BASE is polled until LCD1_BUSY_MASK reads as clear.
*
* src - plane pointers: src[0] = Y', src[1] = Cb, src[2] = Cr
* width - pixel columns to write; two columns are handled per loop pass and
* the loop body always runs at least once
* stride - distance in bytes from a luma sample to the one on the next line
*
* |R| |1.000000 -0.000001 1.402000| |Y'|
* |G| = |1.000000 -0.334136 -0.714136| |Pb|
* |B| |1.000000 1.772000 0.000000| |Pr|
* Scaled, normalized, rounded and tweaked to yield RGB 565:
* |R| |74 0 101| |Y' - 16| >> 9
* |G| = |74 -24 -51| |Cb - 128| >> 8
* |B| |74 128 0| |Cr - 128| >> 9
*
* Write four RGB565 pixels in the following order on each loop:
* 1 3 + > down
* 2 4 \/ left
*
* Pixels 1 and 3 come from the first luma line, pixels 2 and 4 from the line
* one stride further on; all four share one Cb/Cr pair.
*/
.section .icode, "ax", %progbits
.align 2
.global lcd_write_yuv420_lines
.type lcd_write_yuv420_lines, %function
lcd_write_yuv420_lines:
@ r0 = yuv_src
@ r1 = width
@ r2 = stride
stmfd sp!, { r4-r10, lr } @ save non-scratch
ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
@ r5 = yuv_src[1] = Cb_p
@ r6 = yuv_src[2] = Cr_p
@ r0 = scratch
sub r2, r2, #1 @ r2 = stride - 1: Y'_p has already been
@ post-incremented when [r4, r2] is read
mov r3, #0x70000000 @
orr r3, r3, #0x3000 @ r3 = LCD1_BASE
10: @ loop line @
ldrb r7, [r4], #1 @ r7 = *Y'_p++;
ldrb r8, [r5], #1 @ r8 = *Cb_p++;
ldrb r9, [r6], #1 @ r9 = *Cr_p++;
@
sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74
add r12, r7, r7, asl #2 @ actually (Y' - 16)*37 and shift right
add r7, r12, r7, asl #5 @ by one less when adding - same for all
@
sub r8, r8, #128 @ Cb -= 128
sub r9, r9, #128 @ Cr -= 128
@
add r10, r9, r9, asl #1 @ r10 = Cr*51 + Cb*24
add r10, r10, r10, asl #4 @
add r10, r10, r8, asl #3 @
add r10, r10, r8, asl #4 @
@
add lr, r9, r9, asl #2 @ r9 = Cr*101
add lr, lr, r9, asl #5 @
add r9, lr, r9, asl #6 @
@
add r8, r8, #2 @ r8 = bu = (Cb*128 + 256) >> 9
mov r8, r8, asr #2 @ computed as (Cb + 2) >> 2
add r9, r9, #256 @ r9 = rv = (r9 + 256) >> 9
mov r9, r9, asr #9 @
rsb r10, r10, #128 @ r10 = guv = (-r10 + 128) >> 8
mov r10, r10, asr #8 @
@ compute R, G, and B
add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
orr r12, r0, lr @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r0, #31 @ clamp b
mvnhi r0, r0, asr #31 @
andhi r0, r0, #31 @
cmp lr, #31 @ clamp r
mvnhi lr, lr, asr #31 @
andhi lr, lr, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
@
ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
@
mov lr, lr, lsl #3 @
orr lr, lr, r7, lsr #3 @ lr = (r << 3) | (g >> 3)
orr r0, r0, r7, lsl #5 @ r0 = (g << 5) | b
1: @ busy @
ldr r7, [r3] @ r7 = LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
bne 1b @
str lr, [r3, #0x10] @ send MSB
1: @ busy @
ldr r7, [r3] @ r7 = LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
bne 1b @
str r0, [r3, #0x10] @ send LSB
@
@ pixel 2: same chroma terms (r8, r9, r10), luma from the next line
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
add r12, r7, r7, asl #2 @
add r7, r12, r7, asl #5 @
@ compute R, G, and B
add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
orr r12, r0, lr @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r0, #31 @ clamp b
mvnhi r0, r0, asr #31 @
andhi r0, r0, #31 @
cmp lr, #31 @ clamp r
mvnhi lr, lr, asr #31 @
andhi lr, lr, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
@
ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
@
mov lr, lr, lsl #3 @
orr lr, lr, r7, lsr #3 @ lr = (r << 3) | (g >> 3)
orr r0, r0, r7, lsl #5 @ r0 = (g << 5) | b
1: @ busy @
ldr r7, [r3] @ r7 = LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
bne 1b @
str lr, [r3, #0x10] @ send MSB
1: @ busy @
ldr r7, [r3] @ r7 = LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
bne 1b @
str r0, [r3, #0x10] @ send LSB
@
@ pixel 3: second column of the first line
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
add r12, r7, r7, asl #2 @
add r7, r12, r7, asl #5 @
@ compute R, G, and B
add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
orr r12, r0, lr @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r0, #31 @ clamp b
mvnhi r0, r0, asr #31 @
andhi r0, r0, #31 @
cmp lr, #31 @ clamp r
mvnhi lr, lr, asr #31 @
andhi lr, lr, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
@
ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
@
@
mov lr, lr, lsl #3 @
orr lr, lr, r7, lsr #3 @ lr = (r << 3) | (g >> 3)
orr r0, r0, r7, lsl #5 @ r0 = (g << 5) | b
1: @ busy @
ldr r7, [r3] @ r7 = LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
bne 1b @
str lr, [r3, #0x10] @ send MSB
1: @ busy @
ldr r7, [r3] @ r7 = LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
bne 1b @
str r0, [r3, #0x10] @ send LSB
@
@ pixel 4: second column of the next line
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
add r12, r7, r7, asl #2 @
add r7, r12, r7, asl #5 @
@ compute R, G, and B
add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
orr r12, r0, lr @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
bls 15f @ no clamp @
cmp r0, #31 @ clamp b
mvnhi r0, r0, asr #31 @
andhi r0, r0, #31 @
cmp lr, #31 @ clamp r
mvnhi lr, lr, asr #31 @
andhi lr, lr, #31 @
cmp r7, #63 @ clamp g
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
@
mov lr, lr, lsl #3 @
orr lr, lr, r7, lsr #3 @ lr = (r << 3) | (g >> 3)
orr r0, r0, r7, lsl #5 @ r0 = (g << 5) | b
1: @ busy @
ldr r7, [r3] @ r7 = LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
bne 1b @
str lr, [r3, #0x10] @ send MSB
1: @ busy @
ldr r7, [r3] @ r7 = LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
bne 1b @
str r0, [r3, #0x10] @ send LSB
@
subs r1, r1, #2 @ subtract block from width
bgt 10b @ loop line @
@
ldmpc regs=r4-r10 @ restore registers and return
.ltorg @ dump constant pool
.size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
/****************************************************************************
* void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
* int width,
* int stride,
* int x_screen,
* int y_screen);
*
* Convert two lines of YUV420 data to RGB565 with an ordered dither and send
* the pixels to the LCD one byte at a time through the register at
* LCD1_BASE + 0x10. Before each byte the word at LCD1_BASE is polled until
* LCD1_BUSY_MASK reads as clear.
*
* src - plane pointers: src[0] = Y', src[1] = Cb, src[2] = Cr
* width - pixel columns to write; two columns are handled per loop pass and
* the loop body always runs at least once
* stride - distance in bytes from a luma sample to the one on the next line
* x_screen, y_screen - only bit 1 of (x_screen ^ y_screen) is used; it picks
* the starting quadrant of the dither kernel
*
* |R| |1.000000 -0.000001 1.402000| |Y'|
* |G| = |1.000000 -0.334136 -0.714136| |Pb|
* |B| |1.000000 1.772000 0.000000| |Pr|
* Red scaled at twice g & b but at same precision to place it in correct
* bit position after multiply and leave instruction count lower.
* |R| |298 0 408| |Y' - 16|
* |G| = |149 -49 -104| |Cb - 128|
* |B| |149 258 0| |Cr - 128|
* (the red luma factor is 2*149 = 298: the code adds Y*149 shifted left by one)
*
* Write four RGB565 pixels in the following order on each loop:
* 1 3 + > down
* 2 4 \/ left
*
* Kernel pattern (raw|rotated|use order):
* 5 3 4 2 2 6 3 7 row0 row2 > down
* 1 7 0 6 | 4 0 5 1 | 2 4 6 0 3 5 7 1 col0 left
* 4 2 5 3 | 3 7 2 6 | 3 5 7 1 2 4 6 0 col2 \/
* 0 6 1 7 5 1 4 0
*
* The dither offset for the four pixels of a block is r14 + 0x100,
* r14 + 0x200, r14 + 0x300 and r14, where r14 is 0x00 or 0x80 and toggles
* on every loop pass. It is added as is to b, doubled to r and halved to g.
*/
.section .icode, "ax", %progbits
.align 2
.global lcd_write_yuv420_lines_odither
.type lcd_write_yuv420_lines_odither, %function
lcd_write_yuv420_lines_odither:
@ r0 = yuv_src
@ r1 = width
@ r2 = stride
@ r3 = x_screen
@ [sp] = y_screen
stmfd sp!, { r4-r11, lr } @ save non-scratch
ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
@ r5 = yuv_src[1] = Cb_p
@ r6 = yuv_src[2] = Cr_p
@
sub r2, r2, #1 @ r2 = stride - 1: Y'_p has already been
@ post-incremented when [r4, r2] is read
ldr r14, [sp, #36] @ Line up pattern and kernel quadrant
@ (y_screen sits above the 9 saved words)
eor r14, r14, r3 @
and r14, r14, #0x2 @
mov r14, r14, lsl #6 @ 0x00 or 0x80
mov r3, #0x70000000 @
orr r3, r3, #0x3000 @ r3 = LCD1_BASE
10: @ loop line @
@
ldrb r7, [r4], #1 @ r7 = *Y'_p++;
ldrb r8, [r5], #1 @ r8 = *Cb_p++;
ldrb r9, [r6], #1 @ r9 = *Cr_p++;
@
eor r14, r14, #0x80 @ flip pattern quadrant
@
sub r7, r7, #16 @ r7 = Y = (Y' - 16)*149
add r12, r7, r7, asl #2 @
add r12, r12, r12, asl #4 @
add r7, r12, r7, asl #6 @
@
sub r8, r8, #128 @ Cb -= 128
sub r9, r9, #128 @ Cr -= 128
@
add r10, r8, r8, asl #4 @ r10 = guv = Cr*104 + Cb*49
add r10, r10, r8, asl #5 @
add r10, r10, r9, asl #3 @
add r10, r10, r9, asl #5 @
add r10, r10, r9, asl #6 @
@
mov r8, r8, asl #1 @ r8 = bu = Cb*258
add r8, r8, r8, asl #7 @
@
add r9, r9, r9, asl #1 @ r9 = rv = Cr*408
add r9, r9, r9, asl #4 @
mov r9, r9, asl #3 @
@
@ compute R, G, and B
add r0, r8, r7 @ r0 = b' = Y + bu
add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
rsb r7, r10, r7 @ r7 = g' = Y - guv
@
@ r8 = bu, r9 = rv, r10 = guv
@
sub r12, r0, r0, lsr #5 @ r0 = 31/32*b + b/256
add r0, r12, r0, lsr #8 @
@
sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
add r11, r12, r11, lsr #8 @
@
sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
add r7, r12, r7, lsr #8 @
@
add r12, r14, #0x100 @ r12 = dither delta for pixel 1
@
add r0, r0, r12 @ b = r0 + delta
add r11, r11, r12, lsl #1 @ r = r11 + delta*2
add r7, r7, r12, lsr #1 @ g = r7 + delta/2
@
orr r12, r0, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
beq 15f @ no clamp @
movs r12, r0, asr #15 @ clamp b
mvnne r0, r12, lsr #15 @
andne r0, r0, #0x7c00 @ mask b only if clamped
movs r12, r11, asr #16 @ clamp r
mvnne r11, r12, lsr #16 @
movs r12, r7, asr #15 @ clamp g
mvnne r7, r12, lsr #15 @
15: @ no clamp @
@
ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
@
and r11, r11, #0xf800 @ pack pixel
mov r11, r11, lsr #8 @ r11 = (r & 0xf800) >> 8
and r7, r7, #0x7e00 @
orr r11, r11, r7, lsr #12 @ r11 = MSB = r11 | ((g & 0x7e00) >> 12)
mov r7, r7, lsr#4 @ r7 = (g & 0x7e00) >> 4
orr r0, r7, r0, lsr #10 @ r0 = LSB = r7 | (b >> 10)
1: @ busy @
ldr r7, [r3] @ r7 = LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
bne 1b @
str r11, [r3, #0x10] @ send MSB
1: @ busy @
ldr r7, [r3] @ r7 = LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
bne 1b @
str r0, [r3, #0x10] @ send LSB
@
@ pixel 2: same chroma terms, luma from the next line
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
add r12, r7, r7, asl #2 @
add r12, r12, r12, asl #4 @
add r7, r12, r7, asl #6 @
@ compute R, G, and B
add r0, r8, r7 @ r0 = b' = Y + bu
add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
rsb r7, r10, r7 @ r7 = g' = Y - guv
@
sub r12, r0, r0, lsr #5 @ r0 = 31/32*b' + b'/256
add r0, r12, r0, lsr #8 @
@
sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
add r11, r12, r11, lsr #8 @
@
sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
add r7, r12, r7, lsr #8 @
@
add r12, r14, #0x200 @ r12 = dither delta for pixel 2
@
add r0, r0, r12 @ b = r0 + delta
add r11, r11, r12, lsl #1 @ r = r11 + delta*2
add r7, r7, r12, lsr #1 @ g = r7 + delta/2
@
orr r12, r0, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
beq 15f @ no clamp @
movs r12, r0, asr #15 @ clamp b
mvnne r0, r12, lsr #15 @
andne r0, r0, #0x7c00 @ mask b only if clamped
movs r12, r11, asr #16 @ clamp r
mvnne r11, r12, lsr #16 @
movs r12, r7, asr #15 @ clamp g
mvnne r7, r12, lsr #15 @
15: @ no clamp @
@
ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
and r11, r11, #0xf800 @ pack pixel
mov r11, r11, lsr #8
and r7, r7, #0x7e00
orr r11, r11, r7, lsr #12
mov r7, r7, lsr#4
orr r0, r7, r0, lsr #10
1: @ busy @
ldr r7, [r3] @ r7 = LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
bne 1b @
str r11, [r3, #0x10] @ send MSB
1: @ busy @
ldr r7, [r3] @ r7 = LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
bne 1b @
str r0, [r3, #0x10] @ send LSB
@ pixel 3: second column of the first line
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
add r12, r7, r7, asl #2 @
add r12, r12, r12, asl #4 @
add r7, r12, r7, asl #6 @
@ compute R, G, and B
add r0, r8, r7 @ r0 = b' = Y + bu
add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
rsb r7, r10, r7 @ r7 = g' = Y - guv
@
@ r8 = bu, r9 = rv, r10 = guv
@
sub r12, r0, r0, lsr #5 @ r0 = 31/32*b' + b'/256
add r0, r12, r0, lsr #8 @
@
sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
add r11, r12, r11, lsr #8 @
@
sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
add r7, r12, r7, lsr #8 @
@
add r12, r14, #0x300 @ r12 = dither delta for pixel 3
@
add r0, r0, r12 @ b = r0 + delta
add r11, r11, r12, lsl #1 @ r = r11 + delta*2
add r7, r7, r12, lsr #1 @ g = r7 + delta/2
@
orr r12, r0, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
beq 15f @ no clamp @
movs r12, r0, asr #15 @ clamp b
mvnne r0, r12, lsr #15 @
andne r0, r0, #0x7c00 @ mask b only if clamped
movs r12, r11, asr #16 @ clamp r
mvnne r11, r12, lsr #16 @
movs r12, r7, asr #15 @ clamp g
mvnne r7, r12, lsr #15 @
15: @ no clamp @
@
ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
and r11, r11, #0xf800 @ pack pixel
mov r11, r11, lsr #8
and r7, r7, #0x7e00
orr r11, r11, r7, lsr #12
mov r7, r7, lsr#4
orr r0, r7, r0, lsr #10
1: @ busy @
ldr r7, [r3] @ r7 = LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
bne 1b @
str r11, [r3, #0x10] @ send MSB
1: @ busy @
ldr r7, [r3] @ r7 = LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
bne 1b @
str r0, [r3, #0x10] @ send LSB
@ pixel 4: second column of the next line
sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
add r12, r7, r7, asl #2 @
add r12, r12, r12, asl #4 @
add r7, r12, r7, asl #6 @
@ compute R, G, and B
add r0, r8, r7 @ r0 = b' = Y + bu
add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
rsb r7, r10, r7 @ r7 = g' = Y - guv
@
sub r12, r0, r0, lsr #5 @ r0 = 31/32*b + b/256
add r0, r12, r0, lsr #8 @
@
sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
add r11, r12, r11, lsr #8 @
@
sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
add r7, r12, r7, lsr #8 @
@
@ This element is zero - use r14 @
@
add r0, r0, r14 @ b = r0 + delta
add r11, r11, r14, lsl #1 @ r = r11 + delta*2
add r7, r7, r14, lsr #1 @ g = r7 + delta/2
@
orr r12, r0, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
beq 15f @ no clamp @
movs r12, r0, asr #15 @ clamp b
mvnne r0, r12, lsr #15 @
andne r0, r0, #0x7c00 @ mask b only if clamped
movs r12, r11, asr #16 @ clamp r
mvnne r11, r12, lsr #16 @
movs r12, r7, asr #15 @ clamp g
mvnne r7, r12, lsr #15 @
15: @ no clamp @
and r11, r11, #0xf800 @ pack pixel
mov r11, r11, lsr #8
and r7, r7, #0x7e00
orr r11, r11, r7, lsr #12
mov r7, r7, lsr#4
orr r0, r7, r0, lsr #10
1: @ busy @
ldr r7, [r3] @ r7 = LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
bne 1b @
str r11, [r3, #0x10] @ send MSB
1: @ busy @
ldr r7, [r3] @ r7 = LCD1_BASE
tst r7, #LCD1_BUSY_MASK @ bridge busy?
bne 1b @
str r0, [r3, #0x10] @ send LSB
subs r1, r1, #2 @ subtract block from width
bgt 10b @ loop line @
@
ldmpc regs=r4-r11 @ restore registers and return
.ltorg @ dump constant pool
.size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither

View file

@ -273,7 +273,15 @@ void lcd_init_device(void)
#if defined(HAVE_LCD_MODES)
void lcd_set_mode(int mode)
{
if(mode==LCD_MODE_RGB565) {
if(mode==LCD_MODE_YUV) {
/* Turn off the RGB buffer and enable the YUV buffer with zoom */
IO_OSD_OSDWINMD0 |= 0x04;
IO_OSD_VIDWINMD |= 0x01;
#if LCD_NATIVE_WIDTH > 240
IO_OSD_VIDWINMD |= (0x05<<2); /* This does a 2x zoom */
#endif
memset16(FRAME2, 0x0080, LCD_NATIVE_HEIGHT*(LCD_NATIVE_WIDTH+LCD_FUDGE));
} else if(mode==LCD_MODE_RGB565) {
/* Turn on the RGB window, set it to 16 bit and turn YUV window off */
IO_OSD_VIDWINMD &= ~(0x01);
IO_OSD_OSDWIN0OFST = LCD_NATIVE_WIDTH / 16;
@ -636,6 +644,82 @@ void lcd_pal256_update_pal(fb_data *palette)
}
#endif
/* Performance function to blit a YUV bitmap directly to the LCD */
/* Show it rotated so the LCD_WIDTH is now the height */
/*
 * src[0..2]     - Y', Cb and Cr planes (one chroma sample per 2x2 luma block)
 * src_x, src_y  - top-left corner of the region inside the source planes
 * stride        - length of one luma line in bytes
 * x, y          - destination position; y is rounded down to a multiple of 16
 * width, height - size of the region; clipped to the frame, width is rounded
 *                 down to an even value and two source lines are consumed
 *                 per pass
 *
 * Because of the rotation, x advances along frame rows (limit
 * LCD_NATIVE_HEIGHT) and y along frame columns (limit LCD_NATIVE_WIDTH).
 * Nothing is drawn when the LCD is off, when any of x, y, width, height is
 * negative, when the position is outside the frame, or when nothing is left
 * after clipping.
 */
void lcd_blit_yuv(unsigned char * const src[3],
                  int src_x, int src_y, int stride,
                  int x, int y, int width, int height)
{
    unsigned char const * yuv_src[3];

    if (!lcd_on)
        return;

    /* y has to be on a 16 pixel boundary */
    y &= ~0xF;

    /* Reject with the same limits that the clipping below uses. */
    if( ((y | x | height | width ) < 0)
        || y>=LCD_NATIVE_WIDTH || x>=LCD_NATIVE_HEIGHT )
        return;

    if(y+height>LCD_NATIVE_WIDTH)
    {
        height=LCD_NATIVE_WIDTH-y;
    }
    if(x+width>LCD_NATIVE_HEIGHT)
    {
        width=LCD_NATIVE_HEIGHT-x;
    }

    width &= ~1;
    height>>=1;

    /* Both loops below are do/while loops that count down to exactly zero,
     * so at least one 2x2 block must remain or they would run away. */
    if (width <= 0 || height <= 0)
        return;

    /* Start in the last frame row, x rows up and y columns in. */
    fb_data * dst = FRAME2
                    + ((LCD_NATIVE_WIDTH+LCD_FUDGE)*(LCD_NATIVE_HEIGHT-1))
                    - (LCD_NATIVE_WIDTH+LCD_FUDGE)*x + y ;

    /* Scope z */
    {
        off_t z;
        z = stride*src_y;
        yuv_src[0] = src[0] + z + src_x;
        yuv_src[1] = src[1] + (z >> 2) + (src_x >> 1);
        yuv_src[2] = src[2] + (yuv_src[1] - src[1]);
    }

    int cbcr_remain=(stride>>1)-(width>>1);
    int y_remain=(stride<<1)-width;

    do
    {
        register int c_width=width;
        register unsigned int *c_dst=(unsigned int*)dst;

        do
        {
            /* Two horizontally adjacent luma samples from each of the two
             * source lines, fetched as one 16-bit load per line.
             * NOTE(review): this assumes the luma pointer and stride are
             * 2-byte aligned and the CPU is little endian - confirm. */
            register unsigned short Y=*((unsigned short*)yuv_src[0]);
            register unsigned short Yst=*((unsigned short*)(yuv_src[0]+stride));
            yuv_src[0]+=2;

            register unsigned char Cb=*yuv_src[1]++;
            register unsigned char Cr=*yuv_src[2]++;

            /* Each output word is Y(line 2) | Cr | Y(line 1) | Cb. The
             * shifts are done on unsigned values so that moving a byte into
             * bits 24-31 cannot overflow a signed int. */
            *c_dst = ((unsigned int)Yst<<24) | (Cr << 16) | ((Y&0xFF)<<8) | Cb;
            *(c_dst - (LCD_NATIVE_WIDTH+LCD_FUDGE)/2) =
                ((unsigned int)(Yst&0xFF00)<<16) | (Cr << 16) | (Y&0xFF00) | Cb;

            /* Two frame rows up (c_dst counts 32-bit words). */
            c_dst -= (LCD_NATIVE_WIDTH+LCD_FUDGE);

            c_width -= 2;
        } while (c_width);

        yuv_src[0] += y_remain; /* Skip down two luma lines-width */
        yuv_src[1] += cbcr_remain; /* Skip down one chroma line-width/2 */
        yuv_src[2] += cbcr_remain;

        dst+=2;
    } while (--height);
}
void lcd_set_contrast(int val) {
(void) val;
// TODO:

View file

@ -25,6 +25,248 @@
.section .icode,"ax",@progbits
/* begin lcd_write_yuv420_lines
*
* Convert two lines of YUV420 data to RGB666 and write them to the LCD data
* port. Stack arguments, in order: Y data pointer, C data pointer, C width.
*
* Buffer layout as read by the code below:
* Y data - the bottom-line luma starts at offset 0, the top-line luma at
* offset 2 * C width
* C data - the u (Cb) samples start at offset 0, the v (Cr) samples at
* offset C width
*
* Each 2x2 block is emitted as top left, bottom left, top right, bottom
* right. The chroma terms are accumulated once per block and each following
* luma sample is added as a delta from the previous one. Every pixel is sent
* as two 16-bit writes, upper half-word first; the second write is deferred
* until the next pixel's multiply-accumulates have been issued.
*
* See http://en.wikipedia.org/wiki/YCbCr
* ITU-R BT.601 (formerly CCIR 601):
* |Y'| | 0.299000 0.587000 0.114000| |R|
* |Pb| = |-0.168736 -0.331264 0.500000| |G| or 0.564334*(B - Y')
* |Pr| | 0.500000 -0.418688 0.081312| |B| or 0.713267*(R - Y')
* Scaled, normalized and rounded:
* |Y'| | 65 129 25| |R| + 16 : 16->235
* |Cb| = |-38 -74 112| |G| + 128 : 16->240
* |Cr| |112 -94 -18| |B| + 128 : 16->240
*
* The inverse:
* |R| |1.000000 -0.000001 1.402000| |Y'|
* |G| = |1.000000 -0.334136 -0.714136| |Pb|
* |B| |1.000000 1.772000 0.000000| |Pr|
* Scaled, normalized, rounded and tweaked to yield RGB 666:
* |R| |19611723 0 26881894| |Y' - 16| >> 26
* |G| = |19611723 -6406711 -13692816| |Cb - 128| >> 26
* |B| |19611723 33976259 0| |Cr - 128| >> 26
*
* Needs EMAC set to saturated, signed integer mode.
*
* register usage:
* %a0 - LCD data port
* %a1 - Y pointer
* %a2 - C pointer
* %a3 - C width
* %a4 - Y end address
* %a5 - Y factor
* %a6 - BU factor
* %d0 - scratch
* %d1 - B, previous Y \ alternating
* %d2 - U / B, previous Y /
* %d3 - V / G
* %d4 - R / output pixel
* %d5 - GU factor
* %d6 - GV factor
* %d7 - RGB signed -> unsigned conversion mask
*/
.align 2
.global lcd_write_yuv420_lines
.type lcd_write_yuv420_lines, @function
lcd_write_yuv420_lines:
lea.l (-44, %sp), %sp /* free up some registers */
movem.l %d2-%d7/%a2-%a6, (%sp)
lea.l 0xf0008002, %a0 /* LCD data port */
movem.l (44+4, %sp), %a1-%a3 /* Y data, C data, C width */
lea.l (%a1, %a3*2), %a4 /* Y end address */
move.l #19611723, %a5 /* y factor */
move.l #33976259, %a6 /* bu factor */
move.l #-6406711, %d5 /* gu factor */
move.l #-13692816, %d6 /* gv factor */
move.l #0x01040820, %d7 /* bitmask for signed->unsigned conversion
* of R, G and B within RGGB6666 at once */
/* chroma for first 2x2 block */
clr.l %d3 /* load v component */
move.b (%a2, %a3), %d3
clr.l %d2 /* load u component */
move.b (%a2)+, %d2
moveq.l #-128, %d0
add.l %d0, %d2
add.l %d0, %d3
mac.l %a6, %d2, %acc0 /* bu */
mac.l %d5, %d2, %acc1 /* gu */
mac.l %d6, %d3, %acc1 /* gv */
move.l #26881894, %d0 /* rv factor */
mac.l %d0, %d3, %acc2 /* rv */
/* luma for very first pixel (top left) */
clr.l %d1
move.b (%a1, %a3*2), %d1
moveq.l #-126, %d0
add.l %d1, %d0 /* y' (-0.5 ... +0.5) */
mac.l %a5, %d0, %acc0
mac.l %a5, %d0, %acc1
mac.l %a5, %d0, %acc2
/* no pixel is pending yet, so skip the deferred write in the loop head */
bra.b .yuv_line_entry
.yuv_line_loop:
/* chroma for 2x2 pixel block */
clr.l %d3 /* load v component */
move.b (%a2, %a3), %d3
clr.l %d2 /* load u component */
move.b (%a2)+, %d2
moveq.l #-128, %d0
add.l %d0, %d2
add.l %d0, %d3
mac.l %a6, %d2, %acc0 /* bu */
mac.l %d5, %d2, %acc1 /* gu */
mac.l %d6, %d3, %acc1 /* gv */
move.l #26881894, %d0 /* rv factor */
mac.l %d0, %d3, %acc2 /* rv */
/* luma for first pixel (top left) */
clr.l %d1
move.b (%a1, %a3*2), %d1
moveq.l #-126, %d0
add.l %d1, %d0 /* y' (-0.5 ... +0.5) */
mac.l %a5, %d0, %acc0
mac.l %a5, %d0, %acc1
mac.l %a5, %d0, %acc2
move.w %d4, (%a0)
/* 2nd LCD write is delayed one pixel to use it for filling the EMAC latency */
/* convert to RGB666, pack and output */
.yuv_line_entry:
moveq.l #26, %d0
move.l %acc0, %d4
move.l %acc1, %d3
move.l %acc2, %d2
lsr.l %d0, %d4
lsr.l %d0, %d3
lsr.l %d0, %d2
lsl.l #6, %d2
or.l %d3, %d2 /* |00000000|00000000|0000Rrrr|rrGggggg| */
lsl.l #7, %d2
or.l %d2, %d3 /* |00000000|00000Rrr|rrrGgggg|g0Gggggg| */
lsl.l #6, %d3
or.l %d3, %d4 /* |0000000R|rrrrrGgg|ggg0Gggg|ggBbbbbb| */
eor.l %d7, %d4 /* |0000000r|rrrrrggg|ggg0gggg|ggbbbbbb| */
swap %d4
move.w %d4, (%a0)
swap %d4
/* luma for second pixel (bottom left) as delta from the first */
clr.l %d2
move.b (%a1)+, %d2
move.l %d2, %d0
sub.l %d1, %d0
mac.l %a5, %d0, %acc0
mac.l %a5, %d0, %acc1
mac.l %a5, %d0, %acc2
move.w %d4, (%a0)
/* 2nd LCD write is delayed one pixel to use it for filling the EMAC latency */
/* convert to RGB666, pack and output */
moveq.l #26, %d0
move.l %acc0, %d4
move.l %acc1, %d3
move.l %acc2, %d1
lsr.l %d0, %d4
lsr.l %d0, %d3
lsr.l %d0, %d1
lsl.l #6, %d1
or.l %d3, %d1 /* |00000000|00000000|0000Rrrr|rrGggggg| */
lsl.l #7, %d1
or.l %d1, %d3 /* |00000000|00000Rrr|rrrGgggg|g0Gggggg| */
lsl.l #6, %d3
or.l %d3, %d4 /* |0000000R|rrrrrGgg|ggg0Gggg|ggBbbbbb| */
eor.l %d7, %d4 /* |0000000r|rrrrrggg|ggg0gggg|ggbbbbbb| */
swap %d4
move.w %d4, (%a0)
swap %d4
/* luma for third pixel (top right) as delta from the second */
clr.l %d1
move.b (%a1, %a3*2), %d1
move.l %d1, %d0
sub.l %d2, %d0
mac.l %a5, %d0, %acc0
mac.l %a5, %d0, %acc1
mac.l %a5, %d0, %acc2
move.w %d4, (%a0)
/* 2nd LCD write is delayed one pixel to use it for filling the EMAC latency */
/* convert to RGB666, pack and output */
moveq.l #26, %d0
move.l %acc0, %d4
move.l %acc1, %d3
move.l %acc2, %d2
lsr.l %d0, %d4
lsr.l %d0, %d3
lsr.l %d0, %d2
lsl.l #6, %d2
or.l %d3, %d2 /* |00000000|00000000|0000Rrrr|rrGggggg| */
lsl.l #7, %d2
or.l %d2, %d3 /* |00000000|00000Rrr|rrrGgggg|g0Gggggg| */
lsl.l #6, %d3
or.l %d3, %d4 /* |0000000R|rrrrrGgg|ggg0Gggg|ggBbbbbb| */
eor.l %d7, %d4 /* |0000000r|rrrrrggg|ggg0gggg|ggbbbbbb| */
swap %d4
move.w %d4, (%a0)
swap %d4
/* luma for fourth pixel (bottom right) as delta from the third */
clr.l %d2
move.b (%a1)+, %d2
move.l %d2, %d0
sub.l %d1, %d0
mac.l %a5, %d0, %acc0
mac.l %a5, %d0, %acc1
mac.l %a5, %d0, %acc2
move.w %d4, (%a0)
/* 2nd LCD write is delayed one pixel to use it for filling the EMAC latency */
/* convert to RGB666, pack and output */
/* movclr also clears the accumulators, ready for the next 2x2 block */
moveq.l #26, %d0
movclr.l %acc0, %d4
movclr.l %acc1, %d3
movclr.l %acc2, %d1
lsr.l %d0, %d4
lsr.l %d0, %d3
lsr.l %d0, %d1
lsl.l #6, %d1
or.l %d3, %d1 /* |00000000|00000000|0000Rrrr|rrGggggg| */
lsl.l #7, %d1
or.l %d1, %d3 /* |00000000|00000Rrr|rrrGgggg|g0Gggggg| */
lsl.l #6, %d3
or.l %d3, %d4 /* |0000000R|rrrrrGgg|ggg0Gggg|ggBbbbbb| */
eor.l %d7, %d4 /* |0000000r|rrrrrggg|ggg0gggg|ggbbbbbb| */
swap %d4
move.w %d4, (%a0)
swap %d4
cmp.l %a1, %a4 /* run %a1 up to end of line */
bhi.w .yuv_line_loop
move.w %d4, (%a0) /* write (very) last 2nd word */
movem.l (%sp), %d2-%d7/%a2-%a6
lea.l (44, %sp), %sp /* restore registers */
rts
.yuv_end:
.size lcd_write_yuv420_lines, .yuv_end - lcd_write_yuv420_lines
/* begin lcd_write_data */
.align 2
.global lcd_write_data

View file

@ -414,6 +414,69 @@ bool lcd_active(void)
#endif
/*** update functions ***/
/* Line write helper function for lcd_blit_yuv. Write two lines of yuv420.
* y should have two lines of Y back to back, 2nd line first.
* c should contain the Cb and Cr data for the two lines of Y back to back.
* width is the width of one chroma line, i.e. half the number of pixels
* per luma line (lcd_blit_yuv passes width >> 1).
* Needs EMAC set to saturated, signed integer mode.
*/
extern void lcd_write_yuv420_lines(const unsigned char *y,
const unsigned char *c, int width);
/* Performance function to blit a YUV bitmap directly to the LCD
 * src_x, src_y, width and height should be even and within the LCD's
 * boundaries.
 *
 * src[0..2]     - Y', Cb and Cr planes (one chroma sample per 2x2 luma block)
 * src_x, src_y  - top-left corner of the region inside the source planes
 * stride        - length of one luma line in bytes
 * x, y          - destination position on screen
 * width, height - size of the region; width is limited to LCD_WIDTH and
 *                 both are rounded down to an even value
 *
 * Nothing is drawn when the display is off or when the resulting width or
 * height is not positive.
 */
void lcd_blit_yuv(unsigned char * const src[3],
                  int src_x, int src_y, int stride,
                  int x, int y, int width, int height)
{
    /* Staging buffers handed to lcd_write_yuv420_lines(): two luma lines
     * back to back (second line first), and one Cb line followed by one
     * Cr line. */
    unsigned char y_ibuf[LCD_WIDTH*2];
    unsigned char c_ibuf[LCD_WIDTH];
    const unsigned char *ysrc, *usrc, *vsrc;
    const unsigned char *ysrc_max;

    if (!display_on)
        return;

    /* The staging buffers hold at most LCD_WIDTH pixels per line; a wider
     * request would overflow them on the stack. */
    if (width > LCD_WIDTH)
        width = LCD_WIDTH;

    width &= ~1; /* stay on the safe side */
    height &= ~1;

    /* The loop below is a do/while and the line writer always emits at
     * least one 2x2 block, so an empty region is rejected here. */
    if (width <= 0 || height <= 0)
        return;

    lcd_write_reg(R_ENTRY_MODE, R_ENTRY_MODE_DIT_HORZ);
    /* Set start position and window */
    lcd_write_reg(R_VERT_RAM_ADDR_POS, (LCD_WIDTH-1) << 8);

    ysrc = src[0] + src_y * stride + src_x;
    usrc = src[1] + (src_y * stride >> 2) + (src_x >> 1);
    vsrc = src[2] + (src_y * stride >> 2) + (src_x >> 1);
    ysrc_max = ysrc + height * stride;

    /* The line writer needs the EMAC in saturating mode; the previous
     * mode is put back once the blit is done. */
    unsigned long macsr = coldfire_get_macsr();
    coldfire_set_macsr(EMAC_SATURATE);

    do
    {
        lcd_write_reg(R_HORIZ_RAM_ADDR_POS, ((y + y_offset + 1) << 8) | (y + y_offset));
        lcd_write_reg(R_RAM_ADDR_SET, (x << 8) | (y + y_offset));
        lcd_begin_write_gram();

        memcpy(y_ibuf + width, ysrc, width);   /* first line goes second */
        memcpy(y_ibuf, ysrc + stride, width);  /* second line goes first */
        memcpy(c_ibuf, usrc, width >> 1);
        memcpy(c_ibuf + (width >> 1), vsrc, width >> 1);
        lcd_write_yuv420_lines(y_ibuf, c_ibuf, width >> 1);

        y += 2;
        ysrc += 2 * stride; /* two luma lines */
        usrc += stride >> 1; /* one chroma line */
        vsrc += stride >> 1;
    }
    while (ysrc < ysrc_max);

    coldfire_set_macsr(macsr);
} /* lcd_blit_yuv */
/* Update the display.
This must be called after all other LCD functions that change the
lcd frame buffer. */

View file

@ -0,0 +1,246 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2006 by Jens Arnold
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include "config.h"
#include "cpu.h"
.section .icode, "ax", @progbits
/* lcd_write_yuv420_lines()
*
* See http://en.wikipedia.org/wiki/YCbCr
* ITU-R BT.601 (formerly CCIR 601):
* |Y'| | 0.299000 0.587000 0.114000| |R|
* |Pb| = |-0.168736 -0.331264 0.500000| |G| or 0.564334*(B - Y')
* |Pr| | 0.500000 -0.418688 0.081312| |B| or 0.713267*(R - Y')
* Scaled, normalized and rounded:
* |Y'| | 65 129 25| |R| + 16 : 16->235
* |Cb| = |-38 -74 112| |G| + 128 : 16->240
* |Cr| |112 -94 -18| |B| + 128 : 16->240
*
* The inverse:
* |R| |1.000000 0.000000 1.402000| |Y'|
* |G| = |1.000000 -0.334136 -0.714136| |Pb|
* |B| |1.000000 1.772000 0.000000| |Pr|
* Scaled, normalized, rounded and tweaked to yield RGB565:
* |R| |19611723 0 26881894| |Y' - 16| >> 27
* |G| = |19611723 -6406711 -13692816| |Cb - 128| >> 26
* |B| |19611723 33976259 0| |Cr - 128| >> 27
*
* Needs EMAC set to saturated, signed integer mode.
*
* register usage:
* %a0 - LCD data port
* %a1 - Y pointer
* %a2 - C pointer
* %a3 - C width
* %a4 - Y end address
* %a5 - Y factor
* %a6 - BU factor
* %d0 - scratch
* %d1 - B, previous Y \ alternating
* %d2 - U / B, previous Y /
* %d3 - V / G
* %d4 - R / output pixel
* %d5 - GU factor
* %d6 - GV factor
* %d7 - RGB signed -> unsigned conversion mask
*/
.align 2
.global lcd_write_yuv420_lines
.type lcd_write_yuv420_lines, @function
/* void lcd_write_yuv420_lines(const unsigned char *y,
 *                             const unsigned char *c, int cwidth)
 *
 * Converts two lines of YUV420 to RGB565 and writes the pixels to the LCD
 * data port. Input layout, as addressed by the code below:
 *   y: bottom line at y[0 .. 2*cwidth-1], top line at y[2*cwidth ..]
 *   c: Cb (u) at c[0 .. cwidth-1], Cr (v) at c[cwidth ..]
 * Pixels are emitted per 2x2 block in the order top left, bottom left,
 * top right, bottom right. Each LCD write is issued one pixel late, so the
 * store overlaps the multiply-accumulate work for the following pixel.
 *
 * NOTE(review): the first mac.l instructions accumulate without clearing,
 * so %acc0-%acc2 are presumably expected to be zero on entry. The routine
 * leaves them cleared on exit (movclr below), but nothing visible here
 * clears them before the very first call - confirm.
 */
lcd_write_yuv420_lines:
lea.l (-44, %sp), %sp /* free up some registers */
movem.l %d2-%d7/%a2-%a6, (%sp)
/* 11 registers * 4 bytes = 44 bytes saved; the arguments now start at
 * 44 + 4 (return address) from %sp */
lea.l 0xf0000002, %a0 /* LCD data port */
movem.l (44+4, %sp), %a1-%a3 /* Y data, C data, C width */
lea.l (%a1, %a3*2), %a4 /* Y end address */
move.l #19611723, %a5 /* y factor */
move.l #33976259, %a6 /* bu factor */
/* NOTE(review): the gu factor matches the -0.334136 coefficient quoted in
 * the header comment of this file; BT.601 is usually quoted with -0.344136
 * for G/Pb - confirm whether the value is intentional. */
move.l #-6406711, %d5 /* gu factor */
move.l #-13692816, %d6 /* gv factor */
move.l #0x8410, %d7 /* bitmask for signed->unsigned conversion
* of R, G and B within RGB565 at once */
/* chroma for first 2x2 pixel block */
clr.l %d3 /* load v component */
move.b (%a2, %a3), %d3
clr.l %d2 /* load u component */
move.b (%a2)+, %d2
moveq.l #-128, %d0
add.l %d0, %d2
add.l %d0, %d3
/* %acc0 collects blue, %acc1 green, %acc2 red */
mac.l %a6, %d2, %acc0 /* bu */
mac.l %d5, %d2, %acc1 /* gu */
mac.l %d6, %d3, %acc1 /* gv */
move.l #26881894, %d0 /* rv factor */
mac.l %d0, %d3, %acc2 /* rv */
/* luma for very first pixel (top left) */
clr.l %d1
move.b (%a1, %a3*2), %d1
moveq.l #-126, %d0
add.l %d1, %d0 /* y' (-0.5 ... +0.5) */
mac.l %a5, %d0, %acc0
mac.l %a5, %d0, %acc1
mac.l %a5, %d0, %acc2
/* no pixel is pending yet, so skip the delayed LCD write in the loop head */
bra.b .yuv_line_entry
.yuv_line_loop:
/* chroma for 2x2 pixel block */
clr.l %d3 /* load v component */
move.b (%a2, %a3), %d3
clr.l %d2 /* load u component */
move.b (%a2)+, %d2
moveq.l #-128, %d0
add.l %d0, %d2
add.l %d0, %d3
mac.l %a6, %d2, %acc0 /* bu */
mac.l %d5, %d2, %acc1 /* gu */
mac.l %d6, %d3, %acc1 /* gv */
move.l #26881894, %d0 /* rv factor */
mac.l %d0, %d3, %acc2 /* rv */
/* luma for first pixel (top left) */
clr.l %d1
move.b (%a1, %a3*2), %d1
moveq.l #-126, %d0
add.l %d1, %d0 /* y' (-0.5 ... +0.5) */
mac.l %a5, %d0, %acc0
mac.l %a5, %d0, %acc1
mac.l %a5, %d0, %acc2
/* output the last pixel of the previous 2x2 block */
move.w %d4, (%a0)
/* LCD write is delayed one pixel to use it for filling the EMAC latency */
/* convert to RGB565, pack and output */
.yuv_line_entry:
/* B and R are reduced to 5 bits (>> 27), G to 6 bits (>> 26), then packed
 * as (R << 11) | (G << 5) | B; the eor with %d7 flips the top bit of each
 * field (signed -> unsigned) */
moveq.l #27, %d0
move.l %acc0, %d2
move.l %acc1, %d3
move.l %acc2, %d4
lsr.l %d0, %d2
lsr.l %d0, %d4
moveq.l #26, %d0
lsr.l %d0, %d3
lsl.l #6, %d4
or.l %d3, %d4
lsl.l #5, %d4
or.l %d2, %d4
eor.l %d7, %d4
/* luma for second pixel (bottom left) as delta from the first */
/* the accumulators still hold the previous pixel's sums, so adding
 * factor * (new Y - previous Y) moves them to the new pixel while the
 * chroma contribution stays in place */
clr.l %d2
move.b (%a1)+, %d2
move.l %d2, %d0
sub.l %d1, %d0
mac.l %a5, %d0, %acc0
mac.l %a5, %d0, %acc1
mac.l %a5, %d0, %acc2
move.w %d4, (%a0)
/* LCD write is delayed one pixel to use it for filling the EMAC latency */
/* convert to RGB565, pack and output */
/* same packing as above; B goes through %d1 here because %d2 holds the
 * current Y for the next delta */
moveq.l #27, %d0
move.l %acc0, %d1
move.l %acc1, %d3
move.l %acc2, %d4
lsr.l %d0, %d1
lsr.l %d0, %d4
moveq.l #26, %d0
lsr.l %d0, %d3
lsl.l #6, %d4
or.l %d3, %d4
lsl.l #5, %d4
or.l %d1, %d4
eor.l %d7, %d4
/* luma for third pixel (top right) as delta from the second */
/* %a1 was advanced by the bottom-left load, so this offset now addresses
 * the next byte of the top line */
clr.l %d1
move.b (%a1, %a3*2), %d1
move.l %d1, %d0
sub.l %d2, %d0
mac.l %a5, %d0, %acc0
mac.l %a5, %d0, %acc1
mac.l %a5, %d0, %acc2
move.w %d4, (%a0)
/* LCD write is delayed one pixel to use it for filling the EMAC latency */
/* convert to RGB565, pack and output */
moveq.l #27, %d0
move.l %acc0, %d2
move.l %acc1, %d3
move.l %acc2, %d4
lsr.l %d0, %d2
lsr.l %d0, %d4
moveq.l #26, %d0
lsr.l %d0, %d3
lsl.l #6, %d4
or.l %d3, %d4
lsl.l #5, %d4
or.l %d2, %d4
eor.l %d7, %d4
/* luma for fourth pixel (bottom right) as delta from the third */
clr.l %d2
move.b (%a1)+, %d2
move.l %d2, %d0
sub.l %d1, %d0
mac.l %a5, %d0, %acc0
mac.l %a5, %d0, %acc1
mac.l %a5, %d0, %acc2
move.w %d4, (%a0)
/* LCD write is delayed one pixel to use it for filling the EMAC latency */
/* convert to RGB565, pack and output */
/* movclr reads and clears the accumulators, so the next 2x2 block (or the
 * next call) starts from zero */
moveq.l #27, %d0
movclr.l %acc0, %d1
movclr.l %acc1, %d3
movclr.l %acc2, %d4
lsr.l %d0, %d1
lsr.l %d0, %d4
moveq.l #26, %d0
lsr.l %d0, %d3
lsl.l #6, %d4
or.l %d3, %d4
lsl.l #5, %d4
or.l %d1, %d4
eor.l %d7, %d4
cmp.l %a1, %a4 /* run %a1 up to end of line */
bhi.w .yuv_line_loop
move.w %d4, (%a0) /* write (very) last pixel */
movem.l (%sp), %d2-%d7/%a2-%a6
lea.l (44, %sp), %sp /* restore registers */
rts
.yuv_end:
.size lcd_write_yuv420_lines, .yuv_end - lcd_write_yuv420_lines

View file

@ -325,6 +325,67 @@ bool lcd_active(void)
/*** update functions ***/
/* Line write helper function for lcd_blit_yuv. Writes two lines of YUV420.
* y should hold two lines of Y back to back, 2nd line first.
* c should hold the Cb data for the two lines, followed by the Cr data.
* cwidth is the chroma width, i.e. half the width in pixels.
* Needs EMAC set to saturated, signed integer mode.
*/
extern void lcd_write_yuv420_lines(const unsigned char *y,
const unsigned char *c, int cwidth);
/* Performance function to blit a YUV420 bitmap directly to the LCD.
 *
 * src            - the Y, Cb and Cr planes, in that order
 * src_x, src_y   - top-left corner of the region inside the source image
 * stride         - length of one Y line of the source, in bytes
 * x, y           - destination position on the LCD
 * width, height  - size of the region; odd values are rounded down
 *
 * src_x, src_y, width and height should be even;
 * x, y, width and height have to be within LCD bounds.
 * Nothing is drawn while the display is off or when the (rounded) region
 * is empty.
 */
void lcd_blit_yuv(unsigned char * const src[3],
                  int src_x, int src_y, int stride,
                  int x, int y, int width, int height)
{
    /* Staging buffers in the layout lcd_write_yuv420_lines() expects:
     * two Y lines (2nd line first), then one Cb line followed by one Cr
     * line. */
    unsigned char y_ibuf[LCD_WIDTH*2];
    unsigned char c_ibuf[LCD_WIDTH];
    const unsigned char *ysrc, *usrc, *vsrc;
    const unsigned char *ysrc_max;

    if (!display_on)
        return;

    width &= ~1; /* stay on the safe side */
    height &= ~1;

    /* The loop below always handles at least one line pair, and a negative
     * width would turn into a huge memcpy() length, so reject empty or
     * negative regions before taking the lock. */
    if (width <= 0 || height <= 0)
        return;

    LCD_MUTEX_LOCK();

    lcd_write_reg(R_ENTRY_MODE, R_ENTRY_MODE_HORZ);
    /* Set start position and window */
    lcd_write_reg(R_VERT_RAM_ADDR_POS, ((xoffset + 219) << 8) | xoffset);

    /* Chroma planes are subsampled by two in both directions. */
    ysrc = src[0] + src_y * stride + src_x;
    usrc = src[1] + (src_y * stride >> 2) + (src_x >> 1);
    vsrc = src[2] + (src_y * stride >> 2) + (src_x >> 1);
    ysrc_max = ysrc + height * stride;

    /* The line writer needs saturating EMAC mode.
     * NOTE(review): MACSR is left in EMAC_SATURATE mode on return; confirm
     * that no caller relies on its previous EMAC mode being preserved. */
    coldfire_set_macsr(EMAC_SATURATE);

    do
    {
        /* Open a two-line window at the current destination row. */
        lcd_write_reg(R_HORIZ_RAM_ADDR_POS, ((y + 1) << 8) | y);
        lcd_write_reg(R_RAM_ADDR_SET, ((x+xoffset) << 8) | y);
        lcd_begin_write_gram();

        /* First source line goes to the upper half of y_ibuf, the second
         * one to the lower half ("2nd line first"). */
        memcpy(y_ibuf + width, ysrc, width);
        memcpy(y_ibuf, ysrc + stride, width);
        memcpy(c_ibuf, usrc, width >> 1);
        memcpy(c_ibuf + (width >> 1), vsrc, width >> 1);
        lcd_write_yuv420_lines(y_ibuf, c_ibuf, width >> 1);

        /* Advance by two luma lines / one chroma line. */
        y += 2;
        ysrc += 2 * stride;
        usrc += stride >> 1;
        vsrc += stride >> 1;
    }
    while (ysrc < ysrc_max);

    LCD_MUTEX_UNLOCK();
}
#ifndef BOOTLOADER
/* LCD DMA ISR */
void DMA3(void) __attribute__ ((interrupt_handler, section(".icode")));

View file

@ -158,3 +158,65 @@ void lcd_update(void)
lcd_update_rect(0, 0, LCD_WIDTH, LCD_HEIGHT);
}
/* Blit a YUV420 image by letting the IPU convert it to RGB565.
 * The LCD framebuffer is (mis)used as the IPU's output buffer; the touched
 * area is then pushed to the panel with lcd_update_rect(). */
void lcd_blit_yuv(unsigned char * const src[3],
                  int src_x, int src_y, int stride,
                  int x, int y, int width, int height)
{
    unsigned char const *luma_plane;
    unsigned char const *cb_plane;
    unsigned char const *cr_plane;
    off_t luma_line_offset;
    off_t chroma_offset;

    if (!lcd_is_on)
    {
        return;
    }

    /* Locate the requested region inside each of the three planes; both
     * chroma planes use the same offset. */
    luma_line_offset = stride * src_y;
    chroma_offset = (luma_line_offset >> 2) + (src_x >> 1);

    luma_plane = src[0] + luma_line_offset + src_x;
    cb_plane = src[1] + chroma_offset;
    cr_plane = src[2] + chroma_offset;

    commit_discard_dcache(); /* XXX range */

    /* Bring the IPU into a known, idle state. */
    __cpm_start_ipu();
    IPU_STOP_IPU();
    IPU_RESET_IPU();
    IPU_CLEAR_END_FLAG();
    IPU_DISABLE_RSIZE();
    IPU_DISABLE_IRQ();

    /* Input side: YUV420 frame geometry and plane addresses. */
    IPU_SET_INFMT(INFMT_YUV420);
    IPU_SET_OUTFMT(OUTFMT_RGB565);
    IPU_SET_IN_FM(width, height);
    IPU_SET_Y_STRIDE(stride);
    IPU_SET_UV_STRIDE(stride, stride);
    IPU_SET_Y_ADDR(PHYSADDR((unsigned long)luma_plane));
    IPU_SET_U_ADDR(PHYSADDR((unsigned long)cb_plane));
    IPU_SET_V_ADDR(PHYSADDR((unsigned long)cr_plane));

    /* Output side: the framebuffer, addressed with x/y and width/height
     * swapped. */
    IPU_SET_OUT_ADDR(PHYSADDR((unsigned long)FBADDR(y,x)));
    IPU_SET_OUT_FM(height, width);
    IPU_SET_OUT_STRIDE(height);

    /* Colour space conversion coefficients. */
    IPU_SET_CSC_C0_COEF(YUV_CSC_C0);
    IPU_SET_CSC_C1_COEF(YUV_CSC_C1);
    IPU_SET_CSC_C2_COEF(YUV_CSC_C2);
    IPU_SET_CSC_C3_COEF(YUV_CSC_C3);
    IPU_SET_CSC_C4_COEF(YUV_CSC_C4);

    IPU_RUN_IPU();

    /* Busy-wait until the IPU signals completion (or is no longer
     * enabled). */
    while (!(IPU_POLLING_END_FLAG()) && IPU_IS_ENABLED())
    {
        /* spin */
    }

    /* Shut the IPU down again. */
    IPU_CLEAR_END_FLAG();
    IPU_STOP_IPU();
    IPU_RESET_IPU();
    __cpm_stop_ipu();

    /* YUV speed is limited by LCD speed */
    lcd_update_rect(y, x, height, width);
}