mirror of
https://github.com/Rockbox/rockbox.git
synced 2025-12-10 21:55:10 -05:00
Speed up of iPod nano 1G and iPod color LCD. Use HDD6330 asm part for YUV blitting, introduce special handling for full width screen updates. Speed up is about +30% for YUV on both color/nano1G.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@28930 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
1980fc3a61
commit
b04d676706
2 changed files with 182 additions and 102 deletions
152
firmware/target/arm/ipod/lcd-as-color-nano.S
Executable file
152
firmware/target/arm/ipod/lcd-as-color-nano.S
Executable file
|
|
@ -0,0 +1,152 @@
|
||||||
|
/***************************************************************************
|
||||||
|
* __________ __ ___.
|
||||||
|
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
|
||||||
|
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
|
||||||
|
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
|
||||||
|
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
|
||||||
|
* \/ \/ \/ \/ \/
|
||||||
|
* $Id:$
|
||||||
|
*
|
||||||
|
* Copyright (C) 2010 by Andree Buschmann
|
||||||
|
*
|
||||||
|
* Generic asm helper function used by YUV blitting.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public License
|
||||||
|
* as published by the Free Software Foundation; either version 2
|
||||||
|
* of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
|
||||||
|
* KIND, either express or implied.
|
||||||
|
*
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
|
#include "cpu.h"
|
||||||
|
|
||||||
|
.section .icode, "ax", %progbits
|
||||||
|
|
||||||
|
/****************************************************************************
 * void lcd_yuv_write_inner_loop(unsigned char const * const ysrc,
 *                               unsigned char const * const usrc,
 *                               unsigned char const * const vsrc,
 *                               int width);
 *
 * Converts a run of YUV samples to RGB565 and feeds it to the LCD2 block
 * data register. Every loop pass consumes one U byte, one V byte and two
 * Y bytes, and emits a single 32-bit word holding two byte-swapped RGB565
 * pixels (first pixel in the low halfword, second in the high halfword).
 * The transmit-ok bit of the block control register is polled before each
 * word is stored.
 *
 * The width test sits at the bottom of the loop (width -= 2, repeat while
 * the result is > 0), so one pixel pair is always written.
 *
 * YUV -> RGB565 conversion
 * |R|   |1.000000 -0.000001  1.402000| |Y'|
 * |G| = |1.000000 -0.334136 -0.714136| |Pb|
 * |B|   |1.000000  1.772000  0.000000| |Pr|
 * Scaled, normalized, rounded and tweaked to yield RGB 565:
 * |R|   |74   0 101| |Y' -  16| >> 9
 * |G| = |74 -24 -51| |Cb - 128| >> 8
 * |B|   |74 128   0| |Cr - 128| >> 9
 *
 * The code below uses a luma factor of 37 with shifts of 8 (R, B) and
 * 7 (G), which is the same scaling as 74 with shifts of 9 and 8.
 *
 * Register usage:
 *   r0  = ysrc (post-incremented)     r1  = usrc (post-incremented)
 *   r2  = vsrc (post-incremented)     r3  = remaining width
 *   r4  = LCD2_BLOCK_CTRL - 0x20      r5  = LCD2_BLOCK_DATA
 *   r6  = output word                 r7-r9 = per-pixel scratch (B, G, R)
 *   r10 = rv, r11 = guv, r12 = bu     (chroma terms shared by both pixels)
 */
    .equ        YUVW_LCD2_REGS_HI, 0x70000000  @ upper part of register base
    .equ        YUVW_LCD2_REGS_LO, 0x8a00      @ lower part of register base
    .equ        YUVW_CTRL_OFFSET,  0x20        @ LCD2_BLOCK_CTRL, from base
    .equ        YUVW_DATA_OFFSET,  0x100       @ LCD2_BLOCK_DATA, from base
    .equ        YUVW_TXOK_MASK,    0x1000000   @ LCD2_BLOCK_TXOK bit

    /* Read one luma byte from [r0]++, combine it with the chroma terms in
     * r10 (rv), r11 (guv) and r12 (bu), clamp each channel and leave the
     * packed RGB565 value (not yet byte-swapped) in \dst.
     * Clobbers r7, r8, r9 and the flags.
     *
     * Clamping: the unsigned "higher" condition is true both for values
     * above the maximum and for negative values. MVN of the sign mask
     * gives 0 for a negative input and all ones otherwise; the AND then
     * limits the all-ones case to the channel maximum. */
    .macro      yuvw_luma_to_rgb565 dst
    ldrb    r7, [r0], #1            @ r7 = *ysrc++
    sub     r7, r7, #16             @ remove luma offset
    add     r8, r7, r7, asl #2      @ r8 = 5 * luma
    add     r7, r8, r7, asl #5      @ r7 = Y = 37 * luma

    add     r9, r10, r7, asr #8     @ r9 = R = rv  + (Y >> 8)
    add     r8, r11, r7, asr #7     @ r8 = G = guv + (Y >> 7)
    add     r7, r12, r7, asr #8     @ r7 = B = bu  + (Y >> 8)

    cmp     r9, #31                 @ R into 0..31
    mvnhi   r9, r9, asr #31
    andhi   r9, r9, #31

    cmp     r8, #63                 @ G into 0..63
    mvnhi   r8, r8, asr #31
    andhi   r8, r8, #63

    cmp     r7, #31                 @ B into 0..31
    mvnhi   r7, r7, asr #31
    andhi   r7, r7, #31

    orr     \dst, r7, r8, lsl #5    @ dst = B | G << 5
    orr     \dst, \dst, r9, lsl #11 @ dst |= R << 11
    .endm

    .align      2
    .global     lcd_yuv_write_inner_loop
    .type       lcd_yuv_write_inner_loop, %function

lcd_yuv_write_inner_loop:
    stmfd   sp!, { r4-r11, lr }     @ preserve callee-saved regs and lr

    mov     r4, #YUVW_LCD2_REGS_HI  @ r4 = LCD2_BLOCK_CTRL - 0x20
    add     r4, r4, #YUVW_LCD2_REGS_LO
    add     r5, r4, #YUVW_DATA_OFFSET @ r5 = LCD2_BLOCK_DATA

.Lyuvw_next_pair:
    /* --- chroma terms, computed once per pixel pair --- */
    ldrb    r7, [r1], #1            @ r7 = *usrc++
    ldrb    r8, [r2], #1            @ r8 = *vsrc++

    sub     r7, r7, #128            @ r7 = Cb, centred on zero
    sub     r8, r8, #128            @ r8 = Cr, centred on zero

    add     r10, r8, r8, asl #2     @ r10 = Cr * 5
    add     r10, r10, r8, asl #5    @ r10 = Cr * 37
    add     r10, r10, r8, asl #6    @ r10 = Cr * 101

    add     r11, r8, r8, asl #1     @ r11 = Cr * 3
    add     r11, r11, r11, asl #4   @ r11 = Cr * 51
    add     r11, r11, r7, asl #3    @ r11 = Cr * 51 + Cb * 8
    add     r11, r11, r7, asl #4    @ r11 = Cr * 51 + Cb * 24

    add     r12, r7, #2             @ bu = (Cb*128 + 256) >> 9,
    mov     r12, r12, asr #2        @      reduced to (Cb + 2) >> 2
    add     r10, r10, #256          @ rv = (Cr*101 + 256) >> 9
    mov     r10, r10, asr #9
    rsb     r11, r11, #128          @ guv = (128 - (Cr*51 + Cb*24)) >> 8
    mov     r11, r11, asr #8

    /* --- first pixel: ends up in the low halfword of r6 --- */
    yuvw_luma_to_rgb565 r6

    mov     r7, r6, lsl #8          @ exchange the two bytes of the pixel
    and     r7, r7, #0xff00
    add     r6, r7, r6, lsr #8

    /* --- second pixel: merged into the high halfword of r6 --- */
    yuvw_luma_to_rgb565 r7

    orr     r6, r6, r7, lsl #24     @ low byte of pixel 2  -> bits 31..24
    mov     r7, r7, lsr #8
    orr     r6, r6, r7, lsl #16     @ high byte of pixel 2 -> bits 23..16

    /* r11 is free to reuse here: guv is rebuilt on the next pass */
.Lyuvw_wait_txok:                   @ while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_TXOK));
    ldr     r11, [r4, #YUVW_CTRL_OFFSET]
    tst     r11, #YUVW_TXOK_MASK
    beq     .Lyuvw_wait_txok

    str     r6, [r5]                @ push both pixels to LCD2_BLOCK_DATA

    subs    r3, r3, #2              @ two pixels done
    bgt     .Lyuvw_next_pair        @ continue while width > 0

    ldmpc   regs=r4-r11             @ restore regs (ldmpc is defined outside
                                    @ this file; presumably it also returns)
    .ltorg                          @ dump constant pool
    .size   lcd_yuv_write_inner_loop, .-lcd_yuv_write_inner_loop
|
||||||
|
|
@ -121,38 +121,14 @@ void lcd_init_device(void)
|
||||||
}
|
}
|
||||||
|
|
||||||
/*** update functions ***/
|
/*** update functions ***/
|
||||||
|
extern void lcd_yuv_write_inner_loop(unsigned char const * const ysrc,
|
||||||
|
unsigned char const * const usrc,
|
||||||
|
unsigned char const * const vsrc,
|
||||||
|
int width);
|
||||||
|
|
||||||
#define CSUB_X 2
|
#define CSUB_X 2
|
||||||
#define CSUB_Y 2
|
#define CSUB_Y 2
|
||||||
|
|
||||||
/* YUV -> RGB565 conversion
|
|
||||||
* |R| |1.000000 -0.000001 1.402000| |Y'|
|
|
||||||
* |G| = |1.000000 -0.334136 -0.714136| |Pb|
|
|
||||||
* |B| |1.000000 1.772000 0.000000| |Pr|
|
|
||||||
* Scaled, normalized, rounded and tweaked to yield RGB 565:
|
|
||||||
* |R| |74 0 101| |Y' - 16| >> 9
|
|
||||||
* |G| = |74 -24 -51| |Cb - 128| >> 8
|
|
||||||
* |B| |74 128 0| |Cr - 128| >> 9
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define RGBYFAC 74 /* 1.0 */
|
|
||||||
#define RVFAC 101 /* 1.402 */
|
|
||||||
#define GVFAC (-51) /* -0.714136 */
|
|
||||||
#define GUFAC (-24) /* -0.334136 */
|
|
||||||
#define BUFAC 128 /* 1.772 */
|
|
||||||
|
|
||||||
/* ROUNDOFFS contain constant for correct round-offs as well as
|
|
||||||
constant parts of the conversion matrix (e.g. (Y'-16)*RGBYFAC
|
|
||||||
-> constant part = -16*RGBYFAC). Through extraction of these
|
|
||||||
constant parts we save at least 4 subtractions in the conversion
|
|
||||||
loop */
|
|
||||||
#define ROUNDOFFSR (256 - 16*RGBYFAC - 128*RVFAC)
|
|
||||||
#define ROUNDOFFSG (128 - 16*RGBYFAC - 128*GVFAC - 128*GUFAC)
|
|
||||||
#define ROUNDOFFSB (256 - 16*RGBYFAC - 128*BUFAC)
|
|
||||||
|
|
||||||
#define MAX_5BIT 0x1f
|
|
||||||
#define MAX_6BIT 0x3f
|
|
||||||
|
|
||||||
/* Performance function to blit a YUV bitmap directly to the LCD */
|
/* Performance function to blit a YUV bitmap directly to the LCD */
|
||||||
void lcd_blit_yuv(unsigned char * const src[3],
|
void lcd_blit_yuv(unsigned char * const src[3],
|
||||||
int src_x, int src_y, int stride,
|
int src_x, int src_y, int stride,
|
||||||
|
|
@ -222,7 +198,8 @@ void lcd_blit_yuv(unsigned char * const src[3],
|
||||||
const int stride_div_csub_x = stride/CSUB_X;
|
const int stride_div_csub_x = stride/CSUB_X;
|
||||||
|
|
||||||
h=0;
|
h=0;
|
||||||
while (1) {
|
while (1)
|
||||||
|
{
|
||||||
/* upsampling, YUV->RGB conversion and reduction to RGB565 in one go */
|
/* upsampling, YUV->RGB conversion and reduction to RGB565 in one go */
|
||||||
const unsigned char *ysrc = src[0] + stride * src_y + src_x;
|
const unsigned char *ysrc = src[0] + stride * src_y + src_x;
|
||||||
|
|
||||||
|
|
@ -231,17 +208,11 @@ void lcd_blit_yuv(unsigned char * const src[3],
|
||||||
|
|
||||||
const unsigned char *usrc = src[1] + uvoffset;
|
const unsigned char *usrc = src[1] + uvoffset;
|
||||||
const unsigned char *vsrc = src[2] + uvoffset;
|
const unsigned char *vsrc = src[2] + uvoffset;
|
||||||
const unsigned char *row_end = ysrc + width;
|
|
||||||
|
|
||||||
int yp, up, vp;
|
|
||||||
int red1, green1, blue1;
|
|
||||||
int red2, green2, blue2;
|
|
||||||
|
|
||||||
int rc, gc, bc;
|
|
||||||
int pixels_to_write;
|
int pixels_to_write;
|
||||||
fb_data pixel1,pixel2;
|
|
||||||
|
|
||||||
if (h==0) {
|
if (h==0)
|
||||||
|
{
|
||||||
while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_READY));
|
while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_READY));
|
||||||
LCD2_BLOCK_CONFIG = 0;
|
LCD2_BLOCK_CONFIG = 0;
|
||||||
|
|
||||||
|
|
@ -251,7 +222,8 @@ void lcd_blit_yuv(unsigned char * const src[3],
|
||||||
h = height;
|
h = height;
|
||||||
|
|
||||||
/* calculate how much we can do in one go */
|
/* calculate how much we can do in one go */
|
||||||
if (pixels_to_write > 0x10000) {
|
if (pixels_to_write > 0x10000)
|
||||||
|
{
|
||||||
h = (0x10000/2) / width;
|
h = (0x10000/2) / width;
|
||||||
pixels_to_write = (width * h) * 2;
|
pixels_to_write = (width * h) * 2;
|
||||||
}
|
}
|
||||||
|
|
@ -262,61 +234,7 @@ void lcd_blit_yuv(unsigned char * const src[3],
|
||||||
LCD2_BLOCK_CTRL = 0x34000000;
|
LCD2_BLOCK_CTRL = 0x34000000;
|
||||||
}
|
}
|
||||||
|
|
||||||
do
|
lcd_yuv_write_inner_loop(ysrc,usrc,vsrc,width);
|
||||||
{
|
|
||||||
up = *usrc++;
|
|
||||||
vp = *vsrc++;
|
|
||||||
rc = RVFAC * vp + ROUNDOFFSR;
|
|
||||||
gc = GVFAC * vp + GUFAC * up + ROUNDOFFSG;
|
|
||||||
bc = BUFAC * up + ROUNDOFFSB;
|
|
||||||
|
|
||||||
/* Pixel 1 -> RGB565 */
|
|
||||||
yp = *ysrc++ * RGBYFAC;
|
|
||||||
red1 = (yp + rc) >> 9;
|
|
||||||
green1 = (yp + gc) >> 8;
|
|
||||||
blue1 = (yp + bc) >> 9;
|
|
||||||
|
|
||||||
/* Pixel 2 -> RGB565 */
|
|
||||||
yp = *ysrc++ * RGBYFAC;
|
|
||||||
red2 = (yp + rc) >> 9;
|
|
||||||
green2 = (yp + gc) >> 8;
|
|
||||||
blue2 = (yp + bc) >> 9;
|
|
||||||
|
|
||||||
/* Since out of bounds errors are relatively rare, we check two
|
|
||||||
pixels at once to see if any components are out of bounds, and
|
|
||||||
then fix whichever is broken. This works due to high values and
|
|
||||||
negative values both being !=0 when bitmasking them.
|
|
||||||
We first check for red and blue components (5bit range). */
|
|
||||||
if ((red1 | blue1 | red2 | blue2) & ~MAX_5BIT)
|
|
||||||
{
|
|
||||||
if (red1 & ~MAX_5BIT)
|
|
||||||
red1 = (red1 >> 31) ? 0 : MAX_5BIT;
|
|
||||||
if (blue1 & ~MAX_5BIT)
|
|
||||||
blue1 = (blue1 >> 31) ? 0 : MAX_5BIT;
|
|
||||||
if (red2 & ~MAX_5BIT)
|
|
||||||
red2 = (red2 >> 31) ? 0 : MAX_5BIT;
|
|
||||||
if (blue2 & ~MAX_5BIT)
|
|
||||||
blue2 = (blue2 >> 31) ? 0 : MAX_5BIT;
|
|
||||||
}
|
|
||||||
/* We second check for green component (6bit range) */
|
|
||||||
if ((green1 | green2) & ~MAX_6BIT)
|
|
||||||
{
|
|
||||||
if (green1 & ~MAX_6BIT)
|
|
||||||
green1 = (green1 >> 31) ? 0 : MAX_6BIT;
|
|
||||||
if (green2 & ~MAX_6BIT)
|
|
||||||
green2 = (green2 >> 31) ? 0 : MAX_6BIT;
|
|
||||||
}
|
|
||||||
|
|
||||||
pixel1 = swap16((red1 << 11) | (green1 << 5) | blue1);
|
|
||||||
|
|
||||||
pixel2 = swap16((red2 << 11) | (green2 << 5) | blue2);
|
|
||||||
|
|
||||||
while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_TXOK));
|
|
||||||
|
|
||||||
/* output 2 pixels */
|
|
||||||
LCD2_BLOCK_DATA = (pixel2 << 16) | pixel1;
|
|
||||||
}
|
|
||||||
while (ysrc < row_end);
|
|
||||||
|
|
||||||
src_y++;
|
src_y++;
|
||||||
h--;
|
h--;
|
||||||
|
|
@ -415,6 +333,15 @@ void lcd_update_rect(int x, int y, int width, int height)
|
||||||
LCD2_BLOCK_CONFIG = 0xc0010000 | (pixels_to_write - 1);
|
LCD2_BLOCK_CONFIG = 0xc0010000 | (pixels_to_write - 1);
|
||||||
LCD2_BLOCK_CTRL = 0x34000000;
|
LCD2_BLOCK_CTRL = 0x34000000;
|
||||||
|
|
||||||
|
if (LCD_WIDTH == width) {
|
||||||
|
/* for each row and column in a single loop */
|
||||||
|
for (r = 0; r < h*width; r += 2) {
|
||||||
|
while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_TXOK));
|
||||||
|
|
||||||
|
/* output 2 pixels */
|
||||||
|
LCD2_BLOCK_DATA = *addr++;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
/* for each row */
|
/* for each row */
|
||||||
for (r = 0; r < h; r++) {
|
for (r = 0; r < h; r++) {
|
||||||
/* for each column */
|
/* for each column */
|
||||||
|
|
@ -426,6 +353,7 @@ void lcd_update_rect(int x, int y, int width, int height)
|
||||||
}
|
}
|
||||||
addr += (LCD_WIDTH - width)/2;
|
addr += (LCD_WIDTH - width)/2;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_READY));
|
while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_READY));
|
||||||
LCD2_BLOCK_CONFIG = 0;
|
LCD2_BLOCK_CONFIG = 0;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue