mirror of
https://github.com/Rockbox/rockbox.git
synced 2025-12-09 21:25:19 -05:00
Speed up of iPod nano 1G and iPod color LCD. Use HDD6330 asm part for YUV blitting, introduce special handling for full width screen updates. Speed up is about +30% for YUV on both color/nano1G.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@28930 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
1980fc3a61
commit
b04d676706
2 changed files with 182 additions and 102 deletions
152
firmware/target/arm/ipod/lcd-as-color-nano.S
Executable file
152
firmware/target/arm/ipod/lcd-as-color-nano.S
Executable file
|
|
@ -0,0 +1,152 @@
|
|||
/***************************************************************************
|
||||
* __________ __ ___.
|
||||
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
|
||||
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
|
||||
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
|
||||
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
|
||||
* \/ \/ \/ \/ \/
|
||||
* $Id:$
|
||||
*
|
||||
* Copyright (C) 2010 by Andree Buschmann
|
||||
*
|
||||
* Generic asm helper function used by YUV blitting.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
|
||||
* KIND, either express or implied.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
#include "config.h"
|
||||
#include "cpu.h"
|
||||
|
||||
.section .icode, "ax", %progbits
|
||||
|
||||
/****************************************************************************
 * void lcd_yuv_write_inner_loop(unsigned char const * const ysrc,
 *                               unsigned char const * const usrc,
 *                               unsigned char const * const vsrc,
 *                               int width);
 *
 * Converts one row of YUV samples to RGB565 and writes it to the LCD2 block
 * data register, two pixels per 32-bit store.
 *
 * In:       r0 = ysrc   one luma byte is read per pixel
 *           r1 = usrc   one Cb byte is read per pixel pair
 *           r2 = vsrc   one Cr byte is read per pixel pair
 *           r3 = width  number of pixels to emit
 * Out:      nothing
 * Saved:    r4-r11 and lr are pushed on entry and restored on exit
 * Modified: r0-r3, r12, flags
 *
 * The loop body runs once before width is tested and consumes two pixels
 * per pass, so callers are expected to pass a positive, even width.
 *
 * Each 32-bit word written holds pixel 1 in bits 0-15 and pixel 2 in
 * bits 16-31; the two bytes of every pixel are swapped before the write.
 *
 * YUV -> RGB565 conversion
 * |R|   |1.000000 -0.000001  1.402000| |Y'|
 * |G| = |1.000000 -0.334136 -0.714136| |Pb|
 * |B|   |1.000000  1.772000  0.000000| |Pr|
 * Scaled, normalized, rounded and tweaked to yield RGB 565:
 * |R|   |74   0 101| |Y' - 16|  >> 9
 * |G| = |74 -24 -51| |Cb - 128| >> 8
 * |B|   |74 128   0| |Cr - 128| >> 9
 *
 * The code works with half of the luma factor (37 instead of 74) and
 * shifts by one bit less, and it rounds the chroma terms separately from
 * the luma term.
 */

    /* Assembly-time constants only; they emit no code or data.
     * The base is split in two because it does not fit one ARM immediate. */
    .equ    YUVLOOP_LCD2_BASE_HI, 0x70000000
    .equ    YUVLOOP_LCD2_BASE_LO, 0x00008a00  @ base = LCD2_BLOCK_CTRL - 0x20
    .equ    YUVLOOP_CTRL_OFFSET,  0x20        @ LCD2_BLOCK_CTRL from base
    .equ    YUVLOOP_DATA_OFFSET,  0x100       @ LCD2_BLOCK_DATA from base
    .equ    YUVLOOP_TXOK,         0x01000000  @ LCD2_BLOCK_TXOK status bit

    .align      2
    .global     lcd_yuv_write_inner_loop
    .type       lcd_yuv_write_inner_loop, %function

lcd_yuv_write_inner_loop:
    @ Register roles inside the loop:
    @   r4  = LCD2 register base          r5  = address of LCD2_BLOCK_DATA
    @   r6  = output word being built     r7-r9 = scratch / B, G, R
    @   r10 = rv   r11 = guv   r12 = bu   (chroma terms shared by both pixels)
    stmfd   sp!, { r4-r11, lr }             @ save callee-saved regs + return

    mov     r4, #YUVLOOP_LCD2_BASE_HI
    add     r4, r4, #YUVLOOP_LCD2_BASE_LO   @ r4 = LCD2_BLOCK_CTRL - 0x20
    add     r5, r4, #YUVLOOP_DATA_OFFSET    @ r5 = LCD2_BLOCK_DATA

.Lyuv_pixel_pair:
    @ ---- chroma: one Cb/Cr sample serves the next two pixels ----
    ldrb    r7, [r1], #1                    @ r7 = *usrc++
    ldrb    r8, [r2], #1                    @ r8 = *vsrc++

    sub     r7, r7, #128                    @ Cb -= 128
    sub     r8, r8, #128                    @ Cr -= 128

    add     r10, r8, r8, asl #2             @ r10 = Cr*5
    add     r10, r10, r8, asl #5            @     + Cr*32
    add     r10, r10, r8, asl #6            @     + Cr*64  = Cr*101

    add     r11, r8, r8, asl #1             @ r11 = Cr*3
    add     r11, r11, r11, asl #4           @     = Cr*51
    add     r11, r11, r7, asl #3            @     + Cb*8
    add     r11, r11, r7, asl #4            @     + Cb*16  = Cr*51 + Cb*24

    add     r12, r7, #2                     @ bu = (Cb*128 + 256) >> 9,
    mov     r12, r12, asr #2                @      reduced to (Cb + 2) >> 2
    add     r10, r10, #256                  @ rv = (Cr*101 + 256) >> 9
    mov     r10, r10, asr #9
    rsb     r11, r11, #128                  @ guv = (128 - r11) >> 8
    mov     r11, r11, asr #8

    @ ---- pixel 1 ----
    ldrb    r7, [r0], #1                    @ r7 = *ysrc++
    sub     r7, r7, #16                     @ Y' - 16
    add     r8, r7, r7, asl #2              @ r8 = (Y' - 16) * 5
    add     r7, r8, r7, asl #5              @ Y  = (Y' - 16) * 37

    add     r9, r10, r7, asr #8             @ R = (Y >> 8) + rv
    add     r8, r11, r7, asr #7             @ G = (Y >> 7) + guv
    add     r7, r12, r7, asr #8             @ B = (Y >> 8) + bu

    @ Clamp: the unsigned "hi" test catches both values above the maximum
    @ and negative values. asr #31 yields 0 or -1, mvn inverts that, and
    @ the mask leaves the maximum (was too large) or 0 (was negative).
    cmp     r9, #31                         @ clamp R to 0..31
    mvnhi   r9, r9, asr #31
    andhi   r9, r9, #31

    cmp     r8, #63                         @ clamp G to 0..63
    mvnhi   r8, r8, asr #31
    andhi   r8, r8, #63

    cmp     r7, #31                         @ clamp B to 0..31
    mvnhi   r7, r7, asr #31
    andhi   r7, r7, #31

    orr     r6, r7, r8, lsl #5              @ r6 = B | G << 5
    orr     r6, r6, r9, lsl #11             @       | R << 11

    mov     r7, r6, lsl #8                  @ swap the two bytes of pixel 1
    and     r7, r7, #0xff00                 @ r7 = low byte moved to bits 8-15
    add     r6, r7, r6, lsr #8              @ r6 = byte-swapped pixel 1

    @ ---- pixel 2 (same chroma terms, next luma sample) ----
    ldrb    r7, [r0], #1                    @ r7 = *ysrc++
    sub     r7, r7, #16                     @ Y' - 16
    add     r8, r7, r7, asl #2              @ r8 = (Y' - 16) * 5
    add     r7, r8, r7, asl #5              @ Y  = (Y' - 16) * 37

    add     r9, r10, r7, asr #8             @ R = (Y >> 8) + rv
    add     r8, r11, r7, asr #7             @ G = (Y >> 7) + guv
    add     r7, r12, r7, asr #8             @ B = (Y >> 8) + bu

    cmp     r9, #31                         @ clamp R to 0..31
    mvnhi   r9, r9, asr #31
    andhi   r9, r9, #31

    cmp     r8, #63                         @ clamp G to 0..63
    mvnhi   r8, r8, asr #31
    andhi   r8, r8, #63

    cmp     r7, #31                         @ clamp B to 0..31
    mvnhi   r7, r7, asr #31
    andhi   r7, r7, #31

    orr     r7, r7, r8, lsl #5              @ r7 = B | G << 5
    orr     r7, r7, r9, lsl #11             @       | R << 11

    @ Byte-swap pixel 2 while merging it into the upper halfword of r6.
    orr     r6, r6, r7, lsl #24             @ low byte of pixel 2 -> bits 24-31
    mov     r7, r7, lsr #8                  @ r7 = high byte of pixel 2
    orr     r6, r6, r7, lsl #16             @ high byte of pixel 2 -> bits 16-23

    @ while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_TXOK));
    @ r11 is free to use as scratch here: guv is recomputed at the top of
    @ the next pass before it is read again.
.Lyuv_wait_txok:
    ldr     r11, [r4, #YUVLOOP_CTRL_OFFSET]
    tst     r11, #YUVLOOP_TXOK
    beq     .Lyuv_wait_txok

    str     r6, [r5]                        @ send two pixels

    subs    r3, r3, #2                      @ width -= 2
    bgt     .Lyuv_pixel_pair                @ loop while pixels remain

    @ ldmpc is a project macro that is not defined in this file; it is
    @ expected to pop the listed registers together with the saved return
    @ address (the function has no other return instruction).
    ldmpc   regs=r4-r11                     @ restore regs and return
    .ltorg                                  @ dump constant pool
    .size   lcd_yuv_write_inner_loop, .-lcd_yuv_write_inner_loop
|
||||
|
|
@ -121,38 +121,14 @@ void lcd_init_device(void)
|
|||
}
|
||||
|
||||
/*** update functions ***/
|
||||
extern void lcd_yuv_write_inner_loop(unsigned char const * const ysrc,
|
||||
unsigned char const * const usrc,
|
||||
unsigned char const * const vsrc,
|
||||
int width);
|
||||
|
||||
#define CSUB_X 2
|
||||
#define CSUB_Y 2
|
||||
|
||||
/* YUV -> RGB565 conversion
|
||||
* |R| |1.000000 -0.000001 1.402000| |Y'|
|
||||
* |G| = |1.000000 -0.334136 -0.714136| |Pb|
|
||||
* |B| |1.000000 1.772000 0.000000| |Pr|
|
||||
* Scaled, normalized, rounded and tweaked to yield RGB 565:
|
||||
* |R| |74 0 101| |Y' - 16| >> 9
|
||||
* |G| = |74 -24 -51| |Cb - 128| >> 8
|
||||
* |B| |74 128 0| |Cr - 128| >> 9
|
||||
*/
|
||||
|
||||
#define RGBYFAC 74 /* 1.0 */
|
||||
#define RVFAC 101 /* 1.402 */
|
||||
#define GVFAC (-51) /* -0.714136 */
|
||||
#define GUFAC (-24) /* -0.334136 */
|
||||
#define BUFAC 128 /* 1.772 */
|
||||
|
||||
/* ROUNDOFFS contain constant for correct round-offs as well as
|
||||
constant parts of the conversion matrix (e.g. (Y'-16)*RGBYFAC
|
||||
-> constant part = -16*RGBYFAC). Through extraction of these
|
||||
constant parts we save at least 4 subtractions in the conversion
|
||||
loop */
|
||||
#define ROUNDOFFSR (256 - 16*RGBYFAC - 128*RVFAC)
|
||||
#define ROUNDOFFSG (128 - 16*RGBYFAC - 128*GVFAC - 128*GUFAC)
|
||||
#define ROUNDOFFSB (256 - 16*RGBYFAC - 128*BUFAC)
|
||||
|
||||
#define MAX_5BIT 0x1f
|
||||
#define MAX_6BIT 0x3f
|
||||
|
||||
/* Performance function to blit a YUV bitmap directly to the LCD */
|
||||
void lcd_blit_yuv(unsigned char * const src[3],
|
||||
int src_x, int src_y, int stride,
|
||||
|
|
@ -222,7 +198,8 @@ void lcd_blit_yuv(unsigned char * const src[3],
|
|||
const int stride_div_csub_x = stride/CSUB_X;
|
||||
|
||||
h=0;
|
||||
while (1) {
|
||||
while (1)
|
||||
{
|
||||
/* upsampling, YUV->RGB conversion and reduction to RGB565 in one go */
|
||||
const unsigned char *ysrc = src[0] + stride * src_y + src_x;
|
||||
|
||||
|
|
@ -231,17 +208,11 @@ void lcd_blit_yuv(unsigned char * const src[3],
|
|||
|
||||
const unsigned char *usrc = src[1] + uvoffset;
|
||||
const unsigned char *vsrc = src[2] + uvoffset;
|
||||
const unsigned char *row_end = ysrc + width;
|
||||
|
||||
int yp, up, vp;
|
||||
int red1, green1, blue1;
|
||||
int red2, green2, blue2;
|
||||
|
||||
int rc, gc, bc;
|
||||
int pixels_to_write;
|
||||
fb_data pixel1,pixel2;
|
||||
|
||||
if (h==0) {
|
||||
if (h==0)
|
||||
{
|
||||
while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_READY));
|
||||
LCD2_BLOCK_CONFIG = 0;
|
||||
|
||||
|
|
@ -251,7 +222,8 @@ void lcd_blit_yuv(unsigned char * const src[3],
|
|||
h = height;
|
||||
|
||||
/* calculate how much we can do in one go */
|
||||
if (pixels_to_write > 0x10000) {
|
||||
if (pixels_to_write > 0x10000)
|
||||
{
|
||||
h = (0x10000/2) / width;
|
||||
pixels_to_write = (width * h) * 2;
|
||||
}
|
||||
|
|
@ -262,61 +234,7 @@ void lcd_blit_yuv(unsigned char * const src[3],
|
|||
LCD2_BLOCK_CTRL = 0x34000000;
|
||||
}
|
||||
|
||||
do
|
||||
{
|
||||
up = *usrc++;
|
||||
vp = *vsrc++;
|
||||
rc = RVFAC * vp + ROUNDOFFSR;
|
||||
gc = GVFAC * vp + GUFAC * up + ROUNDOFFSG;
|
||||
bc = BUFAC * up + ROUNDOFFSB;
|
||||
|
||||
/* Pixel 1 -> RGB565 */
|
||||
yp = *ysrc++ * RGBYFAC;
|
||||
red1 = (yp + rc) >> 9;
|
||||
green1 = (yp + gc) >> 8;
|
||||
blue1 = (yp + bc) >> 9;
|
||||
|
||||
/* Pixel 2 -> RGB565 */
|
||||
yp = *ysrc++ * RGBYFAC;
|
||||
red2 = (yp + rc) >> 9;
|
||||
green2 = (yp + gc) >> 8;
|
||||
blue2 = (yp + bc) >> 9;
|
||||
|
||||
/* Since out of bounds errors are relatively rare, we check two
|
||||
pixels at once to see if any components are out of bounds, and
|
||||
then fix whichever is broken. This works due to high values and
|
||||
negative values both being !=0 when bitmasking them.
|
||||
We first check for red and blue components (5bit range). */
|
||||
if ((red1 | blue1 | red2 | blue2) & ~MAX_5BIT)
|
||||
{
|
||||
if (red1 & ~MAX_5BIT)
|
||||
red1 = (red1 >> 31) ? 0 : MAX_5BIT;
|
||||
if (blue1 & ~MAX_5BIT)
|
||||
blue1 = (blue1 >> 31) ? 0 : MAX_5BIT;
|
||||
if (red2 & ~MAX_5BIT)
|
||||
red2 = (red2 >> 31) ? 0 : MAX_5BIT;
|
||||
if (blue2 & ~MAX_5BIT)
|
||||
blue2 = (blue2 >> 31) ? 0 : MAX_5BIT;
|
||||
}
|
||||
/* We second check for green component (6bit range) */
|
||||
if ((green1 | green2) & ~MAX_6BIT)
|
||||
{
|
||||
if (green1 & ~MAX_6BIT)
|
||||
green1 = (green1 >> 31) ? 0 : MAX_6BIT;
|
||||
if (green2 & ~MAX_6BIT)
|
||||
green2 = (green2 >> 31) ? 0 : MAX_6BIT;
|
||||
}
|
||||
|
||||
pixel1 = swap16((red1 << 11) | (green1 << 5) | blue1);
|
||||
|
||||
pixel2 = swap16((red2 << 11) | (green2 << 5) | blue2);
|
||||
|
||||
while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_TXOK));
|
||||
|
||||
/* output 2 pixels */
|
||||
LCD2_BLOCK_DATA = (pixel2 << 16) | pixel1;
|
||||
}
|
||||
while (ysrc < row_end);
|
||||
lcd_yuv_write_inner_loop(ysrc,usrc,vsrc,width);
|
||||
|
||||
src_y++;
|
||||
h--;
|
||||
|
|
@ -415,16 +333,26 @@ void lcd_update_rect(int x, int y, int width, int height)
|
|||
LCD2_BLOCK_CONFIG = 0xc0010000 | (pixels_to_write - 1);
|
||||
LCD2_BLOCK_CTRL = 0x34000000;
|
||||
|
||||
/* for each row */
|
||||
for (r = 0; r < h; r++) {
|
||||
/* for each column */
|
||||
for (c = 0; c < width; c += 2) {
|
||||
while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_TXOK));
|
||||
|
||||
/* output 2 pixels */
|
||||
LCD2_BLOCK_DATA = *addr++;
|
||||
if (LCD_WIDTH == width) {
|
||||
/* for each row and column in a single loop */
|
||||
for (r = 0; r < h*width; r += 2) {
|
||||
while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_TXOK));
|
||||
|
||||
/* output 2 pixels */
|
||||
LCD2_BLOCK_DATA = *addr++;
|
||||
}
|
||||
} else {
|
||||
/* for each row */
|
||||
for (r = 0; r < h; r++) {
|
||||
/* for each column */
|
||||
for (c = 0; c < width; c += 2) {
|
||||
while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_TXOK));
|
||||
|
||||
/* output 2 pixels */
|
||||
LCD2_BLOCK_DATA = *addr++;
|
||||
}
|
||||
addr += (LCD_WIDTH - width)/2;
|
||||
}
|
||||
addr += (LCD_WIDTH - width)/2;
|
||||
}
|
||||
|
||||
while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_READY));
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue