Port Tom Meyer's lcd_yuv_blit() optimisations for the iPod 5G to the iPod Photo/Color and Nano, giving a similar 10-15% speedup in mpegplayer.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@10629 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Dave Chapman 2006-08-17 09:03:16 +00:00
parent 30f237303b
commit 5b71470185

View file

@ -404,14 +404,15 @@ void lcd_blit(const fb_data* data, int x, int by, int width,
#define CSUB_Y 2 #define CSUB_Y 2
#define RYFAC (31*257) #define RYFAC (31*257)
#define GYFAC (63*257) #define GYFAC (31*257)
#define BYFAC (31*257) #define BYFAC (31*257)
#define RVFAC 11170 /* 31 * 257 * 1.402 */ #define RVFAC 11170 /* 31 * 257 * 1.402 */
#define GVFAC (-11563) /* 63 * 257 * -0.714136 */ #define GVFAC (-5690) /* 31 * 257 * -0.714136 */
#define GUFAC (-5572) /* 63 * 257 * -0.344136 */ #define GUFAC (-2742) /* 31 * 257 * -0.344136 */
#define BUFAC 14118 /* 31 * 257 * 1.772 */ #define BUFAC 14118 /* 31 * 257 * 1.772 */
#define ROUNDOFFS (127*257) #define ROUNDOFFS (127*257)
#define ROUNDOFFSG (63*257)
/* Performance function to blit a YUV bitmap directly to the LCD */ /* Performance function to blit a YUV bitmap directly to the LCD */
void lcd_yuv_blit(unsigned char * const src[3], void lcd_yuv_blit(unsigned char * const src[3],
@ -478,14 +479,27 @@ void lcd_yuv_blit(unsigned char * const src[3],
lcd_send_lo(LCD_CNTL_WRITE_TO_GRAM); lcd_send_lo(LCD_CNTL_WRITE_TO_GRAM);
} }
const int stride_div_csub_x = stride/CSUB_X;
h=0; h=0;
while (1) { while (1) {
int pixels_to_write; /* upsampling, YUV->RGB conversion and reduction to RGB565 in one go */
const unsigned char *ysrc = src[0] + stride * src_y + src_x; const unsigned char *ysrc = src[0] + stride * src_y + src_x;
const int uvoffset = stride_div_csub_x * (src_y/CSUB_Y) +
(src_x/CSUB_X);
const unsigned char *usrc = src[1] + uvoffset;
const unsigned char *vsrc = src[2] + uvoffset;
const unsigned char *row_end = ysrc + width; const unsigned char *row_end = ysrc + width;
int y, u, v; int y, u, v;
int red, green, blue; int red1, green1, blue1;
int red2, green2, blue2;
unsigned rbits, gbits, bbits; unsigned rbits, gbits, bbits;
int rc, gc, bc;
int pixels_to_write;
fb_data pixel1,pixel2; fb_data pixel1,pixel2;
if (h==0) { if (h==0) {
@ -509,91 +523,97 @@ void lcd_yuv_blit(unsigned char * const src[3],
outl(0x34000000, 0x70008a20); outl(0x34000000, 0x70008a20);
} }
/* upsampling, YUV->RGB conversion and reduction to RGB565 in one go */ do
const unsigned char *usrc = src[1] + (stride/CSUB_X) * (src_y/CSUB_Y) {
+ (src_x/CSUB_X);
const unsigned char *vsrc = src[2] + (stride/CSUB_X) * (src_y/CSUB_Y)
+ (src_x/CSUB_X);
int rc, gc, bc;
u = *usrc++ - 128; u = *usrc++ - 128;
v = *vsrc++ - 128; v = *vsrc++ - 128;
rc = RVFAC * v + ROUNDOFFS; rc = RVFAC * v + ROUNDOFFS;
gc = GVFAC * v + GUFAC * u + ROUNDOFFS; gc = GVFAC * v + GUFAC * u + ROUNDOFFSG;
bc = BUFAC * u + ROUNDOFFS; bc = BUFAC * u + ROUNDOFFS;
do /* Pixel 1 */
{
y = *ysrc++; y = *ysrc++;
red = RYFAC * y + rc;
green = GYFAC * y + gc;
blue = BYFAC * y + bc;
if ((unsigned)red > (RYFAC*255+ROUNDOFFS)) red1 = RYFAC * y + rc;
green1 = GYFAC * y + gc;
blue1 = BYFAC * y + bc;
/* Pixel 2 */
y = *ysrc++;
red2 = RYFAC * y + rc;
green2 = GYFAC * y + gc;
blue2 = BYFAC * y + bc;
/* Since out of bounds errors are relatively rare, we check two
pixels at once to see if any components are out of bounds, and
then fix whichever is broken. This works because high values and
negative values both become larger than the cutoff when
cast to unsigned, and ORing them together checks all of them
simultaneously. */
if (((unsigned)(red1 | green1 | blue1 |
red2 | green2 | blue2)) > (RYFAC*255+ROUNDOFFS)) {
if (((unsigned)(red1 | green1 | blue1)) >
(RYFAC*255+ROUNDOFFS)) {
if ((unsigned)red1 > (RYFAC*255+ROUNDOFFS))
{ {
if (red < 0) if (red1 < 0)
red = 0; red1 = 0;
else else
red = (RYFAC*255+ROUNDOFFS); red1 = (RYFAC*255+ROUNDOFFS);
} }
if ((unsigned)green > (GYFAC*255+ROUNDOFFS)) if ((unsigned)green1 > (GYFAC*255+ROUNDOFFSG))
{ {
if (green < 0) if (green1 < 0)
green = 0; green1 = 0;
else else
green = (GYFAC*255+ROUNDOFFS); green1 = (GYFAC*255+ROUNDOFFSG);
} }
if ((unsigned)blue > (BYFAC*255+ROUNDOFFS)) if ((unsigned)blue1 > (BYFAC*255+ROUNDOFFS))
{ {
if (blue < 0) if (blue1 < 0)
blue = 0; blue1 = 0;
else else
blue = (BYFAC*255+ROUNDOFFS); blue1 = (BYFAC*255+ROUNDOFFS);
} }
rbits = ((unsigned)red) >> 16 ; }
gbits = ((unsigned)green) >> 16 ;
bbits = ((unsigned)blue) >> 16 ; if (((unsigned)(red2 | green2 | blue2)) >
(RYFAC*255+ROUNDOFFS)) {
if ((unsigned)red2 > (RYFAC*255+ROUNDOFFS))
{
if (red2 < 0)
red2 = 0;
else
red2 = (RYFAC*255+ROUNDOFFS);
}
if ((unsigned)green2 > (GYFAC*255+ROUNDOFFSG))
{
if (green2 < 0)
green2 = 0;
else
green2 = (GYFAC*255+ROUNDOFFSG);
}
if ((unsigned)blue2 > (BYFAC*255+ROUNDOFFS))
{
if (blue2 < 0)
blue2 = 0;
else
blue2 = (BYFAC*255+ROUNDOFFS);
}
}
}
rbits = red1 >> 16 ;
gbits = green1 >> 15 ;
bbits = blue1 >> 16 ;
pixel1 = swap16((rbits << 11) | (gbits << 5) | bbits); pixel1 = swap16((rbits << 11) | (gbits << 5) | bbits);
y = *ysrc++; rbits = red2 >> 16 ;
red = RYFAC * y + rc; gbits = green2 >> 15 ;
green = GYFAC * y + gc; bbits = blue2 >> 16 ;
blue = BYFAC * y + bc;
if ((unsigned)red > (RYFAC*255+ROUNDOFFS))
{
if (red < 0)
red = 0;
else
red = (RYFAC*255+ROUNDOFFS);
}
if ((unsigned)green > (GYFAC*255+ROUNDOFFS))
{
if (green < 0)
green = 0;
else
green = (GYFAC*255+ROUNDOFFS);
}
if ((unsigned)blue > (BYFAC*255+ROUNDOFFS))
{
if (blue < 0)
blue = 0;
else
blue = (BYFAC*255+ROUNDOFFS);
}
rbits = ((unsigned)red) >> 16 ;
gbits = ((unsigned)green) >> 16 ;
bbits = ((unsigned)blue) >> 16 ;
pixel2 = swap16((rbits << 11) | (gbits << 5) | bbits); pixel2 = swap16((rbits << 11) | (gbits << 5) | bbits);
u = *usrc++ - 128;
v = *vsrc++ - 128;
rc = RVFAC * v + ROUNDOFFS;
gc = GVFAC * v + GUFAC * u + ROUNDOFFS;
bc = BUFAC * u + ROUNDOFFS;
while ((inl(0x70008a20) & 0x1000000) == 0); while ((inl(0x70008a20) & 0x1000000) == 0);
/* output 2 pixels */ /* output 2 pixels */