1
0
Fork 0
forked from len0rd/rockbox

Really squeezed lcd_write_data() with tricky assembler optimization. Now it does 114 fps when playing video from memory (way more than the ~75 fps needed, I need to slow down the video player), leaving headroom for audio.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@4278 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Jörg Hohensohn 2004-01-26 22:26:54 +00:00
parent bd7c07a600
commit c546b70e18

View file

@ -7,7 +7,7 @@
* \/ \/ \/ \/ \/ * \/ \/ \/ \/ \/
* $Id$ * $Id$
* *
* Copyright (C) 2002 by Alan Korr * Copyright (C) 2002 by Alan Korr, speedup by Jörg Hohensohn
* *
* All files in this archive are subject to the GNU General Public License. * All files in this archive are subject to the GNU General Public License.
* See the file COPYING in the source tree root for full license agreement. * See the file COPYING in the source tree root for full license agreement.
@ -180,9 +180,10 @@ void lcd_write(bool command, int byte)
Ultimately, all calls to lcd_write(false, xxx) should be substituted by Ultimately, all calls to lcd_write(false, xxx) should be substituted by
this, it will be most efficient if the LCD buffer is tilted to have the this, it will be most efficient if the LCD buffer is tilted to have the
X row as consecutive bytes, so we can write a whole row */ X row as consecutive bytes, so we can write a whole row */
/* FixMe: somehow the red LED is affected by this, although I don't touch
any other bit. Therefore not used yet, except for lcd_blit() */
void lcd_write_data(unsigned char* p_bytes, int count) __attribute__ ((section (".icode"))); void lcd_write_data(unsigned char* p_bytes, int count) __attribute__ ((section (".icode")));
#ifdef HAVE_LCD_CHARCELLS
/* This version works for both Player and Recorder models */
void lcd_write_data(unsigned char* p_bytes, int count) void lcd_write_data(unsigned char* p_bytes, int count)
{ {
do do
@ -193,7 +194,8 @@ void lcd_write_data(unsigned char* p_bytes, int count)
byte = *p_bytes++ << 24; /* fetch to MSB position */ byte = *p_bytes++ << 24; /* fetch to MSB position */
cli(); /* make port modifications atomic */ cli(); /* make port modifications atomic, in case an IRQ uses PBDRL */
/* (currently not the case, so this could be optimized away) */
/* precalculate the values for later bit toggling, init data write */ /* precalculate the values for later bit toggling, init data write */
asm ( asm (
@ -268,9 +270,9 @@ void lcd_write_data(unsigned char* p_bytes, int count)
"mov.b %3,@%4\n" "mov.b %3,@%4\n"
"bf 1f\n" "bf 1f\n"
"mov.b %1,@%4\n" "mov.b %1,@%4\n" /* set SD high, SC low still */
"1: \n" "1: \n"
"or.b %2, @(r0,gbr)\n" "or.b %2, @(r0,gbr)\n" /* rise SC (independent of SD level) */
"or.b %5, @(r0,gbr)\n" /* restore port */ "or.b %5, @(r0,gbr)\n" /* restore port */
: :
@ -287,3 +289,105 @@ void lcd_write_data(unsigned char* p_bytes, int count)
} while (--count); /* tail loop is faster */ } while (--count); /* tail loop is faster */
} }
#else /* #ifdef HAVE_LCD_CHARCELLS */
/* A further optimized version, exploits that SD is on bit 0 for recorders */
void lcd_write_data(unsigned char* p_bytes, int count)
{
do
{
unsigned byte;
unsigned sda1; /* precalculated SC=low,SD=1 */
/* take inverse data, so I can use the NEGC instruction below, it is
the only carry add/sub which does not destroy a source register */
byte = ~(*p_bytes++ << 24); /* fetch to MSB position */
cli(); /* make port modifications atomic, in case an IRQ uses PBDRL */
/* (currently not the case, so this could be optimized away) */
/* precalculate the values for later bit toggling, init data write */
asm (
"mov.b @%1,r0\n" /* r0 = PBDRL */
"or %3,r0\n" /* r0 |= LCD_DS | LCD_SD DS and SD high, */
"and %2,r0\n" /* r0 &= ~(LCD_CS | LCD_SC) CS and SC low */
"mov.b r0,@%1\n" /* PBDRL = r0 */
"neg r0,%0\n" /* sda1 = 0-r0 */
: /* outputs: */
/* %0 */ "=r"(sda1)
: /* inputs: */
/* %1 */ "r"(LCDR),
/* %2 */ "I"(~(LCD_CS | LCD_SC)),
/* %3 */ "I"(LCD_DS | LCD_SD)
: /* trashed */
"r0"
);
/* unrolled loop to serialize the byte */
asm (
"shll %0 \n" /* shift the MSB into carry */
"negc %1, r0\n" /* carry to SD, SC low */
"mov.b r0,@%3\n" /* set data to port */
"or %2, r0\n" /* rise SC (independent of SD level) */
"mov.b r0,@%3\n" /* set to port */
"shll %0 \n"
"negc %1, r0\n"
"mov.b r0,@%3\n"
"or %2, r0\n"
"mov.b r0,@%3\n"
"shll %0 \n"
"negc %1, r0\n"
"mov.b r0,@%3\n"
"or %2, r0\n"
"mov.b r0,@%3\n"
"shll %0 \n"
"negc %1, r0\n"
"mov.b r0,@%3\n"
"or %2, r0\n"
"mov.b r0,@%3\n"
"shll %0 \n"
"negc %1, r0\n"
"mov.b r0,@%3\n"
"or %2, r0\n"
"mov.b r0,@%3\n"
"shll %0 \n"
"negc %1, r0\n"
"mov.b r0,@%3\n"
"or %2, r0\n"
"mov.b r0,@%3\n"
"shll %0 \n"
"negc %1, r0\n"
"mov.b r0,@%3\n"
"or %2, r0\n"
"mov.b r0,@%3\n"
"shll %0 \n"
"negc %1, r0\n"
"mov.b r0,@%3\n"
"or %2, r0\n"
"mov.b r0,@%3\n"
"or %4, r0\n" /* restore port */
"mov.b r0,@%3\n"
: /* outputs: */
: /* inputs: */
/* %0 */ "r"(byte),
/* %1 */ "r"(sda1),
/* %2 */ "I"(LCD_SC),
/* %3 */ "r"(LCDR),
/* %4 */ "I"(LCD_CS|LCD_DS|LCD_SD|LCD_SC)
: /* trashed: */
"r0"
);
sti(); /* end of atomic port modifications */
} while (--count); /* tail loop is faster */
}
#endif /* #ifdef HAVE_LCD_CHARCELLS */