1
0
Fork 0
forked from len0rd/rockbox

Port greylib blitting optimisation to MPIO HD200. ISR speedup is ~10%; further speedup should be possible by using line transfers for accessing the greylib buffers. Thanks to Marcin Bukat for testing.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@26793 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Jens Arnold 2010-06-11 19:53:17 +00:00
parent 3c95dbb208
commit f053b0d606

View file

@ -125,54 +125,45 @@ lcd_mono_data:
/*
 * lcd_grey_data
 *
 * Stack arguments, in the order they are loaded below:
 *   values  -> %a0   pixel values, consumed 8 bytes per loop pass
 *   phases  -> %a1   pixel phases, updated in place, 8 bytes per loop pass
 *   length  -> %a2   scaled by 8 to form the end address, i.e. it counts
 *                    groups of 8 phase bytes
 *
 * For every group of 8 phase bytes the routine:
 *   1. collects bit 7 of each byte into one 8-bit pattern (first byte of the
 *      group in the most significant bit), inverts it, and writes it twice
 *      as a word to the LCD data port;
 *   2. clears bit 7 in each phase byte, adds the matching 8 value bytes and
 *      stores the result back to the phase buffer.
 *
 * Register roles inside the loop:
 *   %d0/%d1  the two longwords of phases being updated
 *   %d2/%d3  bit-7 masks taken from %d0/%d1, later the merged LCD pattern
 *   %d4      constant 24 (shift that brings the top byte down)
 *   %d5      constant 0x204081 (multiplier that gathers the 8 spread bits)
 *   %a3      LCD data port address
 *
 * Saves and restores %d2-%d5/%a2-%a3; %d0-%d1/%a0-%a1 are overwritten.
 */
    .type   lcd_grey_data,@function

lcd_grey_data:
    lea.l   (-6*4, %sp), %sp
    movem.l %d2-%d5/%a2-%a3, (%sp)      /* save the registers used below */
    /* arguments sit past the six saved registers and the return address */
    movem.l (6*4+4, %sp), %a0-%a2       /* values, phases, length */
    add.l   %a2, %a2
    lea.l   (%a1, %a2.l*4), %a2         /* end address = phases + 8 * length */
    lea.l   LCD_BASE_ADDRESS+2, %a3     /* LCD data port address */
    moveq.l #24, %d4                    /* shift count */
    move.l  #0x204081, %d5              /* bit shuffle factor */

.ph_loop:
    movem.l (%a1), %d0-%d1              /* fetch 8 pixel phases */

    move.l  %d0, %d2
    and.l   #0x80808080, %d2            /* %d2 = 0.......1.......2.......3....... */
    eor.l   %d2, %d0                    /* clear bit 7 in phases 0..3 */
    add.l   (%a0)+, %d0                 /* add values to first 4 phases */

    move.l  %d1, %d3
    and.l   #0x80808080, %d3            /* %d3 = 4.......5.......6.......7....... */
    eor.l   %d3, %d1                    /* clear bit 7 in phases 4..7 */
    add.l   (%a0)+, %d1                 /* add values to second 4 phases */

    /*
     * Interleave both masks, then multiply by 1 + 2^7 + 2^14 + 2^21.
     * Every shifted copy lands its bits on positions no other copy uses,
     * so no carries occur and bits 31..24 end up holding phases 0..7 in
     * order.
     */
    lsr.l   #4, %d3                     /* %d3 = ....4.......5.......6.......7... */
    or.l    %d3, %d2                    /* %d2 = 0...4...1...5...2...6...3...7... */
    mulu.l  %d5, %d2                    /* %d2 = 01234567123.567.23..67..3...7... */
    not.l   %d2                         /* negate bits */
    lsr.l   %d4, %d2                    /* %d2 = ........................01234567 */

    move.w  %d2, (%a3)                  /* transfer first LCD byte */
    movem.l %d0-%d1, (%a1)              /* store 8 new pixel phases */
    addq.l  #8, %a1
    move.w  %d2, (%a3)                  /* transfer second LCD byte */

    /*
     * NOTE(review): bls also branches when %a1 == %a2, so the body runs one
     * more time with %a1 equal to the end address computed above. Confirm
     * against the caller whether that extra pass is intended.
     */
    cmp.l   %a2, %a1
    bls.s   .ph_loop

    movem.l (%sp), %d2-%d5/%a2-%a3      /* restore saved registers */
    lea.l   (6*4, %sp), %sp
    rts
.grey_end:
    .size   lcd_grey_data,.grey_end-lcd_grey_data