forked from len0rd/rockbox
* Assembler-optimised gray_update_rect() and _writearray() for ARM (greyscale iPods). * Some slight optimisations for Coldfire (H1x0) and SH1 (Archos). * Comment and formatting cleanup.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@10473 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
8921b34e4b
commit
c00d799fa3
3 changed files with 675 additions and 406 deletions
|
@ -648,14 +648,165 @@ void gray_update_rect(int x, int y, int width, int height)
|
||||||
cbuf = _gray_info.cur_buffer + srcofs_row;
|
cbuf = _gray_info.cur_buffer + srcofs_row;
|
||||||
bbuf = _gray_info.back_buffer + srcofs_row;
|
bbuf = _gray_info.back_buffer + srcofs_row;
|
||||||
|
|
||||||
#if 0 /* CPU specific asm versions will go here */
|
#ifdef CPU_ARM
|
||||||
|
asm volatile (
|
||||||
|
"ldr r0, [%[cbuf]] \n"
|
||||||
|
"ldr r1, [%[bbuf]] \n"
|
||||||
|
"eor r1, r0, r1 \n"
|
||||||
|
"ldr r0, [%[cbuf], #4] \n"
|
||||||
|
"ldr %[chg], [%[bbuf], #4] \n"
|
||||||
|
"eor %[chg], r0, %[chg] \n"
|
||||||
|
"orr %[chg], %[chg], r1 \n"
|
||||||
|
: /* outputs */
|
||||||
|
[chg] "=&r"(change)
|
||||||
|
: /* inputs */
|
||||||
|
[cbuf]"r"(cbuf),
|
||||||
|
[bbuf]"r"(bbuf)
|
||||||
|
: /* clobbers */
|
||||||
|
"r0", "r1"
|
||||||
|
);
|
||||||
|
|
||||||
|
if (change != 0)
|
||||||
|
{
|
||||||
|
unsigned char *addr, *end;
|
||||||
|
unsigned mask, trash;
|
||||||
|
|
||||||
|
pat_ptr = &pat_stack[8];
|
||||||
|
|
||||||
|
/* precalculate the bit patterns with random shifts
|
||||||
|
* for all 8 pixels and put them on an extra "stack" */
|
||||||
|
asm volatile (
|
||||||
|
"mov r3, #8 \n" /* loop count */
|
||||||
|
"mov %[mask], #0 \n"
|
||||||
|
|
||||||
|
".ur_pre_loop: \n"
|
||||||
|
"mov %[mask], %[mask], lsl #1 \n" /* shift mask */
|
||||||
|
"ldrb r0, [%[cbuf]], #1 \n" /* read current buffer */
|
||||||
|
"ldrb r1, [%[bbuf]] \n" /* read back buffer */
|
||||||
|
"strb r0, [%[bbuf]], #1 \n" /* update back buffer */
|
||||||
|
"mov r2, #0 \n" /* preset for skipped pixel */
|
||||||
|
"cmp r0, r1 \n" /* no change? */
|
||||||
|
"beq .ur_skip \n" /* -> skip */
|
||||||
|
|
||||||
|
"ldr r2, [%[bpat], r0, lsl #2] \n" /* r2 = bitpattern[byte]; */
|
||||||
|
|
||||||
|
"add r0, %[rnd], %[rnd], lsl #3 \n" /* multiply by 75 */
|
||||||
|
"add %[rnd], %[rnd], %[rnd], lsl #1 \n"
|
||||||
|
"add %[rnd], %[rnd], r0, lsl #3 \n"
|
||||||
|
"add %[rnd], %[rnd], #74 \n" /* add another 74 */
|
||||||
|
/* Since the lower bits are not very random: get bits 8..15 (need max. 5) */
|
||||||
|
"and r1, %[rmsk], %[rnd], lsr #8 \n" /* ..and mask out unneeded bits */
|
||||||
|
|
||||||
|
"cmp r1, %[dpth] \n" /* random >= depth ? */
|
||||||
|
"subhs r1, r1, %[dpth] \n" /* yes: random -= depth */
|
||||||
|
|
||||||
|
"mov r0, r2, lsl r1 \n" /** rotate pattern **/
|
||||||
|
"sub r1, %[dpth], r1 \n"
|
||||||
|
"orr r2, r0, r2, lsr r1 \n"
|
||||||
|
|
||||||
|
"orr %[mask], %[mask], #1 \n" /* set mask bit */
|
||||||
|
|
||||||
|
".ur_skip: \n"
|
||||||
|
"str r2, [%[patp], #-4]! \n" /* push on pattern stack */
|
||||||
|
|
||||||
|
"subs r3, r3, #1 \n" /* loop 8 times (pixel block) */
|
||||||
|
"bne .ur_pre_loop \n"
|
||||||
|
: /* outputs */
|
||||||
|
[cbuf]"+r"(cbuf),
|
||||||
|
[bbuf]"+r"(bbuf),
|
||||||
|
[patp]"+r"(pat_ptr),
|
||||||
|
[rnd] "+r"(_gray_random_buffer),
|
||||||
|
[mask]"=&r"(mask)
|
||||||
|
: /* inputs */
|
||||||
|
[bpat]"r"(_gray_info.bitpattern),
|
||||||
|
[dpth]"r"(_gray_info.depth),
|
||||||
|
[rmsk]"r"(_gray_info.randmask)
|
||||||
|
: /* clobbers */
|
||||||
|
"r0", "r1", "r2", "r3"
|
||||||
|
);
|
||||||
|
|
||||||
|
addr = dst_row;
|
||||||
|
end = addr + MULU16(_gray_info.depth, _gray_info.plane_size);
|
||||||
|
|
||||||
|
/* set the bits for all 8 pixels in all bytes according to the
|
||||||
|
* precalculated patterns on the pattern stack */
|
||||||
|
asm volatile (
|
||||||
|
"ldmia %[patp], {r2 - r8, %[rx]} \n" /* pop all 8 patterns */
|
||||||
|
|
||||||
|
"mvn %[mask], %[mask] \n" /* "set" mask -> "keep" mask */
|
||||||
|
"ands %[mask], %[mask], #0xff \n"
|
||||||
|
"beq .ur_sloop \n" /* short loop if nothing to keep */
|
||||||
|
|
||||||
|
".ur_floop: \n" /** full loop (there are bits to keep)**/
|
||||||
|
"movs %[rx], %[rx], lsr #1 \n" /* shift out pattern bit */
|
||||||
|
"adc r0, r0, r0 \n" /* put bit into LSB for byte */
|
||||||
|
"movs r8, r8, lsr #1 \n"
|
||||||
|
"adc r0, r0, r0 \n"
|
||||||
|
"movs r7, r7, lsr #1 \n"
|
||||||
|
"adc r0, r0, r0 \n"
|
||||||
|
"movs r6, r6, lsr #1 \n"
|
||||||
|
"adc r0, r0, r0 \n"
|
||||||
|
"movs r5, r5, lsr #1 \n"
|
||||||
|
"adc r0, r0, r0 \n"
|
||||||
|
"movs r4, r4, lsr #1 \n"
|
||||||
|
"adc r0, r0, r0 \n"
|
||||||
|
"movs r3, r3, lsr #1 \n"
|
||||||
|
"adc r0, r0, r0 \n"
|
||||||
|
"movs r2, r2, lsr #1 \n"
|
||||||
|
"adc r0, r0, r0 \n"
|
||||||
|
|
||||||
|
"ldrb r1, [%[addr]] \n" /* read old value */
|
||||||
|
"and r1, r1, %[mask] \n" /* mask out replaced bits */
|
||||||
|
"orr r1, r1, r0 \n" /* set new bits */
|
||||||
|
"strb r1, [%[addr]], %[psiz] \n" /* store value, advance to next bpl */
|
||||||
|
|
||||||
|
"cmp %[end], %[addr] \n" /* loop for all bitplanes */
|
||||||
|
"bne .ur_floop \n"
|
||||||
|
|
||||||
|
"b .ur_end \n"
|
||||||
|
|
||||||
|
".ur_sloop: \n" /** short loop (nothing to keep) **/
|
||||||
|
"movs %[rx], %[rx], lsr #1 \n" /* shift out pattern bit */
|
||||||
|
"adc r0, r0, r0 \n" /* put bit into LSB for byte */
|
||||||
|
"movs r8, r8, lsr #1 \n"
|
||||||
|
"adc r0, r0, r0 \n"
|
||||||
|
"movs r7, r7, lsr #1 \n"
|
||||||
|
"adc r0, r0, r0 \n"
|
||||||
|
"movs r6, r6, lsr #1 \n"
|
||||||
|
"adc r0, r0, r0 \n"
|
||||||
|
"movs r5, r5, lsr #1 \n"
|
||||||
|
"adc r0, r0, r0 \n"
|
||||||
|
"movs r4, r4, lsr #1 \n"
|
||||||
|
"adc r0, r0, r0 \n"
|
||||||
|
"movs r3, r3, lsr #1 \n"
|
||||||
|
"adc r0, r0, r0 \n"
|
||||||
|
"movs r2, r2, lsr #1 \n"
|
||||||
|
"adc r0, r0, r0 \n"
|
||||||
|
|
||||||
|
"strb r0, [%[addr]], %[psiz] \n" /* store byte, advance to next bpl */
|
||||||
|
|
||||||
|
"cmp %[end], %[addr] \n" /* loop for all bitplanes */
|
||||||
|
"bne .ur_sloop \n"
|
||||||
|
|
||||||
|
".ur_end: \n"
|
||||||
|
: /* outputs */
|
||||||
|
[addr]"+r"(addr),
|
||||||
|
[mask]"+r"(mask),
|
||||||
|
[rx] "=&r"(trash)
|
||||||
|
: /* inputs */
|
||||||
|
[psiz]"r"(_gray_info.plane_size),
|
||||||
|
[end] "r"(end),
|
||||||
|
[patp]"[rx]"(pat_ptr)
|
||||||
|
: /* clobbers */
|
||||||
|
"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8"
|
||||||
|
);
|
||||||
|
}
|
||||||
#else /* C version, for reference*/
|
#else /* C version, for reference*/
|
||||||
|
#warning C version of gray_update_rect() used
|
||||||
(void)pat_ptr;
|
(void)pat_ptr;
|
||||||
/* check whether anything changed in the 8-pixel block */
|
/* check whether anything changed in the 8-pixel block */
|
||||||
change = *(uint32_t *)cbuf ^ *(uint32_t *)bbuf;
|
change = *(uint32_t *)cbuf ^ *(uint32_t *)bbuf;
|
||||||
cbuf += sizeof(uint32_t);
|
change |= *(uint32_t *)(cbuf + 4) ^ *(uint32_t *)(bbuf + 4);
|
||||||
bbuf += sizeof(uint32_t);
|
|
||||||
change |= *(uint32_t *)cbuf ^ *(uint32_t *)bbuf;
|
|
||||||
|
|
||||||
if (change != 0)
|
if (change != 0)
|
||||||
{
|
{
|
||||||
|
@ -664,9 +815,6 @@ void gray_update_rect(int x, int y, int width, int height)
|
||||||
unsigned test = 1;
|
unsigned test = 1;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
cbuf = _gray_info.cur_buffer + srcofs_row;
|
|
||||||
bbuf = _gray_info.back_buffer + srcofs_row;
|
|
||||||
|
|
||||||
/* precalculate the bit patterns with random shifts
|
/* precalculate the bit patterns with random shifts
|
||||||
* for all 8 pixels and put them on an extra "stack" */
|
* for all 8 pixels and put them on an extra "stack" */
|
||||||
for (i = 7; i >= 0; i--)
|
for (i = 7; i >= 0; i--)
|
||||||
|
@ -788,18 +936,18 @@ void gray_update_rect(int x, int y, int width, int height)
|
||||||
|
|
||||||
#if CONFIG_CPU == SH7034
|
#if CONFIG_CPU == SH7034
|
||||||
asm volatile (
|
asm volatile (
|
||||||
"mov.l @%[cbuf]+,r1 \n"
|
|
||||||
"mov.l @%[bbuf]+,r2 \n"
|
|
||||||
"xor r1,r2 \n"
|
|
||||||
"mov.l @%[cbuf],r1 \n"
|
"mov.l @%[cbuf],r1 \n"
|
||||||
"mov.l @%[bbuf],%[chg] \n"
|
"mov.l @%[bbuf],r2 \n"
|
||||||
|
"xor r1,r2 \n"
|
||||||
|
"mov.l @(4,%[cbuf]),r1 \n"
|
||||||
|
"mov.l @(4,%[bbuf]),%[chg] \n"
|
||||||
"xor r1,%[chg] \n"
|
"xor r1,%[chg] \n"
|
||||||
"or r2,%[chg] \n"
|
"or r2,%[chg] \n"
|
||||||
: /* outputs */
|
: /* outputs */
|
||||||
[cbuf]"+r"(cbuf),
|
|
||||||
[bbuf]"+r"(bbuf),
|
|
||||||
[chg] "=r"(change)
|
[chg] "=r"(change)
|
||||||
: /* inputs */
|
: /* inputs */
|
||||||
|
[cbuf]"r"(cbuf),
|
||||||
|
[bbuf]"r"(bbuf)
|
||||||
: /* clobbers */
|
: /* clobbers */
|
||||||
"r1", "r2"
|
"r1", "r2"
|
||||||
);
|
);
|
||||||
|
@ -810,13 +958,11 @@ void gray_update_rect(int x, int y, int width, int height)
|
||||||
unsigned mask, trash;
|
unsigned mask, trash;
|
||||||
|
|
||||||
pat_ptr = &pat_stack[8];
|
pat_ptr = &pat_stack[8];
|
||||||
cbuf = _gray_info.cur_buffer + srcofs_row;
|
|
||||||
bbuf = _gray_info.back_buffer + srcofs_row;
|
|
||||||
|
|
||||||
/* precalculate the bit patterns with random shifts
|
/* precalculate the bit patterns with random shifts
|
||||||
* for all 8 pixels and put them on an extra "stack" */
|
* for all 8 pixels and put them on an extra "stack" */
|
||||||
asm volatile (
|
asm volatile (
|
||||||
"mov #8,r3 \n" /* loop count in r3: 8 pixels */
|
"mov #8,r3 \n" /* loop count */
|
||||||
|
|
||||||
".ur_pre_loop: \n"
|
".ur_pre_loop: \n"
|
||||||
"mov.b @%[cbuf]+,r0\n" /* read current buffer */
|
"mov.b @%[cbuf]+,r0\n" /* read current buffer */
|
||||||
|
@ -860,10 +1006,11 @@ void gray_update_rect(int x, int y, int width, int height)
|
||||||
"rotcr %[mask] \n" /* get mask bit */
|
"rotcr %[mask] \n" /* get mask bit */
|
||||||
"mov.l r2,@-%[patp]\n" /* push on pattern stack */
|
"mov.l r2,@-%[patp]\n" /* push on pattern stack */
|
||||||
|
|
||||||
"add #-1,r3 \n" /* decrease loop count */
|
"add #-1,r3 \n" /* loop 8 times (pixel block) */
|
||||||
"cmp/pl r3 \n" /* loop count > 0? */
|
"cmp/pl r3 \n"
|
||||||
"bt .ur_pre_loop\n" /* yes: loop */
|
"bt .ur_pre_loop\n"
|
||||||
"shlr8 %[mask] \n"
|
|
||||||
|
"shlr8 %[mask] \n" /* shift mask to low byte */
|
||||||
"shlr16 %[mask] \n"
|
"shlr16 %[mask] \n"
|
||||||
: /* outputs */
|
: /* outputs */
|
||||||
[cbuf]"+r"(cbuf),
|
[cbuf]"+r"(cbuf),
|
||||||
|
@ -885,17 +1032,17 @@ void gray_update_rect(int x, int y, int width, int height)
|
||||||
/* set the bits for all 8 pixels in all bytes according to the
|
/* set the bits for all 8 pixels in all bytes according to the
|
||||||
* precalculated patterns on the pattern stack */
|
* precalculated patterns on the pattern stack */
|
||||||
asm volatile (
|
asm volatile (
|
||||||
"mov.l @%[patp]+,r1\n" /* pop all 8 patterns */
|
"mov.l @%[patp]+,r1 \n" /* pop all 8 patterns */
|
||||||
"mov.l @%[patp]+,r2\n"
|
"mov.l @%[patp]+,r2 \n"
|
||||||
"mov.l @%[patp]+,r3\n"
|
"mov.l @%[patp]+,r3 \n"
|
||||||
"mov.l @%[patp]+,r6\n"
|
"mov.l @%[patp]+,r6 \n"
|
||||||
"mov.l @%[patp]+,r7\n"
|
"mov.l @%[patp]+,r7 \n"
|
||||||
"mov.l @%[patp]+,r8\n"
|
"mov.l @%[patp]+,r8 \n"
|
||||||
"mov.l @%[patp]+,r9\n"
|
"mov.l @%[patp]+,r9 \n"
|
||||||
"mov.l @%[patp],r10\n"
|
"mov.l @%[patp],r10 \n"
|
||||||
|
|
||||||
"tst %[mask],%[mask] \n" /* nothing to keep? */
|
"tst %[mask],%[mask] \n"
|
||||||
"bt .ur_sloop \n" /* yes: jump to short loop */
|
"bt .ur_sloop \n" /* short loop if nothing to keep */
|
||||||
|
|
||||||
".ur_floop: \n" /** full loop (there are bits to keep)**/
|
".ur_floop: \n" /** full loop (there are bits to keep)**/
|
||||||
"shlr r1 \n" /* rotate lsb of pattern 1 to t bit */
|
"shlr r1 \n" /* rotate lsb of pattern 1 to t bit */
|
||||||
|
@ -915,12 +1062,12 @@ void gray_update_rect(int x, int y, int width, int height)
|
||||||
"shlr r10 \n"
|
"shlr r10 \n"
|
||||||
"mov.b @%[addr],%[rx] \n" /* read old value */
|
"mov.b @%[addr],%[rx] \n" /* read old value */
|
||||||
"rotcl r0 \n"
|
"rotcl r0 \n"
|
||||||
"and %[mask],%[rx] \n" /* mask out unneeded bits */
|
"and %[mask],%[rx] \n" /* mask out replaced bits */
|
||||||
"or %[rx],r0 \n" /* set new bits */
|
"or %[rx],r0 \n" /* set new bits */
|
||||||
"mov.b r0,@%[addr] \n" /* store value to bitplane */
|
"mov.b r0,@%[addr] \n" /* store value to bitplane */
|
||||||
"add %[psiz],%[addr] \n" /* advance to next bitplane */
|
"add %[psiz],%[addr] \n" /* advance to next bitplane */
|
||||||
"cmp/hi %[addr],%[end] \n" /* last bitplane done? */
|
"cmp/hi %[addr],%[end] \n" /* loop through all bitplanes */
|
||||||
"bt .ur_floop \n" /* no: loop */
|
"bt .ur_floop \n"
|
||||||
|
|
||||||
"bra .ur_end \n"
|
"bra .ur_end \n"
|
||||||
"nop \n"
|
"nop \n"
|
||||||
|
@ -952,8 +1099,8 @@ void gray_update_rect(int x, int y, int width, int height)
|
||||||
"rotcl r0 \n"
|
"rotcl r0 \n"
|
||||||
"mov.b r0,@%[addr] \n" /* store byte to bitplane */
|
"mov.b r0,@%[addr] \n" /* store byte to bitplane */
|
||||||
"add %[psiz],%[addr] \n" /* advance to next bitplane */
|
"add %[psiz],%[addr] \n" /* advance to next bitplane */
|
||||||
"cmp/hi %[addr],%[end] \n" /* last bitplane done? */
|
"cmp/hi %[addr],%[end] \n" /* loop through all bitplanes */
|
||||||
"bt .ur_sloop \n" /* no: loop */
|
"bt .ur_sloop \n"
|
||||||
|
|
||||||
".ur_end: \n"
|
".ur_end: \n"
|
||||||
: /* outputs */
|
: /* outputs */
|
||||||
|
@ -970,18 +1117,18 @@ void gray_update_rect(int x, int y, int width, int height)
|
||||||
}
|
}
|
||||||
#elif defined(CPU_COLDFIRE)
|
#elif defined(CPU_COLDFIRE)
|
||||||
asm volatile (
|
asm volatile (
|
||||||
"move.l (%[cbuf])+,%%d0 \n"
|
|
||||||
"move.l (%[bbuf])+,%%d1 \n"
|
|
||||||
"eor.l %%d0,%%d1 \n"
|
|
||||||
"move.l (%[cbuf]),%%d0 \n"
|
"move.l (%[cbuf]),%%d0 \n"
|
||||||
"move.l (%[bbuf]),%[chg]\n"
|
"move.l (%[bbuf]),%%d1 \n"
|
||||||
|
"eor.l %%d0,%%d1 \n"
|
||||||
|
"move.l (4,%[cbuf]),%%d0 \n"
|
||||||
|
"move.l (4,%[bbuf]),%[chg] \n"
|
||||||
"eor.l %%d0,%[chg] \n"
|
"eor.l %%d0,%[chg] \n"
|
||||||
"or.l %%d1,%[chg] \n"
|
"or.l %%d1,%[chg] \n"
|
||||||
: /* outputs */
|
: /* outputs */
|
||||||
[cbuf]"+a"(cbuf),
|
|
||||||
[bbuf]"+a"(bbuf),
|
|
||||||
[chg] "=&d"(change)
|
[chg] "=&d"(change)
|
||||||
: /* inputs */
|
: /* inputs */
|
||||||
|
[cbuf]"a"(cbuf),
|
||||||
|
[bbuf]"a"(bbuf)
|
||||||
: /* clobbers */
|
: /* clobbers */
|
||||||
"d0", "d1"
|
"d0", "d1"
|
||||||
);
|
);
|
||||||
|
@ -992,13 +1139,11 @@ void gray_update_rect(int x, int y, int width, int height)
|
||||||
unsigned mask, trash;
|
unsigned mask, trash;
|
||||||
|
|
||||||
pat_ptr = &pat_stack[8];
|
pat_ptr = &pat_stack[8];
|
||||||
cbuf = _gray_info.cur_buffer + srcofs_row;
|
|
||||||
bbuf = _gray_info.back_buffer + srcofs_row;
|
|
||||||
|
|
||||||
/* precalculate the bit patterns with random shifts
|
/* precalculate the bit patterns with random shifts
|
||||||
* for all 8 pixels and put them on an extra "stack" */
|
* for all 8 pixels and put them on an extra "stack" */
|
||||||
asm volatile (
|
asm volatile (
|
||||||
"moveq.l #8,%%d3 \n" /* loop count in d3: 8 pixels */
|
"moveq.l #8,%%d3 \n" /* loop count */
|
||||||
"clr.l %[mask] \n"
|
"clr.l %[mask] \n"
|
||||||
|
|
||||||
".ur_pre_loop: \n"
|
".ur_pre_loop: \n"
|
||||||
|
@ -1018,16 +1163,16 @@ void gray_update_rect(int x, int y, int width, int height)
|
||||||
/* Since the lower bits are not very random: */
|
/* Since the lower bits are not very random: */
|
||||||
"move.l %[rnd],%%d1 \n"
|
"move.l %[rnd],%%d1 \n"
|
||||||
"lsr.l #8,%%d1 \n" /* get bits 8..15 (need max. 5) */
|
"lsr.l #8,%%d1 \n" /* get bits 8..15 (need max. 5) */
|
||||||
"and.l %[rmsk],%%d1\n" /* mask out unneeded bits */
|
"and.l %[rmsk],%%d1 \n" /* mask out unneeded bits */
|
||||||
|
|
||||||
"cmp.l %[dpth],%%d1\n" /* random >= depth ? */
|
"cmp.l %[dpth],%%d1 \n" /* random >= depth ? */
|
||||||
"blo.b .ur_ntrim \n"
|
"blo.b .ur_ntrim \n"
|
||||||
"sub.l %[dpth],%%d1\n" /* yes: random -= depth; */
|
"sub.l %[dpth],%%d1 \n" /* yes: random -= depth; */
|
||||||
".ur_ntrim: \n"
|
".ur_ntrim: \n"
|
||||||
|
|
||||||
"move.l %%d2,%%d0 \n"
|
"move.l %%d2,%%d0 \n" /** rotate pattern **/
|
||||||
"lsl.l %%d1,%%d0 \n"
|
"lsl.l %%d1,%%d0 \n"
|
||||||
"sub.l %[dpth],%%d1\n"
|
"sub.l %[dpth],%%d1 \n"
|
||||||
"neg.l %%d1 \n" /* d1 = depth - d1 */
|
"neg.l %%d1 \n" /* d1 = depth - d1 */
|
||||||
"lsr.l %%d1,%%d2 \n"
|
"lsr.l %%d1,%%d2 \n"
|
||||||
"or.l %%d0,%%d2 \n" /* rotated_pattern = d2 | d0 */
|
"or.l %%d0,%%d2 \n" /* rotated_pattern = d2 | d0 */
|
||||||
|
@ -1038,8 +1183,8 @@ void gray_update_rect(int x, int y, int width, int height)
|
||||||
"lsr.l #1,%[mask] \n" /* shift mask */
|
"lsr.l #1,%[mask] \n" /* shift mask */
|
||||||
"move.l %%d2,-(%[patp]) \n" /* push on pattern stack */
|
"move.l %%d2,-(%[patp]) \n" /* push on pattern stack */
|
||||||
|
|
||||||
"subq.l #1,%%d3 \n" /* decrease loop count */
|
"subq.l #1,%%d3 \n" /* loop 8 times (pixel block) */
|
||||||
"bne.b .ur_pre_loop\n" /* yes: loop */
|
"bne.b .ur_pre_loop \n"
|
||||||
: /* outputs */
|
: /* outputs */
|
||||||
[cbuf]"+a"(cbuf),
|
[cbuf]"+a"(cbuf),
|
||||||
[bbuf]"+a"(bbuf),
|
[bbuf]"+a"(bbuf),
|
||||||
|
@ -1062,14 +1207,14 @@ void gray_update_rect(int x, int y, int width, int height)
|
||||||
asm volatile (
|
asm volatile (
|
||||||
"movem.l (%[patp]),%%d2-%%d6/%%a0-%%a1/%[ax] \n"
|
"movem.l (%[patp]),%%d2-%%d6/%%a0-%%a1/%[ax] \n"
|
||||||
/* pop all 8 patterns */
|
/* pop all 8 patterns */
|
||||||
"not.l %[mask] \n" /* set mask -> keep mask */
|
"not.l %[mask] \n" /* "set" mask -> "keep" mask */
|
||||||
"and.l #0xFF,%[mask] \n"
|
"and.l #0xFF,%[mask] \n"
|
||||||
"beq.b .ur_sstart \n" /* yes: jump to short loop */
|
"beq.b .ur_sstart \n" /* short loop if nothing to keep */
|
||||||
|
|
||||||
".ur_floop: \n" /** full loop (there are bits to keep)**/
|
".ur_floop: \n" /** full loop (there are bits to keep)**/
|
||||||
"clr.l %%d0 \n"
|
"clr.l %%d0 \n"
|
||||||
"lsr.l #1,%%d2 \n" /* shift out mask bit */
|
"lsr.l #1,%%d2 \n" /* shift out pattern bit */
|
||||||
"addx.l %%d0,%%d0 \n" /* puts bit into LSB, shifts left by 1 */
|
"addx.l %%d0,%%d0 \n" /* put bit into LSB of byte */
|
||||||
"lsr.l #1,%%d3 \n"
|
"lsr.l #1,%%d3 \n"
|
||||||
"addx.l %%d0,%%d0 \n"
|
"addx.l %%d0,%%d0 \n"
|
||||||
"lsr.l #1,%%d4 \n"
|
"lsr.l #1,%%d4 \n"
|
||||||
|
@ -1092,23 +1237,23 @@ void gray_update_rect(int x, int y, int width, int height)
|
||||||
"move.l %%d1,%[ax] \n"
|
"move.l %%d1,%[ax] \n"
|
||||||
|
|
||||||
"move.b (%[addr]),%%d1 \n" /* read old value */
|
"move.b (%[addr]),%%d1 \n" /* read old value */
|
||||||
"and.l %[mask],%%d1 \n" /* mask out unneeded bits */
|
"and.l %[mask],%%d1 \n" /* mask out replaced bits */
|
||||||
"or.l %%d0,%%d1 \n" /* set new bits */
|
"or.l %%d0,%%d1 \n" /* set new bits */
|
||||||
"move.b %%d1,(%[addr]) \n" /* store value to bitplane */
|
"move.b %%d1,(%[addr]) \n" /* store value to bitplane */
|
||||||
|
|
||||||
"add.l %[psiz],%[addr] \n" /* advance to next bitplane */
|
"add.l %[psiz],%[addr] \n" /* advance to next bitplane */
|
||||||
"cmp.l %[addr],%[end] \n" /* last bitplane done? */
|
"cmp.l %[addr],%[end] \n" /* loop through all bitplanes */
|
||||||
"bhi.b .ur_floop \n" /* no: loop */
|
"bhi.b .ur_floop \n"
|
||||||
|
|
||||||
"bra.b .ur_end \n"
|
"bra.b .ur_end \n"
|
||||||
|
|
||||||
".ur_sstart: \n"
|
".ur_sstart: \n"
|
||||||
"move.l %%a0,%[mask]\n" /* mask isn't needed here, reuse reg */
|
"move.l %%a0,%[mask] \n" /* mask isn't needed here, reuse reg */
|
||||||
|
|
||||||
".ur_sloop: \n" /** short loop (nothing to keep) **/
|
".ur_sloop: \n" /** short loop (nothing to keep) **/
|
||||||
"clr.l %%d0 \n"
|
"clr.l %%d0 \n"
|
||||||
"lsr.l #1,%%d2 \n" /* shift out mask bit */
|
"lsr.l #1,%%d2 \n" /* shift out pattern bit */
|
||||||
"addx.l %%d0,%%d0 \n" /* puts bit into LSB, shifts left by 1 */
|
"addx.l %%d0,%%d0 \n" /* put bit into LSB of byte */
|
||||||
"lsr.l #1,%%d3 \n"
|
"lsr.l #1,%%d3 \n"
|
||||||
"addx.l %%d0,%%d0 \n"
|
"addx.l %%d0,%%d0 \n"
|
||||||
"lsr.l #1,%%d4 \n"
|
"lsr.l #1,%%d4 \n"
|
||||||
|
@ -1130,8 +1275,8 @@ void gray_update_rect(int x, int y, int width, int height)
|
||||||
|
|
||||||
"move.b %%d0,(%[addr]) \n" /* store byte to bitplane */
|
"move.b %%d0,(%[addr]) \n" /* store byte to bitplane */
|
||||||
"add.l %[psiz],%[addr] \n" /* advance to next bitplane */
|
"add.l %[psiz],%[addr] \n" /* advance to next bitplane */
|
||||||
"cmp.l %[addr],%[end] \n" /* last bitplane done? */
|
"cmp.l %[addr],%[end] \n" /* loop through all bitplanes */
|
||||||
"bhi.b .ur_sloop \n" /* no: loop */
|
"bhi.b .ur_sloop \n"
|
||||||
|
|
||||||
".ur_end: \n"
|
".ur_end: \n"
|
||||||
: /* outputs */
|
: /* outputs */
|
||||||
|
@ -1151,9 +1296,7 @@ void gray_update_rect(int x, int y, int width, int height)
|
||||||
(void)pat_ptr;
|
(void)pat_ptr;
|
||||||
/* check whether anything changed in the 8-pixel block */
|
/* check whether anything changed in the 8-pixel block */
|
||||||
change = *(uint32_t *)cbuf ^ *(uint32_t *)bbuf;
|
change = *(uint32_t *)cbuf ^ *(uint32_t *)bbuf;
|
||||||
cbuf += sizeof(uint32_t);
|
change |= *(uint32_t *)(cbuf + 4) ^ *(uint32_t *)(bbuf + 4);
|
||||||
bbuf += sizeof(uint32_t);
|
|
||||||
change |= *(uint32_t *)cbuf ^ *(uint32_t *)bbuf;
|
|
||||||
|
|
||||||
if (change != 0)
|
if (change != 0)
|
||||||
{
|
{
|
||||||
|
@ -1162,9 +1305,6 @@ void gray_update_rect(int x, int y, int width, int height)
|
||||||
unsigned test = 1;
|
unsigned test = 1;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
cbuf = _gray_info.cur_buffer + srcofs_row;
|
|
||||||
bbuf = _gray_info.back_buffer + srcofs_row;
|
|
||||||
|
|
||||||
/* precalculate the bit patterns with random shifts
|
/* precalculate the bit patterns with random shifts
|
||||||
* for all 8 pixels and put them on an extra "stack" */
|
* for all 8 pixels and put them on an extra "stack" */
|
||||||
for (i = 0; i < 8; i++)
|
for (i = 0; i < 8; i++)
|
||||||
|
|
|
@ -876,8 +876,140 @@ static void _writearray(unsigned char *address, const unsigned char *src,
|
||||||
unsigned long pat_stack[8];
|
unsigned long pat_stack[8];
|
||||||
unsigned long *pat_ptr = &pat_stack[8];
|
unsigned long *pat_ptr = &pat_stack[8];
|
||||||
unsigned char *addr, *end;
|
unsigned char *addr, *end;
|
||||||
#if 0 /* CPU specific asm versions will go here */
|
#ifdef CPU_ARM
|
||||||
|
const unsigned char *_src;
|
||||||
|
unsigned _mask, trash;
|
||||||
|
|
||||||
|
_mask = mask;
|
||||||
|
_src = src;
|
||||||
|
|
||||||
|
/* precalculate the bit patterns with random shifts
|
||||||
|
for all 8 pixels and put them on an extra "stack" */
|
||||||
|
asm volatile (
|
||||||
|
"mov %[mask], %[mask], lsl #24 \n" /* shift mask to upper byte */
|
||||||
|
"mov r3, #8 \n" /* loop count */
|
||||||
|
|
||||||
|
".wa_loop: \n" /** load pattern for pixel **/
|
||||||
|
"mov r2, #0 \n" /* pattern for skipped pixel must be 0 */
|
||||||
|
"movs %[mask], %[mask], lsl #1 \n" /* shift out msb of mask */
|
||||||
|
"bcc .wa_skip \n" /* skip this pixel */
|
||||||
|
|
||||||
|
"ldrb r0, [%[src]] \n" /* load src byte */
|
||||||
|
"ldrb r0, [%[trns], r0] \n" /* idxtable into pattern index */
|
||||||
|
"ldr r2, [%[bpat], r0, lsl #2] \n" /* r2 = bitpattern[byte]; */
|
||||||
|
|
||||||
|
"add r0, %[rnd], %[rnd], lsl #3 \n" /* multiply by 75 */
|
||||||
|
"add %[rnd], %[rnd], %[rnd], lsl #1 \n"
|
||||||
|
"add %[rnd], %[rnd], r0, lsl #3 \n"
|
||||||
|
"add %[rnd], %[rnd], #74 \n" /* add another 74 */
|
||||||
|
/* Since the lower bits are not very random: get bits 8..15 (need max. 5) */
|
||||||
|
"and r1, %[rmsk], %[rnd], lsr #8 \n" /* ..and mask out unneeded bits */
|
||||||
|
|
||||||
|
"cmp r1, %[dpth] \n" /* random >= depth ? */
|
||||||
|
"subhs r1, r1, %[dpth] \n" /* yes: random -= depth */
|
||||||
|
|
||||||
|
"mov r0, r2, lsl r1 \n" /** rotate pattern **/
|
||||||
|
"sub r1, %[dpth], r1 \n"
|
||||||
|
"orr r2, r0, r2, lsr r1 \n"
|
||||||
|
|
||||||
|
".wa_skip: \n"
|
||||||
|
"str r2, [%[patp], #-4]! \n" /* push on pattern stack */
|
||||||
|
|
||||||
|
"add %[src], %[src], #1 \n" /* src++; */
|
||||||
|
"subs r3, r3, #1 \n" /* loop 8 times (pixel block) */
|
||||||
|
"bne .wa_loop \n"
|
||||||
|
: /* outputs */
|
||||||
|
[src] "+r"(_src),
|
||||||
|
[patp]"+r"(pat_ptr),
|
||||||
|
[rnd] "+r"(_gray_random_buffer),
|
||||||
|
[mask]"+r"(_mask)
|
||||||
|
: /* inputs */
|
||||||
|
[bpat]"r"(_gray_info.bitpattern),
|
||||||
|
[trns]"r"(_gray_info.idxtable),
|
||||||
|
[dpth]"r"(_gray_info.depth),
|
||||||
|
[rmsk]"r"(_gray_info.randmask)
|
||||||
|
: /* clobbers */
|
||||||
|
"r0", "r1", "r2", "r3"
|
||||||
|
);
|
||||||
|
|
||||||
|
addr = address;
|
||||||
|
end = addr + MULU16(_gray_info.depth, _gray_info.plane_size);
|
||||||
|
_mask = mask;
|
||||||
|
|
||||||
|
/* set the bits for all 8 pixels in all bytes according to the
|
||||||
|
* precalculated patterns on the pattern stack */
|
||||||
|
asm volatile (
|
||||||
|
"ldmia %[patp], {r2 - r8, %[rx]} \n" /* pop all 8 patterns */
|
||||||
|
|
||||||
|
"mvn %[mask], %[mask] \n" /* "set" mask -> "keep" mask */
|
||||||
|
"ands %[mask], %[mask], #0xff \n"
|
||||||
|
"beq .wa_sloop \n" /* short loop if nothing to keep */
|
||||||
|
|
||||||
|
".wa_floop: \n" /** full loop (there are bits to keep)**/
|
||||||
|
"movs %[rx], %[rx], lsr #1 \n" /* shift out pattern bit */
|
||||||
|
"adc r0, r0, r0 \n" /* put bit into LSB of byte */
|
||||||
|
"movs r8, r8, lsr #1 \n"
|
||||||
|
"adc r0, r0, r0 \n"
|
||||||
|
"movs r7, r7, lsr #1 \n"
|
||||||
|
"adc r0, r0, r0 \n"
|
||||||
|
"movs r6, r6, lsr #1 \n"
|
||||||
|
"adc r0, r0, r0 \n"
|
||||||
|
"movs r5, r5, lsr #1 \n"
|
||||||
|
"adc r0, r0, r0 \n"
|
||||||
|
"movs r4, r4, lsr #1 \n"
|
||||||
|
"adc r0, r0, r0 \n"
|
||||||
|
"movs r3, r3, lsr #1 \n"
|
||||||
|
"adc r0, r0, r0 \n"
|
||||||
|
"movs r2, r2, lsr #1 \n"
|
||||||
|
"adc r0, r0, r0 \n"
|
||||||
|
|
||||||
|
"ldrb r1, [%[addr]] \n" /* read old value */
|
||||||
|
"and r1, r1, %[mask] \n" /* mask out replaced bits */
|
||||||
|
"orr r1, r1, r0 \n" /* set new bits */
|
||||||
|
"strb r1, [%[addr]], %[psiz] \n" /* store value, advance to next bpl */
|
||||||
|
|
||||||
|
"cmp %[end], %[addr] \n" /* loop through all bitplanes */
|
||||||
|
"bne .wa_floop \n"
|
||||||
|
|
||||||
|
"b .wa_end \n"
|
||||||
|
|
||||||
|
".wa_sloop: \n" /** short loop (nothing to keep) **/
|
||||||
|
"movs %[rx], %[rx], lsr #1 \n" /* shift out pattern bit */
|
||||||
|
"adc r0, r0, r0 \n" /* put bit into LSB of byte */
|
||||||
|
"movs r8, r8, lsr #1 \n"
|
||||||
|
"adc r0, r0, r0 \n"
|
||||||
|
"movs r7, r7, lsr #1 \n"
|
||||||
|
"adc r0, r0, r0 \n"
|
||||||
|
"movs r6, r6, lsr #1 \n"
|
||||||
|
"adc r0, r0, r0 \n"
|
||||||
|
"movs r5, r5, lsr #1 \n"
|
||||||
|
"adc r0, r0, r0 \n"
|
||||||
|
"movs r4, r4, lsr #1 \n"
|
||||||
|
"adc r0, r0, r0 \n"
|
||||||
|
"movs r3, r3, lsr #1 \n"
|
||||||
|
"adc r0, r0, r0 \n"
|
||||||
|
"movs r2, r2, lsr #1 \n"
|
||||||
|
"adc r0, r0, r0 \n"
|
||||||
|
|
||||||
|
"strb r0, [%[addr]], %[psiz] \n" /* store byte, advance to next bpl */
|
||||||
|
|
||||||
|
"cmp %[end], %[addr] \n" /* loop through all bitplanes */
|
||||||
|
"bne .wa_sloop \n"
|
||||||
|
|
||||||
|
".wa_end: \n"
|
||||||
|
: /* outputs */
|
||||||
|
[addr]"+r"(addr),
|
||||||
|
[mask]"+r"(_mask),
|
||||||
|
[rx] "=&r"(trash)
|
||||||
|
: /* inputs */
|
||||||
|
[psiz]"r"(_gray_info.plane_size),
|
||||||
|
[end] "r"(end),
|
||||||
|
[patp]"[rx]"(pat_ptr)
|
||||||
|
: /* clobbers */
|
||||||
|
"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8"
|
||||||
|
);
|
||||||
#else /* C version, for reference*/
|
#else /* C version, for reference*/
|
||||||
|
#warning C version of _writearray() used
|
||||||
unsigned test = 0x80;
|
unsigned test = 0x80;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
|
@ -1027,7 +1159,7 @@ static void _writearray(unsigned char *address, const unsigned char *src,
|
||||||
/* precalculate the bit patterns with random shifts
|
/* precalculate the bit patterns with random shifts
|
||||||
for all 8 pixels and put them on an extra "stack" */
|
for all 8 pixels and put them on an extra "stack" */
|
||||||
asm volatile (
|
asm volatile (
|
||||||
"mov #8,r3 \n" /* loop count in r3: 8 pixels */
|
"mov #8,r3 \n" /* loop count */
|
||||||
|
|
||||||
".wa_loop: \n" /** load pattern for pixel **/
|
".wa_loop: \n" /** load pattern for pixel **/
|
||||||
"mov #0,r0 \n" /* pattern for skipped pixel must be 0 */
|
"mov #0,r0 \n" /* pattern for skipped pixel must be 0 */
|
||||||
|
@ -1067,12 +1199,12 @@ static void _writearray(unsigned char *address, const unsigned char *src,
|
||||||
"or r1,r0 \n" /* rotated_pattern = r0 | r1 */
|
"or r1,r0 \n" /* rotated_pattern = r0 | r1 */
|
||||||
|
|
||||||
".wa_skip: \n"
|
".wa_skip: \n"
|
||||||
"mov.l r0,@-%[patp]\n" /* push on pattern stack */
|
"mov.l r0,@-%[patp] \n" /* push on pattern stack */
|
||||||
|
|
||||||
"add %[stri],%[src] \n" /* src += stride; */
|
"add %[stri],%[src] \n" /* src += stride; */
|
||||||
"add #-1,r3 \n" /* decrease loop count */
|
"add #-1,r3 \n" /* loop 8 times (pixel block) */
|
||||||
"cmp/pl r3 \n" /* loop count > 0? */
|
"cmp/pl r3 \n"
|
||||||
"bt .wa_loop \n" /* yes: loop */
|
"bt .wa_loop \n"
|
||||||
: /* outputs */
|
: /* outputs */
|
||||||
[src] "+r"(_src),
|
[src] "+r"(_src),
|
||||||
[rnd] "+r"(_gray_random_buffer),
|
[rnd] "+r"(_gray_random_buffer),
|
||||||
|
@ -1095,19 +1227,19 @@ static void _writearray(unsigned char *address, const unsigned char *src,
|
||||||
/* set the bits for all 8 pixels in all bytes according to the
|
/* set the bits for all 8 pixels in all bytes according to the
|
||||||
* precalculated patterns on the pattern stack */
|
* precalculated patterns on the pattern stack */
|
||||||
asm volatile (
|
asm volatile (
|
||||||
"mov.l @%[patp]+,r1\n" /* pop all 8 patterns */
|
"mov.l @%[patp]+,r1 \n" /* pop all 8 patterns */
|
||||||
"mov.l @%[patp]+,r2\n"
|
"mov.l @%[patp]+,r2 \n"
|
||||||
"mov.l @%[patp]+,r3\n"
|
"mov.l @%[patp]+,r3 \n"
|
||||||
"mov.l @%[patp]+,r6\n"
|
"mov.l @%[patp]+,r6 \n"
|
||||||
"mov.l @%[patp]+,r7\n"
|
"mov.l @%[patp]+,r7 \n"
|
||||||
"mov.l @%[patp]+,r8\n"
|
"mov.l @%[patp]+,r8 \n"
|
||||||
"mov.l @%[patp]+,r9\n"
|
"mov.l @%[patp]+,r9 \n"
|
||||||
"mov.l @%[patp],r10\n"
|
"mov.l @%[patp],r10 \n"
|
||||||
|
|
||||||
"not %[mask],%[mask] \n" /* "set" mask -> "keep" mask */
|
"not %[mask],%[mask] \n" /* "set" mask -> "keep" mask */
|
||||||
"extu.b %[mask],%[mask] \n" /* mask out high bits */
|
"extu.b %[mask],%[mask] \n" /* mask out high bits */
|
||||||
"tst %[mask],%[mask] \n" /* nothing to keep? */
|
"tst %[mask],%[mask] \n"
|
||||||
"bt .wa_sloop \n" /* yes: jump to short loop */
|
"bt .wa_sloop \n" /* short loop if nothing to keep */
|
||||||
|
|
||||||
".wa_floop: \n" /** full loop (there are bits to keep)**/
|
".wa_floop: \n" /** full loop (there are bits to keep)**/
|
||||||
"shlr r1 \n" /* rotate lsb of pattern 1 to t bit */
|
"shlr r1 \n" /* rotate lsb of pattern 1 to t bit */
|
||||||
|
@ -1127,12 +1259,12 @@ static void _writearray(unsigned char *address, const unsigned char *src,
|
||||||
"shlr r10 \n"
|
"shlr r10 \n"
|
||||||
"mov.b @%[addr],%[rx] \n" /* read old value */
|
"mov.b @%[addr],%[rx] \n" /* read old value */
|
||||||
"rotcl r0 \n"
|
"rotcl r0 \n"
|
||||||
"and %[mask],%[rx] \n" /* mask out unneeded bits */
|
"and %[mask],%[rx] \n" /* mask out replaced bits */
|
||||||
"or %[rx],r0 \n" /* set new bits */
|
"or %[rx],r0 \n" /* set new bits */
|
||||||
"mov.b r0,@%[addr] \n" /* store value to bitplane */
|
"mov.b r0,@%[addr] \n" /* store value to bitplane */
|
||||||
"add %[psiz],%[addr] \n" /* advance to next bitplane */
|
"add %[psiz],%[addr] \n" /* advance to next bitplane */
|
||||||
"cmp/hi %[addr],%[end] \n" /* last bitplane done? */
|
"cmp/hi %[addr],%[end] \n" /* loop for all bitplanes */
|
||||||
"bt .wa_floop \n" /* no: loop */
|
"bt .wa_floop \n"
|
||||||
|
|
||||||
"bra .wa_end \n"
|
"bra .wa_end \n"
|
||||||
"nop \n"
|
"nop \n"
|
||||||
|
@@ -1164,8 +1296,8 @@ static void _writearray(unsigned char *address, const unsigned char *src,
|
||||||
"rotcl r0 \n"
|
"rotcl r0 \n"
|
||||||
"mov.b r0,@%[addr] \n" /* store byte to bitplane */
|
"mov.b r0,@%[addr] \n" /* store byte to bitplane */
|
||||||
"add %[psiz],%[addr] \n" /* advance to next bitplane */
|
"add %[psiz],%[addr] \n" /* advance to next bitplane */
|
||||||
"cmp/hi %[addr],%[end] \n" /* last bitplane done? */
|
"cmp/hi %[addr],%[end] \n" /* loop for all bitplanes */
|
||||||
"bt .wa_sloop \n" /* no: loop */
|
"bt .wa_sloop \n"
|
||||||
|
|
||||||
".wa_end: \n"
|
".wa_end: \n"
|
||||||
: /* outputs */
|
: /* outputs */
|
||||||
|
@@ -1189,7 +1321,7 @@ static void _writearray(unsigned char *address, const unsigned char *src,
|
||||||
/* precalculate the bit patterns with random shifts
|
/* precalculate the bit patterns with random shifts
|
||||||
for all 8 pixels and put them on an extra "stack" */
|
for all 8 pixels and put them on an extra "stack" */
|
||||||
asm volatile (
|
asm volatile (
|
||||||
"moveq.l #8,%%d3 \n" /* loop count in d3: 8 pixels */
|
"moveq.l #8,%%d3 \n" /* loop count */
|
||||||
|
|
||||||
".wa_loop: \n" /** load pattern for pixel **/
|
".wa_loop: \n" /** load pattern for pixel **/
|
||||||
"clr.l %%d2 \n" /* pattern for skipped pixel must be 0 */
|
"clr.l %%d2 \n" /* pattern for skipped pixel must be 0 */
|
||||||
|
@@ -1206,16 +1338,16 @@ static void _writearray(unsigned char *address, const unsigned char *src,
|
||||||
/* Since the lower bits are not very random: */
|
/* Since the lower bits are not very random: */
|
||||||
"move.l %[rnd],%%d1 \n"
|
"move.l %[rnd],%%d1 \n"
|
||||||
"lsr.l #8,%%d1 \n" /* get bits 8..15 (need max. 5) */
|
"lsr.l #8,%%d1 \n" /* get bits 8..15 (need max. 5) */
|
||||||
"and.l %[rmsk],%%d1\n" /* mask out unneeded bits */
|
"and.l %[rmsk],%%d1 \n" /* mask out unneeded bits */
|
||||||
|
|
||||||
"cmp.l %[dpth],%%d1\n" /* random >= depth ? */
|
"cmp.l %[dpth],%%d1 \n" /* random >= depth ? */
|
||||||
"blo.b .wa_ntrim \n"
|
"blo.b .wa_ntrim \n"
|
||||||
"sub.l %[dpth],%%d1\n" /* yes: random -= depth; */
|
"sub.l %[dpth],%%d1 \n" /* yes: random -= depth; */
|
||||||
".wa_ntrim: \n"
|
".wa_ntrim: \n"
|
||||||
|
|
||||||
"move.l %%d2,%%d0 \n"
|
"move.l %%d2,%%d0 \n" /** rotate pattern **/
|
||||||
"lsl.l %%d1,%%d0 \n"
|
"lsl.l %%d1,%%d0 \n"
|
||||||
"sub.l %[dpth],%%d1\n"
|
"sub.l %[dpth],%%d1 \n"
|
||||||
"neg.l %%d1 \n" /* d1 = depth - d1 */
|
"neg.l %%d1 \n" /* d1 = depth - d1 */
|
||||||
"lsr.l %%d1,%%d2 \n"
|
"lsr.l %%d1,%%d2 \n"
|
||||||
"or.l %%d0,%%d2 \n"
|
"or.l %%d0,%%d2 \n"
|
||||||
|
@@ -1224,8 +1356,8 @@ static void _writearray(unsigned char *address, const unsigned char *src,
|
||||||
"move.l %%d2,-(%[patp]) \n" /* push on pattern stack */
|
"move.l %%d2,-(%[patp]) \n" /* push on pattern stack */
|
||||||
|
|
||||||
"add.l %[stri],%[src] \n" /* src += stride; */
|
"add.l %[stri],%[src] \n" /* src += stride; */
|
||||||
"subq.l #1,%%d3 \n" /* decrease loop count */
|
"subq.l #1,%%d3 \n" /* loop 8 times (pixel block) */
|
||||||
"bne.b .wa_loop \n" /* yes: loop */
|
"bne.b .wa_loop \n"
|
||||||
: /* outputs */
|
: /* outputs */
|
||||||
[src] "+a"(_src),
|
[src] "+a"(_src),
|
||||||
[patp]"+a"(pat_ptr),
|
[patp]"+a"(pat_ptr),
|
||||||
|
@@ -1252,12 +1384,11 @@ static void _writearray(unsigned char *address, const unsigned char *src,
|
||||||
/* pop all 8 patterns */
|
/* pop all 8 patterns */
|
||||||
"not.l %[mask] \n" /* "set" mask -> "keep" mask */
|
"not.l %[mask] \n" /* "set" mask -> "keep" mask */
|
||||||
"and.l #0xFF,%[mask] \n"
|
"and.l #0xFF,%[mask] \n"
|
||||||
"beq.b .wa_sstart \n" /* yes: jump to short loop */
|
"beq.b .wa_sstart \n" /* short loop if nothing to keep */
|
||||||
|
|
||||||
".wa_floop: \n" /** full loop (there are bits to keep)**/
|
".wa_floop: \n" /** full loop (there are bits to keep)**/
|
||||||
"clr.l %%d0 \n"
|
"lsr.l #1,%%d2 \n" /* shift out pattern bit */
|
||||||
"lsr.l #1,%%d2 \n" /* shift out mask bit */
|
"addx.l %%d0,%%d0 \n" /* put bit into LSB of byte */
|
||||||
"addx.l %%d0,%%d0 \n" /* puts bit into LSB, shifts left by 1 */
|
|
||||||
"lsr.l #1,%%d3 \n"
|
"lsr.l #1,%%d3 \n"
|
||||||
"addx.l %%d0,%%d0 \n"
|
"addx.l %%d0,%%d0 \n"
|
||||||
"lsr.l #1,%%d4 \n"
|
"lsr.l #1,%%d4 \n"
|
||||||
|
@@ -1280,23 +1411,22 @@ static void _writearray(unsigned char *address, const unsigned char *src,
|
||||||
"move.l %%d1,%[ax] \n"
|
"move.l %%d1,%[ax] \n"
|
||||||
|
|
||||||
"move.b (%[addr]),%%d1 \n" /* read old value */
|
"move.b (%[addr]),%%d1 \n" /* read old value */
|
||||||
"and.l %[mask],%%d1 \n" /* mask out unneeded bits */
|
"and.l %[mask],%%d1 \n" /* mask out replaced bits */
|
||||||
"or.l %%d0,%%d1 \n" /* set new bits */
|
"or.l %%d0,%%d1 \n" /* set new bits */
|
||||||
"move.b %%d1,(%[addr]) \n" /* store value to bitplane */
|
"move.b %%d1,(%[addr]) \n" /* store value to bitplane */
|
||||||
|
|
||||||
"add.l %[psiz],%[addr] \n" /* advance to next bitplane */
|
"add.l %[psiz],%[addr] \n" /* advance to next bitplane */
|
||||||
"cmp.l %[addr],%[end] \n" /* last bitplane done? */
|
"cmp.l %[addr],%[end] \n" /* loop for all bitplanes */
|
||||||
"bhi.b .wa_floop \n" /* no: loop */
|
"bhi.b .wa_floop \n"
|
||||||
|
|
||||||
"bra.b .wa_end \n"
|
"bra.b .wa_end \n"
|
||||||
|
|
||||||
".wa_sstart: \n"
|
".wa_sstart: \n"
|
||||||
"move.l %%a0,%[mask]\n" /* mask isn't needed here, reuse reg */
|
"move.l %%a0,%[mask] \n" /* mask isn't needed here, reuse reg */
|
||||||
|
|
||||||
".wa_sloop: \n" /** short loop (nothing to keep) **/
|
".wa_sloop: \n" /** short loop (nothing to keep) **/
|
||||||
"clr.l %%d0 \n"
|
"lsr.l #1,%%d2 \n" /* shift out pattern bit */
|
||||||
"lsr.l #1,%%d2 \n" /* shift out mask bit */
|
"addx.l %%d0,%%d0 \n" /* put bit into LSB of byte */
|
||||||
"addx.l %%d0,%%d0 \n" /* puts bit into LSB, shifts left by 1 */
|
|
||||||
"lsr.l #1,%%d3 \n"
|
"lsr.l #1,%%d3 \n"
|
||||||
"addx.l %%d0,%%d0 \n"
|
"addx.l %%d0,%%d0 \n"
|
||||||
"lsr.l #1,%%d4 \n"
|
"lsr.l #1,%%d4 \n"
|
||||||
|
@@ -1318,8 +1448,8 @@ static void _writearray(unsigned char *address, const unsigned char *src,
|
||||||
|
|
||||||
"move.b %%d0,(%[addr]) \n" /* store byte to bitplane */
|
"move.b %%d0,(%[addr]) \n" /* store byte to bitplane */
|
||||||
"add.l %[psiz],%[addr] \n" /* advance to next bitplane */
|
"add.l %[psiz],%[addr] \n" /* advance to next bitplane */
|
||||||
"cmp.l %[addr],%[end] \n" /* last bitplane done? */
|
"cmp.l %[addr],%[end] \n" /* loop for all bitplanes */
|
||||||
"bhi.b .wa_sloop \n" /* no: loop */
|
"bhi.b .wa_sloop \n"
|
||||||
|
|
||||||
".wa_end: \n"
|
".wa_end: \n"
|
||||||
: /* outputs */
|
: /* outputs */
|
||||||
|
|
|
@@ -283,32 +283,32 @@ void gray_ub_scroll_left(int count)
|
||||||
if (count)
|
if (count)
|
||||||
{
|
{
|
||||||
asm (
|
asm (
|
||||||
"mov r4, %[high] \n"
|
"mov r4, %[high] \n" /* rows = height */
|
||||||
|
|
||||||
".sl_rloop: \n"
|
".sl_rloop: \n" /* repeat for every row */
|
||||||
"mov r5, %[addr] \n"
|
"mov r5, %[addr] \n" /* get start address */
|
||||||
"mov r2, %[dpth] \n"
|
"mov r2, %[dpth] \n" /* planes = depth */
|
||||||
|
|
||||||
".sl_oloop: \n"
|
".sl_oloop: \n" /* repeat for every bitplane */
|
||||||
"mov r6, r5 \n"
|
"mov r6, r5 \n" /* get start address */
|
||||||
"mov r3, %[cols] \n"
|
"mov r3, %[cols] \n" /* cols = col_count */
|
||||||
"mov r1, #0 \n"
|
"mov r1, #0 \n" /* fill with zero */
|
||||||
|
|
||||||
".sl_iloop: \n"
|
".sl_iloop: \n" /* repeat for all cols */
|
||||||
"mov r1, r1, lsr #8 \n"
|
"mov r1, r1, lsr #8 \n" /* shift right to get residue */
|
||||||
"ldrb r0, [r6, #-1]! \n"
|
"ldrb r0, [r6, #-1]! \n" /* decrement addr & get data byte */
|
||||||
"orr r1, r1, r0, lsl %[cnt] \n"
|
"orr r1, r1, r0, lsl %[cnt] \n" /* combine with last residue */
|
||||||
"strb r1, [r6] \n"
|
"strb r1, [r6] \n" /* store data */
|
||||||
|
|
||||||
"subs r3, r3, #1 \n"
|
"subs r3, r3, #1 \n" /* cols-- */
|
||||||
"bne .sl_iloop \n"
|
"bne .sl_iloop \n"
|
||||||
|
|
||||||
"add r5, r5, %[psiz] \n"
|
"add r5, r5, %[psiz] \n" /* start_address += plane_size */
|
||||||
"subs r2, r2, #1 \n"
|
"subs r2, r2, #1 \n" /* planes-- */
|
||||||
"bne .sl_oloop \n"
|
"bne .sl_oloop \n"
|
||||||
|
|
||||||
"add %[addr],%[addr],%[bwid] \n"
|
"add %[addr],%[addr],%[bwid] \n" /* start_address += bwidth */
|
||||||
"subs r4, r4, #1 \n"
|
"subs r4, r4, #1 \n" /* rows-- */
|
||||||
"bne .sl_rloop \n"
|
"bne .sl_rloop \n"
|
||||||
: /* outputs */
|
: /* outputs */
|
||||||
: /* inputs */
|
: /* inputs */
|
||||||
|
@@ -364,32 +364,32 @@ void gray_ub_scroll_right(int count)
|
||||||
if (count)
|
if (count)
|
||||||
{
|
{
|
||||||
asm (
|
asm (
|
||||||
"mov r4, %[high] \n"
|
"mov r4, %[high] \n" /* rows = height */
|
||||||
|
|
||||||
".sr_rloop: \n"
|
".sr_rloop: \n" /* repeat for every row */
|
||||||
"mov r5, %[addr] \n"
|
"mov r5, %[addr] \n" /* get start address */
|
||||||
"mov r2, %[dpth] \n"
|
"mov r2, %[dpth] \n" /* planes = depth */
|
||||||
|
|
||||||
".sr_oloop: \n"
|
".sr_oloop: \n" /* repeat for every bitplane */
|
||||||
"mov r6, r5 \n"
|
"mov r6, r5 \n" /* get start address */
|
||||||
"mov r3, %[cols] \n"
|
"mov r3, %[cols] \n" /* cols = col_count */
|
||||||
"mov r1, #0 \n"
|
"mov r1, #0 \n" /* fill with zero */
|
||||||
|
|
||||||
".sr_iloop: \n"
|
".sr_iloop: \n" /* repeat for all cols */
|
||||||
"ldrb r0, [r6] \n"
|
"ldrb r0, [r6] \n" /* get data byte */
|
||||||
"orr r1, r0, r1, lsl #8 \n"
|
"orr r1, r0, r1, lsl #8 \n" /* combine w/ old data shifted to 2nd byte */
|
||||||
"mov r0, r1, lsr %[cnt] \n"
|
"mov r0, r1, lsr %[cnt] \n" /* shift right */
|
||||||
"strb r0, [r6], #1 \n"
|
"strb r0, [r6], #1 \n" /* store data, increment addr */
|
||||||
|
|
||||||
"subs r3, r3, #1 \n"
|
"subs r3, r3, #1 \n" /* cols-- */
|
||||||
"bne .sr_iloop \n"
|
"bne .sr_iloop \n"
|
||||||
|
|
||||||
"add r5, r5, %[psiz] \n"
|
"add r5, r5, %[psiz] \n" /* start_address += plane_size */
|
||||||
"subs r2, r2, #1 \n"
|
"subs r2, r2, #1 \n" /* planes-- */
|
||||||
"bne .sr_oloop \n"
|
"bne .sr_oloop \n"
|
||||||
|
|
||||||
"add %[addr],%[addr],%[bwid] \n"
|
"add %[addr],%[addr],%[bwid] \n" /* start_address += bwidth */
|
||||||
"subs r4, r4, #1 \n"
|
"subs r4, r4, #1 \n" /* rows-- */
|
||||||
"bne .sr_rloop \n"
|
"bne .sr_rloop \n"
|
||||||
: /* outputs */
|
: /* outputs */
|
||||||
: /* inputs */
|
: /* inputs */
|
||||||
|
@@ -714,8 +714,7 @@ void gray_ub_scroll_up(int count)
|
||||||
"move.b (%%a1),%%d0 \n" /* get data byte */
|
"move.b (%%a1),%%d0 \n" /* get data byte */
|
||||||
"lsl.l #8,%%d1 \n" /* old data to 2nd byte */
|
"lsl.l #8,%%d1 \n" /* old data to 2nd byte */
|
||||||
"or.l %%d1,%%d0 \n" /* combine old data */
|
"or.l %%d1,%%d0 \n" /* combine old data */
|
||||||
"clr.l %%d1 \n"
|
"move.l %%d0,%%d1 \n" /* keep data for next round */
|
||||||
"move.b %%d0,%%d1 \n" /* keep data for next round */
|
|
||||||
"lsr.l %[cnt],%%d0 \n" /* shift right */
|
"lsr.l %[cnt],%%d0 \n" /* shift right */
|
||||||
"move.b %%d0,(%%a1) \n" /* store data */
|
"move.b %%d0,(%%a1) \n" /* store data */
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue