1
0
Fork 0
forked from len0rd/rockbox

Replace the range_limit lookup table with an inline function, asm optimised for SH1 and coldfire. Slight speedup on SH1, up to 15% speedup on coldfire. Saves almost 1KB.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@8736 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Jens Arnold 2006-02-19 12:43:50 +00:00
parent da5cef6330
commit 2ea75fdbec

View file

@ -143,78 +143,47 @@ static struct plugin_api* rb;
/**************** begin JPEG code ********************/
/* LUT for IDCT, this could also be used for gamma correction */
const unsigned char range_limit[1024] =
INLINE unsigned range_limit(int value)
{
128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,
#if CONFIG_CPU == SH7034
unsigned tmp;
asm ( /* Note: Uses knowledge that only the low byte of the result is used */
"mov #-128,%[t] \n"
"sub %[t],%[v] \n" /* value -= -128; equals value += 128; */
"extu.b %[v],%[t] \n"
"cmp/eq %[v],%[t] \n" /* low byte == whole number ? */
"bt 1f \n" /* yes: no overflow */
"cmp/pz %[v] \n" /* overflow: positive? */
"subc %[v],%[v] \n" /* %[r] now either 0 or 0xffffffff */
"1: \n"
: /* outputs */
[v]"+r"(value),
[t]"=&r"(tmp)
);
return value;
#elif defined(CPU_COLDFIRE)
asm ( /* Note: Uses knowledge that only the low byte of the result is used */
"add.l #128,%[v] \n" /* value += 128; */
"cmp.l #255,%[v] \n" /* overflow? */
"bls.b 1f \n" /* no: return value */
"spl.b %[v] \n" /* yes: set low byte to appropriate boundary */
"1: \n"
: /* outputs */
[v]"+r"(value)
);
return value;
#else
value += 128;
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
if ((unsigned)value <= 255)
return value;
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127
};
if (value < 0)
return 0;
return 255;
#endif
}
/* IDCT implementation */
@ -266,8 +235,6 @@ const unsigned char range_limit[1024] =
*/
#define DESCALE(x,n) (((x) + (1l << ((n)-1))) >> (n))
#define RANGE_MASK (255 * 4 + 3) /* 2 bits wider than legal samples */
/*
@ -277,7 +244,7 @@ const unsigned char range_limit[1024] =
void idct1x1(unsigned char* p_byte, int* inptr, int* quantptr, int skip_line)
{
(void)skip_line; /* unused */
*p_byte = range_limit[(inptr[0] * quantptr[0] >> 3) & RANGE_MASK];
*p_byte = range_limit(inptr[0] * quantptr[0] >> 3);
}
@ -312,18 +279,14 @@ void idct2x2(unsigned char* p_byte, int* inptr, int* quantptr, int skip_line)
/* Row 0 */
outptr = p_byte;
outptr[0] = range_limit[(int) DESCALE(tmp0 + tmp1, 3)
& RANGE_MASK];
outptr[1] = range_limit[(int) DESCALE(tmp0 - tmp1, 3)
& RANGE_MASK];
outptr[0] = range_limit((int) DESCALE(tmp0 + tmp1, 3));
outptr[1] = range_limit((int) DESCALE(tmp0 - tmp1, 3));
/* Row 1 */
outptr = p_byte + skip_line;
outptr[0] = range_limit[(int) DESCALE(tmp2 + tmp3, 3)
& RANGE_MASK];
outptr[1] = range_limit[(int) DESCALE(tmp2 - tmp3, 3)
& RANGE_MASK];
outptr[0] = range_limit((int) DESCALE(tmp2 + tmp3, 3));
outptr[1] = range_limit((int) DESCALE(tmp2 - tmp3, 3));
}
@ -398,18 +361,14 @@ void idct4x4(unsigned char* p_byte, int* inptr, int* quantptr, int skip_line)
/* Final output stage */
outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp2,
CONST_BITS+PASS1_BITS+3)
& RANGE_MASK];
outptr[3] = range_limit[(int) DESCALE(tmp10 - tmp2,
CONST_BITS+PASS1_BITS+3)
& RANGE_MASK];
outptr[1] = range_limit[(int) DESCALE(tmp12 + tmp0,
CONST_BITS+PASS1_BITS+3)
& RANGE_MASK];
outptr[2] = range_limit[(int) DESCALE(tmp12 - tmp0,
CONST_BITS+PASS1_BITS+3)
& RANGE_MASK];
outptr[0] = range_limit((int) DESCALE(tmp10 + tmp2,
CONST_BITS+PASS1_BITS+3));
outptr[3] = range_limit((int) DESCALE(tmp10 - tmp2,
CONST_BITS+PASS1_BITS+3));
outptr[1] = range_limit((int) DESCALE(tmp12 + tmp0,
CONST_BITS+PASS1_BITS+3));
outptr[2] = range_limit((int) DESCALE(tmp12 - tmp0,
CONST_BITS+PASS1_BITS+3));
wsptr += 4; /* advance pointer to next row */
}
@ -549,8 +508,8 @@ void idct8x8(unsigned char* p_byte, int* inptr, int* quantptr, int skip_line)
| wsptr[4] | wsptr[5] | wsptr[6] | wsptr[7]) == 0)
{
/* AC terms all zero */
unsigned char dcval = range_limit[(int) DESCALE((long) wsptr[0],
PASS1_BITS+3) & RANGE_MASK];
unsigned char dcval = range_limit((int) DESCALE((long) wsptr[0],
PASS1_BITS+3));
outptr[0] = dcval;
outptr[1] = dcval;
@ -617,22 +576,22 @@ void idct8x8(unsigned char* p_byte, int* inptr, int* quantptr, int skip_line)
/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp3,
CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
outptr[7] = range_limit[(int) DESCALE(tmp10 - tmp3,
CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
outptr[1] = range_limit[(int) DESCALE(tmp11 + tmp2,
CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
outptr[6] = range_limit[(int) DESCALE(tmp11 - tmp2,
CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
outptr[2] = range_limit[(int) DESCALE(tmp12 + tmp1,
CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
outptr[5] = range_limit[(int) DESCALE(tmp12 - tmp1,
CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
outptr[3] = range_limit[(int) DESCALE(tmp13 + tmp0,
CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
outptr[4] = range_limit[(int) DESCALE(tmp13 - tmp0,
CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
outptr[0] = range_limit((int) DESCALE(tmp10 + tmp3,
CONST_BITS+PASS1_BITS+3));
outptr[7] = range_limit((int) DESCALE(tmp10 - tmp3,
CONST_BITS+PASS1_BITS+3));
outptr[1] = range_limit((int) DESCALE(tmp11 + tmp2,
CONST_BITS+PASS1_BITS+3));
outptr[6] = range_limit((int) DESCALE(tmp11 - tmp2,
CONST_BITS+PASS1_BITS+3));
outptr[2] = range_limit((int) DESCALE(tmp12 + tmp1,
CONST_BITS+PASS1_BITS+3));
outptr[5] = range_limit((int) DESCALE(tmp12 - tmp1,
CONST_BITS+PASS1_BITS+3));
outptr[3] = range_limit((int) DESCALE(tmp13 + tmp0,
CONST_BITS+PASS1_BITS+3));
outptr[4] = range_limit((int) DESCALE(tmp13 - tmp0,
CONST_BITS+PASS1_BITS+3));
wsptr += 8; /* advance pointer to next row */
}