forked from len0rd/rockbox
Replace the range_limit lookup table with an inline function, asm optimised for SH1 and coldfire. Slight speedup on SH1, up to 15% speedup on coldfire. Saves almost 1KB.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@8736 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
da5cef6330
commit
2ea75fdbec
1 changed files with 69 additions and 110 deletions
|
@ -143,78 +143,47 @@ static struct plugin_api* rb;
|
||||||
|
|
||||||
/**************** begin JPEG code ********************/
|
/**************** begin JPEG code ********************/
|
||||||
|
|
||||||
/* LUT for IDCT, this could also be used for gamma correction */
|
INLINE unsigned range_limit(int value)
|
||||||
const unsigned char range_limit[1024] =
|
|
||||||
{
|
{
|
||||||
128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
|
#if CONFIG_CPU == SH7034
|
||||||
144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
|
unsigned tmp;
|
||||||
160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
|
asm ( /* Note: Uses knowledge that only the low byte of the result is used */
|
||||||
176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
|
"mov #-128,%[t] \n"
|
||||||
192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
|
"sub %[t],%[v] \n" /* value -= -128; equals value += 128; */
|
||||||
208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
|
"extu.b %[v],%[t] \n"
|
||||||
224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
|
"cmp/eq %[v],%[t] \n" /* low byte == whole number ? */
|
||||||
240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,
|
"bt 1f \n" /* yes: no overflow */
|
||||||
|
"cmp/pz %[v] \n" /* overflow: positive? */
|
||||||
|
"subc %[v],%[v] \n" /* %[r] now either 0 or 0xffffffff */
|
||||||
|
"1: \n"
|
||||||
|
: /* outputs */
|
||||||
|
[v]"+r"(value),
|
||||||
|
[t]"=&r"(tmp)
|
||||||
|
);
|
||||||
|
return value;
|
||||||
|
#elif defined(CPU_COLDFIRE)
|
||||||
|
asm ( /* Note: Uses knowledge that only the low byte of the result is used */
|
||||||
|
"add.l #128,%[v] \n" /* value += 128; */
|
||||||
|
"cmp.l #255,%[v] \n" /* overflow? */
|
||||||
|
"bls.b 1f \n" /* no: return value */
|
||||||
|
"spl.b %[v] \n" /* yes: set low byte to appropriate boundary */
|
||||||
|
"1: \n"
|
||||||
|
: /* outputs */
|
||||||
|
[v]"+r"(value)
|
||||||
|
);
|
||||||
|
return value;
|
||||||
|
#else
|
||||||
|
value += 128;
|
||||||
|
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
if ((unsigned)value <= 255)
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
return value;
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
|
||||||
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
|
|
||||||
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
if (value < 0)
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
return 0;
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
|
|
||||||
|
|
||||||
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
|
|
||||||
16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
|
|
||||||
32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
|
|
||||||
48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
|
|
||||||
64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
|
|
||||||
80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
|
|
||||||
96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
|
|
||||||
112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127
|
|
||||||
};
|
|
||||||
|
|
||||||
|
return 255;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
/* IDCT implementation */
|
/* IDCT implementation */
|
||||||
|
|
||||||
|
@ -266,8 +235,6 @@ const unsigned char range_limit[1024] =
|
||||||
*/
|
*/
|
||||||
#define DESCALE(x,n) (((x) + (1l << ((n)-1))) >> (n))
|
#define DESCALE(x,n) (((x) + (1l << ((n)-1))) >> (n))
|
||||||
|
|
||||||
#define RANGE_MASK (255 * 4 + 3) /* 2 bits wider than legal samples */
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -277,7 +244,7 @@ const unsigned char range_limit[1024] =
|
||||||
void idct1x1(unsigned char* p_byte, int* inptr, int* quantptr, int skip_line)
|
void idct1x1(unsigned char* p_byte, int* inptr, int* quantptr, int skip_line)
|
||||||
{
|
{
|
||||||
(void)skip_line; /* unused */
|
(void)skip_line; /* unused */
|
||||||
*p_byte = range_limit[(inptr[0] * quantptr[0] >> 3) & RANGE_MASK];
|
*p_byte = range_limit(inptr[0] * quantptr[0] >> 3);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -312,18 +279,14 @@ void idct2x2(unsigned char* p_byte, int* inptr, int* quantptr, int skip_line)
|
||||||
/* Row 0 */
|
/* Row 0 */
|
||||||
outptr = p_byte;
|
outptr = p_byte;
|
||||||
|
|
||||||
outptr[0] = range_limit[(int) DESCALE(tmp0 + tmp1, 3)
|
outptr[0] = range_limit((int) DESCALE(tmp0 + tmp1, 3));
|
||||||
& RANGE_MASK];
|
outptr[1] = range_limit((int) DESCALE(tmp0 - tmp1, 3));
|
||||||
outptr[1] = range_limit[(int) DESCALE(tmp0 - tmp1, 3)
|
|
||||||
& RANGE_MASK];
|
|
||||||
|
|
||||||
/* Row 1 */
|
/* Row 1 */
|
||||||
outptr = p_byte + skip_line;
|
outptr = p_byte + skip_line;
|
||||||
|
|
||||||
outptr[0] = range_limit[(int) DESCALE(tmp2 + tmp3, 3)
|
outptr[0] = range_limit((int) DESCALE(tmp2 + tmp3, 3));
|
||||||
& RANGE_MASK];
|
outptr[1] = range_limit((int) DESCALE(tmp2 - tmp3, 3));
|
||||||
outptr[1] = range_limit[(int) DESCALE(tmp2 - tmp3, 3)
|
|
||||||
& RANGE_MASK];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -398,18 +361,14 @@ void idct4x4(unsigned char* p_byte, int* inptr, int* quantptr, int skip_line)
|
||||||
|
|
||||||
/* Final output stage */
|
/* Final output stage */
|
||||||
|
|
||||||
outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp2,
|
outptr[0] = range_limit((int) DESCALE(tmp10 + tmp2,
|
||||||
CONST_BITS+PASS1_BITS+3)
|
CONST_BITS+PASS1_BITS+3));
|
||||||
& RANGE_MASK];
|
outptr[3] = range_limit((int) DESCALE(tmp10 - tmp2,
|
||||||
outptr[3] = range_limit[(int) DESCALE(tmp10 - tmp2,
|
CONST_BITS+PASS1_BITS+3));
|
||||||
CONST_BITS+PASS1_BITS+3)
|
outptr[1] = range_limit((int) DESCALE(tmp12 + tmp0,
|
||||||
& RANGE_MASK];
|
CONST_BITS+PASS1_BITS+3));
|
||||||
outptr[1] = range_limit[(int) DESCALE(tmp12 + tmp0,
|
outptr[2] = range_limit((int) DESCALE(tmp12 - tmp0,
|
||||||
CONST_BITS+PASS1_BITS+3)
|
CONST_BITS+PASS1_BITS+3));
|
||||||
& RANGE_MASK];
|
|
||||||
outptr[2] = range_limit[(int) DESCALE(tmp12 - tmp0,
|
|
||||||
CONST_BITS+PASS1_BITS+3)
|
|
||||||
& RANGE_MASK];
|
|
||||||
|
|
||||||
wsptr += 4; /* advance pointer to next row */
|
wsptr += 4; /* advance pointer to next row */
|
||||||
}
|
}
|
||||||
|
@ -549,8 +508,8 @@ void idct8x8(unsigned char* p_byte, int* inptr, int* quantptr, int skip_line)
|
||||||
| wsptr[4] | wsptr[5] | wsptr[6] | wsptr[7]) == 0)
|
| wsptr[4] | wsptr[5] | wsptr[6] | wsptr[7]) == 0)
|
||||||
{
|
{
|
||||||
/* AC terms all zero */
|
/* AC terms all zero */
|
||||||
unsigned char dcval = range_limit[(int) DESCALE((long) wsptr[0],
|
unsigned char dcval = range_limit((int) DESCALE((long) wsptr[0],
|
||||||
PASS1_BITS+3) & RANGE_MASK];
|
PASS1_BITS+3));
|
||||||
|
|
||||||
outptr[0] = dcval;
|
outptr[0] = dcval;
|
||||||
outptr[1] = dcval;
|
outptr[1] = dcval;
|
||||||
|
@ -617,22 +576,22 @@ void idct8x8(unsigned char* p_byte, int* inptr, int* quantptr, int skip_line)
|
||||||
|
|
||||||
/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
|
/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
|
||||||
|
|
||||||
outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp3,
|
outptr[0] = range_limit((int) DESCALE(tmp10 + tmp3,
|
||||||
CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
|
CONST_BITS+PASS1_BITS+3));
|
||||||
outptr[7] = range_limit[(int) DESCALE(tmp10 - tmp3,
|
outptr[7] = range_limit((int) DESCALE(tmp10 - tmp3,
|
||||||
CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
|
CONST_BITS+PASS1_BITS+3));
|
||||||
outptr[1] = range_limit[(int) DESCALE(tmp11 + tmp2,
|
outptr[1] = range_limit((int) DESCALE(tmp11 + tmp2,
|
||||||
CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
|
CONST_BITS+PASS1_BITS+3));
|
||||||
outptr[6] = range_limit[(int) DESCALE(tmp11 - tmp2,
|
outptr[6] = range_limit((int) DESCALE(tmp11 - tmp2,
|
||||||
CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
|
CONST_BITS+PASS1_BITS+3));
|
||||||
outptr[2] = range_limit[(int) DESCALE(tmp12 + tmp1,
|
outptr[2] = range_limit((int) DESCALE(tmp12 + tmp1,
|
||||||
CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
|
CONST_BITS+PASS1_BITS+3));
|
||||||
outptr[5] = range_limit[(int) DESCALE(tmp12 - tmp1,
|
outptr[5] = range_limit((int) DESCALE(tmp12 - tmp1,
|
||||||
CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
|
CONST_BITS+PASS1_BITS+3));
|
||||||
outptr[3] = range_limit[(int) DESCALE(tmp13 + tmp0,
|
outptr[3] = range_limit((int) DESCALE(tmp13 + tmp0,
|
||||||
CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
|
CONST_BITS+PASS1_BITS+3));
|
||||||
outptr[4] = range_limit[(int) DESCALE(tmp13 - tmp0,
|
outptr[4] = range_limit((int) DESCALE(tmp13 - tmp0,
|
||||||
CONST_BITS+PASS1_BITS+3) & RANGE_MASK];
|
CONST_BITS+PASS1_BITS+3));
|
||||||
|
|
||||||
wsptr += 8; /* advance pointer to next row */
|
wsptr += 8; /* advance pointer to next row */
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue