1
0
Fork 0
forked from len0rd/rockbox

Replace the range_limit lookup table with an inline function, asm optimised for SH1 and coldfire. Slight speedup on SH1, up to 15% speedup on coldfire. Saves almost 1KB.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@8736 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Jens Arnold 2006-02-19 12:43:50 +00:00
parent da5cef6330
commit 2ea75fdbec

View file

@ -143,78 +143,47 @@ static struct plugin_api* rb;
/**************** begin JPEG code ********************/ /**************** begin JPEG code ********************/
/* LUT for IDCT, this could also be used for gamma correction */ INLINE unsigned range_limit(int value)
const unsigned char range_limit[1024] =
{ {
128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, #if CONFIG_CPU == SH7034
144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, unsigned tmp;
160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, asm ( /* Note: Uses knowledge that only the low byte of the result is used */
176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, "mov #-128,%[t] \n"
192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, "sub %[t],%[v] \n" /* value -= -128; equals value += 128; */
208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223, "extu.b %[v],%[t] \n"
224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239, "cmp/eq %[v],%[t] \n" /* low byte == whole number ? */
240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255, "bt 1f \n" /* yes: no overflow */
"cmp/pz %[v] \n" /* overflow: positive? */
"subc %[v],%[v] \n" /* %[r] now either 0 or 0xffffffff */
"1: \n"
: /* outputs */
[v]"+r"(value),
[t]"=&r"(tmp)
);
return value;
#elif defined(CPU_COLDFIRE)
asm ( /* Note: Uses knowledge that only the low byte of the result is used */
"add.l #128,%[v] \n" /* value += 128; */
"cmp.l #255,%[v] \n" /* overflow? */
"bls.b 1f \n" /* no: return value */
"spl.b %[v] \n" /* yes: set low byte to appropriate boundary */
"1: \n"
: /* outputs */
[v]"+r"(value)
);
return value;
#else
value += 128;
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, if ((unsigned)value <= 255)
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, return value;
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, if (value < 0)
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, return 0;
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127
};
return 255;
#endif
}
/* IDCT implementation */ /* IDCT implementation */
@ -266,8 +235,6 @@ const unsigned char range_limit[1024] =
*/ */
#define DESCALE(x,n) (((x) + (1l << ((n)-1))) >> (n)) #define DESCALE(x,n) (((x) + (1l << ((n)-1))) >> (n))
#define RANGE_MASK (255 * 4 + 3) /* 2 bits wider than legal samples */
/* /*
@ -277,7 +244,7 @@ const unsigned char range_limit[1024] =
/*
 * 1x1 IDCT: writes one output byte computed from the first coefficient only.
 *
 * p_byte    - destination for the single output sample
 * inptr     - coefficient block; only inptr[0] is read
 * quantptr  - quantisation table; only quantptr[0] is read
 * skip_line - not used here; the parameter is kept so the signature matches
 *             the other idctNxN routines
 */
void idct1x1(unsigned char* p_byte, int* inptr, int* quantptr, int skip_line)
{
    int dequantised = inptr[0] * quantptr[0];

    (void)skip_line; /* unused */

    /* scale down by 8, then level-shift and clamp to a byte */
    *p_byte = range_limit(dequantised >> 3);
}
@ -312,18 +279,14 @@ void idct2x2(unsigned char* p_byte, int* inptr, int* quantptr, int skip_line)
/* Row 0 */ /* Row 0 */
outptr = p_byte; outptr = p_byte;
outptr[0] = range_limit[(int) DESCALE(tmp0 + tmp1, 3) outptr[0] = range_limit((int) DESCALE(tmp0 + tmp1, 3));
& RANGE_MASK]; outptr[1] = range_limit((int) DESCALE(tmp0 - tmp1, 3));
outptr[1] = range_limit[(int) DESCALE(tmp0 - tmp1, 3)
& RANGE_MASK];
/* Row 1 */ /* Row 1 */
outptr = p_byte + skip_line; outptr = p_byte + skip_line;
outptr[0] = range_limit[(int) DESCALE(tmp2 + tmp3, 3) outptr[0] = range_limit((int) DESCALE(tmp2 + tmp3, 3));
& RANGE_MASK]; outptr[1] = range_limit((int) DESCALE(tmp2 - tmp3, 3));
outptr[1] = range_limit[(int) DESCALE(tmp2 - tmp3, 3)
& RANGE_MASK];
} }
@ -398,18 +361,14 @@ void idct4x4(unsigned char* p_byte, int* inptr, int* quantptr, int skip_line)
/* Final output stage */ /* Final output stage */
outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp2, outptr[0] = range_limit((int) DESCALE(tmp10 + tmp2,
CONST_BITS+PASS1_BITS+3) CONST_BITS+PASS1_BITS+3));
& RANGE_MASK]; outptr[3] = range_limit((int) DESCALE(tmp10 - tmp2,
outptr[3] = range_limit[(int) DESCALE(tmp10 - tmp2, CONST_BITS+PASS1_BITS+3));
CONST_BITS+PASS1_BITS+3) outptr[1] = range_limit((int) DESCALE(tmp12 + tmp0,
& RANGE_MASK]; CONST_BITS+PASS1_BITS+3));
outptr[1] = range_limit[(int) DESCALE(tmp12 + tmp0, outptr[2] = range_limit((int) DESCALE(tmp12 - tmp0,
CONST_BITS+PASS1_BITS+3) CONST_BITS+PASS1_BITS+3));
& RANGE_MASK];
outptr[2] = range_limit[(int) DESCALE(tmp12 - tmp0,
CONST_BITS+PASS1_BITS+3)
& RANGE_MASK];
wsptr += 4; /* advance pointer to next row */ wsptr += 4; /* advance pointer to next row */
} }
@ -549,8 +508,8 @@ void idct8x8(unsigned char* p_byte, int* inptr, int* quantptr, int skip_line)
| wsptr[4] | wsptr[5] | wsptr[6] | wsptr[7]) == 0) | wsptr[4] | wsptr[5] | wsptr[6] | wsptr[7]) == 0)
{ {
/* AC terms all zero */ /* AC terms all zero */
unsigned char dcval = range_limit[(int) DESCALE((long) wsptr[0], unsigned char dcval = range_limit((int) DESCALE((long) wsptr[0],
PASS1_BITS+3) & RANGE_MASK]; PASS1_BITS+3));
outptr[0] = dcval; outptr[0] = dcval;
outptr[1] = dcval; outptr[1] = dcval;
@ -617,22 +576,22 @@ void idct8x8(unsigned char* p_byte, int* inptr, int* quantptr, int skip_line)
/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp3, outptr[0] = range_limit((int) DESCALE(tmp10 + tmp3,
CONST_BITS+PASS1_BITS+3) & RANGE_MASK]; CONST_BITS+PASS1_BITS+3));
outptr[7] = range_limit[(int) DESCALE(tmp10 - tmp3, outptr[7] = range_limit((int) DESCALE(tmp10 - tmp3,
CONST_BITS+PASS1_BITS+3) & RANGE_MASK]; CONST_BITS+PASS1_BITS+3));
outptr[1] = range_limit[(int) DESCALE(tmp11 + tmp2, outptr[1] = range_limit((int) DESCALE(tmp11 + tmp2,
CONST_BITS+PASS1_BITS+3) & RANGE_MASK]; CONST_BITS+PASS1_BITS+3));
outptr[6] = range_limit[(int) DESCALE(tmp11 - tmp2, outptr[6] = range_limit((int) DESCALE(tmp11 - tmp2,
CONST_BITS+PASS1_BITS+3) & RANGE_MASK]; CONST_BITS+PASS1_BITS+3));
outptr[2] = range_limit[(int) DESCALE(tmp12 + tmp1, outptr[2] = range_limit((int) DESCALE(tmp12 + tmp1,
CONST_BITS+PASS1_BITS+3) & RANGE_MASK]; CONST_BITS+PASS1_BITS+3));
outptr[5] = range_limit[(int) DESCALE(tmp12 - tmp1, outptr[5] = range_limit((int) DESCALE(tmp12 - tmp1,
CONST_BITS+PASS1_BITS+3) & RANGE_MASK]; CONST_BITS+PASS1_BITS+3));
outptr[3] = range_limit[(int) DESCALE(tmp13 + tmp0, outptr[3] = range_limit((int) DESCALE(tmp13 + tmp0,
CONST_BITS+PASS1_BITS+3) & RANGE_MASK]; CONST_BITS+PASS1_BITS+3));
outptr[4] = range_limit[(int) DESCALE(tmp13 - tmp0, outptr[4] = range_limit((int) DESCALE(tmp13 - tmp0,
CONST_BITS+PASS1_BITS+3) & RANGE_MASK]; CONST_BITS+PASS1_BITS+3));
wsptr += 8; /* advance pointer to next row */ wsptr += 8; /* advance pointer to next row */
} }
@ -2273,7 +2232,7 @@ int jpegmem(struct jpeg *p_jpg, int ds)
{ {
int size; int size;
size = (p_jpg->x_phys/ds/p_jpg->subsample_x[0]) size = (p_jpg->x_phys/ds/p_jpg->subsample_x[0])
* (p_jpg->y_phys/ds/p_jpg->subsample_y[0]); * (p_jpg->y_phys/ds/p_jpg->subsample_y[0]);
#ifdef HAVE_LCD_COLOR #ifdef HAVE_LCD_COLOR
if (p_jpg->blocks > 1) /* colour, add requirements for chroma */ if (p_jpg->blocks > 1) /* colour, add requirements for chroma */