mirror of
https://github.com/Rockbox/rockbox.git
synced 2025-10-14 02:27:39 -04:00
Use pre-multiplication in scaler to save one multiply per color component on ARM and Coldfire, at the cost of an extra add/shift in the horizontal scaler to reduce values to a workable range. SH-1 retains the same basic math, as
the use of 16x16->32 hardware multiplication in the earlier scaler stages saves more than removing the 32x32->40 multiply to descale output. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@21091 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
c4ed88f593
commit
92785b8f2f
5 changed files with 356 additions and 212 deletions
|
@ -49,8 +49,8 @@ static void output_row_null(uint32_t row, void * row_in,
|
||||||
#else
|
#else
|
||||||
uint32_t *lim = in + ctx->bm->width;
|
uint32_t *lim = in + ctx->bm->width;
|
||||||
#endif
|
#endif
|
||||||
for (; in < lim; in++)
|
while (in < lim)
|
||||||
output = SC_MUL(*in + ctx->round, ctx->divisor);
|
output = SC_OUT(*in++, ctx);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -733,7 +733,7 @@ static void output_row_grey_32(uint32_t row, void * row_in,
|
||||||
uint32_t *qp = (uint32_t*)row_in;
|
uint32_t *qp = (uint32_t*)row_in;
|
||||||
uint8_t *dest = (uint8_t*)ctx->bm->data + ctx->bm->width * row;
|
uint8_t *dest = (uint8_t*)ctx->bm->data + ctx->bm->width * row;
|
||||||
for (col = 0; col < ctx->bm->width; col++)
|
for (col = 0; col < ctx->bm->width; col++)
|
||||||
*dest++ = SC_MUL((*qp++) + ctx->round,ctx->divisor);
|
*dest++ = SC_OUT(*qp++, ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
static unsigned int get_size_grey(struct bitmap *bm)
|
static unsigned int get_size_grey(struct bitmap *bm)
|
||||||
|
|
|
@ -592,25 +592,12 @@ static inline PFreal fcos(int iangle)
|
||||||
return fsin(iangle + (IANGLE_MAX >> 2));
|
return fsin(iangle + (IANGLE_MAX >> 2));
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline uint32_t div255(uint32_t val)
|
static inline unsigned scale_val(unsigned val, unsigned bits)
|
||||||
{
|
{
|
||||||
return ((((val >> 8) + val) >> 8) + val) >> 8;
|
val = val * ((1 << bits) - 1);
|
||||||
|
return ((val >> 8) + val + 128) >> 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define SCALE_VAL(val,out) div255((val) * (out) + 127)
|
|
||||||
#define SCALE_VAL32(val, out) \
|
|
||||||
({ \
|
|
||||||
uint32_t val__ = (val) * (out); \
|
|
||||||
val__ = ((((val__ >> 8) + val__) >> 8) + val__ + 128) >> 8; \
|
|
||||||
val__; \
|
|
||||||
})
|
|
||||||
#define SCALE_VAL8(val, out) \
|
|
||||||
({ \
|
|
||||||
unsigned val__ = (val) * (out); \
|
|
||||||
val__ = ((val__ >> 8) + val__ + 128) >> 8; \
|
|
||||||
val__; \
|
|
||||||
})
|
|
||||||
|
|
||||||
static void output_row_8_transposed(uint32_t row, void * row_in,
|
static void output_row_8_transposed(uint32_t row, void * row_in,
|
||||||
struct scaler_context *ctx)
|
struct scaler_context *ctx)
|
||||||
{
|
{
|
||||||
|
@ -625,9 +612,9 @@ static void output_row_8_transposed(uint32_t row, void * row_in,
|
||||||
unsigned r, g, b;
|
unsigned r, g, b;
|
||||||
for (; dest < end; dest += ctx->bm->height)
|
for (; dest < end; dest += ctx->bm->height)
|
||||||
{
|
{
|
||||||
r = SCALE_VAL8(qp->red, 31);
|
r = scale_val(qp->red, 5);
|
||||||
g = SCALE_VAL8(qp->green, 63);
|
g = scale_val(qp->green, 6);
|
||||||
b = SCALE_VAL8((qp++)->blue, 31);
|
b = scale_val((qp++)->blue, 5);
|
||||||
*dest = LCD_RGBPACK_LCD(r,g,b);
|
*dest = LCD_RGBPACK_LCD(r,g,b);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -641,19 +628,15 @@ static void output_row_32_transposed(uint32_t row, void * row_in,
|
||||||
#ifdef USEGSLIB
|
#ifdef USEGSLIB
|
||||||
uint32_t *qp = (uint32_t*)row_in;
|
uint32_t *qp = (uint32_t*)row_in;
|
||||||
for (; dest < end; dest += ctx->bm->height)
|
for (; dest < end; dest += ctx->bm->height)
|
||||||
*dest = SC_MUL((*qp++) + ctx->round, ctx->divisor);
|
*dest = SC_OUT(*qp++, ctx);
|
||||||
#else
|
#else
|
||||||
struct uint32_rgb *qp = (struct uint32_rgb*)row_in;
|
struct uint32_rgb *qp = (struct uint32_rgb*)row_in;
|
||||||
uint32_t rb_mul = SCALE_VAL32(ctx->divisor, 31),
|
|
||||||
rb_rnd = SCALE_VAL32(ctx->round, 31),
|
|
||||||
g_mul = SCALE_VAL32(ctx->divisor, 63),
|
|
||||||
g_rnd = SCALE_VAL32(ctx->round, 63);
|
|
||||||
int r, g, b;
|
int r, g, b;
|
||||||
for (; dest < end; dest += ctx->bm->height)
|
for (; dest < end; dest += ctx->bm->height)
|
||||||
{
|
{
|
||||||
r = SC_MUL(qp->r + rb_rnd, rb_mul);
|
r = scale_val(SC_OUT(qp->r, ctx), 5);
|
||||||
g = SC_MUL(qp->g + g_rnd, g_mul);
|
g = scale_val(SC_OUT(qp->g, ctx), 6);
|
||||||
b = SC_MUL(qp->b + rb_rnd, rb_mul);
|
b = scale_val(SC_OUT(qp->b, ctx), 5);
|
||||||
qp++;
|
qp++;
|
||||||
*dest = LCD_RGBPACK_LCD(r,g,b);
|
*dest = LCD_RGBPACK_LCD(r,g,b);
|
||||||
}
|
}
|
||||||
|
@ -670,14 +653,14 @@ static void output_row_32_transposed_fromyuv(uint32_t row, void * row_in,
|
||||||
for (; dest < end; dest += ctx->bm->height)
|
for (; dest < end; dest += ctx->bm->height)
|
||||||
{
|
{
|
||||||
unsigned r, g, b, y, u, v;
|
unsigned r, g, b, y, u, v;
|
||||||
y = SC_MUL(qp->b + ctx->round, ctx->divisor);
|
y = SC_OUT(qp->b, ctx);
|
||||||
u = SC_MUL(qp->g + ctx->round, ctx->divisor);
|
u = SC_OUT(qp->g, ctx);
|
||||||
v = SC_MUL(qp->r + ctx->round, ctx->divisor);
|
v = SC_OUT(qp->r, ctx);
|
||||||
qp++;
|
qp++;
|
||||||
yuv_to_rgb(y, u, v, &r, &g, &b);
|
yuv_to_rgb(y, u, v, &r, &g, &b);
|
||||||
r = (31 * r + (r >> 3) + 127) >> 8;
|
r = scale_val(r, 5);
|
||||||
g = (63 * g + (g >> 2) + 127) >> 8;
|
g = scale_val(g, 6);
|
||||||
b = (31 * b + (b >> 3) + 127) >> 8;
|
b = scale_val(b, 5);
|
||||||
*dest = LCD_RGBPACK_LCD(r, g, b);
|
*dest = LCD_RGBPACK_LCD(r, g, b);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -131,20 +131,45 @@ int recalc_dimension(struct dim *dst, struct dim *src)
|
||||||
return false; \
|
return false; \
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Set up rounding and scale factors for horizontal area scaler */
|
#if defined(CPU_COLDFIRE)
|
||||||
static inline void scale_h_area_setup(struct scaler_context *ctx)
|
#define MAC(op1, op2, num) \
|
||||||
|
asm volatile( \
|
||||||
|
"mac.l %0, %1, %%acc" #num \
|
||||||
|
: \
|
||||||
|
: "%d" (op1), "d" (op2)\
|
||||||
|
)
|
||||||
|
#define MAC_OUT(dest, num) \
|
||||||
|
asm volatile( \
|
||||||
|
"movclr.l %%acc" #num ", %0" \
|
||||||
|
: "=d" (dest) \
|
||||||
|
)
|
||||||
|
#elif defined(CPU_SH)
|
||||||
|
/* calculate the 32-bit product of signed 16-bit op1 and op2 */
|
||||||
|
static inline int32_t mul_s16_s16(int16_t op1, int16_t op2)
|
||||||
{
|
{
|
||||||
/* sum is output value * src->width */
|
return (int32_t)(op1 * op2);
|
||||||
SDEBUGF("scale_h_area_setup\n");
|
|
||||||
ctx->divisor = ctx->src->width;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* calculate the 32-bit product of unsigned 16-bit op1 and op2 */
|
||||||
|
static inline uint32_t mul_u16_u16(uint16_t op1, uint16_t op2)
|
||||||
|
{
|
||||||
|
return (uint32_t)(op1 * op2);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/* horizontal area average scaler */
|
/* horizontal area average scaler */
|
||||||
static bool scale_h_area(void *out_line_ptr,
|
static bool scale_h_area(void *out_line_ptr,
|
||||||
struct scaler_context *ctx, bool accum)
|
struct scaler_context *ctx, bool accum)
|
||||||
{
|
{
|
||||||
SDEBUGF("scale_h_area\n");
|
SDEBUGF("scale_h_area\n");
|
||||||
unsigned int ix, ox, oxe, mul;
|
unsigned int ix, ox, oxe, mul;
|
||||||
|
#if defined(CPU_SH) || defined (TEST_SH_MATH)
|
||||||
|
const uint32_t h_i_val = ctx->src->width,
|
||||||
|
h_o_val = ctx->bm->width;
|
||||||
|
#else
|
||||||
|
const uint32_t h_i_val = ctx->h_i_val,
|
||||||
|
h_o_val = ctx->h_o_val;
|
||||||
|
#endif
|
||||||
#ifdef HAVE_LCD_COLOR
|
#ifdef HAVE_LCD_COLOR
|
||||||
struct uint32_rgb rgbvalacc = { 0, 0, 0 },
|
struct uint32_rgb rgbvalacc = { 0, 0, 0 },
|
||||||
rgbvaltmp = { 0, 0, 0 },
|
rgbvaltmp = { 0, 0, 0 },
|
||||||
|
@ -161,31 +186,57 @@ static bool scale_h_area(void *out_line_ptr,
|
||||||
yield();
|
yield();
|
||||||
for (ix = 0; ix < (unsigned int)ctx->src->width; ix++)
|
for (ix = 0; ix < (unsigned int)ctx->src->width; ix++)
|
||||||
{
|
{
|
||||||
oxe += ctx->bm->width;
|
oxe += h_o_val;
|
||||||
/* end of current area has been reached */
|
/* end of current area has been reached */
|
||||||
/* fill buffer if needed */
|
/* fill buffer if needed */
|
||||||
FILL_BUF(part,ctx->store_part,ctx->args);
|
FILL_BUF(part,ctx->store_part,ctx->args);
|
||||||
#ifdef HAVE_LCD_COLOR
|
#ifdef HAVE_LCD_COLOR
|
||||||
if (oxe >= (unsigned int)ctx->src->width)
|
if (oxe >= h_i_val)
|
||||||
{
|
{
|
||||||
/* "reset" error, which now represents partial coverage of next
|
/* "reset" error, which now represents partial coverage of next
|
||||||
pixel by the next area
|
pixel by the next area
|
||||||
*/
|
*/
|
||||||
oxe -= ctx->src->width;
|
oxe -= h_i_val;
|
||||||
|
|
||||||
|
#if defined(CPU_COLDFIRE)
|
||||||
|
/* Coldfire EMAC math */
|
||||||
/* add saved partial pixel from start of area */
|
/* add saved partial pixel from start of area */
|
||||||
rgbvalacc.r = rgbvalacc.r * ctx->bm->width + rgbvaltmp.r * mul;
|
MAC(rgbvalacc.r, h_o_val, 0);
|
||||||
rgbvalacc.g = rgbvalacc.g * ctx->bm->width + rgbvaltmp.g * mul;
|
MAC(rgbvalacc.g, h_o_val, 1);
|
||||||
rgbvalacc.b = rgbvalacc.b * ctx->bm->width + rgbvaltmp.b * mul;
|
MAC(rgbvalacc.b, h_o_val, 2);
|
||||||
|
MAC(rgbvaltmp.r, mul, 0);
|
||||||
|
MAC(rgbvaltmp.g, mul, 1);
|
||||||
|
MAC(rgbvaltmp.b, mul, 2);
|
||||||
|
/* get new pixel , then add its partial coverage to this area */
|
||||||
|
mul = h_o_val - oxe;
|
||||||
|
rgbvaltmp.r = part->buf->red;
|
||||||
|
rgbvaltmp.g = part->buf->green;
|
||||||
|
rgbvaltmp.b = part->buf->blue;
|
||||||
|
MAC(rgbvaltmp.r, mul, 0);
|
||||||
|
MAC(rgbvaltmp.g, mul, 1);
|
||||||
|
MAC(rgbvaltmp.b, mul, 2);
|
||||||
|
MAC_OUT(rgbvalacc.r, 0);
|
||||||
|
MAC_OUT(rgbvalacc.g, 1);
|
||||||
|
MAC_OUT(rgbvalacc.b, 2);
|
||||||
|
#else
|
||||||
|
/* generic C math */
|
||||||
|
/* add saved partial pixel from start of area */
|
||||||
|
rgbvalacc.r = rgbvalacc.r * h_o_val + rgbvaltmp.r * mul;
|
||||||
|
rgbvalacc.g = rgbvalacc.g * h_o_val + rgbvaltmp.g * mul;
|
||||||
|
rgbvalacc.b = rgbvalacc.b * h_o_val + rgbvaltmp.b * mul;
|
||||||
|
|
||||||
/* get new pixel , then add its partial coverage to this area */
|
/* get new pixel , then add its partial coverage to this area */
|
||||||
rgbvaltmp.r = part->buf->red;
|
rgbvaltmp.r = part->buf->red;
|
||||||
rgbvaltmp.g = part->buf->green;
|
rgbvaltmp.g = part->buf->green;
|
||||||
rgbvaltmp.b = part->buf->blue;
|
rgbvaltmp.b = part->buf->blue;
|
||||||
mul = ctx->bm->width - oxe;
|
mul = h_o_val - oxe;
|
||||||
rgbvalacc.r += rgbvaltmp.r * mul;
|
rgbvalacc.r += rgbvaltmp.r * mul;
|
||||||
rgbvalacc.g += rgbvaltmp.g * mul;
|
rgbvalacc.g += rgbvaltmp.g * mul;
|
||||||
rgbvalacc.b += rgbvaltmp.b * mul;
|
rgbvalacc.b += rgbvaltmp.b * mul;
|
||||||
|
#endif /* CPU */
|
||||||
|
rgbvalacc.r = (rgbvalacc.r + (1 << 21)) >> 22;
|
||||||
|
rgbvalacc.g = (rgbvalacc.g + (1 << 21)) >> 22;
|
||||||
|
rgbvalacc.b = (rgbvalacc.b + (1 << 21)) >> 22;
|
||||||
/* store or accumulate to output row */
|
/* store or accumulate to output row */
|
||||||
if (accum)
|
if (accum)
|
||||||
{
|
{
|
||||||
|
@ -200,7 +251,7 @@ static bool scale_h_area(void *out_line_ptr,
|
||||||
rgbvalacc.r = 0;
|
rgbvalacc.r = 0;
|
||||||
rgbvalacc.g = 0;
|
rgbvalacc.g = 0;
|
||||||
rgbvalacc.b = 0;
|
rgbvalacc.b = 0;
|
||||||
mul = ctx->bm->width - mul;
|
mul = oxe;
|
||||||
ox += 1;
|
ox += 1;
|
||||||
/* inside an area */
|
/* inside an area */
|
||||||
} else {
|
} else {
|
||||||
|
@ -210,21 +261,45 @@ static bool scale_h_area(void *out_line_ptr,
|
||||||
rgbvalacc.b += part->buf->blue;
|
rgbvalacc.b += part->buf->blue;
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
if (oxe >= (unsigned int)ctx->src->width)
|
if (oxe >= h_i_val)
|
||||||
{
|
{
|
||||||
/* "reset" error, which now represents partial coverage of next
|
/* "reset" error, which now represents partial coverage of next
|
||||||
pixel by the next area
|
pixel by the next area
|
||||||
*/
|
*/
|
||||||
oxe -= ctx->src->width;
|
oxe -= h_i_val;
|
||||||
|
#if defined(CPU_COLDFIRE)
|
||||||
|
/* Coldfire EMAC math */
|
||||||
/* add saved partial pixel from start of area */
|
/* add saved partial pixel from start of area */
|
||||||
acc = MULUQ(acc, ctx->bm->width) + MULUQ(tmp, mul);
|
MAC(acc, h_o_val, 0);
|
||||||
|
MAC(tmp, mul, 0);
|
||||||
|
/* get new pixel , then add its partial coverage to this area */
|
||||||
|
tmp = *(part->buf);
|
||||||
|
mul = h_o_val - oxe;
|
||||||
|
MAC(tmp, mul, 0);
|
||||||
|
MAC_OUT(acc, 0);
|
||||||
|
#elif defined(CPU_SH)
|
||||||
|
/* SH-1 16x16->32 math */
|
||||||
|
/* add saved partial pixel from start of area */
|
||||||
|
acc = mul_u16_u16(acc, h_o_val) + mul_u16_u16(tmp, mul);
|
||||||
|
|
||||||
/* get new pixel , then add its partial coverage to this area */
|
/* get new pixel , then add its partial coverage to this area */
|
||||||
tmp = *(part->buf);
|
tmp = *(part->buf);
|
||||||
mul = ctx->bm->width - oxe;
|
mul = h_o_val - oxe;
|
||||||
acc += MULUQ(tmp, mul);
|
acc += mul_u16_u16(tmp, mul);
|
||||||
|
#else
|
||||||
|
/* generic C math */
|
||||||
|
/* add saved partial pixel from start of area */
|
||||||
|
acc = (acc * h_o_val) + (tmp * mul);
|
||||||
|
|
||||||
|
/* get new pixel , then add its partial coverage to this area */
|
||||||
|
tmp = *(part->buf);
|
||||||
|
mul = h_o_val - oxe;
|
||||||
|
acc += tmp * mul;
|
||||||
|
#endif /* CPU */
|
||||||
|
#if !(defined(CPU_SH) || defined(TEST_SH_MATH))
|
||||||
/* round, divide, and either store or accumulate to output row */
|
/* round, divide, and either store or accumulate to output row */
|
||||||
|
acc = (acc + (1 << 21)) >> 22;
|
||||||
|
#endif
|
||||||
if (accum)
|
if (accum)
|
||||||
{
|
{
|
||||||
acc += out_line[ox];
|
acc += out_line[ox];
|
||||||
|
@ -232,7 +307,7 @@ static bool scale_h_area(void *out_line_ptr,
|
||||||
out_line[ox] = acc;
|
out_line[ox] = acc;
|
||||||
/* reset accumulator */
|
/* reset accumulator */
|
||||||
acc = 0;
|
acc = 0;
|
||||||
mul = ctx->bm->width - mul;
|
mul = oxe;
|
||||||
ox += 1;
|
ox += 1;
|
||||||
/* inside an area */
|
/* inside an area */
|
||||||
} else {
|
} else {
|
||||||
|
@ -249,56 +324,56 @@ static bool scale_h_area(void *out_line_ptr,
|
||||||
/* vertical area average scaler */
|
/* vertical area average scaler */
|
||||||
static inline bool scale_v_area(struct rowset *rset, struct scaler_context *ctx)
|
static inline bool scale_v_area(struct rowset *rset, struct scaler_context *ctx)
|
||||||
{
|
{
|
||||||
uint32_t mul, x, oy, iy, oye;
|
uint32_t mul, oy, iy, oye;
|
||||||
|
#if defined(CPU_SH) || defined (TEST_SH_MATH)
|
||||||
|
const uint32_t v_i_val = ctx->src->height,
|
||||||
|
v_o_val = ctx->bm->height;
|
||||||
|
#else
|
||||||
|
const uint32_t v_i_val = ctx->v_i_val,
|
||||||
|
v_o_val = ctx->v_o_val;
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Set up rounding and scale factors */
|
/* Set up rounding and scale factors */
|
||||||
ctx->divisor *= ctx->src->height;
|
|
||||||
ctx->round = ctx->divisor >> 1;
|
|
||||||
ctx->divisor = 1 + (-((ctx->divisor + 1) >> 1)) / ctx->divisor;
|
|
||||||
mul = 0;
|
mul = 0;
|
||||||
oy = rset->rowstart;
|
oy = rset->rowstart;
|
||||||
oye = 0;
|
oye = 0;
|
||||||
#ifdef HAVE_LCD_COLOR
|
#ifdef HAVE_LCD_COLOR
|
||||||
uint32_t *rowacc = (uint32_t *) ctx->buf,
|
uint32_t *rowacc = (uint32_t *) ctx->buf,
|
||||||
*rowtmp = rowacc + 3 * ctx->bm->width;
|
*rowtmp = rowacc + 3 * ctx->bm->width,
|
||||||
|
*rowacc_px, *rowtmp_px;
|
||||||
memset((void *)ctx->buf, 0, ctx->bm->width * 2 * sizeof(struct uint32_rgb));
|
memset((void *)ctx->buf, 0, ctx->bm->width * 2 * sizeof(struct uint32_rgb));
|
||||||
#else
|
#else
|
||||||
uint32_t *rowacc = (uint32_t *) ctx->buf,
|
uint32_t *rowacc = (uint32_t *) ctx->buf,
|
||||||
*rowtmp = rowacc + ctx->bm->width;
|
*rowtmp = rowacc + ctx->bm->width,
|
||||||
|
*rowacc_px, *rowtmp_px;
|
||||||
memset((void *)ctx->buf, 0, ctx->bm->width * 2 * sizeof(uint32_t));
|
memset((void *)ctx->buf, 0, ctx->bm->width * 2 * sizeof(uint32_t));
|
||||||
#endif
|
#endif
|
||||||
SDEBUGF("scale_v_area\n");
|
SDEBUGF("scale_v_area\n");
|
||||||
/* zero the accumulator and temp rows */
|
/* zero the accumulator and temp rows */
|
||||||
for (iy = 0; iy < (unsigned int)ctx->src->height; iy++)
|
for (iy = 0; iy < (unsigned int)ctx->src->height; iy++)
|
||||||
{
|
{
|
||||||
oye += ctx->bm->height;
|
oye += v_o_val;
|
||||||
/* end of current area has been reached */
|
/* end of current area has been reached */
|
||||||
if (oye >= (unsigned int)ctx->src->height)
|
if (oye >= v_i_val)
|
||||||
{
|
{
|
||||||
/* "reset" error, which now represents partial coverage of the next
|
/* "reset" error, which now represents partial coverage of the next
|
||||||
row by the next area
|
row by the next area
|
||||||
*/
|
*/
|
||||||
oye -= ctx->src->height;
|
oye -= v_i_val;
|
||||||
/* add stored partial row to accumulator */
|
/* add stored partial row to accumulator */
|
||||||
#ifdef HAVE_LCD_COLOR
|
for(rowacc_px = rowacc, rowtmp_px = rowtmp; rowacc_px != rowtmp;
|
||||||
for (x = 0; x < 3 * (unsigned int)ctx->bm->width; x++)
|
rowacc_px++, rowtmp_px++)
|
||||||
#else
|
*rowacc_px = *rowacc_px * v_o_val + *rowtmp_px * mul;
|
||||||
for (x = 0; x < (unsigned int)ctx->bm->width; x++)
|
|
||||||
#endif
|
|
||||||
rowacc[x] = rowacc[x] * ctx->bm->height + mul * rowtmp[x];
|
|
||||||
/* store new scaled row in temp row */
|
/* store new scaled row in temp row */
|
||||||
if(!ctx->h_scaler(rowtmp, ctx, false))
|
if(!ctx->h_scaler(rowtmp, ctx, false))
|
||||||
return false;
|
return false;
|
||||||
/* add partial coverage by new row to this area, then round and
|
/* add partial coverage by new row to this area, then round and
|
||||||
scale to final value
|
scale to final value
|
||||||
*/
|
*/
|
||||||
mul = ctx->bm->height - oye;
|
mul = v_o_val - oye;
|
||||||
#ifdef HAVE_LCD_COLOR
|
for(rowacc_px = rowacc, rowtmp_px = rowtmp; rowacc_px != rowtmp;
|
||||||
for (x = 0; x < 3 * (unsigned int)ctx->bm->width; x++)
|
rowacc_px++, rowtmp_px++)
|
||||||
#else
|
*rowacc_px += mul * *rowtmp_px;
|
||||||
for (x = 0; x < (unsigned int)ctx->bm->width; x++)
|
|
||||||
#endif
|
|
||||||
rowacc[x] += mul * rowtmp[x];
|
|
||||||
ctx->output_row(oy, (void*)rowacc, ctx);
|
ctx->output_row(oy, (void*)rowacc, ctx);
|
||||||
/* clear accumulator row, store partial coverage for next row */
|
/* clear accumulator row, store partial coverage for next row */
|
||||||
#ifdef HAVE_LCD_COLOR
|
#ifdef HAVE_LCD_COLOR
|
||||||
|
@ -319,20 +394,18 @@ static inline bool scale_v_area(struct rowset *rset, struct scaler_context *ctx)
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef HAVE_UPSCALER
|
#ifdef HAVE_UPSCALER
|
||||||
/* Set up rounding and scale factors for the horizontal scaler. The divisor
|
|
||||||
is bm->width - 1, so that the first and last pixels in the row align
|
|
||||||
exactly between input and output
|
|
||||||
*/
|
|
||||||
static inline void scale_h_linear_setup(struct scaler_context *ctx)
|
|
||||||
{
|
|
||||||
ctx->divisor = ctx->bm->width - 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* horizontal linear scaler */
|
/* horizontal linear scaler */
|
||||||
static bool scale_h_linear(void *out_line_ptr, struct scaler_context *ctx,
|
static bool scale_h_linear(void *out_line_ptr, struct scaler_context *ctx,
|
||||||
bool accum)
|
bool accum)
|
||||||
{
|
{
|
||||||
unsigned int ix, ox, ixe;
|
unsigned int ix, ox, ixe;
|
||||||
|
#if defined(CPU_SH) || defined (TEST_SH_MATH)
|
||||||
|
const uint32_t h_i_val = ctx->src->width - 1,
|
||||||
|
h_o_val = ctx->bm->width - 1;
|
||||||
|
#else
|
||||||
|
const uint32_t h_i_val = ctx->h_i_val,
|
||||||
|
h_o_val = ctx->h_o_val;
|
||||||
|
#endif
|
||||||
/* type x = x is an ugly hack for hiding an uninitialized data warning. The
|
/* type x = x is an ugly hack for hiding an uninitialized data warning. The
|
||||||
values are conditionally initialized before use, but other values are
|
values are conditionally initialized before use, but other values are
|
||||||
set such that this will occur before these are used.
|
set such that this will occur before these are used.
|
||||||
|
@ -348,27 +421,35 @@ static bool scale_h_linear(void *out_line_ptr, struct scaler_context *ctx,
|
||||||
FILL_BUF_INIT(part,ctx->store_part,ctx->args);
|
FILL_BUF_INIT(part,ctx->store_part,ctx->args);
|
||||||
ix = 0;
|
ix = 0;
|
||||||
/* The error is set so that values are initialized on the first pass. */
|
/* The error is set so that values are initialized on the first pass. */
|
||||||
ixe = ctx->bm->width - 1;
|
ixe = h_o_val;
|
||||||
/* give other tasks a chance to run */
|
/* give other tasks a chance to run */
|
||||||
yield();
|
yield();
|
||||||
for (ox = 0; ox < (uint32_t)ctx->bm->width; ox++)
|
for (ox = 0; ox < (uint32_t)ctx->bm->width; ox++)
|
||||||
{
|
{
|
||||||
#ifdef HAVE_LCD_COLOR
|
#ifdef HAVE_LCD_COLOR
|
||||||
if (ixe >= ((uint32_t)ctx->bm->width - 1))
|
if (ixe >= h_o_val)
|
||||||
{
|
{
|
||||||
/* Store the new "current" pixel value in rgbval, and the color
|
/* Store the new "current" pixel value in rgbval, and the color
|
||||||
step value in rgbinc.
|
step value in rgbinc.
|
||||||
*/
|
*/
|
||||||
ixe -= (ctx->bm->width - 1);
|
ixe -= h_o_val;
|
||||||
rgbinc.r = -(part->buf->red);
|
rgbinc.r = -(part->buf->red);
|
||||||
rgbinc.g = -(part->buf->green);
|
rgbinc.g = -(part->buf->green);
|
||||||
rgbinc.b = -(part->buf->blue);
|
rgbinc.b = -(part->buf->blue);
|
||||||
rgbval.r = (part->buf->red) * (ctx->bm->width - 1);
|
#if defined(CPU_COLDFIRE)
|
||||||
rgbval.g = (part->buf->green) * (ctx->bm->width - 1);
|
/* Coldfire EMAC math */
|
||||||
rgbval.b = (part->buf->blue) * (ctx->bm->width - 1);
|
MAC(part->buf->red, h_o_val, 0);
|
||||||
|
MAC(part->buf->green, h_o_val, 1);
|
||||||
|
MAC(part->buf->blue, h_o_val, 2);
|
||||||
|
#else
|
||||||
|
/* generic C math */
|
||||||
|
rgbval.r = (part->buf->red) * h_o_val;
|
||||||
|
rgbval.g = (part->buf->green) * h_o_val;
|
||||||
|
rgbval.b = (part->buf->blue) * h_o_val;
|
||||||
|
#endif /* CPU */
|
||||||
ix += 1;
|
ix += 1;
|
||||||
/* If this wasn't the last pixel, add the next one to rgbinc. */
|
/* If this wasn't the last pixel, add the next one to rgbinc. */
|
||||||
if (ix < (uint32_t)ctx->src->width) {
|
if (LIKELY(ix < (uint32_t)ctx->src->width)) {
|
||||||
part->buf++;
|
part->buf++;
|
||||||
part->len--;
|
part->len--;
|
||||||
/* Fetch new pixels if needed */
|
/* Fetch new pixels if needed */
|
||||||
|
@ -379,14 +460,28 @@ static bool scale_h_linear(void *out_line_ptr, struct scaler_context *ctx,
|
||||||
/* Add a partial step to rgbval, if this pixel isn't precisely
|
/* Add a partial step to rgbval, if this pixel isn't precisely
|
||||||
aligned with the new source pixel
|
aligned with the new source pixel
|
||||||
*/
|
*/
|
||||||
|
#if defined(CPU_COLDFIRE)
|
||||||
|
/* Coldfire EMAC math */
|
||||||
|
MAC(rgbinc.r, ixe, 0);
|
||||||
|
MAC(rgbinc.g, ixe, 1);
|
||||||
|
MAC(rgbinc.b, ixe, 2);
|
||||||
|
#else
|
||||||
|
/* generic C math */
|
||||||
rgbval.r += rgbinc.r * ixe;
|
rgbval.r += rgbinc.r * ixe;
|
||||||
rgbval.g += rgbinc.g * ixe;
|
rgbval.g += rgbinc.g * ixe;
|
||||||
rgbval.b += rgbinc.b * ixe;
|
rgbval.b += rgbinc.b * ixe;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
/* Now multiple the color increment to its proper value */
|
#if defined(CPU_COLDFIRE)
|
||||||
rgbinc.r *= ctx->src->width - 1;
|
/* get final EMAC result out of ACC registers */
|
||||||
rgbinc.g *= ctx->src->width - 1;
|
MAC_OUT(rgbval.r, 0);
|
||||||
rgbinc.b *= ctx->src->width - 1;
|
MAC_OUT(rgbval.g, 1);
|
||||||
|
MAC_OUT(rgbval.b, 2);
|
||||||
|
#endif
|
||||||
|
/* Now multiply the color increment to its proper value */
|
||||||
|
rgbinc.r *= h_i_val;
|
||||||
|
rgbinc.g *= h_i_val;
|
||||||
|
rgbinc.b *= h_i_val;
|
||||||
} else {
|
} else {
|
||||||
rgbval.r += rgbinc.r;
|
rgbval.r += rgbinc.r;
|
||||||
rgbval.g += rgbinc.g;
|
rgbval.g += rgbinc.g;
|
||||||
|
@ -395,27 +490,36 @@ static bool scale_h_linear(void *out_line_ptr, struct scaler_context *ctx,
|
||||||
/* round and scale values, and accumulate or store to output */
|
/* round and scale values, and accumulate or store to output */
|
||||||
if (accum)
|
if (accum)
|
||||||
{
|
{
|
||||||
out_line[ox].r += rgbval.r;
|
out_line[ox].r += (rgbval.r + (1 << 21)) >> 22;
|
||||||
out_line[ox].g += rgbval.g;
|
out_line[ox].g += (rgbval.g + (1 << 21)) >> 22;
|
||||||
out_line[ox].b += rgbval.b;
|
out_line[ox].b += (rgbval.b + (1 << 21)) >> 22;
|
||||||
} else {
|
} else {
|
||||||
out_line[ox].r = rgbval.r;
|
out_line[ox].r = (rgbval.r + (1 << 21)) >> 22;
|
||||||
out_line[ox].g = rgbval.g;
|
out_line[ox].g = (rgbval.g + (1 << 21)) >> 22;
|
||||||
out_line[ox].b = rgbval.b;
|
out_line[ox].b = (rgbval.b + (1 << 21)) >> 22;
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
if (ixe >= ((uint32_t)ctx->bm->width - 1))
|
if (ixe >= h_o_val)
|
||||||
{
|
{
|
||||||
/* Store the new "current" pixel value in rgbval, and the color
|
/* Store the new "current" pixel value in rgbval, and the color
|
||||||
step value in rgbinc.
|
step value in rgbinc.
|
||||||
*/
|
*/
|
||||||
ixe -= (ctx->bm->width - 1);
|
ixe -= h_o_val;
|
||||||
val = *(part->buf);
|
val = *(part->buf);
|
||||||
inc = -val;
|
inc = -val;
|
||||||
val = MULUQ(val, ctx->bm->width - 1);
|
#if defined(CPU_COLDFIRE)
|
||||||
|
/* Coldfire EMAC math */
|
||||||
|
MAC(val, h_o_val, 0);
|
||||||
|
#elif defined(CPU_SH)
|
||||||
|
/* SH-1 16x16->32 math */
|
||||||
|
val = mul_u16_u16(val, h_o_val);
|
||||||
|
#else
|
||||||
|
/* generic C math */
|
||||||
|
val = val * h_o_val;
|
||||||
|
#endif
|
||||||
ix += 1;
|
ix += 1;
|
||||||
/* If this wasn't the last pixel, add the next one to rgbinc. */
|
/* If this wasn't the last pixel, add the next one to rgbinc. */
|
||||||
if (ix < (uint32_t)ctx->src->width) {
|
if (LIKELY(ix < (uint32_t)ctx->src->width)) {
|
||||||
part->buf++;
|
part->buf++;
|
||||||
part->len--;
|
part->len--;
|
||||||
/* Fetch new pixels if needed */
|
/* Fetch new pixels if needed */
|
||||||
|
@ -424,12 +528,40 @@ static bool scale_h_linear(void *out_line_ptr, struct scaler_context *ctx,
|
||||||
/* Add a partial step to rgbval, if this pixel isn't precisely
|
/* Add a partial step to rgbval, if this pixel isn't precisely
|
||||||
aligned with the new source pixel
|
aligned with the new source pixel
|
||||||
*/
|
*/
|
||||||
val += MULQ(inc, ixe);
|
#if defined(CPU_COLDFIRE)
|
||||||
|
/* Coldfire EMAC math */
|
||||||
|
MAC(inc, ixe, 0);
|
||||||
|
#elif defined(CPU_SH)
|
||||||
|
/* SH-1 16x16->32 math */
|
||||||
|
val += mul_s16_s16(inc, ixe);
|
||||||
|
#else
|
||||||
|
/* generic C math */
|
||||||
|
val += inc * ixe;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
#if defined(CPU_COLDFIRE)
|
||||||
|
/* get final EMAC result out of ACC register */
|
||||||
|
MAC_OUT(val, 0);
|
||||||
|
#endif
|
||||||
/* Now multiply the color increment to its proper value */
|
/* Now multiply the color increment to its proper value */
|
||||||
inc = MULQ(inc, ctx->src->width - 1);
|
#if defined(CPU_SH)
|
||||||
|
/* SH-1 16x16->32 math */
|
||||||
|
inc = mul_s16_s16(inc, h_i_val);
|
||||||
|
#else
|
||||||
|
/* generic C math */
|
||||||
|
inc *= h_i_val;
|
||||||
|
#endif
|
||||||
} else
|
} else
|
||||||
val += inc;
|
val += inc;
|
||||||
|
#if !(defined(CPU_SH) || defined(TEST_SH_MATH))
|
||||||
|
/* round and scale values, and accumulate or store to output */
|
||||||
|
if (accum)
|
||||||
|
{
|
||||||
|
out_line[ox] += (val + (1 << 21)) >> 22;
|
||||||
|
} else {
|
||||||
|
out_line[ox] = (val + (1 << 21)) >> 22;
|
||||||
|
}
|
||||||
|
#else
|
||||||
/* round and scale values, and accumulate or store to output */
|
/* round and scale values, and accumulate or store to output */
|
||||||
if (accum)
|
if (accum)
|
||||||
{
|
{
|
||||||
|
@ -438,7 +570,8 @@ static bool scale_h_linear(void *out_line_ptr, struct scaler_context *ctx,
|
||||||
out_line[ox] = val;
|
out_line[ox] = val;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
ixe += ctx->src->width - 1;
|
#endif
|
||||||
|
ixe += h_i_val;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -447,71 +580,66 @@ static bool scale_h_linear(void *out_line_ptr, struct scaler_context *ctx,
|
||||||
static inline bool scale_v_linear(struct rowset *rset,
|
static inline bool scale_v_linear(struct rowset *rset,
|
||||||
struct scaler_context *ctx)
|
struct scaler_context *ctx)
|
||||||
{
|
{
|
||||||
uint32_t mul, x, iy, iye;
|
uint32_t mul, iy, iye;
|
||||||
int32_t oy;
|
int32_t oy;
|
||||||
/* Set up scale and rounding factors, the divisor is bm->height - 1 */
|
#if defined(CPU_SH) || defined (TEST_SH_MATH)
|
||||||
ctx->divisor *= (ctx->bm->height - 1);
|
const uint32_t v_i_val = ctx->src->height - 1,
|
||||||
ctx->round = ctx->divisor >> 1;
|
v_o_val = ctx->bm->height - 1;
|
||||||
ctx->divisor = 1 + (-((ctx->divisor + 1) >> 1)) / ctx->divisor;
|
#else
|
||||||
/* Set up our two temp buffers. The names are generic because they'll be
|
const uint32_t v_i_val = ctx->v_i_val,
|
||||||
swapped each time a new input row is read
|
v_o_val = ctx->v_o_val;
|
||||||
|
#endif
|
||||||
|
/* Set up our buffers, to store the increment and current value for each
|
||||||
|
column, and one temp buffer used to read in new rows.
|
||||||
*/
|
*/
|
||||||
#ifdef HAVE_LCD_COLOR
|
#ifdef HAVE_LCD_COLOR
|
||||||
uint32_t *rowinc = (uint32_t *)(ctx->buf),
|
uint32_t *rowinc = (uint32_t *)(ctx->buf),
|
||||||
*rowval = rowinc + 3 * ctx->bm->width,
|
*rowval = rowinc + 3 * ctx->bm->width,
|
||||||
*rowtmp = rowval + 3 * ctx->bm->width;
|
*rowtmp = rowval + 3 * ctx->bm->width,
|
||||||
#else
|
#else
|
||||||
uint32_t *rowinc = (uint32_t *)(ctx->buf),
|
uint32_t *rowinc = (uint32_t *)(ctx->buf),
|
||||||
*rowval = rowinc + ctx->bm->width,
|
*rowval = rowinc + ctx->bm->width,
|
||||||
*rowtmp = rowval + ctx->bm->width;
|
*rowtmp = rowval + ctx->bm->width,
|
||||||
#endif
|
#endif
|
||||||
|
*rowinc_px, *rowval_px, *rowtmp_px;
|
||||||
|
|
||||||
SDEBUGF("scale_v_linear\n");
|
SDEBUGF("scale_v_linear\n");
|
||||||
mul = 0;
|
mul = 0;
|
||||||
iy = 0;
|
iy = 0;
|
||||||
iye = ctx->bm->height - 1;
|
iye = v_o_val;
|
||||||
/* get first scaled row in rowtmp */
|
/* get first scaled row in rowtmp */
|
||||||
if(!ctx->h_scaler((void*)rowtmp, ctx, false))
|
if(!ctx->h_scaler((void*)rowtmp, ctx, false))
|
||||||
return false;
|
return false;
|
||||||
for (oy = rset->rowstart; oy != rset->rowstop; oy += rset->rowstep)
|
for (oy = rset->rowstart; oy != rset->rowstop; oy += rset->rowstep)
|
||||||
{
|
{
|
||||||
if (iye >= (uint32_t)ctx->bm->height - 1)
|
if (iye >= v_o_val)
|
||||||
{
|
{
|
||||||
iye -= ctx->bm->height - 1;
|
iye -= v_o_val;
|
||||||
iy += 1;
|
iy += 1;
|
||||||
#ifdef HAVE_LCD_COLOR
|
for(rowinc_px = rowinc, rowtmp_px = rowtmp, rowval_px = rowval;
|
||||||
for (x = 0; x < 3 * (uint32_t)ctx->bm->width; x++)
|
rowinc_px < rowval; rowinc_px++, rowtmp_px++, rowval_px++)
|
||||||
#else
|
|
||||||
for (x = 0; x < (uint32_t)ctx->bm->width; x++)
|
|
||||||
#endif
|
|
||||||
{
|
{
|
||||||
rowinc[x] = -rowtmp[x];
|
*rowinc_px = -*rowtmp_px;
|
||||||
rowval[x] = rowtmp[x] * (ctx->bm->height - 1);
|
*rowval_px = *rowtmp_px * v_o_val;
|
||||||
}
|
}
|
||||||
if (iy < (uint32_t)ctx->src->height)
|
if (iy < (uint32_t)ctx->src->height)
|
||||||
{
|
{
|
||||||
if (!ctx->h_scaler((void*)rowtmp, ctx, false))
|
if (!ctx->h_scaler((void*)rowtmp, ctx, false))
|
||||||
return false;
|
return false;
|
||||||
#ifdef HAVE_LCD_COLOR
|
for(rowinc_px = rowinc, rowtmp_px = rowtmp, rowval_px = rowval;
|
||||||
for (x = 0; x < 3 * (uint32_t)ctx->bm->width; x++)
|
rowinc_px < rowval; rowinc_px++, rowtmp_px++, rowval_px++)
|
||||||
#else
|
|
||||||
for (x = 0; x < (uint32_t)ctx->bm->width; x++)
|
|
||||||
#endif
|
|
||||||
{
|
{
|
||||||
rowinc[x] += rowtmp[x];
|
*rowinc_px += *rowtmp_px;
|
||||||
rowval[x] += rowinc[x] * iye;
|
*rowval_px += *rowinc_px * iye;
|
||||||
rowinc[x] *= ctx->src->height - 1;
|
*rowinc_px *= v_i_val;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else
|
} else
|
||||||
#ifdef HAVE_LCD_COLOR
|
for(rowinc_px = rowinc, rowval_px = rowval; rowinc_px < rowval;
|
||||||
for (x = 0; x < 3 * (uint32_t)ctx->bm->width; x++)
|
rowinc_px++, rowval_px++)
|
||||||
#else
|
*rowval_px += *rowinc_px;
|
||||||
for (x = 0; x < (uint32_t)ctx->bm->width; x++)
|
|
||||||
#endif
|
|
||||||
rowval[x] += rowinc[x];
|
|
||||||
ctx->output_row(oy, (void*)rowval, ctx);
|
ctx->output_row(oy, (void*)rowval, ctx);
|
||||||
iye += ctx->src->height - 1;
|
iye += v_i_val;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -533,9 +661,9 @@ static void output_row_32_native_fromyuv(uint32_t row, void * row_in,
|
||||||
for (col = 0; col < ctx->bm->width; col++) {
|
for (col = 0; col < ctx->bm->width; col++) {
|
||||||
if (ctx->dither)
|
if (ctx->dither)
|
||||||
delta = DITHERXDY(col,dy);
|
delta = DITHERXDY(col,dy);
|
||||||
y = SC_MUL(qp->b + ctx->round, ctx->divisor);
|
y = SC_OUT(qp->b, ctx);
|
||||||
u = SC_MUL(qp->g + ctx->round, ctx->divisor);
|
u = SC_OUT(qp->g, ctx);
|
||||||
v = SC_MUL(qp->r + ctx->round, ctx->divisor);
|
v = SC_OUT(qp->r, ctx);
|
||||||
qp++;
|
qp++;
|
||||||
yuv_to_rgb(y, u, v, &r, &g, &b);
|
yuv_to_rgb(y, u, v, &r, &g, &b);
|
||||||
r = (31 * r + (r >> 3) + delta) >> 8;
|
r = (31 * r + (r >> 3) + delta) >> 8;
|
||||||
|
@ -571,7 +699,7 @@ static void output_row_32_native(uint32_t row, void * row_in,
|
||||||
for (col = 0; col < ctx->bm->width; col++) {
|
for (col = 0; col < ctx->bm->width; col++) {
|
||||||
if (ctx->dither)
|
if (ctx->dither)
|
||||||
delta = DITHERXDY(col,dy);
|
delta = DITHERXDY(col,dy);
|
||||||
bright = SC_MUL((*qp++) + ctx->round,ctx->divisor);
|
bright = SC_OUT(*qp++, ctx);
|
||||||
bright = (3 * bright + (bright >> 6) + delta) >> 8;
|
bright = (3 * bright + (bright >> 6) + delta) >> 8;
|
||||||
data |= (~bright & 3) << shift;
|
data |= (~bright & 3) << shift;
|
||||||
shift -= 2;
|
shift -= 2;
|
||||||
|
@ -594,7 +722,7 @@ static void output_row_32_native(uint32_t row, void * row_in,
|
||||||
for (col = 0; col < ctx->bm->width; col++) {
|
for (col = 0; col < ctx->bm->width; col++) {
|
||||||
if (ctx->dither)
|
if (ctx->dither)
|
||||||
delta = DITHERXDY(col,dy);
|
delta = DITHERXDY(col,dy);
|
||||||
bright = SC_MUL((*qp++) + ctx->round, ctx->divisor);
|
bright = SC_OUT(*qp++, ctx);
|
||||||
bright = (3 * bright + (bright >> 6) + delta) >> 8;
|
bright = (3 * bright + (bright >> 6) + delta) >> 8;
|
||||||
*dest++ |= (~bright & 3) << shift;
|
*dest++ |= (~bright & 3) << shift;
|
||||||
}
|
}
|
||||||
|
@ -609,7 +737,7 @@ static void output_row_32_native(uint32_t row, void * row_in,
|
||||||
for (col = 0; col < ctx->bm->width; col++) {
|
for (col = 0; col < ctx->bm->width; col++) {
|
||||||
if (ctx->dither)
|
if (ctx->dither)
|
||||||
delta = DITHERXDY(col,dy);
|
delta = DITHERXDY(col,dy);
|
||||||
bright = SC_MUL((*qp++) + ctx->round, ctx->divisor);
|
bright = SC_OUT(*qp++, ctx);
|
||||||
bright = (3 * bright + (bright >> 6) + delta) >> 8;
|
bright = (3 * bright + (bright >> 6) + delta) >> 8;
|
||||||
*dest++ |= vi_pattern[bright] << shift;
|
*dest++ |= vi_pattern[bright] << shift;
|
||||||
}
|
}
|
||||||
|
@ -625,9 +753,9 @@ static void output_row_32_native(uint32_t row, void * row_in,
|
||||||
if (ctx->dither)
|
if (ctx->dither)
|
||||||
delta = DITHERXDY(col,dy);
|
delta = DITHERXDY(col,dy);
|
||||||
q0 = *qp++;
|
q0 = *qp++;
|
||||||
r = SC_MUL(q0.r + ctx->round, ctx->divisor);
|
r = SC_OUT(q0.r, ctx);
|
||||||
g = SC_MUL(q0.g + ctx->round, ctx->divisor);
|
g = SC_OUT(q0.g, ctx);
|
||||||
b = SC_MUL(q0.b + ctx->round, ctx->divisor);
|
b = SC_OUT(q0.b, ctx);
|
||||||
r = (31 * r + (r >> 3) + delta) >> 8;
|
r = (31 * r + (r >> 3) + delta) >> 8;
|
||||||
g = (63 * g + (g >> 2) + delta) >> 8;
|
g = (63 * g + (g >> 2) + delta) >> 8;
|
||||||
b = (31 * b + (b >> 3) + delta) >> 8;
|
b = (31 * b + (b >> 3) + delta) >> 8;
|
||||||
|
@ -664,13 +792,10 @@ int resize_on_load(struct bitmap *bm, bool dither, struct dim *src,
|
||||||
struct img_part* (*store_part)(void *args),
|
struct img_part* (*store_part)(void *args),
|
||||||
void *args)
|
void *args)
|
||||||
{
|
{
|
||||||
|
|
||||||
#ifdef HAVE_UPSCALER
|
|
||||||
const int sw = src->width;
|
const int sw = src->width;
|
||||||
const int sh = src->height;
|
const int sh = src->height;
|
||||||
const int dw = bm->width;
|
const int dw = bm->width;
|
||||||
const int dh = bm->height;
|
const int dh = bm->height;
|
||||||
#endif
|
|
||||||
int ret;
|
int ret;
|
||||||
#ifdef HAVE_LCD_COLOR
|
#ifdef HAVE_LCD_COLOR
|
||||||
unsigned int needed = sizeof(struct uint32_rgb) * 3 * bm->width;
|
unsigned int needed = sizeof(struct uint32_rgb) * 3 * bm->width;
|
||||||
|
@ -721,6 +846,9 @@ int resize_on_load(struct bitmap *bm, bool dither, struct dim *src,
|
||||||
ctx.bm = bm;
|
ctx.bm = bm;
|
||||||
ctx.src = src;
|
ctx.src = src;
|
||||||
ctx.dither = dither;
|
ctx.dither = dither;
|
||||||
|
#if defined(CPU_SH) || defined (TEST_SH_MATH)
|
||||||
|
uint32_t div;
|
||||||
|
#endif
|
||||||
#if !defined(PLUGIN)
|
#if !defined(PLUGIN)
|
||||||
#if defined(HAVE_LCD_COLOR) && defined(HAVE_JPEG)
|
#if defined(HAVE_LCD_COLOR) && defined(HAVE_JPEG)
|
||||||
ctx.output_row = format_index ? output_row_32_native_fromyuv
|
ctx.output_row = format_index ? output_row_32_native_fromyuv
|
||||||
|
@ -740,23 +868,56 @@ int resize_on_load(struct bitmap *bm, bool dither, struct dim *src,
|
||||||
{
|
{
|
||||||
#endif
|
#endif
|
||||||
ctx.h_scaler = scale_h_area;
|
ctx.h_scaler = scale_h_area;
|
||||||
scale_h_area_setup(&ctx);
|
#if defined(CPU_SH) || defined (TEST_SH_MATH)
|
||||||
|
div = sw;
|
||||||
|
#else
|
||||||
|
uint32_t h_div = (1U << 24) / sw;
|
||||||
|
ctx.h_i_val = sw * h_div;
|
||||||
|
ctx.h_o_val = dw * h_div;
|
||||||
|
#endif
|
||||||
#ifdef HAVE_UPSCALER
|
#ifdef HAVE_UPSCALER
|
||||||
} else {
|
} else {
|
||||||
ctx.h_scaler = scale_h_linear;
|
ctx.h_scaler = scale_h_linear;
|
||||||
scale_h_linear_setup(&ctx);
|
#if defined(CPU_SH) || defined (TEST_SH_MATH)
|
||||||
|
div = dw - 1;
|
||||||
|
#else
|
||||||
|
uint32_t h_div = (1U << 24) / (dw - 1);
|
||||||
|
ctx.h_i_val = (sw - 1) * h_div;
|
||||||
|
ctx.h_o_val = (dw - 1) * h_div;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
SC_MUL_INIT;
|
#ifdef CPU_COLDFIRE
|
||||||
|
coldfire_set_macsr(EMAC_UNSIGNED);
|
||||||
|
#endif
|
||||||
#ifdef HAVE_UPSCALER
|
#ifdef HAVE_UPSCALER
|
||||||
if (sh > dh)
|
if (sh > dh)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
#if defined(CPU_SH) || defined (TEST_SH_MATH)
|
||||||
|
div *= sh;
|
||||||
|
ctx.recip = ((uint32_t)(-div)) / div + 1;
|
||||||
|
#else
|
||||||
|
uint32_t v_div = (1U << 22) / sh;
|
||||||
|
ctx.v_i_val = sh * v_div;
|
||||||
|
ctx.v_o_val = dh * v_div;
|
||||||
#endif
|
#endif
|
||||||
ret = scale_v_area(rset, &ctx);
|
ret = scale_v_area(rset, &ctx);
|
||||||
|
}
|
||||||
#ifdef HAVE_UPSCALER
|
#ifdef HAVE_UPSCALER
|
||||||
else
|
else
|
||||||
ret = scale_v_linear(rset, &ctx);
|
{
|
||||||
|
#if defined(CPU_SH) || defined (TEST_SH_MATH)
|
||||||
|
div *= dh - 1;
|
||||||
|
ctx.recip = ((uint32_t)(-div)) / div + 1;
|
||||||
|
#else
|
||||||
|
uint32_t v_div = (1U << 22) / dh;
|
||||||
|
ctx.v_i_val = (sh - 1) * v_div;
|
||||||
|
ctx.v_o_val = (dh - 1) * v_div;
|
||||||
|
#endif
|
||||||
|
ret = scale_v_linear(rset, &ctx);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
SC_MUL_END;
|
|
||||||
#ifdef HAVE_ADJUSTABLE_CPU_FREQ
|
#ifdef HAVE_ADJUSTABLE_CPU_FREQ
|
||||||
cpu_boost(false);
|
cpu_boost(false);
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -43,67 +43,61 @@
|
||||||
#define MAX_SC_STACK_ALLOC 0
|
#define MAX_SC_STACK_ALLOC 0
|
||||||
#define HAVE_UPSCALER 1
|
#define HAVE_UPSCALER 1
|
||||||
|
|
||||||
#if defined(CPU_COLDFIRE)
|
#if defined(CPU_SH)
|
||||||
#define SC_MUL_INIT \
|
/* perform 32x32->40 unsigned multiply, round off and return top 8 bits */
|
||||||
unsigned long macsr_st = coldfire_get_macsr(); \
|
static inline uint32_t sc_mul_u32_rnd(uint32_t m, uint32_t n)
|
||||||
coldfire_set_macsr(EMAC_UNSIGNED);
|
|
||||||
#define SC_MUL_END coldfire_set_macsr(macsr_st);
|
|
||||||
#define SC_MUL(x, y) \
|
|
||||||
({ \
|
|
||||||
unsigned long t; \
|
|
||||||
asm ("mac.l %[a], %[b], %%acc0\n\t" \
|
|
||||||
"move.l %%accext01, %[t]\n\t" \
|
|
||||||
"move.l #0, %%acc0\n\t" \
|
|
||||||
: [t] "=r" (t) : [a] "r" (x), [b] "r" (y)); \
|
|
||||||
t; \
|
|
||||||
})
|
|
||||||
#elif (CONFIG_CPU == SH7034)
|
|
||||||
/* multiply two unsigned 32 bit values and return the top 32 bit
|
|
||||||
* of the 64 bit result */
|
|
||||||
static inline unsigned sc_mul32(unsigned a, unsigned b)
|
|
||||||
{
|
{
|
||||||
unsigned r, t1, t2, t3;
|
unsigned r, t1, t2, t3;
|
||||||
|
unsigned h = 1 << 15;
|
||||||
|
/* notation:
|
||||||
|
m = ab, n = cd
|
||||||
|
final result is (((a *c) << 32) + ((b * c + a * d) << 16) + b * d +
|
||||||
|
(1 << 31)) >> 32
|
||||||
|
*/
|
||||||
asm (
|
asm (
|
||||||
"swap.w %[a], %[t1] \n" /* t1 = ba */
|
"swap.w %[m], %[t1]\n\t" /* t1 = ba */
|
||||||
"mulu %[t1], %[b] \n" /* a * d */
|
"mulu %[m], %[n]\n\t" /* b * d */
|
||||||
"swap.w %[b], %[t3] \n" /* t3 = dc */
|
"swap.w %[n], %[t3]\n\t" /* t3 = dc */
|
||||||
"sts macl, %[t2] \n" /* t2 = a * d */
|
"sts macl, %[r]\n\t" /* r = b * d */
|
||||||
"mulu %[t1], %[t3] \n" /* a * c */
|
"mulu %[m], %[t3]\n\t" /* b * c */
|
||||||
"sts macl, %[r] \n" /* hi = a * c */
|
"shlr16 %[r]\n\t"
|
||||||
"mulu %[a], %[t3] \n" /* b * c */
|
"sts macl, %[t2]\n\t" /* t2 = b * c */
|
||||||
"clrt \n"
|
"mulu %[t1], %[t3]\n\t" /* a * c */
|
||||||
"sts macl, %[t3] \n" /* t3 = b * c */
|
"add %[t2], %[r]\n\t"
|
||||||
"addc %[t2], %[t3] \n" /* t3 += t2, carry -> t2 */
|
"sts macl, %[t3]\n\t" /* t3 = a * c */
|
||||||
"movt %[t2] \n"
|
"mulu %[t1], %[n]\n\t" /* a * d */
|
||||||
"mulu %[a], %[b] \n" /* b * d */
|
"shll16 %[t3]\n\t"
|
||||||
"mov %[t3], %[t1] \n" /* t1t3 = t2t3 << 16 */
|
"sts macl, %[t2]\n\t" /* t2 = a * d */
|
||||||
"xtrct %[t2], %[t1] \n"
|
"add %[t2], %[r]\n\t"
|
||||||
"shll16 %[t3] \n"
|
"add %[t3], %[r]\n\t" /* r = ((b * d) >> 16) + (b * c + a * d) +
|
||||||
"sts macl, %[t2] \n" /* lo = b * d */
|
((a * c) << 16) */
|
||||||
"clrt \n" /* hi.lo += t1t3 */
|
"add %[h], %[r]\n\t" /* round result */
|
||||||
"addc %[t3], %[t2] \n"
|
"shlr16 %[r]\n\t" /* truncate result */
|
||||||
"addc %[t1], %[r] \n"
|
|
||||||
: /* outputs */
|
: /* outputs */
|
||||||
[r] "=&r"(r),
|
[r] "=&r"(r),
|
||||||
[t1]"=&r"(t1),
|
[t1]"=&r"(t1),
|
||||||
[t2]"=&r"(t2),
|
[t2]"=&r"(t2),
|
||||||
[t3]"=&r"(t3)
|
[t3]"=&r"(t3)
|
||||||
: /* inputs */
|
: /* inputs */
|
||||||
[a] "r" (a),
|
[h] "r" (h),
|
||||||
[b] "r" (b)
|
[m] "r" (m),
|
||||||
|
[n] "r" (n)
|
||||||
);
|
);
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
#define SC_MUL(x, y) sc_mul32(x, y)
|
#elif defined(TEST_SH_MATH)
|
||||||
#define SC_MUL_INIT
|
static inline uint32_t sc_mul_u32_rnd(uint32_t op1, uint32_t op2)
|
||||||
#define SC_MUL_END
|
{
|
||||||
|
uint64_t tmp = (uint64_t)op1 * op2;
|
||||||
|
tmp += 1LU << 31;
|
||||||
|
tmp >>= 32;
|
||||||
|
return tmp;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
#define SC_OUT(n, c) (((n) + (1 << 23)) >> 24)
|
||||||
#endif
|
#endif
|
||||||
|
#ifndef SC_OUT
|
||||||
#ifndef SC_MUL
|
#define SC_OUT(n, c) (sc_mul_u32_rnd(n, (c)->recip))
|
||||||
#define SC_MUL(x, y) ((x) * (uint64_t)(y) >> 32)
|
|
||||||
#define SC_MUL_INIT
|
|
||||||
#define SC_MUL_END
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
struct img_part {
|
struct img_part {
|
||||||
|
@ -130,8 +124,14 @@ struct uint32_rgb {
|
||||||
horizontal scaler, and row output
|
horizontal scaler, and row output
|
||||||
*/
|
*/
|
||||||
struct scaler_context {
|
struct scaler_context {
|
||||||
uint32_t divisor;
|
#if defined(CPU_SH) || defined(TEST_SH_MATH)
|
||||||
uint32_t round;
|
uint32_t recip;
|
||||||
|
#else
|
||||||
|
uint32_t h_i_val;
|
||||||
|
uint32_t h_o_val;
|
||||||
|
uint32_t v_i_val;
|
||||||
|
uint32_t v_o_val;
|
||||||
|
#endif
|
||||||
struct bitmap *bm;
|
struct bitmap *bm;
|
||||||
struct dim *src;
|
struct dim *src;
|
||||||
unsigned char *buf;
|
unsigned char *buf;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue