forked from len0rd/rockbox
		
	scaler optimizations:
on sh, use 8.24 fixed-point C math for final division in scaler; on coldfire, use 8.32 fixed-point via emac; on other architectures, use 8.32 fixed-point C math; use shift-and-add to divide when adjusting scale factors in pictureflow. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@19802 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
		
							parent
							
								
									73f2d001fd
								
							
						
					
					
						commit
						1b13299769
					
				
					 3 changed files with 66 additions and 22 deletions
				
			
		|  | @ -397,8 +397,8 @@ static inline PFreal fcos(int iangle) | |||
|     return fsin(iangle + (IANGLE_MAX >> 2)); | ||||
| } | ||||
| 
 | ||||
| #define RB_DIV ((31ULL << 32) / 255 + 1) | ||||
| #define G_DIV ((63ULL << 32) / 255 + 1) | ||||
| #define DIV255(val) ((((((val)>>8)+(val))>>8)+(val))>>8) | ||||
| #define SCALE_VAL(val,out) DIV255((val) * (out) + 127) | ||||
| 
 | ||||
| static void output_row_transposed(uint32_t row, void * row_in, | ||||
|                                        struct scaler_context *ctx) | ||||
|  | @ -408,19 +408,19 @@ static void output_row_transposed(uint32_t row, void * row_in, | |||
| #ifdef USEGSLIB | ||||
|     uint32_t *qp = (uint32_t*)row_in; | ||||
|     for (; dest < end; dest += ctx->bm->height) | ||||
|         *dest = ((*qp++) + ctx->round) * (uint64_t)ctx->divisor >> 32; | ||||
|         *dest = SC_MUL((*qp++) + ctx->round), ctx->divisor); | ||||
| #else | ||||
|     struct uint32_rgb *qp = (struct uint32_rgb*)row_in; | ||||
|     uint32_t rb_mul = ctx->divisor * (uint64_t)RB_DIV >> 32, | ||||
|              rb_rnd = ctx->round * (uint64_t)RB_DIV >> 32, | ||||
|              g_mul = ctx->divisor * (uint64_t)G_DIV >> 32, | ||||
|              g_rnd = ctx->round * (uint64_t)G_DIV >> 32; | ||||
|              int r, g, b; | ||||
|     uint32_t rb_mul = SCALE_VAL(ctx->divisor, 31), | ||||
|              rb_rnd = SCALE_VAL(ctx->round, 31), | ||||
|              g_mul = SCALE_VAL(ctx->divisor, 63), | ||||
|              g_rnd = SCALE_VAL(ctx->round, 63); | ||||
|     int r, g, b; | ||||
|     for (; dest < end; dest += ctx->bm->height) | ||||
|     { | ||||
|         r = (qp->r + rb_rnd) * (uint64_t)rb_mul >> 32; | ||||
|         g = (qp->g + g_rnd) * (uint64_t)g_mul >> 32; | ||||
|         b = (qp->b + rb_rnd) * (uint64_t)rb_mul >> 32; | ||||
|         r = SC_MUL(qp->r + rb_rnd, rb_mul); | ||||
|         g = SC_MUL(qp->g + g_rnd, g_mul); | ||||
|         b = SC_MUL(qp->b + rb_rnd, rb_mul); | ||||
|         qp++; | ||||
|         *dest = LCD_RGBPACK_LCD(r,g,b); | ||||
|     } | ||||
|  |  | |||
|  | @ -244,7 +244,7 @@ static inline bool scale_v_area(struct rowset *rset, struct scaler_context *ctx) | |||
|     /* Set up rounding and scale factors */ | ||||
|     ctx->divisor *= ctx->src->height; | ||||
|     ctx->round = ctx->divisor >> 1; | ||||
|     ctx->divisor = ((ctx->divisor - 1 + 0x80000000U) / ctx->divisor) << 1; | ||||
|     ctx->divisor = (((ctx->divisor >> 1) + SC_NUM) / ctx->divisor) << SC_FIX; | ||||
|     mul = 0; | ||||
|     oy = rset->rowstart; | ||||
|     oye = 0; | ||||
|  | @ -442,7 +442,7 @@ static inline bool scale_v_linear(struct rowset *rset, | |||
|     /* Set up scale and rounding factors, the divisor is bm->height - 1 */ | ||||
|     ctx->divisor *= (ctx->bm->height - 1); | ||||
|     ctx->round = ctx->divisor >> 1; | ||||
|     ctx->divisor = ((ctx->divisor - 1 + 0x80000000U) / ctx->divisor) << 1; | ||||
|     ctx->divisor = (((ctx->divisor >> 1) + SC_NUM) / ctx->divisor) << SC_FIX; | ||||
|     /* Set up our two temp buffers. The names are generic because they'll be
 | ||||
|        swapped each time a new input row is read | ||||
|     */ | ||||
|  | @ -531,8 +531,7 @@ void output_row_native(uint32_t row, void * row_in, struct scaler_context *ctx) | |||
|                 for (col = 0; col < ctx->bm->width; col++) { | ||||
|                     if (ctx->dither) | ||||
|                         delta = DITHERXDY(col,dy); | ||||
|                     bright = ((*qp++) + ctx->round) * | ||||
|                              (uint64_t)ctx->divisor >> 32; | ||||
|                     bright = SC_MUL((*qp++) + ctx->round,ctx->divisor); | ||||
|                     bright = (3 * bright + (bright >> 6) + delta) >> 8; | ||||
|                     data |= (~bright & 3) << shift; | ||||
|                     shift -= 2; | ||||
|  | @ -555,8 +554,7 @@ void output_row_native(uint32_t row, void * row_in, struct scaler_context *ctx) | |||
|                 for (col = 0; col < ctx->bm->width; col++) { | ||||
|                     if (ctx->dither) | ||||
|                         delta = DITHERXDY(col,dy); | ||||
|                     bright = ((*qp++) + ctx->round) * | ||||
|                              (uint64_t)ctx->divisor >> 32; | ||||
|                     bright = SC_MUL((*qp++) + ctx->round, ctx->divisor); | ||||
|                     bright = (3 * bright + (bright >> 6) + delta) >> 8; | ||||
|                     *dest++ |= (~bright & 3) << shift; | ||||
|                 } | ||||
|  | @ -571,8 +569,7 @@ void output_row_native(uint32_t row, void * row_in, struct scaler_context *ctx) | |||
|                 for (col = 0; col < ctx->bm->width; col++) { | ||||
|                     if (ctx->dither) | ||||
|                         delta = DITHERXDY(col,dy); | ||||
|                     bright = ((*qp++) + ctx->round) * | ||||
|                              (uint64_t)ctx->divisor >> 32; | ||||
|                     bright = SC_MUL((*qp++) + ctx->round, ctx->divisor); | ||||
|                     bright = (3 * bright + (bright >> 6) + delta) >> 8; | ||||
|                     *dest++ |= vi_pattern[bright] << shift; | ||||
|                 } | ||||
|  | @ -588,9 +585,9 @@ void output_row_native(uint32_t row, void * row_in, struct scaler_context *ctx) | |||
|                     if (ctx->dither) | ||||
|                         delta = DITHERXDY(col,dy); | ||||
|                     q0 = *qp++; | ||||
|                     r = (q0.r + ctx->round) * (uint64_t)ctx->divisor >> 32; | ||||
|                     g = (q0.g + ctx->round) * (uint64_t)ctx->divisor >> 32; | ||||
|                     b = (q0.b + ctx->round) * (uint64_t)ctx->divisor >> 32; | ||||
|                     r = SC_MUL(q0.r + ctx->round, ctx->divisor); | ||||
|                     g = SC_MUL(q0.g + ctx->round, ctx->divisor); | ||||
|                     b = SC_MUL(q0.b + ctx->round, ctx->divisor); | ||||
|                     r = (31 * r + (r >> 3) + delta) >> 8; | ||||
|                     g = (63 * g + (g >> 2) + delta) >> 8; | ||||
|                     b = (31 * b + (b >> 3) + delta) >> 8; | ||||
|  | @ -680,6 +677,7 @@ int resize_on_load(struct bitmap *bm, bool dither, struct dim *src, | |||
|         scale_h_linear_setup(&ctx); | ||||
|     } | ||||
| #endif | ||||
|     SC_MUL_INIT; | ||||
| #ifdef HAVE_UPSCALER | ||||
|     if (sh > dh) | ||||
| #endif | ||||
|  | @ -688,6 +686,7 @@ int resize_on_load(struct bitmap *bm, bool dither, struct dim *src, | |||
|     else | ||||
|         ret = scale_v_linear(rset, &ctx); | ||||
| #endif | ||||
|     SC_MUL_END; | ||||
| #ifdef HAVE_ADJUSTABLE_CPU_FREQ | ||||
|     cpu_boost(false); | ||||
| #endif | ||||
|  |  | |||
|  | @ -43,6 +43,51 @@ | |||
| #define MAX_SC_STACK_ALLOC 0 | ||||
| #define HAVE_UPSCALER 1 | ||||
| 
 | ||||
| #if defined(CPU_COLDFIRE) /* ColdFire: SC_MUL is done with inline asm on the MAC unit */ | ||||
| #define SC_NUM 0x80000000U /* 2^31; defined again with the same value in the SC_SHIFT == 32 section */ | ||||
| #define SC_MUL_INIT /* expands to a declaration of macsr_st, so use it in the same scope as SC_MUL_END */ \ | ||||
|     unsigned long macsr_st = coldfire_get_macsr(); /* save the current MACSR value */ \ | ||||
|     coldfire_set_macsr(0); /* then set MACSR to 0 for the SC_MUL calls that follow */ | ||||
| #define SC_MUL_END coldfire_set_macsr(macsr_st); /* restore the MACSR value saved by SC_MUL_INIT */ | ||||
| #define SC_MUL(x, y) /* statement expression; its value is the local t */ \ | ||||
| ({ \ | ||||
|     unsigned long t; \ | ||||
|     asm ("mac.l    %[a], %[b], %%acc0\n\t" /* mac.l on x and y, targeting %acc0 */ \ | ||||
|          "move.l %%accext01, %[t]\n\t" /* copy %accext01 into t */ \ | ||||
|          "move.l #0, %%acc0\n\t" /* zero %acc0 again before leaving */ \ | ||||
|          : [t] "=r" (t) : [a] "r" (x), [b] "r" (y)); \ | ||||
|     t; \ | ||||
| }) | ||||
| #elif defined(CPU_SH) /* SH: select the 24-bit shift variant below */ | ||||
| #define SC_SHIFT 24 | ||||
| #endif | ||||
| 
 | ||||
| #ifndef SC_SHIFT /* every target that did not choose a shift above gets 32 */ | ||||
| #define SC_SHIFT 32 | ||||
| #endif | ||||
| 
 | ||||
| #if SC_SHIFT == 24 | ||||
| #define SC_NUM 0x1000000U /* 2^24 */ | ||||
| #define SC_FIX 0 /* a shift count of 0 */ | ||||
| 
 | ||||
| #ifndef SC_MUL /* plain C fallback; only used when no SC_MUL was defined above */ | ||||
| #define SC_MUL(x, y) ((x) * (y) >> 24) /* no widening cast: the product has the operands' own type */ | ||||
| #define SC_MUL_INIT /* empty: the C version needs no setup */ | ||||
| #define SC_MUL_END /* empty: the C version needs no teardown */ | ||||
| #endif | ||||
| 
 | ||||
| #else /* SC_SHIFT == 32 */ | ||||
| #define SC_NUM 0x80000000U /* 2^31 */ | ||||
| #define SC_FIX 1 /* a shift count of 1 */ | ||||
| 
 | ||||
| #ifndef SC_MUL /* plain C fallback; skipped when the ColdFire SC_MUL above is defined */ | ||||
| #define SC_MUL(x, y) ((x) * (uint64_t)(y) >> 32) /* y is cast to uint64_t before the multiply, then shifted right by 32 */ | ||||
| #define SC_MUL_INIT /* empty: the C version needs no setup */ | ||||
| #define SC_MUL_END /* empty: the C version needs no teardown */ | ||||
| #endif | ||||
| 
 | ||||
| #endif | ||||
| 
 | ||||
| struct img_part { | ||||
|     int len; | ||||
| #if !defined(HAVE_LCD_COLOR)     | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue