forked from len0rd/rockbox
Approx. 10% speedup in cook on the files tested: remove some inner loops in favour of memcpy/memset/vect_add calls; remove multiplication from index arithmetic in loops in favour of pointer arithmetic; make use of the MULT31, MULT31_SHIFT15 and CLIP_TO_15 implementations from codeclib instead of cook having its own implementations
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@22055 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
cece75eb42
commit
a8d1cfdec8
2 changed files with 84 additions and 99 deletions
|
|
@ -328,13 +328,8 @@ static void categorize(COOKContext *q, int* quant_index_table,
|
||||||
--exp_index2[index];
|
--exp_index2[index];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
memcpy(category, exp_index2, sizeof(int) * q->total_subbands );
|
||||||
for(i=0 ; i<q->total_subbands ; i++)
|
memcpy(category_index, tmp_categorize_array+tmp_categorize_array2_idx, sizeof(int) * (q->numvector_size-1) );
|
||||||
category[i] = exp_index2[i];
|
|
||||||
|
|
||||||
for(i=0 ; i<q->numvector_size-1 ; i++)
|
|
||||||
category_index[i] = tmp_categorize_array[tmp_categorize_array2_idx++];
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -370,27 +365,38 @@ static int unpack_SQVH(COOKContext *q, int category, int* subband_coef_index,
|
||||||
|
|
||||||
vd = vd_tab[category];
|
vd = vd_tab[category];
|
||||||
result = 0;
|
result = 0;
|
||||||
for(i=0 ; i<vpr_tab[category] ; i++){
|
for(i=0 ; i<vpr_tab[category] ; i++)
|
||||||
|
{
|
||||||
vlc = get_vlc2(&q->gb, q->sqvh[category].table, q->sqvh[category].bits, 3);
|
vlc = get_vlc2(&q->gb, q->sqvh[category].table, q->sqvh[category].bits, 3);
|
||||||
if (q->bits_per_subpacket < get_bits_count(&q->gb)){
|
if (q->bits_per_subpacket < get_bits_count(&q->gb))
|
||||||
|
{
|
||||||
vlc = 0;
|
vlc = 0;
|
||||||
result = 1;
|
result = 1;
|
||||||
|
memset(subband_coef_index, 0, sizeof(int)*vd);
|
||||||
|
memset(subband_coef_sign, 0, sizeof(int)*vd);
|
||||||
|
subband_coef_index+=vd;
|
||||||
|
subband_coef_sign+=vd;
|
||||||
}
|
}
|
||||||
for(j=vd-1 ; j>=0 ; j--){
|
else
|
||||||
tmp = (vlc * invradix_tab[category])/0x100000;
|
{
|
||||||
subband_coef_index[vd*i+j] = vlc - tmp * (kmax_tab[category]+1);
|
for(j=vd-1 ; j>=0 ; j--){
|
||||||
vlc = tmp;
|
tmp = (vlc * invradix_tab[category])/0x100000;
|
||||||
}
|
subband_coef_index[j] = vlc - tmp * (kmax_tab[category]+1);
|
||||||
for(j=0 ; j<vd ; j++){
|
vlc = tmp;
|
||||||
if (subband_coef_index[i*vd + j]) {
|
}
|
||||||
if(get_bits_count(&q->gb) < q->bits_per_subpacket){
|
|
||||||
subband_coef_sign[i*vd+j] = get_bits1(&q->gb);
|
for(j=0 ; j<vd ; j++)
|
||||||
|
{
|
||||||
|
if (*subband_coef_index++) {
|
||||||
|
if(get_bits_count(&q->gb) < q->bits_per_subpacket) {
|
||||||
|
*subband_coef_sign++ = get_bits1(&q->gb);
|
||||||
|
} else {
|
||||||
|
result=1;
|
||||||
|
*subband_coef_sign++=0;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
result=1;
|
*subband_coef_sign++=0;
|
||||||
subband_coef_sign[i*vd+j]=0;
|
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
subband_coef_sign[i*vd+j]=0;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -505,7 +511,7 @@ static void decouple_info(COOKContext *q, int* decouple_tab){
|
||||||
|
|
||||||
static void joint_decode(COOKContext *q, REAL_T* mlt_buffer1,
|
static void joint_decode(COOKContext *q, REAL_T* mlt_buffer1,
|
||||||
REAL_T* mlt_buffer2) {
|
REAL_T* mlt_buffer2) {
|
||||||
int i,j;
|
int i;
|
||||||
int decouple_tab[SUBBAND_SIZE];
|
int decouple_tab[SUBBAND_SIZE];
|
||||||
REAL_T *decode_buffer = q->decode_buffer_0;
|
REAL_T *decode_buffer = q->decode_buffer_0;
|
||||||
int idx;
|
int idx;
|
||||||
|
|
@ -520,11 +526,14 @@ static void joint_decode(COOKContext *q, REAL_T* mlt_buffer1,
|
||||||
mono_decode(q, decode_buffer);
|
mono_decode(q, decode_buffer);
|
||||||
|
|
||||||
/* The two channels are stored interleaved in decode_buffer. */
|
/* The two channels are stored interleaved in decode_buffer. */
|
||||||
for (i=0 ; i<q->js_subband_start ; i++) {
|
REAL_T * mlt_buffer1_end = mlt_buffer1 + (q->js_subband_start*SUBBAND_SIZE);
|
||||||
for (j=0 ; j<SUBBAND_SIZE ; j++) {
|
while(mlt_buffer1 < mlt_buffer1_end)
|
||||||
mlt_buffer1[i*20+j] = decode_buffer[i*40+j];
|
{
|
||||||
mlt_buffer2[i*20+j] = decode_buffer[i*40+20+j];
|
memcpy(mlt_buffer1,decode_buffer,sizeof(REAL_T)*SUBBAND_SIZE);
|
||||||
}
|
memcpy(mlt_buffer2,decode_buffer+20,sizeof(REAL_T)*SUBBAND_SIZE);
|
||||||
|
mlt_buffer1 += 20;
|
||||||
|
mlt_buffer2 += 20;
|
||||||
|
decode_buffer += 40;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* When we reach js_subband_start (the higher frequencies)
|
/* When we reach js_subband_start (the higher frequencies)
|
||||||
|
|
@ -533,11 +542,15 @@ static void joint_decode(COOKContext *q, REAL_T* mlt_buffer1,
|
||||||
for (i=q->js_subband_start ; i<q->subbands ; i++) {
|
for (i=q->js_subband_start ; i<q->subbands ; i++) {
|
||||||
int i1 = decouple_tab[cplband[i]];
|
int i1 = decouple_tab[cplband[i]];
|
||||||
int i2 = idx - i1 - 1;
|
int i2 = idx - i1 - 1;
|
||||||
for (j=0 ; j<SUBBAND_SIZE ; j++) {
|
mlt_buffer1_end = mlt_buffer1 + SUBBAND_SIZE;
|
||||||
REAL_T x = decode_buffer[((q->js_subband_start + i)*20)+j];
|
while(mlt_buffer1 < mlt_buffer1_end)
|
||||||
mlt_buffer1[20*i+j] = cplscale_math(x, q->js_vlc_bits, i1);
|
{
|
||||||
mlt_buffer2[20*i+j] = cplscale_math(x, q->js_vlc_bits, i2);
|
*mlt_buffer1++ = cplscale_math(*decode_buffer, q->js_vlc_bits, i1);
|
||||||
|
*mlt_buffer2++ = cplscale_math(*decode_buffer++, q->js_vlc_bits, i2);
|
||||||
}
|
}
|
||||||
|
mlt_buffer1 += (20-SUBBAND_SIZE);
|
||||||
|
mlt_buffer2 += (20-SUBBAND_SIZE);
|
||||||
|
decode_buffer += (20-SUBBAND_SIZE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -581,7 +594,7 @@ decode_bytes_and_gain(COOKContext *q, const uint8_t *inbuffer,
|
||||||
* @param chan 0: left or single channel, 1: right channel
|
* @param chan 0: left or single channel, 1: right channel
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static inline void
|
static void
|
||||||
mlt_compensate_output(COOKContext *q, REAL_T *decode_buffer,
|
mlt_compensate_output(COOKContext *q, REAL_T *decode_buffer,
|
||||||
cook_gains *gains, REAL_T *previous_buffer,
|
cook_gains *gains, REAL_T *previous_buffer,
|
||||||
int16_t *out, int chan)
|
int16_t *out, int chan)
|
||||||
|
|
|
||||||
|
|
@ -35,8 +35,13 @@
|
||||||
* in C using two 32 bit integer multiplications.
|
* in C using two 32 bit integer multiplications.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/* Get the definitions of MULT31, MULT31_SHIFT15, CLIP_TO_15 and vect_add from codeclib */
|
||||||
|
#include "asm_arm.h"
|
||||||
|
#include "asm_mcf5249.h"
|
||||||
|
#include "codeclib_misc.h"
|
||||||
|
|
||||||
/* The following table is taken from libavutil/mathematics.c */
|
/* The following table is taken from libavutil/mathematics.c */
|
||||||
const uint8_t ff_log2_tab[256]={
|
const uint8_t ff_log2_tab[256] ={
|
||||||
0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
|
0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
|
||||||
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
|
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
|
||||||
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
|
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
|
||||||
|
|
@ -67,6 +72,11 @@ static inline FIXP fixp_pow2(FIXP x, int i)
|
||||||
return x << i; /* no check for overflow */
|
return x << i; /* no check for overflow */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline FIXP fixp_pow2_neg(FIXP x, int i)
|
||||||
|
{
|
||||||
|
return (x >> i) + ((x >> (i-1)) & 1);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Fixed point multiply by fraction.
|
* Fixed point multiply by fraction.
|
||||||
*
|
*
|
||||||
|
|
@ -74,53 +84,10 @@ static inline FIXP fixp_pow2(FIXP x, int i)
|
||||||
* @param b fix point fraction, 0 <= b < 1
|
* @param b fix point fraction, 0 <= b < 1
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static inline FIXP fixp_mult_su(FIXP a, FIXPU b)
|
#define fixp_mult_su(x,y) (MULT31_SHIFT15(x,y))
|
||||||
{
|
|
||||||
|
|
||||||
int32_t hb = (a >> 16) * b;
|
|
||||||
uint32_t lb = (a & 0xffff) * b;
|
|
||||||
|
|
||||||
return hb + (lb >> 16) + ((lb & 0x8000) >> 15);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Faster version of the above using 32x32=64 bit multiply */
|
/* Faster version of the above using 32x32=64 bit multiply */
|
||||||
#ifdef CPU_ARM
|
#define fixmul31(x,y) (MULT31(x,y))
|
||||||
#define fixmul31(x, y) \
|
|
||||||
({ int32_t __hi; \
|
|
||||||
uint32_t __lo; \
|
|
||||||
int32_t __result; \
|
|
||||||
asm ("smull %0, %1, %3, %4\n\t" \
|
|
||||||
"movs %2, %1, lsl #1" \
|
|
||||||
: "=&r" (__lo), "=&r" (__hi), "=r" (__result) \
|
|
||||||
: "%r" (x), "r" (y) \
|
|
||||||
: "cc"); \
|
|
||||||
__result; \
|
|
||||||
})
|
|
||||||
|
|
||||||
#elif defined(CPU_COLDFIRE)
|
|
||||||
static inline int32_t fixmul31(int32_t x, int32_t y)
|
|
||||||
{
|
|
||||||
asm (
|
|
||||||
"mac.l %[x], %[y], %%acc0 \n" /* multiply */
|
|
||||||
"movclr.l %%acc0, %[x] \n" /* get higher half */
|
|
||||||
: [x] "+d" (x)
|
|
||||||
: [y] "d" (y)
|
|
||||||
);
|
|
||||||
return x;
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
static inline int32_t fixmul31(int32_t x, int32_t y)
|
|
||||||
{
|
|
||||||
int64_t temp;
|
|
||||||
|
|
||||||
temp = x;
|
|
||||||
temp *= y;
|
|
||||||
|
|
||||||
temp >>= 31; //16+31-16 = 31 bits
|
|
||||||
|
|
||||||
return (int32_t)temp;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* math functions taken from libavutil/common.h */
|
/* math functions taken from libavutil/common.h */
|
||||||
|
|
||||||
|
|
@ -169,13 +136,13 @@ static void scalar_dequant_math(COOKContext *q, int index,
|
||||||
int* subband_coef_sign, REAL_T *mlt_p)
|
int* subband_coef_sign, REAL_T *mlt_p)
|
||||||
{
|
{
|
||||||
/* Num. half bits to right shift */
|
/* Number of whole bits to right shift (the half-bit count divided by two) */
|
||||||
const int s = 33 - quant_index + av_log2(q->samples_per_channel);
|
const int s = (33 - quant_index + av_log2(q->samples_per_channel)) >> 1;
|
||||||
const FIXP *table = quant_tables[s & 1][index];
|
const FIXP *table = quant_tables[s & 1][index];
|
||||||
FIXP f;
|
FIXP f;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
|
|
||||||
if(s >= 64)
|
if(s >= 32)
|
||||||
memset(mlt_p, 0, sizeof(REAL_T)*SUBBAND_SIZE);
|
memset(mlt_p, 0, sizeof(REAL_T)*SUBBAND_SIZE);
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
@ -186,7 +153,7 @@ static void scalar_dequant_math(COOKContext *q, int index,
|
||||||
((subband_coef_index[i] != 0) && subband_coef_sign[i]))
|
((subband_coef_index[i] != 0) && subband_coef_sign[i]))
|
||||||
f = -f;
|
f = -f;
|
||||||
|
|
||||||
mlt_p[i] =fixp_pow2(f, -(s/2));
|
*mlt_p++ = fixp_pow2_neg(f, s);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -274,10 +241,9 @@ static inline void imlt_math(COOKContext *q, FIXP *in)
|
||||||
static inline void overlap_math(COOKContext *q, int gain, FIXP buffer[])
|
static inline void overlap_math(COOKContext *q, int gain, FIXP buffer[])
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
if(LIKELY(gain == 0)){
|
if(LIKELY(gain == 0))
|
||||||
for(i=0 ; i<q->samples_per_channel ; i++) {
|
{
|
||||||
q->mono_mdct_output[i] += buffer[i];
|
vect_add(q->mono_mdct_output, buffer, q->samples_per_channel);
|
||||||
}
|
|
||||||
|
|
||||||
} else if (gain > 0){
|
} else if (gain > 0){
|
||||||
for(i=0 ; i<q->samples_per_channel ; i++) {
|
for(i=0 ; i<q->samples_per_channel ; i++) {
|
||||||
|
|
@ -301,7 +267,7 @@ static inline void overlap_math(COOKContext *q, int gain, FIXP buffer[])
|
||||||
* @param gain_index_next index for the next block multiplier
|
* @param gain_index_next index for the next block multiplier
|
||||||
*/
|
*/
|
||||||
static inline void
|
static inline void
|
||||||
interpolate_math(COOKContext *q, FIXP* buffer,
|
interpolate_math(COOKContext *q, register FIXP* buffer,
|
||||||
int gain_index, int gain_index_next)
|
int gain_index, int gain_index_next)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
@ -315,14 +281,17 @@ interpolate_math(COOKContext *q, FIXP* buffer,
|
||||||
int step = (gain_index_next - gain_index)
|
int step = (gain_index_next - gain_index)
|
||||||
<< (7 - av_log2(gain_size_factor));
|
<< (7 - av_log2(gain_size_factor));
|
||||||
int x = 0;
|
int x = 0;
|
||||||
|
register FIXP* bufferend = buffer+gain_size_factor;
|
||||||
for(i = 0; i < gain_size_factor; i++) {
|
while(buffer < bufferend )
|
||||||
buffer[i] = fixp_mult_su(buffer[i], pow128_tab[x]);
|
{
|
||||||
buffer[i] = fixp_pow2(buffer[i], gain_index+1);
|
*buffer = fixp_pow2(
|
||||||
|
fixp_mult_su(*buffer, pow128_tab[x]),
|
||||||
|
gain_index+1);
|
||||||
|
buffer++;
|
||||||
|
|
||||||
x += step;
|
x += step;
|
||||||
gain_index += (x + 128) / 128 - 1;
|
gain_index += ( (x + 128) >> 7 ) - 1;
|
||||||
x = (x + 128) % 128;
|
x = ( (x + 128) & 127 );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -349,12 +318,15 @@ static inline FIXP cplscale_math(FIXP x, int table, int i)
|
||||||
* @param out pointer to the output buffer
|
* @param out pointer to the output buffer
|
||||||
* @param chan 0: left or single channel, 1: right channel
|
* @param chan 0: left or single channel, 1: right channel
|
||||||
*/
|
*/
|
||||||
static inline void output_math(COOKContext *q, int16_t *out, int chan)
|
static inline void output_math(COOKContext *q, register int16_t *out, int chan)
|
||||||
{
|
{
|
||||||
int j;
|
register REAL_T * mono_output_ptr = q->mono_mdct_output;
|
||||||
|
register REAL_T * mono_output_end = mono_output_ptr + q->samples_per_channel;
|
||||||
for (j = 0; j < q->samples_per_channel; j++) {
|
out += chan;
|
||||||
out[chan + q->nb_channels * j] =
|
const int STEP = q->nb_channels;
|
||||||
av_clip(fixp_pow2(q->mono_mdct_output[j], -11), -32768, 32767);
|
while( mono_output_ptr < mono_output_end )
|
||||||
|
{
|
||||||
|
*out = CLIP_TO_15(fixp_pow2_neg(*mono_output_ptr++, 11));
|
||||||
|
out += STEP;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue