Libfaad pow2/log2 improvements:

Correct pow2_* to calculate correct output for input < 0.0 or between 0.0 and 1.0 instead of returning 0. This will change output, but the float version of this codec uses pow(2.0,x) in place of these functions, so this behavior was probably a bug. Replace the 64-bit multiply in pow2_* with a left or right shift by the whole part of the input, rounding if shifting right. An ARM-specific optimized version is provided as a C inline function; other targets use a C macro. Increase the size (precision) of the constants in the pow2 table to improve accuracy, and avoid early truncation in the linear interpolation of the result. Move the constant tables for pow2 and log2 to IRAM. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@23967 a1c6a512-1295-4272-9138-f99709370657
2009-12-13 03:45:40 +00:00 · 2009-12-13 03:45:40 +00:00 · af5bad46d8
commit af5bad46d8
parent 7668e3fe51
3 changed files with 105 additions and 64 deletions
--- a/apps/codecs/libfaad/common.c
+++ b/apps/codecs/libfaad/common.c
@ -248,33 +248,78 @@ uint32_t random_int(void)
 #define TABLE_BITS 6
 /* just take the maximum number of bits for interpolation */
 #define INTERP_BITS (REAL_BITS-TABLE_BITS)
-
-static const real_t pow2_tab[] = {
-    REAL_CONST(1.000000000000000), REAL_CONST(1.010889286051701), REAL_CONST(1.021897148654117),
-    REAL_CONST(1.033024879021228), REAL_CONST(1.044273782427414), REAL_CONST(1.055645178360557),
-    REAL_CONST(1.067140400676824), REAL_CONST(1.078760797757120), REAL_CONST(1.090507732665258),
-    REAL_CONST(1.102382583307841), REAL_CONST(1.114386742595892), REAL_CONST(1.126521618608242),
-    REAL_CONST(1.138788634756692), REAL_CONST(1.151189229952983), REAL_CONST(1.163724858777578),
-    REAL_CONST(1.176396991650281), REAL_CONST(1.189207115002721), REAL_CONST(1.202156731452703),
-    REAL_CONST(1.215247359980469), REAL_CONST(1.228480536106870), REAL_CONST(1.241857812073484),
-    REAL_CONST(1.255380757024691), REAL_CONST(1.269050957191733), REAL_CONST(1.282870016078778),
-    REAL_CONST(1.296839554651010), REAL_CONST(1.310961211524764), REAL_CONST(1.325236643159741),
-    REAL_CONST(1.339667524053303), REAL_CONST(1.354255546936893), REAL_CONST(1.369002422974591),
-    REAL_CONST(1.383909881963832), REAL_CONST(1.398979672538311), REAL_CONST(1.414213562373095),
-    REAL_CONST(1.429613338391970), REAL_CONST(1.445180806977047), REAL_CONST(1.460917794180647),
-    REAL_CONST(1.476826145939499), REAL_CONST(1.492907728291265), REAL_CONST(1.509164427593423),
-    REAL_CONST(1.525598150744538), REAL_CONST(1.542210825407941), REAL_CONST(1.559004400237837),
-    REAL_CONST(1.575980845107887), REAL_CONST(1.593142151342267), REAL_CONST(1.610490331949254),
-    REAL_CONST(1.628027421857348), REAL_CONST(1.645755478153965), REAL_CONST(1.663676580326736),
-    REAL_CONST(1.681792830507429), REAL_CONST(1.700106353718524), REAL_CONST(1.718619298122478),
-    REAL_CONST(1.737333835273706), REAL_CONST(1.756252160373300), REAL_CONST(1.775376492526521),
-    REAL_CONST(1.794709075003107), REAL_CONST(1.814252175500399), REAL_CONST(1.834008086409342),
-    REAL_CONST(1.853979125083386), REAL_CONST(1.874167634110300), REAL_CONST(1.894575981586966),
-    REAL_CONST(1.915206561397147), REAL_CONST(1.936061793492294), REAL_CONST(1.957144124175400),
-    REAL_CONST(1.978456026387951), REAL_CONST(2.000000000000000)
+/* precision of values in pow2_tab: entries carry POWTBL_BITS fractional bits.
+ * With 31-INTERP_BITS fractional bits, an entry below 2.0 shifted left by
+ * INTERP_BITS (as pow2_fix/pow2_int do with x1) still fits in an unsigned
+ * 32-bit value. */
+#define POWTBL_BITS (31-(INTERP_BITS))
+#define POWTBL_PRECIS (1U<<(POWTBL_BITS))
+/* pow2_tab[i] = 2^(i / 2^TABLE_BITS) for i = 0 .. 2^TABLE_BITS, stored as
+ * unsigned fixed point scaled by POWTBL_PRECIS.  The table has one entry more
+ * than 2^TABLE_BITS (the final 2.0) because the interpolation code reads
+ * pow2_tab[index + 1] as the upper neighbour of the last interval.
+ * NOTE(review): ICONST_ATTR presumably places the table in IRAM, as the commit
+ * description states - confirm against the target's attribute definition. */
+static const uint32_t pow2_tab[] ICONST_ATTR = {
+    UFIX_CONST(1.000000000000000,POWTBL_PRECIS),
+    UFIX_CONST(1.010889286051701,POWTBL_PRECIS),
+    UFIX_CONST(1.021897148654117,POWTBL_PRECIS),
+    UFIX_CONST(1.033024879021228,POWTBL_PRECIS),
+    UFIX_CONST(1.044273782427414,POWTBL_PRECIS),
+    UFIX_CONST(1.055645178360557,POWTBL_PRECIS),
+    UFIX_CONST(1.067140400676824,POWTBL_PRECIS),
+    UFIX_CONST(1.078760797757120,POWTBL_PRECIS),
+    UFIX_CONST(1.090507732665258,POWTBL_PRECIS),
+    UFIX_CONST(1.102382583307841,POWTBL_PRECIS),
+    UFIX_CONST(1.114386742595892,POWTBL_PRECIS),
+    UFIX_CONST(1.126521618608242,POWTBL_PRECIS),
+    UFIX_CONST(1.138788634756692,POWTBL_PRECIS),
+    UFIX_CONST(1.151189229952983,POWTBL_PRECIS),
+    UFIX_CONST(1.163724858777578,POWTBL_PRECIS),
+    UFIX_CONST(1.176396991650281,POWTBL_PRECIS),
+    UFIX_CONST(1.189207115002721,POWTBL_PRECIS),
+    UFIX_CONST(1.202156731452703,POWTBL_PRECIS),
+    UFIX_CONST(1.215247359980469,POWTBL_PRECIS),
+    UFIX_CONST(1.228480536106870,POWTBL_PRECIS),
+    UFIX_CONST(1.241857812073484,POWTBL_PRECIS),
+    UFIX_CONST(1.255380757024691,POWTBL_PRECIS),
+    UFIX_CONST(1.269050957191733,POWTBL_PRECIS),
+    UFIX_CONST(1.282870016078778,POWTBL_PRECIS),
+    UFIX_CONST(1.296839554651010,POWTBL_PRECIS),
+    UFIX_CONST(1.310961211524764,POWTBL_PRECIS),
+    UFIX_CONST(1.325236643159741,POWTBL_PRECIS),
+    UFIX_CONST(1.339667524053303,POWTBL_PRECIS),
+    UFIX_CONST(1.354255546936893,POWTBL_PRECIS),
+    UFIX_CONST(1.369002422974591,POWTBL_PRECIS),
+    UFIX_CONST(1.383909881963832,POWTBL_PRECIS),
+    UFIX_CONST(1.398979672538311,POWTBL_PRECIS),
+    UFIX_CONST(1.414213562373095,POWTBL_PRECIS),
+    UFIX_CONST(1.429613338391970,POWTBL_PRECIS),
+    UFIX_CONST(1.445180806977047,POWTBL_PRECIS),
+    UFIX_CONST(1.460917794180647,POWTBL_PRECIS),
+    UFIX_CONST(1.476826145939499,POWTBL_PRECIS),
+    UFIX_CONST(1.492907728291265,POWTBL_PRECIS),
+    UFIX_CONST(1.509164427593423,POWTBL_PRECIS),
+    UFIX_CONST(1.525598150744538,POWTBL_PRECIS),
+    UFIX_CONST(1.542210825407941,POWTBL_PRECIS),
+    UFIX_CONST(1.559004400237837,POWTBL_PRECIS),
+    UFIX_CONST(1.575980845107887,POWTBL_PRECIS),
+    UFIX_CONST(1.593142151342267,POWTBL_PRECIS),
+    UFIX_CONST(1.610490331949254,POWTBL_PRECIS),
+    UFIX_CONST(1.628027421857348,POWTBL_PRECIS),
+    UFIX_CONST(1.645755478153965,POWTBL_PRECIS),
+    UFIX_CONST(1.663676580326736,POWTBL_PRECIS),
+    UFIX_CONST(1.681792830507429,POWTBL_PRECIS),
+    UFIX_CONST(1.700106353718524,POWTBL_PRECIS),
+    UFIX_CONST(1.718619298122478,POWTBL_PRECIS),
+    UFIX_CONST(1.737333835273706,POWTBL_PRECIS),
+    UFIX_CONST(1.756252160373300,POWTBL_PRECIS),
+    UFIX_CONST(1.775376492526521,POWTBL_PRECIS),
+    UFIX_CONST(1.794709075003107,POWTBL_PRECIS),
+    UFIX_CONST(1.814252175500399,POWTBL_PRECIS),
+    UFIX_CONST(1.834008086409342,POWTBL_PRECIS),
+    UFIX_CONST(1.853979125083386,POWTBL_PRECIS),
+    UFIX_CONST(1.874167634110300,POWTBL_PRECIS),
+    UFIX_CONST(1.894575981586966,POWTBL_PRECIS),
+    UFIX_CONST(1.915206561397147,POWTBL_PRECIS),
+    UFIX_CONST(1.936061793492294,POWTBL_PRECIS),
+    UFIX_CONST(1.957144124175400,POWTBL_PRECIS),
+    UFIX_CONST(1.978456026387951,POWTBL_PRECIS),
+    UFIX_CONST(2.000000000000000,POWTBL_PRECIS)
 };

-static const real_t log2_tab[] = {
+static const real_t log2_tab[] ICONST_ATTR = {
    REAL_CONST(0.000000000000000), REAL_CONST(0.022367813028455), REAL_CONST(0.044394119358453),
    REAL_CONST(0.066089190457772), REAL_CONST(0.087462841250339), REAL_CONST(0.108524456778169),
    REAL_CONST(0.129283016944966), REAL_CONST(0.149747119504682), REAL_CONST(0.169925001442312),
@ -299,12 +344,12 @@ static const real_t log2_tab[] = {
    REAL_CONST(0.988684686772166), REAL_CONST(1.000000000000000)
 };

-real_t pow2_fix(real_t val)
+uint32_t pow2_fix(real_t val)
 {
    uint32_t x1, x2;
    uint32_t errcorr;
    uint32_t index_frac;
-    real_t retval;
+    uint32_t retval;
    int32_t whole = (val >> REAL_BITS);

    /* rest = [0..1] */
@ -313,41 +358,27 @@ real_t pow2_fix(real_t val)
    /* index into pow2_tab */
    int32_t index = rest >> (REAL_BITS-TABLE_BITS);

-
-    if (val == 0)
-        return (1<<REAL_BITS);
-
    /* leave INTERP_BITS bits */
    index_frac = rest >> (REAL_BITS-TABLE_BITS-INTERP_BITS);
    index_frac = index_frac & ((1<<INTERP_BITS)-1);

-    if (whole > 0)
-    {
-        retval = 1 << whole;
-    } else {
-        retval = REAL_CONST(1) >> -whole;
-    }
-
    x1 = pow2_tab[index & ((1<<TABLE_BITS)-1)];
    x2 = pow2_tab[(index & ((1<<TABLE_BITS)-1)) + 1];
-    errcorr = ( (index_frac*(x2-x1))) >> INTERP_BITS;
+    errcorr = ( (index_frac*(x2-x1)));

-    if (whole > 0)
-    {
-        retval = retval * (errcorr + x1);
-    } else {
-        retval = MUL_R(retval, (errcorr + x1));
-    }
+    retval = errcorr + (x1<<INTERP_BITS);
+
+    retval = DESCALE_SHIFT(retval, whole, POWTBL_BITS + INTERP_BITS - REAL_BITS);

    return retval;
 }

-int32_t pow2_int(real_t val)
+uint32_t pow2_int(real_t val)
 {
    uint32_t x1, x2;
    uint32_t errcorr;
    uint32_t index_frac;
-    real_t retval;
+    uint32_t retval;
    int32_t whole = (val >> REAL_BITS);

    /* rest = [0..1] */
@ -356,24 +387,17 @@ int32_t pow2_int(real_t val)
    /* index into pow2_tab */
    int32_t index = rest >> (REAL_BITS-TABLE_BITS);

-
-    if (val == 0)
-        return 1;
-
    /* leave INTERP_BITS bits */
    index_frac = rest >> (REAL_BITS-TABLE_BITS-INTERP_BITS);
    index_frac = index_frac & ((1<<INTERP_BITS)-1);

-    if (whole > 0)
-        retval = 1 << whole;
-    else
-        retval = 0;
-
    x1 = pow2_tab[index & ((1<<TABLE_BITS)-1)];
    x2 = pow2_tab[(index & ((1<<TABLE_BITS)-1)) + 1];
-    errcorr = ( (index_frac*(x2-x1))) >> INTERP_BITS;
+    errcorr = ( (index_frac*(x2-x1)));

-    retval = MUL_R(retval, (errcorr + x1));
+    retval = errcorr + (x1<<INTERP_BITS);
+
+    retval = DESCALE_SHIFT(retval, whole, POWTBL_BITS + INTERP_BITS);

    return retval;
 }
--- a/apps/codecs/libfaad/common.h
+++ b/apps/codecs/libfaad/common.h
@ -404,8 +404,8 @@ uint32_t random_int(void);
 #define LOG2_MIN_INF REAL_CONST(-10000)
 int32_t log2_int(uint32_t val);
 int32_t log2_fix(uint32_t val);
-int32_t pow2_int(real_t val);
-real_t pow2_fix(real_t val);
+uint32_t pow2_int(real_t val);
+uint32_t pow2_fix(real_t val);
 #endif
 uint8_t get_sr_index(const uint32_t samplerate);
 uint8_t max_pred_sfb(const uint8_t sr_index);
--- a/apps/codecs/libfaad/fixed.h
+++ b/apps/codecs/libfaad/fixed.h
@ -49,10 +49,11 @@ extern "C" {

 typedef int32_t real_t;

-
-#define REAL_CONST(A) (((A) >= 0) ? ((real_t)((A)*(REAL_PRECISION)+0.5)) : ((real_t)((A)*(REAL_PRECISION)-0.5)))
-#define COEF_CONST(A) (((A) >= 0) ? ((real_t)((A)*(COEF_PRECISION)+0.5)) : ((real_t)((A)*(COEF_PRECISION)-0.5)))
-#define FRAC_CONST(A) (((A) == 1.00) ? ((real_t)FRAC_MAX) : (((A) >= 0) ? ((real_t)((A)*(FRAC_PRECISION)+0.5)) : ((real_t)((A)*(FRAC_PRECISION)-0.5))))
+/* Convert a floating-point constant A to unsigned fixed point: scale by
+ * PRECISION, add 0.5 and truncate to uint32_t.  The rounding is only correct
+ * for A >= 0. */
+#define UFIX_CONST(A,PRECISION) ((uint32_t)((A)*(PRECISION)+0.5))
+/* Signed counterpart: adds 0.5 for A >= 0 and subtracts 0.5 for A < 0 before
+ * the truncating cast to real_t, so halves round away from zero. */
+#define FIX_CONST(A,PRECISION) (((A) >= 0) ? ((real_t)((A)*(PRECISION)+0.5)) : ((real_t)((A)*(PRECISION)-0.5)))
+/* Fixed-point constants at REAL_PRECISION and COEF_PRECISION. */
+#define REAL_CONST(A) FIX_CONST((A),(REAL_PRECISION))
+#define COEF_CONST(A) FIX_CONST((A),(COEF_PRECISION))
+/* As above at FRAC_PRECISION, except that exactly 1.00 maps to FRAC_MAX
+ * instead of being scaled. */
+#define FRAC_CONST(A) (((A) == 1.00) ? ((real_t)FRAC_MAX) : FIX_CONST((A),(FRAC_PRECISION)))
 //#define FRAC_CONST(A) (((A) >= 0) ? ((real_t)((A)*(FRAC_PRECISION)+0.5)) : ((real_t)((A)*(FRAC_PRECISION)-0.5)))

 #define Q2_BITS 22
@ -128,6 +129,8 @@ static INLINE void ComplexMult(real_t *y1, real_t *y2,
  #define MUL_Q2(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (Q2_BITS-1))) >> Q2_BITS)
  #define MUL_SHIFT6(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (6-1))) >> 6)
  #define MUL_SHIFT23(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (23-1))) >> 23)
+  /* DESCALE(A,S): for S > 0 shift A right by S bits, rounding to nearest
+   * (shift by S-1, add 1, shift once more); otherwise shift A left by -S bits.
+   * Both arguments are evaluated more than once. */
+  #define DESCALE(A,S) ((S)>0?(((A)>>((S)-1))+1)>>1:(A)<<-(S))
+  /* DESCALE_SHIFT(A,SH,SC): DESCALE A by (SC - SH) bits, i.e. remove SC bits of
+   * scale while applying a left shift of SH bits.  The closing parenthesis of
+   * the DESCALE invocation was missing, which made every use fail to compile. */
+  #define DESCALE_SHIFT(A,SH,SC) DESCALE((A),(SC)-(SH))

 #elif defined(__GNUC__) && defined (__arm__)

@ -205,6 +208,18 @@ static INLINE void ComplexMult(real_t *y1, real_t *y2,
    *y2 = yt2 << (FRAC_SIZE-FRAC_BITS);
 }

+/* Inline-function form of DESCALE_SHIFT used in the GCC/ARM branch.
+ * Descales val by (scale - shift) bits: when val >> (scale - shift - 1) is
+ * non-zero the result is that value rounded and shifted one more bit right;
+ * otherwise the result is val << (shift - scale).
+ * NOTE(review): whenever the rounding branch is not taken, at least one of the
+ * evaluated shift counts is negative.  That is undefined in ISO C and
+ * presumably relies on ARM register-shift behaviour (out-of-range counts
+ * yielding 0) - confirm before reusing this on another target or compiler.
+ * NOTE(review): the return type is real_t (signed) although the value is
+ * computed as unsigned and pow2_fix/pow2_int store it in uint32_t - confirm
+ * this is intended. */
+static inline real_t DESCALE_SHIFT(unsigned val, int shift, int scale)
+{
+    unsigned out;
+    /* shift one bit less than required so the lowest bit can drive rounding */
+    if ((out = val >> (scale - shift - 1)))
+    {
+        /* round to nearest: add one at the half position, then drop that bit */
+        out++;
+        out >>= 1;
+    } else
+        /* no right-shifted value: apply the net left shift instead */
+        out = val << (shift - scale);
+    return out;
+}
+
 #else

  /* multiply with real shift */
@ -225,6 +240,8 @@ static INLINE void ComplexMult(real_t *y1, real_t *y2,
  #define MUL_Q2(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (Q2_BITS-1))) >> Q2_BITS)
  #define MUL_SHIFT6(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (6-1))) >> 6)
  #define MUL_SHIFT23(A,B) (real_t)(((int64_t)(A)*(int64_t)(B)+(1 << (23-1))) >> 23)
+  /* DESCALE(A,S): for S > 0 shift A right by S bits, rounding to nearest
+   * (shift by S-1, add 1, shift once more); otherwise shift A left by -S bits.
+   * Both arguments are evaluated more than once. */
+  #define DESCALE(A,S) ((S)>0?(((A)>>((S)-1))+1)>>1:(A)<<-(S))
+  /* DESCALE_SHIFT(A,SH,SC): DESCALE A by (SC - SH) bits, i.e. remove SC bits of
+   * scale while applying a left shift of SH bits.  The closing parenthesis of
+   * the DESCALE invocation was missing, which made every use fail to compile. */
+  #define DESCALE_SHIFT(A,SH,SC) DESCALE((A),(SC)-(SH))

 /* Complex multiplication */
 static INLINE void ComplexMult(real_t *y1, real_t *y2,