Submit FS#11445. Speed up the faad (AAC) decoder through several optimizations: refactoring some requantization routines, moving several arrays and code tables to IRAM, using an optimized swap32() function, and inlining several Huffman decoder functions. Decoding is sped up by ~10% (PP5002, PP5022, MCF5249) and ~22% (MCF5250).

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27225 a1c6a512-1295-4272-9138-f99709370657
2010-07-01 21:18:42 +00:00 · 2010-07-01 21:18:42 +00:00 · 52f17dfe9d
commit 52f17dfe9d
parent b013fb76c4
19 changed files with 121 additions and 134 deletions
--- a/apps/codecs/aac.c
+++ b/apps/codecs/aac.c
@ -27,6 +27,11 @@
 CODEC_HEADER
 /* Global buffers to be used in the mdct synthesis. This way the arrays can
 * be moved to IRAM for some targets */
 ALIGN real_t gb_time_buffer[2][1024] IBSS_ATTR_FAAD_LARGE_IRAM;
 ALIGN real_t gb_fb_intermed[2][1024] IBSS_ATTR_FAAD_LARGE_IRAM;
 /* this is the codec entry point */
 enum codec_status codec_main(void)
 {
@ -105,7 +110,14 @@ next_track:
        err = CODEC_ERROR;
        goto done;
    }
-
+    
    /* Set pointer to be able to use IRAM and to avoid alloc in decoder. Must
     * be called after NeAACDecOpen(). */
    decoder->time_out[0]    = &gb_time_buffer[0][0];
    decoder->time_out[1]    = &gb_time_buffer[1][0];
    decoder->fb_intermed[0] = &gb_fb_intermed[0][0];
    decoder->fb_intermed[1] = &gb_fb_intermed[1][0];
    ci->id3->frequency = s;
    i = 0;
--- a/apps/codecs/libfaad/bits.c
+++ b/apps/codecs/libfaad/bits.c
@ -33,9 +33,10 @@
 #include "bits.h"
 /* Needs to be large enough to fit the largest compressed sample in a file.
- * Samples a little larger than 1 KB observed in a 256 kbps file.
+ * Samples were observed to need up to 1500 bytes (400 kbps nero aac).
 */
-uint8_t static_buffer[2048];
+#define BUFFER_SIZE 2048
 uint8_t static_buffer[BUFFER_SIZE] IBSS_ATTR;
 /* initialize buffer, call once before first getbits or showbits */
 void faad_initbits(bitfile *ld, const void *_buffer, const uint32_t buffer_size)
@ -47,7 +48,7 @@ void faad_initbits(bitfile *ld, const void *_buffer, const uint32_t buffer_size)
    memset(ld, 0, sizeof(bitfile));
-    if (buffer_size == 0 || _buffer == NULL)
+    if (buffer_size == 0 || _buffer == NULL || (buffer_size+12)>BUFFER_SIZE)
    {
        ld->error = 1;
        ld->no_more_reading = 1;
--- a/apps/codecs/libfaad/bits.h
+++ b/apps/codecs/libfaad/bits.h
@ -55,15 +55,11 @@ typedef struct _bitfile
    void *buffer;
 } bitfile;
-
+/* rockbox: use asm optimized swap32()
 #if defined (_WIN32) && !defined(_WIN32_WCE) && !defined(__MINGW32__)
 #define BSWAP(a) __asm mov eax,a __asm bswap eax __asm mov a, eax
 #elif defined(LINUX) || defined(DJGPP) || defined(__MINGW32__)
 #define BSWAP(a) __asm__ ( "bswapl %0\n" : "=r" (a) : "0" (a) )
 #else
 #define BSWAP(a) \
    ((a) = ( ((a)&0xff)<<24) | (((a)&0xff00)<<8) | (((a)>>8)&0xff00) | (((a)>>24)&0xff))
-#endif
+*/
 #define BSWAP(a) swap32(a)
 static uint32_t bitmask[] = {
    0x0, 0x1, 0x3, 0x7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF,
@ -81,7 +77,7 @@ void faad_initbits_rev(bitfile *ld, void *buffer,
                       uint32_t bits_in_buffer);
 uint8_t faad_byte_align(bitfile *ld);
 uint32_t faad_get_processed_bits(bitfile *ld);
-void faad_flushbits_ex(bitfile *ld, uint32_t bits);
+INLINE void faad_flushbits_ex(bitfile *ld, uint32_t bits);
 void faad_rewindbits(bitfile *ld);
 uint8_t *faad_getbitbuffer(bitfile *ld, uint32_t bits
                       DEBUGDEC);
@ -93,28 +89,10 @@ uint32_t faad_origbitbuffer_size(bitfile *ld);
 /* circumvent memory alignment errors on ARM */
 static INLINE uint32_t getdword(void *mem)
 {
 #ifdef ARM
    uint32_t tmp;
 #ifndef ARCH_IS_BIG_ENDIAN
-    ((uint8_t*)&tmp)[0] = ((uint8_t*)mem)[3];
+    return BSWAP(*(uint32_t*)mem);
    ((uint8_t*)&tmp)[1] = ((uint8_t*)mem)[2];
    ((uint8_t*)&tmp)[2] = ((uint8_t*)mem)[1];
    ((uint8_t*)&tmp)[3] = ((uint8_t*)mem)[0];
 #else
-    ((uint8_t*)&tmp)[0] = ((uint8_t*)mem)[0];
+    return *(uint32_t*)mem;
    ((uint8_t*)&tmp)[1] = ((uint8_t*)mem)[1];
    ((uint8_t*)&tmp)[2] = ((uint8_t*)mem)[2];
    ((uint8_t*)&tmp)[3] = ((uint8_t*)mem)[3];
 #endif
    return tmp;
 #else
    uint32_t tmp;
    tmp = *(uint32_t*)mem;
 #ifndef ARCH_IS_BIG_ENDIAN
    BSWAP(tmp);
 #endif
    return tmp;
 #endif
 }
--- a/apps/codecs/libfaad/codebook/hcb_1.h
+++ b/apps/codecs/libfaad/codebook/hcb_1.h
@ -33,7 +33,7 @@
 *
 * Used to find offset into 2nd step table and number of extra bits to get
 */
-static hcb hcb1_1[] = {
+static hcb hcb1_1[] ICONST_ATTR_FAAD_LARGE_IRAM = {
    { /* 00000 */ 0, 0 },
    { /*       */ 0, 0 },
    { /*       */ 0, 0 },
@ -78,7 +78,7 @@ static hcb hcb1_1[] = {
 *
 * Gives size of codeword and actual data (x,y,v,w)
 */
-static hcb_2_quad hcb1_2[] = {
+static hcb_2_quad hcb1_2[] ICONST_ATTR_FAAD_LARGE_IRAM = {
    /* 1 bit codeword */
    { 1,  0,  0,  0,  0 },
--- a/apps/codecs/libfaad/codebook/hcb_10.h
+++ b/apps/codecs/libfaad/codebook/hcb_10.h
@ -33,7 +33,7 @@
 *
 * Used to find offset into 2nd step table and number of extra bits to get
 */
-static hcb hcb10_1[] = {
+static hcb hcb10_1[] ICONST_ATTR_FAAD_LARGE_IRAM = {
    /* 4 bit codewords */
    { /* 000000 */ 0, 0 },
    { /*        */ 0, 0 },
@ -115,7 +115,7 @@ static hcb hcb10_1[] = {
 *
 * Gives size of codeword and actual data (x,y,v,w)
 */
-static hcb_2_pair hcb10_2[] = {
+static hcb_2_pair hcb10_2[] ICONST_ATTR = {
    /* 4 bit codewords */
    { 4,  1,  1 },
    { 4,  1,  2 },
--- a/apps/codecs/libfaad/codebook/hcb_11.h
+++ b/apps/codecs/libfaad/codebook/hcb_11.h
@ -33,7 +33,7 @@
 *
 * Used to find offset into 2nd step table and number of extra bits to get
 */
-static hcb hcb11_1[] = {
+static hcb hcb11_1[] ICONST_ATTR_FAAD_LARGE_IRAM = {
    /* 4 bits */
    { /* 00000 */ 0, 0 },
    { /*       */ 0, 0 },
@ -95,7 +95,7 @@ static hcb hcb11_1[] = {
 *
 * Gives size of codeword and actual data (x,y,v,w)
 */
-static hcb_2_pair hcb11_2[] = {
+static hcb_2_pair hcb11_2[] ICONST_ATTR_FAAD_LARGE_IRAM = {
    /* 4 */
    { 4,  0,  0 },
    { 4,  1,  1 },
--- a/apps/codecs/libfaad/codebook/hcb_2.h
+++ b/apps/codecs/libfaad/codebook/hcb_2.h
@ -33,7 +33,7 @@
 *
 * Used to find offset into 2nd step table and number of extra bits to get
 */
-static hcb hcb2_1[] = {
+static hcb hcb2_1[] ICONST_ATTR_FAAD_LARGE_IRAM = {
    { /* 00000 */ 0, 0 },
    { /*       */ 0, 0 },
    { /*       */ 0, 0 },
@ -82,7 +82,7 @@ static hcb hcb2_1[] = {
 *
 * Gives size of codeword and actual data (x,y,v,w)
 */
-static hcb_2_quad hcb2_2[] = {
+static hcb_2_quad hcb2_2[] ICONST_ATTR_FAAD_LARGE_IRAM = {
    /* 3 bit codeword */
    { 3,  0,  0,  0,  0 },
--- a/apps/codecs/libfaad/codebook/hcb_3.h
+++ b/apps/codecs/libfaad/codebook/hcb_3.h
@ -28,7 +28,7 @@
 /* Binary search huffman table HCB_3 */
-static hcb_bin_quad hcb3[] = {
+static hcb_bin_quad hcb3[] ICONST_ATTR_FAAD_LARGE_IRAM = {
    { /*  0 */ 0, {  1,  2, 0, 0 } },
    { /*  1 */ 1, {  0,  0, 0, 0 } }, /* 0 */
    { /*  2 */ 0, {  1,  2, 0, 0 } },
--- a/apps/codecs/libfaad/codebook/hcb_4.h
+++ b/apps/codecs/libfaad/codebook/hcb_4.h
@ -33,7 +33,7 @@
 *
 * Used to find offset into 2nd step table and number of extra bits to get
 */
-static hcb hcb4_1[] = {
+static hcb hcb4_1[] ICONST_ATTR_FAAD_LARGE_IRAM = {
    /* 4 bit codewords */
    { /* 00000 */ 0, 0 },
    { /*       */ 0, 0 },
@ -85,7 +85,7 @@ static hcb hcb4_1[] = {
 *
 * Gives size of codeword and actual data (x,y,v,w)
 */
-static hcb_2_quad hcb4_2[] = {
+static hcb_2_quad hcb4_2[] ICONST_ATTR_FAAD_LARGE_IRAM = {
    /* 4 bit codewords */
    { 4,  1,  1,  1,  1 },
    { 4,  0,  1,  1,  1 },
--- a/apps/codecs/libfaad/codebook/hcb_5.h
+++ b/apps/codecs/libfaad/codebook/hcb_5.h
@ -28,7 +28,7 @@
 /* Binary search huffman table HCB_5 */
-static hcb_bin_pair hcb5[] = {
+static hcb_bin_pair hcb5[] ICONST_ATTR_FAAD_LARGE_IRAM = {
    { /*  0 */ 0, {  1,  2 } },
    { /*  1 */ 1, {  0,  0 } }, /* 0 */
    { /*  2 */ 0, {  1,  2 } },
--- a/apps/codecs/libfaad/codebook/hcb_6.h
+++ b/apps/codecs/libfaad/codebook/hcb_6.h
@ -33,7 +33,7 @@
 *
 * Used to find offset into 2nd step table and number of extra bits to get
 */
-static hcb hcb6_1[] = {
+static hcb hcb6_1[] ICONST_ATTR = {
    /* 4 bit codewords */
    { /* 00000 */ 0, 0 },
    { /*       */ 0, 0 },
@ -83,7 +83,7 @@ static hcb hcb6_1[] = {
 *
 * Gives size of codeword and actual data (x,y,v,w)
 */
-static hcb_2_pair hcb6_2[] = {
+static hcb_2_pair hcb6_2[] ICONST_ATTR = {
    /* 4 bit codewords */
    { 4,  0,  0 },
    { 4,  1,  0 },
--- a/apps/codecs/libfaad/codebook/hcb_7.h
+++ b/apps/codecs/libfaad/codebook/hcb_7.h
@ -28,7 +28,7 @@
 /* Binary search huffman table HCB_7 */
-static hcb_bin_pair hcb7[] = {
+static hcb_bin_pair hcb7[] ICONST_ATTR_FAAD_LARGE_IRAM = {
    { /*  0 */ 0, { 1, 2 } },
    { /*  1 */ 1, { 0, 0 } },
    { /*  2 */ 0, { 1, 2 } },
--- a/apps/codecs/libfaad/codebook/hcb_8.h
+++ b/apps/codecs/libfaad/codebook/hcb_8.h
@ -33,7 +33,7 @@
 *
 * Used to find offset into 2nd step table and number of extra bits to get
 */
-static hcb hcb8_1[] = {
+static hcb hcb8_1[] ICONST_ATTR = {
    /* 3 bit codeword */
    { /* 00000 */ 0, 0 },
    { /*       */ 0, 0 },
@ -87,7 +87,7 @@ static hcb hcb8_1[] = {
 *
 * Gives size of codeword and actual data (x,y,v,w)
 */
-static hcb_2_pair hcb8_2[] = {
+static hcb_2_pair hcb8_2[] ICONST_ATTR = {
    /* 3 bit codeword */
    { 3,  1,  1 },
--- a/apps/codecs/libfaad/codebook/hcb_9.h
+++ b/apps/codecs/libfaad/codebook/hcb_9.h
@ -28,7 +28,7 @@
 /* Binary search huffman table HCB_9 */
-static hcb_bin_pair hcb9[] = {
+static hcb_bin_pair hcb9[] ICONST_ATTR_FAAD_LARGE_IRAM = {
    { /*  0 */ 0, { 1, 2 } },
    { /*  1 */ 1, { 0, 0 } },
    { /*  2 */ 0, { 1, 2 } },
--- a/apps/codecs/libfaad/codebook/hcb_sf.h
+++ b/apps/codecs/libfaad/codebook/hcb_sf.h
@ -28,7 +28,7 @@
 /* Binary search huffman table HCB_SF */
-static uint8_t hcb_sf[][2] = {
+static uint8_t hcb_sf[][2] ICONST_ATTR_FAAD_LARGE_IRAM = {
    { /*  0 */  1, 2 },
    { /*  1 */  60, 0 },
    { /*  2 */  1, 2 },
--- a/apps/codecs/libfaad/common.h
+++ b/apps/codecs/libfaad/common.h
@ -51,6 +51,25 @@ extern struct codec_api* ci;
 #define LOGF(...)
 #endif
 #if   (CONFIG_CPU == MCF5250) || defined(CPU_S5L870X)
 /* Enough IRAM but performance suffers with ICODE_ATTR. */
 #define IBSS_ATTR_FAAD_LARGE_IRAM   IBSS_ATTR
 #define ICODE_ATTR_FAAD_LARGE_IRAM
 #define ICONST_ATTR_FAAD_LARGE_IRAM ICONST_ATTR
 #elif (CONFIG_CPU == PP5022) || (CONFIG_CPU == PP5024)
 /* Enough IRAM to move additional data and code to it. */
 #define IBSS_ATTR_FAAD_LARGE_IRAM   IBSS_ATTR
 #define ICODE_ATTR_FAAD_LARGE_IRAM  ICODE_ATTR
 #define ICONST_ATTR_FAAD_LARGE_IRAM ICONST_ATTR
 #else
 /* Not enough IRAM available. */
 #define IBSS_ATTR_FAAD_LARGE_IRAM
 #define ICODE_ATTR_FAAD_LARGE_IRAM
 #define ICONST_ATTR_FAAD_LARGE_IRAM
 #endif
 #define INLINE __inline
 #if 0 //defined(_WIN32) && !defined(_WIN32_WCE)
 #define ALIGN __declspec(align(16))
@ -71,7 +90,7 @@ extern struct codec_api* ci;
 /* #define USE_DOUBLE_PRECISION */
 /* use fixed point reals */
 #define FIXED_POINT
-//#define BIG_IQ_TABLE
+#define BIG_IQ_TABLE /* BIG_IQ_TABLE results in faster requantization */
 /* Use if target platform has address generators with autoincrement */
 //#define PREFER_POINTERS
--- a/apps/codecs/libfaad/huffman.c
+++ b/apps/codecs/libfaad/huffman.c
@ -39,17 +39,18 @@
 /* static function declarations */
-static INLINE void huffman_sign_bits(bitfile *ld, int16_t *sp, uint8_t len);
+static INLINE void huffman_sign_bits_pair(bitfile *ld, int16_t *sp);
 static INLINE void huffman_sign_bits_quad(bitfile *ld, int16_t *sp);
 static INLINE int16_t huffman_getescape(bitfile *ld, int16_t sp);
 static uint8_t huffman_2step_quad(uint8_t cb, bitfile *ld, int16_t *sp);
 static uint8_t huffman_2step_quad_sign(uint8_t cb, bitfile *ld, int16_t *sp);
-static uint8_t huffman_2step_pair(uint8_t cb, bitfile *ld, int16_t *sp);
+static INLINE uint8_t huffman_2step_pair(uint8_t cb, bitfile *ld, int16_t *sp);
-static uint8_t huffman_2step_pair_sign(uint8_t cb, bitfile *ld, int16_t *sp);
+static INLINE uint8_t huffman_2step_pair_sign(uint8_t cb, bitfile *ld, int16_t *sp);
 static uint8_t huffman_binary_quad(uint8_t cb, bitfile *ld, int16_t *sp);
 static uint8_t huffman_binary_quad_sign(uint8_t cb, bitfile *ld, int16_t *sp);
 static uint8_t huffman_binary_pair(uint8_t cb, bitfile *ld, int16_t *sp);
 static uint8_t huffman_binary_pair_sign(uint8_t cb, bitfile *ld, int16_t *sp);
-static int16_t huffman_codebook(uint8_t i);
+static int16_t huffman_codebook(uint8_t i) ICODE_ATTR_FAAD_LARGE_IRAM;
 #ifdef ERROR_RESILIENCE
 static void vcb11_check_LAV(uint8_t cb, int16_t *sp);
 #endif
@ -75,49 +76,51 @@ int8_t huffman_scale_factor(bitfile *ld)
 }
-hcb *hcb_table[] = {
+hcb *hcb_table[] ICONST_ATTR = {
    0, hcb1_1, hcb2_1, 0, hcb4_1, 0, hcb6_1, 0, hcb8_1, 0, hcb10_1, hcb11_1
 };
-hcb_2_quad *hcb_2_quad_table[] = {
+hcb_2_quad *hcb_2_quad_table[] ICONST_ATTR = {
    0, hcb1_2, hcb2_2, 0, hcb4_2, 0, 0, 0, 0, 0, 0, 0
 };
-hcb_2_pair *hcb_2_pair_table[] = {
+hcb_2_pair *hcb_2_pair_table[] ICONST_ATTR = {
    0, 0, 0, 0, 0, 0, hcb6_2, 0, hcb8_2, 0, hcb10_2, hcb11_2
 };
-hcb_bin_pair *hcb_bin_table[] = {
+hcb_bin_pair *hcb_bin_table[] ICONST_ATTR = {
    0, 0, 0, 0, 0, hcb5, 0, hcb7, 0, hcb9, 0, 0
 };
-uint8_t hcbN[] = { 0, 5, 5, 0, 5, 0, 5, 0, 5, 0, 6, 5 };
+uint8_t hcbN[] ICONST_ATTR = { 0, 5, 5, 0, 5, 0, 5, 0, 5, 0, 6, 5 };
 /* defines whether a huffman codebook is unsigned or not */
 /* Table 4.6.2 */
-uint8_t unsigned_cb[] = { 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0,
+uint8_t unsigned_cb[] ICONST_ATTR = { 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0,
-  /* codebook 16 to 31 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+              /* codebook 16 to 31 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
 };
-int hcb_2_quad_table_size[] = { 0, 114, 86, 0, 185, 0, 0, 0, 0, 0, 0, 0 };
+int hcb_2_quad_table_size[] ICONST_ATTR = { 0, 114, 86, 0, 185, 0, 0, 0, 0, 0, 0, 0 };
-int hcb_2_pair_table_size[] = { 0, 0, 0, 0, 0, 0, 126, 0, 83, 0, 210, 373 };
+int hcb_2_pair_table_size[] ICONST_ATTR = { 0, 0, 0, 0, 0, 0, 126, 0, 83, 0, 210, 373 };
-int hcb_bin_table_size[] = { 0, 0, 0, 161, 0, 161, 0, 127, 0, 337, 0, 0 };
+int hcb_bin_table_size[] ICONST_ATTR = { 0, 0, 0, 161, 0, 161, 0, 127, 0, 337, 0, 0 };
-static INLINE void huffman_sign_bits(bitfile *ld, int16_t *sp, uint8_t len)
+#define FAAD_GET_SIGN(idx)          \
        if (sp[idx])                \
            if (faad_get1bit(ld)&1) \
                sp[idx] = -sp[idx]; \
 static INLINE void huffman_sign_bits_pair(bitfile *ld, int16_t *sp)
 {
-    uint8_t i;
+    FAAD_GET_SIGN(0)
    FAAD_GET_SIGN(1)
 }
-    for (i = 0; i < len; i++)
+static INLINE void huffman_sign_bits_quad(bitfile *ld, int16_t *sp)
-    {
+{
-        if(sp[i])
+    FAAD_GET_SIGN(0)
-        {
+    FAAD_GET_SIGN(1)
-            if(faad_get1bit(ld
+    FAAD_GET_SIGN(2)
-                DEBUGVAR(1,5,"huffman_sign_bits(): sign bit")) & 1)
+    FAAD_GET_SIGN(3)
            {
                sp[i] = -sp[i];
            }
        }
    }
 }
 static INLINE int16_t huffman_getescape(bitfile *ld, int16_t sp)
@ -194,12 +197,12 @@ static uint8_t huffman_2step_quad(uint8_t cb, bitfile *ld, int16_t *sp)
 static uint8_t huffman_2step_quad_sign(uint8_t cb, bitfile *ld, int16_t *sp)
 {
    uint8_t err = huffman_2step_quad(cb, ld, sp);
-    huffman_sign_bits(ld, sp, QUAD_LEN);
+    huffman_sign_bits_quad(ld, sp);
    return err;
 }
-static uint8_t huffman_2step_pair(uint8_t cb, bitfile *ld, int16_t *sp)
+static INLINE uint8_t huffman_2step_pair(uint8_t cb, bitfile *ld, int16_t *sp)
 {
    uint32_t cw;
    uint16_t offset = 0;
@ -232,10 +235,10 @@ static uint8_t huffman_2step_pair(uint8_t cb, bitfile *ld, int16_t *sp)
    return 0;
 }
-static uint8_t huffman_2step_pair_sign(uint8_t cb, bitfile *ld, int16_t *sp)
+static INLINE uint8_t huffman_2step_pair_sign(uint8_t cb, bitfile *ld, int16_t *sp)
 {
    uint8_t err = huffman_2step_pair(cb, ld, sp);
-    huffman_sign_bits(ld, sp, PAIR_LEN);
+    huffman_sign_bits_pair(ld, sp);
    return err;
 }
@ -269,7 +272,7 @@ static uint8_t huffman_binary_quad(uint8_t cb, bitfile *ld, int16_t *sp)
 static uint8_t huffman_binary_quad_sign(uint8_t cb, bitfile *ld, int16_t *sp)
 {
    uint8_t err = huffman_binary_quad(cb, ld, sp);
-    huffman_sign_bits(ld, sp, QUAD_LEN);
+    huffman_sign_bits_quad(ld, sp);
    return err;
 }
@ -301,7 +304,7 @@ static uint8_t huffman_binary_pair(uint8_t cb, bitfile *ld, int16_t *sp)
 static uint8_t huffman_binary_pair_sign(uint8_t cb, bitfile *ld, int16_t *sp)
 {
    uint8_t err = huffman_binary_pair(cb, ld, sp);
-    huffman_sign_bits(ld, sp, PAIR_LEN);
+    huffman_sign_bits_pair(ld, sp);
    return err;
 }
--- a/apps/codecs/libfaad/huffman.h
+++ b/apps/codecs/libfaad/huffman.h
@ -33,7 +33,7 @@ extern "C" {
 #endif
 int8_t huffman_scale_factor(bitfile *ld);
-uint8_t huffman_spectral_data(uint8_t cb, bitfile *ld, int16_t *sp);
+uint8_t huffman_spectral_data(uint8_t cb, bitfile *ld, int16_t *sp) ICODE_ATTR_FAAD_LARGE_IRAM;
 #ifdef ERROR_RESILIENCE
 int8_t huffman_spectral_data_2(uint8_t cb, bits_t *ld, int16_t *sp);
 #endif
--- a/apps/codecs/libfaad/specrec.c
+++ b/apps/codecs/libfaad/specrec.c
@ -414,19 +414,18 @@ uint8_t window_grouping_info(NeAACDecHandle hDecoder, ic_stream *ics)
 /**/
 static INLINE real_t iquant(int16_t q, const real_t *tab, uint8_t *error)
 {
-#ifdef FIXED_POINT
+#ifndef BIG_IQ_TABLE
 /* For FIXED_POINT the iq_table is prescaled by 3 bits (iq_table[]/8) */
 /* BIG_IQ_TABLE allows you to use the full 8192 value table, if this is not
 * defined a 1026 value table and interpolation will be used
 */
 #ifndef BIG_IQ_TABLE
    static const real_t errcorr[] = {
        REAL_CONST(0), REAL_CONST(1.0/8.0), REAL_CONST(2.0/8.0), REAL_CONST(3.0/8.0),
        REAL_CONST(4.0/8.0),  REAL_CONST(5.0/8.0), REAL_CONST(6.0/8.0), REAL_CONST(7.0/8.0),
        REAL_CONST(0)
    };
    real_t x1, x2;
-#endif
+
    int16_t sgn = 1;
    if (q < 0)
@ -445,7 +444,6 @@ static INLINE real_t iquant(int16_t q, const real_t *tab, uint8_t *error)
        return sgn * tab[q];
    }
 #ifndef BIG_IQ_TABLE
    if (q >= 8192)
    {
        *error = 17;
@ -456,12 +454,7 @@ static INLINE real_t iquant(int16_t q, const real_t *tab, uint8_t *error)
    x1 = tab[q>>3];
    x2 = tab[(q>>3) + 1];
    return sgn * 16 * (MUL_R(errcorr[q&7],(x2-x1)) + x1);
-#else
+#else /* #ifndef BIG_IQ_TABLE */
    *error = 17;
    return 0;
 #endif
 #else
    if (q < 0)
    {
        /* tab contains a value for all possible q [0,8192] */
@ -547,9 +540,7 @@ static uint8_t quant_to_spec(NeAACDecHandle hDecoder,
    uint8_t g, sfb, win;
    uint16_t width, bin, k, gindex, wa, wb;
    uint8_t error = 0; /* Init error flag */
 #ifndef FIXED_POINT
    real_t scf;
 #endif
    k = 0;
    gindex = 0;
@ -597,6 +588,8 @@ static uint8_t quant_to_spec(NeAACDecHandle hDecoder,
 #ifndef FIXED_POINT
            scf = pow2sf_tab[exp/*+25*/] * pow2_table[frac];
 #else
            scf = pow2_table[frac];
 #endif
            for (win = 0; win < ics->window_group_length[g]; win++)
@ -612,32 +605,12 @@ static uint8_t quant_to_spec(NeAACDecHandle hDecoder,
                    spec_data[wb+3] = iquant(quant_data[k+3], tab, &error) * scf;
 #else
                    real_t iq0 = iquant(quant_data[k+0], tab, &error);
                    real_t iq1 = iquant(quant_data[k+1], tab, &error);
                    real_t iq2 = iquant(quant_data[k+2], tab, &error);
                    real_t iq3 = iquant(quant_data[k+3], tab, &error);
                    wb = wa + bin;
-
+                 
-                    if (exp < 0)
+                    spec_data[wb+0] = MUL_C((iquant(quant_data[k+0], tab, &error)<<exp), scf);
-                    {
+                    spec_data[wb+1] = MUL_C((iquant(quant_data[k+1], tab, &error)<<exp), scf);
-                        spec_data[wb+0] = iq0 >>= -exp;
+                    spec_data[wb+2] = MUL_C((iquant(quant_data[k+2], tab, &error)<<exp), scf);
-                        spec_data[wb+1] = iq1 >>= -exp;
+                    spec_data[wb+3] = MUL_C((iquant(quant_data[k+3], tab, &error)<<exp), scf);
                        spec_data[wb+2] = iq2 >>= -exp;
                        spec_data[wb+3] = iq3 >>= -exp;
                    } else {
                        spec_data[wb+0] = iq0 <<= exp;
                        spec_data[wb+1] = iq1 <<= exp;
                        spec_data[wb+2] = iq2 <<= exp;
                        spec_data[wb+3] = iq3 <<= exp;
                    }
                    if (frac != 0)
                    {
                        spec_data[wb+0] = MUL_C(spec_data[wb+0],pow2_table[frac]);
                        spec_data[wb+1] = MUL_C(spec_data[wb+1],pow2_table[frac]);
                        spec_data[wb+2] = MUL_C(spec_data[wb+2],pow2_table[frac]);
                        spec_data[wb+3] = MUL_C(spec_data[wb+3],pow2_table[frac]);
                    }
 //#define SCFS_PRINT
 #ifdef SCFS_PRINT
@ -855,11 +828,14 @@ static uint8_t allocate_channel_pair(NeAACDecHandle hDecoder,
    return 0;
 }
 /* used by reconstruct_single_channel() and reconstruct_channel_pair() */
 ALIGN static real_t spec_coef1[1024] IBSS_ATTR;
 ALIGN static real_t spec_coef2[1024] IBSS_ATTR;
 uint8_t reconstruct_single_channel(NeAACDecHandle hDecoder, ic_stream *ics,
                                   element *sce, int16_t *spec_data)
 {
    uint8_t retval, output_channels;
    ALIGN static real_t spec_coef[1024];
 #ifdef PROFILE
    int64_t count = faad_get_ts();
@ -893,7 +869,7 @@ uint8_t reconstruct_single_channel(NeAACDecHandle hDecoder, ic_stream *ics,
    /* dequantisation and scaling */
-    retval = quant_to_spec(hDecoder, ics, spec_data, spec_coef, hDecoder->frameLength);
+    retval = quant_to_spec(hDecoder, ics, spec_data, spec_coef1, hDecoder->frameLength);
    if (retval > 0)
        return retval;
@ -904,14 +880,14 @@ uint8_t reconstruct_single_channel(NeAACDecHandle hDecoder, ic_stream *ics,
    /* pns decoding */
-    pns_decode(ics, NULL, spec_coef, NULL, hDecoder->frameLength, 0, hDecoder->object_type);
+    pns_decode(ics, NULL, spec_coef1, NULL, hDecoder->frameLength, 0, hDecoder->object_type);
 #ifdef MAIN_DEC
    /* MAIN object type prediction */
    if (hDecoder->object_type == MAIN)
    {
        /* intra channel prediction */
-        ic_prediction(ics, spec_coef, hDecoder->pred_stat[sce->channel], hDecoder->frameLength,
+        ic_prediction(ics, spec_coef1, hDecoder->pred_stat[sce->channel], hDecoder->frameLength,
            hDecoder->sf_index);
        /* In addition, for scalefactor bands coded by perceptual
@ -938,7 +914,7 @@ uint8_t reconstruct_single_channel(NeAACDecHandle hDecoder, ic_stream *ics,
 #endif
        /* long term prediction */
-        lt_prediction(ics, &(ics->ltp), spec_coef, hDecoder->lt_pred_stat[sce->channel], hDecoder->fb,
+        lt_prediction(ics, &(ics->ltp), spec_coef1, hDecoder->lt_pred_stat[sce->channel], hDecoder->fb,
            ics->window_shape, hDecoder->window_shape_prev[sce->channel],
            hDecoder->sf_index, hDecoder->object_type, hDecoder->frameLength);
    }
@ -946,13 +922,13 @@ uint8_t reconstruct_single_channel(NeAACDecHandle hDecoder, ic_stream *ics,
    /* tns decoding */
    tns_decode_frame(ics, &(ics->tns), hDecoder->sf_index, hDecoder->object_type,
-        spec_coef, hDecoder->frameLength);
+        spec_coef1, hDecoder->frameLength);
    /* drc decoding */
    if (hDecoder->drc->present)
    {
        if (!hDecoder->drc->exclude_mask[sce->channel] || !hDecoder->drc->excluded_chns_present)
-            drc_decode(hDecoder->drc, spec_coef);
+            drc_decode(hDecoder->drc, spec_coef1);
    }
    /* filter bank */
@ -961,13 +937,13 @@ uint8_t reconstruct_single_channel(NeAACDecHandle hDecoder, ic_stream *ics,
    {
 #endif
        ifilter_bank(ics->window_sequence,ics->window_shape,
-            hDecoder->window_shape_prev[sce->channel],spec_coef,
+            hDecoder->window_shape_prev[sce->channel],spec_coef1,
            hDecoder->time_out[sce->channel], hDecoder->fb_intermed[sce->channel],
            hDecoder->object_type, hDecoder->frameLength);
 #ifdef SSR_DEC
    } else {
        ssr_decode(&(ics->ssr), hDecoder->fb, ics->window_sequence, ics->window_shape,
-            hDecoder->window_shape_prev[sce->channel], spec_coef, hDecoder->time_out[sce->channel],
+            hDecoder->window_shape_prev[sce->channel], spec_coef1, hDecoder->time_out[sce->channel],
            hDecoder->ssr_overlap[sce->channel], hDecoder->ipqf_buffer[sce->channel], hDecoder->prev_fmd[sce->channel],
            hDecoder->frameLength);
    }
@ -1051,8 +1027,6 @@ uint8_t reconstruct_channel_pair(NeAACDecHandle hDecoder, ic_stream *ics1, ic_st
                                 element *cpe, int16_t *spec_data1, int16_t *spec_data2)
 {
    uint8_t retval;
    ALIGN static real_t spec_coef1[1024] IBSS_ATTR;
    ALIGN static real_t spec_coef2[1024] IBSS_ATTR;
 #ifdef PROFILE
    int64_t count = faad_get_ts();