1
0
Fork 0
forked from len0rd/rockbox

Submit FS#11445. Speed up the faad (AAC) decoder through several optimizations: refactoring some requantization routines, moving several arrays and code tables to IRAM, using an optimized swap32() function, and inlining several Huffman decoder functions. Decoding is sped up by ~10% (PP5002, PP5022, MCF5249) and ~22% (MCF5250).

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27225 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Andree Buschmann 2010-07-01 21:18:42 +00:00
parent b013fb76c4
commit 52f17dfe9d
19 changed files with 121 additions and 134 deletions

View file

@ -27,6 +27,11 @@
CODEC_HEADER CODEC_HEADER
/* Global buffers to be used in the mdct synthesis. This way the arrays can
* be moved to IRAM for some targets */
ALIGN real_t gb_time_buffer[2][1024] IBSS_ATTR_FAAD_LARGE_IRAM;
ALIGN real_t gb_fb_intermed[2][1024] IBSS_ATTR_FAAD_LARGE_IRAM;
/* this is the codec entry point */ /* this is the codec entry point */
enum codec_status codec_main(void) enum codec_status codec_main(void)
{ {
@ -105,7 +110,14 @@ next_track:
err = CODEC_ERROR; err = CODEC_ERROR;
goto done; goto done;
} }
/* Set pointers to be able to use IRAM and to avoid alloc in decoder. Must
* be done after NeAACDecOpen(). */
decoder->time_out[0] = &gb_time_buffer[0][0];
decoder->time_out[1] = &gb_time_buffer[1][0];
decoder->fb_intermed[0] = &gb_fb_intermed[0][0];
decoder->fb_intermed[1] = &gb_fb_intermed[1][0];
ci->id3->frequency = s; ci->id3->frequency = s;
i = 0; i = 0;

View file

@ -33,9 +33,10 @@
#include "bits.h" #include "bits.h"
/* Need to be large enough to fit the largest compressed sample in a file. /* Need to be large enough to fit the largest compressed sample in a file.
* Samples a little larger than 1 KB observed in a 256 kbps file. * Samples were observed to need up to 1500 bytes (400 kbps nero aac).
*/ */
uint8_t static_buffer[2048]; #define BUFFER_SIZE 2048
uint8_t static_buffer[BUFFER_SIZE] IBSS_ATTR;
/* initialize buffer, call once before first getbits or showbits */ /* initialize buffer, call once before first getbits or showbits */
void faad_initbits(bitfile *ld, const void *_buffer, const uint32_t buffer_size) void faad_initbits(bitfile *ld, const void *_buffer, const uint32_t buffer_size)
@ -47,7 +48,7 @@ void faad_initbits(bitfile *ld, const void *_buffer, const uint32_t buffer_size)
memset(ld, 0, sizeof(bitfile)); memset(ld, 0, sizeof(bitfile));
if (buffer_size == 0 || _buffer == NULL) if (buffer_size == 0 || _buffer == NULL || (buffer_size+12)>BUFFER_SIZE)
{ {
ld->error = 1; ld->error = 1;
ld->no_more_reading = 1; ld->no_more_reading = 1;

View file

@ -55,15 +55,11 @@ typedef struct _bitfile
void *buffer; void *buffer;
} bitfile; } bitfile;
/* rockbox: use asm optimized swap32()
#if defined (_WIN32) && !defined(_WIN32_WCE) && !defined(__MINGW32__)
#define BSWAP(a) __asm mov eax,a __asm bswap eax __asm mov a, eax
#elif defined(LINUX) || defined(DJGPP) || defined(__MINGW32__)
#define BSWAP(a) __asm__ ( "bswapl %0\n" : "=r" (a) : "0" (a) )
#else
#define BSWAP(a) \ #define BSWAP(a) \
((a) = ( ((a)&0xff)<<24) | (((a)&0xff00)<<8) | (((a)>>8)&0xff00) | (((a)>>24)&0xff)) ((a) = ( ((a)&0xff)<<24) | (((a)&0xff00)<<8) | (((a)>>8)&0xff00) | (((a)>>24)&0xff))
#endif */
#define BSWAP(a) swap32(a)
static uint32_t bitmask[] = { static uint32_t bitmask[] = {
0x0, 0x1, 0x3, 0x7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x0, 0x1, 0x3, 0x7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF,
@ -81,7 +77,7 @@ void faad_initbits_rev(bitfile *ld, void *buffer,
uint32_t bits_in_buffer); uint32_t bits_in_buffer);
uint8_t faad_byte_align(bitfile *ld); uint8_t faad_byte_align(bitfile *ld);
uint32_t faad_get_processed_bits(bitfile *ld); uint32_t faad_get_processed_bits(bitfile *ld);
void faad_flushbits_ex(bitfile *ld, uint32_t bits); INLINE void faad_flushbits_ex(bitfile *ld, uint32_t bits);
void faad_rewindbits(bitfile *ld); void faad_rewindbits(bitfile *ld);
uint8_t *faad_getbitbuffer(bitfile *ld, uint32_t bits uint8_t *faad_getbitbuffer(bitfile *ld, uint32_t bits
DEBUGDEC); DEBUGDEC);
@ -93,28 +89,10 @@ uint32_t faad_origbitbuffer_size(bitfile *ld);
/* circumvent memory alignment errors on ARM */ /* circumvent memory alignment errors on ARM */
static INLINE uint32_t getdword(void *mem) static INLINE uint32_t getdword(void *mem)
{ {
#ifdef ARM
uint32_t tmp;
#ifndef ARCH_IS_BIG_ENDIAN #ifndef ARCH_IS_BIG_ENDIAN
((uint8_t*)&tmp)[0] = ((uint8_t*)mem)[3]; return BSWAP(*(uint32_t*)mem);
((uint8_t*)&tmp)[1] = ((uint8_t*)mem)[2];
((uint8_t*)&tmp)[2] = ((uint8_t*)mem)[1];
((uint8_t*)&tmp)[3] = ((uint8_t*)mem)[0];
#else #else
((uint8_t*)&tmp)[0] = ((uint8_t*)mem)[0]; return *(uint32_t*)mem;
((uint8_t*)&tmp)[1] = ((uint8_t*)mem)[1];
((uint8_t*)&tmp)[2] = ((uint8_t*)mem)[2];
((uint8_t*)&tmp)[3] = ((uint8_t*)mem)[3];
#endif
return tmp;
#else
uint32_t tmp;
tmp = *(uint32_t*)mem;
#ifndef ARCH_IS_BIG_ENDIAN
BSWAP(tmp);
#endif
return tmp;
#endif #endif
} }

View file

@ -33,7 +33,7 @@
* *
* Used to find offset into 2nd step table and number of extra bits to get * Used to find offset into 2nd step table and number of extra bits to get
*/ */
static hcb hcb1_1[] = { static hcb hcb1_1[] ICONST_ATTR_FAAD_LARGE_IRAM = {
{ /* 00000 */ 0, 0 }, { /* 00000 */ 0, 0 },
{ /* */ 0, 0 }, { /* */ 0, 0 },
{ /* */ 0, 0 }, { /* */ 0, 0 },
@ -78,7 +78,7 @@ static hcb hcb1_1[] = {
* *
* Gives size of codeword and actual data (x,y,v,w) * Gives size of codeword and actual data (x,y,v,w)
*/ */
static hcb_2_quad hcb1_2[] = { static hcb_2_quad hcb1_2[] ICONST_ATTR_FAAD_LARGE_IRAM = {
/* 1 bit codeword */ /* 1 bit codeword */
{ 1, 0, 0, 0, 0 }, { 1, 0, 0, 0, 0 },

View file

@ -33,7 +33,7 @@
* *
* Used to find offset into 2nd step table and number of extra bits to get * Used to find offset into 2nd step table and number of extra bits to get
*/ */
static hcb hcb10_1[] = { static hcb hcb10_1[] ICONST_ATTR_FAAD_LARGE_IRAM = {
/* 4 bit codewords */ /* 4 bit codewords */
{ /* 000000 */ 0, 0 }, { /* 000000 */ 0, 0 },
{ /* */ 0, 0 }, { /* */ 0, 0 },
@ -115,7 +115,7 @@ static hcb hcb10_1[] = {
* *
* Gives size of codeword and actual data (x,y,v,w) * Gives size of codeword and actual data (x,y,v,w)
*/ */
static hcb_2_pair hcb10_2[] = { static hcb_2_pair hcb10_2[] ICONST_ATTR = {
/* 4 bit codewords */ /* 4 bit codewords */
{ 4, 1, 1 }, { 4, 1, 1 },
{ 4, 1, 2 }, { 4, 1, 2 },

View file

@ -33,7 +33,7 @@
* *
* Used to find offset into 2nd step table and number of extra bits to get * Used to find offset into 2nd step table and number of extra bits to get
*/ */
static hcb hcb11_1[] = { static hcb hcb11_1[] ICONST_ATTR_FAAD_LARGE_IRAM = {
/* 4 bits */ /* 4 bits */
{ /* 00000 */ 0, 0 }, { /* 00000 */ 0, 0 },
{ /* */ 0, 0 }, { /* */ 0, 0 },
@ -95,7 +95,7 @@ static hcb hcb11_1[] = {
* *
* Gives size of codeword and actual data (x,y,v,w) * Gives size of codeword and actual data (x,y,v,w)
*/ */
static hcb_2_pair hcb11_2[] = { static hcb_2_pair hcb11_2[] ICONST_ATTR_FAAD_LARGE_IRAM = {
/* 4 */ /* 4 */
{ 4, 0, 0 }, { 4, 0, 0 },
{ 4, 1, 1 }, { 4, 1, 1 },

View file

@ -33,7 +33,7 @@
* *
* Used to find offset into 2nd step table and number of extra bits to get * Used to find offset into 2nd step table and number of extra bits to get
*/ */
static hcb hcb2_1[] = { static hcb hcb2_1[] ICONST_ATTR_FAAD_LARGE_IRAM = {
{ /* 00000 */ 0, 0 }, { /* 00000 */ 0, 0 },
{ /* */ 0, 0 }, { /* */ 0, 0 },
{ /* */ 0, 0 }, { /* */ 0, 0 },
@ -82,7 +82,7 @@ static hcb hcb2_1[] = {
* *
* Gives size of codeword and actual data (x,y,v,w) * Gives size of codeword and actual data (x,y,v,w)
*/ */
static hcb_2_quad hcb2_2[] = { static hcb_2_quad hcb2_2[] ICONST_ATTR_FAAD_LARGE_IRAM = {
/* 3 bit codeword */ /* 3 bit codeword */
{ 3, 0, 0, 0, 0 }, { 3, 0, 0, 0, 0 },

View file

@ -28,7 +28,7 @@
/* Binary search huffman table HCB_3 */ /* Binary search huffman table HCB_3 */
static hcb_bin_quad hcb3[] = { static hcb_bin_quad hcb3[] ICONST_ATTR_FAAD_LARGE_IRAM = {
{ /* 0 */ 0, { 1, 2, 0, 0 } }, { /* 0 */ 0, { 1, 2, 0, 0 } },
{ /* 1 */ 1, { 0, 0, 0, 0 } }, /* 0 */ { /* 1 */ 1, { 0, 0, 0, 0 } }, /* 0 */
{ /* 2 */ 0, { 1, 2, 0, 0 } }, { /* 2 */ 0, { 1, 2, 0, 0 } },

View file

@ -33,7 +33,7 @@
* *
* Used to find offset into 2nd step table and number of extra bits to get * Used to find offset into 2nd step table and number of extra bits to get
*/ */
static hcb hcb4_1[] = { static hcb hcb4_1[] ICONST_ATTR_FAAD_LARGE_IRAM = {
/* 4 bit codewords */ /* 4 bit codewords */
{ /* 00000 */ 0, 0 }, { /* 00000 */ 0, 0 },
{ /* */ 0, 0 }, { /* */ 0, 0 },
@ -85,7 +85,7 @@ static hcb hcb4_1[] = {
* *
* Gives size of codeword and actual data (x,y,v,w) * Gives size of codeword and actual data (x,y,v,w)
*/ */
static hcb_2_quad hcb4_2[] = { static hcb_2_quad hcb4_2[] ICONST_ATTR_FAAD_LARGE_IRAM = {
/* 4 bit codewords */ /* 4 bit codewords */
{ 4, 1, 1, 1, 1 }, { 4, 1, 1, 1, 1 },
{ 4, 0, 1, 1, 1 }, { 4, 0, 1, 1, 1 },

View file

@ -28,7 +28,7 @@
/* Binary search huffman table HCB_5 */ /* Binary search huffman table HCB_5 */
static hcb_bin_pair hcb5[] = { static hcb_bin_pair hcb5[] ICONST_ATTR_FAAD_LARGE_IRAM = {
{ /* 0 */ 0, { 1, 2 } }, { /* 0 */ 0, { 1, 2 } },
{ /* 1 */ 1, { 0, 0 } }, /* 0 */ { /* 1 */ 1, { 0, 0 } }, /* 0 */
{ /* 2 */ 0, { 1, 2 } }, { /* 2 */ 0, { 1, 2 } },

View file

@ -33,7 +33,7 @@
* *
* Used to find offset into 2nd step table and number of extra bits to get * Used to find offset into 2nd step table and number of extra bits to get
*/ */
static hcb hcb6_1[] = { static hcb hcb6_1[] ICONST_ATTR = {
/* 4 bit codewords */ /* 4 bit codewords */
{ /* 00000 */ 0, 0 }, { /* 00000 */ 0, 0 },
{ /* */ 0, 0 }, { /* */ 0, 0 },
@ -83,7 +83,7 @@ static hcb hcb6_1[] = {
* *
* Gives size of codeword and actual data (x,y,v,w) * Gives size of codeword and actual data (x,y,v,w)
*/ */
static hcb_2_pair hcb6_2[] = { static hcb_2_pair hcb6_2[] ICONST_ATTR = {
/* 4 bit codewords */ /* 4 bit codewords */
{ 4, 0, 0 }, { 4, 0, 0 },
{ 4, 1, 0 }, { 4, 1, 0 },

View file

@ -28,7 +28,7 @@
/* Binary search huffman table HCB_7 */ /* Binary search huffman table HCB_7 */
static hcb_bin_pair hcb7[] = { static hcb_bin_pair hcb7[] ICONST_ATTR_FAAD_LARGE_IRAM = {
{ /* 0 */ 0, { 1, 2 } }, { /* 0 */ 0, { 1, 2 } },
{ /* 1 */ 1, { 0, 0 } }, { /* 1 */ 1, { 0, 0 } },
{ /* 2 */ 0, { 1, 2 } }, { /* 2 */ 0, { 1, 2 } },

View file

@ -33,7 +33,7 @@
* *
* Used to find offset into 2nd step table and number of extra bits to get * Used to find offset into 2nd step table and number of extra bits to get
*/ */
static hcb hcb8_1[] = { static hcb hcb8_1[] ICONST_ATTR = {
/* 3 bit codeword */ /* 3 bit codeword */
{ /* 00000 */ 0, 0 }, { /* 00000 */ 0, 0 },
{ /* */ 0, 0 }, { /* */ 0, 0 },
@ -87,7 +87,7 @@ static hcb hcb8_1[] = {
* *
* Gives size of codeword and actual data (x,y,v,w) * Gives size of codeword and actual data (x,y,v,w)
*/ */
static hcb_2_pair hcb8_2[] = { static hcb_2_pair hcb8_2[] ICONST_ATTR = {
/* 3 bit codeword */ /* 3 bit codeword */
{ 3, 1, 1 }, { 3, 1, 1 },

View file

@ -28,7 +28,7 @@
/* Binary search huffman table HCB_9 */ /* Binary search huffman table HCB_9 */
static hcb_bin_pair hcb9[] = { static hcb_bin_pair hcb9[] ICONST_ATTR_FAAD_LARGE_IRAM = {
{ /* 0 */ 0, { 1, 2 } }, { /* 0 */ 0, { 1, 2 } },
{ /* 1 */ 1, { 0, 0 } }, { /* 1 */ 1, { 0, 0 } },
{ /* 2 */ 0, { 1, 2 } }, { /* 2 */ 0, { 1, 2 } },

View file

@ -28,7 +28,7 @@
/* Binary search huffman table HCB_SF */ /* Binary search huffman table HCB_SF */
static uint8_t hcb_sf[][2] = { static uint8_t hcb_sf[][2] ICONST_ATTR_FAAD_LARGE_IRAM = {
{ /* 0 */ 1, 2 }, { /* 0 */ 1, 2 },
{ /* 1 */ 60, 0 }, { /* 1 */ 60, 0 },
{ /* 2 */ 1, 2 }, { /* 2 */ 1, 2 },

View file

@ -51,6 +51,25 @@ extern struct codec_api* ci;
#define LOGF(...) #define LOGF(...)
#endif #endif
#if (CONFIG_CPU == MCF5250) || defined(CPU_S5L870X)
/* Enough IRAM but performance suffers with ICODE_ATTR. */
#define IBSS_ATTR_FAAD_LARGE_IRAM IBSS_ATTR
#define ICODE_ATTR_FAAD_LARGE_IRAM
#define ICONST_ATTR_FAAD_LARGE_IRAM ICONST_ATTR
#elif (CONFIG_CPU == PP5022) || (CONFIG_CPU == PP5024)
/* Enough IRAM to move additional data and code to it. */
#define IBSS_ATTR_FAAD_LARGE_IRAM IBSS_ATTR
#define ICODE_ATTR_FAAD_LARGE_IRAM ICODE_ATTR
#define ICONST_ATTR_FAAD_LARGE_IRAM ICONST_ATTR
#else
/* Not enough IRAM available. */
#define IBSS_ATTR_FAAD_LARGE_IRAM
#define ICODE_ATTR_FAAD_LARGE_IRAM
#define ICONST_ATTR_FAAD_LARGE_IRAM
#endif
#define INLINE __inline #define INLINE __inline
#if 0 //defined(_WIN32) && !defined(_WIN32_WCE) #if 0 //defined(_WIN32) && !defined(_WIN32_WCE)
#define ALIGN __declspec(align(16)) #define ALIGN __declspec(align(16))
@ -71,7 +90,7 @@ extern struct codec_api* ci;
/* #define USE_DOUBLE_PRECISION */ /* #define USE_DOUBLE_PRECISION */
/* use fixed point reals */ /* use fixed point reals */
#define FIXED_POINT #define FIXED_POINT
//#define BIG_IQ_TABLE #define BIG_IQ_TABLE /* BIG_IQ_TABLE results in faster requantization */
/* Use if target platform has address generators with autoincrement */ /* Use if target platform has address generators with autoincrement */
//#define PREFER_POINTERS //#define PREFER_POINTERS

View file

@ -39,17 +39,18 @@
/* static function declarations */ /* static function declarations */
static INLINE void huffman_sign_bits(bitfile *ld, int16_t *sp, uint8_t len); static INLINE void huffman_sign_bits_pair(bitfile *ld, int16_t *sp);
static INLINE void huffman_sign_bits_quad(bitfile *ld, int16_t *sp);
static INLINE int16_t huffman_getescape(bitfile *ld, int16_t sp); static INLINE int16_t huffman_getescape(bitfile *ld, int16_t sp);
static uint8_t huffman_2step_quad(uint8_t cb, bitfile *ld, int16_t *sp); static uint8_t huffman_2step_quad(uint8_t cb, bitfile *ld, int16_t *sp);
static uint8_t huffman_2step_quad_sign(uint8_t cb, bitfile *ld, int16_t *sp); static uint8_t huffman_2step_quad_sign(uint8_t cb, bitfile *ld, int16_t *sp);
static uint8_t huffman_2step_pair(uint8_t cb, bitfile *ld, int16_t *sp); static INLINE uint8_t huffman_2step_pair(uint8_t cb, bitfile *ld, int16_t *sp);
static uint8_t huffman_2step_pair_sign(uint8_t cb, bitfile *ld, int16_t *sp); static INLINE uint8_t huffman_2step_pair_sign(uint8_t cb, bitfile *ld, int16_t *sp);
static uint8_t huffman_binary_quad(uint8_t cb, bitfile *ld, int16_t *sp); static uint8_t huffman_binary_quad(uint8_t cb, bitfile *ld, int16_t *sp);
static uint8_t huffman_binary_quad_sign(uint8_t cb, bitfile *ld, int16_t *sp); static uint8_t huffman_binary_quad_sign(uint8_t cb, bitfile *ld, int16_t *sp);
static uint8_t huffman_binary_pair(uint8_t cb, bitfile *ld, int16_t *sp); static uint8_t huffman_binary_pair(uint8_t cb, bitfile *ld, int16_t *sp);
static uint8_t huffman_binary_pair_sign(uint8_t cb, bitfile *ld, int16_t *sp); static uint8_t huffman_binary_pair_sign(uint8_t cb, bitfile *ld, int16_t *sp);
static int16_t huffman_codebook(uint8_t i); static int16_t huffman_codebook(uint8_t i) ICODE_ATTR_FAAD_LARGE_IRAM;
#ifdef ERROR_RESILIENCE #ifdef ERROR_RESILIENCE
static void vcb11_check_LAV(uint8_t cb, int16_t *sp); static void vcb11_check_LAV(uint8_t cb, int16_t *sp);
#endif #endif
@ -75,49 +76,51 @@ int8_t huffman_scale_factor(bitfile *ld)
} }
hcb *hcb_table[] = { hcb *hcb_table[] ICONST_ATTR = {
0, hcb1_1, hcb2_1, 0, hcb4_1, 0, hcb6_1, 0, hcb8_1, 0, hcb10_1, hcb11_1 0, hcb1_1, hcb2_1, 0, hcb4_1, 0, hcb6_1, 0, hcb8_1, 0, hcb10_1, hcb11_1
}; };
hcb_2_quad *hcb_2_quad_table[] = { hcb_2_quad *hcb_2_quad_table[] ICONST_ATTR = {
0, hcb1_2, hcb2_2, 0, hcb4_2, 0, 0, 0, 0, 0, 0, 0 0, hcb1_2, hcb2_2, 0, hcb4_2, 0, 0, 0, 0, 0, 0, 0
}; };
hcb_2_pair *hcb_2_pair_table[] = { hcb_2_pair *hcb_2_pair_table[] ICONST_ATTR = {
0, 0, 0, 0, 0, 0, hcb6_2, 0, hcb8_2, 0, hcb10_2, hcb11_2 0, 0, 0, 0, 0, 0, hcb6_2, 0, hcb8_2, 0, hcb10_2, hcb11_2
}; };
hcb_bin_pair *hcb_bin_table[] = { hcb_bin_pair *hcb_bin_table[] ICONST_ATTR = {
0, 0, 0, 0, 0, hcb5, 0, hcb7, 0, hcb9, 0, 0 0, 0, 0, 0, 0, hcb5, 0, hcb7, 0, hcb9, 0, 0
}; };
uint8_t hcbN[] = { 0, 5, 5, 0, 5, 0, 5, 0, 5, 0, 6, 5 }; uint8_t hcbN[] ICONST_ATTR = { 0, 5, 5, 0, 5, 0, 5, 0, 5, 0, 6, 5 };
/* defines whether a huffman codebook is unsigned or not */ /* defines whether a huffman codebook is unsigned or not */
/* Table 4.6.2 */ /* Table 4.6.2 */
uint8_t unsigned_cb[] = { 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, uint8_t unsigned_cb[] ICONST_ATTR = { 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0,
/* codebook 16 to 31 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 /* codebook 16 to 31 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
}; };
int hcb_2_quad_table_size[] = { 0, 114, 86, 0, 185, 0, 0, 0, 0, 0, 0, 0 }; int hcb_2_quad_table_size[] ICONST_ATTR = { 0, 114, 86, 0, 185, 0, 0, 0, 0, 0, 0, 0 };
int hcb_2_pair_table_size[] = { 0, 0, 0, 0, 0, 0, 126, 0, 83, 0, 210, 373 }; int hcb_2_pair_table_size[] ICONST_ATTR = { 0, 0, 0, 0, 0, 0, 126, 0, 83, 0, 210, 373 };
int hcb_bin_table_size[] = { 0, 0, 0, 161, 0, 161, 0, 127, 0, 337, 0, 0 }; int hcb_bin_table_size[] ICONST_ATTR = { 0, 0, 0, 161, 0, 161, 0, 127, 0, 337, 0, 0 };
static INLINE void huffman_sign_bits(bitfile *ld, int16_t *sp, uint8_t len) #define FAAD_GET_SIGN(idx) \
if (sp[idx]) \
if (faad_get1bit(ld)&1) \
sp[idx] = -sp[idx]; \
static INLINE void huffman_sign_bits_pair(bitfile *ld, int16_t *sp)
{ {
uint8_t i; FAAD_GET_SIGN(0)
FAAD_GET_SIGN(1)
}
for (i = 0; i < len; i++) static INLINE void huffman_sign_bits_quad(bitfile *ld, int16_t *sp)
{ {
if(sp[i]) FAAD_GET_SIGN(0)
{ FAAD_GET_SIGN(1)
if(faad_get1bit(ld FAAD_GET_SIGN(2)
DEBUGVAR(1,5,"huffman_sign_bits(): sign bit")) & 1) FAAD_GET_SIGN(3)
{
sp[i] = -sp[i];
}
}
}
} }
static INLINE int16_t huffman_getescape(bitfile *ld, int16_t sp) static INLINE int16_t huffman_getescape(bitfile *ld, int16_t sp)
@ -194,12 +197,12 @@ static uint8_t huffman_2step_quad(uint8_t cb, bitfile *ld, int16_t *sp)
static uint8_t huffman_2step_quad_sign(uint8_t cb, bitfile *ld, int16_t *sp) static uint8_t huffman_2step_quad_sign(uint8_t cb, bitfile *ld, int16_t *sp)
{ {
uint8_t err = huffman_2step_quad(cb, ld, sp); uint8_t err = huffman_2step_quad(cb, ld, sp);
huffman_sign_bits(ld, sp, QUAD_LEN); huffman_sign_bits_quad(ld, sp);
return err; return err;
} }
static uint8_t huffman_2step_pair(uint8_t cb, bitfile *ld, int16_t *sp) static INLINE uint8_t huffman_2step_pair(uint8_t cb, bitfile *ld, int16_t *sp)
{ {
uint32_t cw; uint32_t cw;
uint16_t offset = 0; uint16_t offset = 0;
@ -232,10 +235,10 @@ static uint8_t huffman_2step_pair(uint8_t cb, bitfile *ld, int16_t *sp)
return 0; return 0;
} }
static uint8_t huffman_2step_pair_sign(uint8_t cb, bitfile *ld, int16_t *sp) static INLINE uint8_t huffman_2step_pair_sign(uint8_t cb, bitfile *ld, int16_t *sp)
{ {
uint8_t err = huffman_2step_pair(cb, ld, sp); uint8_t err = huffman_2step_pair(cb, ld, sp);
huffman_sign_bits(ld, sp, PAIR_LEN); huffman_sign_bits_pair(ld, sp);
return err; return err;
} }
@ -269,7 +272,7 @@ static uint8_t huffman_binary_quad(uint8_t cb, bitfile *ld, int16_t *sp)
static uint8_t huffman_binary_quad_sign(uint8_t cb, bitfile *ld, int16_t *sp) static uint8_t huffman_binary_quad_sign(uint8_t cb, bitfile *ld, int16_t *sp)
{ {
uint8_t err = huffman_binary_quad(cb, ld, sp); uint8_t err = huffman_binary_quad(cb, ld, sp);
huffman_sign_bits(ld, sp, QUAD_LEN); huffman_sign_bits_quad(ld, sp);
return err; return err;
} }
@ -301,7 +304,7 @@ static uint8_t huffman_binary_pair(uint8_t cb, bitfile *ld, int16_t *sp)
static uint8_t huffman_binary_pair_sign(uint8_t cb, bitfile *ld, int16_t *sp) static uint8_t huffman_binary_pair_sign(uint8_t cb, bitfile *ld, int16_t *sp)
{ {
uint8_t err = huffman_binary_pair(cb, ld, sp); uint8_t err = huffman_binary_pair(cb, ld, sp);
huffman_sign_bits(ld, sp, PAIR_LEN); huffman_sign_bits_pair(ld, sp);
return err; return err;
} }

View file

@ -33,7 +33,7 @@ extern "C" {
#endif #endif
int8_t huffman_scale_factor(bitfile *ld); int8_t huffman_scale_factor(bitfile *ld);
uint8_t huffman_spectral_data(uint8_t cb, bitfile *ld, int16_t *sp); uint8_t huffman_spectral_data(uint8_t cb, bitfile *ld, int16_t *sp) ICODE_ATTR_FAAD_LARGE_IRAM;
#ifdef ERROR_RESILIENCE #ifdef ERROR_RESILIENCE
int8_t huffman_spectral_data_2(uint8_t cb, bits_t *ld, int16_t *sp); int8_t huffman_spectral_data_2(uint8_t cb, bits_t *ld, int16_t *sp);
#endif #endif

View file

@ -414,19 +414,18 @@ uint8_t window_grouping_info(NeAACDecHandle hDecoder, ic_stream *ics)
/**/ /**/
static INLINE real_t iquant(int16_t q, const real_t *tab, uint8_t *error) static INLINE real_t iquant(int16_t q, const real_t *tab, uint8_t *error)
{ {
#ifdef FIXED_POINT #ifndef BIG_IQ_TABLE
/* For FIXED_POINT the iq_table is prescaled by 3 bits (iq_table[]/8) */ /* For FIXED_POINT the iq_table is prescaled by 3 bits (iq_table[]/8) */
/* BIG_IQ_TABLE allows you to use the full 8192 value table, if this is not /* BIG_IQ_TABLE allows you to use the full 8192 value table, if this is not
* defined a 1026 value table and interpolation will be used * defined a 1026 value table and interpolation will be used
*/ */
#ifndef BIG_IQ_TABLE
static const real_t errcorr[] = { static const real_t errcorr[] = {
REAL_CONST(0), REAL_CONST(1.0/8.0), REAL_CONST(2.0/8.0), REAL_CONST(3.0/8.0), REAL_CONST(0), REAL_CONST(1.0/8.0), REAL_CONST(2.0/8.0), REAL_CONST(3.0/8.0),
REAL_CONST(4.0/8.0), REAL_CONST(5.0/8.0), REAL_CONST(6.0/8.0), REAL_CONST(7.0/8.0), REAL_CONST(4.0/8.0), REAL_CONST(5.0/8.0), REAL_CONST(6.0/8.0), REAL_CONST(7.0/8.0),
REAL_CONST(0) REAL_CONST(0)
}; };
real_t x1, x2; real_t x1, x2;
#endif
int16_t sgn = 1; int16_t sgn = 1;
if (q < 0) if (q < 0)
@ -445,7 +444,6 @@ static INLINE real_t iquant(int16_t q, const real_t *tab, uint8_t *error)
return sgn * tab[q]; return sgn * tab[q];
} }
#ifndef BIG_IQ_TABLE
if (q >= 8192) if (q >= 8192)
{ {
*error = 17; *error = 17;
@ -456,12 +454,7 @@ static INLINE real_t iquant(int16_t q, const real_t *tab, uint8_t *error)
x1 = tab[q>>3]; x1 = tab[q>>3];
x2 = tab[(q>>3) + 1]; x2 = tab[(q>>3) + 1];
return sgn * 16 * (MUL_R(errcorr[q&7],(x2-x1)) + x1); return sgn * 16 * (MUL_R(errcorr[q&7],(x2-x1)) + x1);
#else #else /* #ifndef BIG_IQ_TABLE */
*error = 17;
return 0;
#endif
#else
if (q < 0) if (q < 0)
{ {
/* tab contains a value for all possible q [0,8192] */ /* tab contains a value for all possible q [0,8192] */
@ -547,9 +540,7 @@ static uint8_t quant_to_spec(NeAACDecHandle hDecoder,
uint8_t g, sfb, win; uint8_t g, sfb, win;
uint16_t width, bin, k, gindex, wa, wb; uint16_t width, bin, k, gindex, wa, wb;
uint8_t error = 0; /* Init error flag */ uint8_t error = 0; /* Init error flag */
#ifndef FIXED_POINT
real_t scf; real_t scf;
#endif
k = 0; k = 0;
gindex = 0; gindex = 0;
@ -597,6 +588,8 @@ static uint8_t quant_to_spec(NeAACDecHandle hDecoder,
#ifndef FIXED_POINT #ifndef FIXED_POINT
scf = pow2sf_tab[exp/*+25*/] * pow2_table[frac]; scf = pow2sf_tab[exp/*+25*/] * pow2_table[frac];
#else
scf = pow2_table[frac];
#endif #endif
for (win = 0; win < ics->window_group_length[g]; win++) for (win = 0; win < ics->window_group_length[g]; win++)
@ -612,32 +605,12 @@ static uint8_t quant_to_spec(NeAACDecHandle hDecoder,
spec_data[wb+3] = iquant(quant_data[k+3], tab, &error) * scf; spec_data[wb+3] = iquant(quant_data[k+3], tab, &error) * scf;
#else #else
real_t iq0 = iquant(quant_data[k+0], tab, &error);
real_t iq1 = iquant(quant_data[k+1], tab, &error);
real_t iq2 = iquant(quant_data[k+2], tab, &error);
real_t iq3 = iquant(quant_data[k+3], tab, &error);
wb = wa + bin; wb = wa + bin;
if (exp < 0) spec_data[wb+0] = MUL_C((iquant(quant_data[k+0], tab, &error)<<exp), scf);
{ spec_data[wb+1] = MUL_C((iquant(quant_data[k+1], tab, &error)<<exp), scf);
spec_data[wb+0] = iq0 >>= -exp; spec_data[wb+2] = MUL_C((iquant(quant_data[k+2], tab, &error)<<exp), scf);
spec_data[wb+1] = iq1 >>= -exp; spec_data[wb+3] = MUL_C((iquant(quant_data[k+3], tab, &error)<<exp), scf);
spec_data[wb+2] = iq2 >>= -exp;
spec_data[wb+3] = iq3 >>= -exp;
} else {
spec_data[wb+0] = iq0 <<= exp;
spec_data[wb+1] = iq1 <<= exp;
spec_data[wb+2] = iq2 <<= exp;
spec_data[wb+3] = iq3 <<= exp;
}
if (frac != 0)
{
spec_data[wb+0] = MUL_C(spec_data[wb+0],pow2_table[frac]);
spec_data[wb+1] = MUL_C(spec_data[wb+1],pow2_table[frac]);
spec_data[wb+2] = MUL_C(spec_data[wb+2],pow2_table[frac]);
spec_data[wb+3] = MUL_C(spec_data[wb+3],pow2_table[frac]);
}
//#define SCFS_PRINT //#define SCFS_PRINT
#ifdef SCFS_PRINT #ifdef SCFS_PRINT
@ -855,11 +828,14 @@ static uint8_t allocate_channel_pair(NeAACDecHandle hDecoder,
return 0; return 0;
} }
/* used by reconstruct_single_channel() and reconstruct_channel_pair() */
ALIGN static real_t spec_coef1[1024] IBSS_ATTR;
ALIGN static real_t spec_coef2[1024] IBSS_ATTR;
uint8_t reconstruct_single_channel(NeAACDecHandle hDecoder, ic_stream *ics, uint8_t reconstruct_single_channel(NeAACDecHandle hDecoder, ic_stream *ics,
element *sce, int16_t *spec_data) element *sce, int16_t *spec_data)
{ {
uint8_t retval, output_channels; uint8_t retval, output_channels;
ALIGN static real_t spec_coef[1024];
#ifdef PROFILE #ifdef PROFILE
int64_t count = faad_get_ts(); int64_t count = faad_get_ts();
@ -893,7 +869,7 @@ uint8_t reconstruct_single_channel(NeAACDecHandle hDecoder, ic_stream *ics,
/* dequantisation and scaling */ /* dequantisation and scaling */
retval = quant_to_spec(hDecoder, ics, spec_data, spec_coef, hDecoder->frameLength); retval = quant_to_spec(hDecoder, ics, spec_data, spec_coef1, hDecoder->frameLength);
if (retval > 0) if (retval > 0)
return retval; return retval;
@ -904,14 +880,14 @@ uint8_t reconstruct_single_channel(NeAACDecHandle hDecoder, ic_stream *ics,
/* pns decoding */ /* pns decoding */
pns_decode(ics, NULL, spec_coef, NULL, hDecoder->frameLength, 0, hDecoder->object_type); pns_decode(ics, NULL, spec_coef1, NULL, hDecoder->frameLength, 0, hDecoder->object_type);
#ifdef MAIN_DEC #ifdef MAIN_DEC
/* MAIN object type prediction */ /* MAIN object type prediction */
if (hDecoder->object_type == MAIN) if (hDecoder->object_type == MAIN)
{ {
/* intra channel prediction */ /* intra channel prediction */
ic_prediction(ics, spec_coef, hDecoder->pred_stat[sce->channel], hDecoder->frameLength, ic_prediction(ics, spec_coef1, hDecoder->pred_stat[sce->channel], hDecoder->frameLength,
hDecoder->sf_index); hDecoder->sf_index);
/* In addition, for scalefactor bands coded by perceptual /* In addition, for scalefactor bands coded by perceptual
@ -938,7 +914,7 @@ uint8_t reconstruct_single_channel(NeAACDecHandle hDecoder, ic_stream *ics,
#endif #endif
/* long term prediction */ /* long term prediction */
lt_prediction(ics, &(ics->ltp), spec_coef, hDecoder->lt_pred_stat[sce->channel], hDecoder->fb, lt_prediction(ics, &(ics->ltp), spec_coef1, hDecoder->lt_pred_stat[sce->channel], hDecoder->fb,
ics->window_shape, hDecoder->window_shape_prev[sce->channel], ics->window_shape, hDecoder->window_shape_prev[sce->channel],
hDecoder->sf_index, hDecoder->object_type, hDecoder->frameLength); hDecoder->sf_index, hDecoder->object_type, hDecoder->frameLength);
} }
@ -946,13 +922,13 @@ uint8_t reconstruct_single_channel(NeAACDecHandle hDecoder, ic_stream *ics,
/* tns decoding */ /* tns decoding */
tns_decode_frame(ics, &(ics->tns), hDecoder->sf_index, hDecoder->object_type, tns_decode_frame(ics, &(ics->tns), hDecoder->sf_index, hDecoder->object_type,
spec_coef, hDecoder->frameLength); spec_coef1, hDecoder->frameLength);
/* drc decoding */ /* drc decoding */
if (hDecoder->drc->present) if (hDecoder->drc->present)
{ {
if (!hDecoder->drc->exclude_mask[sce->channel] || !hDecoder->drc->excluded_chns_present) if (!hDecoder->drc->exclude_mask[sce->channel] || !hDecoder->drc->excluded_chns_present)
drc_decode(hDecoder->drc, spec_coef); drc_decode(hDecoder->drc, spec_coef1);
} }
/* filter bank */ /* filter bank */
@ -961,13 +937,13 @@ uint8_t reconstruct_single_channel(NeAACDecHandle hDecoder, ic_stream *ics,
{ {
#endif #endif
ifilter_bank(ics->window_sequence,ics->window_shape, ifilter_bank(ics->window_sequence,ics->window_shape,
hDecoder->window_shape_prev[sce->channel],spec_coef, hDecoder->window_shape_prev[sce->channel],spec_coef1,
hDecoder->time_out[sce->channel], hDecoder->fb_intermed[sce->channel], hDecoder->time_out[sce->channel], hDecoder->fb_intermed[sce->channel],
hDecoder->object_type, hDecoder->frameLength); hDecoder->object_type, hDecoder->frameLength);
#ifdef SSR_DEC #ifdef SSR_DEC
} else { } else {
ssr_decode(&(ics->ssr), hDecoder->fb, ics->window_sequence, ics->window_shape, ssr_decode(&(ics->ssr), hDecoder->fb, ics->window_sequence, ics->window_shape,
hDecoder->window_shape_prev[sce->channel], spec_coef, hDecoder->time_out[sce->channel], hDecoder->window_shape_prev[sce->channel], spec_coef1, hDecoder->time_out[sce->channel],
hDecoder->ssr_overlap[sce->channel], hDecoder->ipqf_buffer[sce->channel], hDecoder->prev_fmd[sce->channel], hDecoder->ssr_overlap[sce->channel], hDecoder->ipqf_buffer[sce->channel], hDecoder->prev_fmd[sce->channel],
hDecoder->frameLength); hDecoder->frameLength);
} }
@ -1051,8 +1027,6 @@ uint8_t reconstruct_channel_pair(NeAACDecHandle hDecoder, ic_stream *ics1, ic_st
element *cpe, int16_t *spec_data1, int16_t *spec_data2) element *cpe, int16_t *spec_data1, int16_t *spec_data2)
{ {
uint8_t retval; uint8_t retval;
ALIGN static real_t spec_coef1[1024] IBSS_ATTR;
ALIGN static real_t spec_coef2[1024] IBSS_ATTR;
#ifdef PROFILE #ifdef PROFILE
int64_t count = faad_get_ts(); int64_t count = faad_get_ts();