Import libatrac from ffmpeg and modify librm to support ATRAC3.

The decoder is still in floating point. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@22235 a1c6a512-1295-4272-9138-f99709370657
2009-08-10 14:46:31 +00:00 · 2009-08-10 14:46:31 +00:00 · 519adfbaae
commit 519adfbaae
parent 1c0aeb18ca
25 changed files with 13440 additions and 9 deletions
--- a/apps/codecs/libatrac/Makefile.test
+++ b/apps/codecs/libatrac/Makefile.test
@ -0,0 +1,11 @@
 # Standalone host build of the ATRAC3 test decoder.
 # -DTEST enables the test-only code paths; DEBUGF is mapped onto printf.
 CFLAGS = -Wall -O3 -DTEST -D"DEBUGF=printf"
 OBJS = atrac3.o dsputil.o bitstream.o fft.o mdct.o libavutil/log.o libavutil/mem.o ../librm/rm.o
 # Link with the same compiler driver the .c.o rule compiles with.
 atractest: $(OBJS)
 	$(CC) -o atractest $(OBJS) -lm
 .c.o :
 	$(CC) $(CFLAGS) -c -o $@ $<
 # 'clean' names no file; keep a stray file called "clean" from disabling it.
 .PHONY: clean
 clean:
 	rm -f atractest $(OBJS) *~ output.wav
--- a/apps/codecs/libatrac/README.rockbox
+++ b/apps/codecs/libatrac/README.rockbox
@ -0,0 +1,20 @@
 Library: libatrac
 Imported by : Mohamed Tarek
 Import date : 10-August-2009
 LICENSING INFORMATION
 FFmpeg is licensed under the GNU Lesser General Public License (LGPL), version 2.1 or later.
 IMPORT DETAILS
 The decoder is based on ffmpeg-svn r18079. It still uses floating-point
 math and is not yet suitable for use in Rockbox.
 TESTING
 The test program should compile in any Unix-like environment using the
 command "make -f Makefile.test".
 Running "./atractest file.rm" will decode the audio data to a WAV file
 called "output.wav" in the current directory.
--- a/apps/codecs/libatrac/atrac3.c
+++ b/apps/codecs/libatrac/atrac3.c
--- a/apps/codecs/libatrac/atrac3data.h
+++ b/apps/codecs/libatrac/atrac3data.h
@ -0,0 +1,144 @@
 /*
 * Atrac 3 compatible decoder data
 * Copyright (c) 2006-2007 Maxim Poliakovski
 * Copyright (c) 2006-2007 Benjamin Larsson
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
 /**
 * @file libavcodec/atrac3data.h
 * Atrac 3 AKA RealAudio 8 compatible decoder data
 */
 #ifndef AVCODEC_ATRAC3DATA_H
 #define AVCODEC_ATRAC3DATA_H
 #include <stdint.h>
 /* VLC tables */
 /*
 * Seven pairs of arrays. In each pair, huffcodeN[i] is the bit pattern of
 * entry i and huffbitsN[i] is the length of that pattern in bits; both
 * arrays of a pair have the same number of entries.
 */
 static const uint8_t huffcode1[9] = {
  0x0,0x4,0x5,0xC,0xD,0x1C,0x1D,0x1E,0x1F,
 };
 static const uint8_t huffbits1[9] = {
  1,3,3,4,4,5,5,5,5,
 };
 static const uint8_t huffcode2[5] = {
  0x0,0x4,0x5,0x6,0x7,
 };
 static const uint8_t huffbits2[5] = {
  1,3,3,3,3,
 };
 static const uint8_t huffcode3[7] = {
 0x0,0x4,0x5,0xC,0xD,0xE,0xF,
 };
 static const uint8_t huffbits3[7] = {
  1,3,3,4,4,4,4,
 };
 /* Pair 4 has exactly the same contents as pair 1. */
 static const uint8_t huffcode4[9] = {
  0x0,0x4,0x5,0xC,0xD,0x1C,0x1D,0x1E,0x1F,
 };
 static const uint8_t huffbits4[9] = {
  1,3,3,4,4,5,5,5,5,
 };
 /* In pairs 5-7 the last two entries are short (4-bit) codes listed after the longest ones. */
 static const uint8_t huffcode5[15] = {
  0x0,0x2,0x3,0x8,0x9,0xA,0xB,0x1C,0x1D,0x3C,0x3D,0x3E,0x3F,0xC,0xD,
 };
 static const uint8_t huffbits5[15] = {
  2,3,3,4,4,4,4,5,5,6,6,6,6,4,4
 };
 static const uint8_t huffcode6[31] = {
  0x0,0x2,0x3,0x4,0x5,0x6,0x7,0x14,0x15,0x16,0x17,0x18,0x19,0x34,0x35,
  0x36,0x37,0x38,0x39,0x3A,0x3B,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,0x8,0x9,
 };
 static const uint8_t huffbits6[31] = {
  3,4,4,4,4,4,4,5,5,5,5,5,5,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,4,4
 };
 /* Largest table: 63 entries, code lengths from 3 to 8 bits. */
 static const uint8_t huffcode7[63] = {
  0x0,0x8,0x9,0xA,0xB,0xC,0xD,0xE,0xF,0x10,0x11,0x24,0x25,0x26,0x27,0x28,
  0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,0x30,0x31,0x32,0x33,0x68,0x69,0x6A,0x6B,0x6C,
  0x6D,0x6E,0x6F,0x70,0x71,0x72,0x73,0x74,0x75,0xEC,0xED,0xEE,0xEF,0xF0,0xF1,0xF2,
  0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,0x2,0x3,
 };
 static const uint8_t huffbits7[63] = {
  3,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,
  7,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,4,4
 };
 /* Entry counts of huffcode1..huffcode7 (and huffbits1..huffbits7), in that order. */
 static const uint8_t huff_tab_sizes[7] = {
  9, 5, 7, 9, 15, 31, 63,
 };
 /* The seven code-pattern tables, addressable by 0-based table number. */
 static const uint8_t* const huff_codes[7] = {
  huffcode1,huffcode2,huffcode3,huffcode4,huffcode5,huffcode6,huffcode7,
 };
 /* The seven code-length tables, in the same order as huff_codes. */
 static const uint8_t* const huff_bits[7] = {
  huffbits1,huffbits2,huffbits3,huffbits4,huffbits5,huffbits6,huffbits7,
 };
 /*
 * Nine ascending values spaced 512 apart.
 * NOTE(review): presumably the start offset of each VLC's slice inside one
 * shared table buffer, with the last value marking the end - confirm against
 * the code in atrac3.c that consumes it.
 */
 static const uint16_t atrac3_vlc_offs[] = {
    0,512,1024,1536,2048,2560,3072,3584,4096
 };
 /* selector tables */
 /* Eight small values indexed 0..7. NOTE(review): the name suggests a bit
    length per selector value - confirm against the user in atrac3.c. */
 static const uint8_t CLCLengthTab[8] = {0, 4, 3, 3, 4, 4, 5, 6};
 /* Index 0..3 mapped to the same value read as a 2-bit two's complement number. */
 static const int8_t seTab_0[4] = {0, 1, -2, -1};
 /* 18 values laid out as nine pairs; each component is -1, 0 or 1 and every
    one of the nine possible combinations appears exactly once. */
 static const int8_t decTable1[18] = {0,0, 0,1, 0,-1, 1,0, -1,0, 1,1, 1,-1, -1,1, -1,-1};
 /* tables for the scalefactor decoding */
 /* Reciprocals of 1.5, 2.5, 3.5, 4.5, 7.5, 15.5 and 31.5 for indices 1..7;
    index 0 is 0.0. */
 static const float iMaxQuant[8] = {
  0.0, 1.0/1.5, 1.0/2.5, 1.0/3.5, 1.0/4.5, 1.0/7.5, 1.0/15.5, 1.0/31.5
 };
 /*
 * 33 ascending boundaries from 0 to 1024, i.e. 32 consecutive ranges:
 * 8 of width 8, 8 of width 16, 10 of width 32, 4 of width 64, 2 of width 128.
 * NOTE(review): presumably the first coefficient index of each subband -
 * confirm against the user in atrac3.c.
 */
 static const uint16_t subbandTab[33] = {
  0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224,
  256, 288, 320, 352, 384, 416, 448, 480, 512, 576, 640, 704, 768, 896, 1024
 };
 /* transform data */
 /*
 * 24 filter coefficients.
 * NOTE(review): going by the name this is one half of a 48-tap QMF
 * prototype whose other half is produced by mirroring - confirm where the
 * window is built in atrac3.c.
 */
 static const float qmf_48tap_half[24] = {
   -0.00001461907, -0.00009205479, -0.000056157569, 0.00030117269,
    0.0002422519,-0.00085293897, -0.0005205574, 0.0020340169,
    0.00078333891, -0.0042153862, -0.00075614988, 0.0078402944,
   -0.000061169922, -0.01344162, 0.0024626821, 0.021736089,
   -0.007801671, -0.034090221, 0.01880949, 0.054326009,
   -0.043596379, -0.099384367, 0.13207909, 0.46424159
 };
 /* joint stereo related tables */
 /* Eight weights that read naturally as four pairs: (0,2) (2,2) (0,0) (1,1).
    NOTE(review): how the pairs are indexed is decided by the joint-stereo
    code in atrac3.c - confirm there. */
 static const float matrixCoeffs[8] = {0.0, 2.0, 2.0, 2.0, 0.0, 0.0, 1.0, 1.0};
 #endif /* AVCODEC_ATRAC3DATA_H */
--- a/apps/codecs/libatrac/avcodec.h
+++ b/apps/codecs/libatrac/avcodec.h
--- a/apps/codecs/libatrac/bitstream.c
+++ b/apps/codecs/libatrac/bitstream.c
@ -0,0 +1,276 @@
 /*
 * Common bit i/o utils
 * Copyright (c) 2000, 2001 Fabrice Bellard
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * alternative bitstream reader & writer by Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
 #include "bitstream.h"
 #ifdef ROCKBOX
 #undef DEBUGF
 #define DEBUGF(...)
 #endif
 /*
 * 32-entry non-decreasing table with values 0..15 and external linkage.
 * NOTE(review): nothing in this file reads it; it appears to be carried
 * over from the upstream bitstream.c for other codecs - confirm before
 * removing.
 */
 const uint8_t ff_log2_run[32]={
 0, 0, 0, 0, 1, 1, 1, 1,
 2, 2, 2, 2, 3, 3, 3, 3,
 4, 4, 5, 5, 6, 6, 7, 7,
 8, 9,10,11,12,13,14,15
 };
 /**
 * Append the bytes of a NUL-terminated string to a bitstream, 8 bits each.
 *
 * @param[in,out] pbc      Bit writer to append to.
 * @param[in]     s        NUL-terminated string; the terminator itself is
 *                         not written unless put_zero is set.
 * @param[in]     put_zero When non-zero, a trailing zero byte is written
 *                         after the string.
 */
 void ff_put_string(PutBitContext * pbc, const char *s, int put_zero)
 {
    while(*s){
        /* put_bits() ORs 'value' into its accumulator without masking, so
           the byte must be widened as unsigned: a plain char >= 0x80 would
           otherwise sign-extend and overwrite previously written bits. */
        put_bits(pbc, 8, (uint8_t)*s);
        s++;
    }
    if(put_zero)
        put_bits(pbc, 8, 0);
 }
 /* VLC decoding */
 //#define DEBUG_VLC
 /*
 * Load element 'i' of a strided table into 'v'.
 *   table : base address of the table (any pointer type)
 *   i     : element index
 *   wrap  : distance in bytes between consecutive elements
 *   size  : width of one element in bytes; 1 and 2 are read as uint8_t and
 *           uint16_t, anything else as uint32_t
 * Arguments are parenthesised so expressions such as 'a + 1' are safe, and
 * the body is a single statement so the macro works under if/else.
 */
 #define GET_DATA(v, table, i, wrap, size) \
 do {\
    const uint8_t *get_data_ptr = (const uint8_t *)(table) + (i) * (wrap);\
    switch(size) {\
    case 1:\
        (v) = *(const uint8_t *)get_data_ptr;\
        break;\
    case 2:\
        (v) = *(const uint16_t *)get_data_ptr;\
        break;\
    default:\
        (v) = *(const uint32_t *)get_data_ptr;\
        break;\
    }\
 } while (0)
 /**
 * Reserve 'size' consecutive entries at the end of vlc->table.
 *
 * Nothing in this function allocates or grows the table, so a request that
 * does not fit into vlc->table_allocated entries has to fail; returning an
 * index anyway would let build_table() write past the end of the storage.
 *
 * @param vlc        Table descriptor; table_size is advanced by 'size'
 *                   even when the request fails.
 * @param size       Number of entries to reserve.
 * @param use_static Static-flag bits selected by the caller; a value above
 *                   1 enables the debug message on overflow.
 * @return Index of the first reserved entry, or -1 when the table is
 *         missing or too small.
 */
 static int alloc_table(VLC *vlc, int size, int use_static)
 {
    int index;
    index = vlc->table_size;
    vlc->table_size += size;
    if (vlc->table_size > vlc->table_allocated) {
        if(use_static>1){
            DEBUGF("init_vlc() used with too little memory : table_size > allocated_memory\n");
        }
        return -1;
    }
    if (!vlc->table)
        return -1;
    return index;
 }
 /**
 * Build one lookup table of 2^table_nb_bits entries for all codes that start
 * with 'code_prefix', recursing to build sub-tables for longer codes.
 *
 * Each entry is a pair: [0] holds the symbol, or for sub-table links the
 * index of the sub-table in vlc->table; [1] holds the number of bits the
 * code uses at this level, or a negative value (-sub-table bits) for a link.
 *
 * @param vlc            Descriptor whose table receives the entries.
 * @param table_nb_bits  log2 of the number of entries of this table.
 * @param nb_codes       Number of codes in the bits/codes/symbols arrays.
 * @param bits, codes, symbols  Strided input arrays (see GET_DATA); 'symbols'
 *                       may be NULL, in which case the code's index is used.
 * @param code_prefix    Prefix shared by all codes handled by this table.
 * @param n_prefix       Length of code_prefix in bits.
 * @param flags          INIT_VLC_* flags; INIT_VLC_LE selects LSB-first codes.
 * @return Index of this table inside vlc->table, or -1 on error.
 */
 static int build_table(VLC *vlc, int table_nb_bits,
                       int nb_codes,
                       const void *bits, int bits_wrap, int bits_size,
                       const void *codes, int codes_wrap, int codes_size,
                       const void *symbols, int symbols_wrap, int symbols_size,
                       uint32_t code_prefix, int n_prefix, int flags)
 {
    int i, j, k, n, table_size, table_index, nb, n1, index, code_prefix2, symbol;
    uint32_t code;
    VLC_TYPE (*table)[2];
    table_size = 1 << table_nb_bits;
    table_index = alloc_table(vlc, table_size, flags & (INIT_VLC_USE_STATIC|INIT_VLC_USE_NEW_STATIC));
 #ifdef DEBUG_VLC
    DEBUGF("new table index=%d size=%d code_prefix=%x n=%d\n",
           table_index, table_size, code_prefix, n_prefix);
 #endif
    if (table_index < 0)
        return -1;
    table = &vlc->table[table_index];
    /* mark every entry as unused: 0 bits, symbol -1 */
    for(i=0;i<table_size;i++) {
        table[i][1] = 0; //bits
        table[i][0] = -1; //codes
    }
    /* first pass: map codes and compute auxiliary table sizes */
    for(i=0;i<nb_codes;i++) {
        GET_DATA(n, bits, i, bits_wrap, bits_size);
        GET_DATA(code, codes, i, codes_wrap, codes_size);
        /* we accept tables with holes */
        if (n <= 0)
            continue;
        if (!symbols)
            symbol = i;
        else
            GET_DATA(symbol, symbols, i, symbols_wrap, symbols_size);
 #if defined(DEBUG_VLC) && 0
        DEBUGF("i=%d n=%d code=0x%x\n", i, n, code);
 #endif
        /* if code matches the prefix, it is in the table */
        /* n now counts only the bits that remain after the prefix */
        n -= n_prefix;
        if(flags & INIT_VLC_LE)
            code_prefix2= code & (n_prefix>=32 ? (int)0xffffffff : (1 << n_prefix)-1);
        else
            /* NOTE(review): evaluated before the n > 0 test below, so a
               negative n is used as a shift count here; the result is
               discarded in that case, but the shift itself is not valid C. */
            code_prefix2= code >> n;
        if (n > 0 && code_prefix2 == (int)code_prefix) {
            if (n <= table_nb_bits) {
                /* no need to add another table */
                /* the code fills every entry whose leading n index bits equal it */
                j = (code << (table_nb_bits - n)) & (table_size - 1);
                nb = 1 << (table_nb_bits - n);
                for(k=0;k<nb;k++) {
                    if(flags & INIT_VLC_LE)
                        j = (code >> n_prefix) + (k<<n);
 #ifdef DEBUG_VLC
                    DEBUGF("%4x: code=%d n=%d\n",
                           j, i, n);
 #endif
                    /* an already occupied entry means two codes collide */
                    if (table[j][1] /*bits*/ != 0) {
                        DEBUGF("incorrect codes\n");
                        return -1;
                    }
                    table[j][1] = n; //bits
                    table[j][0] = symbol;
                    j++;
                }
            } else {
                /* code is longer than this table: remember, as a negative
                   bit count, the largest remainder seen for its slot */
                n -= table_nb_bits;
                j = (code >> ((flags & INIT_VLC_LE) ? n_prefix : n)) & ((1 << table_nb_bits) - 1);
 #ifdef DEBUG_VLC
                DEBUGF("%4x: n=%d (subtable)\n",
                       j, n);
 #endif
                /* compute table size */
                n1 = -table[j][1]; //bits
                if (n > n1)
                    n1 = n;
                table[j][1] = -n1; //bits
            }
        }
    }
    /* second pass : fill auxiliary tables recursively */
    for(i=0;i<table_size;i++) {
        n = table[i][1]; //bits
        if (n < 0) {
            n = -n;
            /* a sub-table is never wider than this table; longer codes
               nest through further levels */
            if (n > table_nb_bits) {
                n = table_nb_bits;
                table[i][1] = -n; //bits
            }
            index = build_table(vlc, n, nb_codes,
                                bits, bits_wrap, bits_size,
                                codes, codes_wrap, codes_size,
                                symbols, symbols_wrap, symbols_size,
                                (flags & INIT_VLC_LE) ? (code_prefix | (i << n_prefix)) : ((code_prefix << table_nb_bits) | i),
                                n_prefix + table_nb_bits, flags);
            if (index < 0)
                return -1;
            /* note: realloc has been done, so reload tables */
            /* (alloc_table() in this file does not reallocate; the reload is harmless) */
            table = &vlc->table[table_index];
            table[i][0] = index; //code
        }
    }
    return table_index;
 }
 /**
 * Build the VLC decoding tables used by get_vlc().
 *
 * @param vlc       Descriptor to initialise.
 * @param nb_bits   log2 of the size of the first-level table (2^nb_bits
 *                  entries). Larger values decode faster but need more
 *                  memory.
 * @param nb_codes  Number of VLC codes described by the arrays below.
 * @param bits      Length in bits of each code.
 * @param codes     Bit pattern of each code.
 * @param symbols   Value to return for each code, or NULL to use the
 *                  code's index.
 * @param bits_wrap, codes_wrap, symbols_wrap
 *                  Distance in bytes between consecutive entries of the
 *                  corresponding array.
 * @param bits_size, codes_size, symbols_size
 *                  Width in bytes of one entry of the corresponding array;
 *                  together with the wrap values this allows any
 *                  byte/word/long layout.
 * @param flags     INIT_VLC_* flags.
 *                  INIT_VLC_USE_NEW_STATIC: a table whose table_size equals
 *                  table_allocated counts as already built; any other
 *                  non-zero table_size is an error.
 *                  INIT_VLC_USE_STATIC: a non-NULL table counts as already
 *                  built.
 *                  Neither: the descriptor is reset before building.
 * @return 0 on success (including "already built"), -1 on failure.
 */
 int init_vlc_sparse(VLC *vlc, int nb_bits, int nb_codes,
             const void *bits, int bits_wrap, int bits_size,
             const void *codes, int codes_wrap, int codes_size,
             const void *symbols, int symbols_wrap, int symbols_size,
             int flags)
 {
    int status;

    vlc->bits = nb_bits;
    if (flags & INIT_VLC_USE_NEW_STATIC) {
        /* A completely filled table is reused; a partly filled one is fatal. */
        if (vlc->table_size != 0)
            return (vlc->table_size == vlc->table_allocated) ? 0 : -1;
    } else if (flags & INIT_VLC_USE_STATIC) {
        /* Static tables start out NULL; anything else was built earlier. */
        if (vlc->table != NULL)
            return 0;
    } else {
        vlc->table_size = 0;
        vlc->table_allocated = 0;
        vlc->table = NULL;
    }
 #ifdef DEBUG_VLC
    DEBUGF("build table nb_codes=%d\n", nb_codes);
 #endif
    status = build_table(vlc, nb_bits, nb_codes,
                         bits, bits_wrap, bits_size,
                         codes, codes_wrap, codes_size,
                         symbols, symbols_wrap, symbols_size,
                         0, 0, flags);
    if (status < 0)
        return -1;
    /* Test builds report how many entries a too-small static table would
       have needed. */
 #ifdef TEST
    if ((flags & INIT_VLC_USE_NEW_STATIC) && vlc->table_allocated < vlc->table_size)
        DEBUGF("needed %d had %d\n", vlc->table_size, vlc->table_allocated);
 #endif
    return 0;
 }
--- a/apps/codecs/libatrac/bitstream.h
+++ b/apps/codecs/libatrac/bitstream.h
@ -0,0 +1,963 @@
 /*
 * copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
 #ifndef BITSTREAM_H
 #define BITSTREAM_H
 #include <inttypes.h>
 #include <stdlib.h>
 //#include <assert.h>
 #include <string.h>
 #include <stdio.h>
 #include "bswap.h"
 /* The following 2 defines are taken from libavutil/intreadwrite.h */
 /* AV_RB32(x): read a big-endian 32-bit value from the 4 bytes at x, one
    byte at a time, so x needs no particular alignment. The first byte is
    widened to uint32_t before shifting: as a promoted int, a byte >= 0x80
    shifted left by 24 would overflow into the sign bit. The result is
    therefore unsigned. */
 #define AV_RB32(x)  (((uint32_t)((const uint8_t*)(x))[0] << 24) | \
                     (((const uint8_t*)(x))[1] << 16) | \
                     (((const uint8_t*)(x))[2] <<  8) | \
                      ((const uint8_t*)(x))[3])
 /* AV_WB32(p, d): store the low 32 bits of d at p in big-endian byte order,
    one byte at a time. 'd' is evaluated four times. */
 #define AV_WB32(p, d) do { \
                    ((uint8_t*)(p))[3] = (d); \
                    ((uint8_t*)(p))[2] = (d)>>8; \
                    ((uint8_t*)(p))[1] = (d)>>16; \
                    ((uint8_t*)(p))[0] = (d)>>24; } while(0)
 /* The little-endian ALT reader is a variant of the ALT reader, so selecting
    it selects ALT_BITSTREAM_READER as well. */
 #if defined(ALT_BITSTREAM_READER_LE) && !defined(ALT_BITSTREAM_READER)
 #   define ALT_BITSTREAM_READER
 #endif
 //#define ALT_BITSTREAM_WRITER
 //#define ALIGNED_BITSTREAM_WRITER
 /* If the build did not choose a reader, default to the A32 reader when
    ARCH_ARM is defined and to the ALT reader otherwise. */
 #if !defined(LIBMPEG2_BITSTREAM_READER) && !defined(A32_BITSTREAM_READER) && !defined(ALT_BITSTREAM_READER)
 #   if defined(ARCH_ARM)
 #       define A32_BITSTREAM_READER
 #   else
 #       define ALT_BITSTREAM_READER
 //#define LIBMPEG2_BITSTREAM_READER
 //#define A32_BITSTREAM_READER
 #   endif
 #endif
 /* Declaration only. NOTE(review): no definition is visible in the imported
    bitstream.c - confirm one exists before referencing this table. */
 extern const uint8_t ff_reverse[256];
 /*
 * NEG_SSR32(a, s) / NEG_USR32(a, s): shift the 32-bit value 'a' right by
 * (32 - s), arithmetically (sign-propagating) resp. logically. With a value
 * that is left-aligned in 32 bits this extracts its top 's' bits.
 */
 #if defined(ARCH_X86)
 // avoid +32 for shift optimization (gcc should do that ...)
 /* The asm shifts by (uint8_t)(-s) instead of 32-s. NOTE(review): this relies
    on the CPU using only the low 5 bits of the shift count - confirm. */
 static inline  int32_t NEG_SSR32( int32_t a, int8_t s){
    __asm__ ("sarl %1, %0\n\t"
         : "+r" (a)
         : "ic" ((uint8_t)(-s))
    );
    return a;
 }
 static inline uint32_t NEG_USR32(uint32_t a, int8_t s){
    __asm__ ("shrl %1, %0\n\t"
         : "+r" (a)
         : "ic" ((uint8_t)(-s))
    );
    return a;
 }
 #else
 /* Portable versions. 's' must be at least 1: s == 0 would shift a 32-bit
    value by 32. */
 #    define NEG_SSR32(a,s) ((( int32_t)(a))>>(32-(s)))
 #    define NEG_USR32(a,s) (((uint32_t)(a))>>(32-(s)))
 #endif
 /* bit output */
 /* buf and buf_end must be present and used by every alternative writer. */
 /* State of a bitstream writer; set up by init_put_bits(). */
 typedef struct PutBitContext {
 #ifdef ALT_BITSTREAM_WRITER
    uint8_t *buf, *buf_end;   /* start of the output buffer / one past its end */
    int index;                /* write position in bits from buf */
 #else
    uint32_t bit_buf;         /* accumulator for bits not yet stored in the buffer */
    int bit_left;             /* free bits in bit_buf; 32 means the accumulator is empty */
    uint8_t *buf, *buf_ptr, *buf_end; /* buffer start, next byte to write, one past the end */
 #endif
    int size_in_bits;         /* buffer capacity in bits (8 * buffer_size) */
 } PutBitContext;
 /**
 * Prepare a bit writer to write into 'buffer'.
 *
 * @param s           Writer state to initialise.
 * @param buffer      Output buffer.
 * @param buffer_size Size of the buffer in bytes; a negative size is treated
 *                    as "no buffer" (NULL, 0 bytes).
 */
 static inline void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size)
 {
    const int usable = buffer_size >= 0;
    uint8_t *start = usable ? buffer : NULL;
    int nbytes = usable ? buffer_size : 0;

    s->buf = start;
    s->buf_end = start + nbytes;
    s->size_in_bits = 8 * nbytes;
 #ifdef ALT_BITSTREAM_WRITER
    /* this writer ORs bits into the buffer, so its first word must start out clear */
    s->index = 0;
    ((uint32_t*)start)[0] = 0;
 #else
    /* empty accumulator: all 32 bits are free */
    s->bit_buf = 0;
    s->bit_left = 32;
    s->buf_ptr = start;
 #endif
 }
 /**
 * Number of bits written so far, counting bits that are still held in the
 * accumulator and not yet stored in the buffer.
 */
 static inline int put_bits_count(PutBitContext *s)
 {
 #ifdef ALT_BITSTREAM_WRITER
    return s->index;
 #else
    const int pending = 32 - s->bit_left;   /* bits waiting in bit_buf */
    return (s->buf_ptr - s->buf) * 8 + pending;
 #endif
 }
 /**
 * Store the bits still held in the accumulator into the buffer, padding the
 * last byte with zero bits, and leave the accumulator empty.
 *
 * No check is made that the stored bytes fit before buf_end.
 */
 static inline void flush_put_bits(PutBitContext *s)
 {
 #ifdef ALT_BITSTREAM_WRITER
    align_put_bits(s);
 #else
 #ifndef BITSTREAM_WRITER_LE
    /* Left-justify the pending bits. With an empty accumulator bit_left is
       32, and shifting a 32-bit value by 32 is undefined, so skip it. */
    if (s->bit_left < 32)
        s->bit_buf<<= s->bit_left;
 #endif
    while (s->bit_left < 32) {
        /* XXX: should test end of buffer */
 #ifdef BITSTREAM_WRITER_LE
        *s->buf_ptr++=s->bit_buf;
        s->bit_buf>>=8;
 #else
        *s->buf_ptr++=s->bit_buf >> 24;
        s->bit_buf<<=8;
 #endif
        s->bit_left+=8;
    }
    s->bit_left=32;
    s->bit_buf=0;
 #endif
 }
 /* Out-of-line writer helpers. ff_put_string() is defined in bitstream.c.
    NOTE(review): no definition of align_put_bits() or ff_copy_bits() is
    visible in the imported bitstream.c - confirm before calling them. Also
    note that flush_put_bits() above calls align_put_bits() ahead of this
    declaration when ALT_BITSTREAM_WRITER is defined. */
 void align_put_bits(PutBitContext *s);
 void ff_put_string(PutBitContext * pbc, const char *s, int put_zero);
 void ff_copy_bits(PutBitContext *pb, const uint8_t *src, int length);
 /* bit input */
 /* buffer, buffer_end and size_in_bits must be present and used by every reader */
 /* State of a bitstream reader; the fields in the middle depend on which
    reader implementation was selected at compile time. */
 typedef struct GetBitContext {
    const uint8_t *buffer, *buffer_end;  /* input data */
 #ifdef ALT_BITSTREAM_READER
    int index;               /* read position in bits from buffer */
 #elif defined LIBMPEG2_BITSTREAM_READER
    uint8_t *buffer_ptr;     /* next bytes to load into the cache */
    uint32_t cache;          /* left-aligned bits not yet consumed */
    int bit_count;           /* refill counter; UPDATE_CACHE loads 16 bits when it is >= 0 */
 #elif defined A32_BITSTREAM_READER
    uint32_t *buffer_ptr;    /* next 32-bit word to load into the cache */
    uint32_t cache0;         /* left-aligned bits not yet consumed */
    uint32_t cache1;         /* bits following those in cache0 */
    int bit_count;           /* refill counter; UPDATE_CACHE loads a word when it is > 0 */
 #endif
    int size_in_bits;
 } GetBitContext;
 /* Element type of the VLC lookup tables. */
 #define VLC_TYPE int16_t
 /* A variable-length-code decoding table as built by init_vlc_sparse(). */
 typedef struct VLC {
    int bits;                        /* log2 of the first-level table size */
    VLC_TYPE (*table)[2]; ///< code, bits
    int table_size, table_allocated; /* entries in use / entries available in 'table' */
 } VLC;
 /* NOTE(review): not referenced by the code visible in this import; appears
    to be kept from upstream for run/level VLC tables - confirm. */
 typedef struct RL_VLC_ELEM {
    int16_t level;
    int8_t len;
    uint8_t run;
 } RL_VLC_ELEM;
 #ifndef ALT_BITSTREAM_WRITER
 /**
 * Append the low 'n' bits of 'value' to the bitstream (default writer).
 *
 * 'value' is merged into the accumulator without masking, so it must not
 * have any bit set above bit n-1 (see the disabled assert). Whenever the
 * 32-bit accumulator fills up it is stored at buf_ptr; there is no check
 * against buf_end.
 *
 * @param s     Writer state.
 * @param n     Number of bits to write.
 * @param value Bits to write, right-aligned.
 */
 static inline void put_bits(PutBitContext *s, int n, unsigned int value)
 {
    unsigned int bit_buf;
    int bit_left;
    //    printf("put_bits=%d %x\n", n, value);
    //assert(n == 32 || value < (1U << n));
    bit_buf = s->bit_buf;
    bit_left = s->bit_left;
    //    printf("n=%d value=%x cnt=%d buf=%x\n", n, value, bit_cnt, bit_buf);
    /* XXX: optimize */
 #ifdef BITSTREAM_WRITER_LE
    /* LSB-first: new bits go above the ones already collected.
       NOTE(review): AV_WL32 and le2me_32 are not defined in this header -
       presumably expected from bswap.h or the build; confirm. */
    bit_buf |= value << (32 - bit_left);
    if (n >= bit_left) {
 #if !HAVE_FAST_UNALIGNED
        if (3 & (intptr_t) s->buf_ptr) {
            AV_WL32(s->buf_ptr, bit_buf);
        } else
 #endif
        *(uint32_t *)s->buf_ptr = le2me_32(bit_buf);
        s->buf_ptr+=4;
        /* keep the part of value that did not fit */
        bit_buf = (bit_left==32)?0:value >> bit_left;
        bit_left+=32;
    }
    bit_left-=n;
 #else
    /* MSB-first: shift the collected bits up and append below them. */
    if (n < bit_left) {
        bit_buf = (bit_buf<<n) | value;
        bit_left-=n;
    } else {
        /* fill the accumulator with the top bits of value and store it */
        bit_buf<<=bit_left;
        bit_buf |= value >> (n - bit_left);
        /* byte-wise store when buf_ptr is not 4-byte aligned; note that this
           test uses !defined() while the little-endian branch above tests
           the macro's value */
 #if !defined(HAVE_FAST_UNALIGNED)
        if (3 & (intptr_t) s->buf_ptr) {
            AV_WB32(s->buf_ptr, bit_buf);
        } else
 #endif
        *(uint32_t *)s->buf_ptr = be2me_32(bit_buf);
        //printf("bitbuf = %08x\n", bit_buf);
        s->buf_ptr+=4;
        bit_left+=32 - n;
        /* the low bits of value that did not fit start the next word; its
           already-stored high bits are shifted out by later writes */
        bit_buf = value;
    }
 #endif
    s->bit_buf = bit_buf;
    s->bit_left = bit_left;
 }
 #endif
 #ifdef ALT_BITSTREAM_WRITER
 /**
 * Append the low 'n' bits of 'value' to the bitstream (ALT writer).
 *
 * This writer keeps only a bit index and ORs the new bits straight into
 * the buffer, then clears or overwrites the following 32-bit word so the
 * next call can OR into it. It therefore touches memory beyond the current
 * position, and there is no check against buf_end.
 *
 * Four variants are selected at compile time: word-aligned accesses
 * (ALIGNED_BITSTREAM_WRITER) or byte-granular ones, each as x86 inline asm
 * (ARCH_X86 non-zero) or as portable C. Note that this block tests the
 * value of ARCH_X86 with #if, while NEG_SSR32/NEG_USR32 above test whether
 * it is defined.
 */
 static inline void put_bits(PutBitContext *s, int n, unsigned int value)
 {
 #    ifdef ALIGNED_BITSTREAM_WRITER
 #        if ARCH_X86
    __asm__ volatile(
        "movl %0, %%ecx                 \n\t"
        "xorl %%eax, %%eax              \n\t"
        "shrdl %%cl, %1, %%eax          \n\t"
        "shrl %%cl, %1                  \n\t"
        "movl %0, %%ecx                 \n\t"
        "shrl $3, %%ecx                 \n\t"
        "andl $0xFFFFFFFC, %%ecx        \n\t"
        "bswapl %1                      \n\t"
        "orl %1, (%2, %%ecx)            \n\t"
        "bswapl %%eax                   \n\t"
        "addl %3, %0                    \n\t"
        "movl %%eax, 4(%2, %%ecx)       \n\t"
        : "=&r" (s->index), "=&r" (value)
        : "r" (s->buf), "r" (n), "0" (s->index), "1" (value<<(-n))
        : "%eax", "%ecx"
    );
 #        else
    /* left-align value in 32 bits, then split it across the word that
       contains the write position and the word after it */
    int index= s->index;
    uint32_t *ptr= ((uint32_t *)s->buf)+(index>>5);
    value<<= 32-n;
    ptr[0] |= be2me_32(value>>(index&31));
    ptr[1]  = be2me_32(value<<(32-(index&31)));
 //if(n>24) printf("%d %d\n", n, value);
    index+= n;
    s->index= index;
 #        endif
 #    else //ALIGNED_BITSTREAM_WRITER
 #        if ARCH_X86
    __asm__ volatile(
        "movl $7, %%ecx                 \n\t"
        "andl %0, %%ecx                 \n\t"
        "addl %3, %%ecx                 \n\t"
        "negl %%ecx                     \n\t"
        "shll %%cl, %1                  \n\t"
        "bswapl %1                      \n\t"
        "movl %0, %%ecx                 \n\t"
        "shrl $3, %%ecx                 \n\t"
        "orl %1, (%%ecx, %2)            \n\t"
        "addl %3, %0                    \n\t"
        "movl $0, 4(%%ecx, %2)          \n\t"
        : "=&r" (s->index), "=&r" (value)
        : "r" (s->buf), "r" (n), "0" (s->index), "1" (value)
        : "%ecx"
    );
 #        else
    /* 32-bit access at the byte containing the write position (possibly
       unaligned); the following word is cleared for the next call */
    int index= s->index;
    uint32_t *ptr= (uint32_t*)(((uint8_t *)s->buf)+(index>>3));
    ptr[0] |= be2me_32(value<<(32-n-(index&7) ));
    ptr[1] = 0;
 //if(n>24) printf("%d %d\n", n, value);
    index+= n;
    s->index= index;
 #        endif
 #    endif //!ALIGNED_BITSTREAM_WRITER
 }
 #endif
 /**
 * Write the low 'bits' bits of a signed value (its two's complement
 * representation truncated to 'bits' bits).
 *
 * @param pb   Writer state.
 * @param bits Number of bits to write, 0..31.
 * @param val  Value to write.
 */
 static inline void put_sbits(PutBitContext *pb, int bits, int32_t val)
 {
    //assert(bits >= 0 && bits <= 31);
    /* Build the mask in unsigned arithmetic: 1<<31 overflows a signed int,
       and bits == 31 is within the documented range. */
    put_bits(pb, bits, val & ((1U<<bits)-1));
 }
 /**
 * Return a pointer to the byte at the writer's current position: the byte
 * containing the bit index for the ALT writer, the next byte to be stored
 * for the default writer.
 */
 static inline uint8_t* pbBufPtr(PutBitContext *s)
 {
    uint8_t *position;
 #ifdef ALT_BITSTREAM_WRITER
    position = s->buf + (s->index>>3);
 #else
    position = s->buf_ptr;
 #endif
    return position;
 }
 /**
 * Advance the write position by 'n' bytes without writing anything.
 *
 * PutBitContext must be flushed & aligned to a byte boundary before calling this.
 *
 * @param s Writer state.
 * @param n Number of bytes to skip.
 */
 static inline void skip_put_bytes(PutBitContext *s, int n){
        //assert((put_bits_count(s)&7)==0);
 #ifdef ALT_BITSTREAM_WRITER
        /* The ALT writer ORs into the buffer, so skipped bytes may need to
           be cleared first. This path never compiled (it contained bare
           prose as a tripwire); keep failing, but say why. */
 #error "skip_put_bytes(): FIXME may need some cleaning of the buffer with ALT_BITSTREAM_WRITER"
        s->index += n<<3;
 #else
        //assert(s->bit_left==32);
        s->buf_ptr += n;
 #endif
 }
 /**
 * Advance the write position by 'n' bits without defining their contents.
 * Only valid when the values of the skipped bits in the stream are
 * irrelevant.
 *
 * @param s Writer state.
 * @param n Number of bits to skip.
 */
 static inline void skip_put_bits(PutBitContext *s, int n){
 #ifdef ALT_BITSTREAM_WRITER
    s->index += n;
 #else
    /* Every full 32 the free-bit count drops below zero moves buf_ptr; the
       remainder stays in the accumulator. */
    const int free_bits = s->bit_left - n;
    s->buf_ptr -= free_bits >> 5;
    s->bit_left = free_bits & 31;
 #endif
 }
 /**
 * Move the end of the output buffer to 'size' bytes after its start.
 * Only buf_end is updated; size_in_bits keeps its previous value.
 */
 static inline void set_put_bits_buffer_size(PutBitContext *s, int size)
 {
    uint8_t *const new_end = s->buf + size;

    s->buf_end = new_end;
 }
 /* Bitstream reader API docs:
 name
    arbitrary name which is used as prefix for the internal variables
 gb
    getbitcontext
 OPEN_READER(name, gb)
    loads gb into local variables
 CLOSE_READER(name, gb)
    stores local vars in gb
 UPDATE_CACHE(name, gb)
    refills the internal cache from the bitstream
    after this call at least MIN_CACHE_BITS will be available,
 GET_CACHE(name, gb)
    will output the contents of the internal cache, next bit is MSB of 32 or 64 bit (FIXME 64bit)
 SHOW_UBITS(name, gb, num)
    will return the next num bits
 SHOW_SBITS(name, gb, num)
    will return the next num bits and do sign extension
 SKIP_BITS(name, gb, num)
    will skip over the next num bits
    note, this is equivalent to SKIP_CACHE; SKIP_COUNTER
 SKIP_CACHE(name, gb, num)
    will remove the next num bits from the cache (note SKIP_COUNTER MUST be called before UPDATE_CACHE / CLOSE_READER)
 SKIP_COUNTER(name, gb, num)
    will increment the internal bit counter (see SKIP_CACHE & SKIP_BITS)
 LAST_SKIP_CACHE(name, gb, num)
    will remove the next num bits from the cache if it is needed for UPDATE_CACHE otherwise it will do nothing
 LAST_SKIP_BITS(name, gb, num)
    is equivalent to LAST_SKIP_CACHE; SKIP_COUNTER
 for examples see get_bits, show_bits, skip_bits, get_vlc
 */
 #ifdef ALT_BITSTREAM_READER
 /* ALT reader: the state is a bit index; the cache is reloaded from the
    buffer on every UPDATE_CACHE. No macro definition ends in a line
    continuation, so none of them depends on a blank line following it. */
 #   define MIN_CACHE_BITS 25
 /* Declares the local copies <name>_index and <name>_cache. */
 #   define OPEN_READER(name, gb)\
        int name##_index= (gb)->index;\
        int name##_cache= 0;
 /* Writes the local bit index back to the context. */
 #   define CLOSE_READER(name, gb)\
        (gb)->index= name##_index;
 /* Loads 32 bits starting at the byte containing the bit index and aligns
    them so the next unread bit is first. */
 # ifdef ALT_BITSTREAM_READER_LE
 #   define UPDATE_CACHE(name, gb)\
        name##_cache= AV_RL32( ((const uint8_t *)(gb)->buffer)+(name##_index>>3) ) >> (name##_index&0x07);
 #   define SKIP_CACHE(name, gb, num)\
        name##_cache >>= (num);
 # else
 #   define UPDATE_CACHE(name, gb)\
        name##_cache= AV_RB32( ((const uint8_t *)(gb)->buffer)+(name##_index>>3) ) << (name##_index&0x07);
 #   define SKIP_CACHE(name, gb, num)\
        name##_cache <<= (num);
 # endif
 // FIXME name?
 #   define SKIP_COUNTER(name, gb, num)\
        name##_index += (num);
 #   define SKIP_BITS(name, gb, num)\
        {\
            SKIP_CACHE(name, gb, num)\
            SKIP_COUNTER(name, gb, num)\
        }
 /* The cache is rebuilt from the index on the next UPDATE_CACHE, so the
    final skip only needs to advance the index. */
 #   define LAST_SKIP_BITS(name, gb, num) SKIP_COUNTER(name, gb, num)
 #   define LAST_SKIP_CACHE(name, gb, num) ;
 # ifdef ALT_BITSTREAM_READER_LE
 #   define SHOW_UBITS(name, gb, num)\
        ((name##_cache) & (NEG_USR32(0xffffffff,num)))
 #   define SHOW_SBITS(name, gb, num)\
        NEG_SSR32((name##_cache)<<(32-(num)), num)
 # else
 #   define SHOW_UBITS(name, gb, num)\
        NEG_USR32(name##_cache, num)
 #   define SHOW_SBITS(name, gb, num)\
        NEG_SSR32(name##_cache, num)
 # endif
 #   define GET_CACHE(name, gb)\
        ((uint32_t)name##_cache)
 /** Number of bits consumed so far (ALT reader: simply the bit index). */
 static inline int get_bits_count(GetBitContext *s)
 {
    const int consumed = s->index;

    return consumed;
 }
 /** Advance the read position by 'n' bits; no range check is performed. */
 static inline void skip_bits_long(GetBitContext *s, int n)
 {
    s->index = s->index + n;
 }
 #elif defined LIBMPEG2_BITSTREAM_READER
 //libmpeg2 like reader
 /* libmpeg2-style reader: a left-aligned cache that is topped up 16 bits at
    a time. No macro definition ends in a line continuation, so none of them
    depends on a blank line following it.
    NOTE(review): AV_RB16 is not defined in this header - confirm it is
    provided before enabling LIBMPEG2_BITSTREAM_READER. */
 #   define MIN_CACHE_BITS 17
 /* Declares local copies of the reader state. */
 #   define OPEN_READER(name, gb)\
        int name##_bit_count=(gb)->bit_count;\
        int name##_cache= (gb)->cache;\
        uint8_t * name##_buffer_ptr=(gb)->buffer_ptr;
 /* Writes the local copies back to the context. */
 #   define CLOSE_READER(name, gb)\
        (gb)->bit_count= name##_bit_count;\
        (gb)->cache= name##_cache;\
        (gb)->buffer_ptr= name##_buffer_ptr;
 /* Once at least 16 bits have been consumed (bit_count >= 0), appends the
    next two bytes below the bits still in the cache. */
 #   define UPDATE_CACHE(name, gb)\
    if(name##_bit_count >= 0){\
        name##_cache+= AV_RB16(name##_buffer_ptr) << name##_bit_count; \
        name##_buffer_ptr+=2;\
        name##_bit_count-= 16;\
    }
 #   define SKIP_CACHE(name, gb, num)\
        name##_cache <<= (num);
 #   define SKIP_COUNTER(name, gb, num)\
        name##_bit_count += (num);
 #   define SKIP_BITS(name, gb, num)\
        {\
            SKIP_CACHE(name, gb, num)\
            SKIP_COUNTER(name, gb, num)\
        }
 /* The cache persists between calls, so the final skip is a full skip. */
 #   define LAST_SKIP_BITS(name, gb, num) SKIP_BITS(name, gb, num)
 #   define LAST_SKIP_CACHE(name, gb, num) SKIP_CACHE(name, gb, num)
 #   define SHOW_UBITS(name, gb, num)\
        NEG_USR32(name##_cache, num)
 #   define SHOW_SBITS(name, gb, num)\
        NEG_SSR32(name##_cache, num)
 #   define GET_CACHE(name, gb)\
        ((uint32_t)name##_cache)
 /**
 * Number of bits consumed so far: everything loaded from the buffer, minus
 * the 16-bit bias of the counter, plus bit_count.
 */
 static inline int get_bits_count(GetBitContext *s)
 {
    const int bias = 16;

    return (s->buffer_ptr - s->buffer)*8 - bias + s->bit_count;
 }
 /**
 * Skip 'n' bits, where n may exceed what the cache holds: the buffer
 * pointer is moved by whole 16-bit units and the cache is rebuilt from the
 * two bytes in front of the new pointer. The re_* names are the locals
 * declared by OPEN_READER(re, s).
 */
 static inline void skip_bits_long(GetBitContext *s, int n){
    OPEN_READER(re, s)
    re_bit_count += n;
    /* advance by one 2-byte unit per 16 bits; keep the remainder in the counter */
    re_buffer_ptr += 2*(re_bit_count>>4);
    re_bit_count &= 15;
    /* reload the previous two bytes, dropping the bits already consumed */
    re_cache = ((re_buffer_ptr[-2]<<8) + re_buffer_ptr[-1]) << (16+re_bit_count);
    UPDATE_CACHE(re, s)
    CLOSE_READER(re, s)
 }
 #elif defined A32_BITSTREAM_READER
 /* A32 reader: a 64-bit window (cache0:cache1) refilled one aligned 32-bit
    word at a time. No macro definition ends in a line continuation, so none
    of them depends on a blank line following it. */
 #   define MIN_CACHE_BITS 32
 /* Declares local copies of the reader state. */
 #   define OPEN_READER(name, gb)\
        int name##_bit_count=(gb)->bit_count;\
        uint32_t name##_cache0= (gb)->cache0;\
        uint32_t name##_cache1= (gb)->cache1;\
        uint32_t * name##_buffer_ptr=(gb)->buffer_ptr;
 /* Writes the local copies back to the context. */
 #   define CLOSE_READER(name, gb)\
        (gb)->bit_count= name##_bit_count;\
        (gb)->cache0= name##_cache0;\
        (gb)->cache1= name##_cache1;\
        (gb)->buffer_ptr= name##_buffer_ptr;
 /* When bit_count is positive, merges the next big-endian word into the
    window: its top bit_count bits complete cache0, the rest goes to cache1. */
 #   define UPDATE_CACHE(name, gb)\
    if(name##_bit_count > 0){\
        const uint32_t next= be2me_32( *name##_buffer_ptr );\
        name##_cache0 |= NEG_USR32(next,name##_bit_count);\
        name##_cache1 |= next<<name##_bit_count;\
        name##_buffer_ptr++;\
        name##_bit_count-= 32;\
    }
 /* Shifts the 64-bit window left by num bits. */
 #if ARCH_X86
 #   define SKIP_CACHE(name, gb, num)\
        __asm__(\
            "shldl %2, %1, %0          \n\t"\
            "shll %2, %1               \n\t"\
            : "+r" (name##_cache0), "+r" (name##_cache1)\
            : "Ic" ((uint8_t)(num))\
           );
 #else
 #   define SKIP_CACHE(name, gb, num)\
        name##_cache0 <<= (num);\
        name##_cache0 |= NEG_USR32(name##_cache1,num);\
        name##_cache1 <<= (num);
 #endif
 #   define SKIP_COUNTER(name, gb, num)\
        name##_bit_count += (num);
 #   define SKIP_BITS(name, gb, num)\
        {\
            SKIP_CACHE(name, gb, num)\
            SKIP_COUNTER(name, gb, num)\
        }
 /* The cache persists between calls, so the final skip is a full skip. */
 #   define LAST_SKIP_BITS(name, gb, num) SKIP_BITS(name, gb, num)
 #   define LAST_SKIP_CACHE(name, gb, num) SKIP_CACHE(name, gb, num)
 #   define SHOW_UBITS(name, gb, num)\
        NEG_USR32(name##_cache0, num)
 #   define SHOW_SBITS(name, gb, num)\
        NEG_SSR32(name##_cache0, num)
 #   define GET_CACHE(name, gb)\
        (name##_cache0)
 /**
 * Number of bits consumed so far: everything loaded from the buffer, minus
 * the 32-bit bias of the counter, plus bit_count.
 */
 static inline int get_bits_count(GetBitContext *s)
 {
    const int bias = 32;

    return ((uint8_t*)s->buffer_ptr - s->buffer)*8 - bias + s->bit_count;
 }
 /**
 * Skip 'n' bits, where n may exceed what the cache holds: the word pointer
 * is moved by whole 32-bit words and the cache is rebuilt from the word in
 * front of the new pointer. The re_* names are the locals declared by
 * OPEN_READER(re, s).
 */
 static inline void skip_bits_long(GetBitContext *s, int n){
    OPEN_READER(re, s)
    re_bit_count += n;
    /* advance by one word per 32 bits; keep the remainder in the counter */
    re_buffer_ptr += re_bit_count>>5;
    re_bit_count &= 31;
    /* reload the previous word, dropping the bits already consumed */
    re_cache0 = be2me_32( re_buffer_ptr[-1] ) << re_bit_count;
    re_cache1 = 0;
    UPDATE_CACHE(re, s)
    CLOSE_READER(re, s)
 }
 #endif
 /**
  * read mpeg1 dc style vlc (sign bit + mantissa with no MSB).
  * if MSB not set it is negative
  * @param s bit reader context; n bits are consumed from it
  * @param n length in bits
  * @return the decoded signed value
  * @author BERO
  */
 static inline int get_xbits(GetBitContext *s, int n){
    register int sign;
    register int32_t cache;
    OPEN_READER(re, s)
    UPDATE_CACHE(re, s)
    cache = GET_CACHE(re,s);
    /* all ones when the top bit of the cache is clear, zero when it is set
       (relies on >> of a negative int being an arithmetic shift) */
    sign=(~cache)>>31;
    LAST_SKIP_BITS(re, s, n)
    CLOSE_READER(re, s)
    /* sign == 0: result is NEG_USR32(cache, n) unchanged;
       sign == -1: the cache is inverted first and the result negated */
    return (NEG_USR32(sign ^ cache, n) ^ sign) - sign;
 }
 /**
  * Reads n bits and returns them as a signed value (taken with SHOW_SBITS).
  * @param s bit reader context, advanced by n bits
  * @param n number of bits to read
  */
 static inline int get_sbits(GetBitContext *s, int n){
    int value;

    OPEN_READER(re, s)
    UPDATE_CACHE(re, s)
    value = SHOW_SBITS(re, s, n);
    LAST_SKIP_BITS(re, s, n)
    CLOSE_READER(re, s)

    return value;
 }
 /**
  * Reads 1-17 bits and advances the reader past them.
  * Note: the alt bitstream reader can read up to 25 bits, but the libmpeg2
  * reader can't.
  * @param s bit reader context
  * @param n number of bits to read
  * @return the bits as an unsigned value
  */
 static inline unsigned int get_bits(GetBitContext *s, int n){
    int value;

    OPEN_READER(re, s)
    UPDATE_CACHE(re, s)
    value = SHOW_UBITS(re, s, n);
    LAST_SKIP_BITS(re, s, n)
    CLOSE_READER(re, s)

    return value;
 }
 /**
  * Shows 1-17 bits without consuming them.
  * Note: the alt bitstream reader can read up to 25 bits, but the libmpeg2
  * reader can't.
  * @param s bit reader context (not written back)
  * @param n number of bits to peek at
  * @return the upcoming bits as an unsigned value
  */
 static inline unsigned int show_bits(GetBitContext *s, int n){
    int peeked;

    OPEN_READER(re, s)
    UPDATE_CACHE(re, s)
    peeked = SHOW_UBITS(re, s, n);
    /* deliberately no CLOSE_READER: the local reader state is discarded */

    return peeked;
 }
 /**
  * Advances the reader by n bits without returning them.
  * (gcc seems to reduce this to s->index+=n for the ALT_READER.)
  */
 static inline void skip_bits(GetBitContext *s, int n){
    OPEN_READER(re, s)

    UPDATE_CACHE(re, s)
    LAST_SKIP_BITS(re, s, n)

    CLOSE_READER(re, s)
 }
 /**
  * Reads a single bit.
  * With ALT_BITSTREAM_READER the bit is picked directly out of the byte that
  * holds the current index; other readers go through get_bits().
  */
 static inline unsigned int get_bits1(GetBitContext *s){
 #ifdef ALT_BITSTREAM_READER
    const int pos = s->index;
    const unsigned int byte = s->buffer[ pos>>3 ];
    const int bit_in_byte = pos&0x07;

    s->index = pos + 1;
 #ifdef ALT_BITSTREAM_READER_LE
    /* little-endian bit order: bit 0 of the byte comes first */
    return (byte >> bit_in_byte) & 1;
 #else
    /* big-endian bit order: bit 7 of the byte comes first */
    return (byte >> (7 - bit_in_byte)) & 1;
 #endif
 #else
    return get_bits(s, 1);
 #endif
 }
 /** Peeks at the next bit without consuming it (show_bits() with n = 1). */
 static inline unsigned int show_bits1(GetBitContext *s)
 {
    const unsigned int next_bit = show_bits(s, 1);

    return next_bit;
 }
 /** Discards exactly one bit (skip_bits() with n = 1). */
 static inline void skip_bits1(GetBitContext *s)
 {
    skip_bits(s, 1);
 }
 /**
  * reads 0-32 bits.
  * @param s bit reader context, advanced by n bits
  * @param n number of bits to read; 0 returns 0 without touching the reader
  * @return the bits as an unsigned value
  */
 static inline unsigned int get_bits_long(GetBitContext *s, int n){
    /* get_bits() is only specified for 1-17 bits, so a zero-length read must
       not be forwarded to it. */
    if(n == 0) return 0;
    if(n<=17) return get_bits(s, n);
    else{
        /* Wider reads are split into a 16-bit read plus the remainder. The
           intermediate stays unsigned so that a 32-bit value with the top bit
           set never passes through a signed int. */
 #ifdef ALT_BITSTREAM_READER_LE
        unsigned int ret= get_bits(s, 16);
        return ret | (get_bits(s, n-16) << 16);
 #else
        unsigned int ret= get_bits(s, 16) << (n-16);
        return ret | get_bits(s, n-16);
 #endif
    }
 }
 #if 0
 /**
  * reads 0-32 bits as a signed integer.
  * Compiled out by the surrounding #if 0. It relies on sign_extend(), which is
  * not defined in the visible part of this header (presumably the reason it
  * is disabled - confirm before re-enabling).
  */
 static inline int get_sbits_long(GetBitContext *s, int n) {
    return sign_extend(get_bits_long(s, n), n);
 }
 #endif
 /**
  * shows 0-32 bits.
  * Short requests are served by show_bits(); longer ones are read from a
  * throw-away copy of the context so that the caller's position is unchanged.
  */
 static inline unsigned int show_bits_long(GetBitContext *s, int n){
    GetBitContext scratch;

    if(n<=17)
        return show_bits(s, n);

    scratch = *s;
    return get_bits_long(&scratch, n);
 }
 #if 0
 /**
  * Reads one bit and prints "Marker bit missing <msg>" when it is 0.
  * Compiled out by the surrounding #if 0.
  * @return the bit that was read
  */
 static inline int check_marker(GetBitContext *s, const char *msg)
 {
    int bit= get_bits1(s);
    if(!bit)
        printf("Marker bit missing %s\n", msg);
    return bit;
 }
 #endif
 /**
  * init GetBitContext.
  * @param s        context to initialize
  * @param buffer   bitstream buffer, must be FF_INPUT_BUFFER_PADDING_SIZE bytes larger than the actual read bits
  *                 because some optimized bitstream readers read 32 or 64 bit at once and could read over the end
  * @param bit_size the size of the buffer in bits; a negative size, or one so
  *                 large that rounding it up to whole bytes leaves the range
  *                 of int, yields an empty context (NULL buffer, 0 bits)
  */
 static inline void init_get_bits(GetBitContext *s,
                   const uint8_t *buffer, int bit_size)
 {
    /* Round up to whole bytes in unsigned arithmetic: computing bit_size+7 as
       a signed int is undefined behaviour for sizes close to the maximum. */
    const unsigned int padded_bits = (unsigned int)bit_size + 7u;
    int buffer_size;

    if(bit_size < 0 || padded_bits > (~0u >> 1)) { /* ~0u >> 1: largest int value */
        buffer_size = bit_size = 0;
        buffer = NULL;
    } else {
        buffer_size = (int)(padded_bits >> 3);
    }
    s->buffer= buffer;
    s->size_in_bits= bit_size;
    s->buffer_end= buffer + buffer_size;
 #ifdef ALT_BITSTREAM_READER
    s->index=0;
 #elif defined LIBMPEG2_BITSTREAM_READER
    /* align the read pointer down to 16 bits and account for the skipped byte */
    s->buffer_ptr = (uint8_t*)((intptr_t)buffer&(~1));
    s->bit_count = 16 + 8*((intptr_t)buffer&1);
    skip_bits_long(s, 0);
 #elif defined A32_BITSTREAM_READER
    /* align the read pointer down to 32 bits and account for the skipped bytes */
    s->buffer_ptr = (uint32_t*)((intptr_t)buffer&(~3));
    s->bit_count = 32 + 8*((intptr_t)buffer&3);
    skip_bits_long(s, 0);
 #endif
 }
 /** Skips forward to the next byte boundary; does nothing when already aligned. */
 static inline void align_get_bits(GetBitContext *s)
 {
    const int pad = (-get_bits_count(s)) & 7;

    if(pad == 0)
        return;
    skip_bits(s, pad);
 }
 /* init_vlc() is init_vlc_sparse() without a symbol table: it forwards all of
    its arguments and passes NULL, 0, 0 for symbols / symbols_wrap / symbols_size. */
 #define init_vlc(vlc, nb_bits, nb_codes,\
                 bits, bits_wrap, bits_size,\
                 codes, codes_wrap, codes_size,\
                 flags)\
        init_vlc_sparse(vlc, nb_bits, nb_codes,\
                 bits, bits_wrap, bits_size,\
                 codes, codes_wrap, codes_size,\
                 NULL, 0, 0, flags)
 /* Builds a VLC table; implemented outside this header. flags takes the
    INIT_VLC_* values below. */
 int init_vlc_sparse(VLC *vlc, int nb_bits, int nb_codes,
             const void *bits, int bits_wrap, int bits_size,
             const void *codes, int codes_wrap, int codes_size,
             const void *symbols, int symbols_wrap, int symbols_size,
             int flags);
 #define INIT_VLC_USE_STATIC 1 ///< VERY strongly deprecated and forbidden
 #define INIT_VLC_LE         2
 #define INIT_VLC_USE_NEW_STATIC 4
 /* Counterpart of init_vlc(); implemented outside this header. */
 void free_vlc(VLC *vlc);
 /* Initializes a VLC backed by a static table: declares a static
    VLC_TYPE[static_size][2] array in the enclosing block, points vlc->table at
    it, records static_size in vlc->table_allocated and calls init_vlc() with
    INIT_VLC_USE_NEW_STATIC. */
 #define INIT_VLC_STATIC(vlc, bits, a,b,c,d,e,f,g, static_size)\
 {\
    static VLC_TYPE table[static_size][2];\
    (vlc)->table= table;\
    (vlc)->table_allocated= static_size;\
    init_vlc(vlc, bits, a,b,c,d,e,f,g, INIT_VLC_USE_NEW_STATIC);\
 }
 /**
  * Decodes one VLC symbol into code, using an already opened reader "name".
  * The first lookup uses "bits" bits; a negative length entry means the code
  * continues in a sub-table whose base is the entry's code value and whose
  * index width is the negated length. Up to max_depth lookups are made.
  *
  * if the vlc code is invalid and max_depth=1 then no bits will be removed
  * if the vlc code is invalid and max_depth>1 then the number of bits removed
  * is undefined
  */
 #define GET_VLC(code, name, gb, table, bits, max_depth)\
 {\
    int n, index, nb_bits;\
 \
    index= SHOW_UBITS(name, gb, bits);\
    code = table[index][0];\
    n    = table[index][1];\
 \
    if(max_depth > 1 && n < 0){\
        LAST_SKIP_BITS(name, gb, bits)\
        UPDATE_CACHE(name, gb)\
 \
        nb_bits = -n;\
 \
        index= SHOW_UBITS(name, gb, nb_bits) + code;\
        code = table[index][0];\
        n    = table[index][1];\
        if(max_depth > 2 && n < 0){\
            LAST_SKIP_BITS(name, gb, nb_bits)\
            UPDATE_CACHE(name, gb)\
 \
            nb_bits = -n;\
 \
            index= SHOW_UBITS(name, gb, nb_bits) + code;\
            code = table[index][0];\
            n    = table[index][1];\
        }\
    }\
    SKIP_BITS(name, gb, n)\
 }
 /* Decodes one run/level pair from a table whose entries have .level, .len and
    .run members. A negative .len on the first lookup (and max_depth > 1) leads
    to a second lookup in a sub-table based at the first entry's level, indexed
    with -len bits. need_update selects whether the cache is refilled between
    the two lookups. */
 #define GET_RL_VLC(level, run, name, gb, table, bits, max_depth, need_update)\
 {\
    int n, index, nb_bits;\
 \
    index= SHOW_UBITS(name, gb, bits);\
    level = table[index].level;\
    n     = table[index].len;\
 \
    if(max_depth > 1 && n < 0){\
        SKIP_BITS(name, gb, bits)\
        if(need_update){\
            UPDATE_CACHE(name, gb)\
        }\
 \
        nb_bits = -n;\
 \
        index= SHOW_UBITS(name, gb, nb_bits) + level;\
        level = table[index].level;\
        n     = table[index].len;\
    }\
    run= table[index].run;\
    SKIP_BITS(name, gb, n)\
 }
 /**
  * Parses a vlc code, faster than get_vlc().
  * @param s     bit reader context, advanced past the decoded code
  * @param table lookup table of the VLC
  * @param bits  the number of bits which will be read at once, must be
  *              identical to nb_bits in init_vlc()
  * @param max_depth the number of times "bits" bits must be read to completely
  *                  read the longest vlc code
  *                  = (max_vlc_length + bits - 1) / bits
  * @return the decoded symbol
  */
 static inline int get_vlc2(GetBitContext *s, VLC_TYPE (*table)[2],
                                  int bits, int max_depth)
 {
    int symbol;

    OPEN_READER(re, s)
    UPDATE_CACHE(re, s)
    GET_VLC(symbol, re, s, table, bits, max_depth)
    CLOSE_READER(re, s)

    return symbol;
 }
 //#define TRACE
 #ifdef TRACE
 /**
  * Prints the low n bits of "bits", most significant first, then pads the
  * output with spaces up to a width of 24 characters.
  */
 static inline void print_bin(int bits, int n){
    int pos = n;

    while(pos-- > 0)
        printf("%d", (bits>>pos)&1);

    for(pos = n; pos < 24; pos++)
        printf(" ");
 }
 /**
  * get_bits() wrapper that logs the value read, its width, the bit position
  * it was read from and the caller's file, function and line.
  */
 static inline int get_bits_trace(GetBitContext *s, int n, char *file, const char *func, int line){
    const int value = get_bits(s, n);
    const int start = get_bits_count(s) - n;

    print_bin(value, n);
    printf("%5d %2d %3d bit @%5d in %s %s:%d\n", value, n, value, start, file, func, line);

    return value;
 }
 /**
  * get_vlc2() wrapper that logs the bit pattern consumed by the code, its
  * length, the decoded symbol, the start position and the caller's location.
  */
 static inline int get_vlc_trace(GetBitContext *s, VLC_TYPE (*table)[2], int bits, int max_depth, char *file, const char *func, int line){
    const int preview = show_bits(s, 24);
    const int start   = get_bits_count(s);
    const int symbol  = get_vlc2(s, table, bits, max_depth);
    const int used    = get_bits_count(s) - start;
    /* keep only the bits of the 24-bit preview that the code actually used */
    const int pattern = preview>>(24-used);

    print_bin(pattern, used);
    printf("%5d %2d %3d vlc @%5d in %s %s:%d\n", pattern, used, symbol, start, file, func, line);

    return symbol;
 }
 /**
  * get_xbits() wrapper that logs the raw bits, their width, the decoded
  * value, the bit position and the caller's file, function and line.
  */
 static inline int get_xbits_trace(GetBitContext *s, int n, char *file, const char *func, int line){
    const int raw     = show_bits(s, n);
    const int decoded = get_xbits(s, n);
    const int start   = get_bits_count(s) - n;

    print_bin(raw, n);
    printf("%5d %2d %3d xbt @%5d in %s %s:%d\n", raw, n, decoded, start, file, func, line);

    return decoded;
 }
 /* With TRACE defined, code after this point reaches the readers through the
    *_trace wrappers, so every read is logged together with the calling file,
    function and line. */
 #define get_bits(s, n)  get_bits_trace(s, n, __FILE__, __PRETTY_FUNCTION__, __LINE__)
 #define get_bits1(s)    get_bits_trace(s, 1, __FILE__, __PRETTY_FUNCTION__, __LINE__)
 #define get_xbits(s, n) get_xbits_trace(s, n, __FILE__, __PRETTY_FUNCTION__, __LINE__)
 #define get_vlc(s, vlc)            get_vlc_trace(s, (vlc)->table, (vlc)->bits, 3, __FILE__, __PRETTY_FUNCTION__, __LINE__)
 #define get_vlc2(s, tab, bits, max) get_vlc_trace(s, tab, bits, max, __FILE__, __PRETTY_FUNCTION__, __LINE__)
 /* Trace print: the context argument p is ignored and the remaining arguments
    (format string first) are handed to printf(). The arguments must be
    forwarded explicitly, otherwise the macro expands to the bare identifier
    "printf" and nothing is printed. */
 #define tprintf(p, ...) printf(__VA_ARGS__)
 #else //TRACE
 /* Without TRACE, trace prints expand to an empty block and cost nothing. */
 #define tprintf(p, ...) {}
 #endif
 /**
  * Decodes a value in 0..2 from one or two bits:
  * "0" gives 0, "10" gives 1 and "11" gives 2.
  */
 static inline int decode012(GetBitContext *gb){
    if (!get_bits1(gb))
        return 0;

    return get_bits1(gb) + 1;
 }
 /**
  * Decodes a value in 0..2 from one or two bits:
  * "1" gives 0, "01" gives 1 and "00" gives 2.
  */
 static inline int decode210(GetBitContext *gb){
    return get_bits1(gb) ? 0 : 2 - get_bits1(gb);
 }
 #endif /* BITSTREAM_H */
--- a/apps/codecs/libatrac/bswap.h
+++ b/apps/codecs/libatrac/bswap.h
@ -0,0 +1,150 @@
 /**
 * @file bswap.h
 * byte swap.
 */
 #ifndef __BSWAP_H__
 #define __BSWAP_H__
 #ifdef HAVE_BYTESWAP_H
 #include <byteswap.h>
 #else
 #ifdef ROCKBOX
 #include "codecs.h"
 /* rockbox' optimised inline functions */
 #define bswap_16(x) swap16(x)
 #define bswap_32(x) swap32(x)
 /**
  * Reverses the byte order of a 64-bit value.
  * The bytes of each 32-bit half are reversed with bswap_32() and the two
  * halves then exchange places. The result is assembled with shifts instead of
  * a union, so it does not depend on the byte order of the host (the former
  * union layout only gave the right answer on big-endian machines).
  * @param x value to swap
  * @return x with its eight bytes in reverse order
  */
 static inline uint64_t ByteSwap64(uint64_t x)
 {
    const uint64_t swapped_low  = (uint32_t)bswap_32 ((uint32_t)x);
    const uint64_t swapped_high = (uint32_t)bswap_32 ((uint32_t)(x>>32));

    /* the swapped low half becomes the new high half and vice versa */
    return (swapped_low << 32) | swapped_high;
 }
 #define bswap_64(x) ByteSwap64(x)
 #elif defined(ARCH_X86)
 /* x86: swaps the two bytes of a 16-bit value by exchanging the low and high
    byte registers of the operand (xchgb). */
 static inline unsigned short ByteSwap16(unsigned short x)
 {
  __asm("xchgb %b0,%h0"	:
        "=q" (x)	:
        "0" (x));
    return x;
 }
 #define bswap_16(x) ByteSwap16(x)
 /* x86: reverses the bytes of a 32-bit value. Uses the bswap instruction when
    __CPU__ is above 386, otherwise an xchgb / rorl $16 / xchgb sequence. */
 static inline unsigned int ByteSwap32(unsigned int x)
 {
 #if __CPU__ > 386
 __asm("bswap	%0":
      "=r" (x)     :
 #else
 __asm("xchgb	%b0,%h0\n"
      "	rorl	$16,%0\n"
      "	xchgb	%b0,%h0":
      "=q" (x)		:
 #endif
      "0" (x));
  return x;
 }
 #define bswap_32(x) ByteSwap32(x)
 /* x86: reverses the bytes of a 64-bit value. Both 32-bit halves are swapped
    with bswap_32() and then exchanged with each other by xchgl. */
 static inline unsigned long long int ByteSwap64(unsigned long long int x)
 {
  register union { __extension__ uint64_t __ll;
          uint32_t __l[2]; } __x;
  asm("xchgl	%0,%1":
      "=r"(__x.__l[0]),"=r"(__x.__l[1]):
      "0"(bswap_32((unsigned long)x)),"1"(bswap_32((unsigned long)(x>>32))));
  return __x.__ll;
 }
 #define bswap_64(x) ByteSwap64(x)
 #elif defined(ARCH_SH4)
 /* SH4: swaps the two bytes of a 16-bit value with a single swap.b. */
 static inline uint16_t ByteSwap16(uint16_t x) {
 	__asm__("swap.b %0,%0":"=r"(x):"0"(x));
 	return x;
 }
 /* SH4: reverses the bytes of a 32-bit value with swap.b, swap.w, swap.b. */
 static inline uint32_t ByteSwap32(uint32_t x) {
 	__asm__(
 	"swap.b %0,%0\n"
 	"swap.w %0,%0\n"
 	"swap.b %0,%0\n"
 	:"=r"(x):"0"(x));
 	return x;
 }
 #define bswap_16(x) ByteSwap16(x)
 #define bswap_32(x) ByteSwap32(x)
 /**
  * Reverses the byte order of a 64-bit value.
  * The bytes of each 32-bit half are reversed with bswap_32() and the two
  * halves then exchange places. The result is assembled with shifts instead of
  * a union, so it does not depend on the byte order the CPU runs in (the
  * former union layout only gave the right answer in big-endian mode).
  * @param x value to swap
  * @return x with its eight bytes in reverse order
  */
 static inline uint64_t ByteSwap64(uint64_t x)
 {
    const uint64_t swapped_low  = (uint32_t)bswap_32 ((uint32_t)x);
    const uint64_t swapped_high = (uint32_t)bswap_32 ((uint32_t)(x>>32));

    /* the swapped low half becomes the new high half and vice versa */
    return (swapped_low << 32) | swapped_high;
 }
 #define bswap_64(x) ByteSwap64(x)
 #else
 /* Generic byte swaps written as plain expressions. The argument is evaluated
    more than once, so it must not have side effects. The whole expansion is
    parenthesized so that it can be used inside a larger expression. */
 #define bswap_16(x) ((((x) & 0x00ff) << 8) | (((x) & 0xff00) >> 8))
 // code from bits/byteswap.h (C) 1997, 1998 Free Software Foundation, Inc.
 #define bswap_32(x) \
     ((((x) & 0xff000000) >> 24) | (((x) & 0x00ff0000) >>  8) | \
      (((x) & 0x0000ff00) <<  8) | (((x) & 0x000000ff) << 24))
 /* 64-bit byte swap as a plain expression, in the same style as bswap_32
    above. No ByteSwap64() function is compiled in this branch, so the macro
    must not refer to one. The argument is evaluated more than once and must
    not have side effects. */
 #define bswap_64(x) \
     ((((x) & 0xff00000000000000ULL) >> 56) | (((x) & 0x00ff000000000000ULL) >> 40) | \
      (((x) & 0x0000ff0000000000ULL) >> 24) | (((x) & 0x000000ff00000000ULL) >>  8) | \
      (((x) & 0x00000000ff000000ULL) <<  8) | (((x) & 0x0000000000ff0000ULL) << 24) | \
      (((x) & 0x000000000000ff00ULL) << 40) | (((x) & 0x00000000000000ffULL) << 56))
 #endif	/* !ARCH_X86 */
 #endif	/* !HAVE_BYTESWAP_H */
 // be2me ... BigEndian to MachineEndian
 // le2me ... LittleEndian to MachineEndian
 // When WORDS_BIGENDIAN is defined the be2me_* macros pass their argument
 // through and the le2me_* macros byte-swap it; otherwise the roles are reversed.
 #ifdef WORDS_BIGENDIAN
 #define be2me_16(x) (x)
 #define be2me_32(x) (x)
 #define be2me_64(x) (x)
 #define le2me_16(x) bswap_16(x)
 #define le2me_32(x) bswap_32(x)
 #define le2me_64(x) bswap_64(x)
 #else
 #define be2me_16(x) bswap_16(x)
 #define be2me_32(x) bswap_32(x)
 #define be2me_64(x) bswap_64(x)
 #define le2me_16(x) (x)
 #define le2me_32(x) (x)
 #define le2me_64(x) (x)
 #endif
 #endif /* __BSWAP_H__ */
--- a/apps/codecs/libatrac/bytestream.h
+++ b/apps/codecs/libatrac/bytestream.h
@ -0,0 +1,71 @@
 /*
 * Bytestream functions
 * copyright (c) 2006 Baptiste Coudurier <baptiste.coudurier@free.fr>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
 #ifndef AVCODEC_BYTESTREAM_H
 #define AVCODEC_BYTESTREAM_H
 #include <string.h>
 #include "libavutil/common.h"
 #include "libavutil/intreadwrite.h"
 /* Generates a getter/putter pair for one fixed-size value:
    bytestream_get_<name>() advances *b by "bytes" and returns the value that
    "read" extracts from the position *b had before the advance;
    bytestream_put_<name>() stores value at *b with "write" and then advances
    *b by "bytes". */
 #define DEF_T(type, name, bytes, read, write)                             \
 static av_always_inline type bytestream_get_ ## name(const uint8_t **b){\
    (*b) += bytes;\
    return read(*b - bytes);\
 }\
 static av_always_inline void bytestream_put_ ##name(uint8_t **b, const type value){\
    write(*b, value);\
    (*b) += bytes;\
 }
 /* DEF instantiates DEF_T with unsigned int as value type, DEF64 with uint64_t. */
 #define DEF(name, bytes, read, write) \
    DEF_T(unsigned int, name, bytes, read, write)
 #define DEF64(name, bytes, read, write) \
    DEF_T(uint64_t, name, bytes, read, write)
 /* One get/put pair per width (8 to 64 bits); le* uses the AV_RL / AV_WL
    accessors and be* the AV_RB / AV_WB accessors. */
 DEF64(le64, 8, AV_RL64, AV_WL64)
 DEF  (le32, 4, AV_RL32, AV_WL32)
 DEF  (le24, 3, AV_RL24, AV_WL24)
 DEF  (le16, 2, AV_RL16, AV_WL16)
 DEF64(be64, 8, AV_RB64, AV_WB64)
 DEF  (be32, 4, AV_RB32, AV_WB32)
 DEF  (be24, 3, AV_RB24, AV_WB24)
 DEF  (be16, 2, AV_RB16, AV_WB16)
 DEF  (byte, 1, AV_RB8 , AV_WB8 )
 /* the generator macros are only needed for the instantiations above */
 #undef DEF
 #undef DEF64
 #undef DEF_T
 /**
  * Copies size bytes from the stream position *b into dst and advances *b
  * past them.
  * @param b    pointer to the current read position, updated on return
  * @param dst  destination buffer, at least size bytes long
  * @param size number of bytes to copy
  * @return size
  */
 static av_always_inline unsigned int bytestream_get_buffer(const uint8_t **b, uint8_t *dst, unsigned int size)
 {
    const uint8_t *const from = *b;

    memcpy(dst, from, size);
    *b += size;

    return size;
 }
 /**
  * Copies size bytes from src to the stream position *b and advances *b
  * past them.
  * @param b    pointer to the current write position, updated on return
  * @param src  source buffer, at least size bytes long
  * @param size number of bytes to copy
  */
 static av_always_inline void bytestream_put_buffer(uint8_t **b, const uint8_t *src, unsigned int size)
 {
    uint8_t *const to = *b;

    memcpy(to, src, size);
    *b += size;
 }
 #endif /* AVCODEC_BYTESTREAM_H */
--- a/apps/codecs/libatrac/dsputil.c
+++ b/apps/codecs/libatrac/dsputil.c
--- a/apps/codecs/libatrac/dsputil.h
+++ b/apps/codecs/libatrac/dsputil.h
@ -0,0 +1,898 @@
 /*
 * DSP utils
 * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
 /**
 * @file libavcodec/dsputil.h
 * DSP utils.
 * note, many functions in here may use MMX which trashes the FPU state, it is
 * absolutely necessary to call emms_c() between dsp & float/double code
 */
 #ifndef AVCODEC_DSPUTIL_H
 #define AVCODEC_DSPUTIL_H
 #include "libavutil/intreadwrite.h"
 #include "avcodec.h"
 //#define DEBUG
 /* dct code */
 typedef short DCTELEM;
 typedef int DWTELEM;
 typedef short IDWTELEM;
 void fdct_ifast (DCTELEM *data);
 void fdct_ifast248 (DCTELEM *data);
 void ff_jpeg_fdct_islow (DCTELEM *data);
 void ff_fdct248_islow (DCTELEM *data);
 void j_rev_dct (DCTELEM *data);
 void j_rev_dct4 (DCTELEM *data);
 void j_rev_dct2 (DCTELEM *data);
 void j_rev_dct1 (DCTELEM *data);
 void ff_wmv2_idct_c(DCTELEM *data);
 void ff_fdct_mmx(DCTELEM *block);
 void ff_fdct_mmx2(DCTELEM *block);
 void ff_fdct_sse2(DCTELEM *block);
 void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride);
 void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride);
 void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride);
 void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride);
 void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block);
 void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block);
 void ff_h264_idct_add16_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
 void ff_h264_idct_add16intra_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
 void ff_h264_idct8_add4_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
 void ff_h264_idct_add8_c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
 void ff_vector_fmul_add_add_c(float *dst, const float *src0, const float *src1,
                              const float *src2, int src3, int blocksize, int step);
 void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1,
                             const float *win, float add_bias, int len);
 void ff_float_to_int16_c(int16_t *dst, const float *src, long len);
 void ff_float_to_int16_interleave_c(int16_t *dst, const float **src, long len, int channels);
 /* encoding scans */
 extern const uint8_t ff_alternate_horizontal_scan[64];
 extern const uint8_t ff_alternate_vertical_scan[64];
 extern const uint8_t ff_zigzag_direct[64];
 extern const uint8_t ff_zigzag248_direct[64];
 /* pixel operations */
 #define MAX_NEG_CROP 1024
 /* temporary */
 extern uint32_t ff_squareTbl[512];
 extern uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP];
 /* VP3 DSP functions */
 void ff_vp3_idct_c(DCTELEM *block/* align 16*/);
 void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
 void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
 void ff_vp3_v_loop_filter_c(uint8_t *src, int stride, int *bounding_values);
 void ff_vp3_h_loop_filter_c(uint8_t *src, int stride, int *bounding_values);
 /* VP6 DSP functions */
 void ff_vp6_filter_diag4_c(uint8_t *dst, uint8_t *src, int stride,
                           const int16_t *h_weights, const int16_t *v_weights);
 /* 1/2^n downscaling functions from imgconvert.c */
 void ff_img_copy_plane(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
 void ff_shrink22(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
 void ff_shrink44(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
 void ff_shrink88(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
 void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
 /* minimum alignment rules ;)
 If you notice errors in the align stuff, need more alignment for some ASM code
 for some CPU or need to use a function with less aligned data then send a mail
 to the ffmpeg-devel mailing list, ...
 !warning These alignments might not match reality, (missing attribute((align))
 stuff somewhere possible).
 I (Michael) did not check them, these are just the alignments which I think
 could be reached easily ...
 !future video codecs might need functions with less strict alignment
 */
 /*
 void get_pixels_c(DCTELEM *block, const uint8_t *pixels, int line_size);
 void diff_pixels_c(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride);
 void put_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size);
 void add_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size);
 void clear_blocks_c(DCTELEM *blocks);
 */
 /* add and put pixel (decoding) */
 // blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16
 // h for op_pixels_func is limited to {width/2, width} but never larger than 16 and never smaller than 4
 /* Function pointer types for the motion-compensation and weighting entries of
    DSPContext; the alignment expected of each pointer argument is noted inline. */
 typedef void (*op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int h);
 typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int w, int h);
 typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);
 typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y);
 typedef void (*h264_weight_func)(uint8_t *block, int stride, int log2_denom, int weight, int offset);
 typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset);
 /* Declares the three variants ff_put_<name>, ff_put_no_rnd_<name> and
    ff_avg_<name> of one quarter-pel function; all share the qpel_mc_func
    argument list (dst, src, stride). */
 #define DEF_OLD_QPEL(name)\
 void ff_put_        ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\
 void ff_put_no_rnd_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\
 void ff_avg_        ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);
 DEF_OLD_QPEL(qpel16_mc11_old_c)
 DEF_OLD_QPEL(qpel16_mc31_old_c)
 DEF_OLD_QPEL(qpel16_mc12_old_c)
 DEF_OLD_QPEL(qpel16_mc32_old_c)
 DEF_OLD_QPEL(qpel16_mc13_old_c)
 DEF_OLD_QPEL(qpel16_mc33_old_c)
 DEF_OLD_QPEL(qpel8_mc11_old_c)
 DEF_OLD_QPEL(qpel8_mc31_old_c)
 DEF_OLD_QPEL(qpel8_mc12_old_c)
 DEF_OLD_QPEL(qpel8_mc32_old_c)
 DEF_OLD_QPEL(qpel8_mc13_old_c)
 DEF_OLD_QPEL(qpel8_mc33_old_c)
 /* Defines a static function a() with the op_pixels_func argument list that
    calls b() twice: once at block/pixels and once at an offset of n bytes. */
 #define CALL_2X_PIXELS(a, b, n)\
 static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    b(block  , pixels  , line_size, h);\
    b(block+n, pixels+n, line_size, h);\
 }
 /* motion estimation */
 // h is limited to {width/2, width, 2*width} but never larger than 16 and never smaller than 2
 // although currently h<4 is not used as functions with width <8 are neither used nor implemented
 /* Block comparison function; s is passed as void * (a MpegEncContext according
    to the inline note). */
 typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size, int h)/* __attribute__ ((const))*/;
 // for snow slices
 typedef struct slice_buffer_s slice_buffer;
 /**
  * Scantable.
  * Holds a pointer to a source scan order plus 64-entry tables derived from
  * it; filled in by ff_init_scantable().
  */
 typedef struct ScanTable{
    const uint8_t *scantable;   /* source scan order (presumably 64 entries - confirm) */
    uint8_t permutated[64];     /* presumably the scan order after the IDCT permutation - confirm */
    uint8_t raster_end[64];     /* NOTE(review): meaning not visible here; see ff_init_scantable() */
 #if ARCH_PPC
                /** Used by dct_quantize_altivec to find last-non-zero */
    DECLARE_ALIGNED(16, uint8_t, inverse[64]);
 #endif
 } ScanTable;
 void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable);
 void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize,
                         int block_w, int block_h,
                         int src_x, int src_y, int w, int h);
 /**
 * DSPContext.
 */
 typedef struct DSPContext {
    /* pixel ops : interface with DCT */
    void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size);
    void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride);
    void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
    void (*put_signed_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
    void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
    void (*add_pixels8)(uint8_t *pixels, DCTELEM *block, int line_size);
    void (*add_pixels4)(uint8_t *pixels, DCTELEM *block, int line_size);
    int (*sum_abs_dctelem)(DCTELEM *block/*align 16*/);
    /**
     * translational global motion compensation.
     */
    void (*gmc1)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder);
    /**
     * global motion compensation.
     */
    void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy,
                    int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
    void (*clear_block)(DCTELEM *block/*align 16*/);
    void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
    int (*pix_sum)(uint8_t * pix, int line_size);
    int (*pix_norm1)(uint8_t * pix, int line_size);
 // 16x16 8x8 4x4 2x2 16x8 8x4 4x2 8x16 4x8 2x4
    me_cmp_func sad[6]; /* identical to pix_absAxA except additional void * */
    me_cmp_func sse[6];
    me_cmp_func hadamard8_diff[6];
    me_cmp_func dct_sad[6];
    me_cmp_func quant_psnr[6];
    me_cmp_func bit[6];
    me_cmp_func rd[6];
    me_cmp_func vsad[6];
    me_cmp_func vsse[6];
    me_cmp_func nsse[6];
    me_cmp_func w53[6];
    me_cmp_func w97[6];
    me_cmp_func dct_max[6];
    me_cmp_func dct264_sad[6];
    me_cmp_func me_pre_cmp[6];
    me_cmp_func me_cmp[6];
    me_cmp_func me_sub_cmp[6];
    me_cmp_func mb_cmp[6];
    me_cmp_func ildct_cmp[6]; //only width 16 used
    me_cmp_func frame_skip_cmp[6]; //only width 8 used
    int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2,
                             int size);
    /**
     * Halfpel motion compensation with rounding (a+b+1)>>1.
     * this is an array[4][4] of motion compensation functions for 4
     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
     * @param block destination where the result is stored
     * @param pixels source
     * @param line_size number of bytes in a horizontal line of block
     * @param h height
     */
    op_pixels_func put_pixels_tab[4][4];
    /**
     * Halfpel motion compensation with rounding (a+b+1)>>1.
     * This is an array[4][4] of motion compensation functions for 4
     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
     * @param block destination into which the result is averaged (a+b+1)>>1
     * @param pixels source
     * @param line_size number of bytes in a horizontal line of block
     * @param h height
     */
    op_pixels_func avg_pixels_tab[4][4];
    /**
     * Halfpel motion compensation with no rounding (a+b)>>1.
     * this is an array[2][4] of motion compensation functions for 2
     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
     * @param block destination where the result is stored
     * @param pixels source
     * @param line_size number of bytes in a horizontal line of block
     * @param h height
     */
    op_pixels_func put_no_rnd_pixels_tab[4][4];
    /**
     * Halfpel motion compensation with no rounding (a+b)>>1.
     * this is an array[2][4] of motion compensation functions for 2
     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
     * @param block destination into which the result is averaged (a+b)>>1
     * @param pixels source
     * @param line_size number of bytes in a horizontal line of block
     * @param h height
     */
    op_pixels_func avg_no_rnd_pixels_tab[4][4];
    void (*put_no_rnd_pixels_l2[2])(uint8_t *block/*align width (8 or 16)*/, const uint8_t *a/*align 1*/, const uint8_t *b/*align 1*/, int line_size, int h);
    /**
     * Thirdpel motion compensation with rounding (a+b+1)>>1.
     * this is an array[12] of motion compensation functions for the 9 thirdpe
     * positions<br>
     * *pixels_tab[ xthirdpel + 4*ythirdpel ]
     * @param block destination where the result is stored
     * @param pixels source
     * @param line_size number of bytes in a horizontal line of block
     * @param h height
     */
    tpel_mc_func put_tpel_pixels_tab[11]; //FIXME individual func ptr per width?
    tpel_mc_func avg_tpel_pixels_tab[11]; //FIXME individual func ptr per width?
    qpel_mc_func put_qpel_pixels_tab[2][16];
    qpel_mc_func avg_qpel_pixels_tab[2][16];
    qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
    qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16];
    qpel_mc_func put_mspel_pixels_tab[8];
    /**
     * h264 Chroma MC
     */
    h264_chroma_mc_func put_h264_chroma_pixels_tab[3];
    /* This is really one func used in VC-1 decoding */
    h264_chroma_mc_func put_no_rnd_h264_chroma_pixels_tab[3];
    h264_chroma_mc_func avg_h264_chroma_pixels_tab[3];
    qpel_mc_func put_h264_qpel_pixels_tab[4][16];
    qpel_mc_func avg_h264_qpel_pixels_tab[4][16];
    qpel_mc_func put_2tap_qpel_pixels_tab[4][16];
    qpel_mc_func avg_2tap_qpel_pixels_tab[4][16];
    h264_weight_func weight_h264_pixels_tab[10];
    h264_biweight_func biweight_h264_pixels_tab[10];
    /* AVS specific */
    qpel_mc_func put_cavs_qpel_pixels_tab[2][16];
    qpel_mc_func avg_cavs_qpel_pixels_tab[2][16];
    void (*cavs_filter_lv)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
    void (*cavs_filter_lh)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
    void (*cavs_filter_cv)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
    void (*cavs_filter_ch)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
    void (*cavs_idct8_add)(uint8_t *dst, DCTELEM *block, int stride);
    me_cmp_func pix_abs[2][4];
    /* huffyuv specific */
    void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w);
    void (*add_bytes_l2)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 16*/, int w);
    void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w);
    /**
     * subtract huffyuv's variant of median prediction
     * note, this might read from src1[-1], src2[-1]
     */
    void (*sub_hfyu_median_prediction)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top);
    void (*add_hfyu_median_prediction)(uint8_t *dst, uint8_t *top, uint8_t *diff, int w, int *left, int *left_top);
    /* this might write to dst[w] */
    void (*add_png_paeth_prediction)(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp);
    void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w);
    void (*h264_v_loop_filter_luma)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0);
    void (*h264_h_loop_filter_luma)(uint8_t *pix/*align 4 */, int stride, int alpha, int beta, int8_t *tc0);
    /* v/h_loop_filter_luma_intra: align 16 */
    void (*h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta);
    void (*h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta);
    void (*h264_v_loop_filter_chroma)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta, int8_t *tc0);
    void (*h264_h_loop_filter_chroma)(uint8_t *pix/*align 4*/, int stride, int alpha, int beta, int8_t *tc0);
    void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta);
    void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta);
    // h264_loop_filter_strength: simd only. the C version is inlined in h264.c
    void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2],
                                      int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field);
    void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale);
    void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale);
    void (*h261_loop_filter)(uint8_t *src, int stride);
    void (*x8_v_loop_filter)(uint8_t *src, int stride, int qscale);
    void (*x8_h_loop_filter)(uint8_t *src, int stride, int qscale);
    void (*vp3_v_loop_filter)(uint8_t *src, int stride, int *bounding_values);
    void (*vp3_h_loop_filter)(uint8_t *src, int stride, int *bounding_values);
    void (*vp6_filter_diag4)(uint8_t *dst, uint8_t *src, int stride,
                             const int16_t *h_weights,const int16_t *v_weights);
    /* assume len is a multiple of 4, and arrays are 16-byte aligned */
    void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize);
    void (*ac3_downmix)(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len);
    /* no alignment needed */
    void (*flac_compute_autocorr)(const int32_t *data, int len, int lag, double *autoc);
    /* assume len is a multiple of 8, and arrays are 16-byte aligned */
    void (*vector_fmul)(float *dst, const float *src, int len);
    void (*vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len);
    /* assume len is a multiple of 8, and src arrays are 16-byte aligned */
    void (*vector_fmul_add_add)(float *dst, const float *src0, const float *src1, const float *src2, int src3, int len, int step);
    /* assume len is a multiple of 4, and arrays are 16-byte aligned */
    void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len);
    /* assume len is a multiple of 8, and arrays are 16-byte aligned */
    void (*int32_to_float_fmul_scalar)(float *dst, const int *src, float mul, int len);
    /* C version: convert floats from the range [384.0,386.0] to ints in [-32768,32767]
     * simd versions: convert floats from [-32768.0,32767.0] without rescaling and arrays are 16byte aligned */
    void (*float_to_int16)(int16_t *dst, const float *src, long len);
    void (*float_to_int16_interleave)(int16_t *dst, const float **src, long len, int channels);
    /* (I)DCT */
    void (*fdct)(DCTELEM *block/* align 16*/);
    void (*fdct248)(DCTELEM *block/* align 16*/);
    /* IDCT really*/
    void (*idct)(DCTELEM *block/* align 16*/);
    /**
     * block -> idct -> clip to unsigned 8 bit -> dest.
     * (-1392, 0, 0, ...) -> idct -> (-174, -174, ...) -> put -> (0, 0, ...)
     * @param line_size size in bytes of a horizontal line of dest
     */
    void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
    /**
     * block -> idct -> add dest -> clip to unsigned 8 bit -> dest.
     * @param line_size size in bytes of a horizontal line of dest
     */
    void (*idct_add)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
    /**
     * idct input permutation.
     * several optimized IDCTs need a permutated input (relative to the normal order of the reference
     * IDCT)
     * this permutation must be performed before the idct_put/add, note, normally this can be merged
     * with the zigzag/alternate scan<br>
     * an example to avoid confusion:
     * - (->decode coeffs -> zigzag reorder -> dequant -> reference idct ->...)
     * - (x -> reference dct -> reference idct -> x)
     * - (x -> reference dct -> simple_mmx_perm = idct_permutation -> simple_idct_mmx -> x)
     * - (->decode coeffs -> zigzag reorder -> simple_mmx_perm -> dequant -> simple_idct_mmx ->...)
     */
    uint8_t idct_permutation[64];
    int idct_permutation_type;
 #define FF_NO_IDCT_PERM 1
 #define FF_LIBMPEG2_IDCT_PERM 2
 #define FF_SIMPLE_IDCT_PERM 3
 #define FF_TRANSPOSE_IDCT_PERM 4
 #define FF_PARTTRANS_IDCT_PERM 5
 #define FF_SSE2_IDCT_PERM 6
    int (*try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale);
    void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale);
 #define BASIS_SHIFT 16
 #define RECON_SHIFT 6
    void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w);
 #define EDGE_WIDTH 16
    /* h264 functions */
    /* NOTE!!! if you implement any of h264_idct8_add, h264_idct8_add4 then you must implement all of them
       NOTE!!! if you implement any of h264_idct_add, h264_idct_add16, h264_idct_add16intra, h264_idct_add8 then you must implement all of them
        The reason for above, is that no 2 out of one list may use a different permutation.
    */
    void (*h264_idct_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride);
    void (*h264_idct8_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride);
    void (*h264_idct_dc_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride);
    void (*h264_idct8_dc_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride);
    void (*h264_dct)(DCTELEM block[4][4]);
    void (*h264_idct_add16)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
    void (*h264_idct8_add4)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
    void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
    void (*h264_idct_add16intra)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
    /* snow wavelet */
    void (*vertical_compose97i)(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
    void (*horizontal_compose97i)(IDWTELEM *b, int width);
    void (*inner_add_yblock)(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
    void (*prefetch)(void *mem, int stride, int h);
    void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
    /* vc1 functions */
    void (*vc1_inv_trans_8x8)(DCTELEM *b);
    void (*vc1_inv_trans_8x4)(uint8_t *dest, int line_size, DCTELEM *block);
    void (*vc1_inv_trans_4x8)(uint8_t *dest, int line_size, DCTELEM *block);
    void (*vc1_inv_trans_4x4)(uint8_t *dest, int line_size, DCTELEM *block);
    void (*vc1_v_overlap)(uint8_t* src, int stride);
    void (*vc1_h_overlap)(uint8_t* src, int stride);
    /* put 8x8 block with bicubic interpolation and quarterpel precision
     * last argument is actually round value instead of height
     */
    op_pixels_func put_vc1_mspel_pixels_tab[16];
    /* intrax8 functions */
    void (*x8_spatial_compensation[12])(uint8_t *src , uint8_t *dst, int linesize);
    void (*x8_setup_spatial_compensation)(uint8_t *src, uint8_t *dst, int linesize,
           int * range, int * sum,  int edges);
    /* ape functions */
    /**
     * Add contents of the second vector to the first one.
     * @param len length of vectors, should be multiple of 16
     */
    void (*add_int16)(int16_t *v1/*align 16*/, int16_t *v2, int len);
    /**
     * Subtract contents of the second vector from the first one.
     * @param len length of vectors, should be multiple of 16
     */
    void (*sub_int16)(int16_t *v1/*align 16*/, int16_t *v2, int len);
    /**
     * Calculate scalar product of two vectors.
     * @param len length of vectors, should be multiple of 16
     * @param shift number of bits to discard from product
     */
    int32_t (*scalarproduct_int16)(int16_t *v1, int16_t *v2/*align 16*/, int len, int shift);
    /* rv30 functions */
    qpel_mc_func put_rv30_tpel_pixels_tab[4][16];
    qpel_mc_func avg_rv30_tpel_pixels_tab[4][16];
    /* rv40 functions */
    qpel_mc_func put_rv40_qpel_pixels_tab[4][16];
    qpel_mc_func avg_rv40_qpel_pixels_tab[4][16];
    h264_chroma_mc_func put_rv40_chroma_pixels_tab[3];
    h264_chroma_mc_func avg_rv40_chroma_pixels_tab[3];
 } DSPContext;
 void dsputil_static_init(void);
 void dsputil_init(DSPContext* p);
 int ff_check_alignment(void);
 /**
 * permute block according to permutation.
 * @param last last non zero element in scantable order
 */
 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last);
 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type);
 #define         BYTE_VEC32(c)   ((c)*0x01010101UL)
 /**
 * Average two 32-bit words treated as four independent bytes, rounding
 * each byte's half up: per byte the result is (a + b + 1) >> 1.
 */
 static inline uint32_t rnd_avg32(uint32_t a, uint32_t b)
 {
    const uint32_t any_bits  = a | b;
    const uint32_t diff_bits = a ^ b;
    /* drop bit 0 of every byte first so the shift cannot leak into the
       neighbouring byte */
    const uint32_t half_diff = (diff_bits & ~BYTE_VEC32(0x01)) >> 1;

    return any_bits - half_diff;
 }
 /**
 * Average two 32-bit words treated as four independent bytes, rounding
 * each byte's half down: per byte the result is (a + b) >> 1.
 */
 static inline uint32_t no_rnd_avg32(uint32_t a, uint32_t b)
 {
    const uint32_t common_bits = a & b;
    const uint32_t diff_bits   = a ^ b;
    /* drop bit 0 of every byte first so the shift cannot leak into the
       neighbouring byte */
    const uint32_t half_diff   = (diff_bits & ~BYTE_VEC32(0x01)) >> 1;

    return common_bits + half_diff;
 }
 /**
 * Derive the penalty factor that goes with a comparison function.
 * Only the low 8 bits of type select the comparison; unknown values are
 * treated like FF_CMP_SAD.
 * @param lambda  value scaled for most comparison functions
 * @param lambda2 value used for the RD/PSNR/SSE/NSSE comparisons
 * @param type    FF_CMP_* identifier (upper bits are ignored)
 */
 static inline int get_penalty_factor(int lambda, int lambda2, int type){
    const int cmp_id = type & 0xFF;
    int penalty;

    switch (cmp_id) {
    case FF_CMP_DCT:
        penalty = (3*lambda) >> (FF_LAMBDA_SHIFT+1);
        break;
    case FF_CMP_W53:
        penalty = (4*lambda) >> (FF_LAMBDA_SHIFT);
        break;
    case FF_CMP_W97:
    case FF_CMP_SATD:
    case FF_CMP_DCT264:
        penalty = (2*lambda) >> (FF_LAMBDA_SHIFT);
        break;
    case FF_CMP_RD:
    case FF_CMP_PSNR:
    case FF_CMP_SSE:
    case FF_CMP_NSSE:
        penalty = lambda2 >> FF_LAMBDA_SHIFT;
        break;
    case FF_CMP_BIT:
        penalty = 1;
        break;
    case FF_CMP_SAD:
    default:
        penalty = lambda >> FF_LAMBDA_SHIFT;
        break;
    }
    return penalty;
 }
 /**
 * Empty mmx state.
 * this must be called between any dsp function and float/double code.
 * for example sin(); dsp->idct_put(); emms_c(); cos()
 */
 #define emms_c()
 /* should be defined by architectures supporting
   one or more MultiMedia extension */
 int mm_support(void);
 void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx);
 void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx);
 void dsputil_init_bfin(DSPContext* c, AVCodecContext *avctx);
 void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx);
 void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx);
 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx);
 void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx);
 void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx);
 void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx);
 #define DECLARE_ALIGNED_16(t, v) DECLARE_ALIGNED(16, t, v)
 #if HAVE_MMX
 #undef emms_c
 extern int mm_flags;
 void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
 void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
 void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
 static inline void emms(void)
 {
    __asm__ volatile ("emms;":::"memory");
 }
 #define emms_c() \
 {\
    if (mm_flags & FF_MM_MMX)\
        emms();\
 }
 void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx);
 #elif ARCH_ARM
 extern int mm_flags;
 #if HAVE_NEON
 #   define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(16, t, v)
 #   define STRIDE_ALIGN 16
 #endif
 #elif ARCH_PPC
 extern int mm_flags;
 #define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(16, t, v)
 #define STRIDE_ALIGN 16
 #elif HAVE_MMI
 #define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(16, t, v)
 #define STRIDE_ALIGN 16
 #else
 #define mm_flags 0
 #define mm_support() 0
 #endif
 #ifndef DECLARE_ALIGNED_8
 #   define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(8, t, v)
 #endif
 #ifndef STRIDE_ALIGN
 #   define STRIDE_ALIGN 8
 #endif
 /* PSNR */
 void get_psnr(uint8_t *orig_image[3], uint8_t *coded_image[3],
              int orig_linesize[3], int coded_linesize,
              AVCodecContext *avctx);
 /* FFT computation */
 /* NOTE: soon integer code will be added, so you must use the
   FFTSample type */
 /* Scalar type of every FFT/MDCT sample. */
 typedef float FFTSample;
 /* Forward declaration: the imdct callbacks of FFTContext take a pointer to it. */
 struct MDCTContext;
 /* One complex sample, real part first. */
 typedef struct FFTComplex {
    FFTSample re;   /* real part */
    FFTSample im;   /* imaginary part */
 } FFTComplex;
 /**
 * State of one complex FFT of length 1 << nbits.
 * Set up by ff_fft_init() and released by ff_fft_end().
 */
 typedef struct FFTContext {
    int nbits;            /* log2 of the transform length */
    int inverse;          /* the "inverse" argument given to ff_fft_init() */
    uint16_t *revtab;     /* index permutation table read by ff_fft_permute_c() */
    FFTComplex *exptab;   /* allocated by ff_fft_init() with (1 << nbits) / 2 entries */
    FFTComplex *exptab1; /* only used by SSE code */
    FFTComplex *tmp_buf;  /* when non-NULL, ff_fft_permute_c() permutes through this scratch array */
    /* implementation hooks chosen by ff_fft_init() */
    void (*fft_permute)(struct FFTContext *s, FFTComplex *z);
    void (*fft_calc)(struct FFTContext *s, FFTComplex *z);
    void (*imdct_calc)(struct MDCTContext *s, FFTSample *output, const FFTSample *input);
    void (*imdct_half)(struct MDCTContext *s, FFTSample *output, const FFTSample *input);
 } FFTContext;
 extern FFTSample* ff_cos_tabs[13];
 /**
 * Sets up a complex FFT.
 * @param nbits           log2 of the length of the input array
 * @param inverse         if 0 perform the forward transform, if 1 perform the inverse
 */
 int ff_fft_init(FFTContext *s, int nbits, int inverse);
 /* Permutation step implementations. ff_fft_init() installs the C version
    and swaps in the SSE one when SSE support is detected. */
 void ff_fft_permute_c(FFTContext *s, FFTComplex *z);
 void ff_fft_permute_sse(FFTContext *s, FFTComplex *z);
 /* Transform step implementations. ff_fft_init() installs the C version and
    may replace it with one of the SIMD variants below. */
 void ff_fft_calc_c(FFTContext *s, FFTComplex *z);
 void ff_fft_calc_sse(FFTContext *s, FFTComplex *z);
 void ff_fft_calc_3dn(FFTContext *s, FFTComplex *z);
 void ff_fft_calc_3dn2(FFTContext *s, FFTComplex *z);
 void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z);
 /**
 * Reorder z through the permutation hook stored in the context.
 * This has to run BEFORE ff_fft_calc().
 */
 static inline void ff_fft_permute(FFTContext *s, FFTComplex *z)
 {
    void (*permute_impl)(struct FFTContext *, FFTComplex *) = s->fft_permute;

    permute_impl(s, z);
 }
 /**
 * Run the complex FFT configured by ff_fft_init() through the transform
 * hook stored in the context. The input must already be permuted, and no
 * 1.0/sqrt(n) normalization is applied.
 */
 static inline void ff_fft_calc(FFTContext *s, FFTComplex *z)
 {
    void (*calc_impl)(struct FFTContext *, FFTComplex *) = s->fft_calc;

    calc_impl(s, z);
 }
 void ff_fft_end(FFTContext *s);
 /* MDCT computation */
 /**
 * State of one MDCT; embeds the FFTContext whose imdct_calc / imdct_half
 * hooks are used by ff_imdct_calc() and ff_imdct_half().
 */
 typedef struct MDCTContext {
    int n;  /* size of MDCT (i.e. number of input data * 2) */
    int nbits; /* n = 2^nbits */
    /* pre/post rotation tables */
    FFTSample *tcos;
    FFTSample *tsin;
    FFTContext fft; /* embedded complex FFT state, also carries the imdct hooks */
 } MDCTContext;
 /**
 * Forward to the imdct_calc hook stored in the embedded FFT context.
 */
 static inline void ff_imdct_calc(MDCTContext *s, FFTSample *output, const FFTSample *input)
 {
    FFTContext *core = &s->fft;

    core->imdct_calc(s, output, input);
 }
 /**
 * Forward to the imdct_half hook stored in the embedded FFT context.
 */
 static inline void ff_imdct_half(MDCTContext *s, FFTSample *output, const FFTSample *input)
 {
    FFTContext *core = &s->fft;

    core->imdct_half(s, output, input);
 }
 /**
 * Generate a Kaiser-Bessel Derived Window.
 * @param   window  pointer to half window
 * @param   alpha   determines window shape
 * @param   n       size of half window
 */
 void ff_kbd_window_init(float *window, float alpha, int n);
 /**
 * Generate a sine window.
 * @param   window  pointer to half window
 * @param   n       size of half window
 */
 void ff_sine_window_init(float *window, int n);
 extern float ff_sine_128 [ 128];
 extern float ff_sine_256 [ 256];
 extern float ff_sine_512 [ 512];
 extern float ff_sine_1024[1024];
 extern float ff_sine_2048[2048];
 extern float ff_sine_4096[4096];
 extern float *ff_sine_windows[6];
 int ff_mdct_init(MDCTContext *s, int nbits, int inverse);
 /* Implementations behind the imdct_calc / imdct_half hooks of FFTContext.
    ff_fft_init() installs the _c pair and may replace it with a SIMD pair. */
 void ff_imdct_calc_c(MDCTContext *s, FFTSample *output, const FFTSample *input);
 void ff_imdct_half_c(MDCTContext *s, FFTSample *output, const FFTSample *input);
 void ff_imdct_calc_3dn(MDCTContext *s, FFTSample *output, const FFTSample *input);
 void ff_imdct_half_3dn(MDCTContext *s, FFTSample *output, const FFTSample *input);
 void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *input);
 void ff_imdct_half_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *input);
 void ff_imdct_calc_sse(MDCTContext *s, FFTSample *output, const FFTSample *input);
 void ff_imdct_half_sse(MDCTContext *s, FFTSample *output, const FFTSample *input);
 void ff_mdct_calc(MDCTContext *s, FFTSample *out, const FFTSample *input);
 void ff_mdct_end(MDCTContext *s);
 /* Real Discrete Fourier Transform */
 enum RDFTransformType {
    RDFT,
    IRDFT,
    RIDFT,
    IRIDFT,
 };
 /** Context handed to ff_rdft_init(), ff_rdft_calc() and ff_rdft_end(). */
 typedef struct {
    int nbits;
    int inverse;
    int sign_convention;
    /* pre/post rotation tables */
    FFTSample *tcos;
    FFTSample *tsin;
    FFTContext fft; /* embedded complex FFT state */
 } RDFTContext;
 /**
 * Sets up a real FFT.
 * @param nbits           log2 of the length of the input array
 * @param trans           the type of transform
 */
 int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans);
 void ff_rdft_calc(RDFTContext *s, FFTSample *data);
 void ff_rdft_end(RDFTContext *s);
 /*
 * Define a static function name16 with the signature (s, dst, src, stride, h)
 * that returns the sum of name8 applied to the left half (dst, src) and to
 * the right half (dst+8, src+8), passing h through unchanged.
 */
 #define WRAPPER8_16(name8, name16)\
 static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
    return name8(s, dst           , src           , stride, h)\
          +name8(s, dst+8         , src+8         , stride, h);\
 }
 /*
 * Define a static function name16 that accumulates name8 over 8-row tiles:
 * always the two tiles at dst/src and dst+8/src+8, and, only when h == 16,
 * the two tiles 8*stride further down as well. name8 is always called with
 * a height of 8.
 */
 #define WRAPPER8_16_SQ(name8, name16)\
 static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
    int score=0;\
    score +=name8(s, dst           , src           , stride, 8);\
    score +=name8(s, dst+8         , src+8         , stride, 8);\
    if(h==16){\
        dst += 8*stride;\
        src += 8*stride;\
        score +=name8(s, dst           , src           , stride, 8);\
        score +=name8(s, dst+8         , src+8         , stride, 8);\
    }\
    return score;\
 }
 /**
 * Copy h rows from src to dst, one AV_RN16/AV_WN16 transfer per row.
 * After each row dst advances by dstStride and src by srcStride.
 */
 static inline void copy_block2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
 {
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        AV_WN16(dst, AV_RN16(src));
        src += srcStride;
        dst += dstStride;
    }
 }
 /**
 * Copy h rows from src to dst, one AV_RN32/AV_WN32 transfer per row.
 * After each row dst advances by dstStride and src by srcStride.
 */
 static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
 {
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        AV_WN32(dst, AV_RN32(src));
        src += srcStride;
        dst += dstStride;
    }
 }
 /**
 * Copy h rows from src to dst, two AV_RN32/AV_WN32 transfers per row
 * (offsets 0 and 4). After each row dst advances by dstStride and src by
 * srcStride.
 */
 static inline void copy_block8(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
 {
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        int col;

        for (col = 0; col < 8; col += 4)
            AV_WN32(dst+col, AV_RN32(src+col));
        src += srcStride;
        dst += dstStride;
    }
 }
 /**
 * Copy h rows from src to dst: two AV_RN32/AV_WN32 transfers (offsets 0
 * and 4) followed by the single byte at offset 8. After each row dst
 * advances by dstStride and src by srcStride.
 */
 static inline void copy_block9(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
 {
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        int col;

        for (col = 0; col < 8; col += 4)
            AV_WN32(dst+col, AV_RN32(src+col));
        dst[8] = src[8];
        src += srcStride;
        dst += dstStride;
    }
 }
 /**
 * Copy h rows from src to dst, four AV_RN32/AV_WN32 transfers per row
 * (offsets 0, 4, 8 and 12). After each row dst advances by dstStride and
 * src by srcStride.
 */
 static inline void copy_block16(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
 {
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        int col;

        for (col = 0; col < 16; col += 4)
            AV_WN32(dst+col, AV_RN32(src+col));
        src += srcStride;
        dst += dstStride;
    }
 }
 /**
 * Copy h rows from src to dst: four AV_RN32/AV_WN32 transfers (offsets 0,
 * 4, 8 and 12) followed by the single byte at offset 16. After each row
 * dst advances by dstStride and src by srcStride.
 */
 static inline void copy_block17(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
 {
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        int col;

        for (col = 0; col < 16; col += 4)
            AV_WN32(dst+col, AV_RN32(src+col));
        dst[16] = src[16];
        src += srcStride;
        dst += dstStride;
    }
 }
 #endif /* AVCODEC_DSPUTIL_H */
--- a/apps/codecs/libatrac/ffmpeg_config.h
+++ b/apps/codecs/libatrac/ffmpeg_config.h
@ -0,0 +1,14 @@
 /* Automatically generated by configure - do not modify */
 #ifndef _FFMPEG_CONFIG_H
 #define _FFMPEG_CONFIG_H
 // CHECK THIS : #include "codecs.h"
 #ifdef CPU_ARM
 #define CONFIG_ALIGN 1
 #endif
 #ifdef ROCKBOX_BIG_ENDIAN
 #define WORDS_BIGENDIAN
 #endif
 #endif
--- a/apps/codecs/libatrac/fft.c
+++ b/apps/codecs/libatrac/fft.c
@ -0,0 +1,374 @@
 /*
 * FFT/IFFT transforms
 * Copyright (c) 2008 Loren Merritt
 * Copyright (c) 2002 Fabrice Bellard
 * Partly based on libdjbfft by D. J. Bernstein
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
 /**
 * @file libavcodec/fft.c
 * FFT/IFFT transforms.
 */
 #include "dsputil.h"
 /* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */
 /* Each ff_cos_<n> table holds n/2 samples. The tables start out zeroed and
    are filled in by ff_fft_init() for every size up to the requested one. */
 DECLARE_ALIGNED_16(FFTSample, ff_cos_16[8]);
 DECLARE_ALIGNED_16(FFTSample, ff_cos_32[16]);
 DECLARE_ALIGNED_16(FFTSample, ff_cos_64[32]);
 DECLARE_ALIGNED_16(FFTSample, ff_cos_128[64]);
 DECLARE_ALIGNED_16(FFTSample, ff_cos_256[128]);
 DECLARE_ALIGNED_16(FFTSample, ff_cos_512[256]);
 DECLARE_ALIGNED_16(FFTSample, ff_cos_1024[512]);
 DECLARE_ALIGNED_16(FFTSample, ff_cos_2048[1024]);
 DECLARE_ALIGNED_16(FFTSample, ff_cos_4096[2048]);
 DECLARE_ALIGNED_16(FFTSample, ff_cos_8192[4096]);
 DECLARE_ALIGNED_16(FFTSample, ff_cos_16384[8192]);
 DECLARE_ALIGNED_16(FFTSample, ff_cos_32768[16384]);
 DECLARE_ALIGNED_16(FFTSample, ff_cos_65536[32768]);
 /* ff_cos_tabs[j-4] is the table for transform length 1 << j (j = 4..16) */
 FFTSample *ff_cos_tabs[] = {
    ff_cos_16, ff_cos_32, ff_cos_64, ff_cos_128, ff_cos_256, ff_cos_512, ff_cos_1024,
    ff_cos_2048, ff_cos_4096, ff_cos_8192, ff_cos_16384, ff_cos_32768, ff_cos_65536,
 };
 /**
 * Compute the (signed) split-radix position of input index i for a
 * transform of length n. ff_fft_init() negates the result and masks it with
 * n-1 to obtain the slot in the permutation table.
 *
 * Iterative form of the recurrence
 *   f(i, n) = i & 1                  if n <= 2
 *   f(i, n) = 2 * f(i, n/2)          if bit n/2 of i is clear
 *   f(i, n) = 4 * f(i, n/4) +/- 1    otherwise, sign chosen by bit n/4 of i
 *                                    and by inverse
 * The pending multipliers are carried in scale and the pending +/-1 terms
 * in offset.
 */
 static int split_radix_permutation(int i, int n, int inverse)
 {
    int scale  = 1;
    int offset = 0;

    while (n > 2) {
        const int half = n >> 1;

        if (!(i & half)) {
            scale *= 2;
            n = half;
        } else {
            const int quarter = half >> 1;

            if (inverse == !(i & quarter))
                offset += scale;
            else
                offset -= scale;
            scale *= 4;
            n = quarter;
        }
    }
    return scale * (i & 1) + offset;
 }
 /**
 * Set up a complex FFT context.
 *
 * Allocates the tables owned by the context, installs the C implementations
 * of the permute/calc/imdct hooks (optionally replaced by SIMD versions) and
 * fills the permutation table.
 *
 * @param s       context to initialize; any previous contents are overwritten
 * @param nbits   log2 of the transform length, must be in [2, 16]
 * @param inverse if 0 perform the forward transform, if 1 perform the inverse
 * @return 0 on success; -1 if nbits is out of range or an allocation fails,
 *         in which case every table pointer of the context is left NULL
 */
 av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
 {
    int i, j, m, n;
    float alpha, c1, s1, s2;
    int split_radix = 1;
    int av_unused has_vectors;

    /* Clear every owned pointer before the first possible "goto fail": the
       fail path hands all four to av_freep(), which must never see an
       uninitialized pointer. */
    s->revtab  = NULL;
    s->exptab  = NULL;
    s->exptab1 = NULL;
    s->tmp_buf = NULL;

    if (nbits < 2 || nbits > 16)
        goto fail;
    s->nbits = nbits;
    n = 1 << nbits;
    s->exptab  = av_malloc((n / 2) * sizeof(FFTComplex));
    if (!s->exptab)
        goto fail;
    s->revtab = av_malloc(n * sizeof(uint16_t));
    if (!s->revtab)
        goto fail;
    s->inverse = inverse;
    s2 = inverse ? 1.0 : -1.0;
    s->fft_permute = ff_fft_permute_c;
    s->fft_calc    = ff_fft_calc_c;
    s->imdct_calc  = ff_imdct_calc_c;
    s->imdct_half  = ff_imdct_half_c;
 #if HAVE_MMX && HAVE_YASM
    has_vectors = mm_support();
    if (has_vectors & FF_MM_SSE && HAVE_SSE) {
        /* SSE for P3/P4/K8 */
        s->imdct_calc  = ff_imdct_calc_sse;
        s->imdct_half  = ff_imdct_half_sse;
        s->fft_permute = ff_fft_permute_sse;
        s->fft_calc    = ff_fft_calc_sse;
    } else if (has_vectors & FF_MM_3DNOWEXT && HAVE_AMD3DNOWEXT) {
        /* 3DNowEx for K7 */
        s->imdct_calc = ff_imdct_calc_3dn2;
        s->imdct_half = ff_imdct_half_3dn2;
        s->fft_calc   = ff_fft_calc_3dn2;
    } else if (has_vectors & FF_MM_3DNOW && HAVE_AMD3DNOW) {
        /* 3DNow! for K6-2/3 */
        s->imdct_calc = ff_imdct_calc_3dn;
        s->imdct_half = ff_imdct_half_3dn;
        s->fft_calc   = ff_fft_calc_3dn;
    }
 #elif HAVE_ALTIVEC
    has_vectors = mm_support();
    if (has_vectors & FF_MM_ALTIVEC) {
        s->fft_calc = ff_fft_calc_altivec;
        split_radix = 0;
    }
 #endif
    if (split_radix) {
        /* fill the shared cosine tables for every size up to 1 << nbits */
        for(j=4; j<=nbits; j++) {
            int tab_len = 1<<j;
            double freq = 2*M_PI/tab_len;
            FFTSample *tab = ff_cos_tabs[j-4];
            for(i=0; i<=tab_len/4; i++)
                tab[i] = cos(i*freq);
            for(i=1; i<tab_len/4; i++)
                tab[tab_len/2-i] = tab[i];
        }
        for(i=0; i<n; i++)
            s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = i;
        s->tmp_buf = av_malloc(n * sizeof(FFTComplex));
        /* ff_fft_permute_c() falls back to the in-place swap when tmp_buf is
           NULL, which does not match the split-radix table, so a failed
           allocation must be reported instead of ignored. */
        if (!s->tmp_buf)
            goto fail;
    } else {
        int np, nblocks, np2, l;
        FFTComplex *q;
        for(i=0; i<(n/2); i++) {
            alpha = 2 * M_PI * (float)i / (float)n;
            c1 = cos(alpha);
            s1 = sin(alpha) * s2;
            s->exptab[i].re = c1;
            s->exptab[i].im = s1;
        }
        np = 1 << nbits;
        nblocks = np >> 3;
        np2 = np >> 1;
        s->exptab1 = av_malloc(np * 2 * sizeof(FFTComplex));
        if (!s->exptab1)
            goto fail;
        q = s->exptab1;
        do {
            for(l = 0; l < np2; l += 2 * nblocks) {
                *q++ = s->exptab[l];
                *q++ = s->exptab[l + nblocks];
                q->re = -s->exptab[l].im;
                q->im = s->exptab[l].re;
                q++;
                q->re = -s->exptab[l + nblocks].im;
                q->im = s->exptab[l + nblocks].re;
                q++;
            }
            nblocks = nblocks >> 1;
        } while (nblocks != 0);
        av_freep(&s->exptab);
        /* compute bit reverse table */
        for(i=0;i<n;i++) {
            m=0;
            for(j=0;j<nbits;j++) {
                m |= ((i >> j) & 1) << (nbits-j-1);
            }
            s->revtab[i]=m;
        }
    }
    return 0;
 fail:
    av_freep(&s->revtab);
    av_freep(&s->exptab);
    av_freep(&s->exptab1);
    av_freep(&s->tmp_buf);
    return -1;
 }
 /**
 * C implementation of the permutation step: reorder the 1 << nbits samples
 * of z according to s->revtab.
 *
 * When the context has a scratch buffer, sample j is moved to position
 * revtab[j] through that buffer. Otherwise z[j] and z[revtab[j]] are swapped
 * in place, once per pair.
 */
 void ff_fft_permute_c(FFTContext *s, FFTComplex *z)
 {
    const uint16_t *perm = s->revtab;
    const int count = 1 << s->nbits;
    int idx;

    if (!s->tmp_buf) {
        /* reverse */
        for (idx = 0; idx < count; idx++) {
            const int target = perm[idx];

            if (target < idx) {
                FFTComplex held = z[target];
                z[target] = z[idx];
                z[idx]    = held;
            }
        }
        return;
    }

    /* TODO: handle split-radix permute in a more optimal way, probably in-place */
    for (idx = 0; idx < count; idx++)
        s->tmp_buf[perm[idx]] = z[idx];
    memcpy(z, s->tmp_buf, count * sizeof(FFTComplex));
 }
 /**
 * Release every table owned by the context through av_freep().
 */
 av_cold void ff_fft_end(FFTContext *s)
 {
    av_freep(&s->tmp_buf);
    av_freep(&s->exptab1);
    av_freep(&s->exptab);
    av_freep(&s->revtab);
 }
 /* 1/sqrt(2) as a float: the twiddle factor used by fft8 and fft16 */
 #define sqrthalf (float)M_SQRT1_2
 /* Butterfly: x = a - b, y = a + b.
    x is stored before y is computed, so x must not be the same lvalue as a
    or b; y may be. */
 #define BF(x,y,a,b) {\
    x = a - b;\
    y = a + b;\
 }
 /* Combine a0..a3 in place with the already rotated values held in the
    caller's locals t1, t2, t5, t6; t3 and t4 are used as scratch. */
 #define BUTTERFLIES(a0,a1,a2,a3) {\
    BF(t3, t5, t5, t1);\
    BF(a2.re, a0.re, a0.re, t5);\
    BF(a3.im, a1.im, a1.im, t3);\
    BF(t4, t6, t2, t6);\
    BF(a3.re, a1.re, a1.re, t4);\
    BF(a2.im, a0.im, a0.im, t6);\
 }
 // force loading all the inputs before storing any.
 // this is slightly slower for small data, but avoids store->load aliasing
 // for addresses separated by large powers of 2.
 #define BUTTERFLIES_BIG(a0,a1,a2,a3) {\
    FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\
    BF(t3, t5, t5, t1);\
    BF(a2.re, a0.re, r0, t5);\
    BF(a3.im, a1.im, i1, t3);\
    BF(t4, t6, t2, t6);\
    BF(a3.re, a1.re, r1, t4);\
    BF(a2.im, a0.im, i0, t6);\
 }
 /* Rotate a2 by (wre - i*wim) into (t1, t2) and a3 by (wre + i*wim) into
    (t5, t6), then run BUTTERFLIES. Needs FFTSample locals t1..t6 in scope. */
 #define TRANSFORM(a0,a1,a2,a3,wre,wim) {\
    t1 = a2.re * wre + a2.im * wim;\
    t2 = a2.im * wre - a2.re * wim;\
    t5 = a3.re * wre - a3.im * wim;\
    t6 = a3.im * wre + a3.re * wim;\
    BUTTERFLIES(a0,a1,a2,a3)\
 }
 /* Same as TRANSFORM without the rotation: a2 and a3 are used as they are. */
 #define TRANSFORM_ZERO(a0,a1,a2,a3) {\
    t1 = a2.re;\
    t2 = a2.im;\
    t5 = a3.re;\
    t6 = a3.im;\
    BUTTERFLIES(a0,a1,a2,a3)\
 }
 /* z[0...8n-1], w[1...2n-1] */
 /*
 * PASS(name) defines "static void name(z, wre, n)": one combining pass over
 * the four quarters of z that start at offsets 0, 2n, 4n and 6n.
 * wre is read forwards and wim (= wre + 2n) backwards.
 * The first two elements are handled before the loop and two more per
 * iteration. The loop counter is unsigned and pre-decremented, so n must be
 * at least 2.
 * BUTTERFLIES is resolved when PASS is instantiated: "pass" gets the plain
 * version and "pass_big", instantiated after the redefinition below, gets
 * BUTTERFLIES_BIG.
 */
 #define PASS(name)\
 static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\
 {\
    FFTSample t1, t2, t3, t4, t5, t6;\
    int o1 = 2*n;\
    int o2 = 4*n;\
    int o3 = 6*n;\
    const FFTSample *wim = wre+o1;\
    n--;\
 \
    TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\
    TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
    do {\
        z += 2;\
        wre += 2;\
        wim -= 2;\
        TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\
        TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
    } while(--n);\
 }
 PASS(pass)
 #undef BUTTERFLIES
 #define BUTTERFLIES BUTTERFLIES_BIG
 PASS(pass_big)
 /*
 * DECL_FFT(n, n2, n4) defines "static void fft<n>(z)" from smaller
 * transforms: fft<n2> on the first half of z, fft<n4> on each of the last
 * two quarters, then one pass() call with the ff_cos_<n> table.
 * Callers pass n2 = n/2 and n4 = n/4.
 */
 #define DECL_FFT(n,n2,n4)\
 static void fft##n(FFTComplex *z)\
 {\
    fft##n2(z);\
    fft##n4(z+n4*2);\
    fft##n4(z+n4*3);\
    pass(z,ff_cos_##n,n4/2);\
 }
 /* In-place 4-point transform of z[0..3]; built only from BF add/subtract
    butterflies, no multiplications. */
 static void fft4(FFTComplex *z)
 {
    FFTSample t1, t2, t3, t4, t5, t6, t7, t8;
    /* real parts: differences in t3/t8, sums in t1/t6 */
    BF(t3, t1, z[0].re, z[1].re);
    BF(t8, t6, z[3].re, z[2].re);
    BF(z[2].re, z[0].re, t1, t6);
    /* imaginary parts: differences in t4/t7, sums in t2/t5 */
    BF(t4, t2, z[0].im, z[1].im);
    BF(t7, t5, z[2].im, z[3].im);
    BF(z[3].im, z[1].im, t4, t8);
    BF(z[3].re, z[1].re, t3, t7);
    BF(z[2].im, z[0].im, t2, t5);
 }
 /* In-place 8-point transform of z[0..7]: fft4 on the first four samples,
    butterflies on z[4..7], then one TRANSFORM with the sqrthalf twiddle. */
 static void fft8(FFTComplex *z)
 {
    FFTSample t1, t2, t3, t4, t5, t6, t7, t8;
    fft4(z);
    /* passing the negated second operand makes BF yield the sum in t1..t4
       and the difference in z[5] / z[7] */
    BF(t1, z[5].re, z[4].re, -z[5].re);
    BF(t2, z[5].im, z[4].im, -z[5].im);
    BF(t3, z[7].re, z[6].re, -z[7].re);
    BF(t4, z[7].im, z[6].im, -z[7].im);
    BF(t8, t1, t3, t1);
    BF(t7, t2, t2, t4);
    BF(z[4].re, z[0].re, z[0].re, t1);
    BF(z[4].im, z[0].im, z[0].im, t2);
    BF(z[6].re, z[2].re, z[2].re, t7);
    BF(z[6].im, z[2].im, z[2].im, t8);
    TRANSFORM(z[1],z[3],z[5],z[7],sqrthalf,sqrthalf);
 }
 /* fft16 is written out by hand unless CONFIG_SMALL is set, in which case it
    is generated by DECL_FFT like the larger sizes. */
 #if !CONFIG_SMALL
 static void fft16(FFTComplex *z)
 {
    FFTSample t1, t2, t3, t4, t5, t6;
    /* sub-transforms: 8 points on the first half, 4 points on each of the
       last two quarters */
    fft8(z);
    fft4(z+8);
    fft4(z+12);
    /* combining pass, unrolled; twiddles are 1, sqrthalf and ff_cos_16[1]/[3] */
    TRANSFORM_ZERO(z[0],z[4],z[8],z[12]);
    TRANSFORM(z[2],z[6],z[10],z[14],sqrthalf,sqrthalf);
    TRANSFORM(z[1],z[5],z[9],z[13],ff_cos_16[1],ff_cos_16[3]);
    TRANSFORM(z[3],z[7],z[11],z[15],ff_cos_16[3],ff_cos_16[1]);
 }
 #else
 DECL_FFT(16,8,4)
 #endif
 /* Generate fft32 .. fft65536. Sizes up to 512 always use pass(). */
 DECL_FFT(32,16,8)
 DECL_FFT(64,32,16)
 DECL_FFT(128,64,32)
 DECL_FFT(256,128,64)
 DECL_FFT(512,256,128)
 /* From 1024 points on, pass() is redirected to pass_big unless CONFIG_SMALL
    is set. */
 #if !CONFIG_SMALL
 #define pass pass_big
 #endif
 DECL_FFT(1024,512,256)
 DECL_FFT(2048,1024,512)
 DECL_FFT(4096,2048,1024)
 DECL_FFT(8192,4096,2048)
 DECL_FFT(16384,8192,4096)
 DECL_FFT(32768,16384,8192)
 DECL_FFT(65536,32768,16384)
 /* Transform routines indexed by nbits - 2: entry 0 is fft4 (nbits == 2) and
    the last entry is fft65536 (nbits == 16). */
 static void (*fft_dispatch[])(FFTComplex*) = {
    fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512, fft1024,
    fft2048, fft4096, fft8192, fft16384, fft32768, fft65536,
 };
 /**
 * C implementation of the transform step: look up the routine for the
 * context's size in fft_dispatch and run it on z.
 */
 void ff_fft_calc_c(FFTContext *s, FFTComplex *z)
 {
    /* the table starts at the 4-point transform, i.e. nbits == 2 */
    void (*const transform)(FFTComplex*) = fft_dispatch[s->nbits - 2];

    transform(z);
 }
--- a/apps/codecs/libatrac/libavutil/avutil.h
+++ b/apps/codecs/libatrac/libavutil/avutil.h
@ -0,0 +1,63 @@
 /*
 * copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
 #ifndef AVUTIL_AVUTIL_H
 #define AVUTIL_AVUTIL_H
 /**
 * @file libavutil/avutil.h
 * external API header
 */
 /* Stringify s after macro expansion (AV_TOSTRING alone stringifies it as written). */
 #define AV_STRINGIFY(s)         AV_TOSTRING(s)
 #define AV_TOSTRING(s) #s
 /* Pack major/minor/micro into one integer: a in bits 16 and up, b in bits
    8..15, c in bits 0..7. Every argument is parenthesized so that expressions
    with low-precedence operators (e.g. x|y) are packed as a whole. */
 #define AV_VERSION_INT(a, b, c) ((a)<<16 | (b)<<8 | (c))
 /* Paste the three components into a single a.b.c token. */
 #define AV_VERSION_DOT(a, b, c) a ##.## b ##.## c
 /* Like AV_VERSION_DOT, but the arguments are macro-expanded first. */
 #define AV_VERSION(a, b, c) AV_VERSION_DOT(a, b, c)
 #define LIBAVUTIL_VERSION_MAJOR 50
 #define LIBAVUTIL_VERSION_MINOR  0
 #define LIBAVUTIL_VERSION_MICRO  0
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
                                               LIBAVUTIL_VERSION_MINOR, \
                                               LIBAVUTIL_VERSION_MICRO)
 #define LIBAVUTIL_VERSION       AV_VERSION(LIBAVUTIL_VERSION_MAJOR,     \
                                           LIBAVUTIL_VERSION_MINOR,     \
                                           LIBAVUTIL_VERSION_MICRO)
 #define LIBAVUTIL_BUILD         LIBAVUTIL_VERSION_INT
 #define LIBAVUTIL_IDENT         "Lavu" AV_STRINGIFY(LIBAVUTIL_VERSION)
 /**
 * Returns the LIBAVUTIL_VERSION_INT constant.
 */
 unsigned avutil_version(void);
 #include "common.h"
 //#include "mathematics.h"
 //#include "rational.h"
 //#include "intfloat_readwrite.h"
 #include "log.h"
 //#include "pixfmt.h"
 #endif /* AVUTIL_AVUTIL_H */
--- a/apps/codecs/libatrac/libavutil/bswap.h
+++ b/apps/codecs/libatrac/libavutil/bswap.h
@ -0,0 +1,99 @@
 /*
 * copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
 /**
 * @file libavutil/bswap.h
 * byte swapping routines
 */
 #ifndef AVUTIL_BSWAP_H
 #define AVUTIL_BSWAP_H
 #include <stdint.h>
 //#include "ffmpeg_config.h"
 #include "common.h"
 #if   ARCH_ARM
 #   include "arm/bswap.h"
 #elif ARCH_BFIN
 #   include "bfin/bswap.h"
 #elif ARCH_SH4
 #   include "sh4/bswap.h"
 #elif ARCH_X86
 #   include "x86/bswap.h"
 #endif
 #ifndef bswap_16
 /** Exchanges the two bytes of a 16-bit value. */
 static av_always_inline av_const uint16_t bswap_16(uint16_t x)
 {
    const unsigned int high_to_low = x >> 8;
    const unsigned int low_to_high = x << 8;
    /* The conversion to uint16_t on return drops the bits above bit 15. */
    return high_to_low | low_to_high;
 }
 #endif
 #ifndef bswap_32
 /** Reverses the order of the four bytes of a 32-bit value. */
 static av_always_inline av_const uint32_t bswap_32(uint32_t x)
 {
    /* Step 1: swap the bytes inside each 16-bit half. */
    const uint32_t moved_up   = (x << 8) & 0xFF00FF00;
    const uint32_t moved_down = (x >> 8) & 0x00FF00FF;
    const uint32_t pairs      = moved_up | moved_down;
    /* Step 2: swap the two 16-bit halves. */
    return (pairs >> 16) | (pairs << 16);
 }
 #endif
 #ifndef bswap_64
 /**
  * Reverses the byte order of a 64-bit value.
  * The active implementation byte-swaps each 32-bit half with bswap_32() and
  * exchanges the halves through a union; the disabled branch is the pure
  * shift-and-mask alternative.
  */
 static inline uint64_t av_const bswap_64(uint64_t x)
 {
 #if 0
    x= ((x<< 8)&0xFF00FF00FF00FF00ULL) | ((x>> 8)&0x00FF00FF00FF00FFULL);
    x= ((x<<16)&0xFFFF0000FFFF0000ULL) | ((x>>16)&0x0000FFFF0000FFFFULL);
    return (x>>32) | (x<<32);
 #else
    union {
        uint32_t half[2];
        uint64_t whole;
    } src, dst;
    src.whole = x;
    /* Each swapped half lands in the opposite slot of the result. */
    dst.half[1] = bswap_32(src.half[0]);
    dst.half[0] = bswap_32(src.half[1]);
    return dst.whole;
 #endif
 }
 #endif
 /*
  * be2me_N: convert an N-bit big-endian value to machine (host) byte order.
  * le2me_N: convert an N-bit little-endian value to machine byte order.
  * The host is treated as big-endian only when WORDS_BIGENDIAN is defined.
  */
 #ifndef WORDS_BIGENDIAN
 /* Little-endian host: little-endian data is already in host order. */
 #define le2me_16(x) (x)
 #define le2me_32(x) (x)
 #define le2me_64(x) (x)
 #define be2me_16(x) bswap_16(x)
 #define be2me_32(x) bswap_32(x)
 #define be2me_64(x) bswap_64(x)
 #else
 /* Big-endian host: big-endian data is already in host order. */
 #define le2me_16(x) bswap_16(x)
 #define le2me_32(x) bswap_32(x)
 #define le2me_64(x) bswap_64(x)
 #define be2me_16(x) (x)
 #define be2me_32(x) (x)
 #define be2me_64(x) (x)
 #endif
 #endif /* AVUTIL_BSWAP_H */
--- a/apps/codecs/libatrac/libavutil/common.h
+++ b/apps/codecs/libatrac/libavutil/common.h
@ -0,0 +1,286 @@
 /*
 * copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
 /**
 * @file libavutil/common.h
 * common internal and external API header
 */
 #ifndef AVUTIL_COMMON_H
 #define AVUTIL_COMMON_H
 #include <ctype.h>
 #include <errno.h>
 #include <inttypes.h>
 #include <limits.h>
 #include <math.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 /*
  * AV_GCC_VERSION_AT_LEAST(x,y) is nonzero when the compiler defines __GNUC__
  * and reports version x.y or newer; it is 0 on every other compiler.
  * The arguments and the && term are parenthesized so that any constant
  * expression can be passed and no precedence warning is triggered.
  */
 #ifdef __GNUC__
 #    define AV_GCC_VERSION_AT_LEAST(x,y) (__GNUC__ > (x) || (__GNUC__ == (x) && __GNUC_MINOR__ >= (y)))
 #else
 #    define AV_GCC_VERSION_AT_LEAST(x,y) 0
 #endif
 /*
  * Attribute wrappers. Each macro can be predefined by the includer; otherwise
  * it expands to the matching GCC attribute when the compiler is new enough
  * and to nothing (or to plain "inline") elsewhere.
  */
 /* Force inlining (GCC >= 3.1), otherwise an ordinary inline hint. */
 #ifndef av_always_inline
 #if AV_GCC_VERSION_AT_LEAST(3,1)
 #    define av_always_inline __attribute__((always_inline)) inline
 #else
 #    define av_always_inline inline
 #endif
 #endif
 /* Forbid inlining of the function (GCC >= 3.1). */
 #ifndef av_noinline
 #if AV_GCC_VERSION_AT_LEAST(3,1)
 #    define av_noinline __attribute__((noinline))
 #else
 #    define av_noinline
 #endif
 #endif
 /* GCC "pure" function attribute (GCC >= 3.1). */
 #ifndef av_pure
 #if AV_GCC_VERSION_AT_LEAST(3,1)
 #    define av_pure __attribute__((pure))
 #else
 #    define av_pure
 #endif
 #endif
 /* GCC "const" function attribute (GCC >= 2.6). */
 #ifndef av_const
 #if AV_GCC_VERSION_AT_LEAST(2,6)
 #    define av_const __attribute__((const))
 #else
 #    define av_const
 #endif
 #endif
 /* GCC "cold" attribute (GCC >= 4.3; skipped for ICC versions up to 1100). */
 #ifndef av_cold
 #if (!defined(__ICC) || __ICC > 1100) && AV_GCC_VERSION_AT_LEAST(4,3)
 #    define av_cold __attribute__((cold))
 #else
 #    define av_cold
 #endif
 #endif
 /* GCC "flatten" attribute (GCC >= 4.1). */
 #ifndef av_flatten
 #if AV_GCC_VERSION_AT_LEAST(4,1)
 #    define av_flatten __attribute__((flatten))
 #else
 #    define av_flatten
 #endif
 #endif
 /* Marks a declaration as deprecated (GCC >= 3.1). */
 #ifndef attribute_deprecated
 #if AV_GCC_VERSION_AT_LEAST(3,1)
 #    define attribute_deprecated __attribute__((deprecated))
 #else
 #    define attribute_deprecated
 #endif
 #endif
 /* Marks an entity as possibly unused (any compiler defining __GNUC__). */
 #ifndef av_unused
 #if defined(__GNUC__)
 #    define av_unused __attribute__((unused))
 #else
 #    define av_unused
 #endif
 #endif
 /* Declarator wrapper: expands to the self-initialization "x=x" on GCC (but
  * not ICC) and to plain "x" elsewhere; presumably meant to silence
  * "may be used uninitialized" warnings. */
 #ifndef av_uninit
 #if defined(__GNUC__) && !defined(__ICC)
 #    define av_uninit(x) x=x
 #else
 #    define av_uninit(x) x
 #endif
 #endif
 /*
  * Rounded division & shift, plus small generic helpers.
  * All of these are function-like macros: arguments may be evaluated more
  * than once, so do not pass expressions with side effects.
  */
 /* Right shift of a by b bits with rounding: half of (1<<b) is added before
  * the shift (one less than half when a is not positive). */
 #define RSHIFT(a,b) ((a) > 0 ? ((a) + ((1<<(b))>>1))>>(b) : ((a) + ((1<<(b))>>1)-1)>>(b))
 /* Division of a by b with b/2 added to (or subtracted from) a first, so the
  * quotient is rounded rather than truncated; assume b>0 */
 #define ROUNDED_DIV(a,b) (((a)>0 ? (a) + ((b)>>1) : (a) - ((b)>>1))/(b))
 /* Absolute value of a. */
 #define FFABS(a) ((a) >= 0 ? (a) : (-(a)))
 /* 1 when a is positive, -1 otherwise (note: zero yields -1). */
 #define FFSIGN(a) ((a) > 0 ? 1 : -1)
 /* Larger of a and b; yields b when the two compare equal. */
 #define FFMAX(a,b) ((a) > (b) ? (a) : (b))
 #define FFMAX3(a,b,c) FFMAX(FFMAX(a,b),c)
 /* Smaller of a and b; yields a when the two compare equal. */
 #define FFMIN(a,b) ((a) > (b) ? (b) : (a))
 #define FFMIN3(a,b,c) FFMIN(FFMIN(a,b),c)
 /* Exchanges the lvalues a and b through a temporary of the given type. The
  * temporary is named SWAP_tmp, so neither argument may use that name. */
 #define FFSWAP(type,a,b) do{type SWAP_tmp= b; b= a; a= SWAP_tmp;}while(0)
 /* Element count of a true array (not meaningful for a pointer). */
 #define FF_ARRAY_ELEMS(a) (sizeof(a) / sizeof((a)[0]))
 /* misc math functions */
 extern const uint8_t ff_log2_tab[256];
 /**
  * Reduces v to a single byte by shifting out whole 16-bit and 8-bit groups,
  * then adds the ff_log2_tab entry for that byte to the number of bits shifted.
  * Presumably this is the index of the highest set bit (floor(log2(v))); that
  * depends on the contents of ff_log2_tab, which is defined elsewhere.
  * @param v input value; the masks used cover 32 bits
  * @return shift count (0, 8, 16 or 24) plus ff_log2_tab[remaining byte]
  */
 static inline av_const int av_log2(unsigned int v)
 {
    int bits = 0;
    if (v & 0xffff0000) {
        bits = 16;
        v >>= 16;
    }
    if (v & 0xff00) {
        bits += 8;
        v >>= 8;
    }
    return bits + ff_log2_tab[v];
 }
 /**
  * 16-bit variant of av_log2(): only one 8-bit reduction step is performed
  * before the ff_log2_tab lookup.
  * @param v input value; it must fit in 16 bits, otherwise the index into the
  *          256-entry table is out of range
  * @return 0 or 8 plus ff_log2_tab[remaining byte]
  */
 static inline av_const int av_log2_16bit(unsigned int v)
 {
    int bits = 0;
    if (v & 0xff00) {
        bits = 8;
        v >>= 8;
    }
    return bits + ff_log2_tab[v];
 }
 /**
  * Limits a signed integer to the closed range [amin, amax].
  * The lower bound is tested first, so amin wins if the bounds are inverted
  * and a lies below amin.
  * @param a value to clip
  * @param amin minimum value of the clip range
  * @param amax maximum value of the clip range
  * @return amin if a < amin, amax if a > amax, otherwise a
  */
 static inline av_const int av_clip(int a, int amin, int amax)
 {
    if (a < amin)
        return amin;
    return a > amax ? amax : a;
 }
 /**
  * Clips a signed integer value into the 0-255 range.
  * Negative inputs map to 0 and inputs above 255 map to 255. The saturated
  * value is chosen with a comparison instead of negating and shifting, so the
  * result is well defined for every int, including INT_MIN (whose negation
  * overflows).
  * @param a value to clip
  * @return clipped value
  */
 static inline av_const uint8_t av_clip_uint8(int a)
 {
    /* Any bit outside the low eight means a is below 0 or above 255. */
    if (a & ~0xFF)
        return a < 0 ? 0 : 255;
    return a;
 }
 /**
  * Clips a signed integer value into the -32768,32767 range.
  * Uses plain comparisons, so no intermediate addition can overflow for
  * inputs close to INT_MAX and no right shift of a negative value is needed.
  * @param a value to clip
  * @return clipped value
  */
 static inline av_const int16_t av_clip_int16(int a)
 {
    if (a < -32768)
        return -32768;
    if (a > 32767)
        return 32767;
    return a;
 }
 /**
  * Limits a float to the closed range [amin, amax].
  * A value for which both comparisons are false (e.g. NaN) is returned
  * unchanged.
  * @param a value to clip
  * @param amin minimum value of the clip range
  * @param amax maximum value of the clip range
  * @return amin if a < amin, amax if a > amax, otherwise a
  */
 static inline av_const float av_clipf(float a, float amin, float amax)
 {
    if (a < amin)
        return amin;
    return a > amax ? amax : a;
 }
 /*
  * Build a 32-bit tag from four byte values.
  * MKTAG places a in the lowest byte and d in the highest; MKBETAG is the
  * reverse (a in the highest byte). Arguments are parenthesized, and the
  * byte shifted by 24 is converted to unsigned first so that values >= 0x80
  * do not overflow a signed int. The result therefore has unsigned type.
  */
 #define MKTAG(a,b,c,d) ((a) | ((b) << 8) | ((c) << 16) | ((unsigned)(d) << 24))
 #define MKBETAG(a,b,c,d) ((d) | ((c) << 8) | ((b) << 16) | ((unsigned)(a) << 24))
 /*!
  * \def GET_UTF8(val, GET_BYTE, ERROR)
  * Converts a UTF-8 character (up to 4 bytes long) to its 32-bit UCS-4 encoded form
  * \param val is the output and should be of type uint32_t. It holds the converted
  * UCS-4 character and should be a left value.
  * \param GET_BYTE gets UTF-8 encoded bytes from any proper source. It can be
  * a function or a statement whose return value or evaluated value is of type
  * uint8_t. It will be executed up to 4 times for values in the valid UTF-8 range,
  * and up to 7 times in the general case.
  * \param ERROR action that should be taken when an invalid UTF-8 byte is returned
  * from GET_BYTE. It should be a statement that jumps out of the macro,
  * like exit(), goto, return, break, or continue.
  *
  * How it works: the first byte is stored in val and "ones" is computed from it
  * as 7 - av_log2(val ^ 255). ERROR is run when ones is 1. Otherwise the low
  * (7 - ones) bits of the first byte are kept and (ones - 1) further bytes are
  * read; each must lie in 0x80-0xBF (else ERROR is run) and contributes its low
  * 6 bits.
  *
  * \note The expansion is an assignment statement followed by a brace block,
  * not a single statement, so it must not be used as the unbraced body of an
  * if/else or loop. val is referenced several times, and ERROR is pasted
  * directly after an if(), so it must be a complete statement.
  */
 #define GET_UTF8(val, GET_BYTE, ERROR)\
    val= GET_BYTE;\
    {\
        int ones= 7 - av_log2(val ^ 255);\
        if(ones==1)\
            ERROR\
        val&= 127>>ones;\
        while(--ones > 0){\
            int tmp= GET_BYTE - 128;\
            if(tmp>>6)\
                ERROR\
            val= (val<<6) + tmp;\
        }\
    }
 /*!
  * \def PUT_UTF8(val, tmp, PUT_BYTE)
  * Converts a 32-bit Unicode character to its UTF-8 encoded form (up to 4 bytes long).
  * \param val is an input-only argument and should be of type uint32_t. It holds
  * a UCS-4 encoded Unicode character that is to be converted to UTF-8. If
  * val is given as a function it is executed only once.
  * \param tmp is a temporary variable and should be of type uint8_t. It
  * represents an intermediate value during conversion that is to be
  * output by PUT_BYTE.
  * \param PUT_BYTE writes the converted UTF-8 bytes to any proper destination.
  * It could be a function or a statement, and uses tmp as the input byte.
  * For example, PUT_BYTE could be "*output++ = tmp;" PUT_BYTE will be
  * executed up to 4 times for values in the valid UTF-8 range and up to
  * 7 times in the general case, depending on the length of the converted
  * Unicode character.
  *
  * How it works: values below 0x80 are emitted as a single byte. Otherwise the
  * byte count is (av_log2(in) + 4) / 5; the lead byte carries that many leading
  * one bits plus the topmost payload bits, and every following byte is
  * 0x80 | (next 6 bits).
  *
  * \note The expansion is a brace block that declares locals named bytes,
  * shift and in; the val and tmp arguments must not use those names. PUT_BYTE
  * is pasted as-is, so it must be a complete statement.
  */
 #define PUT_UTF8(val, tmp, PUT_BYTE)\
    {\
        int bytes, shift;\
        uint32_t in = val;\
        if (in < 0x80) {\
            tmp = in;\
            PUT_BYTE\
        } else {\
            bytes = (av_log2(in) + 4) / 5;\
            shift = (bytes - 1) * 6;\
            tmp = (256 - (256 >> bytes)) | (in >> shift);\
            PUT_BYTE\
            while (shift >= 6) {\
                shift -= 6;\
                tmp = 0x80 | ((in >> shift) & 0x3f);\
                PUT_BYTE\
            }\
        }\
    }
 #include "mem.h"
 //#ifdef HAVE_AV_CONFIG_H
 //#    include "ffmpeg_config.h"
 #    include "internal.h"
 //#endif /* HAVE_AV_CONFIG_H */
 #endif /* AVUTIL_COMMON_H */
--- a/apps/codecs/libatrac/libavutil/internal.h
+++ b/apps/codecs/libatrac/libavutil/internal.h
@ -0,0 +1,328 @@
 /*
 * copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
 /**
 * @file libavutil/internal.h
 * common internal API header
 */
 #ifndef AVUTIL_INTERNAL_H
 #define AVUTIL_INTERNAL_H
 /* Unless the build defines DEBUG (or NDEBUG already), define NDEBUG here,
  * ahead of the <assert.h> include further down in this header. */
 #if !defined(DEBUG) && !defined(NDEBUG)
 #    define NDEBUG
 #endif
 #include <limits.h>
 #include <stdint.h>
 #include <stddef.h>
 #include <assert.h>
 //#include "ffmpeg_config.h"
 #include "common.h"
 #include "mem.h"
 //#include "timer.h"
 /* attribute_align_arg: GCC force_align_arg_pointer attribute, enabled for
  * GCC >= 4.2 (and not for ICC versions up to 1100); empty elsewhere.
  * May be predefined by the includer. */
 #ifndef attribute_align_arg
 #if (!defined(__ICC) || __ICC > 1100) && AV_GCC_VERSION_AT_LEAST(4,2)
 #    define attribute_align_arg __attribute__((force_align_arg_pointer))
 #else
 #    define attribute_align_arg
 #endif
 #endif
 /* attribute_used: GCC "used" attribute (GCC >= 3.1); empty elsewhere.
  * DECLARE_ASM_CONST applies it to constants in the GCC branch. */
 #ifndef attribute_used
 #if AV_GCC_VERSION_AT_LEAST(3,1)
 #    define attribute_used __attribute__((used))
 #else
 #    define attribute_used
 #endif
 #endif
 /*
  * Fallback definitions of the fixed-width integer limits, used only when the
  * included system headers did not already define them.
  */
 #ifndef INT16_MIN
 #define INT16_MIN       (-0x7fff-1)
 #endif
 #ifndef INT16_MAX
 #define INT16_MAX       0x7fff
 #endif
 #ifndef INT32_MIN
 #define INT32_MIN       (-0x7fffffff-1)
 #endif
 #ifndef INT32_MAX
 #define INT32_MAX       0x7fffffff
 #endif
 #ifndef UINT32_MAX
 #define UINT32_MAX      0xffffffff
 #endif
 #ifndef INT64_MIN
 #define INT64_MIN       (-0x7fffffffffffffffLL-1)
 #endif
 /* The two 64-bit maxima rely on the INT64_C / UINT64_C constant macros. */
 #ifndef INT64_MAX
 #define INT64_MAX INT64_C(9223372036854775807)
 #endif
 #ifndef UINT64_MAX
 #define UINT64_MAX UINT64_C(0xFFFFFFFFFFFFFFFF)
 #endif
 /* Number of bits in an int. */
 #ifndef INT_BIT
 #    define INT_BIT (CHAR_BIT * sizeof(int))
 #endif
 /* Normalize the compiler's position-independent-code macros to PIC. */
 #if ( defined(__PIC__) || defined(__pic__) ) && ! defined(PIC)
 #    define PIC
 #endif
 /* Fallback for toolchains without offsetof. The result is converted to
  * size_t (the type the standard macro yields) rather than unsigned int, so
  * it is not truncated where pointers are wider than int. */
 #ifndef offsetof
 #    define offsetof(T,F) ((size_t)((char *)&((T *)0)->F))
 #endif
 // Use rip-relative addressing if compiling PIC code on x86-64.
 #if ARCH_X86_64 && defined(PIC)
 #    define LOCAL_MANGLE(a) #a "(%%rip)"
 #else
 #    define LOCAL_MANGLE(a) #a
 #endif
 /* Symbol name as a string for use inside inline asm; EXTERN_PREFIX must be
  * provided by the build configuration. */
 #define MANGLE(a) EXTERN_PREFIX LOCAL_MANGLE(a)
 /* debug stuff */
 /* dprintf macros: forward to av_log() at AV_LOG_DEBUG level when DEBUG is
  * defined, and expand to nothing otherwise (the arguments are then not
  * evaluated at all). */
 #ifdef DEBUG
 #    define dprintf(pctx, ...) av_log(pctx, AV_LOG_DEBUG, __VA_ARGS__)
 #else
 #    define dprintf(pctx, ...)
 #endif
 /* Logs the current source file and line at AV_LOG_ERROR level, then calls
  * abort(). */
 #define av_abort()      do { av_log(NULL, AV_LOG_ERROR, "Abort at %s:%d\n", __FILE__, __LINE__); abort(); } while (0)
 /* math */
 /* Lookup table indexed by the divisor in every table-based FASTDIV variant
  * below; presumably it holds scaled reciprocals -- the table itself is
  * defined elsewhere. Only divisors 0..255 have an entry. */
 extern const uint32_t ff_inverse[256];
 /*
  * FASTDIV(a,b): division of a by b. Every variant except the final plain
  * (a)/(b) fallback computes the upper 32 bits of a * ff_inverse[b], so b
  * must be a valid index into ff_inverse.
  */
 /* x86: MUL leaves the 64-bit product in EDX:EAX; the EDX half is returned.
  * Uses a GCC statement expression. */
 #if ARCH_X86
 #    define FASTDIV(a,b) \
    ({\
        int ret,dmy;\
        __asm__ volatile(\
            "mull %3"\
            :"=d"(ret),"=a"(dmy)\
            :"1"(a),"g"(ff_inverse[b])\
            );\
        ret;\
    })
 /* ARMv6: for b > 2 takes the most significant word of ff_inverse[b] * a
  * (smmul); for b <= 2 it returns a >> 1 instead.
  * NOTE(review): the b <= 2 path also covers b == 1, which would then yield
  * a/2 -- confirm callers never pass 1. */
 #elif HAVE_ARMV6 && HAVE_INLINE_ASM
 static inline av_const int FASTDIV(int a, int b)
 {
    int r, t;
    __asm__ volatile("cmp     %3, #2               \n\t"
                     "ldr     %1, [%4, %3, lsl #2] \n\t"
                     "lsrle   %0, %2, #1           \n\t"
                     "smmulgt %0, %1, %2           \n\t"
                     : "=&r"(r), "=&r"(t) : "r"(a), "r"(b), "r"(ff_inverse));
    return r;
 }
 /* Other ARM: unsigned 32x32->64 multiply (umull); the high word is returned. */
 #elif ARCH_ARM && HAVE_INLINE_ASM
 static inline av_const int FASTDIV(int a, int b)
 {
    int r, t;
    __asm__ volatile ("umull %1, %0, %2, %3"
                      : "=&r"(r), "=&r"(t) : "r"(a), "r"(ff_inverse[b]));
    return r;
 }
 /* Portable C form of the same multiply-and-take-high-word scheme.
  * NOTE(review): the argument a is not parenthesized inside the cast. */
 #elif CONFIG_FASTDIV
 #    define FASTDIV(a,b)   ((uint32_t)((((uint64_t)a)*ff_inverse[b])>>32))
 /* Default: ordinary division. */
 #else
 #    define FASTDIV(a,b)   ((a)/(b))
 #endif
 /* Lookup table used by ff_sqrt(); defined elsewhere. */
 extern const uint8_t ff_sqrt_tab[256];
 /**
  * Table-driven integer square root (presumably floor(sqrt(a)); the exact
  * result depends on the contents of ff_sqrt_tab, which is not visible here).
  *
  * Small inputs (a < 255) are answered directly from the table. Larger inputs
  * obtain a first estimate b from the table entry selected by the high bits
  * of a; for a >= 1<<16 (or a >= 1<<12 when CONFIG_SMALL is set) the estimate
  * is refined with one FASTDIV step. Finally 1 is subtracted when b*b
  * exceeds a.
  * @param a input value
  * @return the square root estimate described above
  */
 static inline av_const unsigned int ff_sqrt(unsigned int a)
 {
    unsigned int b;
    if(a<255) return (ff_sqrt_tab[a+1]-1)>>4;
    else if(a<(1<<12)) b= ff_sqrt_tab[a>>4 ]>>2;
 #if !CONFIG_SMALL
    else if(a<(1<<14)) b= ff_sqrt_tab[a>>6 ]>>1;
    else if(a<(1<<16)) b= ff_sqrt_tab[a>>8 ]   ;
 #endif
    else{
        /* s is half the bit index (per av_log2_16bit) of the upper 16 bits */
        int s= av_log2_16bit(a>>16)>>1;
        unsigned int c= a>>(s+2);
        b= ff_sqrt_tab[c>>(s+8)];
        b= FASTDIV(c,b) + (b<<s);
    }
    /* correct an estimate that is one too large */
    return b - (a<b*b);
 }
 /*
  * MASK_ABS(mask, level): sets mask to the sign mask of level and replaces
  * level by (level ^ mask) - mask, i.e. its absolute value.
  * Both arguments must be lvalues. The expansion is a sequence of statements
  * with its own trailing semicolon, so it must not be used as the unbraced
  * body of an if/else or loop.
  */
 /* x86: CLTD sign-extends EAX (level) into EDX (mask), then xor/sub. */
 #if ARCH_X86
 #define MASK_ABS(mask, level)\
            __asm__ volatile(\
                "cltd                   \n\t"\
                "xorl %1, %0            \n\t"\
                "subl %1, %0            \n\t"\
                : "+a" (level), "=&d" (mask)\
            );
 /* Generic C: level>>31 yields the sign mask; this assumes level is a 32-bit
  * int and that >> on a negative value is an arithmetic shift. */
 #else
 #define MASK_ABS(mask, level)\
            mask= level>>31;\
            level= (level^mask)-mask;
 #endif
 /*
  * COPY3_IF_LT(x,y,a,b,c,d): when y < x, performs the three assignments
  * x = y, a = b and c = d; otherwise leaves x, a and c untouched.
  * x, a and c must be lvalues. The expansion is not wrapped in do/while(0),
  * so it must not be used as the unbraced body of an if/else.
  */
 /* x86 with CMOV: one compare followed by three conditional moves. */
 #if HAVE_CMOV
 #define COPY3_IF_LT(x,y,a,b,c,d)\
 __asm__ volatile (\
    "cmpl %0, %3        \n\t"\
    "cmovl %3, %0       \n\t"\
    "cmovl %4, %1       \n\t"\
    "cmovl %5, %2       \n\t"\
    : "+&r" (x), "+&r" (a), "+r" (c)\
    : "r" (y), "r" (b), "r" (d)\
 );
 /* Portable C version. */
 #else
 #define COPY3_IF_LT(x,y,a,b,c,d)\
 if((y)<(x)){\
      (x)=(y);\
      (a)=(b);\
      (c)=(d);\
 }
 #endif
 /* avoid usage of dangerous/inappropriate system functions */
 /* Each listed name is redefined to an identifier that is not declared here
  * and whose spelling names the intended replacement, so a use of the
  * original name after this point no longer refers to the libc function.
  * The rand, srand and printf entries are commented out in this import and
  * therefore remain usable. */
 #undef  malloc
 #define malloc please_use_av_malloc
 #undef  free
 #define free please_use_av_free
 #undef  realloc
 #define realloc please_use_av_realloc
 #undef  time
 #define time time_is_forbidden_due_to_security_issues
 //#undef  rand
 //#define rand rand_is_forbidden_due_to_state_trashing_use_av_random
 //#undef  srand
 //#define srand srand_is_forbidden_due_to_state_trashing_use_av_random_init
 #undef  random
 #define random random_is_forbidden_due_to_state_trashing_use_av_random
 #undef  sprintf
 #define sprintf sprintf_is_forbidden_due_to_security_issues_use_snprintf
 #undef  strcat
 #define strcat strcat_is_forbidden_due_to_security_issues_use_av_strlcat
 #undef  exit
 #define exit exit_is_forbidden
 /* The stdio output functions are only redirected when LIBAVFORMAT_BUILD is
  * not defined. */
 #ifndef LIBAVFORMAT_BUILD
 //#undef  printf
 //#define printf please_use_av_log_instead_of_printf
 #undef  fprintf
 #define fprintf please_use_av_log_instead_of_fprintf
 #undef  puts
 #define puts please_use_av_log_instead_of_puts
 #undef  perror
 #define perror please_use_av_log_instead_of_perror
 #endif
 /*
  * CHECKED_ALLOCZ(p, size): assigns av_mallocz(size) to p. If the result is
  * NULL although size is non-zero, an error is logged and control jumps to a
  * label named "fail", which the calling function must provide.
  * The log message ends with a newline so that it forms a complete line and
  * does not run into the next message written through av_log().
  * size is evaluated twice; the expansion is a brace block, not a do/while(0).
  */
 #define CHECKED_ALLOCZ(p, size)\
 {\
    p= av_mallocz(size);\
    if(p==NULL && (size)!=0){\
        av_log(NULL, AV_LOG_ERROR, "Cannot allocate memory.\n");\
        goto fail;\
    }\
 }
 /*
  * DECLARE_ALIGNED(n,t,v): declares variable v of type t aligned to n bytes.
  * DECLARE_ASM_CONST(n,t,v): declares an aligned constant v of type t (static
  * in every branch except ICC/Sun; the GCC branch also adds attribute_used).
  * The spelling of the alignment request depends on the compiler; with an
  * unknown compiler the alignment is dropped, which is an error when inline
  * asm is enabled.
  */
 #if defined(__ICC) || defined(__SUNPRO_C)
    #define DECLARE_ALIGNED(n,t,v)      t v __attribute__ ((aligned (n)))
    #define DECLARE_ASM_CONST(n,t,v)    const t __attribute__ ((aligned (n))) v
 #elif defined(__GNUC__)
    #define DECLARE_ALIGNED(n,t,v)      t v __attribute__ ((aligned (n)))
    #define DECLARE_ASM_CONST(n,t,v)    static const t v attribute_used __attribute__ ((aligned (n)))
 #elif defined(_MSC_VER)
    #define DECLARE_ALIGNED(n,t,v)      __declspec(align(n)) t v
    #define DECLARE_ASM_CONST(n,t,v)    __declspec(align(n)) static const t v
 #elif HAVE_INLINE_ASM
    #error The asm code needs alignment, but we do not know how to do it for this compiler.
 #else
    #define DECLARE_ALIGNED(n,t,v)      t v
    #define DECLARE_ASM_CONST(n,t,v)    static const t v
 #endif
 /*
  * Replacements for C99 <math.h> rounding functions, compiled in whenever the
  * corresponding HAVE_* macro is 0 or undefined. They are built on rint(),
  * floor() and ceil().
  */
 #if !HAVE_LLRINT
 /** Rounds x with rint() and converts the result to long long. */
 static av_always_inline av_const long long llrint(double x)
 {
    return rint(x);
 }
 #endif /* HAVE_LLRINT */
 #if !HAVE_LRINT
 /** Rounds x with rint() and converts the result to long. */
 static av_always_inline av_const long int lrint(double x)
 {
    return rint(x);
 }
 #endif /* HAVE_LRINT */
 #if !HAVE_LRINTF
 /**
  * Rounds x with rint() and converts the result to long.
  * The result is converted straight to the return type (as lrint() does)
  * rather than through int, so values that fit in long but not in int are
  * not truncated.
  */
 static av_always_inline av_const long int lrintf(float x)
 {
    return rint(x);
 }
 #endif /* HAVE_LRINTF */
 #if !HAVE_ROUND
 /** Rounds to the nearest integer value, halfway cases away from zero. */
 static av_always_inline av_const double round(double x)
 {
    return (x > 0) ? floor(x + 0.5) : ceil(x - 0.5);
 }
 #endif /* HAVE_ROUND */
 #if !HAVE_ROUNDF
 /** Float variant of round(); the computation is carried out in double. */
 static av_always_inline av_const float roundf(float x)
 {
    return (x > 0) ? floor(x + 0.5) : ceil(x - 0.5);
 }
 #endif /* HAVE_ROUNDF */
 #if !HAVE_TRUNCF
 /** Discards the fractional part of x (rounds toward zero). */
 static av_always_inline av_const float truncf(float x)
 {
    return (x > 0) ? floor(x) : ceil(x);
 }
 #endif /* HAVE_TRUNCF */
 /**
  * Returns NULL if CONFIG_SMALL is true, otherwise the argument
  * without modification. Used to disable the definition of strings
  * (for example AVCodec long_names).
  * CONFIG_SMALL is tested with #if, so an undefined CONFIG_SMALL counts as
  * false and the argument is kept.
  */
 #if CONFIG_SMALL
 #   define NULL_IF_CONFIG_SMALL(x) NULL
 #else
 #   define NULL_IF_CONFIG_SMALL(x) x
 #endif
 #endif /* AVUTIL_INTERNAL_H */
--- a/apps/codecs/libatrac/libavutil/intreadwrite.h
+++ b/apps/codecs/libatrac/libavutil/intreadwrite.h
@ -0,0 +1,192 @@
 /*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
 #ifndef AVUTIL_INTREADWRITE_H
 #define AVUTIL_INTREADWRITE_H
 #include <stdint.h>
 //#include "ffmpeg_config.h"
 #include "bswap.h"
 /*
  * Native-endian accessors for possibly unaligned addresses:
  * AV_RN16/32/64(a) read, and AV_WN16/32/64(a, b) write, a 16/32/64-bit value
  * at address a in host byte order.
  * The AV_WN* expansions are bare assignment expressions without outer
  * parentheses.
  */
 /* GCC: access through a one-member packed struct. */
 #ifdef __GNUC__
 struct unaligned_64 { uint64_t l; } __attribute__((packed));
 struct unaligned_32 { uint32_t l; } __attribute__((packed));
 struct unaligned_16 { uint16_t l; } __attribute__((packed));
 #define AV_RN16(a) (((const struct unaligned_16 *) (a))->l)
 #define AV_RN32(a) (((const struct unaligned_32 *) (a))->l)
 #define AV_RN64(a) (((const struct unaligned_64 *) (a))->l)
 #define AV_WN16(a, b) (((struct unaligned_16 *) (a))->l) = (b)
 #define AV_WN32(a, b) (((struct unaligned_32 *) (a))->l) = (b)
 #define AV_WN64(a, b) (((struct unaligned_64 *) (a))->l) = (b)
 /* DEC C: use the __unaligned pointer qualifier. */
 #elif defined(__DECC)
 #define AV_RN16(a) (*((const __unaligned uint16_t*)(a)))
 #define AV_RN32(a) (*((const __unaligned uint32_t*)(a)))
 #define AV_RN64(a) (*((const __unaligned uint64_t*)(a)))
 #define AV_WN16(a, b) *((__unaligned uint16_t*)(a)) = (b)
 #define AV_WN32(a, b) *((__unaligned uint32_t*)(a)) = (b)
 #define AV_WN64(a, b) *((__unaligned uint64_t*)(a)) = (b)
 /* Other compilers: plain pointer casts, with no special handling for
  * unaligned addresses. */
 #else
 #define AV_RN16(a) (*((const uint16_t*)(a)))
 #define AV_RN32(a) (*((const uint32_t*)(a)))
 #define AV_RN64(a) (*((const uint64_t*)(a)))
 #define AV_WN16(a, b) *((uint16_t*)(a)) = (b)
 #define AV_WN32(a, b) *((uint32_t*)(a)) = (b)
 #define AV_WN64(a, b) *((uint64_t*)(a)) = (b)
 #endif /* !__GNUC__ */
 /* endian macros */
 /*
  * Naming: AV_R = read, AV_W = write; B = big-endian, L = little-endian;
  * the number is the width in bits. x / p is the byte address, d the value.
  */
 /* 8-bit accessors: byte order is irrelevant, so the L forms alias the B forms. */
 #define AV_RB8(x)     (((const uint8_t*)(x))[0])
 #define AV_WB8(p, d)  do { ((uint8_t*)(p))[0] = (d); } while(0)
 #define AV_RL8(x)     AV_RB8(x)
 #define AV_WL8(p, d)  AV_WB8(p, d)
 /* When HAVE_FAST_UNALIGNED is set, the 16/32/64-bit accessors map onto the
  * native AV_RN/AV_WN accessors, adding a byte swap for whichever byte order
  * differs from the host's. */
 #if HAVE_FAST_UNALIGNED
 # ifdef WORDS_BIGENDIAN
 #  define AV_RB16(x)    AV_RN16(x)
 #  define AV_WB16(p, d) AV_WN16(p, d)
 #  define AV_RL16(x)    bswap_16(AV_RN16(x))
 #  define AV_WL16(p, d) AV_WN16(p, bswap_16(d))
 #  define AV_RB32(x)    AV_RN32(x)
 #  define AV_WB32(p, d) AV_WN32(p, d)
 #  define AV_RL32(x)    bswap_32(AV_RN32(x))
 #  define AV_WL32(p, d) AV_WN32(p, bswap_32(d))
 #  define AV_RB64(x)    AV_RN64(x)
 #  define AV_WB64(p, d) AV_WN64(p, d)
 #  define AV_RL64(x)    bswap_64(AV_RN64(x))
 #  define AV_WL64(p, d) AV_WN64(p, bswap_64(d))
 # else /* WORDS_BIGENDIAN */
 #  define AV_RB16(x)    bswap_16(AV_RN16(x))
 #  define AV_WB16(p, d) AV_WN16(p, bswap_16(d))
 #  define AV_RL16(x)    AV_RN16(x)
 #  define AV_WL16(p, d) AV_WN16(p, d)
 #  define AV_RB32(x)    bswap_32(AV_RN32(x))
 #  define AV_WB32(p, d) AV_WN32(p, bswap_32(d))
 #  define AV_RL32(x)    AV_RN32(x)
 #  define AV_WL32(p, d) AV_WN32(p, d)
 #  define AV_RB64(x)    bswap_64(AV_RN64(x))
 #  define AV_WB64(p, d) AV_WN64(p, bswap_64(d))
 #  define AV_RL64(x)    AV_RN64(x)
 #  define AV_WL64(p, d) AV_WN64(p, d)
 # endif
 #else /* HAVE_FAST_UNALIGNED */
 /* Byte-wise 16-bit accessors: the value is assembled from, or split into,
  * individual bytes. The arguments are evaluated once per byte accessed, so
  * they must be free of side effects. */
 #define AV_RB16(x)  ((((const uint8_t*)(x))[0] << 8) | ((const uint8_t*)(x))[1])
 #define AV_WB16(p, d) do { \
                     ((uint8_t*)(p))[1] = (d); \
                     ((uint8_t*)(p))[0] = (d)>>8; } while(0)
 #define AV_RL16(x)  ((((const uint8_t*)(x))[1] << 8) | \
                       ((const uint8_t*)(x))[0])
 #define AV_WL16(p, d) do { \
                     ((uint8_t*)(p))[0] = (d); \
                     ((uint8_t*)(p))[1] = (d)>>8; } while(0)
 /*
  * Byte-wise 32-bit accessors: the value is assembled from, or split into,
  * individual bytes. The arguments are evaluated once per byte accessed, so
  * they must be free of side effects.
  * In the read macros the byte that ends up in bits 24-31 is converted to
  * uint32_t before shifting: a uint8_t promotes to int, and shifting a value
  * >= 0x80 left by 24 would overflow it. The result is therefore unsigned.
  */
 #define AV_RB32(x)  (((uint32_t)((const uint8_t*)(x))[0] << 24) | \
                     (((const uint8_t*)(x))[1] << 16) | \
                     (((const uint8_t*)(x))[2] <<  8) | \
                      ((const uint8_t*)(x))[3])
 #define AV_WB32(p, d) do { \
                     ((uint8_t*)(p))[3] = (d); \
                     ((uint8_t*)(p))[2] = (d)>>8; \
                     ((uint8_t*)(p))[1] = (d)>>16; \
                     ((uint8_t*)(p))[0] = (d)>>24; } while(0)
 #define AV_RL32(x) (((uint32_t)((const uint8_t*)(x))[3] << 24) | \
                    (((const uint8_t*)(x))[2] << 16) | \
                    (((const uint8_t*)(x))[1] <<  8) | \
                     ((const uint8_t*)(x))[0])
 #define AV_WL32(p, d) do { \
                     ((uint8_t*)(p))[0] = (d); \
                     ((uint8_t*)(p))[1] = (d)>>8; \
                     ((uint8_t*)(p))[2] = (d)>>16; \
                     ((uint8_t*)(p))[3] = (d)>>24; } while(0)
 /* Byte-wise 64-bit accessors. Every byte is widened to uint64_t before it
  * is shifted. The arguments are evaluated once per byte accessed, so they
  * must be free of side effects. */
 #define AV_RB64(x)  (((uint64_t)((const uint8_t*)(x))[0] << 56) | \
                      ((uint64_t)((const uint8_t*)(x))[1] << 48) | \
                      ((uint64_t)((const uint8_t*)(x))[2] << 40) | \
                      ((uint64_t)((const uint8_t*)(x))[3] << 32) | \
                      ((uint64_t)((const uint8_t*)(x))[4] << 24) | \
                      ((uint64_t)((const uint8_t*)(x))[5] << 16) | \
                      ((uint64_t)((const uint8_t*)(x))[6] <<  8) | \
                       (uint64_t)((const uint8_t*)(x))[7])
 /* NOTE(review): d must have a 64-bit type here, since it is shifted by up
  * to 56 bits. */
 #define AV_WB64(p, d) do { \
                     ((uint8_t*)(p))[7] = (d);     \
                     ((uint8_t*)(p))[6] = (d)>>8;  \
                     ((uint8_t*)(p))[5] = (d)>>16; \
                     ((uint8_t*)(p))[4] = (d)>>24; \
                     ((uint8_t*)(p))[3] = (d)>>32; \
                     ((uint8_t*)(p))[2] = (d)>>40; \
                     ((uint8_t*)(p))[1] = (d)>>48; \
                     ((uint8_t*)(p))[0] = (d)>>56; } while(0)
 #define AV_RL64(x)  (((uint64_t)((const uint8_t*)(x))[7] << 56) | \
                      ((uint64_t)((const uint8_t*)(x))[6] << 48) | \
                      ((uint64_t)((const uint8_t*)(x))[5] << 40) | \
                      ((uint64_t)((const uint8_t*)(x))[4] << 32) | \
                      ((uint64_t)((const uint8_t*)(x))[3] << 24) | \
                      ((uint64_t)((const uint8_t*)(x))[2] << 16) | \
                      ((uint64_t)((const uint8_t*)(x))[1] <<  8) | \
                       (uint64_t)((const uint8_t*)(x))[0])
 #define AV_WL64(p, d) do { \
                     ((uint8_t*)(p))[0] = (d);     \
                     ((uint8_t*)(p))[1] = (d)>>8;  \
                     ((uint8_t*)(p))[2] = (d)>>16; \
                     ((uint8_t*)(p))[3] = (d)>>24; \
                     ((uint8_t*)(p))[4] = (d)>>32; \
                     ((uint8_t*)(p))[5] = (d)>>40; \
                     ((uint8_t*)(p))[6] = (d)>>48; \
                     ((uint8_t*)(p))[7] = (d)>>56; } while(0)
 #endif  /* HAVE_FAST_UNALIGNED */
 /* 24-bit accessors. These are always byte-wise, independent of
  * HAVE_FAST_UNALIGNED. Three bytes are read or written; the arguments are
  * evaluated once per byte, so they must be free of side effects. */
 #define AV_RB24(x)  ((((const uint8_t*)(x))[0] << 16) | \
                      (((const uint8_t*)(x))[1] <<  8) | \
                       ((const uint8_t*)(x))[2])
 #define AV_WB24(p, d) do { \
                     ((uint8_t*)(p))[2] = (d); \
                     ((uint8_t*)(p))[1] = (d)>>8; \
                     ((uint8_t*)(p))[0] = (d)>>16; } while(0)
 #define AV_RL24(x)  ((((const uint8_t*)(x))[2] << 16) | \
                      (((const uint8_t*)(x))[1] <<  8) | \
                       ((const uint8_t*)(x))[0])
 #define AV_WL24(p, d) do { \
                     ((uint8_t*)(p))[0] = (d); \
                     ((uint8_t*)(p))[1] = (d)>>8; \
                     ((uint8_t*)(p))[2] = (d)>>16; } while(0)
 #endif /* AVUTIL_INTREADWRITE_H */
--- a/apps/codecs/libatrac/libavutil/log.c
+++ b/apps/codecs/libatrac/libavutil/log.c
@ -0,0 +1,89 @@
 /*
 * log functions
 * Copyright (c) 2003 Michel Bardiaux
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
 /**
 * @file libavutil/log.c
 * logging functions
 */
 #include "avutil.h"
 #include "log.h"
 int av_log_level = AV_LOG_INFO;
 /**
  * Default log sink: formats the message and writes it to stderr.
  *
  * Messages whose level is above av_log_level are dropped. When the previous
  * message ended with a newline and ptr is non-NULL, the text is prefixed with
  * "[name @ address]", where name comes from the item_name callback of the
  * AVClass that ptr points to. A message identical to the previous one is not
  * printed again; it is counted, and the count is reported before the next
  * different message.
  *
  * The function keeps its state in static buffers, so it is not reentrant.
  * Formatted output longer than the 1024-byte line buffer is truncated.
  *
  * @param ptr   NULL, or a pointer to a struct whose first field is an AVClass*
  * @param level importance of the message (lower is more important)
  * @param fmt   printf-style format string
  * @param vl    arguments for fmt
  */
 void av_log_default_callback(void* ptr, int level, const char* fmt, va_list vl)
 {
    static int print_prefix=1;
    static int count;
    static char line[1024], prev[1024];
    AVClass* avc= ptr ? *(AVClass**)ptr : NULL;
    size_t len;
    if(level>av_log_level)
        return;
 #undef fprintf
    if(print_prefix && avc) {
        snprintf(line, sizeof(line), "[%s @ %p]", avc->item_name(ptr), ptr);
    }else
        line[0]=0;
    len= strlen(line);
    vsnprintf(line + len, sizeof(line) - len, fmt, vl);
    /* An empty line has no last character: test the length first instead of
       reading line[-1]. */
    len= strlen(line);
    print_prefix= len > 0 && line[len-1] == '\n';
    if(print_prefix && !strcmp(line, prev)){
        count++;
        return;
    }
    if(count>0){
        fprintf(stderr, "    Last message repeated %d times\n", count);
        count=0;
    }
    fputs(line, stderr);
    /* prev has the same size as line, so this copy always fits */
    strcpy(prev, line);
 }
 static void (*av_log_callback)(void*, int, const char*, va_list) = av_log_default_callback;
 /**
  * printf-style front end of the logger: collects the variable arguments
  * into a va_list and passes everything on to av_vlog().
  * @param avcl  context pointer forwarded unchanged to av_vlog()
  * @param level importance of the message
  * @param fmt   printf-style format string
  */
 void av_log(void* avcl, int level, const char *fmt, ...)
 {
    va_list args;

    va_start(args, fmt);
    av_vlog(avcl, level, fmt, args);
    va_end(args);
 }
 void av_vlog(void* avcl, int level, const char *fmt, va_list vl)
 {
    av_log_callback(avcl, level, fmt, vl);
 }
 /** Returns the current value of the global av_log_level threshold. */
 int av_log_get_level(void)
 {
    const int current_level = av_log_level;
    return current_level;
 }
 /**
  * Sets the global av_log_level threshold. The default callback drops every
  * message whose level is greater than this value.
  */
 void av_log_set_level(int level)
 {
    av_log_level = level;
 }
 /**
  * Installs the function that av_vlog() (and therefore av_log()) calls for
  * every message. The pointer is stored as given and is not checked for NULL.
  */
 void av_log_set_callback(void (*callback)(void*, int, const char*, va_list))
 {
    av_log_callback = callback;
 }
--- a/apps/codecs/libatrac/libavutil/log.h
+++ b/apps/codecs/libatrac/libavutil/log.h
@ -0,0 +1,116 @@
 /*
 * copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
 #ifndef AVUTIL_LOG_H
 #define AVUTIL_LOG_H
 #include <stdarg.h>
 #include "avutil.h"
 /**
  * Describes the class of an AVClass context structure. That is an
  * arbitrary struct of which the first field is a pointer to an
  * AVClass struct (e.g. AVCodecContext, AVFormatContext etc.).
  * The logging functions take such a context struct as their first argument.
  */
 typedef struct AVCLASS AVClass;
 struct AVCLASS {
    /**
     * The name of the class; usually it is the same name as the
     * context structure type to which the AVClass is associated.
     */
    const char* class_name;
    /**
     * A pointer to a function which returns the name of a context
     * instance \p ctx associated with the class.
     * The default log callback calls it to build the "[name @ address]"
     * message prefix.
     */
    const char* (*item_name)(void* ctx);
    /**
     * a pointer to the first option specified in the class if any or NULL
     *
     * @see av_set_default_options()
     */
    const struct AVOption *option;
 };
 /* av_log API */
 /*
  * Log levels: a lower value means a more important message. A message is
  * printed by the default callback when its level does not exceed the current
  * av_log_level.
  */
 /**
  * Lower than every message level below; presumably meant as a threshold
  * value that silences all output rather than as a message level.
  */
 #define AV_LOG_QUIET    -8
 /**
  * Something went really wrong and we will crash now.
  */
 #define AV_LOG_PANIC     0
 /**
  * Something went wrong and recovery is not possible.
  * For example, no header was found for a format which depends
  * on headers or an illegal combination of parameters is used.
  */
 #define AV_LOG_FATAL     8
 /**
  * Something went wrong and cannot losslessly be recovered.
  * However, not all future data is affected.
  */
 #define AV_LOG_ERROR    16
 /**
  * Something somehow does not look correct. This may or may not
  * lead to problems. An example would be the use of '-vstrict -2'.
  */
 #define AV_LOG_WARNING  24
 /**
  * Level between WARNING and VERBOSE; this is the initial value of
  * av_log_level in log.c.
  */
 #define AV_LOG_INFO     32
 /**
  * Level between INFO and DEBUG; not shown at the initial log level.
  */
 #define AV_LOG_VERBOSE  40
 /**
  * Stuff which is only useful for libav* developers.
  */
 #define AV_LOG_DEBUG    48
 /**
 * Sends the specified message to the log if the level is less than or equal
 * to the current av_log_level. By default, all logging messages are sent to
 * stderr. This behavior can be altered by setting a different av_vlog callback
 * function.
 *
 * @param avcl A pointer to an arbitrary struct of which the first field is a
 * pointer to an AVClass struct.
 * @param level The importance level of the message, lower values signifying
 * higher importance.
 * @param fmt The format string (printf-compatible) that specifies how
 * subsequent arguments are converted to output.
 * @see av_vlog
 */
 #ifdef __GNUC__
 void av_log(void*, int level, const char *fmt, ...) __attribute__ ((__format__ (__printf__, 3, 4)));
 #else
 void av_log(void*, int level, const char *fmt, ...);
 #endif
 void av_vlog(void*, int level, const char *fmt, va_list);
 int av_log_get_level(void);
 void av_log_set_level(int);
 void av_log_set_callback(void (*)(void*, int, const char*, va_list));
 void av_log_default_callback(void* ptr, int level, const char* fmt, va_list vl);
 #endif /* AVUTIL_LOG_H */
--- a/apps/codecs/libatrac/libavutil/mem.c
+++ b/apps/codecs/libatrac/libavutil/mem.c
@ -0,0 +1,159 @@
 /*
 * default memory allocator for libavutil
 * Copyright (c) 2002 Fabrice Bellard
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
 /**
 * @file libavutil/mem.c
 * default memory allocator for libavutil
 */
 //#include "ffmpeg_config.h"
 #include <limits.h>
 #include <stdlib.h>
 #include <string.h>
 #if HAVE_MALLOC_H
 #include <malloc.h>
 #endif
 #include "mem.h"
 /* here we can use OS-dependent allocation functions */
 #undef free
 #undef malloc
 #undef realloc
 /* You can redefine av_malloc and av_free in your project to use your
   memory allocator. You do not need to suppress this file because the
   linker will do it automatically. */
 /* Allocate size bytes; returns NULL on failure or when size exceeds
    INT_MAX-16. Which allocator is used is selected at compile time. */
 void *av_malloc(unsigned int size)
 {
    void *block = NULL;
 #if CONFIG_MEMALIGN_HACK
    long shift;
 #endif
    /* refuse requests too close to INT_MAX to avoid ambiguous cases */
    if( (INT_MAX-16) < size )
        return NULL;
 #if CONFIG_MEMALIGN_HACK
    /* Over-allocate by 16 bytes, move forward to the next 16-byte boundary
       (always 1..16 bytes ahead) and store that distance in the byte just
       before the returned address; av_realloc() and av_free() read it back
       to recover the pointer malloc() handed out. */
    block = malloc(size+16);
    if(block) {
        shift = ((-(long)block - 1)&15) + 1;
        block = (char*)block + shift;
        ((char*)block)[-1] = shift;
    }
 #elif HAVE_POSIX_MEMALIGN
    if (posix_memalign(&block,16,size) != 0)
        block = NULL;
 #elif HAVE_MEMALIGN
    block = memalign(16,size);
    /* Why 64?
       Indeed, we should align it:
         on 4 for 386
         on 16 for 486
         on 32 for 586, PPro - K6-III
         on 64 for K7 (maybe for P3 too).
       Because L1 and L2 caches are aligned on those values.
       But I don't want to code such logic here!
     */
     /* Why 16?
        Because some CPUs need alignment, for example SSE2 on P4, & most RISC CPUs
        it will just trigger an exception and the unaligned load will be done in the
        exception handler or it will just segfault (SSE2 on P4).
        Why not larger? Because I did not see a difference in benchmarks ...
     */
     /* benchmarks with P3
        memalign(64)+1          3071,3051,3032
        memalign(64)+2          3051,3032,3041
        memalign(64)+4          2911,2896,2915
        memalign(64)+8          2545,2554,2550
        memalign(64)+16         2543,2572,2563
        memalign(64)+32         2546,2545,2571
        memalign(64)+64         2570,2533,2558
        BTW, malloc seems to do 8-byte alignment by default here.
     */
 #else
    /* no aligned allocator configured: plain malloc() */
    block = malloc(size);
 #endif
    return block;
 }
 /* Resize a block obtained from av_malloc()/av_realloc().
    Returns the (possibly moved) block, or NULL when size exceeds INT_MAX-16
    or the underlying realloc() fails; in both failure cases the block
    passed in is not touched. */
 void *av_realloc(void *ptr, unsigned int size)
 {
 #if CONFIG_MEMALIGN_HACK
    int diff;
 #endif
    /* let's disallow possible ambiguous cases */
    if(size > (INT_MAX-16) )
        return NULL;
 #if CONFIG_MEMALIGN_HACK
    //FIXME this isn't aligned correctly, though it probably isn't needed
    if(!ptr) return av_malloc(size);
    /* the byte before the block holds the offset av_malloc() applied */
    diff= ((char*)ptr)[-1];
    ptr = realloc((char*)ptr - diff, size + diff);
    /* only re-apply the offset on success: NULL + diff would hand the
       caller a non-NULL garbage pointer and hide the failure */
    if(ptr)
        ptr = (char*)ptr + diff;
    return ptr;
 #else
    return realloc(ptr, size);
 #endif
 }
 /* Release a block obtained from av_malloc()/av_realloc(); NULL is ignored. */
 void av_free(void *ptr)
 {
    /* the NULL check is required for the MEMALIGN_HACK build, which reads
       the byte stored in front of the block */
    if (!ptr)
        return;
 #if CONFIG_MEMALIGN_HACK
    /* step back by the stored offset to the address malloc() returned */
    free((char*)ptr - ((char*)ptr)[-1]);
 #else
    free(ptr);
 #endif
 }
 /* Free the block whose address is stored at *arg, then clear that stored
    address. arg is the address of the caller's pointer variable. */
 void av_freep(void *arg)
 {
    void **slot = arg;
    av_free(*slot);
    *slot = NULL;
 }
 /* av_malloc() followed by zero-filling the block; NULL if allocation fails. */
 void *av_mallocz(unsigned int size)
 {
    void *zeroed = av_malloc(size);
    if (!zeroed)
        return NULL;
    memset(zeroed, 0, size);
    return zeroed;
 }
 /* Duplicate the NUL-terminated string s into a block from av_malloc().
    Returns NULL when s is NULL, when the string (including its terminator)
    does not fit av_malloc()'s unsigned int size, or when allocation fails. */
 char *av_strdup(const char *s)
 {
    char *ptr= NULL;
    if(s){
        /* keep the length in size_t: storing it in an int would truncate
           very long strings and produce a short, unterminated copy */
        size_t len = strlen(s) + 1;
        if (len > UINT_MAX)
            return NULL;
        ptr = av_malloc((unsigned int)len);
        if (ptr)
            memcpy(ptr, s, len);
    }
    return ptr;
 }
--- a/apps/codecs/libatrac/libavutil/mem.h
+++ b/apps/codecs/libatrac/libavutil/mem.h
@ -0,0 +1,104 @@
 /*
 * copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
 /**
 * @file libavutil/mem.h
 * memory handling functions
 */
 #ifndef AVUTIL_MEM_H
 #define AVUTIL_MEM_H
 #include "common.h"
 /* av_malloc_attrib: expands to GCC's malloc function attribute when
    AV_GCC_VERSION_AT_LEAST(3,1) holds, and to nothing otherwise. */
 #if AV_GCC_VERSION_AT_LEAST(3,1)
    #define av_malloc_attrib __attribute__((__malloc__))
 #else
    #define av_malloc_attrib
 #endif
 /* av_alloc_size(n): expands to GCC's alloc_size attribute with argument n
    when AV_GCC_VERSION_AT_LEAST(4,3) holds and, if __ICC is defined, only
    when __ICC > 1100. Expands to nothing otherwise. */
 #if (!defined(__ICC) || __ICC > 1100) && AV_GCC_VERSION_AT_LEAST(4,3)
    #define av_alloc_size(n) __attribute__((alloc_size(n)))
 #else
    #define av_alloc_size(n)
 #endif
 /**
 * Allocates a block of \p size bytes with alignment suitable for all
 * memory accesses (including vectors if available on the CPU).
 * @param size Size in bytes for the memory block to be allocated.
 * @return Pointer to the allocated block, NULL if the block cannot
 * be allocated.
 * @see av_mallocz()
 */
 void *av_malloc(unsigned int size) av_malloc_attrib av_alloc_size(1);
 /**
 * Allocates or reallocates a block of memory.
 * If \p ptr is NULL and \p size > 0, allocates a new block. If \p
 * size is zero, frees the memory block pointed to by \p ptr.
 * @param size Size in bytes for the memory block to be allocated or
 * reallocated.
 * @param ptr Pointer to a memory block already allocated with
 * av_malloc(z)() or av_realloc() or NULL.
 * @return Pointer to a newly reallocated block or NULL if the block
 * cannot be reallocated or the function is used to free the memory block.
 * @see av_fast_realloc()
 */
 void *av_realloc(void *ptr, unsigned int size) av_alloc_size(2);
 /**
 * Frees a memory block which has been allocated with av_malloc(z)() or
 * av_realloc().
 * @param ptr Pointer to the memory block which should be freed.
 * @note ptr = NULL is explicitly allowed.
 * @note It is recommended that you use av_freep() instead.
 * @see av_freep()
 */
 void av_free(void *ptr);
 /**
 * Allocates a block of \p size bytes with alignment suitable for all
 * memory accesses (including vectors if available on the CPU) and
 * zeroes all the bytes of the block.
 * @param size Size in bytes for the memory block to be allocated.
 * @return Pointer to the allocated block, NULL if it cannot be allocated.
 * @see av_malloc()
 */
 void *av_mallocz(unsigned int size) av_malloc_attrib av_alloc_size(1);
 /**
 * Duplicates the string \p s.
 * @param s string to be duplicated
 * @return Pointer to a newly allocated string containing a
 * copy of \p s or NULL if the string cannot be allocated.
 */
 char *av_strdup(const char *s) av_malloc_attrib;
 /**
 * Frees a memory block which has been allocated with av_malloc(z)() or
 * av_realloc() and sets the pointer pointing to it to NULL.
 * @param ptr Pointer to the pointer to the memory block which should
 * be freed.
 * @see av_free()
 */
 void av_freep(void *ptr);
 #endif /* AVUTIL_MEM_H */
--- a/apps/codecs/libatrac/mdct.c
+++ b/apps/codecs/libatrac/mdct.c
@ -0,0 +1,245 @@
 /*
 * MDCT/IMDCT transforms
 * Copyright (c) 2002 Fabrice Bellard
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
 #include "dsputil.h"
 /* Fallback definitions of common math constants, each used only when the
    headers included above have not already defined it. Of these, only M_PI
    is referenced in this file. */
 #ifndef M_E
 #define M_E            2.7182818284590452354   /* e */
 #endif
 #ifndef M_LN2
 #define M_LN2          0.69314718055994530942  /* log_e 2 */
 #endif
 #ifndef M_LN10
 #define M_LN10         2.30258509299404568402  /* log_e 10 */
 #endif
 #ifndef M_PI
 #define M_PI           3.14159265358979323846  /* pi */
 #endif
 #ifndef M_SQRT1_2
 #define M_SQRT1_2      0.70710678118654752440  /* 1/sqrt(2) */
 #endif
 /**
 * @file libavcodec/mdct.c
 * MDCT/IMDCT transforms.
 */
 #define BESSEL_I0_ITER 50 // default: 50 iterations of Bessel I0 approximation
 /**
 * Generate a Kaiser-Bessel Derived Window.
 * Each coefficient is the square root of a running sum of Bessel I0
 * approximations divided by (total sum + 1).
 * @param window destination, receives n coefficients
 * @param alpha  shape parameter, enters through (alpha * pi / n)^2
 * @param n      number of coefficients to generate
 */
 av_cold void ff_kbd_window_init(float *window, float alpha, int n)
 {
    double cumulative[n];
    double running = 0.0;
    double alpha2 = (alpha * M_PI / n) * (alpha * M_PI / n);
    int i;
    for (i = 0; i < n; i++) {
        double x = i * (n - i) * alpha2;
        double i0 = 1.0;
        int j = BESSEL_I0_ITER;
        /* Horner-style evaluation of the truncated I0 series, from the
           innermost term outwards */
        while (j > 0) {
            i0 = i0 * x / (j * j) + 1;
            j--;
        }
        running += i0;
        cumulative[i] = running;
    }
    running++;
    for (i = 0; i < n; i++)
        window[i] = sqrt(cumulative[i] / running);
 }
 /* Storage for sine windows, one float table per length from 128 to 4096
    points. Nothing in this file fills them; ff_sine_window_init() writes a
    window into whatever buffer it is given.
    NOTE(review): DECLARE_ALIGNED is defined elsewhere; presumably it
    requests 16-byte alignment for each table -- confirm. */
 DECLARE_ALIGNED(16, float, ff_sine_128 [ 128]);
 DECLARE_ALIGNED(16, float, ff_sine_256 [ 256]);
 DECLARE_ALIGNED(16, float, ff_sine_512 [ 512]);
 DECLARE_ALIGNED(16, float, ff_sine_1024[1024]);
 DECLARE_ALIGNED(16, float, ff_sine_2048[2048]);
 DECLARE_ALIGNED(16, float, ff_sine_4096[4096]);
 /* The tables above ordered by increasing length: entry i is the table of
    128 << i points. */
 float *ff_sine_windows[6] = {
    ff_sine_128, ff_sine_256, ff_sine_512, ff_sine_1024, ff_sine_2048, ff_sine_4096
 };
 /**
 * Generate a sine window.
 * window[i] = sin((i + 0.5) * pi / (2 * n)) for 0 <= i < n
 * @param window destination, receives n coefficients
 * @param n      number of coefficients to generate
 */
 av_cold void ff_sine_window_init(float *window, int n) {
    float *dst = window;
    int i = 0;
    while (i < n) {
        *dst++ = sinf((i + 0.5) * (M_PI / (2.0 * n)));
        i++;
    }
 }
 /**
 * init MDCT or IMDCT computation.
 * Clears *s, allocates the two twiddle tables of n/4 entries each
 * (n = 2^nbits), fills them with -cos / -sin of 2*pi*(i + 1/8)/n and
 * initializes the embedded FFT with nbits - 2.
 * @param s       context to initialize
 * @param nbits   log2 of the transform size
 * @param inverse passed through unchanged to ff_fft_init()
 * @return 0 on success; -1 if an allocation or ff_fft_init() fails, in
 *         which case both twiddle tables have been released
 */
 av_cold int ff_mdct_init(MDCTContext *s, int nbits, int inverse)
 {
    int size, quarter, k;
    memset(s, 0, sizeof(*s));
    size = 1 << nbits;
    quarter = size >> 2;
    s->nbits = nbits;
    s->n = size;
    s->tcos = av_malloc(quarter * sizeof(FFTSample));
    /* tsin stays NULL (from the memset) when the first allocation failed */
    if (s->tcos)
        s->tsin = av_malloc(quarter * sizeof(FFTSample));
    if (s->tcos && s->tsin) {
        for (k = 0; k < quarter; k++) {
            const double angle = 2 * M_PI * (k + 1.0 / 8.0) / size;
            s->tcos[k] = -cos(angle);
            s->tsin[k] = -sin(angle);
        }
        if (ff_fft_init(&s->fft, s->nbits - 2, inverse) >= 0)
            return 0;
    }
    /* common failure path */
    av_freep(&s->tcos);
    av_freep(&s->tsin);
    return -1;
 }
 /* complex multiplication: p = a * b
    (pre, pim) receive the real and imaginary parts of
    (are + i*aim) * (bre + i*bim). All four inputs are copied into
    temporaries before either output is written, so an output may name the
    same object as an input. Wrapped in do/while(0) so that "CMUL(...);"
    is a single statement, also in an unbraced if/else. */
 #define CMUL(pre, pim, are, aim, bre, bim) \
 do {\
    FFTSample _are = (are);\
    FFTSample _aim = (aim);\
    FFTSample _bre = (bre);\
    FFTSample _bim = (bim);\
    (pre) = _are * _bre - _aim * _bim;\
    (pim) = _are * _bim + _aim * _bre;\
 } while (0)
 /**
 * Compute the middle half of the inverse MDCT of size N = 2^nbits,
 * thus excluding the parts that can be derived by symmetry.
 * The output buffer doubles as the FFT work area: it is viewed as
 * FFTComplex values and transformed in place.
 * @param output N/2 samples
 * @param input N/2 samples
 */
 void ff_imdct_half_c(MDCTContext *s, FFTSample *output, const FFTSample *input)
 {
    int k, n8, n4, n2, n, j;
    const uint16_t *revtab = s->fft.revtab;
    const FFTSample *tcos = s->tcos;
    const FFTSample *tsin = s->tsin;
    FFTComplex *z = (FFTComplex *)output;
    n = 1 << s->nbits;
    n2 = n >> 1;
    n4 = n >> 2;
    n8 = n >> 3;
    /* pre rotation: pair the k-th even-indexed sample counted from the
       front with the k-th sample counted back from input[n2-1], rotate by
       (tcos[k], tsin[k]) and store at the position given by revtab.
       Indexing (rather than walking pointers) never forms an address
       before the start of input. */
    for(k = 0; k < n4; k++) {
        j=revtab[k];
        CMUL(z[j].re, z[j].im, input[n2-1-2*k], input[2*k], tcos[k], tsin[k]);
    }
    ff_fft_calc(&s->fft, z);
    /* post rotation + reordering: work outwards from the middle, handling
       the mirrored pair (n8-k-1, n8+k) together because each result needs
       one component from either side */
    for(k = 0; k < n8; k++) {
        FFTSample r0, i0, r1, i1;
        CMUL(r0, i1, z[n8-k-1].im, z[n8-k-1].re, tsin[n8-k-1], tcos[n8-k-1]);
        CMUL(r1, i0, z[n8+k  ].im, z[n8+k  ].re, tsin[n8+k  ], tcos[n8+k  ]);
        z[n8-k-1].re = r0;
        z[n8-k-1].im = i0;
        z[n8+k  ].re = r1;
        z[n8+k  ].im = i1;
    }
 }
 /**
 * Compute inverse MDCT of size N = 2^nbits.
 * The middle N/2 samples come from ff_imdct_half_c(); the first and last
 * N/4 samples are then derived from them by symmetry.
 * @param output N samples
 * @param input N/2 samples
 */
 void ff_imdct_calc_c(MDCTContext *s, FFTSample *output, const FFTSample *input)
 {
    const int len = 1 << s->nbits;
    const int half = len >> 1;
    const int quarter = len >> 2;
    int i;
    ff_imdct_half_c(s, output+quarter, input);
    for (i = 0; i < quarter; i++) {
        /* both sources lie in the middle half, both destinations outside
           it, so reads and writes never touch the same element */
        const FFTSample mirrored = output[half-i-1];
        const FFTSample copied = output[half+i];
        output[i] = -mirrored;
        output[len-i-1] = copied;
    }
 }
 /**
 * Compute MDCT of size N = 2^nbits
 * The out buffer is also the FFT work area: it is viewed as FFTComplex
 * values, filled by the pre rotation, transformed in place by
 * ff_fft_calc() and finally rewritten by the post rotation.
 * @param input N samples
 * @param out N/2 samples
 */
 void ff_mdct_calc(MDCTContext *s, FFTSample *out, const FFTSample *input)
 {
    int i, j, n, n8, n4, n2, n3;
    FFTSample re, im;
    const uint16_t *revtab = s->fft.revtab;
    const FFTSample *tcos = s->tcos;
    const FFTSample *tsin = s->tsin;
    FFTComplex *x = (FFTComplex *)out;
    n = 1 << s->nbits;
    n2 = n >> 1;
    n4 = n >> 2;
    n8 = n >> 3;
    n3 = 3 * n4;
    /* pre rotation: every iteration folds eight input samples into two
       complex values (indices i and n8 + i), rotates them and stores them
       at the positions given by revtab, so n/4 complex values are written
       in total */
    for(i=0;i<n8;i++) {
        /* first value: built from samples around the 3n/4 and n/4 marks */
        re = -input[2*i+3*n4] - input[n3-1-2*i];
        im = -input[n4+2*i] + input[n4-1-2*i];
        j = revtab[i];
        CMUL(x[j].re, x[j].im, re, im, -tcos[i], tsin[i]);
        /* second value: built from samples at both ends and around n/2 */
        re = input[2*i] - input[n2-1-2*i];
        im = -(input[n2+2*i] + input[n-1-2*i]);
        j = revtab[n8 + i];
        CMUL(x[j].re, x[j].im, re, im, -tcos[n8 + i], tsin[n8 + i]);
    }
    ff_fft_calc(&s->fft, x);
    /* post rotation: the mirrored pair (n8-i-1, n8+i) is processed
       together; both products are computed into temporaries before any
       element of x is overwritten because each result takes one component
       from either product */
    for(i=0;i<n8;i++) {
        FFTSample r0, i0, r1, i1;
        CMUL(i1, r0, x[n8-i-1].re, x[n8-i-1].im, -tsin[n8-i-1], -tcos[n8-i-1]);
        CMUL(i0, r1, x[n8+i  ].re, x[n8+i  ].im, -tsin[n8+i  ], -tcos[n8+i  ]);
        x[n8-i-1].re = r0;
        x[n8-i-1].im = i0;
        x[n8+i  ].re = r1;
        x[n8+i  ].im = i1;
    }
 }
 /**
 * Release what ff_mdct_init() set up: both twiddle tables (their pointers
 * are reset to NULL by av_freep()) and the embedded FFT context.
 */
 av_cold void ff_mdct_end(MDCTContext *s)
 {
    av_freep(&s->tsin);
    av_freep(&s->tcos);
    ff_fft_end(&s->fft);
 }
--- a/apps/codecs/librm/rm.c
+++ b/apps/codecs/librm/rm.c
@ -29,7 +29,7 @@
 #define SWAP(a, b) do{uint8_t SWAP_tmp= b; b= a; a= SWAP_tmp;}while(0)
-static void advance_buffer(uint8_t **buf, int val)
+void advance_buffer(uint8_t **buf, int val)
 {
    *buf += val;
 }
@ -237,10 +237,40 @@ static int real_read_audio_stream_info(int fd, RMContext *rmctx)
           skipped += 1;
       }
-       read_uint32be(fd, &rmctx->extradata_size);
+       switch(fourcc) {
-       skipped += 4;
+           case FOURCC('c','o','o','k'):               
-       read(fd, rmctx->codec_extradata, rmctx->extradata_size);
+               rmctx->codec_type = CODEC_COOK;
-       skipped += rmctx->extradata_size;
+               read_uint32be(fd, &rmctx->extradata_size);
               skipped += 4;
               read(fd, rmctx->codec_extradata, rmctx->extradata_size);
               skipped += rmctx->extradata_size;
               break;
           case FOURCC('a','t','r','c'):  
 DEBUGF("WERE HERE\n");             
               rmctx->codec_type = CODEC_ATRAC;
               read_uint32be(fd, &rmctx->extradata_size);
               skipped += 4;
               read(fd, rmctx->codec_extradata, rmctx->extradata_size);
               skipped += rmctx->extradata_size;
               break;
           case FOURCC('r','a','a','c'):
           case FOURCC('r','a','c','p'):
               rmctx->codec_type = CODEC_AAC;
               read_uint32be(fd, &rmctx->extradata_size);
               skipped += 4;
               read(fd, rmctx->codec_extradata, rmctx->extradata_size);
               skipped += rmctx->extradata_size;
               break;
           case FOURCC('d','n','e','t'):
               rmctx->codec_type = CODEC_AC3;
               break;
           default: /* Not a supported codec */
               return -1;
       }
       DEBUGF("        flavor = %d\n",flavor);
@ -252,8 +282,10 @@ static int real_read_audio_stream_info(int fd, RMContext *rmctx)
       DEBUGF("        channels= %d\n",rmctx->nb_channels);
       DEBUGF("        fourcc = %s\n",fourcc2str(fourcc));
       DEBUGF("        codec_extra_data_length = %d\n",rmctx->extradata_size);
-       DEBUGF("        codec_extradata :\n");
+       if(rmctx->codec_type == CODEC_COOK) {
-       print_cook_extradata(rmctx);
+           DEBUGF("        cook_extradata :\n");
           print_cook_extradata(rmctx);
       }
    }
@ -530,7 +562,7 @@ int rm_get_packet(uint8_t **src,RMContext *rmctx, RMPacket *pkt)
        advance_buffer(src,12);
        consumed += 12;
-        if (rmctx->codec_type == CODEC_COOK) {
+        if (rmctx->codec_type == CODEC_COOK || rmctx->codec_type == CODEC_ATRAC) {
            for(x = 0 ; x < w/sps; x++)
            {
                place = sps*(h*x+((h+1)/2)*(y&1)+(y>>1)); 
--- a/apps/codecs/librm/rm.h
+++ b/apps/codecs/librm/rm.h
@ -31,7 +31,8 @@
 enum codecs {
    CODEC_COOK, 
    CODEC_AAC,
-    CODEC_AC3
+    CODEC_AC3,
    CODEC_ATRAC
 };
 typedef struct rm_packet