Add codecs to librbcodec.

Change-Id: Id7f4717d51ed02d67cb9f9cb3c0ada4a81843f97 Reviewed-on: http://gerrit.rockbox.org/137 Reviewed-by: Nils Wallménius <nils@rockbox.org> Tested-by: Nils Wallménius <nils@rockbox.org>
2025-11-09 13:12:37 -05:00 · 2011-06-25 21:32:25 -04:00 · 2011-06-25 21:32:25 -04:00 · f40bfc9267
commit f40bfc9267
parent a0009907de
757 changed files with 122 additions and 122 deletions
--- a/lib/rbcodec/codecs/SOURCES
+++ b/lib/rbcodec/codecs/SOURCES
@ -0,0 +1,54 @@
+/* decoders */
+
+vorbis.c
+mpa.c
+flac.c
+wav.c
+a52.c
+wavpack.c
+#ifndef RB_PROFILE
+alac.c
+#endif
+cook.c
+raac.c
+a52_rm.c
+atrac3_rm.c
+atrac3_oma.c
+mpc.c
+wma.c
+sid.c
+ape.c
+asap.c
+aac.c
+spc.c
+mod.c
+shorten.c
+aiff.c
+speex.c
+adx.c
+smaf.c
+au.c
+vox.c
+wav64.c
+tta.c
+wmapro.c
+ay.c
+gbs.c
+hes.c
+nsf.c
+sgc.c
+vgm.c
+#if MEMORYSIZE > 2
+kss.c
+#endif
+
+#ifdef HAVE_RECORDING
+
+/* encoders */
+
+aiff_enc.c
+mp3_enc.c
+wav_enc.c
+wavpack_enc.c
+
+#endif /* HAVE_RECORDING */
--- a/lib/rbcodec/codecs/a52.c
+++ b/lib/rbcodec/codecs/a52.c
@ -0,0 +1,192 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2005 Dave Chapman
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include "codeclib.h"
+#include <inttypes.h>  /* Needed by a52.h */
+#include <codecs/liba52/config-a52.h>
+#include <codecs/liba52/a52.h>
+
+CODEC_HEADER
+
+#define BUFFER_SIZE 4096             /* bytes asked of ci->request_buffer() per main-loop pass */
+
+#define A52_SAMPLESPERFRAME (6*256)  /* an A52 frame is 6 blocks of 256 samples */
+
+static a52_state_t *state;           /* handle from a52_init(); freed on CODEC_UNLOAD */
+static unsigned long samplesdone;    /* samples output so far; basis for set_elapsed() */
+static unsigned long frequency;      /* sample rate reported by a52_syncinfo() for the last decoded frame */
+
+/* used outside liba52: a52_decode_data() assembles one frame here */
+static uint8_t buf[3840] IBSS_ATTR;
+
+/* Hand one decoded block to the PCM buffer: the first 256 entries of
+ * samples are passed as one channel and the following 256 as the other.
+ * ci->yield() is called before the insert. */
+static inline void output_audio(sample_t *samples)
+{
+    sample_t *first_channel = samples;
+    sample_t *second_channel = samples + 256;
+
+    ci->yield();
+    ci->pcmbuf_insert(first_channel, second_channel, 256);
+}
+
+/* Feed the raw bytes in [start, end) to the decoder.
+ *
+ * Input is accumulated in buf across calls; the fill position and the
+ * current target are kept in function-local statics.  The first 7 bytes
+ * are handed to a52_syncinfo(): if it reports no frame, the buffer is
+ * slid down by one byte and the search continues; otherwise the target
+ * becomes the full frame length.  Once a whole frame is present it is
+ * decoded and output as 6 blocks of 256 samples, and the search for the
+ * next header starts over.  A decoding error drops the current frame.
+ */
+static void a52_decode_data(uint8_t *start, uint8_t *end)
+{
+    static uint8_t *fill = buf;            /* next free byte in buf */
+    static uint8_t *target = buf + 7;      /* fill level that triggers work */
+    /*
+     * The sample rate and flags must survive between calls: the header
+     * may be parsed in one call and the frame body completed in a later
+     * one.
+     */
+    static int sample_rate;
+    static int flags;
+    int bit_rate;
+
+    for (;;) {
+        int chunk = end - start;
+
+        if (chunk == 0)
+            break;
+        if (chunk > target - fill)
+            chunk = target - fill;
+        memcpy(fill, start, chunk);
+        fill += chunk;
+        start += chunk;
+
+        /* Not enough data yet for the current target */
+        if (fill != target)
+            continue;
+
+        if (target == buf + 7) {
+            /* Header stage */
+            int length = a52_syncinfo(buf, &flags, &sample_rate, &bit_rate);
+
+            if (length) {
+                target = buf + length;
+            } else {
+                /* No sync: drop the oldest byte and try again */
+                //DEBUGF("skip\n");
+                for (fill = buf; fill < buf + 6; fill++)
+                    fill[0] = fill[1];
+            }
+        } else {
+            /* Frame stage.  Unity gain is 1 << 26, and we want to end up
+               on 28 bits of precision instead of the default 30. */
+            level_t level = 1 << 24;
+            sample_t bias = 0;
+            int block;
+
+            /* This is the configuration for the downmixing: */
+            flags = A52_STEREO | A52_ADJUST_LEVEL;
+
+            if (a52_frame(state, buf, &flags, &level, bias) == 0) {
+                a52_dynrng(state, NULL, NULL);
+                frequency = sample_rate;
+
+                /* Decode and output the 6 blocks one at a time */
+                for (block = 0; block < 6; block++) {
+                    if (a52_block(state))
+                        break;
+                    output_audio(a52_samples(state));
+                    samplesdone += 256;
+                }
+
+                /* Elapsed time is only reported for fully decoded frames */
+                if (block == 6)
+                    ci->set_elapsed(samplesdone/(frequency/1000));
+                //else logf("Error decoding A52 stream\n");
+            }
+
+            /* Success or error: go back to looking for a header */
+            fill = buf;
+            target = buf + 7;
+        }
+    }
+}
+
+/* this is the codec entry point */
+/* Codec entry point: configures the DSP when the codec is loaded and
+ * releases the decoder state when it is unloaded.  Always returns
+ * CODEC_OK. */
+enum codec_status codec_main(enum codec_entry_call_reason reason)
+{
+    switch (reason) {
+    case CODEC_LOAD:
+        /* Generic codec initialisation */
+        ci->configure(DSP_SET_STEREO_MODE, STEREO_NONINTERLEAVED);
+        ci->configure(DSP_SET_SAMPLE_DEPTH, 28);
+        break;
+
+    case CODEC_UNLOAD:
+        if (state)
+            a52_free(state);
+        break;
+
+    default:
+        break;
+    }
+
+    return CODEC_OK;
+}
+
+/* this is called for each file to process */
+/* Called for each file to process.
+ *
+ * Sets up the DSP and the A52 decoder, positions the input (resume offset
+ * or first frame), then feeds buffered input to a52_decode_data() until
+ * the input runs out or a halt is requested.
+ *
+ * Returns CODEC_ERROR if codec_init() or a52_init() fails, CODEC_OK
+ * otherwise.
+ */
+enum codec_status codec_run(void)
+{
+    size_t n;
+    unsigned char *filebuf;
+    int sample_loc;
+    intptr_t param;
+
+    if (codec_init())
+        return CODEC_ERROR;
+
+    ci->configure(DSP_SWITCH_FREQUENCY, ci->id3->frequency);
+    codec_set_replaygain(ci->id3);
+
+    /* Initialise the A52 decoder and check for success */
+    state = a52_init(0);
+    if (!state)
+        return CODEC_ERROR;
+
+    samplesdone = 0;
+
+    if (ci->id3->offset) {
+        /* Resume: derive the sample position from the byte offset.  The
+         * bytesperframe test avoids a division by zero when the metadata
+         * carries no frame size; elapsed time is then left untouched. */
+        if (ci->seek_buffer(ci->id3->offset) && ci->id3->bytesperframe) {
+            samplesdone = (ci->id3->offset / ci->id3->bytesperframe) *
+                A52_SAMPLESPERFRAME;
+            ci->set_elapsed(samplesdone/(ci->id3->frequency / 1000));
+        }
+    }
+    else {
+        ci->seek_buffer(ci->id3->first_frame_offset);
+        ci->set_elapsed(0);
+    }
+
+    /* The main decoding loop */
+    while (1) {
+        enum codec_command_action action = ci->get_command(&param);
+
+        if (action == CODEC_ACTION_HALT)
+            break;
+
+        if (action == CODEC_ACTION_SEEK_TIME) {
+            /* Convert ms to samples.  Seconds and milliseconds are scaled
+             * separately so the sub-second part of the target is kept
+             * without overflowing param * frequency. */
+            sample_loc = (param / 1000) * ci->id3->frequency +
+                         ((param % 1000) * ci->id3->frequency) / 1000;
+
+            if (ci->seek_buffer((sample_loc/A52_SAMPLESPERFRAME)*ci->id3->bytesperframe)) {
+                samplesdone = sample_loc;
+                ci->set_elapsed(samplesdone/(ci->id3->frequency/1000));
+            }
+            ci->seek_complete();
+        }
+
+        filebuf = ci->request_buffer(&n, BUFFER_SIZE);
+
+        if (n == 0) /* End of Stream */
+            break;
+
+        a52_decode_data(filebuf, filebuf + n);
+        ci->advance_buffer(n);
+    }
+
+    return CODEC_OK;
+}
--- a/lib/rbcodec/codecs/a52_rm.c
+++ b/lib/rbcodec/codecs/a52_rm.c
@ -0,0 +1,227 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2009 Mohamed Tarek
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include "codeclib.h"
+#include <codecs/librm/rm.h>
+#include <inttypes.h>  /* Needed by a52.h */
+#include <codecs/liba52/config-a52.h>
+#include <codecs/liba52/a52.h>
+
+CODEC_HEADER
+
+#define BUFFER_SIZE 4096             /* NOTE(review): not referenced in the code visible here */
+
+#define A52_SAMPLESPERFRAME (6*256)  /* an A52 frame is 6 blocks of 256 samples */
+
+static a52_state_t *state;           /* handle from a52_init(); freed on CODEC_UNLOAD */
+static unsigned long samplesdone;    /* samples output so far; basis for set_elapsed() */
+static unsigned long frequency;      /* sample rate reported by a52_syncinfo() for the last decoded frame */
+static RMContext rmctx;              /* container context, loaded by init_rm() in codec_run() */
+static RMPacket pkt;                 /* passed to rm_get_packet(); pkt.length sizes advance_buffer() */
+
+/* Fill *rmctx from the RMContext-sized image found in the ID3v2 buffer,
+ * starting at the buffer address rounded up to a multiple of 4. */
+static void init_rm(RMContext *rmctx)
+{
+    intptr_t aligned = ((intptr_t)ci->id3->id3v2buf + 3) & ~3;
+
+    memcpy(rmctx, (void *)aligned, sizeof(RMContext));
+}
+
+/* used outside liba52: a52_decode_data() assembles one frame here */
+static uint8_t buf[3840] IBSS_ATTR;
+
+/* The following two functions, a52_decode_data and output_audio are taken from a52.c */
+/* Hand one decoded block to the PCM buffer: the first 256 entries of
+ * samples are passed as one channel and the following 256 as the other.
+ * ci->yield() is called before the insert. */
+static inline void output_audio(sample_t *samples)
+{
+    sample_t *first_channel = samples;
+    sample_t *second_channel = samples + 256;
+
+    ci->yield();
+    ci->pcmbuf_insert(first_channel, second_channel, 256);
+}
+
+/* Feed the raw bytes in [start, end) to the decoder.
+ *
+ * Input is accumulated in buf across calls; the fill position and the
+ * current target are kept in function-local statics.  The first 7 bytes
+ * are handed to a52_syncinfo(): if it reports no frame, the buffer is
+ * slid down by one byte and the search continues; otherwise the target
+ * becomes the full frame length.  Once a whole frame is present it is
+ * decoded and output as 6 blocks of 256 samples, and the search for the
+ * next header starts over.  A decoding error drops the current frame.
+ */
+static void a52_decode_data(uint8_t *start, uint8_t *end)
+{
+    static uint8_t *fill = buf;            /* next free byte in buf */
+    static uint8_t *target = buf + 7;      /* fill level that triggers work */
+    /*
+     * The sample rate and flags must survive between calls: the header
+     * may be parsed in one call and the frame body completed in a later
+     * one.
+     */
+    static int sample_rate;
+    static int flags;
+    int bit_rate;
+
+    for (;;) {
+        int chunk = end - start;
+
+        if (chunk == 0)
+            break;
+        if (chunk > target - fill)
+            chunk = target - fill;
+        memcpy(fill, start, chunk);
+        fill += chunk;
+        start += chunk;
+
+        /* Not enough data yet for the current target */
+        if (fill != target)
+            continue;
+
+        if (target == buf + 7) {
+            /* Header stage */
+            int length = a52_syncinfo(buf, &flags, &sample_rate, &bit_rate);
+
+            if (length) {
+                target = buf + length;
+            } else {
+                /* No sync: drop the oldest byte and try again */
+                //DEBUGF("skip\n");
+                for (fill = buf; fill < buf + 6; fill++)
+                    fill[0] = fill[1];
+            }
+        } else {
+            /* Frame stage.  Unity gain is 1 << 26, and we want to end up
+               on 28 bits of precision instead of the default 30. */
+            level_t level = 1 << 24;
+            sample_t bias = 0;
+            int block;
+
+            /* This is the configuration for the downmixing: */
+            flags = A52_STEREO | A52_ADJUST_LEVEL;
+
+            if (a52_frame(state, buf, &flags, &level, bias) == 0) {
+                a52_dynrng(state, NULL, NULL);
+                frequency = sample_rate;
+
+                /* Decode and output the 6 blocks one at a time */
+                for (block = 0; block < 6; block++) {
+                    if (a52_block(state))
+                        break;
+                    output_audio(a52_samples(state));
+                    samplesdone += 256;
+                }
+
+                /* Elapsed time is only reported for fully decoded frames */
+                if (block == 6)
+                    ci->set_elapsed(samplesdone/(frequency/1000));
+                //else logf("Error decoding A52 stream\n");
+            }
+
+            /* Success or error: go back to looking for a header */
+            fill = buf;
+            target = buf + 7;
+        }
+    }
+}
+
+/* this is the codec entry point */
+/* Codec entry point: configures the DSP when the codec is loaded and
+ * releases the decoder state when it is unloaded.  Always returns
+ * CODEC_OK. */
+enum codec_status codec_main(enum codec_entry_call_reason reason)
+{
+    switch (reason) {
+    case CODEC_LOAD:
+        /* Generic codec initialisation */
+        ci->configure(DSP_SET_STEREO_MODE, STEREO_NONINTERLEAVED);
+        ci->configure(DSP_SET_SAMPLE_DEPTH, 28);
+        break;
+
+    case CODEC_UNLOAD:
+        if (state)
+            a52_free(state);
+        break;
+
+    default:
+        break;
+    }
+
+    return CODEC_OK;
+}
+
+/* this is called for each file to process */
+/* Called for each file to process.
+ *
+ * Loads the RealMedia context from the metadata buffer, positions the
+ * input on the first packet (or forces a time seek for a mid-track
+ * resume), then unwraps one packet per iteration with rm_get_packet() and
+ * passes its payload to a52_decode_data().
+ *
+ * Returns CODEC_ERROR if codec_init() or a52_init() fails, or if the very
+ * first packet cannot be obtained; CODEC_OK otherwise.
+ */
+enum codec_status codec_run(void)
+{
+    size_t n;
+    uint8_t *filebuf;
+    int consumed, packet_offset;
+    int playback_on = -1;
+    int ms_per_packet = 0;
+    size_t resume_offset;
+    intptr_t param;
+    enum codec_command_action action = CODEC_ACTION_NULL;
+
+    if (codec_init()) {
+        return CODEC_ERROR;
+    }
+
+    resume_offset = ci->id3->offset;
+
+    ci->configure(DSP_SWITCH_FREQUENCY, ci->id3->frequency);
+    codec_set_replaygain(ci->id3);
+
+    ci->seek_buffer(ci->id3->first_frame_offset);
+
+    /* Initializations */
+    state = a52_init(0);
+    if (!state)
+        return CODEC_ERROR;
+    ci->memset(&rmctx,0,sizeof(RMContext));
+    ci->memset(&pkt,0,sizeof(RMPacket));
+    init_rm(&rmctx);
+
+    samplesdone = 0;
+
+    /* Playing time of one packet in ms.  It is the divisor for every
+     * time <-> packet conversion below, so it is computed once and stays
+     * 0 (seeking disabled) when the header values would make it 0 or
+     * would divide by zero. */
+    if (rmctx.bit_rate != 0)
+        ms_per_packet = (rmctx.block_align * 8 * 1000) / rmctx.bit_rate;
+
+    /* check for a mid-track resume and force a seek time accordingly */
+    if(ms_per_packet > 0 &&
+       resume_offset > rmctx.data_offset + DATA_HEADER_SIZE) {
+        resume_offset -= rmctx.data_offset + DATA_HEADER_SIZE;
+        /* put number of subpackets to skip in resume_offset */
+        resume_offset /= (rmctx.block_align + PACKET_HEADER_SIZE);
+        param = (int)resume_offset * ms_per_packet;
+        action = CODEC_ACTION_SEEK_TIME;
+    }
+    else {
+        /* Seek to the first packet */
+        ci->set_elapsed(0);
+        ci->advance_buffer(rmctx.data_offset + DATA_HEADER_SIZE );
+    }
+
+    /* The main decoding loop */
+    while((unsigned)rmctx.audio_pkt_cnt < rmctx.nb_packets) {
+        if (action == CODEC_ACTION_NULL)
+            action = ci->get_command(&param);
+
+        if (action == CODEC_ACTION_HALT)
+            break;
+
+        if (action == CODEC_ACTION_SEEK_TIME) {
+            if (ms_per_packet > 0) {
+                packet_offset = param / ms_per_packet;
+                ci->seek_buffer(rmctx.data_offset + DATA_HEADER_SIZE +
+                                packet_offset*(rmctx.block_align + PACKET_HEADER_SIZE));
+                rmctx.audio_pkt_cnt = packet_offset;
+                samplesdone = (rmctx.sample_rate/1000 * param);
+                /* Report the requested time directly.  The static
+                 * 'frequency' is still 0 when this seek is a resume that
+                 * runs before any frame has been decoded, so it must not
+                 * be used as a divisor here. */
+                ci->set_elapsed(param);
+            }
+            ci->seek_complete();
+        }
+
+        action = CODEC_ACTION_NULL;
+
+        filebuf = ci->request_buffer(&n, rmctx.block_align + PACKET_HEADER_SIZE);
+        /* An empty buffer is treated like a packet-parsing failure rather
+         * than being handed to rm_get_packet(). */
+        if (filebuf && n)
+            consumed = rm_get_packet(&filebuf, &rmctx, &pkt);
+        else
+            consumed = -1;
+
+        if(consumed < 0 && playback_on != 0) {
+            if(playback_on == -1) {
+            /* Error only if packet-parsing failed and playback hadn't started */
+                DEBUGF("rm_get_packet failed\n");
+                return CODEC_ERROR;
+            }
+            else {
+                break;
+            }
+        }
+
+        playback_on = 1;
+        a52_decode_data(filebuf, filebuf + rmctx.block_align);
+        ci->advance_buffer(pkt.length);
+    }
+
+    return CODEC_OK;
+}
--- a/lib/rbcodec/codecs/aac.c
+++ b/lib/rbcodec/codecs/aac.c
@ -0,0 +1,297 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2005 Dave Chapman
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include "codeclib.h"
+#include "libm4a/m4a.h"
+#include "libfaad/common.h"
+#include "libfaad/structs.h"
+#include "libfaad/decoder.h"
+
+CODEC_HEADER
+
+/* The maximum buffer size handled by faad. 12 bytes are required by libfaad
+ * as headroom (see libfaad/bits.c). FAAD_BYTE_BUFFER_SIZE bytes are buffered 
+ * for each frame. */
+#define FAAD_BYTE_BUFFER_SIZE (2048-12)
+
+/* this is the codec entry point */
+/* Codec entry point: on CODEC_LOAD the DSP is set to non-interleaved
+ * stereo with a sample depth of 29; every other reason is a no-op.
+ * Always returns CODEC_OK. */
+enum codec_status codec_main(enum codec_entry_call_reason reason)
+{
+    if (reason != CODEC_LOAD)
+        return CODEC_OK;
+
+    /* Generic codec initialisation */
+    ci->configure(DSP_SET_STEREO_MODE, STEREO_NONINTERLEAVED);
+    ci->configure(DSP_SET_SAMPLE_DEPTH, 29);
+
+    return CODEC_OK;
+}
+
+/* this is called for each file to process */
+/* Called for each file to process.
+ *
+ * Parses the container with qtmovie_read(), opens and initialises the
+ * FAAD decoder from the codec data, optionally resumes at a byte offset,
+ * then decodes one frame per loop iteration, applying lead/tail trimming
+ * before the samples are inserted into the PCM buffer.
+ *
+ * Returns CODEC_ERROR on init, container, decoder-open, decoder-init,
+ * sample-offset or decode failures; CODEC_OK otherwise.
+ */
+enum codec_status codec_run(void)
+{
+    /* Note that when dealing with QuickTime/MPEG4 files, terminology is
+     * a bit confusing. Files with sound are split up in chunks, where
+     * each chunk contains one or more samples. Each sample in turn
+     * contains a number of "sound samples" (the kind you refer to with
+     * the sampling frequency).
+     */
+    size_t n;
+    demux_res_t demux_res;
+    stream_t input_stream;
+    uint32_t sound_samples_done;
+    uint32_t elapsed_time;
+    int file_offset;
+    int framelength;
+    int lead_trim = 0;
+    unsigned int frame_samples;
+    unsigned int i;
+    unsigned char* buffer;
+    NeAACDecFrameInfo frame_info;
+    NeAACDecHandle decoder;
+    int err;
+    uint32_t seek_idx = 0;
+    uint32_t s = 0;
+    uint32_t sbr_fac = 1;
+    unsigned char c = 0;
+    void *ret;
+    intptr_t param;
+    bool empty_first_frame = false;
+
+    /* Clean and initialize decoder structures */
+    memset(&demux_res , 0, sizeof(demux_res));
+    if (codec_init()) {
+        LOGF("FAAD: Codec init error\n");
+        return CODEC_ERROR;
+    }
+
+    file_offset = ci->id3->offset;
+
+    ci->configure(DSP_SWITCH_FREQUENCY, ci->id3->frequency);
+    codec_set_replaygain(ci->id3);
+
+    stream_create(&input_stream,ci);
+
+    ci->seek_buffer(ci->id3->first_frame_offset);
+
+    /* if qtmovie_read returns successfully, the stream is up to
+     * the movie data, which can be used directly by the decoder */
+    if (!qtmovie_read(&input_stream, &demux_res)) {
+        LOGF("FAAD: File init error\n");
+        return CODEC_ERROR;
+    }
+
+    /* initialise the sound converter */
+    decoder = NeAACDecOpen();
+
+    if (!decoder) {
+        LOGF("FAAD: Decode open error\n");
+        return CODEC_ERROR;
+    }
+
+    NeAACDecConfigurationPtr conf = NeAACDecGetCurrentConfiguration(decoder);
+    conf->outputFormat = FAAD_FMT_24BIT; /* irrelevant, we don't convert */
+    NeAACDecSetConfiguration(decoder, conf);
+
+    err = NeAACDecInit2(decoder, demux_res.codecdata, demux_res.codecdata_len, &s, &c);
+    if (err) {
+        LOGF("FAAD: DecInit: %d, %d\n", err, decoder->object_type);
+        return CODEC_ERROR;
+    }
+
+#ifdef SBR_DEC
+    /* Check for need of special handling for seek/resume and elapsed time. */
+    if (ci->id3->needs_upsampling_correction) {
+        sbr_fac = 2;
+    } else {
+        sbr_fac = 1;
+    }
+#endif
+
+    i = 0;
+
+    if (file_offset > 0) {
+        /* Resume the desired (byte) position. Important: When resuming SBR
+         * upsampling files the resulting sound_samples_done must be expanded
+         * by a factor of 2. This is done via using sbr_fac. */
+        if (m4a_seek_raw(&demux_res, &input_stream, file_offset,
+                          &sound_samples_done, (int*) &i)) {
+            sound_samples_done *= sbr_fac;
+        } else {
+            sound_samples_done = 0;
+        }
+        NeAACDecPostSeekReset(decoder, i);
+    } else {
+        sound_samples_done = 0;
+    }
+
+    /* Widen to 64 bits before scaling: a 32-bit product wraps on long
+     * files.  This is the same formula the per-frame update in the
+     * decoding loop uses. */
+    elapsed_time = ((uint64_t) sound_samples_done * 1000) /
+        ci->id3->frequency;
+    ci->set_elapsed(elapsed_time);
+
+    if (i == 0)
+    {
+        lead_trim = ci->id3->lead_trim;
+    }
+
+    /* The main decoding loop */
+    while (i < demux_res.num_sample_byte_sizes) {
+        enum codec_command_action action = ci->get_command(&param);
+
+        if (action == CODEC_ACTION_HALT)
+            break;
+
+        /* Deal with any pending seek requests */
+        if (action == CODEC_ACTION_SEEK_TIME) {
+            /* Seek to the desired time position. Important: When seeking in SBR
+             * upsampling files the seek_time must be divided by 2 when calling
+             * m4a_seek and the resulting sound_samples_done must be expanded
+             * by a factor 2. This is done via using sbr_fac. */
+            if (m4a_seek(&demux_res, &input_stream,
+                          (param/10/sbr_fac)*(ci->id3->frequency/100),
+                          &sound_samples_done, (int*) &i)) {
+                sound_samples_done *= sbr_fac;
+                /* 64-bit math, see the resume case above */
+                elapsed_time = ((uint64_t) sound_samples_done * 1000) /
+                    ci->id3->frequency;
+                ci->set_elapsed(elapsed_time);
+                seek_idx = 0;
+
+                if (i == 0)
+                {
+                    lead_trim = ci->id3->lead_trim;
+                }
+            }
+            NeAACDecPostSeekReset(decoder, i);
+            ci->seek_complete();
+        }
+
+        /* There can be gaps between chunks, so skip ahead if needed. It
+         * doesn't seem to happen much, but it probably means that a
+         * "proper" file can have chunks out of order. Why one would want
+         * that is a good question (but files with gaps do exist, so who
+         * knows?), so we don't support that - for now, at least.
+         */
+        file_offset = m4a_check_sample_offset(&demux_res, i, &seek_idx);
+
+        if (file_offset > ci->curpos)
+        {
+            ci->advance_buffer(file_offset - ci->curpos);
+        }
+        else if (file_offset == 0)
+        {
+            LOGF("AAC: get_sample_offset error\n");
+            return CODEC_ERROR;
+        }
+
+        /* Request the required number of bytes from the input buffer */
+        buffer=ci->request_buffer(&n, FAAD_BYTE_BUFFER_SIZE);
+
+        /* Decode one block - returned samples will be host-endian */
+        ret = NeAACDecDecode(decoder, &frame_info, buffer, n);
+
+        /* NeAACDecDecode may sometimes return NULL without setting error. */
+        if (ret == NULL || frame_info.error > 0) {
+            LOGF("FAAD: decode error '%s'\n", NeAACDecGetErrorMessage(frame_info.error));
+            return CODEC_ERROR;
+        }
+
+        /* Advance codec buffer (no need to call set_offset because of this) */
+        ci->advance_buffer(frame_info.bytesconsumed);
+
+        /* Output the audio */
+        ci->yield();
+
+        frame_samples = frame_info.samples >> 1;
+
+        if (empty_first_frame)
+        {
+            /* Remove the first frame from lead_trim, under the assumption
+             * that it had the same size as this frame
+             */
+            empty_first_frame = false;
+            lead_trim -= frame_samples;
+
+            if (lead_trim < 0)
+            {
+                lead_trim = 0;
+            }
+        }
+
+        /* Gather number of samples for the decoded frame. */
+        framelength = frame_samples - lead_trim;
+
+        if (i == demux_res.num_sample_byte_sizes - 1)
+        {
+            // Size of the last frame
+            const uint32_t sample_duration = (demux_res.num_time_to_samples > 0) ?
+                demux_res.time_to_sample[demux_res.num_time_to_samples - 1].sample_duration :
+                frame_samples;
+
+            /* Currently limited to at most one frame of tail_trim.
+             * Seems to be enough.
+             */
+            if (ci->id3->tail_trim == 0 && sample_duration < frame_samples)
+            {
+                /* Subtract lead_trim just in case we decode a file with only
+                 * one audio frame with actual data (lead_trim is usually zero
+                 * here).
+                 */
+                framelength = sample_duration - lead_trim;
+            }
+            else
+            {
+                framelength -= ci->id3->tail_trim;
+            }
+        }
+
+        if (framelength > 0)
+        {
+            ci->pcmbuf_insert(&decoder->time_out[0][lead_trim],
+                              &decoder->time_out[1][lead_trim],
+                              framelength);
+            sound_samples_done += framelength;
+            /* Update the elapsed-time indicator */
+            elapsed_time = ((uint64_t) sound_samples_done * 1000) /
+                ci->id3->frequency;
+            ci->set_elapsed(elapsed_time);
+        }
+
+        if (lead_trim > 0)
+        {
+            /* frame_info.samples can be 0 for frame 0. We still want to
+             * remove it from lead_trim, so do that during frame 1.
+             */
+            if (0 == i && 0 == frame_info.samples)
+            {
+                empty_first_frame = true;
+            }
+
+            lead_trim -= frame_samples;
+
+            if (lead_trim < 0)
+            {
+                lead_trim = 0;
+            }
+        }
+
+        ++i;
+    }
+
+    LOGF("AAC: Decoded %lu samples\n", (unsigned long)sound_samples_done);
+    return CODEC_OK;
+}
--- a/lib/rbcodec/codecs/adx.c
+++ b/lib/rbcodec/codecs/adx.c
@ -0,0 +1,404 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ *
+ * Copyright (C) 2006-2008 Adam Gashlin (hcs)
+ * Copyright (C) 2006 Jens Arnold
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include <limits.h>
+#include "codeclib.h"
+#include "inttypes.h"
+#include "math.h"
+#include "lib/fixedpoint.h"
+
+CODEC_HEADER
+
+/* Maximum number of bytes to process in one iteration */
+#define WAV_CHUNK_SIZE (1024*2)
+
+/* Number of times to loop looped tracks when repeat is disabled */
+#define LOOP_TIMES 2
+
+/* Length of fade-out for looped tracks (milliseconds) */
+#define FADE_LENGTH 10000L
+
+/* Default high pass filter cutoff frequency is 500 Hz.
+ * Others can be set, but the default is nearly always used,
+ * and there is no way to determine if another was used, anyway.
+ */
+static const long cutoff = 500;
+
+static int16_t samples[WAV_CHUNK_SIZE] IBSS_ATTR;
+
+/* this is the codec entry point */
+/* Codec entry point: on CODEC_LOAD the DSP sample depth is set to 16;
+ * every other reason is a no-op.  Always returns CODEC_OK. */
+enum codec_status codec_main(enum codec_entry_call_reason reason)
+{
+    if (reason != CODEC_LOAD)
+        return CODEC_OK;
+
+    /* Generic codec initialisation */
+    /* we only render 16 bits */
+    ci->configure(DSP_SET_SAMPLE_DEPTH, 16);
+
+    return CODEC_OK;
+}
+
+/* this is called for each file to process */
+enum codec_status codec_run(void)
+{
+    int channels;
+    int sampleswritten, i;
+    uint8_t *buf;
+    int32_t ch1_1, ch1_2, ch2_1, ch2_2; /* ADPCM history */
+    size_t n;
+    int endofstream; /* end of stream flag */
+    uint32_t avgbytespersec;
+    int looping; /* looping flag */
+    int loop_count; /* number of loops done so far */
+    int fade_count; /*  countdown for fadeout */
+    int fade_frames; /* length of fade in frames */
+    off_t start_adr, end_adr; /* loop points */
+    off_t chanstart, bufoff;
+    /*long coef1=0x7298L,coef2=-0x3350L;*/
+    long coef1, coef2;
+    intptr_t param;
+
+    DEBUGF("ADX: next_track\n");
+    if (codec_init()) {
+        return CODEC_ERROR;
+    }
+    DEBUGF("ADX: after init\n");
+    
+    /* init history */
+    ch1_1=ch1_2=ch2_1=ch2_2=0;
+
+    codec_set_replaygain(ci->id3);
+        
+    /* Get header */
+    DEBUGF("ADX: request initial buffer\n");
+    ci->seek_buffer(0);
+    buf = ci->request_buffer(&n, 0x38);
+    if (!buf || n < 0x38) {
+        return CODEC_ERROR;
+    }
+    bufoff = 0;
+    DEBUGF("ADX: read size = %lx\n",(unsigned long)n);
+
+    /* Get file header for starting offset, channel count */
+    
+    chanstart = ((buf[2] << 8) | buf[3]) + 4;
+    channels = buf[7];
+    
+    /* useful for seeking and reporting current playback position */
+    avgbytespersec = ci->id3->frequency * 18 * channels / 32;
+    DEBUGF("avgbytespersec=%ld\n",(unsigned long)avgbytespersec);
+
+    /* calculate filter coefficients */
+
+    /**
+     * A simple table of these coefficients would be nice, but
+     * some very odd frequencies are used and if I'm going to
+     * interpolate I might as well just go all the way and
+     * calclate them precisely.
+     * Speed is not an issue as this only needs to be done once per file.
+     */
+    {
+        const int64_t big28 = 0x10000000LL;
+        const int64_t big32 = 0x100000000LL;
+        int64_t frequency = ci->id3->frequency;
+        int64_t phasemultiple = cutoff*big32/frequency;
+
+        long z;
+        int64_t a;
+        const int64_t b = (M_SQRT2*big28)-big28;
+        int64_t c;
+        int64_t d;
+        
+        fp_sincos((unsigned long)phasemultiple,&z);
+
+        a = (M_SQRT2*big28) - (z >> 3);
+
+        /**
+         * In the long passed to fsqrt there are only 4 nonfractional bits,
+         * which is sufficient here, but this is the only reason why I don't
+         * use 32 fractional bits everywhere.
+         */
+        d = fp_sqrt((a+b)*(a-b)/big28,28);
+        c = (a-d)*big28/b;
+
+        coef1 = (c*8192) >> 28;
+        coef2 = (c*c/big28*-4096) >> 28;
+        DEBUGF("ADX: samprate=%ld ",(long)frequency);
+        DEBUGF("coef1 %04x ",(unsigned int)(coef1*4));
+        DEBUGF("coef2 %04x\n",(unsigned int)(coef2*-4));
+    }
+
+    /* Get loop data */
+    
+    looping = 0; start_adr = 0; end_adr = 0;
+    if (!memcmp(buf+0x10,"\x01\xF4\x03",3)) {
+        /* Soul Calibur 2 style (type 03) */
+        DEBUGF("ADX: type 03 found\n");
+        /* check if header is too small for loop data */
+        if (chanstart-6 < 0x2c) looping=0;
+        else {
+            looping = (buf[0x18]) ||
+                      (buf[0x19]) ||
+                      (buf[0x1a]) ||
+                      (buf[0x1b]);
+            end_adr = (buf[0x28]<<24) |
+                      (buf[0x29]<<16) |
+                      (buf[0x2a]<<8) |
+                      (buf[0x2b]);
+
+            start_adr = (
+              (buf[0x1c]<<24) |
+              (buf[0x1d]<<16) |
+              (buf[0x1e]<<8) |
+              (buf[0x1f])
+              )/32*channels*18+chanstart;
+        }
+    } else if (!memcmp(buf+0x10,"\x01\xF4\x04",3)) {
+        /* Standard (type 04) */
+        DEBUGF("ADX: type 04 found\n");
+        /* check if header is too small for loop data */
+        if (chanstart-6 < 0x38) looping=0;
+        else {
+            looping = (buf[0x24]) ||
+                      (buf[0x25]) ||
+                      (buf[0x26]) ||
+                      (buf[0x27]);
+            end_adr = (buf[0x34]<<24) |
+                      (buf[0x35]<<16) |
+                      (buf[0x36]<<8) |
+                      buf[0x37];
+            start_adr = (
+              (buf[0x28]<<24) |
+              (buf[0x29]<<16) |
+              (buf[0x2a]<<8) |
+              (buf[0x2b])
+              )/32*channels*18+chanstart;
+        }
+    } else {
+        DEBUGF("ADX: error, couldn't determine ADX type\n");
+        return CODEC_ERROR;
+    }
+    
+    /* is file using encryption */
+    if (buf[0x13]==0x08) {
+        DEBUGF("ADX: error, encrypted ADX not supported\n");
+        return false;
+    }
+
+    if (looping) {
+        DEBUGF("ADX: looped, start: %lx end: %lx\n",start_adr,end_adr);
+    } else {
+        DEBUGF("ADX: not looped\n");
+    }
+    
+    /* advance to first frame */
+    DEBUGF("ADX: first frame at %lx\n",chanstart);
+    bufoff = chanstart;
+
+    /* get in position */
+    ci->seek_buffer(bufoff);
+    ci->set_elapsed(0);
+
+    /* setup pcm buffer format */
+    ci->configure(DSP_SWITCH_FREQUENCY, ci->id3->frequency);
+    if (channels == 2) {
+        ci->configure(DSP_SET_STEREO_MODE, STEREO_INTERLEAVED);
+    } else if (channels == 1) {
+        ci->configure(DSP_SET_STEREO_MODE, STEREO_MONO);
+    } else {
+        DEBUGF("ADX CODEC_ERROR: more than 2 channels\n");
+        return CODEC_ERROR;
+    }    
+
+    endofstream = 0;
+    loop_count = 0;
+    fade_count = -1; /* disable fade */
+    fade_frames = 1;
+
+    /* The main decoder loop */
+        
+    while (!endofstream) {
+        enum codec_command_action action = ci->get_command(&param);
+
+        if (action == CODEC_ACTION_HALT)
+            break;
+        
+        /* do we need to loop? */
+        if (bufoff > end_adr-18*channels && looping) {
+            DEBUGF("ADX: loop!\n");
+            /* check for endless looping */
+            if (ci->loop_track()) {
+                loop_count=0;
+                fade_count = -1; /* disable fade */
+            } else {
+                /* otherwise start fade after LOOP_TIMES loops */
+                loop_count++;
+                if (loop_count >= LOOP_TIMES && fade_count < 0) {
+                    /* frames to fade over */
+                    fade_frames = FADE_LENGTH*ci->id3->frequency/32/1000;
+                    /* volume relative to fade_frames */
+                    fade_count = fade_frames;
+                    DEBUGF("ADX: fade_frames = %d\n",fade_frames);
+                }
+            }
+            bufoff = start_adr;
+            ci->seek_buffer(bufoff);
+        }
+
+        /* do we need to seek? */
+        if (action == CODEC_ACTION_SEEK_TIME) {
+            uint32_t newpos;
+            
+            DEBUGF("ADX: seek to %ldms\n", (long)param);
+
+            endofstream = 0;
+            loop_count = 0;
+            fade_count = -1; /* disable fade */
+            fade_frames = 1;
+
+            newpos = (((uint64_t)avgbytespersec*param)
+                      / (1000LL*18*channels))*(18*channels);
+            bufoff = chanstart + newpos;
+            while (bufoff > end_adr-18*channels) {
+                bufoff-=end_adr-start_adr;
+                loop_count++;
+            }
+            ci->seek_buffer(bufoff);
+
+            ci->set_elapsed(
+               ((end_adr-start_adr)*loop_count + bufoff-chanstart)*
+               1000LL/avgbytespersec);
+
+            ci->seek_complete();
+        }
+
+        if (bufoff>ci->filesize-channels*18) break; /* End of stream */
+        
+        sampleswritten=0;
+          
+        while (
+                /* Is there data left in the file? */
+                (bufoff <= ci->filesize-(18*channels)) &&
+                /* Is there space in the output buffer? */
+                (sampleswritten <= WAV_CHUNK_SIZE-(32*channels)) &&
+                /* Should we be looping? */
+                ((!looping) || bufoff <= end_adr-18*channels))
+        {
+            /* decode first/only channel */
+            int32_t scale;
+            int32_t ch1_0, d;
+
+            /* fetch a frame */
+            buf = ci->request_buffer(&n, 18);
+
+            if (!buf || n!=18) {
+                DEBUGF("ADX: couldn't get buffer at %lx\n",
+                        bufoff);
+                return CODEC_ERROR;
+            }
+
+            scale = ((buf[0] << 8) | (buf[1])) +1;
+  
+            for (i = 2; i < 18; i++)
+            {
+                d = (buf[i] >> 4) & 15;
+                if (d & 8) d-= 16;
+                ch1_0 = d*scale + ((coef1*ch1_1 + coef2*ch1_2) >> 12);
+                if (ch1_0 > 32767) ch1_0 = 32767;
+                else if (ch1_0 < -32768) ch1_0 = -32768;
+                samples[sampleswritten] = ch1_0;
+                sampleswritten+=channels;
+                ch1_2 = ch1_1; ch1_1 = ch1_0;
+
+                d = buf[i] & 15;
+                if (d & 8) d -= 16;
+                ch1_0 = d*scale + ((coef1*ch1_1 + coef2*ch1_2) >> 12);
+                if (ch1_0 > 32767) ch1_0 = 32767;
+                else if (ch1_0 < -32768) ch1_0 = -32768; 
+                samples[sampleswritten] = ch1_0;
+                sampleswritten+=channels;
+                ch1_2 = ch1_1; ch1_1 = ch1_0;
+            }
+            bufoff+=18;
+            ci->advance_buffer(18);
+            
+            if (channels == 2) {
+                /* decode second channel */
+                int32_t scale;
+                int32_t ch2_0, d;
+
+                buf = ci->request_buffer(&n, 18);
+
+                if (!buf || n!=18) {
+                    DEBUGF("ADX: couldn't get buffer at %lx\n",
+                            bufoff);
+                    return CODEC_ERROR;
+                }
+
+                scale = ((buf[0] << 8)|(buf[1]))+1;
+  
+                sampleswritten-=63;
+
+                for (i = 2; i < 18; i++)
+                {
+                    d = (buf[i] >> 4) & 15;
+                    if (d & 8) d-= 16;
+                    ch2_0 = d*scale + ((coef1*ch2_1 + coef2*ch2_2) >> 12);
+                    if (ch2_0 > 32767) ch2_0 = 32767;
+                    else if (ch2_0 < -32768) ch2_0 = -32768;
+                    samples[sampleswritten] = ch2_0;
+                    sampleswritten+=2;
+                    ch2_2 = ch2_1; ch2_1 = ch2_0;
+
+                    d = buf[i] & 15;
+                    if (d & 8) d -= 16;
+                    ch2_0 = d*scale + ((coef1*ch2_1 + coef2*ch2_2) >> 12);
+                    if (ch2_0 > 32767) ch2_0 = 32767;
+                    else if (ch2_0 < -32768) ch2_0 = -32768; 
+                    samples[sampleswritten] = ch2_0;
+                    sampleswritten+=2;
+                    ch2_2 = ch2_1; ch2_1 = ch2_0;
+                }
+                bufoff+=18;
+                ci->advance_buffer(18);
+                sampleswritten--; /* go back to first channel's next sample */
+            }
+
+            if (fade_count>0) {
+                fade_count--;
+                for (i=0;i<(channels==1?32:64);i++) samples[sampleswritten-i-1]=
+                  ((int32_t)samples[sampleswritten-i-1])*fade_count/fade_frames;
+                if (fade_count==0) {endofstream=1; break;}
+            }
+        }
+
+        if (channels == 2)
+            sampleswritten >>= 1; /* make samples/channel */
+
+        ci->pcmbuf_insert(samples, NULL, sampleswritten);
+            
+        ci->set_elapsed(
+           ((end_adr-start_adr)*loop_count + bufoff-chanstart)*
+           1000LL/avgbytespersec);
+    }
+
+    return CODEC_OK;
+}
--- a/lib/rbcodec/codecs/aiff.c
+++ b/lib/rbcodec/codecs/aiff.c
@ -0,0 +1,350 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (c) 2005 Jvo Studer
+ * Copyright (c) 2009 Yoshihisa Uchida
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include "codeclib.h"
+#include "codecs/libpcm/support_formats.h"
+
+CODEC_HEADER
+
+/* Pack four 8-bit character codes into one 32-bit tag, first character in
+ * the most significant byte. Each argument is parenthesised before the cast
+ * so that an expression argument is converted as a whole. */
+#define FOURCC(c1, c2, c3, c4) \
+((((uint32_t)(c1))<<24)|(((uint32_t)(c2))<<16)|(((uint32_t)(c3))<<8)|((uint32_t)(c4)))
+
+/* This codec supports the following AIFC compressionType formats */
+enum {
+    AIFC_FORMAT_PCM          = FOURCC('N', 'O', 'N', 'E'), /* AIFC PCM Format (big endian) */
+    AIFC_FORMAT_ALAW         = FOURCC('a', 'l', 'a', 'w'), /* AIFC ALaw compressed */
+    AIFC_FORMAT_MULAW        = FOURCC('u', 'l', 'a', 'w'), /* AIFC uLaw compressed */
+    AIFC_FORMAT_IEEE_FLOAT32 = FOURCC('f', 'l', '3', '2'), /* AIFC IEEE float 32 bit */
+    AIFC_FORMAT_IEEE_FLOAT64 = FOURCC('f', 'l', '6', '4'), /* AIFC IEEE float 64 bit */
+    AIFC_FORMAT_QT_IMA_ADPCM = FOURCC('i', 'm', 'a', '4'), /* AIFC QuickTime IMA ADPCM */
+};
+
+/* Maps each supported compressionType tag to the libpcm getter that is
+ * called to obtain the matching decoder (searched by get_codec()). */
+static const struct pcm_entry pcm_codecs[] = {
+    { AIFC_FORMAT_PCM,          get_linear_pcm_codec      },
+    { AIFC_FORMAT_ALAW,         get_itut_g711_alaw_codec  },
+    { AIFC_FORMAT_MULAW,        get_itut_g711_mulaw_codec },
+    { AIFC_FORMAT_IEEE_FLOAT32, get_ieee_float_codec      },
+    { AIFC_FORMAT_IEEE_FLOAT64, get_ieee_float_codec      },
+    { AIFC_FORMAT_QT_IMA_ADPCM, get_qt_ima_adpcm_codec    },
+};
+
+/* Capacity, in 32-bit words, of the decode output buffer below */
+#define PCM_SAMPLE_SIZE (1024*2)
+
+/* Output buffer handed to the selected decoder and then to pcmbuf_insert() */
+static int32_t samples[PCM_SAMPLE_SIZE] IBSS_ATTR;
+
+/* Find the decoder registered in pcm_codecs[] for an AIFC compressionType.
+ * Returns whatever the matching entry's getter returns, or NULL when the tag
+ * is not listed. */
+static const struct pcm_codec *get_codec(uint32_t formattag)
+{
+    const struct pcm_entry *entry = pcm_codecs;
+    const struct pcm_entry *const end =
+        pcm_codecs + sizeof(pcm_codecs)/sizeof(pcm_codecs[0]);
+
+    while (entry < end)
+    {
+        if (entry->format_tag == formattag)
+            return entry->get_codec();
+        entry++;
+    }
+
+    return NULL;
+}
+
+/* this is the codec entry point */
+enum codec_status codec_main(enum codec_entry_call_reason reason)
+{
+    /* Only the load notification needs any work */
+    if (reason != CODEC_LOAD)
+        return CODEC_OK;
+
+    /* Generic codec initialisation */
+    ci->configure(DSP_SET_SAMPLE_DEPTH, PCM_OUTPUT_DEPTH-1);
+    return CODEC_OK;
+}
+
+/*
+ * this is called for each file to process
+ *
+ * Parses the 'FORM' header (AIFF or AIFC) from the first bytes of the file,
+ * walking the chunks until a non-empty 'SSND' chunk is found, selects the
+ * libpcm decoder for the compression type, optionally resumes at the offset
+ * stored in the id3 data, then decodes chunk by chunk while servicing seek
+ * and halt commands.
+ *
+ * Returns CODEC_OK at end of stream, on a halt request, or when the resume
+ * position lies beyond the sound data; CODEC_ERROR when the header is
+ * malformed or unsupported or the decoder reports an error.
+ */
+enum codec_status codec_run(void)
+{
+    struct pcm_format format;
+    uint32_t bytesdone, decodedsamples;
+    /* rockbox: comment 'set but unused' variables
+    uint32_t num_sample_frames = 0;
+    */
+    size_t n;
+    size_t headerlen;         /* bytes actually obtained for header parsing */
+    int bufcount;
+    int endofstream;
+    unsigned char *buf;
+    uint8_t *aifbuf;
+    uint32_t offset2snd = 0;
+    off_t firstblockposn;     /* position of the first block in file */
+    bool is_aifc = false;
+    const struct pcm_codec *codec;
+    uint32_t size;
+    intptr_t param;
+
+    if (codec_init()) {
+        return CODEC_ERROR;
+    }
+
+    codec_set_replaygain(ci->id3);
+
+    /* Need to save offset for later use (cleared indirectly by advance_buffer) */
+    bytesdone = ci->id3->offset;
+
+    /* assume the AIFF header is less than 1024 bytes */
+    ci->seek_buffer(0);
+    buf = ci->request_buffer(&n, 1024);
+    if (!buf || n < 54) {
+        return CODEC_ERROR;
+    }
+    /* remember how much was really delivered: short files give < 1024 */
+    headerlen = n;
+
+    if (memcmp(buf, "FORM", 4) != 0)
+    {
+        DEBUGF("CODEC_ERROR: does not aiff format %4.4s\n", (char*)&buf[0]);
+        return CODEC_ERROR;
+    }
+    if (memcmp(&buf[8], "AIFF", 4) == 0)
+        is_aifc = false;
+    else if (memcmp(&buf[8], "AIFC", 4) == 0)
+        is_aifc = true;
+    else
+    {
+        DEBUGF("CODEC_ERROR: does not aiff format %4.4s\n", (char*)&buf[8]);
+        return CODEC_ERROR;
+    }
+
+    buf += 12;
+    n -= 12;
+
+    ci->memset(&format, 0, sizeof(struct pcm_format));
+    format.is_signed = true;
+    format.is_little_endian = false;
+
+    decodedsamples = 0;
+    codec = 0;
+
+    /* read until 'SSND' chunk, which typically is last */
+    while (format.numbytes == 0 && n >= 8)
+    {
+        /* chunkSize (bytes are widened to unsigned before shifting so the
+           top byte never lands in the sign bit of an int) */
+        size = ((uint32_t)buf[4]<<24)|((uint32_t)buf[5]<<16)|
+               ((uint32_t)buf[6]<<8)|buf[7];
+        if (memcmp(buf, "COMM", 4) == 0) {
+            uint32_t rate;
+
+            if ((!is_aifc && size < 18) || (is_aifc && size < 22))
+            {
+                DEBUGF("CODEC_ERROR: 'COMM' chunk size=%lu < %d\n",
+                       (unsigned long)size, (is_aifc)?22:18);
+                return CODEC_ERROR;
+            }
+            /* every field read below must lie inside the buffered header:
+               8 byte chunk header + 18 (AIFF) or 22 (AIFC) bytes of data */
+            if (n < (size_t)(is_aifc ? 30 : 26)) {
+                DEBUGF("CODEC_ERROR: AIFF header size > 1024\n");
+                return CODEC_ERROR;
+            }
+            /* num_channels */
+            format.channels = ((buf[8]<<8)|buf[9]);
+            /* num_sample_frames */
+            /*
+            num_sample_frames = ((buf[10]<<24)|(buf[11]<<16)|(buf[12]<<8)
+                                |buf[13]);
+            */
+
+            /* sample_size */
+            format.bitspersample = ((buf[14]<<8)|buf[15]);
+            /* sample_rate (don't use last 4 bytes, only integer fs) */
+            if (buf[16] != 0x40) {
+                DEBUGF("CODEC_ERROR: weird sampling rate (no @)\n");
+                return CODEC_ERROR;
+            }
+            /* the shift below is 30 - buf[17]; a larger exponent byte would
+               make the shift count negative */
+            if (buf[17] > 30) {
+                DEBUGF("CODEC_ERROR: weird sampling rate (exponent too large)\n");
+                return CODEC_ERROR;
+            }
+            rate = (((uint32_t)buf[18]<<24)|((uint32_t)buf[19]<<16)|
+                    ((uint32_t)buf[20]<<8)|buf[21])+1;
+            format.samplespersec = rate >> (16 + 14 - buf[17]);
+            /* compressionType (AIFC only) */
+            if (is_aifc)
+            {
+                format.formattag = ((uint32_t)buf[26]<<24)|((uint32_t)buf[27]<<16)|
+                                   ((uint32_t)buf[28]<<8)|buf[29];
+
+                /*
+                 * aiff's sample_size is uncompressed sound data size.
+                 * But format.bitspersample is compressed sound data size.
+                 */
+                if (format.formattag == AIFC_FORMAT_ALAW ||
+                    format.formattag == AIFC_FORMAT_MULAW)
+                    format.bitspersample = 8;
+                else if (format.formattag == AIFC_FORMAT_QT_IMA_ADPCM)
+                    format.bitspersample = 4;
+            }
+            else
+                format.formattag = AIFC_FORMAT_PCM;
+            /* calc average bytes per second */
+            format.avgbytespersec = format.samplespersec*format.channels*format.bitspersample/8;
+        } else if (memcmp(buf, "SSND", 4)==0) {
+            if (format.bitspersample == 0) {
+                DEBUGF("CODEC_ERROR: unsupported chunk order\n");
+                return CODEC_ERROR;
+            }
+            /* offset and blockSize occupy bytes 8..15 of the chunk */
+            if (n < 16) {
+                DEBUGF("CODEC_ERROR: AIFF header size > 1024\n");
+                return CODEC_ERROR;
+            }
+            /* offset2snd */
+            offset2snd = ((uint32_t)buf[8]<<24)|((uint32_t)buf[9]<<16)|
+                         ((uint32_t)buf[10]<<8)|buf[11];
+            /* block_size */
+            format.blockalign = (((uint32_t)buf[12]<<24)|((uint32_t)buf[13]<<16)|
+                                 ((uint32_t)buf[14]<<8)|buf[15]) >> 3;
+            if (format.blockalign == 0)
+                format.blockalign = format.channels * format.bitspersample >> 3;
+            format.numbytes = size - 8 - offset2snd;
+            size = 8 + offset2snd; /* advance to the beginning of data */
+        } else if (is_aifc && (memcmp(buf, "FVER", 4)==0)) {
+            /* Format Version Chunk (AIFC only chunk) */
+            /* skip this chunk */
+        } else {
+            DEBUGF("unsupported AIFF chunk: '%c%c%c%c', size=%lu\n",
+                   buf[0], buf[1], buf[2], buf[3], (unsigned long)size);
+        }
+
+        /* odd chunk sizes must be padded. Compare against the bytes left
+           (n >= 8 here) before adding, so that a huge chunkSize cannot wrap
+           the total to a small value and stall the loop */
+        if (size > n - 8 || (size & 0x01) > n - 8 - size) {
+            DEBUGF("CODEC_ERROR: AIFF header size > 1024\n");
+            return CODEC_ERROR;
+        }
+        size += 8 + (size & 0x01);
+
+        buf += size;
+        n -= size;
+    } /* while 'SSND' */
+
+    if (format.channels == 0) {
+        DEBUGF("CODEC_ERROR: 'COMM' chunk not found or 0-channels file\n");
+        return CODEC_ERROR;
+    }
+    if (format.numbytes == 0) {
+        DEBUGF("CODEC_ERROR: 'SSND' chunk not found or has zero length\n");
+        return CODEC_ERROR;
+    }
+
+    codec = get_codec(format.formattag);
+    if (codec == 0)
+    {
+        DEBUGF("CODEC_ERROR: AIFC does not support compressionType: 0x%x\n", 
+            (unsigned int)format.formattag);
+        return CODEC_ERROR;
+    }
+
+    if (!codec->set_format(&format))
+    {
+        return CODEC_ERROR;
+    }
+
+    ci->configure(DSP_SWITCH_FREQUENCY, ci->id3->frequency);
+
+    if (format.channels == 2) {
+        ci->configure(DSP_SET_STEREO_MODE, STEREO_INTERLEAVED);
+    } else if (format.channels == 1) {
+        ci->configure(DSP_SET_STEREO_MODE, STEREO_MONO);
+    } else {
+        DEBUGF("CODEC_ERROR: more than 2 channels unsupported\n");
+        return CODEC_ERROR;
+    }
+
+    if (format.samplesperblock == 0)
+    {
+        DEBUGF("CODEC_ERROR: samplesperblock is 0\n");
+        return CODEC_ERROR;
+    }
+    if (format.blockalign == 0)
+    {
+        DEBUGF("CODEC_ERROR: blockalign is 0\n");
+        return CODEC_ERROR;
+    }
+
+    /* check chunksize */
+    if ((format.chunksize / format.blockalign) * format.samplesperblock * format.channels
+           > PCM_SAMPLE_SIZE)
+        format.chunksize = (PCM_SAMPLE_SIZE / format.blockalign) * format.blockalign;
+    if (format.chunksize == 0)
+    {
+        DEBUGF("CODEC_ERROR: chunksize is 0\n");
+        return CODEC_ERROR;
+    }
+
+    /* bytes consumed by the header = bytes delivered - bytes still unparsed */
+    firstblockposn = headerlen - n;
+    ci->advance_buffer(firstblockposn);
+
+    /* make sure we're at the correct offset */
+    if (bytesdone > (uint32_t) firstblockposn) {
+        /* Round down to previous block */
+        struct pcm_pos *newpos = codec->get_seek_pos(bytesdone - firstblockposn,
+                                                     PCM_SEEK_POS, NULL);
+
+        if (newpos->pos > format.numbytes)
+            return CODEC_OK;
+
+        if (ci->seek_buffer(firstblockposn + newpos->pos))
+        {
+            bytesdone      = newpos->pos;
+            decodedsamples = newpos->samples;
+        }
+    } else {
+        /* already where we need to be */
+        bytesdone = 0;
+    }
+
+    ci->set_elapsed(decodedsamples*1000LL/ci->id3->frequency);
+
+    /* The main decoder loop */
+    endofstream = 0;
+
+    while (!endofstream) {
+        enum codec_command_action action = ci->get_command(&param);
+
+        if (action == CODEC_ACTION_HALT)
+            break;
+
+        if (action == CODEC_ACTION_SEEK_TIME) {
+            /* 3rd args(read_buffer) is unnecessary in the format which AIFF supports. */
+            struct pcm_pos *newpos = codec->get_seek_pos(param, PCM_SEEK_TIME, NULL);
+
+            if (newpos->pos > format.numbytes)
+            {
+                ci->set_elapsed(ci->id3->length);
+                ci->seek_complete();
+                break;
+            }
+
+            if (ci->seek_buffer(firstblockposn + newpos->pos))
+            {
+                bytesdone      = newpos->pos;
+                decodedsamples = newpos->samples;
+            }
+
+            ci->set_elapsed(decodedsamples*1000LL/ci->id3->frequency);
+            ci->seek_complete();
+        }
+        aifbuf = (uint8_t *)ci->request_buffer(&n, format.chunksize);
+
+        if (n == 0)
+            break; /* End of stream */
+
+        /* never decode past the end of the sound data */
+        if (bytesdone + n > format.numbytes) {
+            n = format.numbytes - bytesdone;
+            endofstream = 1;
+        }
+
+        if (codec->decode(aifbuf, n, samples, &bufcount) == CODEC_ERROR)
+        {
+            DEBUGF("codec error\n");
+            return CODEC_ERROR;
+        }
+
+        ci->pcmbuf_insert(samples, NULL, bufcount);
+
+        ci->advance_buffer(n);
+        bytesdone += n;
+        decodedsamples += bufcount;
+        if (bytesdone >= format.numbytes)
+            endofstream = 1;
+
+        ci->set_elapsed(decodedsamples*1000LL/ci->id3->frequency);
+    }
+
+    return CODEC_OK;
+}
--- a/lib/rbcodec/codecs/aiff_enc.c
+++ b/lib/rbcodec/codecs/aiff_enc.c
@ -0,0 +1,400 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2006 Antonius Hellmann
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include <inttypes.h>
+#include "codeclib.h"
+
+CODEC_ENC_HEADER
+
+/* Layout of the fixed-size header placed at the start of each file written
+ * by this encoder: the 'FORM' container, a complete 'COMM' chunk and the
+ * leading fields of the 'SSND' chunk. The struct is packed so that the
+ * members sit at the byte offsets noted beside them. */
+struct aiff_header
+{
+    uint8_t   form_id[4];           /* 00h - 'FORM'                          */
+    uint32_t  form_size;            /* 04h - size of file - 8                */
+    uint8_t   aiff_id[4];           /* 08h - 'AIFF'                          */
+    uint8_t   comm_id[4];           /* 0Ch - 'COMM'                          */
+    int32_t   comm_size;            /* 10h - num_channels through sample_rate
+                                             (18)                            */
+    int16_t   num_channels;         /* 14h - 1=M, 2=S, etc.                  */
+    uint32_t  num_sample_frames;    /* 16h - num samples for each channel    */
+    int16_t   sample_size;          /* 1ah - 1-32 bits per sample            */
+    uint8_t   sample_rate[10];      /* 1ch - IEEE 754 80-bit floating point  */
+    uint8_t   ssnd_id[4];           /* 26h - "SSND"                          */
+    int32_t   ssnd_size;            /* 2ah - size of chunk from offset to
+                                             end of pcm data                 */
+    uint32_t  offset;               /* 2eh - data offset from end of header  */
+    uint32_t  block_size;           /* 32h - pcm data alignment              */
+                                    /* 36h */
+} __attribute__((packed));
+
+/* Output sample depth, in bytes and in bits */
+#define PCM_DEPTH_BYTES             2
+#define PCM_DEPTH_BITS             16
+/* Number of PCM samples (per channel) converted for each encoder chunk */
+#define PCM_SAMP_PER_CHUNK       2048
+/* Bytes of input PCM requested per chunk: one 32-bit word per sample */
+#define PCM_CHUNK_SIZE          (PCM_SAMP_PER_CHUNK*4)
+
+/* Template header written when a file is started; the fields marked (*)
+ * are left zero here and filled in by on_end_file() */
+struct aiff_header aiff_header =
+{
+    { 'F', 'O', 'R', 'M' },             /* form_id               */
+    0,                                  /* form_size         (*) */
+    { 'A', 'I', 'F', 'F' },             /* aiff_id               */
+    { 'C', 'O', 'M', 'M' },             /* comm_id               */
+    htobe32(18),                        /* comm_size             */
+    0,                                  /* num_channels      (*) */
+    0,                                  /* num_sample_frames (*) */
+    htobe16(PCM_DEPTH_BITS),            /* sample_size           */
+    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },   /* sample_rate       (*) */
+    { 'S', 'S', 'N', 'D' },             /* ssnd_id               */
+    0,                                  /* ssnd_size         (*) */
+    htobe32(0),                         /* offset                */
+    htobe32(0),                         /* block_size            */
+};
+
+/* (*) updated when finalizing file */
+
+/* Encoder state, set up by init_encoder() from the recording inputs */
+static int      num_channels IBSS_ATTR;
+static int      rec_mono_mode IBSS_ATTR;
+static uint32_t sample_rate;
+/* size in bytes of one encoded chunk */
+static uint32_t enc_size;
+/* rounding bit carried between samples by the (L+R)/2 mono mix */
+static int32_t  err          IBSS_ATTR;
+
+/* convert unsigned 32 bit value to 80-bit floating point number
+ *
+ * f receives the 10-byte big-endian IEEE 754 extended encoding of l
+ * (sign bit always 0). l == 0 yields all zero bytes.
+ */
+STATICIRAM void uint32_h_to_ieee754_extended_be(uint8_t f[10], uint32_t l)
+                                                ICODE_ATTR;
+STATICIRAM void uint32_h_to_ieee754_extended_be(uint8_t f[10], uint32_t l)
+{
+    /* biased exponent (bias 16383) of a value whose highest set bit is
+       bit 31; lowered by one for every normalising shift below */
+    uint32_t exp = 16383 + 31;
+
+    ci->memset(f, 0, 10);
+
+    if (l == 0)
+        return;
+
+    /* left-justify the value so its most significant set bit is bit 31 */
+    while ((l & (1ul << 31)) == 0)
+    {
+        l <<= 1;
+        exp--;
+    }
+
+    /* sign always zero - bit 79 */
+    /* exponent - bits 64-78. Both bytes are derived from the full biased
+       value so that l == 1 (exponent 0x3FFF) is encoded correctly too,
+       instead of assuming the high byte is always 0x40 */
+    f[0] = (uint8_t)(exp >> 8);
+    f[1] = (uint8_t)exp;
+    /* mantissa is value left justified with most significant non-zero
+       bit stored in bit 63 - bits 0-63 */
+    f[2] = (uint8_t)(l >> 24);
+    f[3] = (uint8_t)(l >> 16);
+    f[4] = (uint8_t)(l >>  8);
+    f[5] = (uint8_t)(l >>  0);
+} /* uint32_h_to_ieee754_extended_be */
+
+/* called very often - inline
+ * True when the event carries an open file handle and the chunk flags,
+ * viewed as a signed long, are not negative. */
+static inline bool is_file_data_ok(struct enc_file_event_data *data) ICODE_ATTR;
+static inline bool is_file_data_ok(struct enc_file_event_data *data)
+{
+    /* no usable file handle */
+    if (data->rec_file < 0)
+        return false;
+
+    return (long)data->chunk->flags >= 0;
+} /* is_file_data_ok */
+
+/* called very often - inline
+ * Append one encoded chunk to the recording file and add its PCM sample
+ * count to the running total. A chunk without data is accepted and
+ * ignored. Returns false on a bad file state or a short write. */
+static inline bool on_write_chunk(struct enc_file_event_data *data) ICODE_ATTR;
+static inline bool on_write_chunk(struct enc_file_event_data *data)
+{
+    ssize_t written;
+
+    if (!is_file_data_ok(data))
+        return false;
+
+    if (data->chunk->enc_data != NULL)
+    {
+        written = ci->write(data->rec_file, data->chunk->enc_data,
+                            data->chunk->enc_size);
+
+        if (written != (ssize_t)data->chunk->enc_size)
+            return false;
+
+        data->num_pcm_samples += data->chunk->num_pcm;
+        return true;
+    }
+
+    /* nothing to write for this chunk */
+#ifdef ROCKBOX_HAS_LOGF
+    ci->logf("aiff enc: NULL data");
+#endif
+    return true;
+} /* on_write_chunk */
+
+/* Create/truncate the recording file named in the event, reset the sample
+ * counter and write the template header. Returns false when the chunk is
+ * already flagged as failed, the filename is empty, or open/write fails. */
+static bool on_start_file(struct enc_file_event_data *data)
+{
+    if (data->chunk->flags & CHUNKF_ERROR)
+        return false;
+
+    if (*data->filename == '\0')
+        return false;
+
+    data->rec_file = ci->open(data->filename, O_RDWR|O_CREAT|O_TRUNC, 0666);
+    if (data->rec_file < 0)
+        return false;
+
+    /* reset sample count */
+    data->num_pcm_samples = 0;
+
+    /* write template headers */
+    if (ci->write(data->rec_file, &aiff_header, sizeof (aiff_header))
+            != sizeof (aiff_header))
+        return false;
+
+    /* the header counts towards the encoded size */
+    data->new_enc_size += sizeof(aiff_header);
+    return true;
+} /* on_start_file */
+
+/* Finalise a recording: read the header back from the start of the file,
+ * fill in the size, channel, frame-count and sample-rate fields, write it
+ * back and close the file. Returns false if any step fails. */
+static bool on_end_file(struct enc_file_event_data *data)
+{
+    struct aiff_header header;
+    uint32_t pcm_bytes;
+
+    if (!is_file_data_ok(data))
+        return false;
+
+    /* fetch the template header that was written when the file was started */
+    if (ci->lseek(data->rec_file, 0, SEEK_SET) != 0)
+        return false;
+
+    if (ci->read(data->rec_file, &header, sizeof (header)) != sizeof (header))
+        return false;
+
+    pcm_bytes = data->num_pcm_samples*num_channels*PCM_DEPTH_BYTES;
+
+    /* 'FORM' chunk */
+    header.form_size         = htobe32(pcm_bytes + sizeof (header) - 8);
+
+    /* 'COMM' chunk */
+    header.num_channels      = htobe16(num_channels);
+    header.num_sample_frames = htobe32(data->num_pcm_samples);
+    uint32_h_to_ieee754_extended_be(header.sample_rate, sample_rate);
+
+    /* 'SSND' chunk */
+    header.ssnd_size         = htobe32(pcm_bytes + 8);
+
+    /* store the completed header and close the file */
+    if (ci->lseek(data->rec_file, 0, SEEK_SET) != 0)
+        return false;
+
+    if (ci->write(data->rec_file, &header, sizeof (header)) != sizeof (header))
+        return false;
+
+    if (ci->close(data->rec_file) != 0)
+        return false;
+
+    data->rec_file = -1;
+
+    return true;
+} /* on_end_file */
+
+/* Dispatch an encoder file event to its handler. When the handler reports
+ * failure, CHUNKF_ERROR is set in the chunk flags to signal the core.
+ * Events other than write/start/end are ignored. */
+STATICIRAM void enc_events_callback(enum enc_events event, void *data)
+                                    ICODE_ATTR;
+STATICIRAM void enc_events_callback(enum enc_events event, void *data)
+{
+    struct enc_file_event_data *fdata = (struct enc_file_event_data *)data;
+    bool ok;
+
+    switch (event)
+    {
+    case ENC_WRITE_CHUNK:
+        ok = on_write_chunk(fdata);
+        break;
+
+    case ENC_START_FILE:
+        ok = on_start_file(fdata);
+        break;
+
+    case ENC_END_FILE:
+        ok = on_end_file(fdata);
+        break;
+
+    default:
+        return;
+    }
+
+    /* Something failed above. Signal error back to core. */
+    if (!ok)
+        fdata->chunk->flags |= CHUNKF_ERROR;
+} /* enc_events_callback */
+
+/* Reduce two consecutive 32-bit input words to two 16-bit mono samples and
+ * store them, big endian, as one output word. rec_mono_mode selects the
+ * upper half (1), the lower half (2) or the average of both halves (other
+ * values); the average keeps its rounding bit in 'err' for the next sample.
+ * Both *src and *dst are advanced past the words consumed/produced. */
+static inline void sample_to_mono(uint32_t **src, uint32_t **dst)
+{
+    int32_t first  = *(*src)++;
+    int32_t second = *(*src)++;
+
+    switch(rec_mono_mode)
+    {
+        case 1:
+            /* mono = L */
+            first  >>= 16;
+            second >>= 16;
+            break;
+        case 2:
+            /* mono = R */
+            first  = (int16_t)first;
+            second = (int16_t)second;
+            break;
+        case 0:
+        default:
+            /* mono = (L+R)/2 */
+            first = (int16_t)first + (first >> 16) + err;
+            err = first & 1;
+            first >>= 1;
+
+            second = (int16_t)second + (second >> 16) + err;
+            err = second & 1;
+            second >>= 1;
+            break;
+    }
+
+    *(*dst)++ = htobe32((first << 16) | (uint16_t)second);
+} /* sample_to_mono */
+
+/* Convert one chunk of native PCM at src into AIFF sample order at dst.
+ * Mono output: pairs of input words are mixed down by sample_to_mono().
+ * Stereo output: copied unchanged on big endian targets, otherwise each
+ * word is passed through swap_odd_even32().
+ * The loops are unrolled eight times and run until src reaches
+ * src + PCM_SAMP_PER_CHUNK. */
+STATICIRAM void chunk_to_aiff_format(uint32_t *src, uint32_t *dst) ICODE_ATTR;
+STATICIRAM void chunk_to_aiff_format(uint32_t *src, uint32_t *dst)
+{
+    if (num_channels == 1)
+    {
+        /* On big endian:
+         *  |LLLLLLLLllllllll|RRRRRRRRrrrrrrrr|
+         *  |LLLLLLLLllllllll|RRRRRRRRrrrrrrrr| =>
+         *  |MMMMMMMMmmmmmmmm|MMMMMMMMmmmmmmmm|
+         *
+         * On little endian:
+         *  |llllllllLLLLLLLL|rrrrrrrrRRRRRRRR|
+         *  |llllllllLLLLLLLL|rrrrrrrrRRRRRRRR| =>
+         *  |MMMMMMMMmmmmmmmm|MMMMMMMMmmmmmmmm|
+         */
+        uint32_t *src_end = src + PCM_SAMP_PER_CHUNK;
+
+        /* each call consumes two input words and emits one output word */
+        do
+        {
+            sample_to_mono(&src, &dst);
+            sample_to_mono(&src, &dst);
+            sample_to_mono(&src, &dst);
+            sample_to_mono(&src, &dst);
+            sample_to_mono(&src, &dst);
+            sample_to_mono(&src, &dst);
+            sample_to_mono(&src, &dst);
+            sample_to_mono(&src, &dst);
+        }
+        while (src < src_end);
+    }
+    else
+    {
+#ifdef ROCKBOX_BIG_ENDIAN
+        /*  |LLLLLLLLllllllll|RRRRRRRRrrrrrrrr| =>
+         *  |LLLLLLLLllllllll|RRRRRRRRrrrrrrrr|
+         */
+        ci->memcpy(dst, src, PCM_CHUNK_SIZE);
+#else
+        /*  |llllllllLLLLLLLL|rrrrrrrrRRRRRRRR| =>
+         *  |LLLLLLLLllllllll|RRRRRRRRrrrrrrrr|
+         */
+        uint32_t *src_end = src + PCM_SAMP_PER_CHUNK;
+
+        /* one input word (one stereo frame) per statement */
+        do
+        {
+            *dst++ = swap_odd_even32(*src++);
+            *dst++ = swap_odd_even32(*src++);
+            *dst++ = swap_odd_even32(*src++);
+            *dst++ = swap_odd_even32(*src++);
+            *dst++ = swap_odd_even32(*src++);
+            *dst++ = swap_odd_even32(*src++);
+            *dst++ = swap_odd_even32(*src++);
+            *dst++ = swap_odd_even32(*src++);
+        }
+        while (src < src_end);
+#endif
+    }
+} /* chunk_to_aiff_format */
+
+/* Check that the core offers the encoder API and that it is configured for
+ * AIFF, latch the recording inputs into the file-level state and register
+ * the chunk size, sample rate and event callback with the buffer system.
+ * Returns false when the API is incomplete or the format is not AIFF. */
+static bool init_encoder(void)
+{
+    struct enc_parameters params;
+    struct enc_inputs     inputs;
+
+    /* every encoder hook used by this codec has to be present */
+    if (!ci->enc_get_inputs     ||
+        !ci->enc_set_parameters ||
+        !ci->enc_get_chunk      ||
+        !ci->enc_finish_chunk   ||
+        !ci->enc_get_pcm_data)
+        return false;
+
+    ci->enc_get_inputs(&inputs);
+
+    if (inputs.config->afmt != AFMT_AIFF)
+        return false;
+
+    err           = 0;
+    rec_mono_mode = inputs.rec_mono_mode;
+    num_channels  = inputs.num_channels;
+    sample_rate   = inputs.sample_rate;
+
+    /* encoded chunk size scales with the channel count */
+    enc_size = PCM_CHUNK_SIZE*inputs.num_channels / 2;
+
+    /* configure the buffer system */
+    params.afmt            = AFMT_AIFF;
+    params.chunk_size      = enc_size;
+    params.enc_sample_rate = sample_rate;
+    params.reserve_bytes   = 0;
+    params.events_callback = enc_events_callback;
+    ci->enc_set_parameters(&params);
+
+    return true;
+} /* init_encoder */
+
+/* this is the codec entry point */
+enum codec_status codec_main(enum codec_entry_call_reason reason)
+{
+    switch (reason)
+    {
+    case CODEC_LOAD:
+        /* set up the encoder; refuse to load if that fails */
+        if (!init_encoder())
+            return CODEC_ERROR;
+        break;
+
+    case CODEC_UNLOAD:
+        /* reset parameters to initial state */
+        ci->enc_set_parameters(NULL);
+        break;
+
+    default:
+        break;
+    }
+
+    return CODEC_OK;
+}
+
+/* this is called for each file to process */
+enum codec_status codec_run(void)
+{
+    /* main encoding loop: runs until the core asks the codec to halt */
+    for (;;)
+    {
+        struct enc_chunk_hdr *chunk;
+        uint32_t *pcm;
+
+        if (ci->get_command(NULL) == CODEC_ACTION_HALT)
+            break;
+
+        pcm = (uint32_t *)ci->enc_get_pcm_data(PCM_CHUNK_SIZE);
+
+        /* no PCM available yet: poll for commands again */
+        if (pcm != NULL)
+        {
+            chunk           = ci->enc_get_chunk();
+            chunk->enc_size = enc_size;
+            chunk->num_pcm  = PCM_SAMP_PER_CHUNK;
+            chunk->enc_data = ENC_CHUNK_SKIP_HDR(chunk->enc_data, chunk);
+
+            chunk_to_aiff_format(pcm, (uint32_t *)chunk->enc_data);
+
+            ci->enc_finish_chunk();
+        }
+    }
+
+    return CODEC_OK;
+}
--- a/lib/rbcodec/codecs/alac.c
+++ b/lib/rbcodec/codecs/alac.c
@ -0,0 +1,146 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2005 Dave Chapman
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include "codeclib.h"
+#include "libm4a/m4a.h"
+#include "libalac/decomp.h"
+
+CODEC_HEADER
+
+/* The maximum buffer size handled. This amount of bytes is buffered for each 
+ * frame. */
+#define ALAC_BYTE_BUFFER_SIZE 32768
+
+static int32_t outputbuffer[ALAC_MAX_CHANNELS][ALAC_BLOCKSIZE] IBSS_ATTR;
+
+/* Codec entry point: on load, configures the DSP for non-interleaved
+ * stereo and the ALAC output sample depth. Other call reasons need no
+ * work. */
+enum codec_status codec_main(enum codec_entry_call_reason reason)
+{
+    if (reason != CODEC_LOAD)
+        return CODEC_OK;
+
+    /* Generic codec initialisation */
+    ci->configure(DSP_SET_STEREO_MODE, STEREO_NONINTERLEAVED);
+    ci->configure(DSP_SET_SAMPLE_DEPTH, ALAC_OUTPUT_DEPTH-1);
+
+    return CODEC_OK;
+}
+
+/* Convert a sample count into milliseconds of playback at the given
+ * sample rate. The product is formed in 64 bits: a 32-bit samples*10
+ * wraps after roughly 2.7 hours of 44.1 kHz audio. Returns 0 when the
+ * sample rate is 0 so that no division by zero can happen. */
+static uint32_t alac_samples_to_ms(uint32_t samples, unsigned long frequency)
+{
+    if (frequency == 0)
+        return 0;
+
+    return (uint32_t)(((uint64_t)samples * 1000) / frequency);
+}
+
+/* Called for each file to process.
+ *
+ * Parses the container with qtmovie_read(), optionally resumes at the
+ * position derived from ci->id3->offset, then decodes one ALAC frame per
+ * loop iteration and hands the two output channels to pcmbuf_insert().
+ * Seek requests are serviced at the top of each iteration; a halt request
+ * ends the loop.
+ *
+ * Returns CODEC_ERROR if codec initialisation or container parsing fails,
+ * CODEC_OK otherwise. */
+enum codec_status codec_run(void)
+{
+    size_t n;
+    demux_res_t demux_res;
+    stream_t input_stream;
+    uint32_t samplesdone = 0;
+    uint32_t elapsedtime = 0;
+    int samplesdecoded;
+    unsigned int i;
+    unsigned char* buffer;
+    alac_file alac;
+    intptr_t param;
+
+    /* Clean and initialize decoder structures */
+    memset(&demux_res , 0, sizeof(demux_res));
+    if (codec_init()) {
+        LOGF("ALAC: Error initialising codec\n");
+        return CODEC_ERROR;
+    }
+
+    ci->configure(DSP_SWITCH_FREQUENCY, ci->id3->frequency);
+    codec_set_replaygain(ci->id3);
+
+    ci->seek_buffer(0);
+
+    stream_create(&input_stream,ci);
+
+    /* Read from ci->id3->offset before calling qtmovie_read.
+     * The byte offset is turned into an approximate sample position; this
+     * is skipped when there is no offset or when the bitrate is 0, which
+     * would otherwise divide by zero. */
+    if (ci->id3->offset != 0 && ci->id3->bitrate != 0) {
+        samplesdone = (uint32_t)(((uint64_t)(ci->id3->offset) * ci->id3->frequency) /
+                      (ci->id3->bitrate*128));
+    }
+
+    /* if qtmovie_read returns successfully, the stream is up to
+     * the movie data, which can be used directly by the decoder */
+    if (!qtmovie_read(&input_stream, &demux_res)) {
+        LOGF("ALAC: Error initialising file\n");
+        return CODEC_ERROR;
+    }
+
+    /* initialise the sound converter */
+    alac_set_info(&alac, demux_res.codecdata);
+
+    /* Set i for first frame, seek to desired sample position for resuming. */
+    i=0;
+    if (samplesdone > 0) {
+        if (m4a_seek(&demux_res, &input_stream, samplesdone,
+                      &samplesdone, (int*) &i)) {
+            elapsedtime = alac_samples_to_ms(samplesdone, ci->id3->frequency);
+        } else {
+            /* resume failed: start from the beginning */
+            samplesdone = 0;
+        }
+    }
+
+    ci->set_elapsed(elapsedtime);
+
+    /* The main decoding loop */
+    while (i < demux_res.num_sample_byte_sizes) {
+        enum codec_command_action action = ci->get_command(&param);
+
+        if (action == CODEC_ACTION_HALT)
+            break;
+
+        /* Deal with any pending seek requests */
+        if (action == CODEC_ACTION_SEEK_TIME) {
+            if (m4a_seek(&demux_res, &input_stream,
+                         (param/10) * (ci->id3->frequency/100),
+                         &samplesdone, (int *)&i)) {
+                elapsedtime = alac_samples_to_ms(samplesdone,
+                                                 ci->id3->frequency);
+            }
+            ci->set_elapsed(elapsedtime);
+            ci->seek_complete();
+        }
+
+        /* Request the required number of bytes from the input buffer.
+         * This is done after seek handling so the buffer always refers to
+         * the position that is about to be decoded. */
+        buffer=ci->request_buffer(&n, ALAC_BYTE_BUFFER_SIZE);
+
+        /* Decode one block - returned samples will be host-endian */
+        samplesdecoded=alac_decode_frame(&alac, buffer, outputbuffer, ci->yield);
+        ci->yield();
+
+        /* Advance codec buffer by amount of consumed bytes */
+        ci->advance_buffer(alac.bytes_consumed);
+
+        /* Output the audio */
+        ci->pcmbuf_insert(outputbuffer[0], outputbuffer[1], samplesdecoded);
+
+        /* Update the elapsed-time indicator */
+        samplesdone+=samplesdecoded;
+        elapsedtime = alac_samples_to_ms(samplesdone, ci->id3->frequency);
+        ci->set_elapsed(elapsedtime);
+
+        i++;
+    }
+
+    LOGF("ALAC: Decoded %lu samples\n",(unsigned long)samplesdone);
+    return CODEC_OK;
+}
--- a/lib/rbcodec/codecs/ape.c
+++ b/lib/rbcodec/codecs/ape.c
@ -0,0 +1,330 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2007 Dave Chapman
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include "codeclib.h"
+#include <codecs/demac/libdemac/demac.h>
+
+CODEC_HEADER
+
+#define BLOCKS_PER_LOOP     1024
+#define MAX_CHANNELS        2
+#define MAX_BYTESPERSAMPLE  3
+
+/* Monkey's Audio files have one seekpoint per frame.  The framesize
+   varies between 73728 and 1179648 samples.  
+
+   At the smallest framesize, 30000 frames would be 50155 seconds of
+   audio - almost 14 hours.  This should be enough for any file a user
+   would want to play in Rockbox, given the 2GB FAT filesize (and 4GB
+   seektable entry size) limit.
+
+   This means the seektable is 120000 bytes, but we have a lot of
+   spare room in the codec buffer - the APE codec itself is small.
+*/
+
+#define MAX_SEEKPOINTS      30000
+static uint32_t seektablebuf[MAX_SEEKPOINTS];
+
+#define INPUT_CHUNKSIZE     (32*1024)
+
+/* 1024*4 = 4096 bytes per channel */
+static int32_t decoded0[BLOCKS_PER_LOOP] IBSS_ATTR;
+static int32_t decoded1[BLOCKS_PER_LOOP] IBSS_ATTR;
+
+#define MAX_SUPPORTED_SEEKTABLE_SIZE 5000
+
+
+/* Translate an absolute sample position into a seek target.
+
+   On success *newframe receives the index of the frame that holds
+   new_sample, *filepos the seektable entry for that frame and
+   *samplestoskip the position of new_sample inside the frame, and true
+   is returned.  If the seektable has no entry for that frame, the
+   outputs are left untouched and false is returned.
+*/
+
+static bool ape_calc_seekpos(struct ape_ctx_t* ape_ctx,
+                             uint32_t new_sample,
+                             uint32_t* newframe,
+                             uint32_t* filepos,
+                             uint32_t* samplestoskip)
+{
+    const uint32_t frame = new_sample / ape_ctx->blocksperframe;
+
+    /* Only frames that have a seekpoint can be reached */
+    if (frame < ape_ctx->numseekpoints)
+    {
+        *newframe = frame;
+        *filepos = ape_ctx->seektable[frame];
+        *samplestoskip = new_sample - (frame * ape_ctx->blocksperframe);
+
+        return true;
+    }
+
+    return false;
+}
+
+/* The resume offset is a value in bytes - we need to
+   turn it into a frame number and samplestoskip value.
+
+   The frame chosen is the last one whose seekpoint is not beyond
+   resume_offset (frame 0 if there is none).  Only frames that exist
+   both in the file (totalframes) and in the seektable (numseekpoints)
+   are considered, so the seektable is never read past its valid
+   entries - not even when resume_offset lies beyond the last seekpoint.
+
+   Outputs:
+     *currentframe   index of the frame to resume in
+     *samplesdone    number of samples in all frames before it
+     *samplestoskip  estimated samples to discard inside that frame
+                     (0 for the last usable frame)
+     *firstbyte      3 minus the low two bits of the frame's file position
+
+   The input buffer is moved to the frame's file position rounded down to
+   a multiple of 4.  If there is no usable seekpoint at all, the buffer
+   position is left alone, the counters stay 0 and *firstbyte is set to
+   3, the value that corresponds to a 4-byte aligned position. */
+
+static void ape_resume(struct ape_ctx_t* ape_ctx, size_t resume_offset, 
+                       uint32_t* currentframe, uint32_t* samplesdone, 
+                       uint32_t* samplestoskip, int* firstbyte)
+{
+    off_t newfilepos;
+    uint32_t usable;
+    uint32_t frame = 0;
+
+    *currentframe = 0;
+    *samplesdone = 0;
+    *samplestoskip = 0;
+
+    /* Number of frames we have a valid seekpoint for */
+    usable = ape_ctx->totalframes;
+    if (ape_ctx->numseekpoints < usable)
+        usable = ape_ctx->numseekpoints;
+
+    if (usable == 0) {
+        /* Nothing to look up - do not touch the seektable */
+        *firstbyte = 3;
+        return;
+    }
+
+    /* Advance while the next frame still starts at or before the offset */
+    while ((frame + 1 < usable) &&
+           (ape_ctx->seektable[frame + 1] <= resume_offset))
+    {
+        ++frame;
+    }
+
+    *currentframe = frame;
+    *samplesdone = frame * ape_ctx->blocksperframe;
+
+    newfilepos = ape_ctx->seektable[frame];
+
+    /* APE's bytestream is weird... */
+    *firstbyte = 3 - (newfilepos & 3);
+    newfilepos &= ~3;
+
+    ci->seek_buffer(newfilepos);
+
+    /* We estimate where we were in the current frame, based on the
+       byte offset.  This needs the start of the following frame, and an
+       offset that really lies inside the current one. */
+    if ((frame + 1 < usable) &&
+        (resume_offset > ape_ctx->seektable[frame])) {
+        int64_t framesize = (int64_t)ape_ctx->seektable[frame + 1] -
+                            (int64_t)ape_ctx->seektable[frame];
+        int64_t offset = (int64_t)resume_offset -
+                         (int64_t)ape_ctx->seektable[frame];
+
+        /* A non-increasing seektable would give a zero or negative size */
+        if (framesize > 0)
+            *samplestoskip = (offset * ape_ctx->blocksperframe) / framesize;
+    }
+}
+
+/* Codec entry point: on load, tells the DSP which sample depth the APE
+ * decoder outputs. Other call reasons need no work. */
+enum codec_status codec_main(enum codec_entry_call_reason reason)
+{
+    if (reason != CODEC_LOAD)
+        return CODEC_OK;
+
+    /* Generic codec initialisation */
+    ci->configure(DSP_SET_SAMPLE_DEPTH, APE_OUTPUT_DEPTH-1);
+
+    return CODEC_OK;
+}
+
+/* Called for each file to process.
+ *
+ * Parses the APE header, loads the seektable into seektablebuf, optionally
+ * resumes at the byte offset found in ci->id3->offset, then decodes the
+ * file frame by frame in chunks of at most BLOCKS_PER_LOOP blocks.  Seek
+ * and halt requests are polled once per chunk.
+ *
+ * Returns CODEC_ERROR if codec initialisation, header parsing or chunk
+ * decoding fails, CODEC_OK otherwise. */
+enum codec_status codec_run(void)
+{
+    struct ape_ctx_t ape_ctx;
+    uint32_t samplesdone;
+    uint32_t elapsedtime;
+    size_t bytesleft;
+
+    uint32_t currentframe;
+    uint32_t newfilepos;
+    uint32_t samplestoskip;
+    int nblocks;
+    int bytesconsumed;
+    unsigned char* inbuffer;
+    uint32_t blockstodecode;
+    int res;
+    int firstbyte;
+    size_t resume_offset;
+    intptr_t param;
+
+    if (codec_init()) {
+        LOGF("APE: Error initialising codec\n");
+        return CODEC_ERROR;
+    }
+
+    /* Remember the resume position - when the codec is opened, the
+       playback engine will reset it. */
+    resume_offset = ci->id3->offset;
+
+    ci->seek_buffer(0);
+    inbuffer = ci->request_buffer(&bytesleft, INPUT_CHUNKSIZE);
+
+    /* Read the file headers to populate the ape_ctx struct */
+    if (ape_parseheaderbuf(inbuffer,&ape_ctx) < 0) {
+        LOGF("APE: Error reading header\n");
+        return CODEC_ERROR;
+    }
+
+    /* Initialise the seektable for this file; the number of seekpoints is
+       clamped to what seektablebuf can hold */
+    ape_ctx.seektable = seektablebuf;
+    ape_ctx.numseekpoints = MIN(MAX_SEEKPOINTS,ape_ctx.numseekpoints);
+
+    /* Move to the seektable's position in the file */
+    ci->advance_buffer(ape_ctx.seektablefilepos);
+
+    /* The seektable may be bigger than the guard buffer (32KB), so we
+       do a read() */
+    /* NOTE(review): the result of read_filebuf is not checked here */
+    ci->read_filebuf(ape_ctx.seektable, ape_ctx.numseekpoints * sizeof(uint32_t));
+
+#ifdef ROCKBOX_BIG_ENDIAN
+    /* Byte-swap the little-endian seekpoints */
+    {
+        uint32_t i;
+
+        for(i = 0; i < ape_ctx.numseekpoints; i++)
+            ape_ctx.seektable[i] = swap32(ape_ctx.seektable[i]);
+    }
+#endif
+
+    /* Now advance the file position to the first frame; the distance is
+       firstframe minus everything consumed so far (seektable position
+       plus the seekpoints that were read) */
+    ci->advance_buffer(ape_ctx.firstframe - 
+                       (ape_ctx.seektablefilepos +
+                        ape_ctx.numseekpoints * sizeof(uint32_t)));
+
+    ci->configure(DSP_SWITCH_FREQUENCY, ape_ctx.samplerate);
+    ci->configure(DSP_SET_STEREO_MODE, ape_ctx.channels == 1 ?
+                  STEREO_MONO : STEREO_NONINTERLEAVED);
+    codec_set_replaygain(ci->id3);
+
+    /* Work out where decoding starts */
+
+    if (resume_offset) {
+        /* The resume offset is a value in bytes - we need to
+           turn it into a frame number and samplestoskip value */
+
+        ape_resume(&ape_ctx, resume_offset, 
+                   &currentframe, &samplesdone, &samplestoskip, &firstbyte);
+    } else {
+        currentframe = 0;
+        samplesdone = 0;
+        samplestoskip = 0;
+        firstbyte = 3;  /* Take account of the little-endian 32-bit byte ordering */
+    }
+
+    /* NOTE(review): samplesdone*10 is a 32-bit product and looks like it
+       can wrap for very long files - confirm */
+    elapsedtime = (samplesdone*10)/(ape_ctx.samplerate/100);
+    ci->set_elapsed(elapsedtime);
+
+    /* Initialise the buffer */
+    inbuffer = ci->request_buffer(&bytesleft, INPUT_CHUNKSIZE);
+
+    /* The main decoding loop - we decode the frames a small chunk at a time */
+    while (currentframe < ape_ctx.totalframes)
+    {
+        /* A successful seek jumps back here so that the frame decoder is
+           set up again for the new frame; note that the jump does not
+           re-evaluate the loop condition above */
+frame_start:
+        /* Calculate how many blocks there are in this frame */
+        if (currentframe == (ape_ctx.totalframes - 1))
+            nblocks = ape_ctx.finalframeblocks;
+        else
+            nblocks = ape_ctx.blocksperframe;
+
+        ape_ctx.currentframeblocks = nblocks;
+
+        /* Initialise the frame decoder */
+        init_frame_decoder(&ape_ctx, inbuffer, &firstbyte, &bytesconsumed);
+
+        ci->advance_buffer(bytesconsumed);
+        inbuffer = ci->request_buffer(&bytesleft, INPUT_CHUNKSIZE);
+
+        /* Decode the frame a chunk at a time */
+        while (nblocks > 0)
+        {
+            enum codec_command_action action = ci->get_command(&param);
+
+            if (action == CODEC_ACTION_HALT)
+                goto done;
+
+            /* Deal with any pending seek requests */
+            if (action == CODEC_ACTION_SEEK_TIME) 
+            {
+                /* param is converted into a sample position first */
+                if (ape_calc_seekpos(&ape_ctx,
+                    (param/10) * (ci->id3->frequency/100),
+                    &currentframe,
+                    &newfilepos,
+                    &samplestoskip))
+                {
+                    samplesdone = currentframe * ape_ctx.blocksperframe;
+
+                    /* APE's bytestream is weird... */
+                    firstbyte = 3 - (newfilepos & 3);
+                    newfilepos &= ~3;
+
+                    ci->seek_buffer(newfilepos);
+                    inbuffer = ci->request_buffer(&bytesleft, INPUT_CHUNKSIZE);
+
+                    elapsedtime = (samplesdone*10)/(ape_ctx.samplerate/100);
+                    ci->set_elapsed(elapsedtime);
+                    ci->seek_complete();
+                    goto frame_start;  /* Sorry... */
+                }
+
+                /* No seekpoint for the target: acknowledge the request
+                   and carry on decoding from the current position */
+                ci->seek_complete();
+            }
+
+            blockstodecode = MIN(BLOCKS_PER_LOOP, nblocks);
+
+            if ((res = decode_chunk(&ape_ctx, inbuffer, &firstbyte,
+                                    &bytesconsumed,
+                                    decoded0, decoded1,
+                                    blockstodecode)) < 0)
+            {
+                /* Frame decoding error, abort */
+                LOGF("APE: Frame %lu, error %d\n",(unsigned long)currentframe,res);
+                return CODEC_ERROR;
+            }
+
+            ci->yield();
+
+            /* After a resume or seek the leading samplestoskip samples of
+               the frame are decoded but not output */
+            if (samplestoskip > 0) {
+                if (samplestoskip < blockstodecode) {
+                    /* The skip ends inside this chunk: output the rest */
+                    ci->pcmbuf_insert(decoded0 + samplestoskip, 
+                                      decoded1 + samplestoskip, 
+                                      blockstodecode - samplestoskip);
+                    samplestoskip = 0;
+                } else {
+                    /* The whole chunk is skipped */
+                    samplestoskip -= blockstodecode;
+                }
+            } else {
+                ci->pcmbuf_insert(decoded0, decoded1, blockstodecode);
+            }
+        
+            samplesdone += blockstodecode;
+
+            if (!samplestoskip) {
+                /* Update the elapsed-time indicator */
+                elapsedtime = (samplesdone*10)/(ape_ctx.samplerate/100);
+                ci->set_elapsed(elapsedtime);
+            }
+
+            ci->advance_buffer(bytesconsumed);
+            inbuffer = ci->request_buffer(&bytesleft, INPUT_CHUNKSIZE);
+
+            /* Decrement the block count */
+            nblocks -= blockstodecode;
+        }
+
+        currentframe++;
+    }
+
+done:
+    LOGF("APE: Decoded %lu samples\n",(unsigned long)samplesdone);
+    return CODEC_OK;
+}
--- a/lib/rbcodec/codecs/asap.c
+++ b/lib/rbcodec/codecs/asap.c
@ -0,0 +1,140 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2008 Dominik Wenger
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include "codeclib.h"
+#include "libasap/asap.h"
+
+CODEC_HEADER
+
+#define CHUNK_SIZE (1024*2)
+
+static byte samples[CHUNK_SIZE] IBSS_ATTR;   /* The sample buffer */
+static ASAP_State asap IBSS_ATTR;         /* asap codec state */
+
+/* Codec entry point: this codec has nothing to set up or tear down, so
+ * every call reason simply reports success. */
+enum codec_status codec_main(enum codec_entry_call_reason reason)
+{
+    (void)reason; /* intentionally unused */
+
+    /* Nothing to do */
+    return CODEC_OK;
+}
+
+/* Convert a number of 44.1 kHz sample frames into milliseconds
+ * (frames * 1000 / 44100), using integer arithmetic only. */
+static uint32_t asap_frames_to_ms(uint32_t frames)
+{
+    return (uint32_t)(((uint64_t)frames * 10) / 441);
+}
+
+/* Convert a time in milliseconds into a number of 44.1 kHz sample frames
+ * (ms * 44100 / 1000), using integer arithmetic only. */
+static uint32_t asap_ms_to_frames(intptr_t ms)
+{
+    return (uint32_t)(((uint64_t)ms * 441) / 10);
+}
+
+/* Called for each file to process.
+ *
+ * Requests the whole file from the input buffer, hands it to ASAP_Load(),
+ * configures the DSP for 44.1 kHz 16-bit output (mono or interleaved
+ * stereo depending on the module) and then generates audio one buffer at
+ * a time until ASAP_Generate() returns a short buffer or a halt request
+ * arrives.
+ *
+ * Progress is tracked in sample frames rather than bytes, so the elapsed
+ * time is correct for stereo modules too (one stereo frame is 4 bytes).
+ *
+ * Returns CODEC_ERROR if initialisation, loading the file or ASAP_Load()
+ * fails, CODEC_OK otherwise. */
+enum codec_status codec_run(void)
+{
+    int n_bytes;
+    int song;
+    int duration;
+    char* module;
+    int bytesPerSample;
+    uint32_t frames_done = 0;
+    size_t filesize;
+    intptr_t param;
+    
+    if (codec_init()) {
+        DEBUGF("codec init failed\n");
+        return CODEC_ERROR;
+    }
+
+    codec_set_replaygain(ci->id3);
+        
+    /* The module has to be available in one piece */
+    ci->seek_buffer(0);
+    module = ci->request_buffer(&filesize, ci->filesize);
+    if (!module || (size_t)filesize < (size_t)ci->filesize) 
+    {
+        DEBUGF("loading error\n");
+        return CODEC_ERROR;
+    }
+
+    /*Init ASAP */
+    if (!ASAP_Load(&asap, ci->id3->path, module, filesize))
+    {
+        DEBUGF("%s: format not supported",ci->id3->path);
+        return CODEC_ERROR;
+    }  
+    
+    /* Make use of 44.1khz */
+    ci->configure(DSP_SET_FREQUENCY, 44100);
+    /* Samples are 16 bit, generated in host byte order (see below) */
+    ci->configure(DSP_SET_SAMPLE_DEPTH, 16);
+    /* Stereo or Mono output ? */
+    if(asap.module_info->channels ==1)
+    {
+        ci->configure(DSP_SET_STEREO_MODE, STEREO_MONO);
+        bytesPerSample = 2;
+    }
+    else
+    {
+        ci->configure(DSP_SET_STEREO_MODE, STEREO_INTERLEAVED);
+        bytesPerSample = 4; 
+    }    
+    /* reset elapsed */
+    ci->set_elapsed(0);
+
+    song = asap.module_info->default_song;
+    duration = asap.module_info->durations[song];
+    if (duration < 0)
+        duration = 180 * 1000;  /* no duration given: use 180 seconds */
+    
+    /* set id3 length, because metadata parse might not have done it */
+    ci->id3->length = duration;
+    
+    ASAP_PlaySong(&asap, song, duration);
+    ASAP_MutePokeyChannels(&asap, 0);
+    
+    /* The main decoder loop */    
+    while (1) {
+        enum codec_command_action action = ci->get_command(&param);
+
+        if (action == CODEC_ACTION_HALT)
+            break;
+
+        if (action == CODEC_ACTION_SEEK_TIME) {
+            /* New time is ready in param */
+
+            /* seek to pos */
+            ASAP_Seek(&asap,param);
+            /* update the frame counter to match the new position */
+            frames_done = asap_ms_to_frames(param);
+            /* update elapsed */
+            ci->set_elapsed(asap_frames_to_ms(frames_done));
+            /* seek ready */    
+            ci->seek_complete();            
+        }
+        
+        /* Generate a buffer full of Audio */
+        #ifdef ROCKBOX_LITTLE_ENDIAN
+        n_bytes = ASAP_Generate(&asap, samples, sizeof(samples), ASAP_FORMAT_S16_LE);
+        #else
+        n_bytes = ASAP_Generate(&asap, samples, sizeof(samples), ASAP_FORMAT_S16_BE);
+        #endif
+        
+        ci->pcmbuf_insert(samples, NULL, n_bytes /bytesPerSample);
+        
+        /* Count frames, not bytes, so mono and stereo advance equally */
+        frames_done += n_bytes / bytesPerSample;
+        ci->set_elapsed(asap_frames_to_ms(frames_done));
+        
+        /* A short buffer means the song has ended */
+        if((size_t)n_bytes != sizeof(samples))
+            break;
+    }
+ 
+    return CODEC_OK;    
+}
--- a/lib/rbcodec/codecs/atrac3_oma.c
+++ b/lib/rbcodec/codecs/atrac3_oma.c
@ -0,0 +1,153 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2009 Mohamed Tarek
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include <string.h>
+
+#include "logf.h"
+#include "codeclib.h"
+#include "inttypes.h"
+#include "libatrac/atrac3.h"
+
+CODEC_HEADER
+
+#define FRAMESIZE ci->id3->bytesperframe
+#define BITRATE   ci->id3->bitrate
+
+static ATRAC3Context q IBSS_ATTR;
+
+/* Codec entry point: this codec has nothing to set up or tear down, so
+ * every call reason simply reports success. */
+enum codec_status codec_main(enum codec_entry_call_reason reason)
+{
+    (void)reason; /* intentionally unused */
+
+    /* Nothing to do */
+    return CODEC_OK;
+}
+
+/* Called for each file to process.
+ *
+ * Initialises the ATRAC3 decoder from the track metadata, optionally
+ * resumes mid-track by converting ci->id3->offset into a seek time, then
+ * decodes one fixed-size frame (FRAMESIZE bytes) per loop iteration until
+ * all frames are done or a halt request arrives.  Seek requests are turned
+ * into a frame index with the track's bitrate.
+ *
+ * Returns CODEC_ERROR if initialisation fails, if the metadata carries a
+ * zero frame size or bitrate, or if a frame fails to decode; CODEC_OK
+ * otherwise. */
+enum codec_status codec_run(void)
+{
+    static size_t buff_size;
+    int datasize, res, frame_counter, total_frames, seek_frame_offset;
+    uint8_t *bit_buffer;
+    int elapsed = 0;
+    size_t resume_offset;
+    intptr_t param;
+    enum codec_command_action action = CODEC_ACTION_NULL;
+
+    if (codec_init()) {
+        DEBUGF("codec init failed\n");
+        return CODEC_ERROR;
+    }
+
+    resume_offset = ci->id3->offset;
+
+    codec_set_replaygain(ci->id3);
+    ci->memset(&q,0,sizeof(ATRAC3Context));
+ 
+    ci->configure(DSP_SET_FREQUENCY, ci->id3->frequency);
+    ci->configure(DSP_SET_SAMPLE_DEPTH, 17); /* Remark: atrac3 uses s15.0 by default, s15.2 was hacked. */
+    ci->configure(DSP_SET_STEREO_MODE, ci->id3->channels == 1 ?
+        STEREO_MONO : STEREO_NONINTERLEAVED);
+
+    ci->seek_buffer(0);
+
+    res = atrac3_decode_init(&q, ci->id3);
+    if(res < 0) {
+        DEBUGF("failed to initialize OMA atrac decoder\n");
+        return CODEC_ERROR;
+    }
+
+    /* Both values are used as divisors below; refuse metadata that would
+     * make those divisions divide by zero */
+    if(FRAMESIZE == 0 || BITRATE == 0) {
+        DEBUGF("invalid frame size or bitrate\n");
+        return CODEC_ERROR;
+    }
+
+    total_frames = (ci->id3->filesize - ci->id3->first_frame_offset) / FRAMESIZE;
+    frame_counter = 0;
+    
+    /* check for a mid-track resume and force a seek time accordingly */
+    if(resume_offset > ci->id3->first_frame_offset) {
+        resume_offset -= ci->id3->first_frame_offset;
+        /* calculate resume_offset in frames */
+        resume_offset = (int)resume_offset / FRAMESIZE;
+        param = (int)resume_offset * ((FRAMESIZE * 8)/BITRATE);
+        action = CODEC_ACTION_SEEK_TIME;
+    }
+    else {
+        ci->set_elapsed(0);
+        ci->seek_buffer(ci->id3->first_frame_offset);
+    }
+
+    /* The main decoder loop */  
+    while(frame_counter < total_frames)
+    {
+        /* A forced resume seek is handled before polling for commands */
+        if (action == CODEC_ACTION_NULL)
+            action = ci->get_command(&param);
+
+        if (action == CODEC_ACTION_HALT)
+            break;
+
+        bit_buffer = (uint8_t *) ci->request_buffer(&buff_size, FRAMESIZE);
+
+        if (action == CODEC_ACTION_SEEK_TIME) {
+            /* Do not allow seeking beyond the file's length */
+            if ((unsigned) param > ci->id3->length) {
+                ci->set_elapsed(ci->id3->length);
+                ci->seek_complete();
+                break;
+            }       
+
+            /* Seek to the start of the track */
+            if (param == 0) {
+                elapsed = 0;
+                /* Restart the frame count too; otherwise the loop would
+                 * end before the whole track has been played again */
+                frame_counter = 0;
+                ci->set_elapsed(0);
+                ci->seek_buffer(ci->id3->first_frame_offset);
+                ci->seek_complete();
+                action = CODEC_ACTION_NULL;
+                continue;           
+            }                                                                
+
+            /* Turn the requested time into a frame index */
+            seek_frame_offset = (param * BITRATE) / (8 * FRAMESIZE);
+            frame_counter = seek_frame_offset;
+            ci->seek_buffer(ci->id3->first_frame_offset + seek_frame_offset* FRAMESIZE);
+            bit_buffer = (uint8_t *) ci->request_buffer(&buff_size, FRAMESIZE);
+            elapsed = param;
+            ci->set_elapsed(elapsed);
+            ci->seek_complete(); 
+        }
+
+        action = CODEC_ACTION_NULL;
+
+        res = atrac3_decode_frame(FRAMESIZE, &q, &datasize, bit_buffer, FRAMESIZE);
+
+        /* Anything other than a fully consumed frame is treated as fatal */
+        if(res != (int)FRAMESIZE) {
+            DEBUGF("codec error\n");
+            return CODEC_ERROR;
+        }
+
+        if(datasize)
+            ci->pcmbuf_insert(q.outSamples, q.outSamples + 1024,
+                              q.samples_per_frame / ci->id3->channels);
+
+        elapsed += (FRAMESIZE * 8) / BITRATE;
+        ci->set_elapsed(elapsed);
+
+        ci->advance_buffer(FRAMESIZE);
+        frame_counter++;
+    }
+
+    return CODEC_OK;    
+}
--- a/lib/rbcodec/codecs/atrac3_rm.c
+++ b/lib/rbcodec/codecs/atrac3_rm.c
@ -0,0 +1,215 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2009 Mohamed Tarek
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include <string.h>
+
+#include "logf.h"
+#include "codeclib.h"
+#include "inttypes.h"
+#include "libatrac/atrac3.h"
+
+CODEC_HEADER
+
+static RMContext rmctx  IBSS_ATTR_LARGE_IRAM;
+static RMPacket pkt     IBSS_ATTR_LARGE_IRAM;
+static ATRAC3Context q  IBSS_ATTR;
+
+/* Fill *rmctx from the RMContext image found at the first 4-byte aligned
+ * address inside the ID3v2 buffer, then copy the codec extradata to the
+ * start of that buffer, where the atrac3 decoder expects it. */
+static void init_rm(RMContext *rmctx)
+{
+    /* address of id3v2buf rounded up to a multiple of 4 */
+    const intptr_t aligned_src = ((intptr_t)ci->id3->id3v2buf + 3) &~ 3;
+
+    /* initialize the RMContext */
+    memcpy(rmctx, (void*)aligned_src, sizeof(RMContext));
+
+    /* and atrac3 expects extradata in id3v2buf, so we shall give it that */
+    memcpy(ci->id3->id3v2buf, (char*)rmctx->codec_extradata,
+           rmctx->extradata_size*sizeof(char));
+}
+
+/* Codec entry point: this codec has nothing to set up or tear down, so
+ * every call reason simply reports success. */
+enum codec_status codec_main(enum codec_entry_call_reason reason)
+{
+    (void)reason; /* intentionally unused */
+
+    /* Nothing to do */
+    return CODEC_OK;
+}
+
+/* Called for each file to process.
+ *
+ * Loads the RealMedia context with init_rm(), initialises the ATRAC3
+ * decoder, optionally resumes mid-track by converting ci->id3->offset
+ * into a seek time, then repeatedly fetches a packet with rm_get_packet()
+ * and decodes its frames one by one.  Seek and halt requests are polled
+ * before every frame.
+ *
+ * Returns CODEC_ERROR if initialisation fails, if packet parsing fails
+ * before any frame has been played, or if a frame fails to decode;
+ * CODEC_OK otherwise. */
+enum codec_status codec_run(void)
+{
+    static size_t buff_size;
+    int datasize, res, consumed, i, time_offset;
+    uint8_t *bit_buffer;
+    uint16_t fs,sps,h;
+    uint32_t packet_count;
+    int scrambling_unit_size, num_units, elapsed = 0;
+    /* -1 until the first frame has been decoded, 1 afterwards */
+    int playback_on = -1;
+    size_t resume_offset;
+    intptr_t param;
+    enum codec_command_action action = CODEC_ACTION_NULL;
+
+    if (codec_init()) {
+        DEBUGF("codec init failed\n");
+        return CODEC_ERROR;
+    }
+
+    resume_offset = ci->id3->offset;
+
+    codec_set_replaygain(ci->id3);
+    ci->memset(&rmctx,0,sizeof(RMContext));
+    ci->memset(&pkt,0,sizeof(RMPacket));
+    ci->memset(&q,0,sizeof(ATRAC3Context));
+
+    ci->seek_buffer(0);
+    init_rm(&rmctx);
+ 
+    ci->configure(DSP_SET_FREQUENCY, ci->id3->frequency);
+    ci->configure(DSP_SET_SAMPLE_DEPTH, 17); /* Remark: atrac3 uses s15.0 by default, s15.2 was hacked. */
+    ci->configure(DSP_SET_STEREO_MODE, rmctx.nb_channels == 1 ?
+        STEREO_MONO : STEREO_NONINTERLEAVED);
+
+    /* Rearrange the context fields: audio_framesize takes the old
+       block_align and block_align takes sub_packet_size.  fs, sps and h
+       are local copies used in the size and time calculations below. */
+    packet_count = rmctx.nb_packets;
+    rmctx.audio_framesize = rmctx.block_align;
+    rmctx.block_align = rmctx.sub_packet_size;
+    fs = rmctx.audio_framesize;
+    sps= rmctx.block_align;
+    h = rmctx.sub_packet_h;
+    /* number of bytes requested from the input buffer per packet read */
+    scrambling_unit_size = h * (fs + PACKET_HEADER_SIZE);
+    
+    res = atrac3_decode_init(&q, ci->id3);
+    if(res < 0) {
+        DEBUGF("failed to initialize RM atrac decoder\n");
+        return CODEC_ERROR;
+    }
+    
+    /* check for a mid-track resume and force a seek time accordingly */
+    if(resume_offset > rmctx.data_offset + DATA_HEADER_SIZE) {
+        resume_offset -= rmctx.data_offset + DATA_HEADER_SIZE;
+        /* NOTE(review): this num_units value looks unused - the seek code
+           below assigns num_units again before reading it */
+        num_units = (int)resume_offset / scrambling_unit_size;        
+        /* put number of subpackets to skip in resume_offset */
+        resume_offset /= (sps + PACKET_HEADER_SIZE);
+        param = (int)resume_offset * ((sps * 8 * 1000)/rmctx.bit_rate);
+        action = CODEC_ACTION_SEEK_TIME;
+    }
+    else {
+        ci->set_elapsed(0);
+    }
+
+    /* Skip to the first packet of the data section */
+    ci->advance_buffer(rmctx.data_offset + DATA_HEADER_SIZE);
+
+    /* The main decoder loop */  
+seek_start :         
+    while((unsigned)elapsed < rmctx.duration)
+    {  
+        bit_buffer = (uint8_t *) ci->request_buffer(&buff_size, scrambling_unit_size);
+        consumed = rm_get_packet(&bit_buffer, &rmctx, &pkt);
+        if(consumed < 0 && playback_on != 0) {
+            if(playback_on == -1) {
+            /* Error only if packet-parsing failed and playback hadn't started */
+                DEBUGF("rm_get_packet failed\n");
+                return CODEC_ERROR;
+            }
+            else
+                return CODEC_OK;
+        }
+
+        /* One iteration per frame held in the packet just read */
+        for(i = 0; i < rmctx.audio_pkt_cnt*(fs/sps) ; i++)
+        { 
+            /* A forced resume seek is handled before polling for commands */
+            if (action == CODEC_ACTION_NULL)
+                action = ci->get_command(&param);
+
+            if (action == CODEC_ACTION_HALT)
+                return CODEC_OK;
+
+            if (action == CODEC_ACTION_SEEK_TIME) {
+                /* Do not allow seeking beyond the file's length */
+                if ((unsigned) param > ci->id3->length) {
+                    ci->set_elapsed(ci->id3->length);
+                    ci->seek_complete();
+                    return CODEC_OK;
+                }       
+
+                /* Rewind to the first packet and reset the packet state */
+                ci->seek_buffer(rmctx.data_offset + DATA_HEADER_SIZE);
+                packet_count = rmctx.nb_packets;
+                rmctx.audio_pkt_cnt = 0;
+                rmctx.frame_number = 0;
+
+                /* Seek to the start of the track */
+                if (param == 0) {
+                    ci->set_elapsed(0);
+                    ci->seek_complete();
+                    action = CODEC_ACTION_NULL;
+                    goto seek_start;           
+                }                                                                
+                /* Number of whole units before the target time; the size
+                   of the most recently read unit (consumed) is used as
+                   the unit size for the byte offset */
+                num_units = (param/(sps*1000*8/rmctx.bit_rate))/(h*(fs/sps));                    
+                ci->seek_buffer(rmctx.data_offset + DATA_HEADER_SIZE + consumed * num_units);
+                bit_buffer = (uint8_t *) ci->request_buffer(&buff_size, scrambling_unit_size);
+                consumed = rm_get_packet(&bit_buffer, &rmctx, &pkt);
+                if(consumed < 0 && playback_on != 0) {
+                    if(playback_on == -1) {
+                    /* Error only if packet-parsing failed and playback hadn't started */
+                        DEBUGF("rm_get_packet failed\n");
+                        return CODEC_ERROR;
+                    }
+                    else
+                        return CODEC_OK;
+                }
+
+                packet_count = rmctx.nb_packets - rmctx.audio_pkt_cnt * num_units;
+                rmctx.frame_number = (param/(sps*1000*8/rmctx.bit_rate)); 
+                /* Step back one unit at a time while the packet that was
+                   read starts after the requested time */
+                /* NOTE(review): the result of rm_get_packet is not checked
+                   inside this loop */
+                while(rmctx.audiotimestamp > (unsigned) param) {
+                    rmctx.audio_pkt_cnt = 0;
+                    ci->seek_buffer(rmctx.data_offset + DATA_HEADER_SIZE + consumed * (num_units-1));
+                    bit_buffer = (uint8_t *) ci->request_buffer(&buff_size, scrambling_unit_size); 
+                    consumed = rm_get_packet(&bit_buffer, &rmctx, &pkt);                                                                             
+                    packet_count += rmctx.audio_pkt_cnt;
+                    num_units--;
+                }
+                /* Continue with the frame inside the packet that
+                   corresponds to the requested time */
+                time_offset = param - rmctx.audiotimestamp;
+                i = (time_offset/((sps * 8 * 1000)/rmctx.bit_rate));
+                elapsed = rmctx.audiotimestamp+(1000*8*sps/rmctx.bit_rate)*i;
+                ci->set_elapsed(elapsed);
+                ci->seek_complete(); 
+            }
+
+            action = CODEC_ACTION_NULL;
+
+            if(pkt.length)    
+                res = atrac3_decode_frame(rmctx.block_align, &q, &datasize, pkt.frames[i], rmctx.block_align);
+            else /* indicates that there are no remaining frames */
+                return CODEC_OK;
+
+            /* Anything other than a fully consumed frame is treated as fatal */
+            if(res != rmctx.block_align) {
+                DEBUGF("codec error\n");
+                return CODEC_ERROR;
+            }
+
+            if(datasize)
+                ci->pcmbuf_insert(q.outSamples, q.outSamples + 1024, q.samples_per_frame / rmctx.nb_channels);
+            playback_on = 1;
+            /* packet timestamp plus the duration of the frames before i */
+            elapsed = rmctx.audiotimestamp+(1000*8*sps/rmctx.bit_rate)*i;
+            ci->set_elapsed(elapsed);
+            rmctx.frame_number++;
+        }
+        packet_count -= rmctx.audio_pkt_cnt;
+        rmctx.audio_pkt_cnt = 0;
+        ci->advance_buffer(consumed);
+    }
+
+    return CODEC_OK;
+}
--- a/lib/rbcodec/codecs/au.c
+++ b/lib/rbcodec/codecs/au.c
@ -0,0 +1,314 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2010 Yoshihisa Uchida
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include "codeclib.h"
+#include "codecs/libpcm/support_formats.h"
+
+CODEC_HEADER
+
+/* Sun Audio file (Au file format) codec
+ *
+ * References
+ * [1] Sun Microsystems, Inc., Header file for Audio, .au, 1992
+ *     URL http://www.opengroup.org/public/pubs/external/auformat.html
+ * [2] Wikipedia, Au file format, URL: http://en.wikipedia.org/wiki/Sun_Audio
+ */
+
+/* Capacity, in 32-bit samples, of the decode output buffer below. */
+#define PCM_SAMPLE_SIZE (1024*2)
+
+/* Decoded samples; filled by codec->decode() and handed to
+ * ci->pcmbuf_insert() in codec_run(). */
+static int32_t samples[PCM_SAMPLE_SIZE] IBSS_ATTR;
+
+/* Internal format tags; they key the au_codecs[] lookup table. */
+enum
+{
+    AU_FORMAT_UNSUPPORT = 0, /* unsupported format */
+    AU_FORMAT_MULAW,         /* G.711 MULAW */
+    AU_FORMAT_PCM,           /* Linear PCM */
+    AU_FORMAT_IEEE_FLOAT,    /* IEEE float */
+    AU_FORMAT_ALAW,          /* G.711 ALAW */
+};
+
+/*
+ * Maps the header's encoding field to { internal format tag, bits per sample }.
+ * Rows 0-7 are indexed directly by the encoding value; row 8 holds
+ * encoding 27 (see convert_au_format()).
+ */
+static const char support_formats[9][2] = {
+  { AU_FORMAT_UNSUPPORT,  0  }, /* 0:  unsupported */
+  { AU_FORMAT_MULAW,      8  }, /* 1:  G.711 MULAW */
+  { AU_FORMAT_PCM,        8  }, /* 2:  Linear PCM 8bit (signed) */
+  { AU_FORMAT_PCM,        16 }, /* 3:  Linear PCM 16bit (signed, big endian) */
+  { AU_FORMAT_PCM,        24 }, /* 4:  Linear PCM 24bit (signed, big endian) */
+  { AU_FORMAT_PCM,        32 }, /* 5:  Linear PCM 32bit (signed, big endian) */
+  { AU_FORMAT_IEEE_FLOAT, 32 }, /* 6:  Linear PCM float 32bit (signed, big endian) */
+  { AU_FORMAT_IEEE_FLOAT, 64 }, /* 7:  Linear PCM float 64bit (signed, big endian) */
+                                /* encoding 8 - 26 unsupported. */
+  { AU_FORMAT_ALAW,       8  }, /* 27: G.711 ALAW */
+};
+
+/* Decoder getter for each internal format tag; searched by get_au_codec(). */
+static const struct pcm_entry au_codecs[] = {
+    { AU_FORMAT_MULAW,      get_itut_g711_mulaw_codec },
+    { AU_FORMAT_PCM,        get_linear_pcm_codec      },
+    { AU_FORMAT_IEEE_FLOAT, get_ieee_float_codec      },
+    { AU_FORMAT_ALAW,       get_itut_g711_alaw_codec  },
+};
+
+/* Number of entries in au_codecs[]; keep in sync with the table above. */
+#define NUM_FORMATS 4
+
+/*
+ * Look up the decoder registered for an internal format tag.
+ *
+ * Returns the codec produced by the first au_codecs[] entry whose tag equals
+ * formattag, or NULL when no entry matches or the entry has no getter.
+ */
+static const struct pcm_codec *get_au_codec(uint32_t formattag)
+{
+    const struct pcm_entry *entry = au_codecs;
+    const struct pcm_entry *const last = au_codecs + NUM_FORMATS;
+
+    for (; entry != last; entry++)
+    {
+        if (entry->format_tag != formattag)
+            continue;
+
+        /* the first matching tag decides the outcome */
+        return entry->get_codec ? entry->get_codec() : NULL;
+    }
+    return NULL;
+}
+
+/*
+ * Read a 32-bit big-endian unsigned value from the four bytes at buf.
+ *
+ * Each byte is widened to uint32_t before shifting: a plain uint8_t promotes
+ * to (signed) int, and shifting a value >= 0x80 left by 24 would overflow it.
+ */
+static unsigned int get_be32(const uint8_t *buf)
+{
+    return ((uint32_t)buf[0] << 24) | ((uint32_t)buf[1] << 16) |
+           ((uint32_t)buf[2] << 8)  |  (uint32_t)buf[3];
+}
+
+/*
+ * Translate the header's encoding value into fmt->formattag and
+ * fmt->bitspersample using support_formats[].
+ *
+ * Encodings 0-7 and 27 have a table row; for anything else formattag is set
+ * to AU_FORMAT_UNSUPPORT and bitspersample is left untouched.
+ * Returns the resulting fmt->formattag.
+ */
+static int convert_au_format(unsigned int encoding, struct pcm_format *fmt)
+{
+    int row = -1;   /* -1: encoding has no row in support_formats[] */
+
+    if (encoding < 8)
+        row = (int)encoding;
+    else if (encoding == 27)
+        row = 8;
+
+    if (row < 0)
+    {
+        fmt->formattag = AU_FORMAT_UNSUPPORT;
+        return fmt->formattag;
+    }
+
+    fmt->formattag = support_formats[row][0];
+    fmt->bitspersample = support_formats[row][1];
+    return fmt->formattag;
+}
+
+/* Codec entry point: configures the DSP sample depth when the codec is
+ * loaded; every other call reason is a no-op. Always returns CODEC_OK. */
+enum codec_status codec_main(enum codec_entry_call_reason reason)
+{
+    if (reason != CODEC_LOAD)
+        return CODEC_OK;
+
+    /* Generic codec initialisation */
+    ci->configure(DSP_SET_SAMPLE_DEPTH, PCM_OUTPUT_DEPTH-1);
+    return CODEC_OK;
+}
+
+/*
+ * Called once for each file to process.
+ *
+ * Parses the Sun Audio header (or falls back to headerless G.711 mu-law
+ * mono), selects the matching PCM decoder via get_au_codec() and then feeds
+ * decoded chunks to the PCM buffer until the data is exhausted or playback
+ * is halted.
+ *
+ * Returns CODEC_OK on normal completion (including a halt request and a
+ * resume/seek position past the end of the data) and CODEC_ERROR for an
+ * unusable header, an unsupported format or a decode failure.
+ */
+enum codec_status codec_run(void)
+{
+    struct pcm_format format;
+    uint32_t bytesdone, decodedsamples;
+    size_t n;
+    int bufcount;
+    int endofstream;
+    unsigned char *buf;
+    uint8_t *aubuf;
+    off_t firstblockposn;     /* position of the first block in file */
+    const struct pcm_codec *codec;
+    int offset = 0;
+    intptr_t param;
+
+    if (codec_init()) {
+        DEBUGF("codec_init() error\n");
+        return CODEC_ERROR;
+    }
+
+    codec_set_replaygain(ci->id3);
+
+    /* Need to save offset for later use (cleared indirectly by advance_buffer) */
+    bytesdone = ci->id3->offset;
+
+    ci->memset(&format, 0, sizeof(struct pcm_format));
+    format.is_signed = true;
+    format.is_little_endian = false;
+
+    /* set format */
+    ci->seek_buffer(0);
+    buf = ci->request_buffer(&n, 24);
+    if (n < 24 || (memcmp(buf, ".snd", 4) != 0))
+    {
+        /*
+         * headerless sun audio file
+         * It is decoded under conditions.
+         *     format:    G.711 mu-law
+         *     channel:   mono
+         *     frequency: 8000 Hz
+         */
+        offset = 0;
+        format.formattag     = AU_FORMAT_MULAW;
+        format.channels      = 1;
+        format.bitspersample = 8;
+        format.numbytes      = ci->id3->filesize;
+    }
+    else
+    {
+        /* parse header */
+
+        /* data offset */
+        offset = get_be32(buf + 4);
+        if (offset < 24)
+        {
+            DEBUGF("CODEC_ERROR: sun audio offset size is small: %d\n", offset);
+            return CODEC_ERROR;
+        }
+        /* data size; 0xffffffff means "unknown", so derive it from the file */
+        format.numbytes = get_be32(buf + 8);
+        if (format.numbytes == (uint32_t)0xffffffff)
+            format.numbytes = ci->id3->filesize - offset;
+        /* encoding */
+        format.formattag = convert_au_format(get_be32(buf + 12), &format);
+        if (format.formattag == AU_FORMAT_UNSUPPORT)
+        {
+            DEBUGF("CODEC_ERROR: sun audio unsupport format: %d\n", get_be32(buf + 12));
+            return CODEC_ERROR;
+        }
+        /* skip sample rate */
+        format.channels = get_be32(buf + 20);
+    }
+
+    /* advance to the start of the audio data */
+    ci->advance_buffer(offset);
+
+    firstblockposn = offset;
+
+    decodedsamples = 0;
+
+    /* get codec */
+    codec = get_au_codec(format.formattag);
+    if (!codec)
+    {
+        DEBUGF("CODEC_ERROR: unsupport sun audio format: %x\n", (int)format.formattag);
+        return CODEC_ERROR;
+    }
+
+    if (!codec->set_format(&format))
+    {
+        return CODEC_ERROR;
+    }
+
+    if (format.numbytes == 0) {
+        DEBUGF("CODEC_ERROR: data size is 0\n");
+        return CODEC_ERROR;
+    }
+
+    /* blockalign is the divisor in the chunk size check below; refuse the
+     * file instead of dividing by zero on malformed input */
+    if (format.blockalign == 0) {
+        DEBUGF("CODEC_ERROR: blockalign is 0\n");
+        return CODEC_ERROR;
+    }
+
+    /* check chunksize: one decoded chunk must fit into samples[] */
+    if ((format.chunksize / format.blockalign) * format.samplesperblock * format.channels
+           > PCM_SAMPLE_SIZE)
+        format.chunksize = (PCM_SAMPLE_SIZE / format.blockalign) * format.blockalign;
+    if (format.chunksize == 0)
+    {
+        DEBUGF("CODEC_ERROR: chunksize is 0\n");
+        return CODEC_ERROR;
+    }
+
+    ci->configure(DSP_SWITCH_FREQUENCY, ci->id3->frequency);
+    if (format.channels == 2) {
+        ci->configure(DSP_SET_STEREO_MODE, STEREO_INTERLEAVED);
+    } else if (format.channels == 1) {
+        ci->configure(DSP_SET_STEREO_MODE, STEREO_MONO);
+    } else {
+        DEBUGF("CODEC_ERROR: more than 2 channels\n");
+        return CODEC_ERROR;
+    }
+
+    /* make sure we're at the correct offset */
+    if (bytesdone > (uint32_t) firstblockposn) {
+        /* Round down to previous block */
+        struct pcm_pos *newpos = codec->get_seek_pos(bytesdone - firstblockposn,
+                                                     PCM_SEEK_POS, NULL);
+
+        if (newpos->pos > format.numbytes)
+            goto done;
+        /* NOTE(review): if this seek fails, bytesdone keeps the absolute
+         * file offset although it is compared against the data size later -
+         * confirm whether that case can occur. */
+        if (ci->seek_buffer(firstblockposn + newpos->pos))
+        {
+            bytesdone      = newpos->pos;
+            decodedsamples = newpos->samples;
+        }
+    } else {
+        /* already where we need to be */
+        bytesdone = 0;
+    }
+
+    ci->set_elapsed(decodedsamples*1000LL/ci->id3->frequency);
+
+    /* The main decoder loop */
+    endofstream = 0;
+
+    while (!endofstream) {
+        enum codec_command_action action = ci->get_command(&param);
+
+        if (action == CODEC_ACTION_HALT)
+            break;
+
+        if (action == CODEC_ACTION_SEEK_TIME) {
+            /* 3rd args(read_buffer) is unnecessary in the format which Sun Audio supports.  */
+            struct pcm_pos *newpos = codec->get_seek_pos(param, PCM_SEEK_TIME, NULL);
+
+            /* seeking past the end of the data finishes the track */
+            if (newpos->pos > format.numbytes)
+            {
+                ci->set_elapsed(ci->id3->length);
+                ci->seek_complete();
+                break;
+            }
+
+            if (ci->seek_buffer(firstblockposn + newpos->pos))
+            {
+                bytesdone      = newpos->pos;
+                decodedsamples = newpos->samples;
+            }
+
+            ci->set_elapsed(decodedsamples*1000LL/ci->id3->frequency);
+            ci->seek_complete();
+        }
+
+        aubuf = (uint8_t *)ci->request_buffer(&n, format.chunksize);
+        if (n == 0)
+            break; /* End of stream */
+        /* never decode past the declared data size */
+        if (bytesdone + n > format.numbytes) {
+            n = format.numbytes - bytesdone;
+            endofstream = 1;
+        }
+
+        if (codec->decode(aubuf, n, samples, &bufcount) == CODEC_ERROR)
+        {
+            DEBUGF("codec error\n");
+            return CODEC_ERROR;
+        }
+
+        ci->pcmbuf_insert(samples, NULL, bufcount);
+        ci->advance_buffer(n);
+        bytesdone += n;
+        decodedsamples += bufcount;
+
+        if (bytesdone >= format.numbytes)
+            endofstream = 1;
+        ci->set_elapsed(decodedsamples*1000LL/ci->id3->frequency);
+    }
+
+done:
+    return CODEC_OK;
+}
--- a/lib/rbcodec/codecs/ay.c
+++ b/lib/rbcodec/codecs/ay.c
@ -0,0 +1,137 @@
+
+/* Ripped off from Game_Music_Emu 0.5.2. http://www.slack.net/~ant/ */
+
+#include <codecs/lib/codeclib.h>
+#include "libgme/ay_emu.h" 
+
+CODEC_HEADER
+
+/* Number of 16-bit samples rendered per iteration (size of samples[]) */
+#define CHUNK_SIZE (1024*2)
+
+static int16_t samples[CHUNK_SIZE] IBSS_ATTR;
+static struct Ay_Emu ay_emu;
+
+/****************** rockbox interface ******************/
+
+/* Start track t on the emulator, arm the fade-out unless the track is being
+ * looped, and reset the elapsed display (track number for multitrack files,
+ * zero otherwise). */
+static void set_codec_track(int t, int multitrack) {
+    Ay_start_track(&ay_emu, t);
+
+    /* for loop mode we disable track limits */
+    if (!ci->loop_track()) {
+        Track_set_fade(&ay_emu, Track_get_length( &ay_emu, t ) - 4000, 4000);
+    }
+
+    /* t is track no to display */
+    ci->set_elapsed(multitrack ? t*1000 : 0);
+}
+
+/* Codec entry point: on load, configures the DSP for 16-bit 44.1 kHz
+ * interleaved stereo and initialises the AY emulator at the same rate.
+ * Other call reasons do nothing. Always returns CODEC_OK. */
+enum codec_status codec_main(enum codec_entry_call_reason reason)
+{
+    const int sample_rate = 44100;
+
+    if (reason != CODEC_LOAD)
+        return CODEC_OK;
+
+    /* we only render 16 bits */
+    ci->configure(DSP_SET_SAMPLE_DEPTH, 16);
+
+    /* 44 Khz, Interleaved stereo */
+    ci->configure(DSP_SET_FREQUENCY, sample_rate);
+    ci->configure(DSP_SET_STEREO_MODE, STEREO_INTERLEAVED);
+
+    Ay_init(&ay_emu);
+    Ay_set_sample_rate(&ay_emu, sample_rate);
+
+    return CODEC_OK;
+}
+
+/*
+ * Called once for each file to process.
+ *
+ * Loads the whole file into the emulator, then renders CHUNK_SIZE samples per
+ * iteration until the last track ends or a halt is requested. For multitrack
+ * files a time seek selects a track instead of a position within a track.
+ *
+ * Returns CODEC_ERROR if initialisation or loading fails, CODEC_OK otherwise.
+ */
+enum codec_status codec_run(void)
+{
+    blargg_err_t err;
+    uint8_t *buf;
+    size_t n;
+    int track, is_multitrack;
+    intptr_t param;
+    uint32_t elapsed_time;
+
+    /* reset values */
+    track = is_multitrack = 0;
+    elapsed_time = 0;
+
+    DEBUGF("AY: next_track\n");
+    if (codec_init()) {
+        return CODEC_ERROR;
+    }  
+
+    codec_set_replaygain(ci->id3);
+        
+    /* Read the entire file */
+    DEBUGF("AY: request file\n");
+    ci->seek_buffer(0);
+    buf = ci->request_buffer(&n, ci->filesize);
+    if (!buf || n < (size_t)ci->filesize) {
+        DEBUGF("AY: file load failed\n");
+        return CODEC_ERROR;
+    }
+   
+    if ((err = Ay_load_mem(&ay_emu, buf, ci->filesize))) {
+        DEBUGF("AY: Ay_load_mem failed (%s)\n", err);
+        return CODEC_ERROR;
+    }
+
+    /* Update internal track count */
+    if (ay_emu.m3u.size > 0)
+        ay_emu.track_count = ay_emu.m3u.size;
+
+    /* Check if file has multiple tracks */
+    if (ay_emu.track_count > 1) {
+        is_multitrack = 1;
+    }
+
+    /* Re-entered via goto whenever a different track has to be started. */
+next_track:
+    set_codec_track(track, is_multitrack);
+
+    /* The main decoder loop */
+    while (1) {
+        enum codec_command_action action = ci->get_command(&param);
+
+        if (action == CODEC_ACTION_HALT)
+            break;
+
+        if (action == CODEC_ACTION_SEEK_TIME) {
+            if (is_multitrack) {
+                /* one "second" of seek time per track; mirrors the
+                 * set_elapsed(t*1000) done in set_codec_track() */
+                track = param/1000;
+                ci->seek_complete();
+                if (track >= ay_emu.track_count) break;
+                goto next_track;
+            }
+
+            /* single track: seek within the track (param is in ms) */
+            ci->set_elapsed(param);
+            elapsed_time = param;
+            Track_seek(&ay_emu, param);
+            ci->seek_complete();
+            
+            /* Set fade again */
+            if (!ci->loop_track()) {
+                Track_set_fade(&ay_emu, Track_get_length( &ay_emu, track ) - 4000, 4000);
+            }
+        }
+
+        /* Generate audio buffer */
+        err = Ay_play(&ay_emu, CHUNK_SIZE, samples);
+        /* an emulator error is handled like the end of the track: move on
+         * to the next one, or stop after the last */
+        if (err || Track_ended(&ay_emu)) {
+            track++;
+            if (track >= ay_emu.track_count) break;
+            goto next_track;
+        }
+
+        /* CHUNK_SIZE interleaved samples make CHUNK_SIZE/2 stereo frames */
+        ci->pcmbuf_insert(samples, NULL, CHUNK_SIZE >> 1);
+
+        /* Set elapsed time for one track files */
+        if (!is_multitrack) {
+            /* frames * 1000 / 44100 ms, reduced to frames * 10 / 441 */
+            elapsed_time += (CHUNK_SIZE / 2) * 10 / 441;
+            ci->set_elapsed(elapsed_time);
+        }
+    }
+
+    return CODEC_OK;
+}
--- a/lib/rbcodec/codecs/codec_crt0.c
+++ b/lib/rbcodec/codecs/codec_crt0.c
@ -0,0 +1,74 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2006 Tomasz Malesinski
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include "config.h"
+#include "codecs.h"
+
+struct codec_api *ci DATA_ATTR;
+
+extern unsigned char plugin_bss_start[];
+extern unsigned char plugin_end_addr[];
+
+/* Stub that always returns 0. The real entry point (codec_start) is reached
+ * through its reference in __header; this symbol only exists to avoid
+ * warnings with certain compilers. */
+int _start(void) {return 0;}
+
+/*
+ * Entry point referenced from the codec header.
+ *
+ * On native targets a CODEC_LOAD call first sets up the codec's memory image:
+ * with USE_IRAM the IRAM section is copied in and the IRAM bss zeroed, then
+ * the regular bss (plugin_bss_start..plugin_end_addr) is zeroed and the data
+ * cache committed. In every case the call is then forwarded to codec_main()
+ * and its status returned.
+ */
+enum codec_status codec_start(enum codec_entry_call_reason reason)
+{
+#if (CONFIG_PLATFORM & PLATFORM_NATIVE)
+    if (reason == CODEC_LOAD)
+    {
+#ifdef USE_IRAM
+        extern char iramcopy[], iramstart[], iramend[], iedata[], iend[];
+        size_t iram_size = iramend - iramstart;
+        size_t ibss_size = iend - iedata;
+        if (iram_size > 0 || ibss_size > 0)
+        {
+            ci->memcpy(iramstart, iramcopy, iram_size);
+            ci->memset(iedata, 0, ibss_size);
+            /* make the icache (if it exists) up to date with the new code */
+            ci->commit_discard_idcache();
+            /* barrier to prevent reordering of the IRAM copy above and the
+             * BSS clearing below, because the BSS segment aliases the IRAM
+             * copy.
+             */
+            asm volatile ("" ::: "memory");
+        }
+#endif /* USE_IRAM */
+        ci->memset(plugin_bss_start, 0, plugin_end_addr - plugin_bss_start);
+        /* Some parts of bss may be used via a no-cache alias (at least
+         * portalplayer has this). If we don't clear the cache, those aliases
+         * may read garbage */
+        ci->commit_dcache();
+    }
+#endif /* CONFIG_PLATFORM */
+
+    /* Note: If for any reason codec_main would not be called with CODEC_LOAD
+     * because the above code failed then it must never be called with
+     * any other value and some strategy to avoid doing so must be conceived */
+    return codec_main(reason);
+}
+
+#if defined(CPU_ARM) && (CONFIG_PLATFORM & PLATFORM_NATIVE)
+/* __div0 for native ARM builds: branches (bx) straight to the handler that
+ * the codec API provides as ci->__div0. The function is naked, so nothing but
+ * the asm statement is emitted for it.
+ * NOTE(review): presumably reached from the toolchain's integer-division
+ * helpers on a divide by zero - confirm. */
+void __attribute__((naked)) __div0(void)
+{
+    asm volatile("bx %0" : : "r"(ci->__div0));
+}
+#endif
--- a/lib/rbcodec/codecs/codecs.h
+++ b/lib/rbcodec/codecs/codecs.h
@ -0,0 +1,291 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2002 Björn Stenberg
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+#ifndef _CODECS_H_
+#define _CODECS_H_
+
+/* instruct simulator code to not redefine any symbols when compiling codecs.
+   (the CODEC macro is defined in codecs.make) */
+#ifdef CODEC
+#define NO_REDEFINES_PLEASE
+#endif
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include "strlcpy.h"
+#include "config.h"
+#include "system.h"
+#include "metadata.h"
+#include "audio.h"
+#ifdef RB_PROFILE
+#include "profile.h"
+#include "thread.h"
+#endif
+#if (CONFIG_CODEC == SWCODEC)
+#ifdef HAVE_RECORDING
+#include "pcm_record.h"
+#endif
+#include "dsp.h"
+#include "dsp-util.h"
+#endif
+
+#include "gcc_extensions.h"
+#include "load_code.h"
+
+#ifdef CODEC
+#if defined(DEBUG) || defined(SIMULATOR)
+#undef DEBUGF
+#define DEBUGF  ci->debugf
+#undef LDEBUGF
+#define LDEBUGF ci->debugf
+#else
+#define DEBUGF(...)
+#define LDEBUGF(...)
+#endif
+
+#ifdef ROCKBOX_HAS_LOGF
+#undef LOGF
+#define LOGF ci->logf
+#else
+#define LOGF(...)
+#endif
+
+#endif
+
+/* magic for normal codecs */
+#define CODEC_MAGIC 0x52434F44 /* RCOD */
+/* magic for encoder codecs */
+#define CODEC_ENC_MAGIC 0x52454E43 /* RENC */
+
+/* increase this every time the api struct changes */
+#define CODEC_API_VERSION 44
+
+/* update this to latest version if a change to the api struct breaks
+   backwards compatibility (and please take the opportunity to sort in any
+   new function which are "waiting" at the end of the function table) */
+#define CODEC_MIN_API_VERSION 43
+
+/* reasons for calling codec main entrypoint (the argument of codec_start()
+   and codec_main()) */
+enum codec_entry_call_reason {
+    CODEC_LOAD = 0,
+    CODEC_UNLOAD
+};
+
+/* codec return codes (returned by codec_start(), codec_main(), codec_run()) */
+enum codec_status {
+    CODEC_OK = 0,
+    CODEC_ERROR = -1,
+};
+
+/* codec command action codes (returned by codec_api.get_command()) */
+enum codec_command_action {
+    CODEC_ACTION_HALT = -1,
+    CODEC_ACTION_NULL = 0,
+    CODEC_ACTION_SEEK_TIME = 1,
+};
+
+/* Table of data and callbacks through which a codec reaches the rest of the
+   firmware; codecs access it via the global `ci` pointer.
+
+   NOTE: To support backwards compatibility, only add new functions at
+         the end of the structure.  Every time you add a new function,
+         remember to increase CODEC_API_VERSION.  If you make changes to the
+         existing APIs then also update CODEC_MIN_API_VERSION to current
+         version
+ */
+struct codec_api {
+    off_t  filesize;          /* Total file length */
+    off_t  curpos;            /* Current buffer position */
+    
+    struct mp3entry *id3;     /* TAG metadata pointer */
+    int    audio_hid;         /* Current audio handle */
+    
+    /* The dsp instance to be used for audio output */
+    struct dsp_config *dsp;
+    
+    /* Returns buffer to malloc array. Only codeclib should need this. */
+    void* (*codec_get_buffer)(size_t *size);
+    /* Insert PCM data into audio buffer for playback. Playback will start
+       automatically. */
+    void (*pcmbuf_insert)(const void *ch1, const void *ch2, int count);
+    /* Set song position in WPS (value in ms). */
+    void (*set_elapsed)(unsigned long value);
+    
+    /* Read the next <size> bytes from the file buffer into <ptr>.
+       Will return number of bytes read or 0 if end of file. */
+    size_t (*read_filebuf)(void *ptr, size_t size);
+    /* Request pointer to file buffer which can be used to read
+       <realsize> amount of data. <reqsize> tells the buffer system
+       how much data it should try to allocate. If <realsize> is 0,
+       end of file is reached. */
+    void* (*request_buffer)(size_t *realsize, size_t reqsize);
+    /* Advance file buffer position by <amount> bytes. */
+    void (*advance_buffer)(size_t amount);
+    /* Seek file buffer to position <newpos>, counted from the beginning of
+       the file. */
+    bool (*seek_buffer)(size_t newpos);
+    /* Codec should call this function when it has done the seeking. */
+    void (*seek_complete)(void);
+    /* Update the current position */
+    void (*set_offset)(size_t value);
+    /* Configure different codec buffer parameters. */
+    void (*configure)(int setting, intptr_t value);
+    /* Obtain command action on what to do next */
+    enum codec_command_action (*get_command)(intptr_t *param);
+    /* Determine whether the track should be looped, if applicable. */
+    bool (*loop_track)(void);
+
+    /* kernel/ system */
+#if defined(CPU_ARM) && CONFIG_PLATFORM & PLATFORM_NATIVE
+    void (*__div0)(void);
+#endif
+    unsigned (*sleep)(unsigned ticks);
+    void (*yield)(void);
+
+#if NUM_CORES > 1
+    unsigned int
+        (*create_thread)(void (*function)(void), void* stack,
+                         size_t stack_size, unsigned flags, const char *name
+                         IF_PRIO(, int priority)
+                         IF_COP(, unsigned int core));
+
+    void (*thread_thaw)(unsigned int thread_id);
+    void (*thread_wait)(unsigned int thread_id);
+    void (*semaphore_init)(struct semaphore *s, int max, int start);
+    int  (*semaphore_wait)(struct semaphore *s, int timeout);
+    void (*semaphore_release)(struct semaphore *s);
+#endif /* NUM_CORES */
+
+    void (*commit_dcache)(void);
+    void (*commit_discard_dcache)(void);
+
+    /* strings and memory */
+    char* (*strcpy)(char *dst, const char *src);
+    size_t (*strlen)(const char *str);
+    int (*strcmp)(const char *, const char *);
+    char *(*strcat)(char *s1, const char *s2);
+    void* (*memset)(void *dst, int c, size_t length);
+    void* (*memcpy)(void *out, const void *in, size_t n);
+    void* (*memmove)(void *out, const void *in, size_t n);
+    int (*memcmp)(const void *s1, const void *s2, size_t n);
+    void *(*memchr)(const void *s1, int c, size_t n);
+
+    /* debug output; only present in DEBUG or SIMULATOR builds */
+#if defined(DEBUG) || defined(SIMULATOR)
+    void (*debugf)(const char *fmt, ...) ATTRIBUTE_PRINTF(1, 2);
+#endif
+#ifdef ROCKBOX_HAS_LOGF
+    void (*logf)(const char *fmt, ...) ATTRIBUTE_PRINTF(1, 2);
+#endif
+
+    /* Tremor requires qsort */
+    void (*qsort)(void *base, size_t nmemb, size_t size,
+                  int(*compar)(const void *, const void *));
+
+#ifdef RB_PROFILE
+    void (*profile_thread)(void);
+    void (*profstop)(void);
+    void (*profile_func_enter)(void *this_fn, void *call_site);
+    void (*profile_func_exit)(void *this_fn, void *call_site);
+#endif
+ 
+    /* encoder support and file access; only present with HAVE_RECORDING */
+#ifdef HAVE_RECORDING
+    void            (*enc_get_inputs)(struct enc_inputs *inputs);
+    void            (*enc_set_parameters)(struct enc_parameters *params);
+    struct enc_chunk_hdr * (*enc_get_chunk)(void);
+    void            (*enc_finish_chunk)(void);
+    unsigned char * (*enc_get_pcm_data)(size_t size);
+    size_t          (*enc_unget_pcm_data)(size_t size);
+
+    /* file */
+    int (*open)(const char* pathname, int flags, ...);
+    int (*close)(int fd);
+    ssize_t (*read)(int fd, void* buf, size_t count);
+    off_t (*lseek)(int fd, off_t offset, int whence);
+    ssize_t (*write)(int fd, const void* buf, size_t count);
+    int (*round_value_to_list32)(unsigned long value,
+                                 const unsigned long list[],
+                                 int count,
+                                 bool signd);
+#endif
+
+    /* new stuff at the end, sort into place next time
+       the API gets incompatible */
+    void (*commit_discard_idcache)(void);
+};
+
+/* codec header: emitted once per codec by the CODEC_HEADER and
+   CODEC_ENC_HEADER macros */
+struct codec_header {
+    struct lc_header lc_hdr; /* must be first */
+    /* load/unload entry point; the header macros store codec_start here */
+    enum codec_status(*entry_point)(enum codec_entry_call_reason reason);
+    /* per-file run function; the header macros store codec_run here */
+    enum codec_status(*run_proc)(void);
+    /* address of the codec's `ci` pointer (&ci in the header macros) */
+    struct codec_api **api;
+};
+
+/* Header definitions used inside codecs only. Each macro defines the single
+   `__header` object of a codec: magic, target id and API version, the image
+   bounds (NULL on non-native builds), codec_start, codec_run and &ci. */
+#ifdef CODEC
+#if (CONFIG_PLATFORM & PLATFORM_NATIVE)
+/* plugin_* is correct, codecs use the plugin linker script */
+extern unsigned char plugin_start_addr[];
+extern unsigned char plugin_end_addr[];
+/* decoders */
+#define CODEC_HEADER \
+        const struct codec_header __header \
+        __attribute__ ((section (".header")))= { \
+        { CODEC_MAGIC, TARGET_ID, CODEC_API_VERSION, \
+        plugin_start_addr, plugin_end_addr }, codec_start, \
+        codec_run, &ci };
+/* encoders */
+#define CODEC_ENC_HEADER \
+        const struct codec_header __header \
+        __attribute__ ((section (".header")))= { \
+        { CODEC_ENC_MAGIC, TARGET_ID, CODEC_API_VERSION, \
+        plugin_start_addr, plugin_end_addr }, codec_start, \
+        codec_run, &ci };
+
+#else /* !(CONFIG_PLATFORM & PLATFORM_NATIVE) */
+/* decoders */
+#define CODEC_HEADER \
+        const struct codec_header __header \
+        __attribute__((visibility("default"))) = { \
+        { CODEC_MAGIC, TARGET_ID, CODEC_API_VERSION, NULL, NULL }, \
+        codec_start, codec_run, &ci };
+/* encoders */
+/* NOTE(review): unlike CODEC_HEADER above, this variant carries no
+   visibility("default") attribute - confirm that this is intended. */
+#define CODEC_ENC_HEADER \
+        const struct codec_header __header = { \
+        { CODEC_ENC_MAGIC, TARGET_ID, CODEC_API_VERSION, NULL, NULL }, \
+        codec_start, codec_run, &ci };
+#endif /* CONFIG_PLATFORM & PLATFORM_NATIVE */
+#endif /* CODEC */
+
+/* create full codec path from root filenames in audio_formats[]
+   assumes buffer size is MAX_PATH */
+void codec_get_full_path(char *path, const char *codec_root_fn);
+
+/* Returns pointer to and size of free codec RAM */
+void *codec_get_buffer_callback(size_t *size);
+
+/* defined by the codec loader (codec.c) */
+int codec_load_buf(int hid, struct codec_api *api);
+int codec_load_file(const char* codec, struct codec_api *api);
+int codec_run_proc(void);
+int codec_halt(void);
+int codec_close(void);
+
+/* defined by the codec */
+enum codec_status codec_start(enum codec_entry_call_reason reason);
+enum codec_status codec_main(enum codec_entry_call_reason reason);
+enum codec_status codec_run(void);
+
+#endif /* _CODECS_H_ */
--- a/lib/rbcodec/codecs/codecs.make
+++ b/lib/rbcodec/codecs/codecs.make
@ -0,0 +1,206 @@
+#             __________               __   ___.
+#   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+#   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+#   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+#   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+#                     \/            \/     \/    \/            \/
+# $Id$
+#
+
+CODECDIR = $(RBCODEC_BLD)/codecs
+CODECS_SRC := $(call preprocess, $(RBCODECLIB_DIR)/codecs/SOURCES)
+OTHER_SRC += $(CODECS_SRC)
+
+CODECS := $(CODECS_SRC:.c=.codec)
+CODECS := $(subst $(RBCODECLIB_DIR),$(RBCODEC_BLD),$(CODECS))
+
+# the codec helper library
+include $(RBCODECLIB_DIR)/codecs/lib/libcodec.make
+OTHER_INC += -I$(RBCODECLIB_DIR)/codecs/lib
+
+# extra libraries
+CODEC_LIBS := $(EXTRA_LIBS) $(CODECLIB)
+
+# the codec libraries
+include $(RBCODECLIB_DIR)/codecs/demac/libdemac.make
+include $(RBCODECLIB_DIR)/codecs/liba52/liba52.make
+include $(RBCODECLIB_DIR)/codecs/libalac/libalac.make
+include $(RBCODECLIB_DIR)/codecs/libasap/libasap.make
+include $(RBCODECLIB_DIR)/codecs/libasf/libasf.make
+include $(RBCODECLIB_DIR)/codecs/libfaad/libfaad.make
+include $(RBCODECLIB_DIR)/codecs/libffmpegFLAC/libffmpegFLAC.make
+include $(RBCODECLIB_DIR)/codecs/libm4a/libm4a.make
+include $(RBCODECLIB_DIR)/codecs/libmad/libmad.make
+include $(RBCODECLIB_DIR)/codecs/libmusepack/libmusepack.make
+include $(RBCODECLIB_DIR)/codecs/libspc/libspc.make
+include $(RBCODECLIB_DIR)/codecs/libspeex/libspeex.make
+include $(RBCODECLIB_DIR)/codecs/libtremor/libtremor.make
+include $(RBCODECLIB_DIR)/codecs/libwavpack/libwavpack.make
+include $(RBCODECLIB_DIR)/codecs/libwma/libwma.make
+include $(RBCODECLIB_DIR)/codecs/libwmapro/libwmapro.make
+include $(RBCODECLIB_DIR)/codecs/libcook/libcook.make
+include $(RBCODECLIB_DIR)/codecs/librm/librm.make
+include $(RBCODECLIB_DIR)/codecs/libatrac/libatrac.make
+include $(RBCODECLIB_DIR)/codecs/libpcm/libpcm.make
+include $(RBCODECLIB_DIR)/codecs/libtta/libtta.make
+include $(RBCODECLIB_DIR)/codecs/libgme/libay.make
+include $(RBCODECLIB_DIR)/codecs/libgme/libgbs.make
+include $(RBCODECLIB_DIR)/codecs/libgme/libhes.make
+include $(RBCODECLIB_DIR)/codecs/libgme/libnsf.make
+include $(RBCODECLIB_DIR)/codecs/libgme/libsgc.make
+include $(RBCODECLIB_DIR)/codecs/libgme/libvgm.make
+include $(RBCODECLIB_DIR)/codecs/libgme/libkss.make
+include $(RBCODECLIB_DIR)/codecs/libgme/libemu2413.make
+
+# compile flags for codecs
+CODECFLAGS = $(CFLAGS) $(RBCODEC_CFLAGS) -fstrict-aliasing \
+			 -I$(RBCODECLIB_DIR)/codecs -I$(RBCODECLIB_DIR)/codecs/lib -DCODEC
+
+# set CODECFLAGS per codec lib, since gcc takes the last -Ox and the last
+# in a -ffoo -fno-foo pair, there is no need to filter them out
+$(A52LIB) : CODECFLAGS += -O1
+$(ALACLIB) : CODECFLAGS += -O1
+$(ASAPLIB) : CODECFLAGS += -O1
+$(ASFLIB) : CODECFLAGS += -O2
+$(ATRACLIB) : CODECFLAGS += -O1
+$(AYLIB) : CODECFLAGS += -O2
+$(COOKLIB): CODECFLAGS += -O1
+$(DEMACLIB) : CODECFLAGS += -O3
+$(FAADLIB) : CODECFLAGS += -O2
+$(FFMPEGFLACLIB) : CODECFLAGS += -O2
+$(GBSLIB) : CODECFLAGS +=  -O2
+$(HESLIB) : CODECFLAGS +=  -O2
+$(KSSLIB) : CODECFLAGS +=  -O2
+$(M4ALIB) : CODECFLAGS += -O3
+$(MUSEPACKLIB) : CODECFLAGS += -O1
+$(NSFLIB) : CODECFLAGS +=  -O2
+$(PCMSLIB) : CODECFLAGS += -O1
+$(RMLIB) : CODECFLAGS += -O3
+$(SGCLIB) : CODECFLAGS +=  -O2
+$(SPCLIB) : CODECFLAGS +=  -O1
+$(TREMORLIB) : CODECFLAGS += -O2
+$(TTALIB) : CODECFLAGS += -O2
+$(VGMLIB) : CODECFLAGS +=  -O2
+$(EMU2413LIB) : CODECFLAGS +=  -O3
+$(WAVPACKLIB) : CODECFLAGS += -O1
+$(WMALIB) : CODECFLAGS += -O2
+$(WMAPROLIB) : CODECFLAGS += -O1
+$(WMAVOICELIB) : CODECFLAGS += -O1
+
+# fine-tuning of CODECFLAGS per cpu arch
+ifeq ($(ARCH),arch_arm)
+  # redo per arm generation
+  $(ALACLIB) : CODECFLAGS += -O2
+  $(AYLIB) : CODECFLAGS +=  -O1
+  $(GBSLIB) : CODECFLAGS +=  -O1
+  $(HESLIB) : CODECFLAGS +=  -O1
+  $(KSSLIB) : CODECFLAGS +=  -O1
+  $(NSFLIB) : CODECFLAGS +=  -O1
+  $(SGCLIB) : CODECFLAGS +=  -O1
+  $(VGMLIB) : CODECFLAGS +=  -O1
+  $(EMU2413LIB) : CODECFLAGS +=  -O3
+  $(WAVPACKLIB) : CODECFLAGS += -O3
+else ifeq ($(ARCH),arch_m68k)
+  $(A52LIB) : CODECFLAGS += -O2
+  $(ASFLIB) : CODECFLAGS += -O3
+  $(ATRACLIB) : CODECFLAGS += -O2
+  $(COOKLIB): CODECFLAGS += -O2
+  $(DEMACLIB) : CODECFLAGS += -O2
+  $(SPCLIB) : CODECFLAGS +=  -O3
+  $(WMAPROLIB) : CODECFLAGS += -O3
+  $(WMAVOICELIB) : CODECFLAGS += -O2
+endif
+
+ifeq ($(MEMORYSIZE),2)
+  $(ASFLIB) : CODECFLAGS += -Os
+  $(WMALIB) : CODECFLAGS += -Os
+endif
+
+ifndef APP_TYPE
+  CONFIGFILE := $(FIRMDIR)/export/config/$(MODELNAME).h
+  CODEC_LDS := $(APPSDIR)/plugins/plugin.lds # codecs and plugins use same file
+  CODECLINK_LDS := $(CODECDIR)/codec.link
+endif
+
+CODEC_CRT0 := $(CODECDIR)/codec_crt0.o
+
+$(CODECS): $(CODEC_CRT0) $(CODECLINK_LDS)
+
+# Preprocess the linker script shared with plugins (with -DCODEC) into the
+# codec link script; redone when the script or the target config changes.
+$(CODECLINK_LDS): $(CODEC_LDS) $(CONFIGFILE)
+	$(call PRINTS,PP $(@F))
+	$(shell mkdir -p $(dir $@))
+	$(call preprocess2file, $<, $@, -DCODEC)
+
+# codec/library dependencies
+$(CODECDIR)/spc.codec : $(CODECDIR)/libspc.a
+$(CODECDIR)/mpa.codec : $(CODECDIR)/libmad.a
+$(CODECDIR)/a52.codec : $(CODECDIR)/liba52.a
+$(CODECDIR)/flac.codec : $(CODECDIR)/libffmpegFLAC.a
+$(CODECDIR)/vorbis.codec : $(CODECDIR)/libtremor.a
+$(CODECDIR)/speex.codec : $(CODECDIR)/libspeex.a
+$(CODECDIR)/mpc.codec : $(CODECDIR)/libmusepack.a
+$(CODECDIR)/wavpack.codec : $(CODECDIR)/libwavpack.a
+$(CODECDIR)/alac.codec : $(CODECDIR)/libalac.a $(CODECDIR)/libm4a.a 
+$(CODECDIR)/aac.codec : $(CODECDIR)/libfaad.a $(CODECDIR)/libm4a.a
+$(CODECDIR)/shorten.codec : $(CODECDIR)/libffmpegFLAC.a
+$(CODECDIR)/ape-pre.map : $(CODECDIR)/libdemac-pre.a
+$(CODECDIR)/ape.codec : $(CODECDIR)/libdemac.a
+$(CODECDIR)/wma.codec : $(CODECDIR)/libwma.a $(CODECDIR)/libasf.a
+$(CODECDIR)/wmapro.codec : $(CODECDIR)/libwmapro.a $(CODECDIR)/libasf.a
+$(CODECDIR)/wavpack_enc.codec: $(CODECDIR)/libwavpack.a
+$(CODECDIR)/asap.codec : $(CODECDIR)/libasap.a
+$(CODECDIR)/cook.codec : $(CODECDIR)/libcook.a $(CODECDIR)/librm.a
+$(CODECDIR)/raac.codec : $(CODECDIR)/libfaad.a $(CODECDIR)/librm.a
+$(CODECDIR)/a52_rm.codec : $(CODECDIR)/liba52.a $(CODECDIR)/librm.a
+$(CODECDIR)/atrac3_rm.codec : $(CODECDIR)/libatrac.a $(CODECDIR)/librm.a
+$(CODECDIR)/atrac3_oma.codec : $(CODECDIR)/libatrac.a
+$(CODECDIR)/aiff.codec : $(CODECDIR)/libpcm.a
+$(CODECDIR)/wav.codec : $(CODECDIR)/libpcm.a
+$(CODECDIR)/smaf.codec : $(CODECDIR)/libpcm.a
+$(CODECDIR)/au.codec : $(CODECDIR)/libpcm.a
+$(CODECDIR)/vox.codec : $(CODECDIR)/libpcm.a
+$(CODECDIR)/wav64.codec : $(CODECDIR)/libpcm.a
+$(CODECDIR)/tta.codec : $(CODECDIR)/libtta.a
+$(CODECDIR)/ay.codec : $(CODECDIR)/libay.a
+$(CODECDIR)/gbs.codec : $(CODECDIR)/libgbs.a
+$(CODECDIR)/hes.codec : $(CODECDIR)/libhes.a
+$(CODECDIR)/nsf.codec : $(CODECDIR)/libnsf.a $(CODECDIR)/libemu2413.a
+$(CODECDIR)/sgc.codec : $(CODECDIR)/libsgc.a $(CODECDIR)/libemu2413.a
+$(CODECDIR)/vgm.codec : $(CODECDIR)/libvgm.a $(CODECDIR)/libemu2413.a
+$(CODECDIR)/kss.codec : $(CODECDIR)/libkss.a $(CODECDIR)/libemu2413.a
+
+$(CODECS): $(CODEC_LIBS) # this must be last in codec dependency list
+
+# pattern rule for compiling codecs
+$(CODECDIR)/%.o: $(RBCODECLIB_DIR)/codecs/%.c
+	$(SILENT)mkdir -p $(dir $@)
+	$(call PRINTS,CC $(subst $(ROOTDIR)/,,$<))$(CC) \
+		-I$(dir $<) $(CODECFLAGS) -c $< -o $@
+
+# pattern rule for assembling codec .S sources
+$(CODECDIR)/%.o: $(RBCODECLIB_DIR)/codecs/%.S
+	$(SILENT)mkdir -p $(dir $@)
+	$(call PRINTS,CC $(subst $(ROOTDIR)/,,$<))$(CC) \
+		-I$(dir $<) $(CODECFLAGS) $(ASMFLAGS) -c $< -o $@
+
+ifdef APP_TYPE
+ CODECLDFLAGS = $(SHARED_LDFLAG) -Wl,--gc-sections -Wl,-Map,$(CODECDIR)/$*.map
+ CODECFLAGS += $(SHARED_CFLAGS) # <-- from Makefile
+else
+ CODECLDFLAGS = -T$(CODECLINK_LDS) -Wl,--gc-sections -Wl,-Map,$(CODECDIR)/$*.map
+ CODECFLAGS += -UDEBUG -DNDEBUG
+endif
+CODECLDFLAGS += $(GLOBAL_LDOPTS)
+
+# Preliminary link of a codec: the output is $*-pre.elf and the target is the
+# linker map written through -Wl,-Map (the .map name in CODECLDFLAGS is
+# rewritten to -pre.map). The codec libraries are prerequisites so they are
+# built first; CODECLIB is filtered out and re-added last on the link line.
+$(CODECDIR)/%-pre.map: $(CODEC_CRT0) $(CODECLINK_LDS) $(CODECDIR)/%.o $(CODEC_LIBS)
+	$(call PRINTS,LD $(@F))$(CC) $(CODECFLAGS) -o $(CODECDIR)/$*-pre.elf \
+		$(filter %.o, $^) \
+		$(filter-out $(CODECLIB),$(filter %.a, $+)) $(CODECLIB) \
+		-lgcc $(subst .map,-pre.map,$(CODECLDFLAGS))
+
+# Link a codec: all .o and .a prerequisites (including those added by the
+# dependency lines above) go into $*.elf, which is then passed through the
+# objcopy macro to produce the final .codec file.
+$(CODECDIR)/%.codec: $(CODECDIR)/%.o
+	$(call PRINTS,LD $(@F))$(CC) $(CODECFLAGS) -o $(CODECDIR)/$*.elf \
+		$(filter %.o, $^) \
+		$(filter %.a, $+) \
+		-lgcc $(CODECLDFLAGS)
+	$(SILENT)$(call objcopy,$(CODECDIR)/$*.elf,$@)
--- a/lib/rbcodec/codecs/cook.c
+++ b/lib/rbcodec/codecs/cook.c
@ -0,0 +1,202 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2009 Mohamed Tarek
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include <string.h>
+
+#include "logf.h"
+#include "codeclib.h"
+#include "inttypes.h"
+#include "libcook/cook.h"
+
+CODEC_HEADER
+
+static RMContext rmctx         IBSS_ATTR_COOK_LARGE_IRAM; /* stream parameters, copied in by init_rm() */
+static RMPacket pkt            IBSS_ATTR_COOK_LARGE_IRAM; /* filled by rm_get_packet(); pkt.frames[] feeds the decoder */
+static COOKContext q           IBSS_ATTR; /* decoder state, set up by cook_decode_init() */
+static int32_t rm_outbuf[2048] IBSS_ATTR_COOK_LARGE_IRAM MEM_ALIGN_ATTR; /* decoder output; second channel starts at q.samples_per_channel */
+
+/* Fill *rmctx with the RMContext image stored in the current track's
+ * id3v2buf. The image is read from the buffer address rounded up to the
+ * next multiple of 4. */
+static void init_rm(RMContext *rmctx)
+{
+    intptr_t aligned_src = ((intptr_t)ci->id3->id3v2buf + 3) & ~3;
+
+    memcpy(rmctx, (void *)aligned_src, sizeof(*rmctx));
+}
+
+/* Codec entry point: there is nothing to set up or tear down here, so every
+ * call reason is acknowledged with CODEC_OK. */
+enum codec_status codec_main(enum codec_entry_call_reason reason)
+{
+    (void)reason; /* intentionally unused */
+
+    return CODEC_OK;
+}
+
+/* this is called for each file to process
+ *
+ * Loads the stream parameters with init_rm(), configures the DSP and then
+ * decodes the file one scrambling unit at a time: each pass requests
+ * h * (fs + PACKET_HEADER_SIZE) bytes from the buffer, lets
+ * rm_get_packet() split them into frames and hands every decoded frame to
+ * ci->pcmbuf_insert(). Commands are polled before each frame; a mid-track
+ * resume offset is converted into an initial CODEC_ACTION_SEEK_TIME.
+ *
+ * Returns CODEC_OK at end of stream, on CODEC_ACTION_HALT, or when a seek
+ * targets a time beyond the track length; CODEC_ERROR when initialisation,
+ * packet parsing or frame decoding fails. */
+enum codec_status codec_run(void)
+{
+    static size_t buff_size;
+    int datasize, res, consumed, i, time_offset;
+    uint8_t *bit_buffer;
+    uint16_t fs,sps,h;
+    uint32_t packet_count;
+    int scrambling_unit_size, num_units;
+    size_t resume_offset;
+    intptr_t param = 0;
+    enum codec_command_action action = CODEC_ACTION_NULL;
+
+    if (codec_init()) {
+        DEBUGF("codec init failed\n");
+        return CODEC_ERROR;
+    }
+
+    resume_offset = ci->id3->offset;
+
+    codec_set_replaygain(ci->id3);
+    ci->memset(&rmctx,0,sizeof(RMContext));
+    ci->memset(&pkt,0,sizeof(RMPacket));
+    ci->memset(&q,0,sizeof(COOKContext));
+
+    ci->seek_buffer(0);
+
+    init_rm(&rmctx);
+
+    ci->configure(DSP_SET_FREQUENCY, ci->id3->frequency);
+    /* cook's sample representation is 21.11
+     * DSP_SET_SAMPLE_DEPTH = 11 (FRACT) + 16 (NATIVE) - 1 (SIGN) = 26 */
+    ci->configure(DSP_SET_SAMPLE_DEPTH, 26);
+    ci->configure(DSP_SET_STEREO_MODE, rmctx.nb_channels == 1 ?
+                  STEREO_MONO : STEREO_NONINTERLEAVED);
+
+    packet_count = rmctx.nb_packets;
+    /* From here on block_align holds the sub-packet size and
+     * audio_framesize the original block_align value. */
+    rmctx.audio_framesize = rmctx.block_align;
+    rmctx.block_align = rmctx.sub_packet_size;
+    fs = rmctx.audio_framesize;
+    sps= rmctx.block_align;
+    h = rmctx.sub_packet_h;
+    scrambling_unit_size = h * (fs + PACKET_HEADER_SIZE);
+
+    res =cook_decode_init(&rmctx, &q);
+    if(res < 0) {
+        DEBUGF("failed to initialize cook decoder\n");
+        return CODEC_ERROR;
+    }
+
+    /* check for a mid-track resume and force a seek time accordingly */
+    if(resume_offset > rmctx.data_offset + DATA_HEADER_SIZE) {
+        resume_offset -= rmctx.data_offset + DATA_HEADER_SIZE;
+        /* put number of subpackets to skip in resume_offset */
+        resume_offset /= (sps + PACKET_HEADER_SIZE);
+        /* subpackets * milliseconds per subpacket = resume time; the seek
+         * handler below recomputes num_units from this time itself */
+        param = (int)resume_offset * ((sps * 8 * 1000)/rmctx.bit_rate);
+        action = CODEC_ACTION_SEEK_TIME;
+    }
+    else {
+        ci->set_elapsed(0);
+    }
+
+    ci->advance_buffer(rmctx.data_offset + DATA_HEADER_SIZE);
+
+    /* The main decoder loop */
+seek_start :
+    while(packet_count)
+    {
+        bit_buffer = (uint8_t *) ci->request_buffer(&buff_size, scrambling_unit_size);
+        consumed = rm_get_packet(&bit_buffer, &rmctx, &pkt);
+        if(consumed < 0) {
+            DEBUGF("rm_get_packet failed\n");
+            return CODEC_ERROR;
+        }
+
+        for(i = 0; i < rmctx.audio_pkt_cnt*(fs/sps) ; i++)
+        {
+            /* A pending action (the forced resume seek) takes priority
+             * over polling for a new command. */
+            if (action == CODEC_ACTION_NULL)
+                action = ci->get_command(&param);
+
+            if (action == CODEC_ACTION_HALT)
+                return CODEC_OK;
+
+            if (action == CODEC_ACTION_SEEK_TIME) {
+                /* Do not allow seeking beyond the file's length */
+                if ((unsigned) param > ci->id3->length) {
+                    ci->set_elapsed(ci->id3->length);
+                    ci->seek_complete();
+                    return CODEC_OK;
+                }
+
+                ci->seek_buffer(rmctx.data_offset + DATA_HEADER_SIZE);
+                packet_count = rmctx.nb_packets;
+                rmctx.audio_pkt_cnt = 0;
+                rmctx.frame_number = 0;
+
+                /* Seek to the start of the track */
+                if (param == 0) {
+                    ci->set_elapsed(0);
+                    ci->seek_complete();
+                    action = CODEC_ACTION_NULL;
+                    goto seek_start;
+                }
+                /* target frame index / frames per scrambling unit */
+                num_units = (param/(sps*1000*8/rmctx.bit_rate))/(h*(fs/sps));
+                ci->seek_buffer(rmctx.data_offset + DATA_HEADER_SIZE + consumed * num_units);
+                bit_buffer = (uint8_t *) ci->request_buffer(&buff_size, scrambling_unit_size);
+                consumed = rm_get_packet(&bit_buffer, &rmctx, &pkt);
+                if(consumed < 0) {
+                    DEBUGF("rm_get_packet failed\n");
+                    ci->seek_complete();
+                    return CODEC_ERROR;
+                }
+                packet_count = rmctx.nb_packets - rmctx.audio_pkt_cnt * num_units;
+                rmctx.frame_number = (param/(sps*1000*8/rmctx.bit_rate));
+                /* Step back one unit at a time while the unit just read
+                 * starts after the requested time. */
+                while(rmctx.audiotimestamp > (unsigned) param) {
+                    rmctx.audio_pkt_cnt = 0;
+                    ci->seek_buffer(rmctx.data_offset + DATA_HEADER_SIZE + consumed * (num_units-1));
+                    bit_buffer = (uint8_t *) ci->request_buffer(&buff_size, scrambling_unit_size);
+                    consumed = rm_get_packet(&bit_buffer, &rmctx, &pkt);
+                    /* A failed parse must not be used as a byte count for
+                     * the next seek_buffer()/advance_buffer() call. */
+                    if(consumed < 0) {
+                        DEBUGF("rm_get_packet failed\n");
+                        ci->seek_complete();
+                        return CODEC_ERROR;
+                    }
+                    packet_count += rmctx.audio_pkt_cnt;
+                    num_units--;
+                }
+                /* Resume decoding at the frame inside this unit that
+                 * corresponds to the requested time. */
+                time_offset = param - rmctx.audiotimestamp;
+                i = (time_offset/((sps * 8 * 1000)/rmctx.bit_rate));
+                ci->set_elapsed(rmctx.audiotimestamp+(1000*8*sps/rmctx.bit_rate)*i);
+                ci->seek_complete();
+            }
+
+            action = CODEC_ACTION_NULL;
+
+            res = cook_decode_frame(&rmctx,&q, rm_outbuf, &datasize, pkt.frames[i], rmctx.block_align);
+            rmctx.frame_number++;
+
+            /* skip the first two frames; no valid audio */
+            if(rmctx.frame_number < 3) continue;
+
+            if(res != rmctx.block_align) {
+                DEBUGF("codec error\n");
+                return CODEC_ERROR;
+            }
+
+            ci->pcmbuf_insert(rm_outbuf,
+                              rm_outbuf+q.samples_per_channel,
+                              q.samples_per_channel);
+            ci->set_elapsed(rmctx.audiotimestamp+(1000*8*sps/rmctx.bit_rate)*i);
+        }
+        packet_count -= rmctx.audio_pkt_cnt;
+        rmctx.audio_pkt_cnt = 0;
+        ci->advance_buffer(consumed);
+    }
+
+    return CODEC_OK;
+}
--- a/lib/rbcodec/codecs/demac/COPYING
+++ b/lib/rbcodec/codecs/demac/COPYING
@ -0,0 +1,339 @@
+		    GNU GENERAL PUBLIC LICENSE
+		       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+			    Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+		    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+			    NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+		     END OF TERMS AND CONDITIONS
+
+	    How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
--- a/lib/rbcodec/codecs/demac/Makefile
+++ b/lib/rbcodec/codecs/demac/Makefile
@ -0,0 +1,42 @@
+# $Id$
+#
+# Makefile for the standalone demac decoder.
+# Builds $(OUTPUT) from demac.c, wavwrite.c and the libdemac objects.
+
+FILTERS = libdemac/filter_16_11.o libdemac/filter_64_11.o libdemac/filter_256_13.o libdemac/filter_32_10.o libdemac/filter_1280_15.o
+LIBOBJS = libdemac/parser.o libdemac/decoder.o libdemac/entropy.o libdemac/predictor.o libdemac/crc.o $(FILTERS)
+OBJS = demac.o wavwrite.o $(LIBOBJS)
+
+CFLAGS = -Wall -g -O3 -Ilibdemac
+
+# Pick the executable suffix and toolchain prefix: Cygwin hosts build a
+# .exe natively, WIN=... selects the mingw32 cross compiler, anything else
+# uses the plain host gcc.
+ifeq ($(findstring CYGWIN,$(shell uname)),CYGWIN)
+EXT = .exe
+CROSS =
+CFLAGS += -mno-cygwin
+else
+  ifdef WIN
+     EXT = .exe
+     CROSS = i586-mingw32msvc-
+  else
+     EXT =
+     CROSS =
+  endif
+endif
+
+CC = $(CROSS)gcc
+STRIP = $(CROSS)strip
+OUTPUT = demac$(EXT)
+
+# all and clean name actions, not files; declaring them phony keeps them
+# working even if a file called "all" or "clean" exists in this directory.
+.PHONY: all clean
+
+all: $(OUTPUT)
+
+$(OUTPUT): $(OBJS)
+	$(CC) $(CFLAGS) -o $(OUTPUT) $(OBJS)
+
+.c.o :
+	$(CC) $(CFLAGS) $(INC) -c -o $@ $<
+
+# Each per-size filter object is rebuilt whenever the shared filter.c changes.
+libdemac/filter_16_11.o: libdemac/filter.c
+libdemac/filter_64_11.o: libdemac/filter.c
+libdemac/filter_256_13.o: libdemac/filter.c
+libdemac/filter_1280_15.o: libdemac/filter.c
+libdemac/filter_32_10.o: libdemac/filter.c
+
+clean:
+	rm -f $(OUTPUT) $(OBJS) *~ */*~
--- a/lib/rbcodec/codecs/demac/README
+++ b/lib/rbcodec/codecs/demac/README
@ -0,0 +1,69 @@
+demac - a decoder for Monkey's Audio files.
+
+Introduction
+
+demac is an implementation in portable ANSI C of a decoder for the
+Monkey's Audio lossless compression format.  It has the following
+features:
+
+  * Open source (GNU GPL)
+  * Written in portable ANSI C
+  * Designed for use on low memory and embedded devices.  All internal
+    buffers are statically declared - the core library doesn't require 
+    malloc/free.  This has the disadvantage that the library isn't 
+    re-entrant.
+
+
+Compatibility
+
+
+libdemac is still in the early stages of development but has been
+relatively well tested with v3.99 files at all compression levels.
+
+v3.97 files have received less testing - 16-bit files seem to work,
+but 24-bit files are causing problems in the range decoder.
+
+Files earlier than v3.97 are not supported by libdemac, but support
+might be added in the future.
+
+
+Source Code
+
+The source code in this directory is structured as follows:
+
+demac/Makefile - Makefile for the standalone demac decoder
+demac/demac.c - Simple standalone test program to decode an APE file to WAV
+demac/wavwrite.[ch] - Helper functions for demac.c
+demac/libdemac/Makefile - A Makefile for use in Rockbox
+demac/libdemac/*.[ch] - The main libdemac code
+
+
+Latest Version
+
+The latest version of demac and libdemac can always be found in the
+"lib/rbcodec/codecs/demac/" directory in the Rockbox source.  You can check
+this out from svn with the command:
+
+svn co svn://svn.rockbox.org/rockbox/trunk/lib/rbcodec/codecs/demac demac
+
+Or browse the source code online here:
+
+http://svn.rockbox.org/viewvc.cgi/trunk/lib/rbcodec/codecs/demac
+
+
+
+Acknowledgements
+
+Thanks to Matt T. Ashland for writing Monkey's Audio.  His website
+can be found here: http://www.monkeysaudio.com
+
+
+Copyright and license
+
+
+libdemac is (C) 2007 Dave Chapman and is licensed under the GNU
+GPL. See the COPYING file in this directory.
+
+The exception is the majority of rangecoding.h, which is (C) 1997,
+1998, 1999, 2000 Michael Schindler and is also licensed under the GPL.
+See that source file for full details.
--- a/lib/rbcodec/codecs/demac/demac.c
+++ b/lib/rbcodec/codecs/demac/demac.c
@ -0,0 +1,281 @@
+/*
+
+demac - A Monkey's Audio decoder
+
+$Id$
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+/* 
+
+This example is intended to demonstrate how the decoder can be used in
+embedded devices - there is no usage of dynamic memory (i.e. no
+malloc/free) and small buffer sizes are chosen to minimise both the
+memory usage and decoding latency.
+
+This implementation requires the following memory and supports decoding of all APE files up to 24-bit Stereo.
+
+32768 - data from the input stream to be presented to the decoder in one contiguous chunk.
+18432 - decoding buffer (left channel)
+18432 - decoding buffer (right channel)
+
+17408+5120+2240 - buffers used for filter histories (compression levels 2000-5000)
+
+In addition, this example uses a static 27648 byte buffer as temporary
+storage for outputting the data to a WAV file but that could be
+avoided by writing the decoded data one sample at a time.
+
+*/
+
+#include <stdio.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+
+#include "demac.h"
+#include "wavwrite.h"
+
+/* O_BINARY is a non-POSIX open() flag.  Provide a no-op fallback only when
+   <fcntl.h> did not define it, rather than keying on one particular Windows
+   compiler macro (which would clobber a real O_BINARY on toolchains that do
+   not define __WIN32__). */
+#ifndef O_BINARY
+#define O_BINARY 0
+#endif
+
+#define CALC_CRC 1
+
+#define BLOCKS_PER_LOOP     4608
+#define MAX_CHANNELS        2
+#define MAX_BYTESPERSAMPLE  3
+
+#define INPUT_CHUNKSIZE     (32*1024)
+
+#ifndef MIN
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+#endif
+
+
+/* 4608*2*3 = 27648 bytes */
+static unsigned char wavbuffer[BLOCKS_PER_LOOP*MAX_CHANNELS*MAX_BYTESPERSAMPLE];
+
+/* 4608*4 = 18432 bytes per channel */
+static int32_t decoded0[BLOCKS_PER_LOOP];
+static int32_t decoded1[BLOCKS_PER_LOOP];
+
+/* We assume that 32KB of compressed data is enough to extract up to
+   27648 bytes of decompressed data. */
+
+static unsigned char inbuffer[INPUT_CHUNKSIZE];
+
+/* Drop the first `consumed` bytes of inbuffer (already used by the decoder),
+   slide the remaining bytes to the front and top the buffer up from fd.
+
+   `consumed` is clamped to [0, bytesinbuffer] so that a truncated or corrupt
+   stream can never make the memmove() length negative.
+
+   Returns the new number of valid bytes in inbuffer, or -1 if read() fails. */
+static int refill_inbuffer(int fd, int bytesinbuffer, int consumed)
+{
+    int n;
+
+    if (consumed < 0)
+        consumed = 0;
+    if (consumed > bytesinbuffer)
+        consumed = bytesinbuffer;
+
+    bytesinbuffer -= consumed;
+    memmove(inbuffer, inbuffer + consumed, bytesinbuffer);
+
+    n = read(fd, inbuffer + bytesinbuffer, INPUT_CHUNKSIZE - bytesinbuffer);
+    if (n < 0)
+        return -1;
+
+    return bytesinbuffer + n;
+}
+
+/* Decode the APE file `infile` and write the audio to `outfile` as WAV.
+
+   Returns 0 on success.  Returns -1 if the input or output cannot be
+   opened, the header cannot be parsed, an I/O error occurs, or at least one
+   frame fails its CRC check; -2 if the file version is outside
+   [APE_MIN_VERSION, APE_MAX_VERSION]; otherwise the negative value reported
+   by decode_chunk().  Both file descriptors are closed on every path. */
+int ape_decode(char* infile, char* outfile)
+{
+    int fd;
+    int fdwav = -1;
+    int currentframe;
+    int nblocks;
+    int bytesconsumed;
+    struct ape_ctx_t ape_ctx;
+    int i;
+    unsigned char* p;
+    int bytesinbuffer;
+    int blockstodecode;
+    int outbytes;
+    int res;
+    int firstbyte;
+    int16_t  sample16;
+    int32_t  sample32;
+    uint32_t frame_crc;
+    int crc_errors = 0;
+    int status = -1;    /* pessimistic default, used by the error exits */
+
+    fd = open(infile,O_RDONLY|O_BINARY);
+    if (fd < 0) return -1;
+
+    /* Read the file headers to populate the ape_ctx struct */
+    if (ape_parseheader(fd,&ape_ctx) < 0) {
+        printf("Cannot read header\n");
+        goto done;
+    }
+
+    if ((ape_ctx.fileversion < APE_MIN_VERSION) || (ape_ctx.fileversion > APE_MAX_VERSION)) {
+        printf("Unsupported file version - %.2f\n", ape_ctx.fileversion/1000.0);
+        status = -2;
+        goto done;
+    }
+
+    //ape_dumpinfo(&ape_ctx);
+
+    printf("Decoding file - v%.2f, compression level %d\n",ape_ctx.fileversion/1000.0,ape_ctx.compressiontype);
+
+    /* Open the WAV file and write a canonical 44-byte WAV header
+       based on the audio format information in the ape_ctx struct.
+
+       NOTE: This example doesn't write the original WAV header and
+             tail data which are (optionally) stored in the APE file.
+     */
+    fdwav = open_wav(&ape_ctx,outfile);
+    if (fdwav < 0) {
+        /* A negative descriptor can never be written to */
+        fprintf(stderr,"Cannot open output file %s\n", outfile);
+        goto done;
+    }
+
+    currentframe = 0;
+
+    /* Initialise the buffer */
+    lseek(fd, ape_ctx.firstframe, SEEK_SET);
+    bytesinbuffer = read(fd, inbuffer, INPUT_CHUNKSIZE);
+    if (bytesinbuffer < 0) {
+        fprintf(stderr,"Read error on input file\n");
+        goto done;
+    }
+    firstbyte = 3;  /* Take account of the little-endian 32-bit byte ordering */
+
+    /* The main decoding loop - we decode the frames a small chunk at a time */
+    while (currentframe < ape_ctx.totalframes)
+    {
+        /* Calculate how many blocks there are in this frame */
+        if (currentframe == (ape_ctx.totalframes - 1))
+            nblocks = ape_ctx.finalframeblocks;
+        else
+            nblocks = ape_ctx.blocksperframe;
+
+        ape_ctx.currentframeblocks = nblocks;
+
+        /* Initialise the frame decoder */
+        init_frame_decoder(&ape_ctx, inbuffer, &firstbyte, &bytesconsumed);
+
+        /* Update buffer */
+        bytesinbuffer = refill_inbuffer(fd, bytesinbuffer, bytesconsumed);
+        if (bytesinbuffer < 0) {
+            fprintf(stderr,"Read error on input file\n");
+            goto done;
+        }
+
+#if CALC_CRC
+        frame_crc = ape_initcrc();
+#endif
+
+        /* Decode the frame a chunk at a time */
+        while (nblocks > 0)
+        {
+            blockstodecode = MIN(BLOCKS_PER_LOOP, nblocks);
+
+            if ((res = decode_chunk(&ape_ctx, inbuffer, &firstbyte,
+                                    &bytesconsumed,
+                                    decoded0, decoded1,
+                                    blockstodecode)) < 0)
+            {
+                /* Frame decoding error, abort */
+                status = res;
+                goto done;
+            }
+
+            /* Convert the output samples to WAV format and write to output file */
+            p = wavbuffer;
+            if (ape_ctx.bps == 8) {
+                for (i = 0 ; i < blockstodecode ; i++)
+                {
+                    /* 8 bit WAV uses unsigned samples */
+                    *(p++) = (decoded0[i] + 0x80) & 0xff;
+
+                    if (ape_ctx.channels == 2) {
+                        *(p++) = (decoded1[i] + 0x80) & 0xff;
+                    }
+                }
+            } else if (ape_ctx.bps == 16) {
+                for (i = 0 ; i < blockstodecode ; i++)
+                {
+                    sample16 = decoded0[i];
+                    *(p++) = sample16 & 0xff;
+                    *(p++) = (sample16 >> 8) & 0xff;
+
+                    if (ape_ctx.channels == 2) {
+                        sample16 = decoded1[i];
+                        *(p++) = sample16 & 0xff;
+                        *(p++) = (sample16 >> 8) & 0xff;
+                    }
+                }
+            } else if (ape_ctx.bps == 24) {
+                for (i = 0 ; i < blockstodecode ; i++)
+                {
+                    sample32 = decoded0[i];
+                    *(p++) = sample32 & 0xff;
+                    *(p++) = (sample32 >> 8) & 0xff;
+                    *(p++) = (sample32 >> 16) & 0xff;
+
+                    if (ape_ctx.channels == 2) {
+                        sample32 = decoded1[i];
+                        *(p++) = sample32 & 0xff;
+                        *(p++) = (sample32 >> 8) & 0xff;
+                        *(p++) = (sample32 >> 16) & 0xff;
+                    }
+                }
+            }
+
+            outbytes = p - wavbuffer;
+
+#if CALC_CRC
+            frame_crc = ape_updatecrc(wavbuffer, outbytes, frame_crc);
+#endif
+            /* A failed or short write (e.g. disk full) must not be reported
+               as a successful decode */
+            if (write(fdwav,wavbuffer,outbytes) != outbytes) {
+                fprintf(stderr,"Write error on output file\n");
+                goto done;
+            }
+
+            /* Update the buffer */
+            bytesinbuffer = refill_inbuffer(fd, bytesinbuffer, bytesconsumed);
+            if (bytesinbuffer < 0) {
+                fprintf(stderr,"Read error on input file\n");
+                goto done;
+            }
+
+            /* Decrement the block count */
+            nblocks -= blockstodecode;
+        }
+
+#if CALC_CRC
+        frame_crc = ape_finishcrc(frame_crc);
+
+        if (ape_ctx.CRC != frame_crc)
+        {
+            fprintf(stderr,"CRC error in frame %d\n",currentframe);
+            crc_errors++;
+        }
+#endif
+
+        currentframe++;
+    }
+
+    status = (crc_errors > 0) ? -1 : 0;
+
+done:
+    /* Single exit point so that both descriptors are released on every path */
+    if (fdwav >= 0)
+        close(fdwav);
+    close(fd);
+
+    return status;
+}
+
+/* Command-line entry point: demac infile.ape outfile.wav
+
+   Exits with EXIT_SUCCESS only when the file was decoded without error.
+   A usage error, or any negative result from ape_decode(), yields
+   EXIT_FAILURE so that calling scripts can detect the problem. */
+int main(int argc, char* argv[])
+{
+    int res;
+
+    if (argc != 3) {
+        fprintf(stderr,"Usage: demac infile.ape outfile.wav\n");
+        return EXIT_FAILURE;
+    }
+
+    res = ape_decode(argv[1], argv[2]);
+
+    if (res < 0)
+    {
+        fprintf(stderr,"DECODING ERROR %d, ABORTING\n", res);
+        return EXIT_FAILURE;
+    }
+
+    fprintf(stderr,"DECODED OK - NO CRC ERRORS.\n");
+    return EXIT_SUCCESS;
+}
--- a/lib/rbcodec/codecs/demac/libdemac.make
+++ b/lib/rbcodec/codecs/demac/libdemac.make
@ -0,0 +1,35 @@
+#             __________               __   ___.
+#   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+#   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+#   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+#   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+#                     \/            \/     \/    \/            \/
+# $Id$
+#
+
+# libdemac: rules for libdemac.a, the preliminary libdemac-pre.a, and the generated ape_free_iram.h
+DEMACLIB := $(CODECDIR)/libdemac.a
+DEMACLIB_SRC := $(call preprocess, $(RBCODECLIB_DIR)/codecs/demac/libdemac/SOURCES)
+DEMACLIB_OBJ := $(call c2obj, $(DEMACLIB_SRC))
+OTHER_SRC += $(DEMACLIB_SRC)
+ifeq ($(ARCH),arch_arm)
+OTHER_SRC += $(RBCODECLIB_DIR)/codecs/demac/libdemac/udiv32_arm-pre.S
+endif
+DEMACLIB_PRE := $(subst .a,-pre.a,$(DEMACLIB))
+DEMACLIB_OBJ_PRE := $(subst udiv32_arm.o,udiv32_arm-pre.o,$(DEMACLIB_OBJ))
+# "-pre" archive: the same object list, with udiv32_arm-pre.o substituted for udiv32_arm.o
+$(DEMACLIB_PRE): $(DEMACLIB_OBJ_PRE)
+	$(SILENT)$(shell rm -f $@)
+	$(call PRINTS,AR $(@F))$(AR) rcs $@ $^ >/dev/null
+# final archive built from the regular object list
+$(DEMACLIB): $(DEMACLIB_OBJ)
+	$(SILENT)$(shell rm -f $@)
+	$(call PRINTS,AR $(@F))$(AR) rcs $@ $^ >/dev/null
+# From ape-pre.map: PLUGIN_IRAM line gives start and length, "iend = " line gives iend; emits FREE_IRAM = length + start - iend when length != 0
+$(CODECDIR)/ape_free_iram.h: $(CODECDIR)/ape-pre.map
+	$(call PRINTS,GEN $(@F))perl -an \
+		-e 'if(/^PLUGIN_IRAM/){$$istart=hex($$F[1]);$$ilen=hex($$F[2])}' \
+		-e 'if(/iend = /){$$iend=hex($$F[0]);}' \
+		-e '}{if($$ilen){print"#define FREE_IRAM ".($$ilen+$$istart-$$iend)."\n";}' \
+		$(CODECDIR)/ape-pre.map \
+		> $@
--- a/lib/rbcodec/codecs/demac/libdemac/SOURCES
+++ b/lib/rbcodec/codecs/demac/libdemac/SOURCES
@ -0,0 +1,15 @@
+predictor.c
+#ifdef CPU_ARM
+predictor-arm.S
+udiv32_arm.S
+#elif defined CPU_COLDFIRE
+predictor-cf.S
+#endif
+entropy.c
+decoder.c
+parser.c
+filter_1280_15.c
+filter_16_11.c
+filter_256_13.c
+filter_32_10.c
+filter_64_11.c
--- a/lib/rbcodec/codecs/demac/libdemac/crc.c
+++ b/lib/rbcodec/codecs/demac/libdemac/crc.c
@ -0,0 +1,120 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id$
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#include <inttypes.h>
+#include "demac.h"
+/* Byte-indexed lookup table (256 entries) used by ape_updatecrc().
+ * The values match the widely used reflected CRC-32 table: entry 0x01 is
+ * 0x77073096 and entry 0x80 is 0xEDB88320.
+ */
+static const uint32_t crctab32[] =
+{
+  0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA,
+  0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3,
+  0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988,
+  0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91,
+  0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE,
+  0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7,
+  0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC,
+  0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5,
+  0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172,
+  0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B,
+  0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940,
+  0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59,
+  0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116,
+  0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F,
+  0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924,
+  0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D,
+
+  0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A,
+  0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433,
+  0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818,
+  0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01,
+  0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E,
+  0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457,
+  0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C,
+  0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65,
+  0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2,
+  0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB,
+  0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0,
+  0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9,
+  0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086,
+  0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F,
+  0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4,
+  0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD,
+
+  0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A,
+  0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683,
+  0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8,
+  0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1,
+  0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE,
+  0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7,
+  0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC,
+  0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5,
+  0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252,
+  0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B,
+  0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60,
+  0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79,
+  0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236,
+  0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F,
+  0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04,
+  0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D,
+
+  0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A,
+  0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713,
+  0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38,
+  0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21,
+  0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E,
+  0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777,
+  0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C,
+  0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45,
+  0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2,
+  0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB,
+  0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0,
+  0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9,
+  0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6,
+  0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF,
+  0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94,
+  0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D
+};
+
+/* Return the value a frame CRC accumulator must start from (all 32 bits
+   set) before any data is fed to ape_updatecrc(). */
+uint32_t ape_initcrc(void)
+{
+    const uint32_t all_ones = UINT32_C(0xFFFFFFFF);
+
+    return all_ones;
+}
+
+/* Fold `count` bytes of WAV-format audio data, starting at `block`, into
+   the running CRC `crc` and return the updated value.  One table lookup is
+   performed per input byte. */
+uint32_t ape_updatecrc(unsigned char *block, int count, uint32_t crc)
+{
+    for (; count != 0; count--) {
+        const unsigned char byte = *block++;
+
+        /* The low byte of the CRC combined with the data byte selects the
+           table entry; the remaining 24 bits are shifted down */
+        crc = crctab32[(crc ^ byte) & 0xff] ^ (crc >> 8);
+    }
+
+    return crc;
+}
+
+/* Finalise a running CRC: invert all 32 bits, then discard the lowest bit
+   so that a 31-bit value is returned.
+   NOTE(review): presumably this matches how the CRC is stored in the APE
+   frame header - confirm against the parser. */
+uint32_t ape_finishcrc(uint32_t crc)
+{
+    return (crc ^ UINT32_C(0xFFFFFFFF)) >> 1;
+}
+
--- a/lib/rbcodec/codecs/demac/libdemac/decoder.c
+++ b/lib/rbcodec/codecs/demac/libdemac/decoder.c
@ -0,0 +1,216 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id$
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#include <inttypes.h>
+#include <string.h>
+
+#include "demac.h"
+#include "predictor.h"
+#include "entropy.h"
+#include "filter.h"
+#include "demac_config.h"
+
+/* Statically allocate the filter buffers */
+
+#ifdef FILTER256_IRAM
+static filter_int filterbuf32[(32*3 + FILTER_HISTORY_SIZE) * 2]   
+                  IBSS_ATTR_DEMAC MEM_ALIGN_ATTR; 
+                  /* 2432 or 4864 bytes */
+static filter_int filterbuf256[(256*3 + FILTER_HISTORY_SIZE) * 2]
+                  IBSS_ATTR_DEMAC MEM_ALIGN_ATTR; 
+                  /* 5120 or 10240 bytes */
+#define FILTERBUF64 filterbuf256
+#define FILTERBUF32 filterbuf32
+#define FILTERBUF16 filterbuf32
+#else
+static filter_int filterbuf64[(64*3 + FILTER_HISTORY_SIZE) * 2]   
+                  IBSS_ATTR_DEMAC MEM_ALIGN_ATTR; 
+                  /* 2816 or 5632 bytes */
+static filter_int filterbuf256[(256*3 + FILTER_HISTORY_SIZE) * 2]
+                  MEM_ALIGN_ATTR; /* 5120 or 10240 bytes */
+#define FILTERBUF64 filterbuf64
+#define FILTERBUF32 filterbuf64
+#define FILTERBUF16 filterbuf64
+#endif
+
+/* This is only needed for "insane" files, and no current Rockbox targets
+   can hope to decode them in realtime, except the Gigabeat S (at 528MHz). */
+static filter_int filterbuf1280[(1280*3 + FILTER_HISTORY_SIZE) * 2] 
+                  IBSS_ATTR_DEMAC_INSANEBUF MEM_ALIGN_ATTR;
+                  /* 17408 or 34816 bytes */
+
+/* Reset all decoder state at the start of a frame.
+ *
+ * Initialises the entropy decoder from inbuffer (firstbyte and
+ * bytesconsumed are passed straight through to it), resets the predictor,
+ * and then initialises the filter stage(s) belonging to the file's
+ * compression level.  Levels other than 2000, 3000, 4000 and 5000 have no
+ * filter to initialise.
+ */
+void init_frame_decoder(struct ape_ctx_t* ape_ctx,
+                        unsigned char* inbuffer, int* firstbyte,
+                        int* bytesconsumed)
+{
+    init_entropy_decoder(ape_ctx, inbuffer, firstbyte, bytesconsumed);
+    init_predictor_decoder(&ape_ctx->predictor);
+
+    if (ape_ctx->compressiontype == 2000) {
+        init_filter_16_11(FILTERBUF16);
+    } else if (ape_ctx->compressiontype == 3000) {
+        init_filter_64_11(FILTERBUF64);
+    } else if (ape_ctx->compressiontype == 4000) {
+        init_filter_256_13(filterbuf256);
+        init_filter_32_10(FILTERBUF32);
+    } else if (ape_ctx->compressiontype == 5000) {
+        init_filter_1280_15(filterbuf1280);
+        init_filter_256_13(filterbuf256);
+        init_filter_16_11(FILTERBUF32);
+    }
+}
+/* Decode `count` blocks of the current frame into decoded0 / decoded1.
+ *
+ * Each path runs three stages in order: entropy decoding from inbuffer,
+ * the filters selected by ape_ctx->compressiontype, then the predictor.
+ * If the frame flags mark the frame as silent, the function returns right
+ * after the entropy stage.  decoded1 is written only when the stream has
+ * two channels.  In ROCKBOX builds every output sample is shifted left by
+ * (APE_OUTPUT_DEPTH - bps) bits; otherwise SCALE() leaves it unchanged.
+ *
+ * firstbyte and bytesconsumed are handed through to entropy_decode().
+ * Every path visible in this function returns 0.
+ */
+int ICODE_ATTR_DEMAC decode_chunk(struct ape_ctx_t* ape_ctx,
+                                  unsigned char* inbuffer, int* firstbyte,
+                                  int* bytesconsumed,
+                                  int32_t* decoded0, int32_t* decoded1,
+                                  int count)
+{
+    int32_t left, right;
+#ifdef ROCKBOX
+    int scale = (APE_OUTPUT_DEPTH - ape_ctx->bps);
+    #define SCALE(x) ((x) << scale)
+#else
+    #define SCALE(x) (x)
+#endif
+    /* Single-channel decode: mono stream, or PSEUDO_STEREO set without STEREO_SILENCE */
+    if ((ape_ctx->channels==1) || ((ape_ctx->frameflags
+        & (APE_FRAMECODE_PSEUDO_STEREO|APE_FRAMECODE_STEREO_SILENCE))
+        == APE_FRAMECODE_PSEUDO_STEREO)) {
+
+        entropy_decode(ape_ctx, inbuffer, firstbyte, bytesconsumed,
+                       decoded0, NULL, count);
+
+        if (ape_ctx->frameflags & APE_FRAMECODE_MONO_SILENCE) {
+            /* We are pure silence, so we're done. */
+            return 0;
+        }
+
+        switch (ape_ctx->compressiontype) /* any other value: no filter is applied */
+        {
+            case 2000:
+                apply_filter_16_11(ape_ctx->fileversion,0,decoded0,count);
+                break;
+    
+            case 3000:
+                apply_filter_64_11(ape_ctx->fileversion,0,decoded0,count);
+                break;
+    
+            case 4000:
+                apply_filter_32_10(ape_ctx->fileversion,0,decoded0,count);
+                apply_filter_256_13(ape_ctx->fileversion,0,decoded0,count);
+                break;
+    
+            case 5000:
+                apply_filter_16_11(ape_ctx->fileversion,0,decoded0,count);
+                apply_filter_256_13(ape_ctx->fileversion,0,decoded0,count);
+                apply_filter_1280_15(ape_ctx->fileversion,0,decoded0,count);
+        }
+
+        /* Now apply the predictor decoding */
+        predictor_decode_mono(&ape_ctx->predictor,decoded0,count);
+
+        if (ape_ctx->channels==2) {
+            /* Pseudo-stereo - copy left channel to right channel */
+            while (count--)
+            {
+                left = *decoded0;
+                *(decoded1++) = *(decoded0++) = SCALE(left);
+            }
+        }
+#ifdef ROCKBOX
+         else {
+            /* Scale to output depth */
+            while (count--)
+            {
+                left = *decoded0;
+                *(decoded0++) = SCALE(left);
+            }
+        }
+#endif
+    } else { /* Stereo */
+        entropy_decode(ape_ctx, inbuffer, firstbyte, bytesconsumed,
+                       decoded0, decoded1, count);
+
+        if ((ape_ctx->frameflags & APE_FRAMECODE_STEREO_SILENCE)
+            == APE_FRAMECODE_STEREO_SILENCE) {
+            /* We are pure silence, so we're done. */
+            return 0;
+        }
+
+        /* Apply filters - compression type 1000 doesn't have any */
+        switch (ape_ctx->compressiontype)
+        {
+            case 2000:
+                apply_filter_16_11(ape_ctx->fileversion,0,decoded0,count);
+                apply_filter_16_11(ape_ctx->fileversion,1,decoded1,count);
+                break;
+    
+            case 3000:
+                apply_filter_64_11(ape_ctx->fileversion,0,decoded0,count);
+                apply_filter_64_11(ape_ctx->fileversion,1,decoded1,count);
+                break;
+    
+            case 4000:
+                apply_filter_32_10(ape_ctx->fileversion,0,decoded0,count);
+                apply_filter_32_10(ape_ctx->fileversion,1,decoded1,count);
+                apply_filter_256_13(ape_ctx->fileversion,0,decoded0,count);
+                apply_filter_256_13(ape_ctx->fileversion,1,decoded1,count);
+                break;
+    
+            case 5000:
+                apply_filter_16_11(ape_ctx->fileversion,0,decoded0,count);
+                apply_filter_16_11(ape_ctx->fileversion,1,decoded1,count);
+                apply_filter_256_13(ape_ctx->fileversion,0,decoded0,count);
+                apply_filter_256_13(ape_ctx->fileversion,1,decoded1,count);
+                apply_filter_1280_15(ape_ctx->fileversion,0,decoded0,count);
+                apply_filter_1280_15(ape_ctx->fileversion,1,decoded1,count);
+        }
+
+        /* Now apply the predictor decoding */
+        predictor_decode_stereo(&ape_ctx->predictor,decoded0,decoded1,count);
+
+        /* Decorrelate and scale to output depth */
+        while (count--)
+        {
+            left = *decoded1 - (*decoded0 / 2);
+            right = left + *decoded0;
+
+            *(decoded0++) = SCALE(left);
+            *(decoded1++) = SCALE(right);
+        }
+    }
+    return 0;
+}
--- a/lib/rbcodec/codecs/demac/libdemac/decoder.h
+++ b/lib/rbcodec/codecs/demac/libdemac/decoder.h
@ -0,0 +1,40 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id$
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#ifndef _APE_DECODER_H
+#define _APE_DECODER_H
+
+#include <inttypes.h>
+#include "parser.h"
+/* Prepare the decoder for a new frame whose data starts at inbuffer.
+ * bytesconsumed is an output: callers use it to discard input they no
+ * longer need.  NOTE(review): firstbyte appears to be an in/out byte
+ * offset into the first 32-bit word - confirm against the entropy decoder.
+ */
+void init_frame_decoder(struct ape_ctx_t* ape_ctx,
+                        unsigned char* inbuffer, int* firstbyte,
+                        int* bytesconsumed);
+/* Decode `count` blocks of the current frame into decoded0 (and decoded1
+ * for two-channel streams).  Callers treat a negative return value as a
+ * decoding error.
+ */
+int decode_chunk(struct ape_ctx_t* ape_ctx,
+                 unsigned char* inbuffer, int* firstbyte,
+                 int* bytesconsumed,
+                 int32_t* decoded0, int32_t* decoded1, 
+                 int count);
+#endif
--- a/lib/rbcodec/codecs/demac/libdemac/demac.h
+++ b/lib/rbcodec/codecs/demac/libdemac/demac.h
@ -0,0 +1,45 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id$
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+/* Public interface of libdemac: frame decoding plus the CRC helpers.
+ *
+ * This header has its own include guard.  It previously reused
+ * _APE_DECODER_H, the guard of decoder.h, so including decoder.h first
+ * silently hid the CRC prototypes declared below.
+ */
+#ifndef _APE_DEMAC_H
+#define _APE_DEMAC_H
+
+#include <inttypes.h>
+#include "parser.h"
+
+/* Prepare the decoder for a new frame whose data starts at inbuffer.
+ * bytesconsumed is an output: callers use it to discard input they no
+ * longer need. */
+void init_frame_decoder(struct ape_ctx_t* ape_ctx,
+                        unsigned char* inbuffer, int* firstbyte,
+                        int* bytesconsumed);
+
+/* Decode `count` blocks of the current frame into decoded0 (and decoded1
+ * for two-channel streams).  Callers treat a negative return value as a
+ * decoding error. */
+int decode_chunk(struct ape_ctx_t* ape_ctx,
+                 unsigned char* inbuffer, int* firstbyte,
+                 int* bytesconsumed,
+                 int32_t* decoded0, int32_t* decoded1,
+                 int count);
+
+/* Frame CRC helpers: start value, update over `count` bytes of WAV-format
+ * data, and finalisation of the accumulated value. */
+uint32_t ape_initcrc(void);
+uint32_t ape_updatecrc(unsigned char *block, int count, uint32_t crc);
+uint32_t ape_finishcrc(uint32_t crc);
+
+#endif /* _APE_DEMAC_H */
--- a/lib/rbcodec/codecs/demac/libdemac/demac_config.h
+++ b/lib/rbcodec/codecs/demac/libdemac/demac_config.h
@ -0,0 +1,145 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id$
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#ifndef _DEMAC_CONFIG_H
+#define _DEMAC_CONFIG_H
+
+/* Build-time choices for libdemac.
+ * Note that this file is included by both .c and .S files. */
+
+#ifdef ROCKBOX
+
+#include "config.h"
+
+#ifndef __ASSEMBLER__
+#include "codeclib.h"
+#include <codecs.h>
+#endif
+
+#define APE_OUTPUT_DEPTH 29
+
+/* On ARMv4, using 32 bit ints for the filters is faster. */
+#if defined(CPU_ARM) && (ARM_ARCH == 4)
+#define FILTER_BITS 32
+#endif
+
+#if !defined(CPU_PP) && !defined(CPU_S5L870X)
+#define FILTER256_IRAM
+#endif
+
+#if CONFIG_CPU == PP5002 || defined(CPU_S5L870X)
+/* Code and data IRAM for speed (PP5002 has a broken cache), not enough IRAM
+ * for the insane filter buffer. Reciprocal table for division in IRAM. */
+#define ICODE_SECTION_DEMAC_ARM   .icode
+#define ICODE_ATTR_DEMAC          ICODE_ATTR
+#define ICONST_ATTR_DEMAC         ICONST_ATTR
+#define IBSS_ATTR_DEMAC           IBSS_ATTR
+#define IBSS_ATTR_DEMAC_INSANEBUF
+
+#elif CONFIG_CPU == PP5020
+/* Code and small data in DRAM for speed (PP5020 IRAM isn't completely single
+ * cycle). Insane filter buffer not in IRAM in favour of reciprocal table for
+ * division. Decoded data buffers should be in IRAM (defined by the caller). */
+#define ICODE_SECTION_DEMAC_ARM   .text
+#define ICODE_ATTR_DEMAC
+#define ICONST_ATTR_DEMAC
+#define IBSS_ATTR_DEMAC
+#define IBSS_ATTR_DEMAC_INSANEBUF
+
+#elif CONFIG_CPU == PP5022
+/* Code in DRAM, data in IRAM. Insane filter buffer not in IRAM in favour of
+ * reciprocal table for division */
+#define ICODE_SECTION_DEMAC_ARM   .text
+#define ICODE_ATTR_DEMAC
+#define ICONST_ATTR_DEMAC         ICONST_ATTR
+#define IBSS_ATTR_DEMAC           IBSS_ATTR
+#define IBSS_ATTR_DEMAC_INSANEBUF
+
+#else
+/* Code in DRAM, data in IRAM, including insane filter buffer. */
+#define ICODE_SECTION_DEMAC_ARM   .text
+#define ICODE_ATTR_DEMAC
+#define ICONST_ATTR_DEMAC         ICONST_ATTR
+#define IBSS_ATTR_DEMAC           IBSS_ATTR
+#define IBSS_ATTR_DEMAC_INSANEBUF IBSS_ATTR
+#endif
+
+#else /* !ROCKBOX */
+
+#define APE_OUTPUT_DEPTH (ape_ctx->bps)
+
+#define MEM_ALIGN_ATTR __attribute__((aligned(16)))
+        /* adjust to target architecture for best performance */
+
+#define ICODE_ATTR_DEMAC
+#define ICONST_ATTR_DEMAC
+#define IBSS_ATTR_DEMAC
+#define IBSS_ATTR_DEMAC_INSANEBUF
+
+/* Use to give gcc hints on which branch is most likely taken */
+#if defined(__GNUC__) && __GNUC__ >= 3
+#define LIKELY(x)   __builtin_expect(!!(x), 1)
+#define UNLIKELY(x) __builtin_expect(!!(x), 0)
+#else
+#define LIKELY(x)   (x)
+#define UNLIKELY(x) (x)
+#endif
+
+#endif /* !ROCKBOX */
+
+/* Defaults */
+
+#ifndef FILTER_HISTORY_SIZE
+#define FILTER_HISTORY_SIZE 512
+#endif
+
+#ifndef PREDICTOR_HISTORY_SIZE
+#define PREDICTOR_HISTORY_SIZE 512
+#endif     
+
+#ifndef FILTER_BITS
+#define FILTER_BITS 16
+#endif
+
+
+#ifndef __ASSEMBLER__
+
+#if defined(CPU_ARM) && (ARM_ARCH < 5 || defined(USE_IRAM))
+/* optimised unsigned integer division for ARMv4, in IRAM */
+unsigned udiv32_arm(unsigned a, unsigned b);
+#define UDIV32(a, b) udiv32_arm(a, b)
+#else
+/* default */
+#define UDIV32(a, b) (a / b)
+#endif
+
+#include <inttypes.h>
+#if FILTER_BITS == 32
+typedef int32_t filter_int;
+#elif FILTER_BITS == 16
+typedef int16_t filter_int;
+#endif
+#endif
+
+#endif /* _DEMAC_CONFIG_H */
--- a/lib/rbcodec/codecs/demac/libdemac/entropy.c
+++ b/lib/rbcodec/codecs/demac/libdemac/entropy.c
@ -0,0 +1,464 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id$
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#include <inttypes.h>
+#include <string.h>
+
+#include "parser.h"
+#include "entropy.h"
+#include "demac_config.h"
+
+#define MODEL_ELEMENTS 64
+
+/*
+  The following counts arrays for use with the range decoder are
+  hard-coded in the Monkey's Audio decoder.
+
+  counts_3970[i] is the cumulative frequency at which symbol i starts, on
+  a scale whose total (the last entry) is 65536. The table is searched by
+  range_get_symbol_3970().
+*/
+
+static const int counts_3970[65] ICONST_ATTR_DEMAC =
+{
+        0,14824,28224,39348,47855,53994,58171,60926,
+    62682,63786,64463,64878,65126,65276,65365,65419,
+    65450,65469,65480,65487,65491,65493,65494,65495,
+    65496,65497,65498,65499,65500,65501,65502,65503,
+    65504,65505,65506,65507,65508,65509,65510,65511,
+    65512,65513,65514,65515,65516,65517,65518,65519,
+    65520,65521,65522,65523,65524,65525,65526,65527,
+    65528,65529,65530,65531,65532,65533,65534,65535,
+    65536
+};
+
+/* counts_diff_3970[i] = counts_3970[i+1] - counts_3970[i], i.e. the
+   frequency (interval length) of symbol i as passed to
+   range_decode_update(). */
+static const int counts_diff_3970[64] ICONST_ATTR_DEMAC =
+{
+    14824,13400,11124,8507,6139,4177,2755,1756,
+    1104,677,415,248,150,89,54,31,
+    19,11,7,4,2,1,1,1,
+    1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1
+};
+
+static const int counts_3980[65] ICONST_ATTR_DEMAC = /* cumulative symbol
+   frequencies (last entry is the total, 65536); searched by
+   range_get_symbol_3980() */
+{
+        0,19578,36160,48417,56323,60899,63265,64435,
+    64971,65232,65351,65416,65447,65466,65476,65482,
+    65485,65488,65490,65491,65492,65493,65494,65495,
+    65496,65497,65498,65499,65500,65501,65502,65503,
+    65504,65505,65506,65507,65508,65509,65510,65511,
+    65512,65513,65514,65515,65516,65517,65518,65519,
+    65520,65521,65522,65523,65524,65525,65526,65527,
+    65528,65529,65530,65531,65532,65533,65534,65535,
+    65536
+};
+
+/* counts_diff_3980[i] = counts_3980[i+1] - counts_3980[i], i.e. the
+   frequency (interval length) of symbol i as passed to
+   range_decode_update(). */
+
+static const int counts_diff_3980[64] ICONST_ATTR_DEMAC =
+{
+    19578,16582,12257,7906,4576,2366,1170,536,
+    261,119,65,31,19,10,6,3,
+    3,2,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1
+};
+
+/*
+
+Range decoder adapted from rangecod.c included in:
+
+  http://www.compressconsult.com/rangecoder/rngcod13.zip
+
+  rangecod.c     range encoding
+
+  (c) Michael Schindler
+  1997, 1998, 1999, 2000
+  http://www.compressconsult.com/
+  michael@compressconsult.com
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+
+The encoding functions were removed, and functions turned into "static
+inline" functions. Some minor cosmetic changes were made (e.g. turning
+pre-processor symbols into upper-case, removing the rc parameter from
+each function (and the RNGC macro)).
+
+*/
+
+/* BITSTREAM READING FUNCTIONS */
+
+/* We deal with the input data one byte at a time - to ensure
+   functionality on CPUs of any endianness regardless of any requirements
+   for aligned reads.
+*/
+
+/* Read position in the input: bytebuffer addresses the current group of
+   four bytes and bytebufferoffset selects one byte within it. */
+static unsigned char* bytebuffer IBSS_ATTR_DEMAC;
+static int bytebufferoffset IBSS_ATTR_DEMAC;
+
+/* Step to the next input byte. The offset counts downwards; stepping down
+   from offset 0 gives -1, whose bit 2 is set, so bytebuffer moves on by
+   four bytes and the offset wraps round to 3. */
+static inline void skip_byte(void)
+{
+    const int next = bytebufferoffset - 1;
+
+    bytebuffer += next & 4;
+    bytebufferoffset = next & 3;
+}
+
+/* Return the byte at the current read position, then step past it. */
+static inline int read_byte(void)
+{
+    const int value = bytebuffer[bytebufferoffset];
+
+    skip_byte();
+    return value;
+}
+
+/* RANGE DECODING FUNCTIONS */
+
+/* SIZE OF RANGE ENCODING CODE VALUES.
+   With CODE_BITS == 32 the derived values are: TOP_VALUE 1 << 31,
+   SHIFT_BITS 23, EXTRA_BITS 7 and BOTTOM_VALUE 1 << 23. */
+
+#define CODE_BITS 32
+#define TOP_VALUE ((unsigned int)1 << (CODE_BITS-1))
+#define SHIFT_BITS (CODE_BITS - 9)
+#define EXTRA_BITS ((CODE_BITS-2) % 8 + 1)
+#define BOTTOM_VALUE (TOP_VALUE >> 8)
+
+struct rangecoder_t /* state of the range decoder */
+{
+    uint32_t low;        /* low end of interval */
+    uint32_t range;      /* length of interval */
+    uint32_t help;       /* bytes_to_follow resp. intermediate value; in
+                            this decoder it is set by range_decode_culfreq()
+                            and range_decode_culshift() and then used by
+                            range_decode_update() */
+    unsigned int buffer; /* buffer for input/output */
+};
+
+static struct rangecoder_t rc IBSS_ATTR_DEMAC; /* the one decoder instance */
+
+/* Start the decoder: prime the state from the next input byte */
+static inline void range_start_decoding(void)
+{
+    const unsigned int first = read_byte();
+
+    rc.buffer = first;
+    rc.low = first >> (8 - EXTRA_BITS);
+    rc.range = (uint32_t) 1 << EXTRA_BITS;
+}
+
+/* Pull in input bytes, eight bits at a time, until the interval length is
+   above BOTTOM_VALUE again. */
+static inline void range_dec_normalize(void)
+{
+    for (; rc.range <= BOTTOM_VALUE; rc.range <<= 8)
+    {
+        const unsigned int next = read_byte();
+
+        rc.buffer = (rc.buffer << 8) | next;
+        rc.low = (rc.low << 8) | ((rc.buffer >> 1) & 0xff);
+    }
+}
+
+/* Calculate the cumulative frequency for the next symbol. Does NO update;
+   range_decode_update() has to be called afterwards.
+   tot_f is the total frequency
+   (range_decode_culshift() covers the case tot_f == 1 << shift).
+   Returns the cumulative frequency. */
+static inline int range_decode_culfreq(int tot_f)
+{
+    uint32_t unit;
+
+    range_dec_normalize();
+
+    unit = UDIV32(rc.range, tot_f);
+    rc.help = unit;
+
+    return UDIV32(rc.low, unit);
+}
+
+/* Same as range_decode_culfreq() for a total frequency of 1 << shift,
+   which replaces the first division by a shift. Does NO update. */
+static inline int range_decode_culshift(int shift)
+{
+    uint32_t unit;
+
+    range_dec_normalize();
+
+    unit = rc.range >> shift;
+    rc.help = unit;
+
+    return UDIV32(rc.low, unit);
+}
+
+
+/* Update the decoding state for the symbol just located.
+   sy_f is the interval length (frequency of the symbol),
+   lt_f is the lower end (frequency sum of < symbols).
+   Uses rc.help as left by range_decode_culfreq()/range_decode_culshift(). */
+static inline void range_decode_update(int sy_f, int lt_f)
+{
+    const uint32_t unit = rc.help;
+
+    rc.low -= unit * lt_f;
+    rc.range = unit * sy_f;
+}
+
+
+/* Decode 8 bits without modelling */
+static inline unsigned char decode_byte(void)
+{
+    const int value = range_decode_culshift(8);
+
+    range_decode_update(1, value);
+    return value;
+}
+
+/* Decode 16 bits without modelling */
+static inline unsigned short range_decode_short(void)
+{
+    const int value = range_decode_culshift(16);
+
+    range_decode_update(1, value);
+    return value;
+}
+
+/* Decode n bits (n <= 16) without modelling - same scheme as
+   range_decode_short() with a variable width */
+static inline int range_decode_bits(int n)
+{
+    const int value = range_decode_culshift(n);
+
+    range_decode_update(1, value);
+    return value;
+}
+
+
+/* Finish decoding: normalize one last time to use up all bytes */
+static inline void range_done_decoding(void)
+{
+    range_dec_normalize();
+}
+
+/*
+  range_get_symbol_* functions based on main decoding loop in simple_d.c from
+  http://www.compressconsult.com/rangecoder/rngcod13.zip
+  (c) Michael Schindler
+*/
+
+/* Decode one symbol using the counts_3980/counts_diff_3980 model.
+ *
+ * Returns the symbol index, in the range 0 .. MODEL_ELEMENTS-1.
+ *
+ * The search is bounded by the table size: if range_decode_culshift()
+ * comes back with 65536 or more (possible with damaged input), an
+ * unbounded search would read past the end of counts_3980[]. For values
+ * below 65536 the result is the same as that of the unbounded search.
+ */
+static inline int range_get_symbol_3980(void)
+{
+    int symbol, cf;
+
+    cf = range_decode_culshift(16);
+
+    /* figure out the symbol inefficiently; a binary search would be much better */
+    for (symbol = 0;
+         symbol < (MODEL_ELEMENTS-1) && counts_3980[symbol+1] <= cf;
+         symbol++);
+
+    range_decode_update(counts_diff_3980[symbol],counts_3980[symbol]);
+
+    return symbol;
+}
+
+/* Decode one symbol using the counts_3970/counts_diff_3970 model.
+ *
+ * Returns the symbol index, in the range 0 .. MODEL_ELEMENTS-1.
+ *
+ * The search is bounded by the table size: if range_decode_culshift()
+ * comes back with 65536 or more (possible with damaged input), an
+ * unbounded search would read past the end of counts_3970[]. For values
+ * below 65536 the result is the same as that of the unbounded search.
+ */
+static inline int range_get_symbol_3970(void)
+{
+    int symbol, cf;
+
+    cf = range_decode_culshift(16);
+
+    /* figure out the symbol inefficiently; a binary search would be much better */
+    for (symbol = 0;
+         symbol < (MODEL_ELEMENTS-1) && counts_3970[symbol+1] <= cf;
+         symbol++);
+
+    range_decode_update(counts_diff_3970[symbol],counts_3970[symbol]);
+
+    return symbol;
+}
+
+/* MAIN DECODING FUNCTIONS */
+
+struct rice_t /* adaptive state used by the entropy decoders */
+{
+  uint32_t k;    /* current parameter, stepped by update_rice() */
+  uint32_t ksum; /* running sum maintained by update_rice() */
+};
+
+static struct rice_t riceX IBSS_ATTR_DEMAC; /* state for decoded1 */
+static struct rice_t riceY IBSS_ATTR_DEMAC; /* state for decoded0 */
+
+/* Adapt the state in rice after the (unsigned-coded) value x was decoded.
+ *
+ * ksum gains (x + 1) / 2 and loses (ksum + 16) >> 5. k is then stepped
+ * down when ksum < 1 << (k + 4) and up when ksum >= 1 << (k + 5); a k of 0
+ * is always reset to 1.
+ *
+ * The threshold is computed in unsigned arithmetic and k is not raised
+ * beyond RICE_K_LIMIT, so the shift stays defined and 2 * lim cannot wrap
+ * to 0 when damaged input drives ksum very high.
+ */
+static inline void update_rice(struct rice_t* rice, int x)
+{
+    /* largest k for which 2 * (1 << (k + 4)) still fits in 32 bits */
+    enum { RICE_K_LIMIT = 26 };
+
+    rice->ksum += ((x + 1) / 2) - ((rice->ksum + 16) >> 5);
+
+    if (UNLIKELY(rice->k == 0)) {
+        rice->k = 1;
+    } else {
+        uint32_t lim = (uint32_t)1 << (rice->k + 4);
+        if (UNLIKELY(rice->ksum < lim)) {
+            rice->k--;
+        } else if (UNLIKELY(rice->ksum >= 2 * lim)
+                   && rice->k < (uint32_t)RICE_K_LIMIT) {
+            rice->k++;
+        }
+    }
+}
+
+/* Decode one value from a stream with file version > 3970.
+ *
+ * rice is the adaptive state to use; it is updated with the decoded value.
+ * Returns the decoded value converted to signed form.
+ *
+ * The escaped overflow count and the product overflow * pivot are computed
+ * in unsigned arithmetic, so that large values from damaged input do not
+ * cause signed-overflow undefined behaviour.
+ */
+static inline int entropy_decode3980(struct rice_t* rice)
+{
+    int base, x, pivot, overflow;
+
+    pivot = rice->ksum >> 5;
+    if (UNLIKELY(pivot == 0))
+        pivot=1;
+
+    overflow = range_get_symbol_3980();
+
+    if (UNLIKELY(overflow == (MODEL_ELEMENTS-1))) {
+        /* Escape symbol: the overflow count follows as two unmodelled
+           16-bit values, high half first */
+        uint32_t escaped = (uint32_t)range_decode_short() << 16;
+        escaped |= range_decode_short();
+        overflow = (int)escaped;
+    }
+
+    if (pivot >= 0x10000) {
+        /* Codepath for 24-bit streams */
+        int nbits, lo_bits, base_hi, base_lo;
+
+        /* Count the number of bits in pivot */
+        nbits = 17; /* We know there must be at least 17 bits */
+        while ((pivot >> nbits) > 0) { nbits++; }
+
+        /* base_lo is the low (nbits-16) bits of base
+           base_hi is the high 16 bits of base
+        */
+        lo_bits = (nbits - 16);
+
+        base_hi = range_decode_culfreq((pivot >> lo_bits) + 1);
+        range_decode_update(1, base_hi);
+
+        base_lo = range_decode_culshift(lo_bits);
+        range_decode_update(1, base_lo);
+
+        base = (base_hi << lo_bits) + base_lo;
+    } else {
+        /* Codepath for 16-bit streams */
+        base = range_decode_culfreq(pivot);
+        range_decode_update(1, base);
+    }
+
+    x = (int)((uint32_t)base + ((uint32_t)overflow * (uint32_t)pivot));
+    update_rice(rice, x);
+
+    /* Convert to signed */
+    if (x & 1)
+        return (x >> 1) + 1;
+    else
+        return -(x >> 1);
+}
+
+
+/* Decode one value from a stream with file version <= 3970.
+ *
+ * rice is the adaptive state to use; it is updated with the decoded value.
+ * Returns the decoded value converted to signed form.
+ */
+static inline int entropy_decode3970(struct rice_t* rice)
+{
+    int value, nbits;
+    int escape = range_get_symbol_3970();
+
+    if (UNLIKELY(escape == (MODEL_ELEMENTS - 1))) {
+        /* Escape symbol: the bit count is sent explicitly in 5 bits */
+        nbits = range_decode_bits(5);
+        escape = 0;
+    } else if (rice->k < 1) {
+        nbits = 0;
+    } else {
+        nbits = rice->k - 1;
+    }
+
+    if (nbits > 16) {
+        /* More than 16 bits: low 16 bits first, then the remainder */
+        value = range_decode_short();
+        value |= (range_decode_bits(nbits - 16) << 16);
+    } else {
+        value = range_decode_bits(nbits);
+    }
+    value += (escape << nbits);
+
+    update_rice(rice, value);
+
+    /* Convert to signed */
+    return (value & 1) ? (value >> 1) + 1 : -(value >> 1);
+}
+
+/* Prepare for decoding a frame.
+ *
+ * Reads the CRC and, if present, the frame flags from inbuffer starting at
+ * byte offset *firstbyte, resets the per-frame state in ape_ctx and both
+ * rice structs, and starts the range decoder.
+ *
+ * On return *bytesconsumed is the distance the read pointer has moved from
+ * inbuffer and *firstbyte is the new byte offset.
+ */
+void init_entropy_decoder(struct ape_ctx_t* ape_ctx,
+                          unsigned char* inbuffer, int* firstbyte,
+                          int* bytesconsumed)
+{
+    int i;
+
+    bytebuffer = inbuffer;
+    bytebufferoffset = *firstbyte;
+
+    /* Read the CRC, most significant byte first */
+    ape_ctx->CRC = read_byte();
+    for (i = 0; i < 3; i++)
+        ape_ctx->CRC = (ape_ctx->CRC << 8) | read_byte();
+
+    /* The frame flags exist only when the top bit of the CRC is set */
+    ape_ctx->frameflags = 0;
+    if ((ape_ctx->fileversion > 3820) && (ape_ctx->CRC & 0x80000000)) {
+        ape_ctx->CRC &= ~0x80000000;
+
+        ape_ctx->frameflags = read_byte();
+        for (i = 0; i < 3; i++)
+            ape_ctx->frameflags = (ape_ctx->frameflags << 8) | read_byte();
+    }
+
+    /* No blocks of this frame have been decoded yet */
+    ape_ctx->blocksdecoded = 0;
+
+    /* Both rice structs start from the same state */
+    riceX.k = 10;
+    riceX.ksum = (1 << 10) * 16;
+    riceY = riceX;
+
+    /* The first 8 bits of input are ignored. */
+    skip_byte();
+
+    range_start_decoding();
+
+    /* Hand the new buffer position back to the caller */
+    *firstbyte = bytebufferoffset;
+    *bytesconsumed = (intptr_t)bytebuffer - (intptr_t)inbuffer;
+}
+
+/* Decode blockstodecode values of the current frame.
+ *
+ * Values are written to decoded0 and, when decoded1 is not NULL, to
+ * decoded1 as well. Input is read from inbuffer starting at byte offset
+ * *firstbyte; on return *bytesconsumed is the distance the read pointer
+ * has moved from inbuffer and *firstbyte is the new byte offset.
+ */
+void ICODE_ATTR_DEMAC entropy_decode(struct ape_ctx_t* ape_ctx,
+                                     unsigned char* inbuffer, int* firstbyte,
+                                     int* bytesconsumed,
+                                     int32_t* decoded0, int32_t* decoded1,
+                                     int blockstodecode)
+{
+    const int stereo = (decoded1 != NULL);
+    int silent;
+
+    bytebuffer = inbuffer;
+    bytebufferoffset = *firstbyte;
+
+    ape_ctx->blocksdecoded += blockstodecode;
+
+    /* Silent only if every channel we have to produce is flagged silent */
+    silent = (ape_ctx->frameflags & APE_FRAMECODE_LEFT_SILENCE)
+             && (!stereo
+                 || (ape_ctx->frameflags & APE_FRAMECODE_RIGHT_SILENCE));
+
+    if (silent) {
+        /* We are pure silence, just memset the output buffer. */
+        memset(decoded0, 0, blockstodecode * sizeof(int32_t));
+        if (stereo)
+            memset(decoded1, 0, blockstodecode * sizeof(int32_t));
+    } else if (ape_ctx->fileversion > 3970) {
+        while (LIKELY(blockstodecode--)) {
+            *(decoded0++) = entropy_decode3980(&riceY);
+            if (stereo)
+                *(decoded1++) = entropy_decode3980(&riceX);
+        }
+    } else {
+        while (LIKELY(blockstodecode--)) {
+            *(decoded0++) = entropy_decode3970(&riceY);
+            if (stereo)
+                *(decoded1++) = entropy_decode3970(&riceX);
+        }
+    }
+
+    /* Once the whole frame has been decoded, finish the range decoder */
+    if (ape_ctx->blocksdecoded == ape_ctx->currentframeblocks)
+    {
+        range_done_decoding();
+    }
+
+    /* Hand the new buffer position back to the caller */
+    *firstbyte = bytebufferoffset;
+    *bytesconsumed = bytebuffer - inbuffer;
+}
--- a/lib/rbcodec/codecs/demac/libdemac/entropy.h
+++ b/lib/rbcodec/codecs/demac/libdemac/entropy.h
@ -0,0 +1,40 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id$
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#ifndef _APE_ENTROPY_H
+#define _APE_ENTROPY_H
+
+#include <inttypes.h>
+
+/* Forward declaration, so that the prototypes below do not depend on
+   parser.h having been included before this header. */
+struct ape_ctx_t;
+
+/* Prepare for decoding a frame: reads the CRC and optional frame flags
+   from inbuffer starting at byte offset *firstbyte, resets the per-frame
+   decoder state and starts the range decoder.
+   On return *bytesconsumed is the distance the read pointer has moved
+   from inbuffer and *firstbyte is the new byte offset. */
+void init_entropy_decoder(struct ape_ctx_t* ape_ctx,
+                          unsigned char* inbuffer, int* firstbyte,
+                          int* bytesconsumed);
+
+/* Decode blockstodecode values into decoded0 and, when decoded1 is not
+   NULL, into decoded1 as well. inbuffer, firstbyte and bytesconsumed are
+   used as for init_entropy_decoder(). */
+void entropy_decode(struct ape_ctx_t* ape_ctx,
+                    unsigned char* inbuffer, int* firstbyte,
+                    int* bytesconsumed,
+                    int32_t* decoded0, int32_t* decoded1,
+                    int blockstodecode);
+
+#endif
--- a/lib/rbcodec/codecs/demac/libdemac/filter.c
+++ b/lib/rbcodec/codecs/demac/libdemac/filter.c
@ -0,0 +1,296 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id$
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#include <string.h>
+#include <inttypes.h>
+
+#include "demac.h"
+#include "filter.h"
+#include "demac_config.h"
+     
+#if FILTER_BITS == 32
+
+#if defined(CPU_ARM) && (ARM_ARCH == 4)
+#include "vector_math32_armv4.h"
+#else
+#include "vector_math_generic.h"
+#endif
+
+#else /* FILTER_BITS == 16 */
+
+#ifdef CPU_COLDFIRE
+#include "vector_math16_cf.h"
+#elif defined(CPU_ARM) && (ARM_ARCH >= 7)
+#include "vector_math16_armv7.h"
+#elif defined(CPU_ARM) && (ARM_ARCH >= 6)
+#include "vector_math16_armv6.h"
+#elif defined(CPU_ARM) && (ARM_ARCH >= 5)
+/* Assume all our ARMv5 targets are ARMv5te(j) */
+#include "vector_math16_armv5te.h"
+#elif (defined(__i386__) || defined(__i486__))  && defined(__MMX__) \
+    || defined(__x86_64__)
+#include "vector_math16_mmx.h"
+#else
+#include "vector_math_generic.h"
+#endif
+
+#endif /* FILTER_BITS */
+
+struct filter_t { /* state of one filter; filter.c keeps one per channel */
+    filter_int* coeffs; /* ORDER entries */
+
+    /* We store all the filter delays in a single buffer */
+    filter_int* history_end; /* when delay gets here, the history is moved
+                                back to just after the coefficients */
+
+    filter_int* delay;       /* where the next saturated output is stored */
+    filter_int* adaptcoeffs; /* where the next adaption value is stored */
+
+    int avg; /* running average of |output|; used for >= 3980 files only */
+};
+
+/* We name the functions according to the ORDER and FRACBITS
+   pre-processor symbols and build multiple .o files from this .c file
+   - this increases code-size but gives the compiler more scope for
+   optimising the individual functions, as well as replacing a lot of
+   variables with constants.
+
+   ORDER and FRACBITS are defined by the filter_<ORDER>_<FRACBITS>.c file
+   that includes this one. Only FRACBITS == 11 is shared by two orders, so
+   only that case looks at ORDER.
+*/
+
+#if FRACBITS == 11
+  #if ORDER == 16
+     #define INIT_FILTER   init_filter_16_11
+     #define APPLY_FILTER apply_filter_16_11
+  #elif ORDER == 64
+     #define INIT_FILTER  init_filter_64_11
+     #define APPLY_FILTER apply_filter_64_11
+  #endif
+#elif FRACBITS == 13 /* ORDER 256, from filter_256_13.c */
+  #define INIT_FILTER  init_filter_256_13
+  #define APPLY_FILTER apply_filter_256_13
+#elif FRACBITS == 10 /* ORDER 32, from filter_32_10.c */
+  #define INIT_FILTER  init_filter_32_10
+  #define APPLY_FILTER apply_filter_32_10
+#elif FRACBITS == 15 /* ORDER 1280, from filter_1280_15.c */
+  #define INIT_FILTER  init_filter_1280_15
+  #define APPLY_FILTER apply_filter_1280_15
+#endif
+
+/* Some macros to handle the fixed-point stuff */
+
+/* Convert from (32-FRACBITS).FRACBITS fixed-point format to an
+   integer (rounding to nearest). The argument of FP_TO_INT is
+   parenthesised so that an expression argument is rounded as a whole. */
+#define FP_HALF  (1 << (FRACBITS - 1))   /* 0.5 in fixed-point format. */
+#define FP_TO_INT(x) (((x) + FP_HALF) >> FRACBITS)  /* round(x) */
+
+#ifdef CPU_ARM /* SATURATE(x): clamp x to the signed 16-bit range; three
+                  implementations, selected by CPU */
+#if ARM_ARCH >= 6
+#define SATURATE(x) ({int __res; asm("ssat %0, #16, %1" : "=r"(__res) : "r"(x)); __res; })
+#else /* ARM_ARCH < 6 */
+/* Keeping the asr #31 outside of the asm allows loads to be scheduled between
+   it and the rest of the block on ARM9E, with the load's result latency filled
+   by the other calculations.
+   __res starts as the sign of x (0 or -1). If that equals x >> 15, x fits
+   in 16 bits and is used unchanged; otherwise the two eor instructions
+   turn the sign into 0x7fff or its complement. */
+#define SATURATE(x) ({ \
+    int __res = (x) >> 31; \
+    asm volatile ( \
+        "teq %0, %1, asr #15\n\t" \
+        "moveq %0, %1\n\t" \
+        "eorne %0, %0, #0xff\n\t" \
+        "eorne %0, %0, #0x7f00" \
+        : "+r" (__res) : "r" (x) : "cc" \
+    ); \
+    __res; \
+})
+#endif /* ARM_ARCH */
+#else /* !CPU_ARM: plain C version; note that x is evaluated more than
+         once */
+#define SATURATE(x) (LIKELY((x) == (int16_t)(x)) ? (x) : ((x) >> 31) ^ 0x7FFF)
+#endif
+
+/* Apply the filter with state f to count entries in data[].
+
+   This variant is used for file versions >= 3980 (see APPLY_FILTER).
+   Each entry of data[] is replaced in place by itself plus the rounded
+   filter output. The coefficients are adjusted according to the sign of
+   the entry as it was before being replaced. */
+
+static void ICODE_ATTR_DEMAC do_apply_filter_3980(struct filter_t* f,
+                                                  int32_t* data, int count)
+{
+    int res;
+    int absres; 
+
+#ifdef PREPARE_SCALARPRODUCT
+    PREPARE_SCALARPRODUCT
+#endif
+
+    while(LIKELY(count--))
+    {
+#ifdef FUSED_VECTOR_MATH
+        if (LIKELY(*data != 0)) {
+            if (*data < 0)
+                res = vector_sp_add(f->coeffs, f->delay - ORDER,
+                                    f->adaptcoeffs - ORDER);
+            else
+                res = vector_sp_sub(f->coeffs, f->delay - ORDER,
+                                    f->adaptcoeffs - ORDER);
+        } else {
+            res = scalarproduct(f->coeffs, f->delay - ORDER);
+        }
+        res = FP_TO_INT(res);
+#else
+        res = FP_TO_INT(scalarproduct(f->coeffs, f->delay - ORDER));
+
+        if (LIKELY(*data != 0)) {
+            if (*data < 0)
+                vector_add(f->coeffs, f->adaptcoeffs - ORDER);
+            else
+                vector_sub(f->coeffs, f->adaptcoeffs - ORDER);
+        }
+#endif
+
+        res += *data;
+
+        *data++ = res;
+
+        /* Update the output history */
+        *f->delay++ = SATURATE(res);
+
+        /* Version 3.98 and later files */
+
+        /* Update the adaption coefficients: the new entry has magnitude
+           32, 16 or 8 depending on how |res| compares with the running
+           average, or is 0 when res is 0. With an arithmetic right shift
+           of a 32-bit int it is positive for negative res and negative
+           otherwise. */
+        absres = (res < 0 ? -res : res);
+
+        if (UNLIKELY(absres > 3 * f->avg))
+            *f->adaptcoeffs = ((res >> 25) & 64) - 32;
+        else if (3 * absres > 4 * f->avg)
+            *f->adaptcoeffs = ((res >> 26) & 32) - 16;
+        else if (LIKELY(absres > 0))
+            *f->adaptcoeffs = ((res >> 27) & 16) - 8;
+        else
+            *f->adaptcoeffs = 0;
+
+        f->avg += (absres - f->avg) / 16;
+
+        f->adaptcoeffs[-1] >>= 1;
+        f->adaptcoeffs[-2] >>= 1;
+        f->adaptcoeffs[-8] >>= 1;
+
+        f->adaptcoeffs++;
+
+        /* Have we filled the history buffer? If so, move the last ORDER*2
+           entries back to just after the coefficients and reset both
+           write pointers to their initial positions. */
+        if (UNLIKELY(f->delay == f->history_end)) {
+            memmove(f->coeffs + ORDER, f->delay - (ORDER*2),
+                    (ORDER*2) * sizeof(filter_int));
+            f->adaptcoeffs = f->coeffs + ORDER*2;
+            f->delay = f->coeffs + ORDER*3;
+        }
+    }
+}
+/*
+   Apply the filter with state f to count entries in data[].
+
+   This variant is used for file versions below 3980 (see APPLY_FILTER).
+   It differs from do_apply_filter_3980() only in how the adaption values
+   are produced; f->avg is not used.
+*/
+static void ICODE_ATTR_DEMAC do_apply_filter_3970(struct filter_t* f,
+                                                  int32_t* data, int count)
+{
+    int res;
+    
+#ifdef PREPARE_SCALARPRODUCT
+    PREPARE_SCALARPRODUCT
+#endif
+
+    while(LIKELY(count--))
+    {
+#ifdef FUSED_VECTOR_MATH
+        if (LIKELY(*data != 0)) {
+            if (*data < 0)
+                res = vector_sp_add(f->coeffs, f->delay - ORDER,
+                                    f->adaptcoeffs - ORDER);
+            else
+                res = vector_sp_sub(f->coeffs, f->delay - ORDER,
+                                    f->adaptcoeffs - ORDER);
+        } else {
+            res = scalarproduct(f->coeffs, f->delay - ORDER);
+        }
+        res = FP_TO_INT(res);
+#else
+        res = FP_TO_INT(scalarproduct(f->coeffs, f->delay - ORDER));
+
+        if (LIKELY(*data != 0)) {
+            if (*data < 0)
+                vector_add(f->coeffs, f->adaptcoeffs - ORDER);
+            else
+                vector_sub(f->coeffs, f->adaptcoeffs - ORDER);
+        }
+#endif
+
+        /* res has already been converted above from
+           (32-FRACBITS).FRACBITS fixed-point format to an integer
+           (rounding to nearest); here the input value is added to
+           it */
+        res += *data;
+
+        *data++ = res;
+
+        /* Update the output history */
+        *f->delay++ = SATURATE(res);
+
+        /* Version ??? to < 3.98 files (untested).
+           The new adaption value is 0 when res is 0, otherwise of
+           magnitude 4. */
+        f->adaptcoeffs[0] = (res == 0) ? 0 : ((res >> 28) & 8) - 4;
+        f->adaptcoeffs[-4] >>= 1;
+        f->adaptcoeffs[-8] >>= 1;
+
+        f->adaptcoeffs++;
+
+        /* Have we filled the history buffer? If so, move the last ORDER*2
+           entries back to just after the coefficients and reset both
+           write pointers to their initial positions. */
+        if (UNLIKELY(f->delay == f->history_end)) {
+            memmove(f->coeffs + ORDER, f->delay - (ORDER*2),
+                    (ORDER*2) * sizeof(filter_int));
+            f->adaptcoeffs = f->coeffs + ORDER*2;
+            f->delay = f->coeffs + ORDER*3;
+        }
+    }
+}
+
+/* Filter state, indexed by channel */
+static struct filter_t filter[2] IBSS_ATTR_DEMAC;
+
+/* Reset filter state f to use the memory at buf.
+   The first ORDER*3 entries of buf are zeroed: the coefficients followed
+   by the initial ORDER*2 entries of history. The history may then grow by
+   FILTER_HISTORY_SIZE entries before it has to be moved back. */
+static void do_init_filter(struct filter_t* f, filter_int* buf)
+{
+    filter_int* const base = buf;
+
+    memset(base, 0, ORDER*3 * sizeof(filter_int));
+
+    f->coeffs      = base;
+    f->adaptcoeffs = base + ORDER*2;
+    f->delay       = base + ORDER*3;
+    f->history_end = base + ORDER*3 + FILTER_HISTORY_SIZE;
+
+    /* The running average starts from zero */
+    f->avg = 0;
+}
+
+/* Initialise the filters of both channels inside buf: channel 0 gets the
+   first ORDER*3 + FILTER_HISTORY_SIZE entries, channel 1 the entries that
+   follow. */
+void INIT_FILTER(filter_int* buf)
+{
+    filter_int* const second = buf + ORDER*3 + FILTER_HISTORY_SIZE;
+
+    do_init_filter(&filter[0], buf);
+    do_init_filter(&filter[1], second);
+}
+
+/* Run the filter of the given channel over count entries of data[],
+   choosing the implementation that matches fileversion. */
+void ICODE_ATTR_DEMAC APPLY_FILTER(int fileversion, int channel,
+                                   int32_t* data, int count)
+{
+    struct filter_t* const state = &filter[channel];
+
+    if (fileversion < 3980) {
+        do_apply_filter_3970(state, data, count);
+        return;
+    }
+
+    do_apply_filter_3980(state, data, count);
+}
--- a/lib/rbcodec/codecs/demac/libdemac/filter.h
+++ b/lib/rbcodec/codecs/demac/libdemac/filter.h
@ -0,0 +1,50 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id$
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#ifndef _APE_FILTER_H
+#define _APE_FILTER_H
+
+#include "demac_config.h"
+
+void init_filter_16_11(filter_int* buf); /* ORDER 16, FRACBITS 11.
+   Each init_filter_<O>_<F>() / apply_filter_<O>_<F>() pair below is built
+   from filter.c with ORDER <O> and FRACBITS <F>. init sets up the filters
+   of both channels inside buf; apply filters count entries of decoded[]
+   in place for the given channel. */
+void apply_filter_16_11(int fileversion, int channel,
+                        int32_t* decoded, int count);
+
+void init_filter_64_11(filter_int* buf); /* ORDER 64, FRACBITS 11 */
+void apply_filter_64_11(int fileversion, int channel,
+                        int32_t* decoded, int count);
+
+void init_filter_32_10(filter_int* buf); /* ORDER 32, FRACBITS 10 */
+void apply_filter_32_10(int fileversion, int channel,
+                        int32_t* decoded, int count);
+
+void init_filter_256_13(filter_int* buf); /* ORDER 256, FRACBITS 13 */
+void apply_filter_256_13(int fileversion, int channel,
+                         int32_t* decoded, int count);
+
+void init_filter_1280_15(filter_int* buf); /* ORDER 1280, FRACBITS 15 */
+void apply_filter_1280_15(int fileversion, int channel,
+                          int32_t* decoded, int count);
+
+#endif
--- a/lib/rbcodec/codecs/demac/libdemac/filter_1280_15.c
+++ b/lib/rbcodec/codecs/demac/libdemac/filter_1280_15.c
@ -0,0 +1,32 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id$
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#include "demac_config.h"
+#ifndef FILTER256_IRAM /* unless this is defined, the filter functions in
+                          this file are built with an empty
+                          ICODE_ATTR_DEMAC.
+                          NOTE(review): same symbol as in filter_256_13.c -
+                          confirm that sharing it is intended */
+#undef ICODE_ATTR_DEMAC
+#define ICODE_ATTR_DEMAC
+#endif
+#define ORDER 1280  /* with these two settings filter.c defines */
+#define FRACBITS 15 /* init_filter_1280_15() and apply_filter_1280_15() */
+#include "filter.c"
--- a/lib/rbcodec/codecs/demac/libdemac/filter_16_11.c
+++ b/lib/rbcodec/codecs/demac/libdemac/filter_16_11.c
@ -0,0 +1,27 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id$
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#define ORDER 16    /* with these two settings filter.c defines */
+#define FRACBITS 11 /* init_filter_16_11() and apply_filter_16_11() */
+#include "filter.c"
--- a/lib/rbcodec/codecs/demac/libdemac/filter_256_13.c
+++ b/lib/rbcodec/codecs/demac/libdemac/filter_256_13.c
@ -0,0 +1,32 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id$
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#include "demac_config.h"
+#ifndef FILTER256_IRAM /* unless this is defined, the filter functions in
+                          this file are built with an empty
+                          ICODE_ATTR_DEMAC */
+#undef ICODE_ATTR_DEMAC
+#define ICODE_ATTR_DEMAC
+#endif
+#define ORDER 256   /* with these two settings filter.c defines */
+#define FRACBITS 13 /* init_filter_256_13() and apply_filter_256_13() */
+#include "filter.c"
--- a/lib/rbcodec/codecs/demac/libdemac/filter_32_10.c
+++ b/lib/rbcodec/codecs/demac/libdemac/filter_32_10.c
@ -0,0 +1,27 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id$
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#define ORDER 32    /* with these two settings filter.c defines */
+#define FRACBITS 10 /* init_filter_32_10() and apply_filter_32_10() */
+#include "filter.c"
--- a/lib/rbcodec/codecs/demac/libdemac/filter_64_11.c
+++ b/lib/rbcodec/codecs/demac/libdemac/filter_64_11.c
@ -0,0 +1,27 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id$
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#define ORDER 64    /* with these two settings filter.c defines */
+#define FRACBITS 11 /* init_filter_64_11() and apply_filter_64_11() */
+#include "filter.c"
--- a/lib/rbcodec/codecs/demac/libdemac/parser.c
+++ b/lib/rbcodec/codecs/demac/libdemac/parser.c
@ -0,0 +1,402 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id$
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#include <inttypes.h>
+#include <string.h>
+#ifndef ROCKBOX
+#include <stdio.h>
+#include <stdlib.h>
+#include "inttypes.h"
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#endif
+
+#include "parser.h"
+
+/* Local "larger of two values" helper.  Any earlier definition of APE_MAX
+   is dropped first so that this one always applies.  The winning argument
+   is evaluated twice, so do not pass expressions with side effects. */
+#ifdef APE_MAX
+#undef APE_MAX
+#endif
+#define APE_MAX(a,b) ((a)>(b)?(a):(b))
+
+
+/* Decode the little-endian 16-bit value stored in buf[0..1] and return it
+   as a signed quantity. */
+static inline int16_t get_int16(unsigned char* buf)
+{
+    int lo = buf[0];
+    int hi = buf[1];
+
+    return (hi << 8) | lo;
+}
+
+/* Decode the little-endian unsigned 16-bit value stored in buf[0..1]. */
+static inline uint16_t get_uint16(unsigned char* buf)
+{
+    int lo = buf[0];
+    int hi = buf[1];
+
+    return (hi << 8) | lo;
+}
+
+/* Decode the little-endian unsigned 32-bit value stored in buf[0..3].
+
+   Every byte is widened to uint32_t before it is shifted: a plain
+   "buf[3] << 24" shifts a promoted (signed) int, which is undefined once
+   the byte is 0x80 or larger. */
+static inline uint32_t get_uint32(unsigned char* buf)
+{
+    return  (uint32_t)buf[0]        |
+           ((uint32_t)buf[1] << 8)  |
+           ((uint32_t)buf[2] << 16) |
+           ((uint32_t)buf[3] << 24);
+}
+
+
+/*
+ * Parse an APE header that is already in memory.
+ *
+ * buf     - start of the file data; the "MAC " magic is expected at buf[0].
+ *           No length is passed in, so the caller has to supply enough
+ *           bytes for every offset read below.  For fileversion >= 3980
+ *           that is descriptorlength + 24 bytes, where descriptorlength
+ *           itself comes from the file and is not range-checked here.
+ * ape_ctx - context to fill in.  It is zeroed first, so anything stored in
+ *           it beforehand (including seektable and maxseekpoints) is lost.
+ *
+ * Returns 0 on success, -1 if the magic is not "MAC ".
+ *
+ * The seek table itself is not read; only its length and its position in
+ * the file (seektablefilepos) are recorded.
+ */
+int ape_parseheaderbuf(unsigned char* buf, struct ape_ctx_t* ape_ctx)
+{
+    unsigned char* header;
+
+    memset(ape_ctx,0,sizeof(struct ape_ctx_t));
+    /* TODO: Skip any leading junk such as id3v2 tags */
+    ape_ctx->junklength = 0;
+
+    memcpy(ape_ctx->magic, buf, 4);
+    if (memcmp(ape_ctx->magic,"MAC ",4)!=0)
+    {
+        return -1;
+    }
+
+    /* All multi-byte fields are decoded as little-endian by the get_*()
+       helpers. */
+    ape_ctx->fileversion = get_int16(buf + 4);
+
+    if (ape_ctx->fileversion >= 3980)
+    {
+        /* Newer layout: a descriptor block followed by a separate header
+           block. */
+        ape_ctx->padding1 = get_int16(buf + 6);
+        ape_ctx->descriptorlength = get_uint32(buf + 8);
+        ape_ctx->headerlength = get_uint32(buf + 12);
+        ape_ctx->seektablelength = get_uint32(buf + 16);
+        ape_ctx->wavheaderlength = get_uint32(buf + 20);
+        ape_ctx->audiodatalength = get_uint32(buf + 24);
+        ape_ctx->audiodatalength_high = get_uint32(buf + 28);
+        ape_ctx->wavtaillength = get_uint32(buf + 32);
+        memcpy(ape_ctx->md5, buf + 36, 16);
+
+        /* The header block starts where the descriptor says it ends */
+        header = buf + ape_ctx->descriptorlength;
+
+        /* Read header data */
+        ape_ctx->compressiontype = get_uint16(header + 0);
+        ape_ctx->formatflags = get_uint16(header + 2);
+        ape_ctx->blocksperframe = get_uint32(header + 4);
+        ape_ctx->finalframeblocks = get_uint32(header + 8);
+        ape_ctx->totalframes = get_uint32(header + 12);
+        ape_ctx->bps = get_uint16(header + 16);
+        ape_ctx->channels = get_uint16(header + 18);
+        ape_ctx->samplerate = get_uint32(header + 20);
+
+        /* File order here: junk, descriptor, header, seek table,
+           wav header, first frame. */
+        ape_ctx->seektablefilepos = ape_ctx->junklength + 
+                                    ape_ctx->descriptorlength +
+                                    ape_ctx->headerlength;
+
+        ape_ctx->firstframe = ape_ctx->junklength + ape_ctx->descriptorlength +
+                              ape_ctx->headerlength + ape_ctx->seektablelength +
+                              ape_ctx->wavheaderlength;
+    } else {
+        /* Older layout: one fixed 32-byte header, optionally followed by a
+           peak level and a seek-element count.  Bytes 20..23 (read as
+           wavtaillength by ape_parseheader()) are not read here. */
+        ape_ctx->headerlength = 32;
+        ape_ctx->compressiontype = get_uint16(buf + 6);
+        ape_ctx->formatflags = get_uint16(buf + 8);
+        ape_ctx->channels = get_uint16(buf + 10);
+        ape_ctx->samplerate = get_uint32(buf + 12);
+        ape_ctx->wavheaderlength = get_uint32(buf + 16);
+        ape_ctx->totalframes = get_uint32(buf + 24);
+        ape_ctx->finalframeblocks = get_uint32(buf + 28);
+
+        /* The peak level is not stored; growing headerlength skips it */
+        if (ape_ctx->formatflags & MAC_FORMAT_FLAG_HAS_PEAK_LEVEL)
+        {
+            ape_ctx->headerlength += 4;
+        }
+
+        if (ape_ctx->formatflags & MAC_FORMAT_FLAG_HAS_SEEK_ELEMENTS)
+        {
+            /* Explicit entry count stored at the current end of the
+               header; converted to a length in bytes. */
+            ape_ctx->seektablelength = get_uint32(buf + ape_ctx->headerlength);
+            ape_ctx->seektablelength *= sizeof(int32_t);
+            ape_ctx->headerlength += 4;
+        } else {
+            /* No explicit count: one entry per frame */
+            ape_ctx->seektablelength = ape_ctx->totalframes * sizeof(int32_t);
+        }
+
+        /* The sample size is encoded in the flags; 16-bit is the default */
+        if (ape_ctx->formatflags & MAC_FORMAT_FLAG_8_BIT)
+            ape_ctx->bps = 8;
+        else if (ape_ctx->formatflags & MAC_FORMAT_FLAG_24_BIT)
+            ape_ctx->bps = 24;
+        else
+            ape_ctx->bps = 16;
+
+        /* blocksperframe is not stored in this layout; it is chosen from
+           the file version and compression type. */
+        if (ape_ctx->fileversion >= 3950)
+            ape_ctx->blocksperframe = 73728 * 4;
+        else if ((ape_ctx->fileversion >= 3900) || (ape_ctx->fileversion >= 3800 && ape_ctx->compressiontype >= 4000))
+            ape_ctx->blocksperframe = 73728;
+        else
+            ape_ctx->blocksperframe = 9216;
+
+        /* File order here: junk, header, wav header, seek table,
+           first frame. */
+        ape_ctx->seektablefilepos = ape_ctx->junklength + ape_ctx->headerlength +
+                                    ape_ctx->wavheaderlength;
+
+        ape_ctx->firstframe = ape_ctx->junklength + ape_ctx->headerlength +
+                              ape_ctx->wavheaderlength + ape_ctx->seektablelength;
+    }
+
+    /* Every frame but the last holds blocksperframe blocks */
+    ape_ctx->totalsamples = ape_ctx->finalframeblocks;
+    if (ape_ctx->totalframes > 1)
+        ape_ctx->totalsamples += ape_ctx->blocksperframe * (ape_ctx->totalframes-1);
+
+    /* maxseekpoints was zeroed by the memset above, so this always yields
+       the number of entries in the file's seek table.
+       NOTE(review): taking the maximum does not limit the count to the
+       caller's buffer size - presumably callers clamp it themselves after
+       setting maxseekpoints; confirm. */
+    ape_ctx->numseekpoints = APE_MAX(ape_ctx->maxseekpoints,
+                                     ape_ctx->seektablelength / sizeof(int32_t));
+
+    return 0;
+}
+
+
+#ifndef ROCKBOX
+/* Helper functions */
+
+/* Read a little-endian unsigned 16-bit value from fd into *x.
+   Returns 0 on success, or -1 when read() does not deliver exactly two
+   bytes; *x is left untouched in that case. */
+static int read_uint16(int fd, uint16_t* x)
+{
+    unsigned char raw[2];
+
+    if (read(fd, raw, sizeof(raw)) != 2)
+        return -1;
+
+    *x = (raw[1] << 8) | raw[0];
+    return 0;
+}
+
+/* Signed counterpart of read_uint16(): the two bytes are read by
+   read_uint16() and stored through x reinterpreted as a uint16_t pointer.
+   Returns whatever read_uint16() returns. */
+static int read_int16(int fd, int16_t* x)
+{
+    return read_uint16(fd, (uint16_t*)x);
+}
+
+/* Read a little-endian unsigned 32-bit value from fd into *x.
+   Returns 0 on success, or -1 when read() does not deliver exactly four
+   bytes; *x is left untouched in that case.
+
+   Every byte is widened to uint32_t before it is shifted: a plain
+   "tmp[3] << 24" shifts a promoted (signed) int, which is undefined once
+   the byte is 0x80 or larger. */
+static int read_uint32(int fd, uint32_t* x)
+{
+    unsigned char tmp[4];
+    int n;
+
+    n = read(fd,tmp,4);
+
+    if (n != 4)
+        return -1;
+
+    *x =  (uint32_t)tmp[0]        |
+         ((uint32_t)tmp[1] << 8)  |
+         ((uint32_t)tmp[2] << 16) |
+         ((uint32_t)tmp[3] << 24);
+
+    return 0;
+}
+
+/*
+ * Parse the APE header of the file open on fd into ape_ctx and load the
+ * seek table.
+ *
+ * Unlike ape_parseheaderbuf() the context is not cleared first, and
+ * seektablefilepos / numseekpoints are not filled in.  When the seek table
+ * length is non-zero, ape_ctx->seektable is allocated with malloc() here
+ * and is not released on success.
+ *
+ * Returns 0 on success, or -1 on a short read, a bad magic or a failed
+ * allocation.  If the seek table was allocated and then had to be freed
+ * again, ape_ctx->seektable is reset to NULL so the caller never sees a
+ * pointer to freed memory.  The results of the lseek() calls are not
+ * checked.
+ */
+int ape_parseheader(int fd, struct ape_ctx_t* ape_ctx)
+{
+    uint32_t i, numentries;
+    int n;
+
+    /* TODO: Skip any leading junk such as id3v2 tags */
+    ape_ctx->junklength = 0;
+
+    lseek(fd,ape_ctx->junklength,SEEK_SET);
+
+    n = read(fd,&ape_ctx->magic,4);
+    if (n != 4) return -1;
+
+    if (memcmp(ape_ctx->magic,"MAC ",4)!=0)
+    {
+        return -1;
+    }
+
+    if (read_int16(fd,&ape_ctx->fileversion) < 0)
+        return -1;
+
+    if (ape_ctx->fileversion >= 3980)
+    {
+        /* Newer layout: descriptor block, then header block */
+        if (read_int16(fd,&ape_ctx->padding1) < 0)
+            return -1;
+        if (read_uint32(fd,&ape_ctx->descriptorlength) < 0)
+            return -1;
+        if (read_uint32(fd,&ape_ctx->headerlength) < 0)
+            return -1;
+        if (read_uint32(fd,&ape_ctx->seektablelength) < 0)
+            return -1;
+        if (read_uint32(fd,&ape_ctx->wavheaderlength) < 0)
+            return -1;
+        if (read_uint32(fd,&ape_ctx->audiodatalength) < 0)
+            return -1;
+        if (read_uint32(fd,&ape_ctx->audiodatalength_high) < 0)
+            return -1;
+        if (read_uint32(fd,&ape_ctx->wavtaillength) < 0)
+            return -1;
+        if (read(fd,&ape_ctx->md5,16) != 16)
+            return -1;
+
+        /* Skip any unknown bytes at the end of the descriptor.  This is for future
+           compatibility */
+        if (ape_ctx->descriptorlength > 52)
+            lseek(fd,ape_ctx->descriptorlength - 52, SEEK_CUR);
+
+        /* Read header data */
+        if (read_uint16(fd,&ape_ctx->compressiontype) < 0)
+            return -1;
+        if (read_uint16(fd,&ape_ctx->formatflags) < 0)
+            return -1;
+        if (read_uint32(fd,&ape_ctx->blocksperframe) < 0)
+            return -1;
+        if (read_uint32(fd,&ape_ctx->finalframeblocks) < 0)
+            return -1;
+        if (read_uint32(fd,&ape_ctx->totalframes) < 0)
+            return -1;
+        if (read_uint16(fd,&ape_ctx->bps) < 0)
+            return -1;
+        if (read_uint16(fd,&ape_ctx->channels) < 0)
+            return -1;
+        if (read_uint32(fd,&ape_ctx->samplerate) < 0)
+            return -1;
+    } else {
+        /* Older layout: one fixed 32-byte header with optional extras */
+        ape_ctx->descriptorlength = 0;
+        ape_ctx->headerlength = 32;
+
+        if (read_uint16(fd,&ape_ctx->compressiontype) < 0)
+            return -1;
+        if (read_uint16(fd,&ape_ctx->formatflags) < 0)
+            return -1;
+        if (read_uint16(fd,&ape_ctx->channels) < 0)
+            return -1;
+        if (read_uint32(fd,&ape_ctx->samplerate) < 0)
+            return -1;
+        if (read_uint32(fd,&ape_ctx->wavheaderlength) < 0)
+            return -1;
+        if (read_uint32(fd,&ape_ctx->wavtaillength) < 0)
+            return -1;
+        if (read_uint32(fd,&ape_ctx->totalframes) < 0)
+            return -1;
+        if (read_uint32(fd,&ape_ctx->finalframeblocks) < 0)
+            return -1;
+
+        if (ape_ctx->formatflags & MAC_FORMAT_FLAG_HAS_PEAK_LEVEL)
+        {
+            lseek(fd, 4, SEEK_CUR);   /* Skip the peak level */
+            ape_ctx->headerlength += 4;
+        }
+
+        if (ape_ctx->formatflags & MAC_FORMAT_FLAG_HAS_SEEK_ELEMENTS)
+        {
+            /* Explicit entry count, converted to a length in bytes */
+            if (read_uint32(fd,&ape_ctx->seektablelength) < 0)
+                return -1;
+            ape_ctx->headerlength += 4;
+            ape_ctx->seektablelength *= sizeof(int32_t);
+        } else {
+            /* No explicit count: one entry per frame */
+            ape_ctx->seektablelength = ape_ctx->totalframes * sizeof(int32_t);
+        }
+
+        /* The sample size is encoded in the flags; 16-bit is the default */
+        if (ape_ctx->formatflags & MAC_FORMAT_FLAG_8_BIT)
+            ape_ctx->bps = 8;
+        else if (ape_ctx->formatflags & MAC_FORMAT_FLAG_24_BIT)
+            ape_ctx->bps = 24;
+        else
+            ape_ctx->bps = 16;
+
+        /* blocksperframe is not stored in this layout; it is chosen from
+           the file version and compression type. */
+        if (ape_ctx->fileversion >= 3950)
+            ape_ctx->blocksperframe = 73728 * 4;
+        else if ((ape_ctx->fileversion >= 3900) || (ape_ctx->fileversion >= 3800 && ape_ctx->compressiontype >= 4000))
+            ape_ctx->blocksperframe = 73728;
+        else
+            ape_ctx->blocksperframe = 9216;
+
+        /* Skip any stored wav header */
+        if (!(ape_ctx->formatflags & MAC_FORMAT_FLAG_CREATE_WAV_HEADER))
+        {
+            lseek(fd, ape_ctx->wavheaderlength, SEEK_CUR);
+        }
+    }
+
+    /* Every frame but the last holds blocksperframe blocks */
+    ape_ctx->totalsamples = ape_ctx->finalframeblocks;
+    if (ape_ctx->totalframes > 1)
+        ape_ctx->totalsamples += ape_ctx->blocksperframe * (ape_ctx->totalframes-1);
+
+    if (ape_ctx->seektablelength > 0)
+    {
+        ape_ctx->seektable = malloc(ape_ctx->seektablelength);
+        if (ape_ctx->seektable == NULL)
+            return -1;
+
+        numentries = ape_ctx->seektablelength / sizeof(uint32_t);
+        for (i = 0; i < numentries; i++)
+        {
+            if (read_uint32(fd,&ape_ctx->seektable[i]) < 0)
+            {
+                 free(ape_ctx->seektable);
+                 /* Do not leave a dangling pointer behind for the caller */
+                 ape_ctx->seektable = NULL;
+                 return -1;
+            }
+        }
+    }
+
+    ape_ctx->firstframe = ape_ctx->junklength + ape_ctx->descriptorlength +
+                           ape_ctx->headerlength + ape_ctx->seektablelength +
+                           ape_ctx->wavheaderlength;
+
+    return 0;
+}
+
+/*
+ * Print the parsed header fields of ape_ctx to stdout, followed by the seek
+ * table and the derived values.
+ *
+ * The seek table is only listed when its entry count equals totalframes;
+ * otherwise "No seektable" is printed.  In the listing ape_ctx->seektable
+ * is dereferenced, so it must point at seektablelength bytes of entries.
+ *
+ * uint32_t fields are printed with PRIu32 so the conversion matches the
+ * argument type; the 16-bit fields are promoted to int and keep %d.
+ */
+void ape_dumpinfo(struct ape_ctx_t* ape_ctx)
+{
+    uint32_t i;
+    uint32_t numentries = ape_ctx->seektablelength / sizeof(uint32_t);
+
+    printf("Descriptor Block:\n\n");
+    printf("magic                = \"%c%c%c%c\"\n",
+            ape_ctx->magic[0],ape_ctx->magic[1],
+            ape_ctx->magic[2],ape_ctx->magic[3]);
+    printf("fileversion          = %d\n",ape_ctx->fileversion);
+    printf("descriptorlength     = %" PRIu32 "\n",ape_ctx->descriptorlength);
+    printf("headerlength         = %" PRIu32 "\n",ape_ctx->headerlength);
+    printf("seektablelength      = %" PRIu32 "\n",ape_ctx->seektablelength);
+    printf("wavheaderlength      = %" PRIu32 "\n",ape_ctx->wavheaderlength);
+    printf("audiodatalength      = %" PRIu32 "\n",ape_ctx->audiodatalength);
+    printf("audiodatalength_high = %" PRIu32 "\n",ape_ctx->audiodatalength_high);
+    printf("wavtaillength        = %" PRIu32 "\n",ape_ctx->wavtaillength);
+    printf("md5                  = ");
+    for (i = 0; i < 16; i++)
+        printf("%02x",ape_ctx->md5[i]);
+    printf("\n");
+
+    printf("\nHeader Block:\n\n");
+
+    printf("compressiontype      = %d\n",ape_ctx->compressiontype);
+    printf("formatflags          = %d\n",ape_ctx->formatflags);
+    printf("blocksperframe       = %" PRIu32 "\n",ape_ctx->blocksperframe);
+    printf("finalframeblocks     = %" PRIu32 "\n",ape_ctx->finalframeblocks);
+    printf("totalframes          = %" PRIu32 "\n",ape_ctx->totalframes);
+    printf("bps                  = %d\n",ape_ctx->bps);
+    printf("channels             = %d\n",ape_ctx->channels);
+    printf("samplerate           = %" PRIu32 "\n",ape_ctx->samplerate);
+
+    printf("\nSeektable\n\n");
+    if (numentries != ape_ctx->totalframes)
+    {
+        printf("No seektable\n");
+    }
+    else
+    {
+        for (i = 0; i < numentries; i++)
+        {
+            if (i < ape_ctx->totalframes-1) {
+                /* Frame size = distance to the next entry */
+                printf("%8" PRIu32 "   %" PRIu32 " (%" PRIu32 " bytes)\n",
+                       i,ape_ctx->seektable[i],
+                       ape_ctx->seektable[i+1]-ape_ctx->seektable[i]);
+            } else {
+                /* Last frame: there is no following entry to measure to */
+                printf("%8" PRIu32 "   %" PRIu32 "\n",i,ape_ctx->seektable[i]);
+            }
+        }
+    }
+    printf("\nCalculated information:\n\n");
+    printf("junklength           = %" PRIu32 "\n",ape_ctx->junklength);
+    printf("firstframe           = %" PRIu32 "\n",ape_ctx->firstframe);
+    printf("totalsamples         = %" PRIu32 "\n",ape_ctx->totalsamples);
+}
+
+#endif /* !ROCKBOX */
--- a/lib/rbcodec/codecs/demac/libdemac/parser.h
+++ b/lib/rbcodec/codecs/demac/libdemac/parser.h
@ -0,0 +1,137 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id$
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#ifndef _APE_PARSER_H
+#define _APE_PARSER_H
+
+#include <inttypes.h>
+#include "demac_config.h"
+
+/* The earliest and latest file formats supported by this library */
+#define APE_MIN_VERSION 3970
+#define APE_MAX_VERSION 3990
+
+/* Bit masks for the formatflags header field (struct ape_ctx_t); parser.c
+   tests several of them with a bitwise AND. */
+#define MAC_FORMAT_FLAG_8_BIT                 1    // is 8-bit [OBSOLETE]
+#define MAC_FORMAT_FLAG_CRC                   2    // uses the new CRC32 error detection [OBSOLETE]
+#define MAC_FORMAT_FLAG_HAS_PEAK_LEVEL        4    // uint32 nPeakLevel after the header [OBSOLETE]
+#define MAC_FORMAT_FLAG_24_BIT                8    // is 24-bit [OBSOLETE]
+#define MAC_FORMAT_FLAG_HAS_SEEK_ELEMENTS    16    // has the number of seek elements after the peak level
+#define MAC_FORMAT_FLAG_CREATE_WAV_HEADER    32    // create the wave header on decompression (not stored)
+
+
+/* Special frame codes:
+
+   MONO_SILENCE - All PCM samples in frame are zero (mono streams only)
+   LEFT_SILENCE - All PCM samples for left channel in frame are zero (stereo streams)
+   RIGHT_SILENCE - All PCM samples for right channel in frame are zero (stereo streams)
+   PSEUDO_STEREO - Left and Right channels are identical
+
+*/
+
+/* LEFT_SILENCE (1) and RIGHT_SILENCE (2) are separate bits, and
+   STEREO_SILENCE (3) is the value with both of them set. */
+#define APE_FRAMECODE_MONO_SILENCE    1
+#define APE_FRAMECODE_LEFT_SILENCE    1 /* same as mono */
+#define APE_FRAMECODE_RIGHT_SILENCE   2
+#define APE_FRAMECODE_STEREO_SILENCE  3 /* combined */
+#define APE_FRAMECODE_PSEUDO_STEREO   4
+
+#define PREDICTOR_ORDER 8
+/* Total size of all predictor histories - 50 * sizeof(int32_t) */
+/* Counted in int32_t elements: it is added to PREDICTOR_HISTORY_SIZE to
+   size predictor_t.historybuffer.  PREDICTOR_HISTORY_SIZE is not defined
+   in this header - presumably it comes from demac_config.h; confirm. */
+#define PREDICTOR_SIZE 50
+
+
+/* NOTE: This struct is used in predictor-arm.S - any updates need to
+   be reflected there. */
+
+/* State of the stereo predictor.
+
+   predictor-arm.S hard-codes the byte offset of every member (buf at 0,
+   YlastA at 4, ... historybuffer at 100).  Those offsets match this layout
+   only when a pointer is 4 bytes wide. */
+struct predictor_t
+{
+    /* Filter histories */
+    int32_t* buf;
+
+    int32_t YlastA;
+    int32_t XlastA;
+
+    /* NOTE: The order of the next four fields is important for
+       predictor-arm.S */
+    /* The assembly loads YfilterB/XfilterA as one register pair and
+       XfilterB/YfilterA as another, so each pair must stay adjacent. */
+    int32_t YfilterB;
+    int32_t XfilterA;
+    int32_t XfilterB;
+    int32_t YfilterA;
+
+    /* Adaption co-efficients */
+    int32_t YcoeffsA[4];
+    int32_t XcoeffsA[4];
+    int32_t YcoeffsB[5];
+    int32_t XcoeffsB[5];
+    int32_t historybuffer[PREDICTOR_HISTORY_SIZE + PREDICTOR_SIZE];
+};
+
+/* Everything known about one APE stream: the fields parsed from the file
+   header by ape_parseheader() / ape_parseheaderbuf(), values derived from
+   them, the seek table and the decoder state. */
+struct ape_ctx_t
+{
+    /* Derived fields */
+    uint32_t      junklength;       /* Currently always set to 0 by the parsers */
+    uint32_t      firstframe;       /* Sum of the lengths of everything stored before the first frame */
+    uint32_t      totalsamples;     /* finalframeblocks + blocksperframe * (totalframes - 1) */
+
+    /* Info from Descriptor Block */
+    char          magic[4];         /* Must be "MAC " */
+    int16_t       fileversion;
+    int16_t       padding1;
+    uint32_t      descriptorlength;
+    uint32_t      headerlength;
+    uint32_t      seektablelength;  /* In bytes; the parsers divide by 4 to get the entry count */
+    uint32_t      wavheaderlength;
+    uint32_t      audiodatalength;
+    uint32_t      audiodatalength_high;
+    uint32_t      wavtaillength;
+    uint8_t       md5[16];
+
+    /* Info from Header Block */
+    uint16_t      compressiontype;
+    uint16_t      formatflags;      /* MAC_FORMAT_FLAG_* bits */
+    uint32_t      blocksperframe;
+    uint32_t      finalframeblocks;
+    uint32_t      totalframes;
+    uint16_t      bps;
+    uint16_t      channels;
+    uint32_t      samplerate;
+
+    /* Seektable */
+    /* ape_parseheader() allocates seektable with malloc() and fills it;
+       ape_parseheaderbuf() only records numseekpoints and seektablefilepos. */
+    uint32_t*     seektable;        /* Seektable buffer */
+    uint32_t      maxseekpoints;    /* Max seekpoints we can store (size of seektable buffer) */
+    uint32_t      numseekpoints;    /* Number of seekpoints */
+    int           seektablefilepos; /* Location in .ape file of seektable */
+
+    /* Decoder state */
+    uint32_t      CRC;
+    int           frameflags;
+    int           currentframeblocks;
+    int           blocksdecoded;
+    struct predictor_t predictor;
+};
+
+/* Parse the header of the file open on fd and load its seek table.
+   Returns 0 on success, -1 on failure.  parser.c only defines this when
+   ROCKBOX is not defined. */
+int ape_parseheader(int fd, struct ape_ctx_t* ape_ctx);
+/* Parse a header already held in memory at buf; ape_ctx is zeroed first.
+   Returns 0 on success, -1 if the "MAC " magic is missing. */
+int ape_parseheaderbuf(unsigned char* buf, struct ape_ctx_t* ape_ctx);
+/* Print the parsed fields to stdout.  parser.c only defines this when
+   ROCKBOX is not defined. */
+void ape_dumpinfo(struct ape_ctx_t* ape_ctx);
+
+#endif
--- a/lib/rbcodec/codecs/demac/libdemac/predictor-arm.S
+++ b/lib/rbcodec/codecs/demac/libdemac/predictor-arm.S
@ -0,0 +1,702 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id$
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+#include "demac_config.h"
+
+    .section    ICODE_SECTION_DEMAC_ARM,"ax",%progbits
+
+    .align      2
+
+/* NOTE: The following need to be kept in sync with parser.h */
+
+/* Byte offsets from p->buf (held in r14) of the delay and adaption
+   entries.  Each is four times the int32_t index used in the C-style
+   comments below, e.g. "#YDELAYA-12" addresses p->buf[YDELAYA-3].
+   NOTE(review): parser.h does not define these eight names - presumably
+   the values to match live in the C predictor source; confirm. */
+#define YDELAYA        200
+#define YDELAYB        168
+#define XDELAYA        136
+#define XDELAYB        104
+#define YADAPTCOEFFSA   72
+#define XADAPTCOEFFSA   56
+#define YADAPTCOEFFSB   40
+#define XADAPTCOEFFSB   20
+
+/* struct predictor_t members: */
+/* Byte offsets from the struct pointer (held in r12).  They mirror the
+   member order in parser.h and assume a 4-byte pointer for buf. */
+#define buf              0    /* int32_t* buf */
+
+#define YlastA           4    /* int32_t YlastA; */
+#define XlastA           8    /* int32_t XlastA; */
+
+#define YfilterB        12    /* int32_t YfilterB; */
+#define XfilterA        16    /* int32_t XfilterA; */
+
+#define XfilterB        20    /* int32_t XfilterB; */
+#define YfilterA        24    /* int32_t YfilterA; */
+    
+#define YcoeffsA        28    /* int32_t YcoeffsA[4]; */
+#define XcoeffsA        44    /* int32_t XcoeffsA[4]; */
+#define YcoeffsB        60    /* int32_t YcoeffsB[5]; */
+#define XcoeffsB        80    /* int32_t XcoeffsB[5]; */
+
+#define historybuffer  100    /* int32_t historybuffer[] */
+
+@ Macro for loading 2 registers, for various ARM versions.
+@ Registers must start with an even register, and must be consecutive.
+@
+@ Loads reg1 from [base + offset] and reg2 from [base + offset + 4].
+@ Three expansions are selected at preprocessing time:
+@   ARM_ARCH >= 6  - a single ldrd
+@   CPU_ARM7TDMI   - the address is formed in reg1, then both registers
+@                    are loaded with one ldmia (reg1 is overwritten by
+@                    the load)
+@   otherwise      - two separate ldr instructions
+
+.macro LDR2OFS reg1, reg2, base, offset
+#if ARM_ARCH >= 6
+    ldrd    \reg1, [\base, \offset]
+#else /* ARM_ARCH < 6 */
+#ifdef CPU_ARM7TDMI
+    add     \reg1, \base, \offset
+    ldmia   \reg1, {\reg1, \reg2}
+#else /* ARM9 (v4 and v5) is faster this way */
+    ldr     \reg1, [\base, \offset]
+    ldr     \reg2, [\base, \offset+4]
+#endif
+#endif /* ARM_ARCH */
+.endm
+
+@ Macro for storing 2 registers, for various ARM versions.
+@ Registers must start with an even register, and must be consecutive.
+@
+@ Stores reg1 to [base + offset] and reg2 to [base + offset + 4]:
+@ a single strd when ARM_ARCH >= 6, otherwise two separate str
+@ instructions.
+
+.macro STR2OFS reg1, reg2, base, offset
+#if ARM_ARCH >= 6
+    strd    \reg1, [\base, \offset]
+#else
+    str     \reg1, [\base, \offset]
+    str     \reg2, [\base, \offset+4]
+#endif
+.endm
+
+    .global     predictor_decode_stereo
+    .type       predictor_decode_stereo,%function
+
+@ Register usage:
+@
+@ r0-r11 - scratch
+@ r12 - struct predictor_t* p
+@ r14 - int32_t* p->buf
+
+@ void predictor_decode_stereo(struct predictor_t* p,
+@                              int32_t* decoded0,
+@                              int32_t* decoded1,
+@                              int count)
+
+predictor_decode_stereo:
+    stmdb   sp!, {r1-r11, lr}
+
+    @ r1 (decoded0) is [sp]
+    @ r2 (decoded1) is [sp, #4]
+    @ r3 (count)    is [sp, #8]
+
+    mov     r12, r0       @ r12 := p
+    ldr     r14, [r0]     @ r14 := p->buf
+
+loop:
+
+@@@@@@@@@@@@@@@@@@@@@@@@@@@ PREDICTOR Y
+
+@ Predictor Y, Filter A
+
+    ldr     r11, [r12, #YlastA]     @ r11 := p->YlastA
+
+    add     r2, r14, #YDELAYA-12    @ r2 := &p->buf[YDELAYA-3]
+    ldmia   r2, {r2, r3, r10}       @ r2 := p->buf[YDELAYA-3]
+                                    @ r3 := p->buf[YDELAYA-2]
+                                    @ r10 := p->buf[YDELAYA-1]
+
+    add     r6, r12, #YcoeffsA
+    ldmia   r6, {r6 - r9}           @ r6 := p->YcoeffsA[0]
+                                    @ r7 := p->YcoeffsA[1]
+                                    @ r8 := p->YcoeffsA[2]
+                                    @ r9 := p->YcoeffsA[3]
+
+    subs    r10, r11, r10           @ r10 := r11 - r10
+
+    STR2OFS r10, r11, r14, #YDELAYA-4
+                                    @ p->buf[YDELAYA-1] = r10
+                                    @ p->buf[YDELAYA] = r11
+
+    mul     r0, r11, r6             @ r0 := p->buf[YDELAYA] * p->YcoeffsA[0]
+    mla     r0, r10, r7, r0         @ r0 += p->buf[YDELAYA-1] * p->YcoeffsA[1]
+    mla     r0, r3, r8, r0          @ r0 += p->buf[YDELAYA-2] * p->YcoeffsA[2]
+    mla     r0, r2, r9, r0          @ r0 += p->buf[YDELAYA-3] * p->YcoeffsA[3]
+
+    @ flags were set above, in the subs instruction
+    mvngt   r10, #0
+    movlt   r10, #1                 @ r10 := SIGN(r10) (see .c for SIGN macro)
+
+    cmp     r11, #0
+    mvngt   r11, #0
+    movlt   r11, #1                 @ r11 := SIGN(r11) (see .c for SIGN macro)
+
+    STR2OFS r10, r11, r14, #YADAPTCOEFFSA-4
+                                    @ p->buf[YADAPTCOEFFSA-1] := r10
+                                    @ p->buf[YADAPTCOEFFSA] := r11
+
+    @ NOTE: r0 now contains predictionA - don't overwrite.
+
+@ Predictor Y, Filter B
+
+    LDR2OFS r6, r7, r12, #YfilterB  @ r6 := p->YfilterB
+                                    @ r7 := p->XfilterA
+
+    add     r2, r14, #YDELAYB-16    @ r2 := &p->buf[YDELAYB-4]
+    ldmia   r2, {r2 - r4, r10}      @ r2 := p->buf[YDELAYB-4]
+                                    @ r3 := p->buf[YDELAYB-3]
+                                    @ r4 := p->buf[YDELAYB-2]
+                                    @ r10 := p->buf[YDELAYB-1]
+
+    rsb     r6, r6, r6, lsl #5      @ r6 := r6 * 32 - r6 ( == r6*31)
+    sub     r11, r7, r6, asr #5     @ r11 (p->buf[YDELAYB]) := r7 - (r6 >> 5)
+
+    str     r7, [r12, #YfilterB]    @ p->YfilterB := r7 (p->XfilterA)
+
+    add     r5, r12, #YcoeffsB
+    ldmia   r5, {r5 - r9}           @ r5 := p->YcoeffsB[0]
+                                    @ r6 := p->YcoeffsB[1]
+                                    @ r7 := p->YcoeffsB[2]
+                                    @ r8 := p->YcoeffsB[3]
+                                    @ r9 := p->YcoeffsB[4]
+
+    subs    r10, r11, r10           @ r10 := r11 - r10
+
+    STR2OFS r10, r11, r14, #YDELAYB-4
+                                    @ p->buf[YDELAYB-1] = r10
+                                    @ p->buf[YDELAYB] = r11
+
+    mul     r1, r11, r5             @ r1 := p->buf[YDELAYB] * p->YcoeffsB[0]
+    mla     r1, r10, r6, r1         @ r1 += p->buf[YDELAYB-1] * p->YcoeffsB[1]
+    mla     r1, r4, r7, r1          @ r1 += p->buf[YDELAYB-2] * p->YcoeffsB[2]
+    mla     r1, r3, r8, r1          @ r1 += p->buf[YDELAYB-3] * p->YcoeffsB[3]
+    mla     r1, r2, r9, r1          @ r1 += p->buf[YDELAYB-4] * p->YcoeffsB[4]
+
+    @ flags were set above, in the subs instruction
+    mvngt   r10, #0
+    movlt   r10, #1                 @ r10 := SIGN(r10) (see .c for SIGN macro)
+
+    cmp     r11, #0
+    mvngt   r11, #0
+    movlt   r11, #1                 @ r11 := SIGN(r11) (see .c for SIGN macro)
+
+    STR2OFS r10, r11, r14, #YADAPTCOEFFSB-4
+                                    @ p->buf[YADAPTCOEFFSB-1] := r10
+                                    @ p->buf[YADAPTCOEFFSB] := r11
+
+    @ r0 still contains predictionA
+    @ r1 contains predictionB
+
+    @ Finish Predictor Y
+
+    ldr     r2, [sp]                @ r2 := decoded0
+    add     r0, r0, r1, asr #1      @ r0 := r0 + (r1 >> 1)
+    ldr     r4, [r12, #YfilterA]    @ r4 := p->YfilterA
+    ldr     r3, [r2]                @ r3 := *decoded0
+    rsb     r4, r4, r4, lsl #5      @ r4 := r4 * 32 - r4 ( == r4*31)
+    add     r1, r3, r0, asr #10     @ r1 := r3 + (r0 >> 10)
+    str     r1, [r12, #YlastA]      @ p->YlastA := r1
+    add     r1, r1, r4, asr #5      @ r1 := r1 + (r4 >> 5)
+    str     r1, [r12, #YfilterA]    @ p->YfilterA := r1
+
+    @ r1 contains p->YfilterA
+    @ r2 contains decoded0
+    @ r3 contains *decoded0
+
+    @ r5, r6, r7, r8, r9 contain p->YcoeffsB[0..4]
+    @ r10, r11 contain p->buf[YADAPTCOEFFSB-1] and p->buf[YADAPTCOEFFSB]
+
+    str     r1, [r2], #4            @ *(decoded0++) := r1  (p->YfilterA)
+    str     r2, [sp]                @ save decoded0
+    cmp     r3, #0
+    beq     3f
+
+    add     r2, r14, #YADAPTCOEFFSB-16
+    ldmia   r2, {r2 - r4}           @ r2 := p->buf[YADAPTCOEFFSB-4]
+                                    @ r3 := p->buf[YADAPTCOEFFSB-3]
+                                    @ r4 := p->buf[YADAPTCOEFFSB-2]
+    blt     1f
+
+    @ *decoded0 > 0
+
+    sub     r5, r5, r11       @ r5 := p->YcoeffsB[0] - p->buf[YADAPTCOEFFSB]
+    sub     r6, r6, r10       @ r6 := p->YcoeffsB[1] - p->buf[YADAPTCOEFFSB-1]
+    sub     r9, r9, r2        @ r9 := p->YcoeffsB[4] - p->buf[YADAPTCOEFFSB-4]
+    sub     r8, r8, r3        @ r8 := p->YcoeffsB[3] - p->buf[YADAPTCOEFFSB-3]
+    sub     r7, r7, r4        @ r7 := p->YcoeffsB[2] - p->buf[YADAPTCOEFFSB-2]
+
+    add     r0, r12, #YcoeffsB      
+    stmia   r0, {r5 - r9}           @ Save p->YcoeffsB[]
+
+    add     r1, r12, #YcoeffsA
+    ldmia   r1, {r2 - r5}           @ r2 := p->YcoeffsA[0]
+                                    @ r3 := p->YcoeffsA[1]
+                                    @ r4 := p->YcoeffsA[2]
+                                    @ r5 := p->YcoeffsA[3]
+
+    add     r6, r14, #YADAPTCOEFFSA-12
+    ldmia   r6, {r6 - r9}           @ r6 := p->buf[YADAPTCOEFFSA-3]
+                                    @ r7 := p->buf[YADAPTCOEFFSA-2]
+                                    @ r8 := p->buf[YADAPTCOEFFSA-1]
+                                    @ r9 := p->buf[YADAPTCOEFFSA]
+
+    sub     r5, r5, r6        @ r5 := p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3]
+    sub     r4, r4, r7        @ r4 := p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2]
+    sub     r3, r3, r8        @ r3 := p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1]
+    sub     r2, r2, r9        @ r2 := p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA]
+    
+    b       2f
+
+
+1:  @ *decoded0 < 0
+
+    add     r5, r5, r11       @ r5 := p->YcoeffsB[0] + p->buf[YADAPTCOEFFSB]
+    add     r6, r6, r10       @ r6 := p->YcoeffsB[1] + p->buf[YADAPTCOEFFSB-1]
+    add     r9, r9, r2        @ r9 := p->YcoeffsB[4] + p->buf[YADAPTCOEFFSB-4]
+    add     r8, r8, r3        @ r9 := p->YcoeffsB[3] + p->buf[YADAPTCOEFFSB-3]
+    add     r7, r7, r4        @ r8 := p->YcoeffsB[2] + p->buf[YADAPTCOEFFSB-2]
+
+    add     r0, r12, #YcoeffsB      
+    stmia   r0, {r5 - r9}           @ Save p->YcoeffsB[]
+
+    add     r1, r12, #YcoeffsA
+    ldmia   r1, {r2 - r5}           @ r2 := p->YcoeffsA[0]
+                                    @ r3 := p->YcoeffsA[1]
+                                    @ r4 := p->YcoeffsA[2]
+                                    @ r5 := p->YcoeffsA[3]
+
+    add     r6, r14, #YADAPTCOEFFSA-12
+    ldmia   r6, {r6 - r9}           @ r6 := p->buf[YADAPTCOEFFSA-3]
+                                    @ r7 := p->buf[YADAPTCOEFFSA-2]
+                                    @ r8 := p->buf[YADAPTCOEFFSA-1]
+                                    @ r9 := p->buf[YADAPTCOEFFSA]
+
+    add     r5, r5, r6        @ r5 := p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3]
+    add     r4, r4, r7        @ r4 := p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2]
+    add     r3, r3, r8        @ r3 := p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1]
+    add     r2, r2, r9        @ r2 := p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA]
+    
+2:
+    stmia   r1, {r2 - r5}     @ Save p->YcoeffsA
+
+3:
+
+@@@@@@@@@@@@@@@@@@@@@@@@@@@ PREDICTOR X
+
+@ Predictor X, Filter A
+
+    ldr     r11, [r12, #XlastA]     @ r11 := p->XlastA
+
+    add     r2, r14, #XDELAYA-12    @ r2 := &p->buf[XDELAYA-3]
+    ldmia   r2, {r2, r3, r10}       @ r2 := p->buf[XDELAYA-3]
+                                    @ r3 := p->buf[XDELAYA-2]
+                                    @ r10 := p->buf[XDELAYA-1]
+
+    add     r6, r12, #XcoeffsA
+    ldmia   r6, {r6 - r9}           @ r6 := p->XcoeffsA[0]
+                                    @ r7 := p->XcoeffsA[1]
+                                    @ r8 := p->XcoeffsA[2]
+                                    @ r9 := p->XcoeffsA[3]
+
+    subs    r10, r11, r10           @ r10 := r11 - r10
+
+    STR2OFS r10, r11, r14, #XDELAYA-4
+                                    @ p->buf[XDELAYA-1] = r10
+                                    @ p->buf[XDELAYA] = r11
+
+    mul     r0, r11, r6             @ r0 := p->buf[XDELAYA] * p->XcoeffsA[0]
+    mla     r0, r10, r7, r0         @ r0 += p->buf[XDELAYA-1] * p->XcoeffsA[1]
+    mla     r0, r3, r8, r0          @ r0 += p->buf[XDELAYA-2] * p->XcoeffsA[2]
+    mla     r0, r2, r9, r0          @ r0 += p->buf[XDELAYA-3] * p->XcoeffsA[3]
+
+    @ flags were set above, in the subs instruction
+    mvngt   r10, #0
+    movlt   r10, #1                 @ r10 := SIGN(r10) (see .c for SIGN macro)
+
+    cmp     r11, #0
+    mvngt   r11, #0
+    movlt   r11, #1                 @ r11 := SIGN(r11) (see .c for SIGN macro)
+
+    STR2OFS r10, r11, r14, #XADAPTCOEFFSA-4
+                                    @ p->buf[XADAPTCOEFFSA-1] := r10
+                                    @ p->buf[XADAPTCOEFFSA] := r11
+
+    @ NOTE: r0 now contains predictionA - don't overwrite.
+
+@ Predictor X, Filter B
+
+    LDR2OFS r6, r7, r12, #XfilterB  @ r6 := p->XfilterB
+                                    @ r7 := p->YfilterA
+
+    add     r2, r14, #XDELAYB-16    @ r2 := &p->buf[XDELAYB-4]
+    ldmia   r2, {r2 - r4, r10}      @ r2 := p->buf[XDELAYB-4]
+                                    @ r3 := p->buf[XDELAYB-3]
+                                    @ r4 := p->buf[XDELAYB-2]
+                                    @ r10 := p->buf[XDELAYB-1]
+
+    rsb     r6, r6, r6, lsl #5      @ r6 := r2 * 32 - r6 ( == r6*31)
+    sub     r11, r7, r6, asr #5     @ r11 (p->buf[XDELAYB]) := r7 - (r6 >> 5)
+
+    str     r7, [r12, #XfilterB]    @ p->XfilterB := r7 (p->YfilterA)
+
+    add     r5, r12, #XcoeffsB
+    ldmia   r5, {r5 - r9}           @ r5 := p->XcoeffsB[0]
+                                    @ r6 := p->XcoeffsB[1]
+                                    @ r7 := p->XcoeffsB[2]
+                                    @ r8 := p->XcoeffsB[3]
+                                    @ r9 := p->XcoeffsB[4]
+
+    subs    r10, r11, r10           @ r10 := r11 - r10
+
+    STR2OFS r10, r11, r14, #XDELAYB-4
+                                    @ p->buf[XDELAYB-1] = r10
+                                    @ p->buf[XDELAYB] = r11
+
+    mul     r1, r11, r5             @ r1 := p->buf[XDELAYB] * p->XcoeffsB[0]
+    mla     r1, r10, r6, r1         @ r1 += p->buf[XDELAYB-1] * p->XcoeffsB[1]
+    mla     r1, r4, r7, r1          @ r1 += p->buf[XDELAYB-2] * p->XcoeffsB[2]
+    mla     r1, r3, r8, r1          @ r1 += p->buf[XDELAYB-3] * p->XcoeffsB[3]
+    mla     r1, r2, r9, r1          @ r1 += p->buf[XDELAYB-4] * p->XcoeffsB[4]
+
+    @ flags were set above, in the subs instruction
+    mvngt   r10, #0
+    movlt   r10, #1                 @ r10 := SIGN(r10) (see .c for SIGN macro)
+
+    cmp     r11, #0
+    mvngt   r11, #0
+    movlt   r11, #1                 @ r11 := SIGN(r11) (see .c for SIGN macro)
+
+    STR2OFS r10, r11, r14, #XADAPTCOEFFSB-4
+                                    @ p->buf[XADAPTCOEFFSB-1] := r10
+                                    @ p->buf[XADAPTCOEFFSB] := r11
+
+    @ r0 still contains predictionA
+    @ r1 contains predictionB
+
+    @ Finish Predictor X
+
+    ldr     r2, [sp, #4]            @ r2 := decoded1
+    add     r0, r0, r1, asr #1      @ r0 := r0 + (r1 >> 1)
+    ldr     r4, [r12, #XfilterA]    @ r4 := p->XfilterA
+    ldr     r3, [r2]                @ r3 := *decoded1
+    rsb     r4, r4, r4, lsl #5      @ r4 := r4 * 32 - r4 ( == r4*31)
+    add     r1, r3, r0, asr #10     @ r1 := r3 + (r0 >> 10)
+    str     r1, [r12, #XlastA]      @ p->XlastA := r1
+    add     r1, r1, r4, asr #5      @ r1 := r1 + (r4 >> 5)
+    str     r1, [r12, #XfilterA]    @ p->XfilterA := r1
+
+    @ r1 contains p->XfilterA
+    @ r2 contains decoded1
+    @ r3 contains *decoded1
+
+    @ r5, r6, r7, r8, r9 contain p->XcoeffsB[0..4]
+    @ r10, r11 contain p->buf[XADAPTCOEFFSB-1] and p->buf[XADAPTCOEFFSB]
+
+    str     r1, [r2], #4            @ *(decoded1++) := r1  (p->XfilterA)
+    str     r2, [sp, #4]            @ save decoded1
+    cmp     r3, #0
+    beq     3f
+
+    add     r2, r14, #XADAPTCOEFFSB-16
+    ldmia   r2, {r2 - r4}           @ r2 := p->buf[XADAPTCOEFFSB-4]
+                                    @ r3 := p->buf[XADAPTCOEFFSB-3]
+                                    @ r4 := p->buf[XADAPTCOEFFSB-2]
+    blt     1f
+
+    @ *decoded1 > 0
+
+    sub     r5, r5, r11       @ r5 := p->XcoeffsB[0] - p->buf[XADAPTCOEFFSB]
+    sub     r6, r6, r10       @ r6 := p->XcoeffsB[1] - p->buf[XADAPTCOEFFSB-1]
+    sub     r9, r9, r2        @ r9 := p->XcoeffsB[4] - p->buf[XADAPTCOEFFSB-4]
+    sub     r8, r8, r3        @ r8 := p->XcoeffsB[3] - p->buf[XADAPTCOEFFSB-3]
+    sub     r7, r7, r4        @ r7 := p->XcoeffsB[2] - p->buf[XADAPTCOEFFSB-2]
+
+    add     r0, r12, #XcoeffsB      
+    stmia   r0, {r5 - r9}           @ Save p->XcoeffsB[]
+
+    add     r1, r12, #XcoeffsA
+    ldmia   r1, {r2 - r5}           @ r2 := p->XcoeffsA[0]
+                                    @ r3 := p->XcoeffsA[1]
+                                    @ r4 := p->XcoeffsA[2]
+                                    @ r5 := p->XcoeffsA[3]
+
+    add     r6, r14, #XADAPTCOEFFSA-12
+    ldmia   r6, {r6 - r9}           @ r6 := p->buf[XADAPTCOEFFSA-3]
+                                    @ r7 := p->buf[XADAPTCOEFFSA-2]
+                                    @ r8 := p->buf[XADAPTCOEFFSA-1]
+                                    @ r9 := p->buf[XADAPTCOEFFSA]
+
+    sub     r5, r5, r6        @ r5 := p->XcoeffsA[3] - p->buf[XADAPTCOEFFSA-3]
+    sub     r4, r4, r7        @ r4 := p->XcoeffsA[2] - p->buf[XADAPTCOEFFSA-2]
+    sub     r3, r3, r8        @ r3 := p->XcoeffsA[1] - p->buf[XADAPTCOEFFSA-1]
+    sub     r2, r2, r9        @ r2 := p->XcoeffsA[0] - p->buf[XADAPTCOEFFSA]
+    
+    b       2f
+
+
+1:  @ *decoded1 < 0
+
+    add     r5, r5, r11       @ r5 := p->XcoeffsB[0] + p->buf[XADAPTCOEFFSB]
+    add     r6, r6, r10       @ r6 := p->XcoeffsB[1] + p->buf[XADAPTCOEFFSB-1]
+    add     r9, r9, r2        @ r9 := p->XcoeffsB[4] + p->buf[XADAPTCOEFFSB-4]
+    add     r8, r8, r3        @ r8 := p->XcoeffsB[3] + p->buf[XADAPTCOEFFSB-3]
+    add     r7, r7, r4        @ r7 := p->XcoeffsB[2] + p->buf[XADAPTCOEFFSB-2]
+
+    add     r0, r12, #XcoeffsB      
+    stmia   r0, {r5 - r9}           @ Save p->XcoeffsB[]
+
+    add     r1, r12, #XcoeffsA
+    ldmia   r1, {r2 - r5}           @ r2 := p->XcoeffsA[0]
+                                    @ r3 := p->XcoeffsA[1]
+                                    @ r4 := p->XcoeffsA[2]
+                                    @ r5 := p->XcoeffsA[3]
+
+    add     r6, r14, #XADAPTCOEFFSA-12
+    ldmia   r6, {r6 - r9}           @ r6 := p->buf[XADAPTCOEFFSA-3]
+                                    @ r7 := p->buf[XADAPTCOEFFSA-2]
+                                    @ r8 := p->buf[XADAPTCOEFFSA-1]
+                                    @ r9 := p->buf[XADAPTCOEFFSA]
+
+    add     r5, r5, r6        @ r5 := p->XcoeffsA[3] + p->buf[XADAPTCOEFFSA-3]
+    add     r4, r4, r7        @ r4 := p->XcoeffsA[2] + p->buf[XADAPTCOEFFSA-2]
+    add     r3, r3, r8        @ r3 := p->XcoeffsA[1] + p->buf[XADAPTCOEFFSA-1]
+    add     r2, r2, r9        @ r2 := p->XcoeffsA[0] + p->buf[XADAPTCOEFFSA]
+    
+2:
+    stmia   r1, {r2 - r5}           @ Save p->XcoeffsA
+
+3:
+    
+@@@@@@@@@@@@@@@@@@@@@@@@@@@ COMMON
+
+    add     r14, r14, #4                @ p->buf++
+
+    add     r11, r12, #historybuffer    @ r11 := &p->historybuffer[0]
+
+    sub     r10, r14, #PREDICTOR_HISTORY_SIZE*4
+                                       @ r10 := p->buf - PREDICTOR_HISTORY_SIZE
+
+    ldr     r0, [sp, #8]
+    cmp     r10, r11
+    beq     move_hist     @ The history buffer is full, we need to do a memmove
+
+    @ Check loop count
+    subs    r0, r0, #1
+    strne   r0, [sp, #8]
+    bne     loop
+
+done:
+    str     r14, [r12]              @ Save value of p->buf
+    add     sp, sp, #12             @ Don't bother restoring r1-r3 
+#ifdef ROCKBOX
+    ldmpc   regs=r4-r11
+#else
+    ldmia   sp!, {r4 - r11, pc}
+#endif
+
+move_hist:
+    @ dest = r11 (p->historybuffer)
+    @ src = r14 (p->buf)
+    @ n = 200
+
+    ldmia   r14!, {r0-r9}    @ 40 bytes
+    stmia   r11!, {r0-r9}
+    ldmia   r14!, {r0-r9}    @ 40 bytes
+    stmia   r11!, {r0-r9}
+    ldmia   r14!, {r0-r9}    @ 40 bytes
+    stmia   r11!, {r0-r9}
+    ldmia   r14!, {r0-r9}    @ 40 bytes
+    stmia   r11!, {r0-r9}
+    ldmia   r14!, {r0-r9}    @ 40 bytes
+    stmia   r11!, {r0-r9}
+
+    ldr     r0, [sp, #8]
+    add     r14, r12, #historybuffer    @ p->buf = &p->historybuffer[0]
+
+    @ Check loop count
+    subs    r0, r0, #1
+    strne   r0, [sp, #8]
+    bne     loop
+    
+    b       done
+    .size   predictor_decode_stereo, .-predictor_decode_stereo
+
+    .global     predictor_decode_mono
+    .type       predictor_decode_mono,%function
+
+@ Mono predictor: runs only the Y / filter-A stage of *p over decoded0[],
+@ overwriting each input word in place with the updated p->YfilterA.
+@
+@ NOTE(review): the loop count is only tested after an iteration has run,
+@ so count == 0 is not handled here - presumably callers pass count >= 1.
+
+@ Register usage:
+@
+@ r0-r11 - scratch
+@ r12 - struct predictor_t* p
+@ r14 - int32_t* p->buf
+
+@ void predictor_decode_mono(struct predictor_t* p,
+@                            int32_t* decoded0,
+@                            int count)
+
+predictor_decode_mono:
+    stmdb   sp!, {r1, r2, r4-r11, lr}   @ save r4-r11 and lr; r1/r2 are pushed
+                                        @ too so that decoded0 and count live
+                                        @ on the stack (see offsets below)
+
+    @ r1 (decoded0) is [sp]
+    @ r2 (count)    is [sp, #4]
+
+    mov     r12, r0         @ r12 := p
+    ldr     r14, [r0]       @ r14 := p->buf
+    
+loopm:
+
+@@@@@@@@@@@@@@@@@@@@@@@@@@@ PREDICTOR
+
+    ldr     r11, [r12, #YlastA]     @ r11 := p->YlastA
+
+    add     r2, r14, #YDELAYA-12    @ r2 := &p->buf[YDELAYA-3]
+    ldmia   r2, {r2, r3, r10}       @ r2 := p->buf[YDELAYA-3]
+                                    @ r3 := p->buf[YDELAYA-2]
+                                    @ r10 := p->buf[YDELAYA-1]
+
+    add     r5, r12, #YcoeffsA      @ r5 := &p->YcoeffsA[0]
+    ldmia   r5, {r6 - r9}           @ r6 := p->YcoeffsA[0]
+                                    @ r7 := p->YcoeffsA[1]
+                                    @ r8 := p->YcoeffsA[2]
+                                    @ r9 := p->YcoeffsA[3]
+
+    subs    r10, r11, r10           @ r10 := r11 - r10
+                                    @ (flags from this are consumed below)
+
+    STR2OFS r10, r11, r14, #YDELAYA-4
+                                    @ p->buf[YDELAYA-1] = r10
+                                    @ p->buf[YDELAYA] = r11
+
+    mul     r0, r11, r6             @ r0 := p->buf[YDELAYA] * p->YcoeffsA[0]
+    mla     r0, r10, r7, r0         @ r0 += p->buf[YDELAYA-1] * p->YcoeffsA[1]
+    mla     r0, r3, r8, r0          @ r0 += p->buf[YDELAYA-2] * p->YcoeffsA[2]
+    mla     r0, r2, r9, r0          @ r0 += p->buf[YDELAYA-3] * p->YcoeffsA[3]
+
+    @ flags were set above, in the subs instruction
+    @ (a zero value matches neither condition and is left as 0)
+    mvngt   r10, #0                 @ gt: r10 := -1 (0xffffffff)
+    movlt   r10, #1                 @ r10 := SIGN(r10) (see .c for SIGN macro)
+
+    cmp     r11, #0
+    mvngt   r11, #0                 @ gt: r11 := -1 (0xffffffff)
+    movlt   r11, #1                 @ r11 := SIGN(r11) (see .c for SIGN macro)
+
+    STR2OFS r10, r11, r14, #YADAPTCOEFFSA-4
+                                    @ p->buf[YADAPTCOEFFSA-1] := r10
+                                    @ p->buf[YADAPTCOEFFSA] := r11
+
+    ldr     r2, [sp]                @ r2 := decoded0
+    ldr     r4, [r12, #YfilterA]    @ r4 := p->YfilterA
+    ldr     r3, [r2]                @ r3 := *decoded0
+    rsb     r4, r4, r4, lsl #5      @ r4 := r4 * 32 - r4 ( == r4*31)
+    add     r1, r3, r0, asr #10     @ r1 := r3 + (r0 >> 10)
+    str     r1, [r12, #YlastA]      @ p->YlastA := r1
+    add     r1, r1, r4, asr #5      @ r1 := r1 + (r4 >> 5)
+    str     r1, [r12, #YfilterA]    @ p->YfilterA := r1
+
+    @ r1 contains p->YfilterA
+    @ r2 contains decoded0
+    @ r3 contains *decoded0
+
+    @ r6, r7, r8, r9 contain p->YcoeffsA[0..3]
+    @ r10, r11 contain p->buf[YADAPTCOEFFSA-1] and p->buf[YADAPTCOEFFSA]
+
+    str     r1, [r2], #4            @ *(decoded0++) := r1  (p->YfilterA)
+    str     r2, [sp]                @ save decoded0
+    cmp     r3, #0                  @ test the original input sample
+    beq     3f                      @ zero: leave the coefficients untouched
+
+    LDR2OFS r2, r3, r14, #YADAPTCOEFFSA-12
+                                    @ r2 := p->buf[YADAPTCOEFFSA-3]
+                                    @ r3 := p->buf[YADAPTCOEFFSA-2]
+    blt     1f                      @ uses the flags from the cmp above
+
+    @ *decoded0 > 0
+
+    sub     r6, r6, r11     @ r6 := p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA]
+    sub     r7, r7, r10     @ r7 := p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1]
+    sub     r9, r9, r2      @ r9 := p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3]
+    sub     r8, r8, r3      @ r8 := p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2]
+
+    b       2f
+
+1:  @ *decoded0 < 0
+
+    add     r6, r6, r11     @ r6 := p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA]
+    add     r7, r7, r10     @ r7 := p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1]
+    add     r9, r9, r2      @ r9 := p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3]
+    add     r8, r8, r3      @ r8 := p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2]
+    
+2:
+    stmia   r5, {r6 - r9}           @ Save p->YcoeffsA
+
+3:
+
+@@@@@@@@@@@@@@@@@@@@@@@@@@@ COMMON
+
+    add     r14, r14, #4                @ p->buf++
+
+    add     r11, r12, #historybuffer    @ r11 := &p->historybuffer[0]
+
+    sub     r10, r14, #PREDICTOR_HISTORY_SIZE*4
+                                       @ r10 := p->buf - PREDICTOR_HISTORY_SIZE
+
+    ldr     r0, [sp, #4]                @ r0 := remaining count
+    cmp     r10, r11
+    beq     move_histm    @ The history buffer is full, we need to do a memmove
+
+    @ Check loop count
+    subs    r0, r0, #1
+    strne   r0, [sp, #4]                @ write back only if we keep looping
+    bne     loopm
+
+donem:
+    str     r14, [r12]              @ Save value of p->buf
+    add     sp, sp, #8              @ Don't bother restoring r1, r2
+#ifdef ROCKBOX
+    @ NOTE(review): ldmpc is a macro defined elsewhere - presumably it pops
+    @ r4-r11 and returns, like the plain ldmia in the other branch.
+    ldmpc   regs=r4-r11
+#else
+    ldmia   sp!, {r4 - r11, pc}
+#endif
+
+move_histm:
+    @ dest = r11 (p->historybuffer)
+    @ src = r14 (p->buf)
+    @ n = 200 bytes (5 unrolled copies of 10 registers each)
+
+    ldmia   r14!, {r0-r9}    @ 40 bytes
+    stmia   r11!, {r0-r9}
+    ldmia   r14!, {r0-r9}    @ 40 bytes
+    stmia   r11!, {r0-r9}
+    ldmia   r14!, {r0-r9}    @ 40 bytes
+    stmia   r11!, {r0-r9}
+    ldmia   r14!, {r0-r9}    @ 40 bytes
+    stmia   r11!, {r0-r9}
+    ldmia   r14!, {r0-r9}    @ 40 bytes
+    stmia   r11!, {r0-r9}
+
+    ldr     r0, [sp, #4]                @ r0 was clobbered by the copy: reload count
+    add     r14, r12, #historybuffer    @ p->buf = &p->historybuffer[0]
+
+    @ Check loop count
+    subs    r0, r0, #1
+    strne   r0, [sp, #4]
+    bne     loopm
+    
+    b       donem
+    .size   predictor_decode_mono, .-predictor_decode_mono
--- a/lib/rbcodec/codecs/demac/libdemac/predictor-cf.S
+++ b/lib/rbcodec/codecs/demac/libdemac/predictor-cf.S
@ -0,0 +1,660 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id$
+
+Copyright (C) Dave Chapman 2007
+
+Coldfire predictor copyright (C) 2007 Jens Arnold
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#include "demac_config.h"
+
+/* NOTE: The following need to be kept in sync with parser.h */
+/* They are used below as byte offsets from p->buf, 4 bytes per int32_t
+   entry: e.g. (YDELAYA-4,%a5) addresses p->buf[YDELAYA-1]. */
+
+#define YDELAYA        200
+#define YDELAYB        168
+#define XDELAYA        136
+#define XDELAYB        104
+#define YADAPTCOEFFSA   72
+#define XADAPTCOEFFSA   56
+#define YADAPTCOEFFSB   40
+#define XADAPTCOEFFSB   20
+
+/* struct predictor_t members (byte offsets from the start of the struct): */
+#define buf              0    /* int32_t* buf */
+
+#define YlastA           4    /* int32_t YlastA; */
+#define XlastA           8    /* int32_t XlastA; */
+
+/* YfilterB/XfilterA and XfilterB/YfilterA are adjacent because the code
+   below loads each pair with a single movem.l. */
+#define YfilterB        12    /* int32_t YfilterB; */
+#define XfilterA        16    /* int32_t XfilterA; */
+
+#define XfilterB        20    /* int32_t XfilterB; */
+#define YfilterA        24    /* int32_t YfilterA; */
+
+#define YcoeffsA        28    /* int32_t YcoeffsA[4]; */
+#define XcoeffsA        44    /* int32_t XcoeffsA[4]; */
+#define YcoeffsB        60    /* int32_t YcoeffsB[5]; */
+#define XcoeffsB        80    /* int32_t XcoeffsB[5]; */
+
+#define historybuffer  100    /* int32_t historybuffer[] */
+
+
+    .text
+
+    .align  2
+
+    .global predictor_decode_stereo
+    .type   predictor_decode_stereo,@function
+
+| Decodes count stereo samples in place. For each sample the Y predictor
+| rewrites *decoded0 with the new p->YfilterA, then the X predictor rewrites
+| *decoded1 with the new p->XfilterA. The filter-B stage of each predictor
+| is fed from the filterA value of the other one.
+|
+| Register usage inside the loop:
+|   %a3 = decoded0, %a4 = decoded1, %a5 = p->buf, %a6 = p, (%sp) = count
+|   %acc0 = predictionA, %acc1 = predictionB
+|
+| NOTE(review): the loop count is only tested after an iteration has run,
+| so count == 0 is not handled here - presumably callers pass count >= 1.
+| NOTE(review): the first mac.l of each filter accumulates onto %acc0/%acc1;
+| they are cleared by movclr.l at the end of each predictor, but nothing
+| here clears them before the first iteration - presumably they are zero
+| on entry; confirm.
+|
+| void predictor_decode_stereo(struct predictor_t* p,
+|                              int32_t* decoded0,
+|                              int32_t* decoded1,
+|                              int count)
+
+predictor_decode_stereo:
+    lea.l   (-12*4,%sp), %sp            | room for 11 saved regs + loop count
+    movem.l %d2-%d7/%a2-%a6, (4,%sp)    | saved regs start at (4,%sp)
+
+    movem.l (12*4+8,%sp), %a3-%a5       | %a3 = decoded0
+                                        | %a4 = decoded1
+                                        | %a5 = count (temporarily)
+    move.l  %a5, (%sp)                  | (%sp) = count
+
+    move.l  #0, %macsr                  | signed integer mode
+    move.l  (12*4+4,%sp), %a6           | %a6 = p
+    move.l  (%a6), %a5                  | %a5 = p->buf
+    
+.loop:
+    
+    | ***** PREDICTOR Y *****
+    
+    | Predictor Y, Filter A
+    
+    move.l  (YlastA,%a6), %d3           | %d3  = p->YlastA
+
+    movem.l (YDELAYA-12,%a5), %d0-%d2   | %d0  = p->buf[YDELAYA-3]
+                                        | %d1  = p->buf[YDELAYA-2]
+                                        | %d2  = p->buf[YDELAYA-1]
+
+    move.l  %d3, (YDELAYA,%a5)          | p->buf[YDELAYA]  = %d3
+
+    sub.l   %d3, %d2
+    neg.l   %d2                         | %d2 = %d3 - %d2
+
+    move.l  %d2, (YDELAYA-4,%a5)        | p->buf[YDELAYA-1]  = %d2
+
+    movem.l (YcoeffsA,%a6), %d4-%d7     | %d4  = p->YcoeffsA[0]
+                                        | %d5  = p->YcoeffsA[1]
+                                        | %d6  = p->YcoeffsA[2]
+                                        | %d7  = p->YcoeffsA[3]
+
+    mac.l   %d3, %d4, %acc0     | %acc0  = p->buf[YDELAYA] * p->YcoeffsA[0]
+    mac.l   %d2, %d5, %acc0     | %acc0 += p->buf[YDELAYA-1] * p->YcoeffsA[1]
+    mac.l   %d1, %d6, %acc0     | %acc0 += p->buf[YDELAYA-2] * p->YcoeffsA[2]
+    mac.l   %d0, %d7, %acc0     | %acc0 += p->buf[YDELAYA-3] * p->YcoeffsA[3]
+
+    tst.l   %d2
+    beq.s   1f                          | zero stays zero
+    spl.b   %d2                         | pos: 0x??????ff, neg: 0x??????00
+    extb.l  %d2                         | pos: 0xffffffff, neg: 0x00000000
+    or.l    #1, %d2                     | pos: 0xffffffff, neg: 0x00000001
+1:                                      | %d2 = SIGN(%d2)
+    move.l  %d2, (YADAPTCOEFFSA-4,%a5)  | p->buf[YADAPTCOEFFSA-1]  = %d2
+
+    tst.l   %d3
+    beq.s   1f
+    spl.b   %d3
+    extb.l  %d3
+    or.l    #1, %d3
+1:                                      | %d3 = SIGN(%d3)
+    move.l  %d3, (YADAPTCOEFFSA,%a5)    | p->buf[YADAPTCOEFFSA]  = %d3
+
+    | Predictor Y, Filter B
+    
+    movem.l (YfilterB,%a6), %d2-%d3     | %d2  = p->YfilterB
+                                        | %d3  = p->XfilterA
+    move.l  %d3, (YfilterB,%a6)         | p->YfilterB = %d3
+
+    move.l  %d2, %d1                    | %d1  = %d2
+    lsl.l   #5, %d2                     | %d2  = %d2 * 32
+    sub.l   %d1, %d2                    | %d2 -= %d1 (== 31 * old_d2)
+    asr.l   #5, %d2                     | %d2 >>= 5
+    sub.l   %d2, %d3                    | %d3 -= %d2
+    
+    movem.l (YDELAYB-16,%a5), %d4-%d7   | %d4  = p->buf[YDELAYB-4]
+                                        | %d5  = p->buf[YDELAYB-3]
+                                        | %d6  = p->buf[YDELAYB-2]
+                                        | %d7  = p->buf[YDELAYB-1]
+    sub.l   %d3, %d7
+    neg.l   %d7                         | %d7  = %d3 - %d7
+
+    move.l  %d7, (YDELAYB-4,%a5)        | p->buf[YDELAYB-1]  = %d7
+
+    movem.l (YcoeffsB,%a6), %d1-%d2/%a0-%a2 | %d1  = p->YcoeffsB[0]
+                                        | %d2  = p->YcoeffsB[1]
+                                        | %a0  = p->YcoeffsB[2]
+                                        | %a1  = p->YcoeffsB[3]
+                                        | %a2  = p->YcoeffsB[4]
+
+    mac.l   %d3, %d1, %acc1     | %acc1  = p->buf[YDELAYB] * p->YcoeffsB[0]
+    mac.l   %d7, %d2, %acc1     | %acc1 += p->buf[YDELAYB-1] * p->YcoeffsB[1]
+    mac.l   %d6, %a0, %acc1     | %acc1 += p->buf[YDELAYB-2] * p->YcoeffsB[2]
+    mac.l   %d5, %a1, %acc1     | %acc1 += p->buf[YDELAYB-3] * p->YcoeffsB[3]
+    mac.l   %d4, %a2, %acc1     | %acc1 += p->buf[YDELAYB-4] * p->YcoeffsB[4]
+    
+    move.l  %d3, (YDELAYB, %a5)         | p->buf[YDELAYB]  = %d3
+    
+    tst.l   %d7
+    beq.s   1f
+    spl.b   %d7
+    extb.l  %d7
+    or.l    #1, %d7
+1:                                      | %d7 = SIGN(%d7)
+    move.l  %d7, (YADAPTCOEFFSB-4,%a5)  | p->buf[YADAPTCOEFFSB-1]  = %d7
+    tst.l   %d3
+    beq.s   1f
+    spl.b   %d3
+    extb.l  %d3
+    or.l    #1, %d3
+1:                                      | %d3 = SIGN(%d3)
+    move.l  %d3, (YADAPTCOEFFSB, %a5)   | p->buf[YADAPTCOEFFSB]  = %d3
+
+    | %d1, %d2, %a0, %a1, %a2 contain p->YcoeffsB[0..4]
+    | %d7, %d3 contain p->buf[YADAPTCOEFFSB-1] and p->buf[YADAPTCOEFFSB]
+
+    move.l  (%a3), %d0                  | %d0  = *decoded0
+    beq.s   3f                          | zero: leave the coefficients untouched
+
+    movem.l (YADAPTCOEFFSB-16,%a5), %d4-%d6 | %d4  = p->buf[YADAPTCOEFFSB-4]
+                                            | %d5  = p->buf[YADAPTCOEFFSB-3]
+                                            | %d6  = p->buf[YADAPTCOEFFSB-2]
+
+    bmi.s   1f                          | flags still valid here
+
+    | *decoded0 > 0
+
+    sub.l   %d3, %d1        | %d1  = p->YcoeffsB[0] - p->buf[YADAPTCOEFFSB]
+    sub.l   %d7, %d2        | %d2  = p->YcoeffsB[1] - p->buf[YADAPTCOEFFSB-1]
+    sub.l   %d6, %a0        | %a0  = p->YcoeffsB[2] - p->buf[YADAPTCOEFFSB-2]
+    sub.l   %d5, %a1        | %a1  = p->YcoeffsB[3] - p->buf[YADAPTCOEFFSB-3]
+    sub.l   %d4, %a2        | %a2  = p->YcoeffsB[4] - p->buf[YADAPTCOEFFSB-4]
+
+    movem.l %d1-%d2/%a0-%a2, (YcoeffsB,%a6) | Save p->YcoeffsB[]
+    
+    movem.l (YcoeffsA,%a6), %d4-%d7     | %d4  = p->YcoeffsA[0]
+                                        | %d5  = p->YcoeffsA[1]
+                                        | %d6  = p->YcoeffsA[2]
+                                        | %d7  = p->YcoeffsA[3]
+                                        
+    movem.l (YADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 
+                                        | %d2  = p->buf[YADAPTCOEFFSA-3]
+                                        | %a0  = p->buf[YADAPTCOEFFSA-2]
+                                        | %a1  = p->buf[YADAPTCOEFFSA-1]
+                                        | %a2  = p->buf[YADAPTCOEFFSA]
+                                        
+    sub.l   %a2, %d4        | %d4  = p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA]
+    sub.l   %a1, %d5        | %d5  = p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1]
+    sub.l   %a0, %d6        | %d6  = p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2]
+    sub.l   %d2, %d7        | %d7  = p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3]
+    
+    bra.s   2f
+
+1:  | *decoded0 < 0
+
+    add.l   %d3, %d1        | %d1  = p->YcoeffsB[0] + p->buf[YADAPTCOEFFSB]
+    add.l   %d7, %d2        | %d2  = p->YcoeffsB[1] + p->buf[YADAPTCOEFFSB-1]
+    add.l   %d6, %a0        | %a0  = p->YcoeffsB[2] + p->buf[YADAPTCOEFFSB-2]
+    add.l   %d5, %a1        | %a1  = p->YcoeffsB[3] + p->buf[YADAPTCOEFFSB-3]
+    add.l   %d4, %a2        | %a2  = p->YcoeffsB[4] + p->buf[YADAPTCOEFFSB-4]
+
+    movem.l %d1-%d2/%a0-%a2, (YcoeffsB,%a6) | Save p->YcoeffsB[]
+
+    movem.l (YcoeffsA,%a6), %d4-%d7     | %d4  = p->YcoeffsA[0]
+                                        | %d5  = p->YcoeffsA[1]
+                                        | %d6  = p->YcoeffsA[2]
+                                        | %d7  = p->YcoeffsA[3]
+
+    movem.l (YADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 
+                                        | %d2  = p->buf[YADAPTCOEFFSA-3]
+                                        | %a0  = p->buf[YADAPTCOEFFSA-2]
+                                        | %a1  = p->buf[YADAPTCOEFFSA-1]
+                                        | %a2  = p->buf[YADAPTCOEFFSA]
+
+    add.l   %a2, %d4        | %d4  = p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA]
+    add.l   %a1, %d5        | %d5  = p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1]
+    add.l   %a0, %d6        | %d6  = p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2]
+    add.l   %d2, %d7        | %d7  = p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3]
+
+2:
+    movem.l %d4-%d7, (YcoeffsA,%a6)     | Save p->YcoeffsA[]
+
+3:
+    | Finish Predictor Y
+
+    movclr.l %acc0, %d1                 | %d1 = predictionA
+    movclr.l %acc1, %d2                 | %d2 = predictionB
+    asr.l   #1, %d2
+    add.l   %d2, %d1                    | %d1 += (%d2 >> 1)
+    asr.l   #8, %d1
+    asr.l   #2, %d1                     | %d1 >>= 10
+    add.l   %d0, %d1                    | %d1 += %d0
+    move.l  %d1, (YlastA,%a6)           | p->YlastA  = %d1
+    
+    move.l  (YfilterA,%a6), %d2         | %d2  = p->YfilterA
+    move.l  %d2, %d0                    | %d0  = %d2 (copy for the *31 below)
+    lsl.l   #5, %d2
+    sub.l   %d0, %d2                    | %d2 = 31 * %d2
+    asr.l   #5, %d2                     | %d2 >>= 5
+    add.l   %d1, %d2                    | %d2 += %d1 (new p->YlastA)
+    move.l  %d2, (YfilterA,%a6)         | p->YfilterA  = %d2
+
+    | *decoded0 stored 2 instructions down, avoiding pipeline stall
+
+    | ***** PREDICTOR X *****
+    
+    | Predictor X, Filter A
+    
+    move.l  (XlastA,%a6), %d3           | %d3  = p->XlastA
+
+    move.l  %d2, (%a3)+                 | *(decoded0++)  = %d2 (p->YfilterA)
+
+    movem.l (XDELAYA-12,%a5), %d0-%d2   | %d0  = p->buf[XDELAYA-3]
+                                        | %d1  = p->buf[XDELAYA-2]
+                                        | %d2  = p->buf[XDELAYA-1]
+
+    move.l  %d3, (XDELAYA,%a5)          | p->buf[XDELAYA]  = %d3
+
+    sub.l   %d3, %d2
+    neg.l   %d2                         | %d2  = %d3 -%d2
+
+    move.l  %d2, (XDELAYA-4,%a5)        | p->buf[XDELAYA-1]  = %d2
+
+    movem.l (XcoeffsA,%a6), %d4-%d7     | %d4  = p->XcoeffsA[0]
+                                        | %d5  = p->XcoeffsA[1]
+                                        | %d6  = p->XcoeffsA[2]
+                                        | %d7  = p->XcoeffsA[3]
+
+    mac.l   %d3, %d4, %acc0     | %acc0  = p->buf[XDELAYA] * p->XcoeffsA[0]
+    mac.l   %d2, %d5, %acc0     | %acc0 += p->buf[XDELAYA-1] * p->XcoeffsA[1]
+    mac.l   %d1, %d6, %acc0     | %acc0 += p->buf[XDELAYA-2] * p->XcoeffsA[2]
+    mac.l   %d0, %d7, %acc0     | %acc0 += p->buf[XDELAYA-3] * p->XcoeffsA[3]
+
+    tst.l   %d2
+    beq.s   1f
+    spl.b   %d2                         | pos: 0x??????ff, neg: 0x??????00
+    extb.l  %d2                         | pos: 0xffffffff, neg: 0x00000000
+    or.l    #1, %d2                     | pos: 0xffffffff, neg: 0x00000001
+1:                                      | %d2 = SIGN(%d2)
+    move.l  %d2, (XADAPTCOEFFSA-4,%a5)  | p->buf[XADAPTCOEFFSA-1]  = %d2
+
+    tst.l   %d3
+    beq.s   1f
+    spl.b   %d3
+    extb.l  %d3
+    or.l    #1, %d3
+1:                                      | %d3 = SIGN(%d3)
+    move.l  %d3, (XADAPTCOEFFSA,%a5)    | p->buf[XADAPTCOEFFSA]  = %d3
+
+    | Predictor X, Filter B
+    
+    movem.l (XfilterB,%a6), %d2-%d3     | %d2  = p->XfilterB
+                                        | %d3  = p->YfilterA
+    move.l  %d3, (XfilterB,%a6)         | p->XfilterB = %d3
+
+    move.l  %d2, %d1                    | %d1  = %d2
+    lsl.l   #5, %d2                     | %d2  = %d2 * 32
+    sub.l   %d1, %d2                    | %d2 -= %d1 (== 31 * old_d2)
+    asr.l   #5, %d2                     | %d2 >>= 5
+    sub.l   %d2, %d3                    | %d3 -= %d2 
+    
+    movem.l (XDELAYB-16,%a5), %d4-%d7   | %d4  = p->buf[XDELAYB-4]
+                                        | %d5  = p->buf[XDELAYB-3]
+                                        | %d6  = p->buf[XDELAYB-2]
+                                        | %d7  = p->buf[XDELAYB-1]
+    sub.l   %d3, %d7
+    neg.l   %d7                         | %d7  = %d3 - %d7
+
+    move.l  %d7, (XDELAYB-4,%a5)        | p->buf[XDELAYB-1]  = %d7
+
+    movem.l (XcoeffsB,%a6), %d1-%d2/%a0-%a2 | %d1  = p->XcoeffsB[0]
+                                        | %d2  = p->XcoeffsB[1]
+                                        | %a0  = p->XcoeffsB[2]
+                                        | %a1  = p->XcoeffsB[3]
+                                        | %a2  = p->XcoeffsB[4]
+
+    mac.l   %d3, %d1, %acc1     | %acc1  = p->buf[XDELAYB] * p->XcoeffsB[0]
+    mac.l   %d7, %d2, %acc1     | %acc1 += p->buf[XDELAYB-1] * p->XcoeffsB[1]
+    mac.l   %d6, %a0, %acc1     | %acc1 += p->buf[XDELAYB-2] * p->XcoeffsB[2]
+    mac.l   %d5, %a1, %acc1     | %acc1 += p->buf[XDELAYB-3] * p->XcoeffsB[3]
+    mac.l   %d4, %a2, %acc1     | %acc1 += p->buf[XDELAYB-4] * p->XcoeffsB[4]
+    
+    move.l  %d3, (XDELAYB, %a5)         | p->buf[XDELAYB]  = %d3
+    
+    tst.l   %d7
+    beq.s   1f
+    spl.b   %d7
+    extb.l  %d7
+    or.l    #1, %d7
+1:                                      | %d7 = SIGN(%d7)
+    move.l  %d7, (XADAPTCOEFFSB-4,%a5)  | p->buf[XADAPTCOEFFSB-1]  = %d7
+
+    tst.l   %d3
+    beq.s   1f
+    spl.b   %d3
+    extb.l  %d3
+    or.l    #1, %d3
+1:                                      | %d3 = SIGN(%d3)
+    move.l  %d3, (XADAPTCOEFFSB, %a5)   | p->buf[XADAPTCOEFFSB]  = %d3
+
+    | %d1, %d2, %a0, %a1, %a2 contain p->XcoeffsB[0..4]
+    | %d7, %d3 contain p->buf[XADAPTCOEFFSB-1] and p->buf[XADAPTCOEFFSB]
+
+    move.l  (%a4), %d0                  | %d0  = *decoded1
+    beq.s   3f                          | zero: leave the coefficients untouched
+    
+    movem.l (XADAPTCOEFFSB-16,%a5), %d4-%d6 | %d4  = p->buf[XADAPTCOEFFSB-4]
+                                            | %d5  = p->buf[XADAPTCOEFFSB-3]
+                                            | %d6  = p->buf[XADAPTCOEFFSB-2]
+
+    bmi.s   1f                          | flags still valid here
+    
+    | *decoded1 > 0
+
+    sub.l   %d3, %d1        | %d1  = p->XcoeffsB[0] - p->buf[XADAPTCOEFFSB]
+    sub.l   %d7, %d2        | %d2  = p->XcoeffsB[1] - p->buf[XADAPTCOEFFSB-1]
+    sub.l   %d6, %a0        | %a0  = p->XcoeffsB[2] - p->buf[XADAPTCOEFFSB-2]
+    sub.l   %d5, %a1        | %a1  = p->XcoeffsB[3] - p->buf[XADAPTCOEFFSB-3]
+    sub.l   %d4, %a2        | %a2  = p->XcoeffsB[4] - p->buf[XADAPTCOEFFSB-4]
+
+    movem.l %d1-%d2/%a0-%a2, (XcoeffsB,%a6) | Save p->XcoeffsB[]
+
+    movem.l (XcoeffsA,%a6), %d4-%d7     | %d4  = p->XcoeffsA[0]
+                                        | %d5  = p->XcoeffsA[1]
+                                        | %d6  = p->XcoeffsA[2]
+                                        | %d7  = p->XcoeffsA[3]
+                                        
+    movem.l (XADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 
+                                        | %d2  = p->buf[XADAPTCOEFFSA-3]
+                                        | %a0  = p->buf[XADAPTCOEFFSA-2]
+                                        | %a1  = p->buf[XADAPTCOEFFSA-1]
+                                        | %a2  = p->buf[XADAPTCOEFFSA]
+                                        
+    sub.l   %a2, %d4        | %d4  = p->XcoeffsA[0] - p->buf[XADAPTCOEFFSA]
+    sub.l   %a1, %d5        | %d5  = p->XcoeffsA[1] - p->buf[XADAPTCOEFFSA-1]
+    sub.l   %a0, %d6        | %d6  = p->XcoeffsA[2] - p->buf[XADAPTCOEFFSA-2]
+    sub.l   %d2, %d7        | %d7  = p->XcoeffsA[3] - p->buf[XADAPTCOEFFSA-3]
+
+    bra.s   2f
+
+1:  | *decoded1 < 0
+
+    add.l   %d3, %d1        | %d1  = p->XcoeffsB[0] + p->buf[XADAPTCOEFFSB]
+    add.l   %d7, %d2        | %d2  = p->XcoeffsB[1] + p->buf[XADAPTCOEFFSB-1]
+    add.l   %d6, %a0        | %a0  = p->XcoeffsB[2] + p->buf[XADAPTCOEFFSB-2]
+    add.l   %d5, %a1        | %a1  = p->XcoeffsB[3] + p->buf[XADAPTCOEFFSB-3]
+    add.l   %d4, %a2        | %a2  = p->XcoeffsB[4] + p->buf[XADAPTCOEFFSB-4]
+    
+    movem.l %d1-%d2/%a0-%a2, (XcoeffsB,%a6) | Save p->XcoeffsB[]
+    
+    movem.l (XcoeffsA,%a6), %d4-%d7     | %d4  = p->XcoeffsA[0]
+                                        | %d5  = p->XcoeffsA[1]
+                                        | %d6  = p->XcoeffsA[2]
+                                        | %d7  = p->XcoeffsA[3]
+
+    movem.l (XADAPTCOEFFSA-12,%a5), %d2/%a0-%a2 
+                                        | %d2  = p->buf[XADAPTCOEFFSA-3]
+                                        | %a0  = p->buf[XADAPTCOEFFSA-2]
+                                        | %a1  = p->buf[XADAPTCOEFFSA-1]
+                                        | %a2  = p->buf[XADAPTCOEFFSA]
+                                        
+    add.l   %a2, %d4        | %d4  = p->XcoeffsA[0] + p->buf[XADAPTCOEFFSA]
+    add.l   %a1, %d5        | %d5  = p->XcoeffsA[1] + p->buf[XADAPTCOEFFSA-1]
+    add.l   %a0, %d6        | %d6  = p->XcoeffsA[2] + p->buf[XADAPTCOEFFSA-2]
+    add.l   %d2, %d7        | %d7  = p->XcoeffsA[3] + p->buf[XADAPTCOEFFSA-3]
+
+2:
+    movem.l %d4-%d7, (XcoeffsA,%a6)     | Save p->XcoeffsA[]
+
+3:
+    | Finish Predictor X
+
+    movclr.l %acc0, %d1                 | %d1 = predictionA
+    movclr.l %acc1, %d2                 | %d2 = predictionB
+    asr.l   #1, %d2
+    add.l   %d2, %d1                    | %d1 += (%d2 >> 1)
+    asr.l   #8, %d1
+    asr.l   #2, %d1                     | %d1 >>= 10
+    add.l   %d0, %d1                    | %d1 += %d0
+    move.l  %d1, (XlastA,%a6)           | p->XlastA  = %d1
+    
+    move.l  (XfilterA,%a6), %d2         | %d2  = p->XfilterA
+    move.l  %d2, %d0                    | %d0  = %d2 (copy for the *31 below)
+    lsl.l   #5, %d2
+    sub.l   %d0, %d2                    | %d2 = 31 * %d2
+    asr.l   #5, %d2                     | %d2 >>= 5
+    add.l   %d1, %d2                    | %d2 += %d1 (new p->XlastA)
+    move.l  %d2, (XfilterA,%a6)         | p->XfilterA  = %d2
+
+    | *decoded1 stored 3 instructions down, avoiding pipeline stall
+
+    | ***** COMMON *****
+    
+    addq.l  #4, %a5                     | p->buf++
+    lea.l   (historybuffer+PREDICTOR_HISTORY_SIZE*4,%a6), %a2
+                            | %a2 = &p->historybuffer[PREDICTOR_HISTORY_SIZE]
+    
+    move.l  %d2, (%a4)+                 | *(decoded1++)  = %d2 (p->XfilterA)
+
+    cmp.l   %a2, %a5
+    beq.s   .move_hist      | History buffer is full, we need to do a memmove
+
+    subq.l  #1, (%sp)                   | decrease loop count
+    bne.w   .loop
+
+.done:
+    move.l  %a5, (%a6)                  | Save value of p->buf
+    movem.l (4,%sp), %d2-%d7/%a2-%a6    | restore the registers saved on entry
+    lea.l   (12*4,%sp), %sp             | release the 12-longword frame
+    rts
+    
+.move_hist:
+    lea.l   (historybuffer,%a6), %a2
+
+    | dest = %a2 (p->historybuffer)
+    | src = %a5 (p->buf)
+    | n = 200 bytes (5 unrolled copies of 10 registers each)
+    
+    movem.l (%a5), %d0-%d7/%a0-%a1      | 40 bytes
+    movem.l %d0-%d7/%a0-%a1, (%a2)
+    movem.l (40,%a5), %d0-%d7/%a0-%a1   | 40 bytes
+    movem.l %d0-%d7/%a0-%a1, (40,%a2)
+    movem.l (80,%a5), %d0-%d7/%a0-%a1   | 40 bytes
+    movem.l %d0-%d7/%a0-%a1, (80,%a2)
+    movem.l (120,%a5), %d0-%d7/%a0-%a1  | 40 bytes
+    movem.l %d0-%d7/%a0-%a1, (120,%a2)
+    movem.l (160,%a5), %d0-%d7/%a0-%a1  | 40 bytes
+    movem.l %d0-%d7/%a0-%a1, (160,%a2)
+
+    move.l  %a2, %a5                    | p->buf = &p->historybuffer[0]
+
+    subq.l  #1, (%sp)                   | decrease loop count
+    bne.w   .loop
+
+    bra.s   .done
+    .size   predictor_decode_stereo, .-predictor_decode_stereo
+
+
+    .global predictor_decode_mono
+    .type   predictor_decode_mono,@function
+
+| void predictor_decode_mono(struct predictor_t* p,
+|                            int32_t* decoded0,
+|                            int count)
+|
+| Runs the 4-tap (YcoeffsA) predictor over the samples in decoded0[],
+| replacing each input value in place with the filtered output (p->YfilterA).
+|
+| Register usage inside the loop:
+|   %a6 = p                  %a5 = p->buf (written back to *p on exit)
+|   %a4 = decoded0           %d7 = remaining sample count
+|   %d3 = current sample (p->YlastA), kept in a register between iterations
+|
+| NOTE(review): the loop counter is only tested after an iteration has run,
+| so count is expected to be non-zero on entry.
+
+predictor_decode_mono:
+    lea.l   (-11*4,%sp), %sp            | room for the 11 callee-saved registers
+    movem.l %d2-%d7/%a2-%a6, (%sp)
+
+    move.l  #0, %macsr                  | signed integer mode
+
+    move.l  (11*4+4,%sp), %a6           | %a6 = p
+    move.l  (11*4+8,%sp), %a4           | %a4 = decoded0
+    move.l  (11*4+12,%sp), %d7          | %d7 = count
+    move.l  (%a6), %a5                  | %a5 = p->buf
+
+    move.l  (YlastA,%a6), %d3           | %d3  = p->YlastA
+    
+.loopm:
+
+    | ***** PREDICTOR *****
+
+    movem.l (YDELAYA-12,%a5), %d0-%d2   | %d0  = p->buf[YDELAYA-3]
+                                        | %d1  = p->buf[YDELAYA-2]
+                                        | %d2  = p->buf[YDELAYA-1]
+
+    move.l  %d3, (YDELAYA,%a5)          | p->buf[YDELAYA]  = %d3
+
+    sub.l   %d3, %d2
+    neg.l   %d2                         | %d2 = %d3 - %d2
+
+    move.l  %d2, (YDELAYA-4,%a5)        | p->buf[YDELAYA-1]  = %d2
+
+    movem.l (YcoeffsA,%a6), %a0-%a3     | %a0  = p->YcoeffsA[0]
+                                        | %a1  = p->YcoeffsA[1]
+                                        | %a2  = p->YcoeffsA[2]
+                                        | %a3  = p->YcoeffsA[3]
+
+    mac.l   %d3, %a0, %acc0     | %acc0  = p->buf[YDELAYA] * p->YcoeffsA[0]
+    mac.l   %d2, %a1, %acc0     | %acc0 += p->buf[YDELAYA-1] * p->YcoeffsA[1]
+    mac.l   %d1, %a2, %acc0     | %acc0 += p->buf[YDELAYA-2] * p->YcoeffsA[2]
+    mac.l   %d0, %a3, %acc0     | %acc0 += p->buf[YDELAYA-3] * p->YcoeffsA[3]
+
+    | SIGN() as used here is inverted: -1 for positive, 1 for negative, 0 for 0
+    tst.l   %d2
+    beq.s   1f
+    spl.b   %d2                         | pos: 0x??????ff, neg: 0x??????00
+    extb.l  %d2                         | pos: 0xffffffff, neg: 0x00000000
+    or.l    #1, %d2                     | pos: 0xffffffff, neg: 0x00000001
+1:                                      | %d2 = SIGN(%d2)
+    move.l  %d2, (YADAPTCOEFFSA-4,%a5)  | p->buf[YADAPTCOEFFSA-1]  = %d2
+
+    tst.l   %d3
+    beq.s   1f
+    spl.b   %d3
+    extb.l  %d3
+    or.l    #1, %d3
+1:                                      | %d3 = SIGN(%d3)
+    move.l  %d3, (YADAPTCOEFFSA,%a5)    | p->buf[YADAPTCOEFFSA]  = %d3
+
+    move.l  (%a4), %d0                  | %d0 = *decoded0
+    beq.s   3f                          | zero input: leave coefficients alone
+
+    movem.l (YADAPTCOEFFSA-12,%a5),%d4-%d5  | %d4  = p->buf[YADAPTCOEFFSA-3]
+                                            | %d5  = p->buf[YADAPTCOEFFSA-2]
+                                            
+    bmi.s   1f                          | flags still valid here
+    
+    | *decoded0 > 0
+    
+    sub.l   %d3, %a0        | %a0  = p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA]
+    sub.l   %d2, %a1        | %a1  = p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1]
+    sub.l   %d5, %a2        | %a2  = p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2]
+    sub.l   %d4, %a3        | %a3  = p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3]
+
+    bra.s   2f
+
+1:  | *decoded0 < 0
+
+    add.l   %d3, %a0        | %a0  = p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA]
+    add.l   %d2, %a1        | %a1  = p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1]
+    add.l   %d5, %a2        | %a2  = p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2]
+    add.l   %d4, %a3        | %a3  = p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3]
+
+2:
+    movem.l %a0-%a3, (YcoeffsA,%a6)     | save p->YcoeffsA[]
+
+3:
+    | Finish Predictor
+
+    movclr.l %acc0, %d3                 | %d3 = predictionA
+    asr.l   #8, %d3
+    asr.l   #2, %d3                     | %d3 >>= 10
+    add.l   %d0, %d3                    | %d3 += %d0
+
+    move.l  (YfilterA,%a6), %d2         | %d2  = p->YfilterA
+    move.l  %d2, %d0
+    lsl.l   #5, %d2
+    sub.l   %d0, %d2                    | %d2 = 31 * %d2
+    asr.l   #5, %d2                     | %d2 >>= 5
+    add.l   %d3, %d2
+    move.l  %d2, (YfilterA,%a6)         | p->YfilterA  = %d2
+
+    | *decoded0 stored 3 instructions down, avoiding pipeline stall
+
+    | ***** COMMON *****
+
+    addq.l  #4, %a5                     | p->buf++
+    lea.l   (historybuffer+PREDICTOR_HISTORY_SIZE*4,%a6), %a3
+                            | %a3 = &p->historybuffer[PREDICTOR_HISTORY_SIZE]
+    
+    move.l  %d2, (%a4)+                 | *(decoded0++)  = %d2 (p->YfilterA)
+
+    cmp.l   %a3, %a5
+    beq.s   .move_histm     | History buffer is full, we need to do a memmove
+                                              
+    subq.l  #1, %d7                     | decrease loop count
+    bne.w   .loopm
+
+    move.l  %d3, (YlastA,%a6)           | p->YlastA  = %d3
+
+.donem:
+    move.l  %a5, (%a6)                  | Save value of p->buf
+    movem.l (%sp), %d2-%d7/%a2-%a6
+    lea.l   (11*4,%sp), %sp
+    rts
+    
+.move_histm:
+    | %d3 is used as a scratch register by the copy below, so park it first
+    move.l  %d3, (YlastA,%a6)           | p->YlastA  = %d3
+
+    lea.l   (historybuffer,%a6), %a3
+
+    | dest = %a3 (p->historybuffer)
+    | src = %a5 (p->buf)
+    | n = 200 bytes (5 blocks of 10 longwords)
+    
+    movem.l (%a5), %d0-%d6/%a0-%a2      | 40 bytes
+    movem.l %d0-%d6/%a0-%a2, (%a3)
+    movem.l (40,%a5), %d0-%d6/%a0-%a2   | 40 bytes
+    movem.l %d0-%d6/%a0-%a2, (40,%a3)
+    movem.l (80,%a5), %d0-%d6/%a0-%a2   | 40 bytes
+    movem.l %d0-%d6/%a0-%a2, (80,%a3)
+    movem.l (120,%a5), %d0-%d6/%a0-%a2  | 40 bytes
+    movem.l %d0-%d6/%a0-%a2, (120,%a3)
+    movem.l (160,%a5), %d0-%d6/%a0-%a2  | 40 bytes
+    movem.l %d0-%d6/%a0-%a2, (160,%a3)
+
+    move.l  %a3, %a5                    | p->buf = &p->historybuffer[0]
+
+    move.l  (YlastA,%a6), %d3           | %d3  = p->YlastA (restore after copy)
+
+    subq.l  #1, %d7                     | decrease loop count
+    bne.w   .loopm
+
+    bra.s   .donem                      | p->YlastA was already saved above
+    .size   predictor_decode_mono, .-predictor_decode_mono
--- a/lib/rbcodec/codecs/demac/libdemac/predictor.c
+++ b/lib/rbcodec/codecs/demac/libdemac/predictor.c
@ -0,0 +1,271 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id$
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#include <inttypes.h>
+#include <string.h>
+
+#include "parser.h"
+#include "predictor.h"
+#include "demac_config.h"
+
+/* Return 0 if x is zero, -1 if x is positive, 1 if x is negative.
+ * (Note that this is the opposite sense to a conventional signum.)
+ * The expansion is wrapped in parentheses so it can be embedded in a larger
+ * expression, e.g. 1 + SIGN(x); x may be evaluated twice. */
+#define SIGN(x) ((x) ? (((x) > 0) ? -1 : 1) : 0)
+
+/* Starting values for the 4-tap "A" coefficient sets; copied into both
+ * YcoeffsA and XcoeffsA by init_predictor_decoder(). */
+static const int32_t initial_coeffs[4] = {
+  360, 317, -109, 98
+};
+
+/* Indices into p->buf of the newest entry of each delay line.  p->buf is
+ * advanced by one entry per sample, so older entries are found at lower
+ * indices (e.g. p->buf[YDELAYA-1]). */
+#define YDELAYA (18 + PREDICTOR_ORDER*4)
+#define YDELAYB (18 + PREDICTOR_ORDER*3)
+#define XDELAYA (18 + PREDICTOR_ORDER*2)
+#define XDELAYB (18 + PREDICTOR_ORDER)
+
+/* Indices into p->buf of the newest entry of each SIGN() history; these
+ * values are what gets added to / subtracted from the matching coefficient
+ * set when it is adapted. */
+#define YADAPTCOEFFSA (18)
+#define XADAPTCOEFFSA (14)
+#define YADAPTCOEFFSB (10)
+#define XADAPTCOEFFSB (5)
+
+/* Put *p into its start-of-stream state: filter memory cleared, default
+ * "A" coefficients loaded, "B" coefficients zeroed and the history rewound. */
+void init_predictor_decoder(struct predictor_t* p)
+{
+    /* Channel X filter memory */
+    p->XlastA = 0;
+    p->XfilterB = 0;
+    p->XfilterA = 0;
+
+    /* Channel Y filter memory */
+    p->YlastA = 0;
+    p->YfilterB = 0;
+    p->YfilterA = 0;
+
+    /* The "B" coefficient sets start out at zero ... */
+    memset(p->XcoeffsB, 0, sizeof(p->XcoeffsB));
+    memset(p->YcoeffsB, 0, sizeof(p->YcoeffsB));
+
+    /* ... while both "A" sets start from the built-in defaults */
+    memcpy(p->XcoeffsA, initial_coeffs, sizeof(initial_coeffs));
+    memcpy(p->YcoeffsA, initial_coeffs, sizeof(initial_coeffs));
+
+    /* Rewind to the start of the history buffer and clear the first
+     * PREDICTOR_SIZE entries */
+    p->buf = p->historybuffer;
+    memset(p->buf, 0, sizeof(int32_t) * PREDICTOR_SIZE);
+}
+
+#if !defined(CPU_ARM) && !defined(CPU_COLDFIRE)
+/* Portable C version of the stereo predictor (only built when neither
+ * CPU_ARM nor CPU_COLDFIRE is defined).
+ *
+ * p        - predictor state; p->buf advances by one entry per sample
+ * decoded0 - in: input values for predictor Y, out: p->YfilterA per sample
+ * decoded1 - in: input values for predictor X, out: p->XfilterA per sample
+ * count    - number of samples to process in each of decoded0/decoded1
+ *
+ * Both arrays are rewritten in place.
+ */
+void ICODE_ATTR_DEMAC predictor_decode_stereo(struct predictor_t* p,
+                                              int32_t* decoded0,
+                                              int32_t* decoded1,
+                                              int count)
+{
+    int32_t predictionA, predictionB; 
+
+    while (LIKELY(count--))
+    {
+        /* Predictor Y */
+        p->buf[YDELAYA] = p->YlastA;
+        p->buf[YADAPTCOEFFSA] = SIGN(p->buf[YDELAYA]);
+
+        /* The second delay entry is turned into a first difference */
+        p->buf[YDELAYA-1] = p->buf[YDELAYA] - p->buf[YDELAYA-1];
+        p->buf[YADAPTCOEFFSA-1] = SIGN(p->buf[YDELAYA-1]);
+
+        predictionA = (p->buf[YDELAYA] * p->YcoeffsA[0]) + 
+                      (p->buf[YDELAYA-1] * p->YcoeffsA[1]) + 
+                      (p->buf[YDELAYA-2] * p->YcoeffsA[2]) + 
+                      (p->buf[YDELAYA-3] * p->YcoeffsA[3]);
+
+        /*  Apply a scaled first-order filter compression */
+        /* The B stage of Y is fed from the other channel: p->XfilterA still
+         * holds the value from the previous sample at this point. */
+        p->buf[YDELAYB] = p->XfilterA - ((p->YfilterB * 31) >> 5);
+        p->buf[YADAPTCOEFFSB] = SIGN(p->buf[YDELAYB]);
+        p->YfilterB = p->XfilterA;
+
+        p->buf[YDELAYB-1] = p->buf[YDELAYB] - p->buf[YDELAYB-1];
+        p->buf[YADAPTCOEFFSB-1] = SIGN(p->buf[YDELAYB-1]);
+
+        predictionB = (p->buf[YDELAYB] * p->YcoeffsB[0]) + 
+                      (p->buf[YDELAYB-1] * p->YcoeffsB[1]) + 
+                      (p->buf[YDELAYB-2] * p->YcoeffsB[2]) + 
+                      (p->buf[YDELAYB-3] * p->YcoeffsB[3]) + 
+                      (p->buf[YDELAYB-4] * p->YcoeffsB[4]);
+
+        p->YlastA = *decoded0 + ((predictionA + (predictionB >> 1)) >> 10);
+        p->YfilterA =  p->YlastA + ((p->YfilterA * 31) >> 5);
+
+        /* Predictor X */
+
+        p->buf[XDELAYA] = p->XlastA;
+        p->buf[XADAPTCOEFFSA] = SIGN(p->buf[XDELAYA]);
+        p->buf[XDELAYA-1] = p->buf[XDELAYA] - p->buf[XDELAYA-1];
+        p->buf[XADAPTCOEFFSA-1] = SIGN(p->buf[XDELAYA-1]);
+
+        predictionA = (p->buf[XDELAYA] * p->XcoeffsA[0]) + 
+                      (p->buf[XDELAYA-1] * p->XcoeffsA[1]) + 
+                      (p->buf[XDELAYA-2] * p->XcoeffsA[2]) + 
+                      (p->buf[XDELAYA-3] * p->XcoeffsA[3]);
+
+        /*  Apply a scaled first-order filter compression */
+        /* Unlike the Y case above, p->YfilterA has already been updated for
+         * the current sample when it is used here. */
+        p->buf[XDELAYB] = p->YfilterA - ((p->XfilterB * 31) >> 5);
+        p->buf[XADAPTCOEFFSB] = SIGN(p->buf[XDELAYB]);
+        p->XfilterB = p->YfilterA;
+        p->buf[XDELAYB-1] = p->buf[XDELAYB] - p->buf[XDELAYB-1];
+        p->buf[XADAPTCOEFFSB-1] = SIGN(p->buf[XDELAYB-1]);
+
+        predictionB = (p->buf[XDELAYB] * p->XcoeffsB[0]) + 
+                      (p->buf[XDELAYB-1] * p->XcoeffsB[1]) + 
+                      (p->buf[XDELAYB-2] * p->XcoeffsB[2]) + 
+                      (p->buf[XDELAYB-3] * p->XcoeffsB[3]) + 
+                      (p->buf[XDELAYB-4] * p->XcoeffsB[4]);
+
+        p->XlastA = *decoded1 + ((predictionA + (predictionB >> 1)) >> 10); 
+        p->XfilterA =  p->XlastA + ((p->XfilterA * 31) >> 5);
+
+        /* Adapt the Y coefficients.  *decoded0 still holds the input value
+         * here; it is only overwritten with the output further down. */
+        if (LIKELY(*decoded0 != 0))
+        {
+            if (*decoded0 > 0)
+            {
+                p->YcoeffsA[0] -= p->buf[YADAPTCOEFFSA];
+                p->YcoeffsA[1] -= p->buf[YADAPTCOEFFSA-1];
+                p->YcoeffsA[2] -= p->buf[YADAPTCOEFFSA-2];
+                p->YcoeffsA[3] -= p->buf[YADAPTCOEFFSA-3];
+
+                p->YcoeffsB[0] -= p->buf[YADAPTCOEFFSB];
+                p->YcoeffsB[1] -= p->buf[YADAPTCOEFFSB-1];
+                p->YcoeffsB[2] -= p->buf[YADAPTCOEFFSB-2];
+                p->YcoeffsB[3] -= p->buf[YADAPTCOEFFSB-3];
+                p->YcoeffsB[4] -= p->buf[YADAPTCOEFFSB-4];
+            }
+            else
+            {
+                p->YcoeffsA[0] += p->buf[YADAPTCOEFFSA];
+                p->YcoeffsA[1] += p->buf[YADAPTCOEFFSA-1];
+                p->YcoeffsA[2] += p->buf[YADAPTCOEFFSA-2];
+                p->YcoeffsA[3] += p->buf[YADAPTCOEFFSA-3];
+
+                p->YcoeffsB[0] += p->buf[YADAPTCOEFFSB];
+                p->YcoeffsB[1] += p->buf[YADAPTCOEFFSB-1];
+                p->YcoeffsB[2] += p->buf[YADAPTCOEFFSB-2];
+                p->YcoeffsB[3] += p->buf[YADAPTCOEFFSB-3];
+                p->YcoeffsB[4] += p->buf[YADAPTCOEFFSB-4];
+            }
+        }
+
+        *(decoded0++) = p->YfilterA;
+
+        /* Adapt the X coefficients, again keyed on the not-yet-overwritten
+         * input value. */
+        if (LIKELY(*decoded1 != 0))
+        {
+            if (*decoded1 > 0)
+            {
+                p->XcoeffsA[0] -= p->buf[XADAPTCOEFFSA];
+                p->XcoeffsA[1] -= p->buf[XADAPTCOEFFSA-1];
+                p->XcoeffsA[2] -= p->buf[XADAPTCOEFFSA-2];
+                p->XcoeffsA[3] -= p->buf[XADAPTCOEFFSA-3];
+
+                p->XcoeffsB[0] -= p->buf[XADAPTCOEFFSB];
+                p->XcoeffsB[1] -= p->buf[XADAPTCOEFFSB-1];
+                p->XcoeffsB[2] -= p->buf[XADAPTCOEFFSB-2];
+                p->XcoeffsB[3] -= p->buf[XADAPTCOEFFSB-3];
+                p->XcoeffsB[4] -= p->buf[XADAPTCOEFFSB-4];
+            }
+            else
+            {
+                p->XcoeffsA[0] += p->buf[XADAPTCOEFFSA];
+                p->XcoeffsA[1] += p->buf[XADAPTCOEFFSA-1];
+                p->XcoeffsA[2] += p->buf[XADAPTCOEFFSA-2];
+                p->XcoeffsA[3] += p->buf[XADAPTCOEFFSA-3];
+
+                p->XcoeffsB[0] += p->buf[XADAPTCOEFFSB];
+                p->XcoeffsB[1] += p->buf[XADAPTCOEFFSB-1];
+                p->XcoeffsB[2] += p->buf[XADAPTCOEFFSB-2];
+                p->XcoeffsB[3] += p->buf[XADAPTCOEFFSB-3];
+                p->XcoeffsB[4] += p->buf[XADAPTCOEFFSB-4];
+            }
+        }
+
+        *(decoded1++) = p->XfilterA;
+
+        /* Combined */
+        p->buf++;
+
+        /* Have we filled the history buffer? */
+        if (UNLIKELY(p->buf == p->historybuffer + PREDICTOR_HISTORY_SIZE)) {
+            /* Carry the live PREDICTOR_SIZE entries back to the start */
+            memmove(p->historybuffer, p->buf, 
+                    PREDICTOR_SIZE * sizeof(int32_t));
+            p->buf = p->historybuffer;
+        }
+    }
+}
+
+/* Portable C version of the mono predictor (only built when neither
+ * CPU_ARM nor CPU_COLDFIRE is defined).
+ *
+ * p        - predictor state; only the Y / "A" members are used here
+ * decoded0 - in: input values, out: p->YfilterA per sample (in place)
+ * count    - number of samples to process
+ */
+void ICODE_ATTR_DEMAC predictor_decode_mono(struct predictor_t* p,
+                                            int32_t* decoded0,
+                                            int count)
+{
+    int32_t predictionA, currentA, A;
+
+    /* The running sample lives in a local for the duration of the loop and
+     * is written back to p->YlastA once at the end. */
+    currentA = p->YlastA;
+
+    while (LIKELY(count--))
+    {
+        A = *decoded0;
+
+        p->buf[YDELAYA] = currentA;
+        /* The second delay entry is turned into a first difference */
+        p->buf[YDELAYA-1] = p->buf[YDELAYA] - p->buf[YDELAYA-1];
+
+        predictionA = (p->buf[YDELAYA] * p->YcoeffsA[0]) + 
+                      (p->buf[YDELAYA-1] * p->YcoeffsA[1]) + 
+                      (p->buf[YDELAYA-2] * p->YcoeffsA[2]) + 
+                      (p->buf[YDELAYA-3] * p->YcoeffsA[3]);
+
+        currentA = A + (predictionA >> 10);
+
+        p->buf[YADAPTCOEFFSA] = SIGN(p->buf[YDELAYA]);
+        p->buf[YADAPTCOEFFSA-1] = SIGN(p->buf[YDELAYA-1]);
+        
+        /* Adapt the coefficients according to the sign of the input value;
+         * a zero input leaves them untouched. */
+        if (LIKELY(A != 0))
+        {
+            if (A > 0)
+            {
+                p->YcoeffsA[0] -= p->buf[YADAPTCOEFFSA];
+                p->YcoeffsA[1] -= p->buf[YADAPTCOEFFSA-1];
+                p->YcoeffsA[2] -= p->buf[YADAPTCOEFFSA-2];
+                p->YcoeffsA[3] -= p->buf[YADAPTCOEFFSA-3];
+            }
+            else
+            {
+                p->YcoeffsA[0] += p->buf[YADAPTCOEFFSA];
+                p->YcoeffsA[1] += p->buf[YADAPTCOEFFSA-1];
+                p->YcoeffsA[2] += p->buf[YADAPTCOEFFSA-2];
+                p->YcoeffsA[3] += p->buf[YADAPTCOEFFSA-3];
+            }
+        }
+
+        p->buf++;
+
+        /* Have we filled the history buffer? */
+        if (UNLIKELY(p->buf == p->historybuffer + PREDICTOR_HISTORY_SIZE)) {
+            /* Carry the live PREDICTOR_SIZE entries back to the start */
+            memmove(p->historybuffer, p->buf, 
+                    PREDICTOR_SIZE * sizeof(int32_t));
+            p->buf = p->historybuffer;
+        }
+
+        p->YfilterA =  currentA + ((p->YfilterA * 31) >> 5);
+        *(decoded0++) = p->YfilterA;
+    }
+
+    p->YlastA = currentA;
+}
+#endif
--- a/lib/rbcodec/codecs/demac/libdemac/predictor.h
+++ b/lib/rbcodec/codecs/demac/libdemac/predictor.h
@ -0,0 +1,38 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id$
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#ifndef _APE_PREDICTOR_H
+#define _APE_PREDICTOR_H
+
+#include <inttypes.h>
+#include "parser.h"
+#include "filter.h"
+
+/* Reset the predictor state in *p (history buffer, coefficients and filter
+ * memory) ready for decoding. */
+void init_predictor_decoder(struct predictor_t* p);
+/* Run the predictor over count samples of a stereo pair, rewriting
+ * decoded0[] and decoded1[] in place. */
+void predictor_decode_stereo(struct predictor_t* p, int32_t* decoded0,
+                             int32_t* decoded1, int count);
+/* Run the predictor over count samples of a single channel, rewriting
+ * decoded0[] in place. */
+void predictor_decode_mono(struct predictor_t* p, int32_t* decoded0,
+                           int count);
+
+#endif
--- a/lib/rbcodec/codecs/demac/libdemac/udiv32_arm-pre.S
+++ b/lib/rbcodec/codecs/demac/libdemac/udiv32_arm-pre.S
@ -0,0 +1,25 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2010 by Andrew Mahone
+ *
+ * Wrapper for udiv32_arm.S to test available IRAM by pre-linking the codec.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#define APE_PRE
+#include "udiv32_arm.S"
--- a/lib/rbcodec/codecs/demac/libdemac/udiv32_arm.S
+++ b/lib/rbcodec/codecs/demac/libdemac/udiv32_arm.S
@ -0,0 +1,318 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2008 by Jens Arnold
+ * Copyright (C) 2009 by Andrew Mahone
+ *
+ * Optimised unsigned integer division for ARMv4
+ *
+ * Based on: libgcc routines for ARM cpu, additional algorithms from ARM System
+ *           Developer's Guide
+ * Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
+ * Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
+ * Free Software Foundation, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include "config.h"
+/* On targets with codec iram, a header file will be generated after an initial
+   link of the APE codec, stating the amount of IRAM remaining for use by the
+   reciprocal lookup table. */
+#if !defined(APE_PRE) && defined(USE_IRAM) && ARM_ARCH < 5
+#include "lib/rbcodec/codecs/ape_free_iram.h"
+#endif
+
+/* Codecs should not normally do this, but we need to check a macro, and
+ * codecs.h would confuse the assembler. */
+
+#ifdef USE_IRAM
+#define DIV_RECIP
+    .section    .icode,"ax",%progbits
+#else
+    .text
+#endif
+    .align
+    .global udiv32_arm
+    .type   udiv32_arm,%function
+
+#if ARM_ARCH < 5
+/* Adapted from an algorithm given in ARM System Developer's Guide (7.3.1.2)
+   for dividing a 30-bit value by a 15-bit value, with two operations per
+   iteration by storing quotient and remainder together and adding the previous
+   quotient bit during trial subtraction. Modified to work with any dividend
+   and divisor both less than 1 << 30, and skipping trials by calculating bits
+   in output.
+
+   Macro arguments (all registers):
+     dividend   in:  numerator
+     divisor    in:  the NEGATED divisor (the caller in this file negates it
+                     with rsbs before expanding the macro); clobbered
+     result     scratch: packed remainder/quotient while iterating
+     bits       scratch: number of quotient bits to produce
+     curbit     scratch: number of unrolled iterations to skip
+     quotient   out
+     remainder  out
+   quotient and remainder are only written by the final two instructions, so
+   they may be the same registers as dividend and divisor. */
+.macro ARM_DIV_31_BODY dividend, divisor, result, bits, curbit, quotient, remainder
+
+    mov     \bits, #1
+    /* Shift the divisor left until it aligns with the numerator. If it already
+       has the high bit set, this is fine, everything inside .rept will be
+       skipped, and the add before and adcs after will set the one-bit result
+       to zero. */
+    cmn     \divisor, \dividend, lsr #16
+    movcs   \divisor, \divisor, lsl #16
+    addcs   \bits, \bits, #16
+    cmn     \divisor, \dividend, lsr #8
+    movcs   \divisor, \divisor, lsl #8
+    addcs   \bits, \bits, #8
+    cmn     \divisor, \dividend, lsr #4
+    movcs   \divisor, \divisor, lsl #4
+    addcs   \bits, \bits, #4
+    cmn     \divisor, \dividend, lsr #2
+    movcs   \divisor, \divisor, lsl #2
+    addcs   \bits, \bits, #2
+    cmn     \divisor, \dividend, lsr #1
+    movcs   \divisor, \divisor, lsl #1
+    addcs   \bits, \bits, #1
+    /* First trial subtraction (an add, because the divisor is negated) */
+    adds    \result, \dividend, \divisor
+    subcc   \result, \result, \divisor
+    /* Computed jump into the unrolled loop below: each iteration is two
+       instructions (8 bytes), and 31 - bits of them are skipped. The nop
+       pads for pc reading two instructions ahead. */
+    rsb     \curbit, \bits, #31
+    add     pc, pc, \curbit, lsl #3
+    nop
+    .rept   30
+    adcs    \result, \divisor, \result, lsl #1
+    /* Fix the remainder portion of the result. This must be done because the
+       handler for 32-bit numerators needs the remainder. */
+    subcc   \result, \result, \divisor
+    .endr
+    /* Shift remainder/quotient left one, add final quotient bit */
+    adc     \result, \result, \result
+    /* Unpack: the remainder is everything above the low \bits bits, the
+       quotient is the low \bits bits. */
+    mov     \remainder, \result, lsr \bits
+    eor     \quotient, \result, \remainder, lsl \bits
+.endm
+
+/* recip_max is the largest divisor served by the reciprocal table emitted at
+   .L_udiv_recip_table, which holds recip_max - 2 words (divisors 3 up to
+   recip_max). Without FREE_IRAM the table is therefore empty. */
+#ifndef FREE_IRAM
+.set recip_max, 2
+#else
+/* Each table entry is one word. Since a compare is done against the maximum
+   entry as an immediate, the maximum entry must be a valid ARM immediate,
+   which means a byte shifted by an even number of places. */
+.set recip_max, 2 + FREE_IRAM / 4
+/* Count how many significant bits recip_max has beyond the low 8, using a
+   binary search with shifts of 16, 8, 4, 2 and 1. */
+.set recip_max_tmp, recip_max >> 8
+.set recip_mask_shift, 0
+.set tmp_shift, 16
+.rept 5
+    .if recip_max_tmp >> tmp_shift
+        .set recip_max_tmp, recip_max_tmp >> tmp_shift
+        .set recip_mask_shift, recip_mask_shift + tmp_shift
+    .endif
+    .set tmp_shift, tmp_shift >> 1
+.endr
+.if recip_max_tmp
+    .set recip_mask_shift, recip_mask_shift + 1
+.endif
+/* Round the shift up to an even number, then keep only the 8 bits of
+   recip_max that sit at that shift (rounding recip_max down). */
+.set recip_mask_shift, (recip_mask_shift + 1) & 62
+.set recip_max, recip_max & (255 << recip_mask_shift)
+//.set recip_max, 2
+#endif
+
+/* Unsigned 32-bit division.
+   In:  r0 = numerator, r1 = divisor
+   Out: r0 = quotient
+   A zero divisor ends up at .L_div0, which hands over to __div0. */
+udiv32_arm:
+#ifdef DIV_RECIP
+    /* Divisors 0..2 are handled specially, divisors 3..recip_max go through
+       the reciprocal table, anything larger takes the shift/subtract path. */
+    cmp     r1, #3
+    bcc     .L_udiv_tiny
+    cmp     r1, #recip_max
+    bhi     .L_udiv
+    /* The entry for divisor d lives at .L_udiv_recip_table + (d - 3) * 4 */
+    adr     r3, .L_udiv_recip_table-12
+    ldr     r2, [r3, r1, lsl #2]
+    mov     r3, r0
+    /* r0 = high word of (reciprocal * numerator) = quotient estimate */
+    umull   ip, r0, r2, r0
+    /* If estimate * divisor exceeds the numerator the estimate is one too
+       large, so decrement it. */
+    mul     r2, r0, r1
+    cmp     r3, r2
+    bxcs    lr
+    sub     r0, r0, #1
+    bx      lr
+.L_udiv_tiny:
+    /* r1 is 0, 1 or 2: halve for 2, return r0 unchanged for 1, fault for 0 */
+    cmp     r1, #1
+    movhi   r0, r0, lsr #1
+    bxcs    lr
+    b       .L_div0
+#endif
+.L_udiv:
+    /* Invert divisor. ARM_DIV_31_BODY uses adc to both subtract the divisor
+       and add the next bit of the result. The correction code at .L_udiv32
+       does not need the divisor inverted, but can be modified to work with it,
+       and this allows the zero divisor test to be done early and without an
+       explicit comparison. */
+    rsbs    r1, r1, #0
+#ifndef DIV_RECIP
+    beq .L_div0
+#endif
+    tst     r0, r0
+    /* High bit must be unset, otherwise shift numerator right, calculate,
+       and correct results. As this case is very uncommon we want to avoid
+       any other delays on the main path in handling it, so the long divide
+       calls the short divide as a function. */
+    bmi     .L_udiv32
+.L_udiv31:
+    ARM_DIV_31_BODY r0, r1, r2, r3, ip, r0, r1
+    bx      lr
+.L_udiv32:
+    /* Store original numerator and (negated) divisor, we'll need them to
+       correct the result. They are written below sp without moving sp. */
+    stmdb   sp, { r0, r1, lr }
+    /* No zero-divisor check is needed here: a zero divisor has already been
+       diverted to .L_div0, either by the beq after rsbs above or via
+       .L_udiv_tiny. */
+    mov     r0, r0, lsr #1
+    bl      .L_udiv31
+    ldmdb   sp, { r2, r3, lr }
+    /* Move the low bit of the original numerator to the carry bit */
+    movs    r2, r2, lsr #1
+    /* Shift the remainder left one and add in the carry bit */
+    adc     r1, r1, r1
+    /* Subtract the original divisor from the remainder, setting carry if the
+       result is non-negative */
+    adds    r1, r1, r3
+    /* Shift quotient left one and add carry bit */
+    adc     r0, r0, r0
+    bx      lr
+.L_div0:
+    /* __div0 expects the calling address on the top of the stack */
+    stmdb sp!, { lr }
+    mov     r0, #0
+#if defined(__ARM_EABI__) || !defined(USE_IRAM)
+    bl      __div0
+#else
+    /* Load the address of __div0 from the literal word that follows */
+    ldr     pc, [pc, #-4]
+    .word   __div0
+#endif
+#ifdef DIV_RECIP
+    /* One word per divisor from 3 to recip_max. For a power of two the entry
+       is exactly 2^32 / div; otherwise it is floor(2^32 / div) + 1, built up
+       from 2^30 / div in two doubling steps so the assembler never has to
+       represent 2^32. */
+.L_udiv_recip_table:
+    .set div, 3
+    .rept recip_max - 2
+        .if (div - 1) & div
+            .set q, 0x40000000 / div
+            .set r, (0x40000000 - (q * div))<<1
+            .set q, q << 1
+            .if r >= div
+                .set q, q + 1
+                .set r, r - div
+            .endif
+            .set r, r << 1
+            .set q, q << 1
+            .if r >= div
+                .set q, q + 1
+                .set r, r - div
+            .endif
+            .set q, q + 1
+        .else
+            .set q, 0x40000000 / div * 4
+        .endif
+        .word q
+        .set div, div+1
+    .endr
+#endif
+    .size udiv32_arm, . - udiv32_arm
+
+#else
+/* Unsigned 32-bit division body for ARMv5 and later. Every path through the
+   macro ends in bx lr or a branch to \div0label.
+
+   Macro arguments (registers unless noted):
+     numerator  in
+     divisor    in; clobbered
+     quotient   out
+     bits, inv, neg   scratch
+     div0label  label branched to for a zero divisor; if it is left empty the
+                zero test is not assembled
+
+   Local labels:
+     10:  taken when clz(divisor) < 7 (subs \bits, \bits, #7 goes negative)
+     20:  taken when the normalised divisor is not above 1 << 31, i.e. the
+          divisor is a power of two or zero
+     30:  numerator < divisor, quotient is 0
+     40:  (ARMv6 only) numerator has its top bit set
+
+   NOTE(review): the mul/smlawt/smmla sequence looks like iterative
+   refinement of the table's reciprocal estimate - confirm against the
+   original author's notes before relying on this description. */
+.macro ARMV5_UDIV32_BODY numerator, divisor, quotient, bits, inv, neg, div0label
+    cmp     \numerator, \divisor
+    clz     \bits, \divisor
+    bcc     30f
+    /* Normalise the divisor so its highest set bit is bit 31 */
+    mov     \inv, \divisor, lsl \bits
+    /* Bits 31..25 of the normalised divisor (64..127) select one byte of
+       .L_udiv_est_table; the -64 in the offset removes the always-set top
+       bit. */
+    add     \neg, pc, \inv, lsr #25
+    cmp     \inv, #1<<31
+    ldrhib  \inv, [\neg, #.L_udiv_est_table-.-64]
+    bls     20f
+    subs    \bits, \bits, #7
+    rsb     \neg, \divisor, #0
+    movpl   \divisor, \inv, lsl \bits
+    bmi     10f
+    mul     \inv, \divisor, \neg
+    smlawt  \divisor, \divisor, \inv, \divisor
+    mul     \inv, \divisor, \neg
+    /* This will save a cycle on ARMv6, but requires that the numerator sign
+       bit is not set (that of inv is guaranteed unset). The branch should
+       predict very well, making it typically 1 cycle, and thus both the branch
+       and test fill delay cycles for the multiplies. Based on logging of
+       numerator sizes in the APE codec, the branch is taken about 1/10^7 of
+       the time. */
+#if ARM_ARCH >= 6
+    tst     \numerator, \numerator
+    smmla   \divisor, \divisor, \inv, \divisor
+    bmi     40f
+    smmul   \inv, \numerator, \divisor
+#else
+    mov     \bits, #0
+    smlal   \bits, \divisor, \inv, \divisor
+    umull   \bits, \inv, \numerator, \divisor
+#endif
+    /* \inv now holds the quotient estimate; adjust it upward according to
+       what is left of the numerator after subtracting estimate * divisor */
+    add     \numerator, \numerator, \neg
+    mla     \divisor, \inv, \neg, \numerator
+    mov     \quotient, \inv
+    cmn     \divisor, \neg
+    addcc   \quotient, \quotient, #1
+    addpl   \quotient, \quotient, #2
+    bx      lr
+10:
+    /* Large divisor: use the table estimate directly, shifted right */
+    rsb     \bits, \bits, #0
+    sub     \inv, \inv, #4
+    mov     \divisor, \inv, lsr \bits
+    umull   \bits, \inv, \numerator, \divisor
+    mla     \divisor, \inv, \neg, \numerator
+    mov     \quotient, \inv
+    cmn     \neg, \divisor, lsr #1
+    addcs   \divisor, \divisor, \neg, lsl #1
+    addcs   \quotient, \quotient, #2
+    cmn     \neg, \divisor
+    addcs   \quotient, \quotient, #1
+    bx      lr
+20:
+.ifnc "", "\div0label"
+    /* Flags are still those of cmp \inv, #1<<31 (rsb does not set them):
+       "not equal" here means the normalised divisor was 0, i.e. a zero
+       divisor. */
+    rsb     \bits, \bits, #31
+    bne     \div0label
+.endif
+    /* Power-of-two divisor: the quotient is a plain right shift */
+    mov     \quotient, \numerator, lsr \bits
+    bx      lr
+30:
+    mov     \quotient, #0
+    bx      lr
+#if ARM_ARCH >= 6
+40:
+    /* Same finish as the main path, but using the unsigned multiply because
+       the numerator has its top bit set */
+    umull   \bits, \inv, \numerator, \divisor
+    add     \numerator, \numerator, \neg
+    mla     \divisor, \inv, \neg, \numerator
+    mov     \quotient, \inv
+    cmn     \divisor, \neg
+    addcc   \quotient, \quotient, #1
+    addpl   \quotient, \quotient, #2
+    bx      lr
+#endif
+.endm
+
+/* Unsigned 32-bit division for ARMv5 and later.
+   In:  r0 = numerator, r1 = divisor
+   Out: r0 = quotient
+   The macro returns by itself on every non-zero-divisor path; only a zero
+   divisor reaches .L_div0 below. */
+udiv32_arm:
+    ARMV5_UDIV32_BODY r0, r1, r0, r2, r3, ip, .L_div0
+.L_div0:
+    /* __div0 expects the calling address on the top of the stack */
+    stmdb sp!, { lr }
+    mov     r0, #0
+#if defined(__ARM_EABI__) || !defined(USE_IRAM)
+    bl      __div0
+#else
+    /* Load the address of __div0 from the literal word that follows */
+    ldr     pc, [pc, #-4]
+    .word   __div0
+#endif
+    /* 64 one-byte initial estimates, read by the ldrhib in
+       ARMV5_UDIV32_BODY and indexed by the six bits below the top bit of the
+       normalised divisor. */
+.L_udiv_est_table:
+    .byte 0xff, 0xfc, 0xf8, 0xf4, 0xf0, 0xed, 0xea, 0xe6
+    .byte 0xe3, 0xe0, 0xdd, 0xda, 0xd7, 0xd4, 0xd2, 0xcf
+    .byte 0xcc, 0xca, 0xc7, 0xc5, 0xc3, 0xc0, 0xbe, 0xbc
+    .byte 0xba, 0xb8, 0xb6, 0xb4, 0xb2, 0xb0, 0xae, 0xac
+    .byte 0xaa, 0xa8, 0xa7, 0xa5, 0xa3, 0xa2, 0xa0, 0x9f
+    .byte 0x9d, 0x9c, 0x9a, 0x99, 0x97, 0x96, 0x94, 0x93
+    .byte 0x92, 0x90, 0x8f, 0x8e, 0x8d, 0x8c, 0x8a, 0x89
+    .byte 0x88, 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81
+#endif
+    .size udiv32_arm, . - udiv32_arm
--- a/lib/rbcodec/codecs/demac/libdemac/vector_math16_armv5te.h
+++ b/lib/rbcodec/codecs/demac/libdemac/vector_math16_armv5te.h
@ -0,0 +1,404 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id$
+
+Copyright (C) Dave Chapman 2007
+
+ARMv5te vector math copyright (C) 2008 Jens Arnold
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#define FUSED_VECTOR_MATH /* NOTE(review): presumably advertises the fused
+                           * vector_sp_add()/vector_sp_sub() below to the
+                           * including code - confirm at the include site. */
+
+#define REPEAT_3(x) x x x /* Pastes its argument three times in a row. */
+#if ORDER > 16
+#define REPEAT_MLA(x) x x x x x x x /* Seven copies when ORDER > 16 ... */
+#else
+#define REPEAT_MLA(x) x x x /* ... and three copies otherwise. */
+#endif
+
+/* Calculate scalarproduct, then add a 2nd vector (fused for performance)
+ * This version fetches data as 32 bit words, and *requires* v1 to be
+ * 32 bit aligned. It also requires that f2 and s2 are either both 32 bit
+ * aligned or both unaligned. If either condition isn't met, it will either
+ * result in a data abort or incorrect results.
+ *
+ * Parameters:
+ *   v1 - vector that is multiplied with f2 and then updated in place;
+ *        each 16 bit element becomes v1[i] + s2[i], wrapping modulo 2^16.
+ *   f2 - vector multiplied element-wise with the values v1 held on entry.
+ *   s2 - vector added to v1.
+ * Returns the accumulated sum of v1[i] * f2[i].
+ *
+ * One pass of the unrolled code covers 16 elements; when ORDER > 16 the
+ * pass is executed ORDER>>4 times. Bit 1 of f2 selects the code path:
+ * label 20 if it is clear (word aligned), label 10 otherwise. The label 10
+ * path first reads a single halfword from f2 and from s2 and then continues
+ * with whole-word loads, so it fetches one halfword more from each of them
+ * than the number of elements it uses. */
+static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
+{
+    int res;
+#if ORDER > 16
+    int cnt = ORDER>>4; /* number of 16 element passes */
+#endif
+
+#define ADDHALFREGS(sum, s1, s2)                         /* Adds register   */ \
+        "mov   " #s1  ", " #s1  ",   ror #16         \n" /* halves straight */ \
+        "add   " #sum ", " #s1  ", " #s2  ", lsl #16 \n" /* Clobbers 's1'   */ \
+        "add   " #s1  ", " #s1  ", " #s2  ", lsr #16 \n" \
+        "mov   " #s1  ", " #s1  ",   lsl #16         \n" \
+        "orr   " #sum ", " #s1  ", " #sum ", lsr #16 \n"
+
+#define ADDHALFXREGS(sum, s1, s2)                        /* Adds register  */ \
+        "add   " #s1  ", " #s1  ", " #sum ", lsl #16 \n" /* halves across. */ \
+        "add   " #sum ", " #s2  ", " #sum ", lsr #16 \n" /* Clobbers 's1'. */ \
+        "mov   " #sum ", " #sum ",   lsl #16         \n" \
+        "orr   " #sum ", " #sum ", " #s1  ", lsr #16 \n"
+
+    asm volatile (
+#if ORDER > 16
+        "mov     %[res], #0              \n" /* accumulator starts at zero */
+#endif
+        "tst     %[f2], #2               \n" /* is f2 word aligned? */
+        "beq     20f                     \n"
+
+    "10:                                 \n" /* f2 and s2 start on an odd halfword */
+        "ldrh    r4, [%[s2]], #2         \n"
+        "mov     r4, r4, lsl #16         \n" /* first s2 element into the top half */
+        "ldrh    r3, [%[f2]], #2         \n"
+#if ORDER > 16
+        "mov     r3, r3, lsl #16         \n" /* first f2 element into the top half */
+    "1:                                  \n"
+        "ldmia   %[v1],  {r0,r1}         \n"
+        "smlabt  %[res], r0, r3, %[res]  \n"
+#else
+        "ldmia   %[v1],  {r0,r1}         \n"
+        "smulbb  %[res], r0, r3          \n" /* first product initialises res */
+#endif
+        "ldmia   %[f2]!, {r2,r3}         \n"
+        "smlatb  %[res], r0, r2, %[res]  \n"
+        "smlabt  %[res], r1, r2, %[res]  \n"
+        "smlatb  %[res], r1, r3, %[res]  \n"
+        "ldmia   %[s2]!, {r2,r5}         \n"
+        ADDHALFXREGS(r0, r4, r2)
+        ADDHALFXREGS(r1, r2, r5)
+        "stmia   %[v1]!, {r0,r1}         \n"
+        "ldmia   %[v1],  {r0,r1}         \n"
+        "smlabt  %[res], r0, r3, %[res]  \n"
+        "ldmia   %[f2]!, {r2,r3}         \n"
+        "smlatb  %[res], r0, r2, %[res]  \n"
+        "smlabt  %[res], r1, r2, %[res]  \n"
+        "smlatb  %[res], r1, r3, %[res]  \n"
+        "ldmia   %[s2]!, {r2,r4}         \n"
+        ADDHALFXREGS(r0, r5, r2)
+        ADDHALFXREGS(r1, r2, r4)
+        "stmia   %[v1]!, {r0,r1}         \n"
+
+        "ldmia   %[v1],  {r0,r1}         \n"
+        "smlabt  %[res], r0, r3, %[res]  \n"
+        "ldmia   %[f2]!, {r2,r3}         \n"
+        "smlatb  %[res], r0, r2, %[res]  \n"
+        "smlabt  %[res], r1, r2, %[res]  \n"
+        "smlatb  %[res], r1, r3, %[res]  \n"
+        "ldmia   %[s2]!, {r2,r5}         \n"
+        ADDHALFXREGS(r0, r4, r2)
+        ADDHALFXREGS(r1, r2, r5)
+        "stmia   %[v1]!, {r0,r1}         \n"
+        "ldmia   %[v1],  {r0,r1}         \n"
+        "smlabt  %[res], r0, r3, %[res]  \n"
+        "ldmia   %[f2]!, {r2,r3}         \n"
+        "smlatb  %[res], r0, r2, %[res]  \n"
+        "smlabt  %[res], r1, r2, %[res]  \n"
+        "smlatb  %[res], r1, r3, %[res]  \n"
+        "ldmia   %[s2]!, {r2,r4}         \n"
+        ADDHALFXREGS(r0, r5, r2)
+        ADDHALFXREGS(r1, r2, r4)
+        "stmia   %[v1]!, {r0,r1}         \n"
+#if ORDER > 16
+        "subs    %[cnt], %[cnt], #1      \n"
+        "bne     1b                      \n"
+#endif
+        "b       99f                     \n" /* skip the aligned variant */
+
+    "20:                                 \n" /* f2 and s2 word aligned */
+    "1:                                  \n"
+        "ldmia   %[v1],  {r1,r2}         \n"
+        "ldmia   %[f2]!, {r3,r4}         \n"
+#if ORDER > 16
+        "smlabb  %[res], r1, r3, %[res]  \n"
+#else
+        "smulbb  %[res], r1, r3          \n" /* first product initialises res */
+#endif
+        "smlatt  %[res], r1, r3, %[res]  \n"
+        "smlabb  %[res], r2, r4, %[res]  \n"
+        "smlatt  %[res], r2, r4, %[res]  \n"
+        "ldmia   %[s2]!, {r3,r4}         \n"
+        ADDHALFREGS(r0, r1, r3)
+        ADDHALFREGS(r1, r2, r4)
+        "stmia   %[v1]!, {r0,r1}         \n"
+
+        REPEAT_3(
+        "ldmia   %[v1],  {r1,r2}         \n"
+        "ldmia   %[f2]!, {r3,r4}         \n"
+        "smlabb  %[res], r1, r3, %[res]  \n"
+        "smlatt  %[res], r1, r3, %[res]  \n"
+        "smlabb  %[res], r2, r4, %[res]  \n"
+        "smlatt  %[res], r2, r4, %[res]  \n"
+        "ldmia   %[s2]!, {r3,r4}         \n"
+        ADDHALFREGS(r0, r1, r3)
+        ADDHALFREGS(r1, r2, r4)
+        "stmia   %[v1]!, {r0,r1}         \n"
+        )
+#if ORDER > 16
+        "subs    %[cnt], %[cnt], #1      \n"
+        "bne     1b                      \n"
+#endif
+
+    "99:                                 \n" /* common exit of both variants */
+        : /* outputs */
+#if ORDER > 16
+        [cnt]"+r"(cnt),
+#endif
+        [v1] "+r"(v1),
+        [f2] "+r"(f2),
+        [s2] "+r"(s2),
+        [res]"=r"(res)
+        : /* inputs */
+        : /* clobbers */
+        "r0", "r1", "r2", "r3", "r4", "r5", "cc", "memory"
+    );
+    return res;
+}
+
+/* Calculate scalarproduct, then subtract a 2nd vector (fused for performance)
+ * This version fetches data as 32 bit words, and *requires* v1 to be
+ * 32 bit aligned. It also requires that f2 and s2 are either both 32 bit
+ * aligned or both unaligned. If either condition isn't met, it will either
+ * result in a data abort or incorrect results.
+ *
+ * Parameters:
+ *   v1 - vector that is multiplied with f2 and then updated in place;
+ *        each 16 bit element becomes v1[i] - s2[i], wrapping modulo 2^16.
+ *   f2 - vector multiplied element-wise with the values v1 held on entry.
+ *   s2 - vector subtracted from v1.
+ * Returns the accumulated sum of v1[i] * f2[i].
+ *
+ * One pass of the unrolled code covers 16 elements; when ORDER > 16 the
+ * pass is executed ORDER>>4 times. Bit 1 of f2 selects the code path:
+ * label 20 if it is clear (word aligned), label 10 otherwise. The label 10
+ * path first reads a single halfword from f2 and from s2 and then continues
+ * with whole-word loads, so it fetches one halfword more from each of them
+ * than the number of elements it uses. */
+static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
+{
+    int res;
+#if ORDER > 16
+    int cnt = ORDER>>4; /* number of 16 element passes */
+#endif
+
+#define SUBHALFREGS(dif, s1, s2)                         /* Subtracts reg.  */ \
+        "mov   " #s1  ", " #s1  ",   ror #16         \n" /* halves straight */ \
+        "sub   " #dif ", " #s1  ", " #s2  ", lsl #16 \n" /* Clobbers 's1'   */ \
+        "sub   " #s1  ", " #s1  ", " #s2  ", lsr #16 \n" \
+        "mov   " #s1  ", " #s1  ",   lsl #16         \n" \
+        "orr   " #dif ", " #s1  ", " #dif ", lsr #16 \n"
+
+#define SUBHALFXREGS(dif, s1, s2, msk)                   /* Subtracts reg. */  \
+        "sub   " #s1  ", " #dif ", " #s1  ", lsr #16 \n" /* halves across. */  \
+        "and   " #s1  ", " #s1  ", " #msk "          \n" /* Needs msk =    */  \
+        "rsb   " #dif ", " #s2  ", " #dif ", lsr #16 \n" /*    0x0000ffff, */  \
+        "orr   " #dif ", " #s1  ", " #dif ", lsl #16 \n" /* clobbers 's1'. */
+
+    asm volatile (
+#if ORDER > 16
+        "mov     %[res], #0              \n" /* accumulator starts at zero */
+#endif
+        "tst     %[f2], #2               \n" /* is f2 word aligned? */
+        "beq     20f                     \n"
+
+    "10:                                 \n" /* f2 and s2 start on an odd halfword */
+        "mov     r6, #0xff               \n"
+        "orr     r6, r6, #0xff00         \n" /* r6 = 0x0000ffff, msk for SUBHALFXREGS */
+        "ldrh    r4, [%[s2]], #2         \n"
+        "mov     r4, r4, lsl #16         \n" /* first s2 element into the top half */
+        "ldrh    r3, [%[f2]], #2         \n"
+#if ORDER > 16
+        "mov     r3, r3, lsl #16         \n" /* first f2 element into the top half */
+    "1:                                  \n"
+        "ldmia   %[v1],  {r0,r1}         \n"
+        "smlabt  %[res], r0, r3, %[res]  \n"
+#else
+        "ldmia   %[v1],  {r0,r1}         \n"
+        "smulbb  %[res], r0, r3          \n" /* first product initialises res */
+#endif
+        "ldmia   %[f2]!, {r2,r3}         \n"
+        "smlatb  %[res], r0, r2, %[res]  \n"
+        "smlabt  %[res], r1, r2, %[res]  \n"
+        "smlatb  %[res], r1, r3, %[res]  \n"
+        "ldmia   %[s2]!, {r2,r5}         \n"
+        SUBHALFXREGS(r0, r4, r2, r6)
+        SUBHALFXREGS(r1, r2, r5, r6)
+        "stmia   %[v1]!, {r0,r1}         \n"
+        "ldmia   %[v1],  {r0,r1}         \n"
+        "smlabt  %[res], r0, r3, %[res]  \n"
+        "ldmia   %[f2]!, {r2,r3}         \n"
+        "smlatb  %[res], r0, r2, %[res]  \n"
+        "smlabt  %[res], r1, r2, %[res]  \n"
+        "smlatb  %[res], r1, r3, %[res]  \n"
+        "ldmia   %[s2]!, {r2,r4}         \n"
+        SUBHALFXREGS(r0, r5, r2, r6)
+        SUBHALFXREGS(r1, r2, r4, r6)
+        "stmia   %[v1]!, {r0,r1}         \n"
+
+        "ldmia   %[v1],  {r0,r1}         \n"
+        "smlabt  %[res], r0, r3, %[res]  \n"
+        "ldmia   %[f2]!, {r2,r3}         \n"
+        "smlatb  %[res], r0, r2, %[res]  \n"
+        "smlabt  %[res], r1, r2, %[res]  \n"
+        "smlatb  %[res], r1, r3, %[res]  \n"
+        "ldmia   %[s2]!, {r2,r5}         \n"
+        SUBHALFXREGS(r0, r4, r2, r6)
+        SUBHALFXREGS(r1, r2, r5, r6)
+        "stmia   %[v1]!, {r0,r1}         \n"
+        "ldmia   %[v1],  {r0,r1}         \n"
+        "smlabt  %[res], r0, r3, %[res]  \n"
+        "ldmia   %[f2]!, {r2,r3}         \n"
+        "smlatb  %[res], r0, r2, %[res]  \n"
+        "smlabt  %[res], r1, r2, %[res]  \n"
+        "smlatb  %[res], r1, r3, %[res]  \n"
+        "ldmia   %[s2]!, {r2,r4}         \n"
+        SUBHALFXREGS(r0, r5, r2, r6)
+        SUBHALFXREGS(r1, r2, r4, r6)
+        "stmia   %[v1]!, {r0,r1}         \n"
+#if ORDER > 16
+        "subs    %[cnt], %[cnt], #1      \n"
+        "bne     1b                      \n"
+#endif
+        "b       99f                     \n" /* skip the aligned variant */
+
+    "20:                                 \n" /* f2 and s2 word aligned */
+    "1:                                  \n"
+        "ldmia   %[v1],  {r1,r2}         \n"
+        "ldmia   %[f2]!, {r3,r4}         \n"
+#if ORDER > 16
+        "smlabb  %[res], r1, r3, %[res]  \n"
+#else
+        "smulbb  %[res], r1, r3          \n" /* first product initialises res */
+#endif
+        "smlatt  %[res], r1, r3, %[res]  \n"
+        "smlabb  %[res], r2, r4, %[res]  \n"
+        "smlatt  %[res], r2, r4, %[res]  \n"
+        "ldmia   %[s2]!, {r3,r4}         \n"
+        SUBHALFREGS(r0, r1, r3)
+        SUBHALFREGS(r1, r2, r4)
+        "stmia   %[v1]!, {r0,r1}         \n"
+
+        REPEAT_3(
+        "ldmia   %[v1],  {r1,r2}         \n"
+        "ldmia   %[f2]!, {r3,r4}         \n"
+        "smlabb  %[res], r1, r3, %[res]  \n"
+        "smlatt  %[res], r1, r3, %[res]  \n"
+        "smlabb  %[res], r2, r4, %[res]  \n"
+        "smlatt  %[res], r2, r4, %[res]  \n"
+        "ldmia   %[s2]!, {r3,r4}         \n"
+        SUBHALFREGS(r0, r1, r3)
+        SUBHALFREGS(r1, r2, r4)
+        "stmia   %[v1]!, {r0,r1}         \n"
+        )
+#if ORDER > 16
+        "subs    %[cnt], %[cnt], #1      \n"
+        "bne     1b                      \n"
+#endif
+
+    "99:                                 \n" /* common exit of both variants */
+        : /* outputs */
+#if ORDER > 16
+        [cnt]"+r"(cnt),
+#endif
+        [v1] "+r"(v1),
+        [f2] "+r"(f2),
+        [s2] "+r"(s2),
+        [res]"=r"(res)
+        : /* inputs */
+        : /* clobbers */
+        "r0", "r1", "r2", "r3", "r4", "r5", "r6", "cc", "memory"
+    );
+    return res;
+}
+
+/* This version fetches data as 32 bit words, and *requires* v1 to be
+ * 32 bit aligned, otherwise it will result either in a data abort, or
+ * incorrect results (if ARM aligncheck is disabled).
+ *
+ * Returns the accumulated sum of v1[i] * v2[i]; neither vector is written.
+ *
+ * One pass of the unrolled code covers 32 elements when ORDER > 16 and
+ * 16 elements otherwise (see REPEAT_MLA); when ORDER > 32 the pass is
+ * executed ORDER>>5 times. Bit 1 of v2 selects the code path: label 20 if
+ * it is clear (word aligned), label 10 otherwise. The label 10 path first
+ * reads a single halfword from v2 and then continues with whole-word
+ * loads, so it fetches one halfword more from v2 than it uses. */
+static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
+{
+    int res;
+#if ORDER > 32
+    int cnt = ORDER>>5; /* number of 32 element passes */
+#endif
+
+    asm volatile (
+#if ORDER > 32
+        "mov     %[res], #0              \n" /* accumulator starts at zero */
+#endif
+        "tst     %[v2], #2               \n" /* is v2 word aligned? */
+        "beq     20f                     \n"
+
+    "10:                                 \n" /* v2 starts on an odd halfword */
+        "ldrh    r3, [%[v2]], #2         \n"
+#if ORDER > 32
+        "mov     r3, r3, lsl #16         \n" /* first v2 element into the top half */
+    "1:                                  \n"
+        "ldmia   %[v1]!, {r0,r1}         \n"
+        "smlabt  %[res], r0, r3, %[res]  \n"
+#else
+        "ldmia   %[v1]!, {r0,r1}         \n"
+        "smulbb  %[res], r0, r3          \n" /* first product initialises res */
+#endif
+        "ldmia   %[v2]!, {r2,r3}         \n"
+        "smlatb  %[res], r0, r2, %[res]  \n"
+        "smlabt  %[res], r1, r2, %[res]  \n"
+        "smlatb  %[res], r1, r3, %[res]  \n"
+
+        REPEAT_MLA(
+        "ldmia   %[v1]!, {r0,r1}         \n"
+        "smlabt  %[res], r0, r3, %[res]  \n"
+        "ldmia   %[v2]!, {r2,r3}         \n"
+        "smlatb  %[res], r0, r2, %[res]  \n"
+        "smlabt  %[res], r1, r2, %[res]  \n"
+        "smlatb  %[res], r1, r3, %[res]  \n"
+        )
+#if ORDER > 32
+        "subs    %[cnt], %[cnt], #1  \n"
+        "bne     1b                  \n"
+#endif
+        "b       99f                 \n" /* skip the aligned variant */
+
+    "20:                                 \n" /* v2 word aligned */
+    "1:                                  \n"
+        "ldmia   %[v1]!, {r0,r1}         \n"
+        "ldmia   %[v2]!, {r2,r3}         \n"
+#if ORDER > 32
+        "smlabb  %[res], r0, r2, %[res]  \n"
+#else
+        "smulbb  %[res], r0, r2          \n" /* first product initialises res */
+#endif
+        "smlatt  %[res], r0, r2, %[res]  \n"
+        "smlabb  %[res], r1, r3, %[res]  \n"
+        "smlatt  %[res], r1, r3, %[res]  \n"
+
+        REPEAT_MLA(
+        "ldmia   %[v1]!, {r0,r1}         \n"
+        "ldmia   %[v2]!, {r2,r3}         \n"
+        "smlabb  %[res], r0, r2, %[res]  \n"
+        "smlatt  %[res], r0, r2, %[res]  \n"
+        "smlabb  %[res], r1, r3, %[res]  \n"
+        "smlatt  %[res], r1, r3, %[res]  \n"
+        )
+#if ORDER > 32
+        "subs    %[cnt], %[cnt], #1      \n"
+        "bne     1b                      \n"  
+#endif
+
+    "99:                                 \n" /* common exit of both variants */
+        : /* outputs */
+#if ORDER > 32
+        [cnt]"+r"(cnt),
+#endif
+        [v1] "+r"(v1),
+        [v2] "+r"(v2),
+        [res]"=r"(res)
+        : /* inputs */
+        : /* clobbers */
+        "r0", "r1", "r2", "r3", "cc", "memory"
+    );
+    return res;
+}
--- a/lib/rbcodec/codecs/demac/libdemac/vector_math16_armv6.h
+++ b/lib/rbcodec/codecs/demac/libdemac/vector_math16_armv6.h
@ -0,0 +1,490 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id$
+
+Copyright (C) Dave Chapman 2007
+
+ARMv6 vector math copyright (C) 2008 Jens Arnold
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#define FUSED_VECTOR_MATH /* NOTE(review): presumably advertises the fused
+                           * vector_sp_add()/vector_sp_sub() below to the
+                           * including code - confirm at the include site. */
+
+#if ORDER > 16
+#define REPEAT_BLOCK(x) x x x /* Three copies of the argument when ORDER > 16 ... */
+#else
+#define REPEAT_BLOCK(x) x /* ... and a single copy otherwise. */
+#endif
+
+/* Calculate scalarproduct, then add a 2nd vector (fused for performance)
+ * This version fetches data as 32 bit words, and *requires* v1 to be
+ * 32 bit aligned. It also requires that f2 and s2 are either both 32 bit
+ * aligned or both unaligned. If either condition isn't met, it will either
+ * result in a data abort or incorrect results.
+ *
+ * Parameters:
+ *   v1 - vector that is multiplied with f2 and then updated in place;
+ *        sadd16 adds s2 to it per 16 bit element without saturation.
+ *   f2 - vector multiplied element-wise with the values v1 held on entry.
+ *   s2 - vector added to v1.
+ * Returns the accumulated sum of v1[i] * f2[i].
+ *
+ * One pass of the unrolled code covers 32 elements when ORDER > 16 and
+ * 16 elements otherwise (see REPEAT_BLOCK); when ORDER > 32 the pass is
+ * executed ORDER>>5 times. Bit 1 of f2 selects the code path: label 20 if
+ * it is clear (word aligned), label 10 otherwise. The label 10 path reads
+ * one halfword from f2 and from s2 up front and repacks the following
+ * words with pkhtb/pkhbt; it fetches one halfword more from each of them
+ * than the number of elements it uses.
+ *
+ * NOTE(review): v1 is accessed with ldrd/strd, and the label 20 path also
+ * uses ldrd on f2 and s2. Depending on the core's alignment configuration
+ * these may need 64 bit rather than 32 bit alignment - confirm. */
+static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
+{
+    int res;
+#if ORDER > 32
+    int cnt = ORDER>>5; /* number of 32 element passes */
+#endif
+
+    asm volatile (
+#if ORDER > 32
+        "mov     %[res], #0              \n" /* accumulator starts at zero */
+#endif
+        "tst     %[f2], #2               \n" /* is f2 word aligned? */
+        "beq     20f                     \n"
+
+    "10:                                 \n" /* f2 and s2 start on an odd halfword */
+        "ldrh    r3, [%[f2]], #2         \n"
+        "ldrh    r6, [%[s2]], #2         \n"
+        "ldmia   %[f2]!, {r2,r4}         \n"
+        "mov     r3, r3, lsl #16         \n" /* first f2 element into the top half */
+        "mov     r6, r6, lsl #16         \n" /* first s2 element into the top half */
+
+    "1:                                  \n"
+        "ldmia   %[s2]!, {r5,r7}         \n"
+        "pkhtb   r3, r3, r2              \n"
+        "pkhtb   r2, r2, r4              \n"
+        "ldrd    r0, [%[v1]]             \n" /* r0, r1 = next four v1 elements */
+        "mov     r5, r5, ror #16         \n"
+        "pkhtb   r6, r5, r6, asr #16     \n"
+        "pkhbt   r5, r5, r7, lsl #16     \n"
+#if ORDER > 32
+        "smladx  %[res], r0, r3, %[res]  \n"
+#else
+        "smuadx  %[res], r0, r3          \n" /* first product pair initialises res */
+#endif
+        "smladx  %[res], r1, r2, %[res]  \n"
+        "ldmia   %[f2]!, {r2,r3}         \n"
+        "sadd16  r0, r0, r6              \n"
+        "sadd16  r1, r1, r5              \n"
+        "strd    r0, [%[v1]], #8         \n" /* write the sums back, advance v1 */
+
+        REPEAT_BLOCK(
+        "ldmia   %[s2]!, {r5,r6}         \n"
+        "pkhtb   r4, r4, r2              \n"
+        "pkhtb   r2, r2, r3              \n"
+        "ldrd    r0, [%[v1]]             \n"
+        "mov     r5, r5, ror #16         \n"
+        "pkhtb   r7, r5, r7, asr #16     \n"
+        "pkhbt   r5, r5, r6, lsl #16     \n"
+        "smladx  %[res], r0, r4, %[res]  \n"
+        "smladx  %[res], r1, r2, %[res]  \n"
+        "ldmia   %[f2]!, {r2,r4}         \n"
+        "sadd16  r0, r0, r7              \n"
+        "sadd16  r1, r1, r5              \n"
+        "strd    r0, [%[v1]], #8         \n"
+        "ldmia   %[s2]!, {r5,r7}         \n"
+        "pkhtb   r3, r3, r2              \n"
+        "pkhtb   r2, r2, r4              \n"
+        "ldrd    r0, [%[v1]]             \n"
+        "mov     r5, r5, ror #16         \n"
+        "pkhtb   r6, r5, r6, asr #16     \n"
+        "pkhbt   r5, r5, r7, lsl #16     \n"
+        "smladx  %[res], r0, r3, %[res]  \n"
+        "smladx  %[res], r1, r2, %[res]  \n"
+        "ldmia   %[f2]!, {r2,r3}         \n"
+        "sadd16  r0, r0, r6              \n"
+        "sadd16  r1, r1, r5              \n"
+        "strd    r0, [%[v1]], #8         \n"
+        )
+
+        "ldmia   %[s2]!, {r5,r6}         \n"
+        "pkhtb   r4, r4, r2              \n"
+        "pkhtb   r2, r2, r3              \n"
+        "ldrd    r0, [%[v1]]             \n"
+        "mov     r5, r5, ror #16         \n"
+        "pkhtb   r7, r5, r7, asr #16     \n"
+        "pkhbt   r5, r5, r6, lsl #16     \n"
+        "smladx  %[res], r0, r4, %[res]  \n"
+        "smladx  %[res], r1, r2, %[res]  \n"
+#if ORDER > 32
+        "subs    %[cnt], %[cnt], #1      \n"
+        "ldmneia %[f2]!, {r2,r4}         \n" /* preload f2 only if another pass follows */
+        "sadd16  r0, r0, r7              \n"
+        "sadd16  r1, r1, r5              \n"
+        "strd    r0, [%[v1]], #8         \n"
+        "bne     1b                      \n"
+#else
+        "sadd16  r0, r0, r7              \n"
+        "sadd16  r1, r1, r5              \n"
+        "strd    r0, [%[v1]], #8         \n"
+#endif
+
+        "b       99f                     \n" /* skip the aligned variant */
+
+    "20:                                 \n" /* f2 and s2 word aligned */
+        "ldrd    r4, [%[f2]], #8         \n"
+        "ldrd    r0, [%[v1]]             \n"
+
+#if ORDER > 32
+    "1:                                  \n"
+        "smlad   %[res], r0, r4, %[res]  \n"
+#else
+        "smuad   %[res], r0, r4          \n" /* first product pair initialises res */
+#endif
+        "ldrd    r6, [%[s2]], #8         \n"
+        "smlad   %[res], r1, r5, %[res]  \n"
+        "ldrd    r4, [%[f2]], #8         \n"
+        "ldrd    r2, [%[v1], #8]         \n" /* fetch the next v1 pair before storing this one */
+        "sadd16  r0, r0, r6              \n"
+        "sadd16  r1, r1, r7              \n"
+        "strd    r0, [%[v1]], #8         \n"
+
+        REPEAT_BLOCK(
+        "smlad   %[res], r2, r4, %[res]  \n"
+        "ldrd    r6, [%[s2]], #8         \n"
+        "smlad   %[res], r3, r5, %[res]  \n"
+        "ldrd    r4, [%[f2]], #8         \n"
+        "ldrd    r0, [%[v1], #8]         \n"
+        "sadd16  r2, r2, r6              \n"
+        "sadd16  r3, r3, r7              \n"
+        "strd    r2, [%[v1]], #8         \n"
+        "smlad   %[res], r0, r4, %[res]  \n"
+        "ldrd    r6, [%[s2]], #8         \n"
+        "smlad   %[res], r1, r5, %[res]  \n"
+        "ldrd    r4, [%[f2]], #8         \n"
+        "ldrd    r2, [%[v1], #8]         \n"
+        "sadd16  r0, r0, r6              \n"
+        "sadd16  r1, r1, r7              \n"
+        "strd    r0, [%[v1]], #8         \n"
+        )
+
+        "smlad   %[res], r2, r4, %[res]  \n"
+        "ldrd    r6, [%[s2]], #8         \n"
+        "smlad   %[res], r3, r5, %[res]  \n"
+#if ORDER > 32
+        "subs    %[cnt], %[cnt], #1      \n"
+        "ldrned  r4, [%[f2]], #8         \n" /* preload f2 and v1 only if */
+        "ldrned  r0, [%[v1], #8]         \n" /* another pass follows      */
+        "sadd16  r2, r2, r6              \n"
+        "sadd16  r3, r3, r7              \n"
+        "strd    r2, [%[v1]], #8         \n"
+        "bne     1b                      \n"
+#else
+        "sadd16  r2, r2, r6              \n"
+        "sadd16  r3, r3, r7              \n"
+        "strd    r2, [%[v1]], #8         \n"
+#endif
+
+    "99:                                 \n" /* common exit of both variants */
+        : /* outputs */
+#if ORDER > 32
+        [cnt]"+r"(cnt),
+#endif
+        [v1] "+r"(v1),
+        [f2] "+r"(f2),
+        [s2] "+r"(s2),
+        [res]"=r"(res)
+        : /* inputs */
+        : /* clobbers */
+        "r0", "r1", "r2", "r3", "r4",
+        "r5", "r6", "r7", "cc", "memory"
+    );
+    return res;
+}
+
+/* Calculate scalarproduct, then subtract a 2nd vector (fused for performance)
+ * This version fetches data as 32 bit words, and *requires* v1 to be
+ * 32 bit aligned. It also requires that f2 and s2 are either both 32 bit
+ * aligned or both unaligned. If either condition isn't met, it will either
+ * result in a data abort or incorrect results.
+ *
+ * Parameters:
+ *   v1 - vector that is multiplied with f2 and then updated in place;
+ *        ssub16 subtracts s2 from it per 16 bit element without saturation.
+ *   f2 - vector multiplied element-wise with the values v1 held on entry.
+ *   s2 - vector subtracted from v1.
+ * Returns the accumulated sum of v1[i] * f2[i].
+ *
+ * One pass of the unrolled code covers 32 elements when ORDER > 16 and
+ * 16 elements otherwise (see REPEAT_BLOCK); when ORDER > 32 the pass is
+ * executed ORDER>>5 times. Bit 1 of f2 selects the code path: label 20 if
+ * it is clear (word aligned), label 10 otherwise. The label 10 path reads
+ * one halfword from f2 and from s2 up front and repacks the following
+ * words with pkhtb/pkhbt; it fetches one halfword more from each of them
+ * than the number of elements it uses.
+ *
+ * NOTE(review): v1 is accessed with ldrd/strd, and the label 20 path also
+ * uses ldrd on f2 and s2. Depending on the core's alignment configuration
+ * these may need 64 bit rather than 32 bit alignment - confirm. */
+static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
+{
+    int res;
+#if ORDER > 32
+    int cnt = ORDER>>5; /* number of 32 element passes */
+#endif
+
+    asm volatile (
+#if ORDER > 32
+        "mov     %[res], #0              \n" /* accumulator starts at zero */
+#endif
+        "tst     %[f2], #2               \n" /* is f2 word aligned? */
+        "beq     20f                     \n"
+
+    "10:                                 \n" /* f2 and s2 start on an odd halfword */
+        "ldrh    r3, [%[f2]], #2         \n"
+        "ldrh    r6, [%[s2]], #2         \n"
+        "ldmia   %[f2]!, {r2,r4}         \n"
+        "mov     r3, r3, lsl #16         \n" /* first f2 element into the top half */
+        "mov     r6, r6, lsl #16         \n" /* first s2 element into the top half */
+
+    "1:                                  \n"
+        "ldmia   %[s2]!, {r5,r7}         \n"
+        "pkhtb   r3, r3, r2              \n"
+        "pkhtb   r2, r2, r4              \n"
+        "ldrd    r0, [%[v1]]             \n" /* r0, r1 = next four v1 elements */
+        "mov     r5, r5, ror #16         \n"
+        "pkhtb   r6, r5, r6, asr #16     \n"
+        "pkhbt   r5, r5, r7, lsl #16     \n"
+#if ORDER > 32
+        "smladx  %[res], r0, r3, %[res]  \n"
+#else
+        "smuadx  %[res], r0, r3          \n" /* first product pair initialises res */
+#endif
+        "smladx  %[res], r1, r2, %[res]  \n"
+        "ldmia   %[f2]!, {r2,r3}         \n"
+        "ssub16  r0, r0, r6              \n"
+        "ssub16  r1, r1, r5              \n"
+        "strd    r0, [%[v1]], #8         \n" /* write the differences back, advance v1 */
+
+        REPEAT_BLOCK(
+        "ldmia   %[s2]!, {r5,r6}         \n"
+        "pkhtb   r4, r4, r2              \n"
+        "pkhtb   r2, r2, r3              \n"
+        "ldrd    r0, [%[v1]]             \n"
+        "mov     r5, r5, ror #16         \n"
+        "pkhtb   r7, r5, r7, asr #16     \n"
+        "pkhbt   r5, r5, r6, lsl #16     \n"
+        "smladx  %[res], r0, r4, %[res]  \n"
+        "smladx  %[res], r1, r2, %[res]  \n"
+        "ldmia   %[f2]!, {r2,r4}         \n"
+        "ssub16  r0, r0, r7              \n"
+        "ssub16  r1, r1, r5              \n"
+        "strd    r0, [%[v1]], #8         \n"
+        "ldmia   %[s2]!, {r5,r7}         \n"
+        "pkhtb   r3, r3, r2              \n"
+        "pkhtb   r2, r2, r4              \n"
+        "ldrd    r0, [%[v1]]             \n"
+        "mov     r5, r5, ror #16         \n"
+        "pkhtb   r6, r5, r6, asr #16     \n"
+        "pkhbt   r5, r5, r7, lsl #16     \n"
+        "smladx  %[res], r0, r3, %[res]  \n"
+        "smladx  %[res], r1, r2, %[res]  \n"
+        "ldmia   %[f2]!, {r2,r3}         \n"
+        "ssub16  r0, r0, r6              \n"
+        "ssub16  r1, r1, r5              \n"
+        "strd    r0, [%[v1]], #8         \n"
+        )
+
+        "ldmia   %[s2]!, {r5,r6}         \n"
+        "pkhtb   r4, r4, r2              \n"
+        "pkhtb   r2, r2, r3              \n"
+        "ldrd    r0, [%[v1]]             \n"
+        "mov     r5, r5, ror #16         \n"
+        "pkhtb   r7, r5, r7, asr #16     \n"
+        "pkhbt   r5, r5, r6, lsl #16     \n"
+        "smladx  %[res], r0, r4, %[res]  \n"
+        "smladx  %[res], r1, r2, %[res]  \n"
+#if ORDER > 32
+        "subs    %[cnt], %[cnt], #1      \n"
+        "ldmneia %[f2]!, {r2,r4}         \n" /* preload f2 only if another pass follows */
+        "ssub16  r0, r0, r7              \n"
+        "ssub16  r1, r1, r5              \n"
+        "strd    r0, [%[v1]], #8         \n"
+        "bne     1b                      \n"
+#else
+        "ssub16  r0, r0, r7              \n"
+        "ssub16  r1, r1, r5              \n"
+        "strd    r0, [%[v1]], #8         \n"
+#endif
+
+        "b       99f                     \n" /* skip the aligned variant */
+
+    "20:                                 \n" /* f2 and s2 word aligned */
+        "ldrd    r4, [%[f2]], #8         \n"
+        "ldrd    r0, [%[v1]]             \n"
+
+#if ORDER > 32
+    "1:                                  \n"
+        "smlad   %[res], r0, r4, %[res]  \n"
+#else
+        "smuad   %[res], r0, r4          \n" /* first product pair initialises res */
+#endif
+        "ldrd    r6, [%[s2]], #8         \n"
+        "smlad   %[res], r1, r5, %[res]  \n"
+        "ldrd    r4, [%[f2]], #8         \n"
+        "ldrd    r2, [%[v1], #8]         \n" /* fetch the next v1 pair before storing this one */
+        "ssub16  r0, r0, r6              \n"
+        "ssub16  r1, r1, r7              \n"
+        "strd    r0, [%[v1]], #8         \n"
+
+        REPEAT_BLOCK(
+        "smlad   %[res], r2, r4, %[res]  \n"
+        "ldrd    r6, [%[s2]], #8         \n"
+        "smlad   %[res], r3, r5, %[res]  \n"
+        "ldrd    r4, [%[f2]], #8         \n"
+        "ldrd    r0, [%[v1], #8]         \n"
+        "ssub16  r2, r2, r6              \n"
+        "ssub16  r3, r3, r7              \n"
+        "strd    r2, [%[v1]], #8         \n"
+        "smlad   %[res], r0, r4, %[res]  \n"
+        "ldrd    r6, [%[s2]], #8         \n"
+        "smlad   %[res], r1, r5, %[res]  \n"
+        "ldrd    r4, [%[f2]], #8         \n"
+        "ldrd    r2, [%[v1], #8]         \n"
+        "ssub16  r0, r0, r6              \n"
+        "ssub16  r1, r1, r7              \n"
+        "strd    r0, [%[v1]], #8         \n"
+        )
+
+        "smlad   %[res], r2, r4, %[res]  \n"
+        "ldrd    r6, [%[s2]], #8         \n"
+        "smlad   %[res], r3, r5, %[res]  \n"
+#if ORDER > 32
+        "subs    %[cnt], %[cnt], #1      \n"
+        "ldrned  r4, [%[f2]], #8         \n" /* preload f2 and v1 only if */
+        "ldrned  r0, [%[v1], #8]         \n" /* another pass follows      */
+        "ssub16  r2, r2, r6              \n"
+        "ssub16  r3, r3, r7              \n"
+        "strd    r2, [%[v1]], #8         \n"
+        "bne     1b                      \n"
+#else
+        "ssub16  r2, r2, r6              \n"
+        "ssub16  r3, r3, r7              \n"
+        "strd    r2, [%[v1]], #8         \n"
+#endif
+
+    "99:                                 \n" /* common exit of both variants */
+        : /* outputs */
+#if ORDER > 32
+        [cnt]"+r"(cnt),
+#endif
+        [v1] "+r"(v1),
+        [f2] "+r"(f2),
+        [s2] "+r"(s2),
+        [res]"=r"(res)
+        : /* inputs */
+        : /* clobbers */
+        "r0", "r1", "r2", "r3", "r4",
+        "r5", "r6", "r7", "cc", "memory"
+    );
+    return res;
+}
+
+/* This version fetches data as 32 bit words, and *requires* v1 to be
+ * 32 bit aligned, otherwise it will result either in a data abort, or
+ * incorrect results (if ARM aligncheck is disabled).
+ *
+ * Returns the accumulated sum of v1[i] * v2[i]; neither vector is written.
+ *
+ * One pass of the unrolled code covers 32 elements when ORDER > 16 and
+ * 16 elements otherwise (see REPEAT_BLOCK); when ORDER > 32 the pass is
+ * executed ORDER>>5 times. Bit 1 of v2 selects the code path: label 20 if
+ * it is clear (word aligned), label 10 otherwise. The label 10 path clears
+ * that bit, so the first word it loads also contains the halfword stored
+ * just before v2[0]; pkhtb then re-pairs the halfwords of neighbouring
+ * words ahead of each smladx.
+ *
+ * NOTE(review): v1 and v2 are read with ldrd. Depending on the core's
+ * alignment configuration this may need 64 bit rather than 32 bit
+ * alignment - confirm. */
+static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
+{
+    int res;
+#if ORDER > 32
+    int cnt = ORDER>>5; /* number of 32 element passes */
+#endif
+
+    asm volatile (
+#if ORDER > 32
+        "mov     %[res], #0              \n" /* accumulator starts at zero */
+#endif
+        "tst     %[v2], #2               \n" /* is v2 word aligned? */
+        "beq     20f                     \n"
+
+    "10:                                 \n" /* v2 starts on an odd halfword */
+        "bic     %[v2], %[v2], #2        \n" /* round v2 down to a word boundary */
+        "ldmia   %[v2]!, {r5-r7}         \n"
+        "ldrd    r0, [%[v1]], #8         \n"
+
+    "1:                                  \n"
+        "pkhtb   r3, r5, r6              \n"
+        "ldrd    r4, [%[v2]], #8         \n"
+#if ORDER > 32
+        "smladx  %[res], r0, r3, %[res]  \n"
+#else
+        "smuadx  %[res], r0, r3          \n" /* first product pair initialises res */
+#endif
+        REPEAT_BLOCK(
+        "pkhtb   r0, r6, r7              \n"
+        "ldrd    r2, [%[v1]], #8         \n"
+        "smladx  %[res], r1, r0, %[res]  \n"
+        "pkhtb   r1, r7, r4              \n"
+        "ldrd    r6, [%[v2]], #8         \n"
+        "smladx  %[res], r2, r1, %[res]  \n"
+        "pkhtb   r2, r4, r5              \n"
+        "ldrd    r0, [%[v1]], #8         \n"
+        "smladx  %[res], r3, r2, %[res]  \n"
+        "pkhtb   r3, r5, r6              \n"
+        "ldrd    r4, [%[v2]], #8         \n"
+        "smladx  %[res], r0, r3, %[res]  \n"
+        )
+
+        "pkhtb   r0, r6, r7              \n"
+        "ldrd    r2, [%[v1]], #8         \n"
+        "smladx  %[res], r1, r0, %[res]  \n"
+        "pkhtb   r1, r7, r4              \n"
+#if ORDER > 32
+        "subs    %[cnt], %[cnt], #1      \n"
+        "ldrned  r6, [%[v2]], #8         \n" /* preload v2 only if another pass follows */
+        "smladx  %[res], r2, r1, %[res]  \n"
+        "pkhtb   r2, r4, r5              \n"
+        "ldrned  r0, [%[v1]], #8         \n" /* preload v1 only if another pass follows */
+        "smladx  %[res], r3, r2, %[res]  \n"
+        "bne     1b                      \n"
+#else
+        "pkhtb   r4, r4, r5              \n"
+        "smladx  %[res], r2, r1, %[res]  \n"
+        "smladx  %[res], r3, r4, %[res]  \n"
+#endif
+
+        "b       99f                     \n" /* skip the aligned variant */
+
+    "20:                                 \n" /* v2 word aligned */
+        "ldrd    r0, [%[v1]], #8         \n"
+        "ldmia   %[v2]!, {r5-r7}         \n"
+
+    "1:                                  \n"
+        "ldrd    r2, [%[v1]], #8         \n"
+#if ORDER > 32
+        "smlad   %[res], r0, r5, %[res]  \n"
+#else
+        "smuad   %[res], r0, r5          \n" /* first product pair initialises res */
+#endif
+        REPEAT_BLOCK(
+        "ldrd    r4, [%[v2]], #8         \n"
+        "smlad   %[res], r1, r6, %[res]  \n"
+        "ldrd    r0, [%[v1]], #8         \n"
+        "smlad   %[res], r2, r7, %[res]  \n"
+        "ldrd    r6, [%[v2]], #8         \n"
+        "smlad   %[res], r3, r4, %[res]  \n"
+        "ldrd    r2, [%[v1]], #8         \n"
+        "smlad   %[res], r0, r5, %[res]  \n"
+        )
+
+#if ORDER > 32
+        "ldrd    r4, [%[v2]], #8         \n"
+        "smlad   %[res], r1, r6, %[res]  \n"
+        "subs    %[cnt], %[cnt], #1      \n"
+        "ldrned  r0, [%[v1]], #8         \n" /* preload v1 only if another pass follows */
+        "smlad   %[res], r2, r7, %[res]  \n"
+        "ldrned  r6, [%[v2]], #8         \n" /* preload v2 only if another pass follows */
+        "smlad   %[res], r3, r4, %[res]  \n"
+        "bne     1b                      \n"
+#else
+        "ldr     r4, [%[v2]], #4         \n" /* only one more v2 word is needed here */
+        "smlad   %[res], r1, r6, %[res]  \n"
+        "smlad   %[res], r2, r7, %[res]  \n"
+        "smlad   %[res], r3, r4, %[res]  \n"
+#endif
+
+    "99:                                 \n" /* common exit of both variants */
+        : /* outputs */
+#if ORDER > 32
+        [cnt]"+r"(cnt),
+#endif
+        [v1] "+r"(v1),
+        [v2] "+r"(v2),
+        [res]"=r"(res)
+        : /* inputs */
+        : /* clobbers */
+        "r0", "r1", "r2", "r3",
+        "r4", "r5", "r6", "r7", "cc", "memory"
+    );
+    return res;
+}
--- a/lib/rbcodec/codecs/demac/libdemac/vector_math16_armv7.h
+++ b/lib/rbcodec/codecs/demac/libdemac/vector_math16_armv7.h
@ -0,0 +1,214 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id$
+
+Copyright (C) Dave Chapman 2007
+
+ARMv7 neon vector math copyright (C) 2010 Jens Arnold
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#define FUSED_VECTOR_MATH
+/* REPEAT_BLOCK(x) pastes x three times, once, or not at all, depending on ORDER. */
+#if ORDER > 32
+#define REPEAT_BLOCK(x) x x x
+#elif ORDER > 16
+#define REPEAT_BLOCK(x) x
+#else
+#define REPEAT_BLOCK(x)
+#endif
+
+/* Scalar product of v1 (pre-update values) and f2, fused with v1 += s2. */
+static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
+{
+    int res;
+#if ORDER > 64
+    int cnt = ORDER>>6;  /* loop passes of 64 elements (1 + 3 groups of 16) */
+#endif
+    /* q0: four 32-bit partial sums; q1-q2: v1, q3-q4: f2, q5-q6: s2. */
+    asm volatile (
+#if ORDER > 64
+        "vmov.i16    q0, #0              \n"
+    "1:                                  \n"
+        "subs        %[cnt], %[cnt], #1  \n"  /* sets the flags used by bne */
+#endif
+        "vld1.16     {d6-d9}, [%[f2]]!   \n"
+        "vld1.16     {d2-d5}, [%[v1]]    \n"
+        "vld1.16     {d10-d13}, [%[s2]]! \n"
+#if ORDER > 64
+        "vmlal.s16   q0, d2, d6          \n"
+#else
+        "vmull.s16   q0, d2, d6          \n"  /* first product initialises q0 */
+#endif
+        "vmlal.s16   q0, d3, d7          \n"
+        "vmlal.s16   q0, d4, d8          \n"
+        "vmlal.s16   q0, d5, d9          \n"
+        "vadd.i16    q1, q1, q5          \n"
+        "vadd.i16    q2, q2, q6          \n"
+        "vst1.16     {d2-d5}, [%[v1]]!   \n"
+        /* Remaining 16-element groups; how many depends on REPEAT_BLOCK. */
+        REPEAT_BLOCK(
+        "vld1.16     {d6-d9}, [%[f2]]!   \n"
+        "vld1.16     {d2-d5}, [%[v1]]    \n"
+        "vld1.16     {d10-d13}, [%[s2]]! \n"
+        "vmlal.s16   q0, d2, d6          \n"
+        "vmlal.s16   q0, d3, d7          \n"
+        "vmlal.s16   q0, d4, d8          \n"
+        "vmlal.s16   q0, d5, d9          \n"
+        "vadd.i16    q1, q1, q5          \n"
+        "vadd.i16    q2, q2, q6          \n"
+        "vst1.16     {d2-d5}, [%[v1]]!   \n"
+        )
+#if ORDER > 64
+        "bne         1b                  \n"
+#endif
+        "vpadd.i32   d0, d0, d1          \n"  /* fold the partial sums ... */
+        "vpaddl.s32  d0, d0              \n"
+        "vmov.32     %[res], d0[0]       \n"  /* ... and return the low word */
+        : /* outputs */
+#if ORDER > 64
+        [cnt]"+r"(cnt),
+#endif
+        [v1] "+r"(v1),
+        [f2] "+r"(f2),
+        [s2] "+r"(s2),
+        [res]"=r"(res)
+        : /* inputs */
+        : /* clobbers ("cc": subs modifies the flags when ORDER > 64) */
+        "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
+        "d8", "d9", "d10", "d11", "d12", "d13", "cc", "memory"
+    );
+    return res;
+}
+
+/* Scalar product of v1 (pre-update values) and f2, fused with v1 -= s2. */
+static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
+{
+    int res;
+#if ORDER > 64
+    int cnt = ORDER>>6;  /* loop passes of 64 elements (1 + 3 groups of 16) */
+#endif
+    /* q0: four 32-bit partial sums; q1-q2: v1, q3-q4: f2, q5-q6: s2. */
+    asm volatile (
+#if ORDER > 64
+        "vmov.i16    q0, #0              \n"
+    "1:                                  \n"
+        "subs        %[cnt], %[cnt], #1  \n"  /* sets the flags used by bne */
+#endif
+        "vld1.16     {d6-d9}, [%[f2]]!   \n"
+        "vld1.16     {d2-d5}, [%[v1]]    \n"
+        "vld1.16     {d10-d13}, [%[s2]]! \n"
+#if ORDER > 64
+        "vmlal.s16   q0, d2, d6          \n"
+#else
+        "vmull.s16   q0, d2, d6          \n"  /* first product initialises q0 */
+#endif
+        "vmlal.s16   q0, d3, d7          \n"
+        "vmlal.s16   q0, d4, d8          \n"
+        "vmlal.s16   q0, d5, d9          \n"
+        "vsub.i16    q1, q1, q5          \n"
+        "vsub.i16    q2, q2, q6          \n"
+        "vst1.16     {d2-d5}, [%[v1]]!   \n"
+        /* Remaining 16-element groups; how many depends on REPEAT_BLOCK. */
+        REPEAT_BLOCK(
+        "vld1.16     {d6-d9}, [%[f2]]!   \n"
+        "vld1.16     {d2-d5}, [%[v1]]    \n"
+        "vld1.16     {d10-d13}, [%[s2]]! \n"
+        "vmlal.s16   q0, d2, d6          \n"
+        "vmlal.s16   q0, d3, d7          \n"
+        "vmlal.s16   q0, d4, d8          \n"
+        "vmlal.s16   q0, d5, d9          \n"
+        "vsub.i16    q1, q1, q5          \n"
+        "vsub.i16    q2, q2, q6          \n"
+        "vst1.16     {d2-d5}, [%[v1]]!   \n"
+        )
+#if ORDER > 64
+        "bne         1b                  \n"
+#endif
+        "vpadd.i32   d0, d0, d1          \n"  /* fold the partial sums ... */
+        "vpaddl.s32  d0, d0              \n"
+        "vmov.32     %[res], d0[0]       \n"  /* ... and return the low word */
+        : /* outputs */
+#if ORDER > 64
+        [cnt]"+r"(cnt),
+#endif
+        [v1] "+r"(v1),
+        [f2] "+r"(f2),
+        [s2] "+r"(s2),
+        [res]"=r"(res)
+        : /* inputs */
+        : /* clobbers ("cc": subs modifies the flags when ORDER > 64) */
+        "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
+        "d8", "d9", "d10", "d11", "d12", "d13", "cc", "memory"
+    );
+    return res;
+}
+/* Returns the scalar product of the 16-bit vectors v1 and v2 (both only read). */
+static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
+{
+    int res;
+#if ORDER > 64
+    int cnt = ORDER>>6;  /* loop passes of 64 elements (1 + 3 groups of 16) */
+#endif
+    /* q0: four 32-bit partial sums; q1-q2: v1 data, q3-q4: v2 data. */
+    asm volatile (
+#if ORDER > 64
+        "vmov.i16    q0, #0              \n"
+    "1:                                  \n"
+        "subs        %[cnt], %[cnt], #1  \n"  /* sets the flags used by bne */
+#endif
+        "vld1.16     {d2-d5}, [%[v1]]!   \n"
+        "vld1.16     {d6-d9}, [%[v2]]!   \n"
+#if ORDER > 64
+        "vmlal.s16   q0, d2, d6          \n"
+#else
+        "vmull.s16   q0, d2, d6          \n"  /* first product initialises q0 */
+#endif
+        "vmlal.s16   q0, d3, d7          \n"
+        "vmlal.s16   q0, d4, d8          \n"
+        "vmlal.s16   q0, d5, d9          \n"
+        /* Remaining 16-element groups; how many depends on REPEAT_BLOCK. */
+        REPEAT_BLOCK(
+        "vld1.16     {d2-d5}, [%[v1]]!   \n"
+        "vld1.16     {d6-d9}, [%[v2]]!   \n"
+        "vmlal.s16   q0, d2, d6          \n"
+        "vmlal.s16   q0, d3, d7          \n"
+        "vmlal.s16   q0, d4, d8          \n"
+        "vmlal.s16   q0, d5, d9          \n"
+        )
+#if ORDER > 64
+        "bne         1b                  \n"
+#endif
+        "vpadd.i32   d0, d0, d1          \n"  /* fold the partial sums ... */
+        "vpaddl.s32  d0, d0              \n"
+        "vmov.32     %[res], d0[0]       \n"  /* ... and return the low word */
+        : /* outputs */
+#if ORDER > 64
+        [cnt]"+r"(cnt),
+#endif
+        [v1] "+r"(v1),
+        [v2] "+r"(v2),
+        [res]"=r"(res)
+        : /* inputs */
+        : /* clobbers ("cc": subs changes flags; "memory": arrays are read) */
+        "d0", "d1", "d2", "d3", "d4", "d5",
+        "d6", "d7", "d8", "d9", "cc", "memory"
+    );
+    return res;
+}
--- a/lib/rbcodec/codecs/demac/libdemac/vector_math16_cf.h
+++ b/lib/rbcodec/codecs/demac/libdemac/vector_math16_cf.h
@ -0,0 +1,364 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id$
+
+Copyright (C) Dave Chapman 2007
+
+Coldfire vector math copyright (C) 2007 Jens Arnold
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#define FUSED_VECTOR_MATH
+/* Expands to a complete statement (note the ';') calling coldfire_set_macsr(0). */
+#define PREPARE_SCALARPRODUCT coldfire_set_macsr(0); /* signed integer mode */
+/* REPEAT_n(x) expands to n consecutive copies of x. */
+#define REPEAT_2(x) x x
+#define REPEAT_3(x) x x x
+#define REPEAT_7(x) x x x x x x x
+
+/* Returns the scalar product of v1 and f2 and adds s2 to v1 in place (fused
+ * for performance). Data is fetched as 32 bit words; v1 is *recommended* to
+ * be 32 bit aligned, and f2/s2 are assumed to be both 32 bit aligned or both
+ * unaligned, else performance suffers. Needs EMAC in signed integer mode.
+ * NOTE(review): acc0 is never cleared on entry - presumably left zero. */
+static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
+{
+    int res;
+#if ORDER > 16
+    int cnt = ORDER>>4;
+#endif
+    /* The helper macros expand to asm text; #arg stringifies the register. */
+#define ADDHALFREGS(s1, s2, sum)       /* Add register halves straight. */  \
+        "move.l " #s1  ", " #sum "\n"  /* 's1' and 's2' can be A or D */    \
+        "add.l  " #s2  ", " #s1  "\n"  /* regs, 'sum' must be a D reg. */   \
+        "clr.w  " #sum "          \n"  /* 's1' is clobbered! */             \
+        "add.l  " #s2  ", " #sum "\n"  \
+        "move.w " #s1  ", " #sum "\n"
+        
+#define ADDHALFXREGS(s1, s2, sum)      /* Add register halves across. */    \
+        "clr.w  " #sum "          \n"  /* Needs 'sum' pre-swapped, swaps */ \
+        "add.l  " #s1  ", " #sum "\n"  /* 's2', and clobbers 's1'. */       \
+        "swap   " #s2  "          \n"  /* 's1' can be an A or D reg. */     \
+        "add.l  " #s2  ", " #s1  "\n"  /* 'sum' and 's2' must be D regs. */ \
+        "move.w " #s1  ", " #sum "\n"
+    /* Dispatch on bit 1 of the f2 address: clear -> label 20, set -> label 10. */
+    asm volatile (
+        "move.l  %[f2], %%d0                         \n"
+        "and.l   #2, %%d0                            \n"
+        "jeq     20f                                 \n"
+        /* f2 not 32 bit aligned: preload one 16 bit word of f2 and of s2. */
+    "10:                                             \n"
+        "move.w  (%[f2])+, %%d0                      \n"
+        "move.w  (%[s2])+, %%d1                      \n"
+        "swap    %%d1                                \n"
+    "1:                                              \n"
+        REPEAT_2(
+        "movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1        \n"
+        "mac.w   %%d0l, %%d6u, (%[f2])+, %%d0, %%acc0\n"
+        "mac.w   %%d0u, %%d6l, (%[s2])+, %%d2, %%acc0\n"
+        ADDHALFXREGS(%%d6, %%d2, %%d1)
+        "mac.w   %%d0l, %%d7u, (%[f2])+, %%d0, %%acc0\n"
+        "mac.w   %%d0u, %%d7l, (%[s2])+, %%d6, %%acc0\n"
+        "move.l  %%d1, (%[v1])+                      \n"
+        ADDHALFXREGS(%%d7, %%d6, %%d2)
+        "mac.w   %%d0l, %%a0u, (%[f2])+, %%d0, %%acc0\n"
+        "mac.w   %%d0u, %%a0l, (%[s2])+, %%d7, %%acc0\n"
+        "move.l  %%d2, (%[v1])+                      \n"
+        ADDHALFXREGS(%%a0, %%d7, %%d6)
+        "mac.w   %%d0l, %%a1u, (%[f2])+, %%d0, %%acc0\n"
+        "mac.w   %%d0u, %%a1l, (%[s2])+, %%d1, %%acc0\n"
+        "move.l  %%d6, (%[v1])+                      \n"
+        ADDHALFXREGS(%%a1, %%d1, %%d7)
+        "move.l  %%d7, (%[v1])+                      \n"
+        )
+        /* %[res] doubles as the loop counter: cnt is tied to it via "[res]". */
+#if ORDER > 16
+        "subq.l  #1, %[res]                          \n"
+        "bne.w   1b                                  \n"
+#endif
+        "jra     99f                                 \n"
+        /* f2 32 bit aligned: whole longwords of f2 and s2 are used directly. */
+    "20:                                             \n"
+        "move.l  (%[f2])+, %%d0                      \n"
+    "1:                                              \n"
+        "movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1        \n"
+        "mac.w   %%d0u, %%d6u, (%[s2])+, %%d1, %%acc0\n"
+        "mac.w   %%d0l, %%d6l, (%[f2])+, %%d0, %%acc0\n"
+        ADDHALFREGS(%%d6, %%d1, %%d2)
+        "mac.w   %%d0u, %%d7u, (%[s2])+, %%d1, %%acc0\n"
+        "mac.w   %%d0l, %%d7l, (%[f2])+, %%d0, %%acc0\n"
+        "move.l  %%d2, (%[v1])+                      \n"
+        ADDHALFREGS(%%d7, %%d1, %%d2)
+        "mac.w   %%d0u, %%a0u, (%[s2])+, %%d1, %%acc0\n"
+        "mac.w   %%d0l, %%a0l, (%[f2])+, %%d0, %%acc0\n"
+        "move.l  %%d2, (%[v1])+                      \n"
+        ADDHALFREGS(%%a0, %%d1, %%d2)
+        "mac.w   %%d0u, %%a1u, (%[s2])+, %%d1, %%acc0\n"
+        "mac.w   %%d0l, %%a1l, (%[f2])+, %%d0, %%acc0\n"
+        "move.l  %%d2, (%[v1])+                      \n"
+        ADDHALFREGS(%%a1, %%d1, %%d2)
+        "move.l  %%d2, (%[v1])+                      \n"
+        /* Second group of eight elements (four longwords of v1). */
+        "movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1        \n"
+        "mac.w   %%d0u, %%d6u, (%[s2])+, %%d1, %%acc0\n"
+        "mac.w   %%d0l, %%d6l, (%[f2])+, %%d0, %%acc0\n"
+        ADDHALFREGS(%%d6, %%d1, %%d2)
+        "mac.w   %%d0u, %%d7u, (%[s2])+, %%d1, %%acc0\n"
+        "mac.w   %%d0l, %%d7l, (%[f2])+, %%d0, %%acc0\n"
+        "move.l  %%d2, (%[v1])+                      \n"
+        ADDHALFREGS(%%d7, %%d1, %%d2)
+        "mac.w   %%d0u, %%a0u, (%[s2])+, %%d1, %%acc0\n"
+        "mac.w   %%d0l, %%a0l, (%[f2])+, %%d0, %%acc0\n"
+        "move.l  %%d2, (%[v1])+                      \n"
+        ADDHALFREGS(%%a0, %%d1, %%d2)
+        "mac.w   %%d0u, %%a1u, (%[s2])+, %%d1, %%acc0\n"
+#if ORDER > 16
+        "mac.w   %%d0l, %%a1l, (%[f2])+, %%d0, %%acc0\n"
+#else
+        "mac.w   %%d0l, %%a1l, %%acc0                \n"
+#endif
+        "move.l  %%d2, (%[v1])+                      \n"
+        ADDHALFREGS(%%a1, %%d1, %%d2)
+        "move.l  %%d2, (%[v1])+                      \n"
+#if ORDER > 16
+        "subq.l  #1, %[res]                          \n"
+        "bne.w   1b                                  \n"
+#endif
+        /* Both paths end here: acc0 is handed over as the return value. */
+    "99:                                             \n"
+        "movclr.l %%acc0, %[res]                     \n"
+        : /* outputs */
+        [v1]"+a"(v1),
+        [f2]"+a"(f2),
+        [s2]"+a"(s2),
+        [res]"=d"(res)
+        : /* inputs */
+#if ORDER > 16
+        [cnt]"[res]"(cnt)
+#endif
+        : /* clobbers */
+        "d0", "d1", "d2", "d6", "d7", 
+        "a0", "a1", "memory"
+
+    );
+    return res;
+}
+
+/* Returns the scalar product of v1 and f2 and subtracts s2 from v1 in place
+ * (fused for performance). Data is fetched as 32 bit words; v1 is
+ * *recommended* to be 32 bit aligned, and f2/s2 are assumed to be both 32 bit
+ * aligned or both unaligned, else performance suffers. Needs EMAC in signed
+ * integer mode. NOTE(review): acc0 is never cleared on entry - confirm. */
+static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
+{
+    int res;
+#if ORDER > 16
+    int cnt = ORDER>>4;
+#endif
+    /* The helper macros expand to asm text; #arg stringifies the register. */
+#define SUBHALFREGS(min, sub, dif)    /* Subtract register halves straight. */ \
+        "move.l " #min ", " #dif "\n" /* 'min' can be an A or D reg */         \
+        "sub.l  " #sub ", " #min "\n" /* 'sub' and 'dif' must be D regs */     \
+        "clr.w  " #sub           "\n" /* 'min' and 'sub' are clobbered! */     \
+        "sub.l  " #sub ", " #dif "\n" \
+        "move.w " #min ", " #dif "\n" 
+        
+#define SUBHALFXREGS(min, s2, s1d)    /* Subtract register halves across. */ \
+        "clr.w  " #s1d           "\n" /* Needs 's1d' pre-swapped, swaps */   \
+        "sub.l  " #s1d ", " #min "\n" /* 's2' and clobbers 'min'. */         \
+        "move.l " #min ", " #s1d "\n" /* 'min' can be an A or D reg, */      \
+        "swap   " #s2            "\n" /* 's2' and 's1d' must be D regs. */   \
+        "sub.l  " #s2  ", " #min "\n" \
+        "move.w " #min ", " #s1d "\n"
+    /* Dispatch on bit 1 of the f2 address: clear -> label 20, set -> label 10. */
+    asm volatile (
+        "move.l  %[f2], %%d0                         \n"
+        "and.l   #2, %%d0                            \n"
+        "jeq     20f                                 \n"
+        /* f2 not 32 bit aligned: preload one 16 bit word of f2 and of s2. */
+    "10:                                             \n"
+        "move.w  (%[f2])+, %%d0                      \n"
+        "move.w  (%[s2])+, %%d1                      \n"
+        "swap    %%d1                                \n"
+    "1:                                              \n"
+        REPEAT_2(
+        "movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1        \n"
+        "mac.w   %%d0l, %%d6u, (%[f2])+, %%d0, %%acc0\n"
+        "mac.w   %%d0u, %%d6l, (%[s2])+, %%d2, %%acc0\n"
+        SUBHALFXREGS(%%d6, %%d2, %%d1)
+        "mac.w   %%d0l, %%d7u, (%[f2])+, %%d0, %%acc0\n"
+        "mac.w   %%d0u, %%d7l, (%[s2])+, %%d6, %%acc0\n"
+        "move.l  %%d1, (%[v1])+                      \n"
+        SUBHALFXREGS(%%d7, %%d6, %%d2)
+        "mac.w   %%d0l, %%a0u, (%[f2])+, %%d0, %%acc0\n"
+        "mac.w   %%d0u, %%a0l, (%[s2])+, %%d7, %%acc0\n"
+        "move.l  %%d2, (%[v1])+                      \n"
+        SUBHALFXREGS(%%a0, %%d7, %%d6)
+        "mac.w   %%d0l, %%a1u, (%[f2])+, %%d0, %%acc0\n"
+        "mac.w   %%d0u, %%a1l, (%[s2])+, %%d1, %%acc0\n"
+        "move.l  %%d6, (%[v1])+                      \n"
+        SUBHALFXREGS(%%a1, %%d1, %%d7)
+        "move.l  %%d7, (%[v1])+                      \n"
+        )
+        /* %[res] doubles as the loop counter: cnt is tied to it via "[res]". */
+#if ORDER > 16
+        "subq.l  #1, %[res]                          \n"
+        "bne.w   1b                                  \n"
+#endif
+
+        "jra     99f                                 \n"
+        /* f2 32 bit aligned: whole longwords of f2 and s2 are used directly. */
+    "20:                                             \n"
+        "move.l  (%[f2])+, %%d0                      \n"
+    "1:                                              \n"
+        "movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1        \n"
+        "mac.w   %%d0u, %%d6u, (%[s2])+, %%d1, %%acc0\n"
+        "mac.w   %%d0l, %%d6l, (%[f2])+, %%d0, %%acc0\n"
+        SUBHALFREGS(%%d6, %%d1, %%d2)
+        "mac.w   %%d0u, %%d7u, (%[s2])+, %%d1, %%acc0\n"
+        "mac.w   %%d0l, %%d7l, (%[f2])+, %%d0, %%acc0\n"
+        "move.l  %%d2, (%[v1])+                      \n"
+        SUBHALFREGS(%%d7, %%d1, %%d2)
+        "mac.w   %%d0u, %%a0u, (%[s2])+, %%d1, %%acc0\n"
+        "mac.w   %%d0l, %%a0l, (%[f2])+, %%d0, %%acc0\n"
+        "move.l  %%d2, (%[v1])+                      \n"
+        SUBHALFREGS(%%a0, %%d1, %%d2)
+        "mac.w   %%d0u, %%a1u, (%[s2])+, %%d1, %%acc0\n"
+        "mac.w   %%d0l, %%a1l, (%[f2])+, %%d0, %%acc0\n"
+        "move.l  %%d2, (%[v1])+                      \n"
+        SUBHALFREGS(%%a1, %%d1, %%d2)
+        "move.l  %%d2, (%[v1])+                      \n"
+        /* Second group of eight elements (four longwords of v1). */
+        "movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1        \n"
+        "mac.w   %%d0u, %%d6u, (%[s2])+, %%d1, %%acc0\n"
+        "mac.w   %%d0l, %%d6l, (%[f2])+, %%d0, %%acc0\n"
+        SUBHALFREGS(%%d6, %%d1, %%d2)
+        "mac.w   %%d0u, %%d7u, (%[s2])+, %%d1, %%acc0\n"
+        "mac.w   %%d0l, %%d7l, (%[f2])+, %%d0, %%acc0\n"
+        "move.l  %%d2, (%[v1])+                      \n"
+        SUBHALFREGS(%%d7, %%d1, %%d2)
+        "mac.w   %%d0u, %%a0u, (%[s2])+, %%d1, %%acc0\n"
+        "mac.w   %%d0l, %%a0l, (%[f2])+, %%d0, %%acc0\n"
+        "move.l  %%d2, (%[v1])+                      \n"
+        SUBHALFREGS(%%a0, %%d1, %%d2)
+        "mac.w   %%d0u, %%a1u, (%[s2])+, %%d1, %%acc0\n"
+#if ORDER > 16
+        "mac.w   %%d0l, %%a1l, (%[f2])+, %%d0, %%acc0\n"
+#else
+        "mac.w   %%d0l, %%a1l, %%acc0                \n"
+#endif
+        "move.l  %%d2, (%[v1])+                      \n"
+        SUBHALFREGS(%%a1, %%d1, %%d2)
+        "move.l  %%d2, (%[v1])+                      \n"
+#if ORDER > 16
+        "subq.l  #1, %[res]                          \n"
+        "bne.w   1b                                  \n"
+#endif
+        /* Both paths end here: acc0 is handed over as the return value. */
+    "99:                                             \n"
+        "movclr.l %%acc0, %[res]                     \n"
+        : /* outputs */
+        [v1]"+a"(v1),
+        [f2]"+a"(f2),
+        [s2]"+a"(s2),
+        [res]"=d"(res)
+        : /* inputs */
+#if ORDER > 16
+        [cnt]"[res]"(cnt)
+#endif
+        : /* clobbers */
+        "d0", "d1", "d2", "d6", "d7", 
+        "a0", "a1", "memory"
+
+    );
+    return res;
+}
+
+/* Returns the scalar product of v1 and v2. Data is fetched as 32 bit words;
+ * v1 is *recommended* to be 32 bit aligned, otherwise performance will
+ * suffer. Needs EMAC in signed integer mode; the sum is read from acc0. */
+static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
+{
+    int res;
+#if ORDER > 16
+    int cnt = ORDER>>4;  /* loop passes of 16 elements each */
+#endif
+    /* Dispatch on bit 1 of the v2 address: clear -> label 20, set -> label 10. */
+    asm volatile (
+        "move.l  %[v2], %%d0                         \n"
+        "and.l   #2, %%d0                            \n"
+        "jeq     20f                                 \n"
+        /* v2 not 32 bit aligned: preload a single 16 bit word of v2. */
+    "10:                                             \n"
+        "move.l  (%[v1])+, %%d0                      \n"
+        "move.w  (%[v2])+, %%d1                      \n"
+    "1:                                              \n"
+        REPEAT_7(
+        "mac.w   %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"
+        "mac.w   %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n"
+        )
+        /* Eighth pair; %[res] doubles as the loop counter (cnt is tied to it). */
+        "mac.w   %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"
+#if ORDER > 16
+        "mac.w   %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n"
+        "subq.l  #1, %[res]                          \n"
+        "bne.b   1b                                  \n"
+#else
+        "mac.w   %%d0l, %%d1u, %%acc0                \n"
+#endif
+        "jra     99f                                  \n"
+        /* v2 32 bit aligned: both vectors are consumed as whole longwords. */
+    "20:                                             \n"
+        "move.l  (%[v1])+, %%d0                      \n"
+        "move.l  (%[v2])+, %%d1                      \n"
+    "1:                                              \n"
+        REPEAT_3(
+        "mac.w   %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n"
+        "mac.w   %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
+        "mac.w   %%d2u, %%d1u, (%[v1])+, %%d0, %%acc0\n"
+        "mac.w   %%d2l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
+        )
+        /* Last four elements of the pass. */
+        "mac.w   %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n"
+        "mac.w   %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
+#if ORDER > 16
+        "mac.w   %%d2u, %%d1u, (%[v1])+, %%d0, %%acc0\n"
+        "mac.w   %%d2l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
+        "subq.l  #1, %[res]                          \n"
+        "bne.b   1b                                  \n"
+#else
+        "mac.w   %%d2u, %%d1u, %%acc0                \n"
+        "mac.w   %%d2l, %%d1l, %%acc0                \n"
+#endif
+        /* Both paths end here: acc0 is handed over as the return value. */
+    "99:                                             \n"
+        "movclr.l %%acc0, %[res]                     \n"
+        : /* outputs */
+        [v1]"+a"(v1),
+        [v2]"+a"(v2),
+        [res]"=d"(res)
+        : /* inputs */
+#if ORDER > 16
+        [cnt]"[res]"(cnt)
+#endif
+        : /* clobbers ("memory": both arrays are read through the pointers) */
+        "d0", "d1", "d2", "memory"
+    );
+    return res;
+}
--- a/lib/rbcodec/codecs/demac/libdemac/vector_math16_mmx.h
+++ b/lib/rbcodec/codecs/demac/libdemac/vector_math16_mmx.h
@ -0,0 +1,234 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id$
+
+Copyright (C) Dave Chapman 2007
+
+MMX vector math copyright (C) 2010 Jens Arnold
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#define FUSED_VECTOR_MATH
+/* REPEAT_MBk(x, n) invokes x() k times with arguments n, n+8, n+16, ... */
+#define REPEAT_MB3(x, n) x(n) x(n+8) x(n+16)
+#define REPEAT_MB7(x, n) x(n) x(n+8) x(n+16) x(n+24) x(n+32) x(n+40) x(n+48)
+#define REPEAT_MB8(x, n) REPEAT_MB7(x, n) x(n+56)
+/* REPEAT_MB(x): the unrolled sequence of x() invocations for the given ORDER. */
+#if ORDER == 16     /* 3 times */
+#define REPEAT_MB(x) REPEAT_MB3(x, 8) 
+#elif ORDER == 32   /* 7 times */
+#define REPEAT_MB(x) REPEAT_MB7(x, 8) 
+#elif ORDER == 64   /* 5*3 == 15 times */
+#define REPEAT_MB(x) REPEAT_MB3(x,  8) REPEAT_MB3(x, 32) REPEAT_MB3(x, 56) \
+                     REPEAT_MB3(x, 80) REPEAT_MB3(x, 104)
+#elif ORDER == 256  /* 9*7 == 63 times */
+#define REPEAT_MB(x) REPEAT_MB7(x,   8) REPEAT_MB7(x,  64) REPEAT_MB7(x, 120) \
+                     REPEAT_MB7(x, 176) REPEAT_MB7(x, 232) REPEAT_MB7(x, 288) \
+                     REPEAT_MB7(x, 344) REPEAT_MB7(x, 400) REPEAT_MB7(x, 456)
+#elif ORDER == 1280 /* 8*8 == 64 times */
+#define REPEAT_MB(x) REPEAT_MB8(x,   0) REPEAT_MB8(x,  64) REPEAT_MB8(x, 128) \
+                     REPEAT_MB8(x, 192) REPEAT_MB8(x, 256) REPEAT_MB8(x, 320) \
+                     REPEAT_MB8(x, 384) REPEAT_MB8(x, 448)
+#else
+#error unsupported order
+#endif
+
+/* Scalar product of v1 (pre-update values) and f2, fused with v1 += s2. */
+static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t *s2)
+{
+    int res, t;
+#if ORDER > 256
+    int cnt = ORDER>>8;  /* loop passes; each one advances 512 bytes */
+#endif
+    /* mm2 accumulates two 32-bit partial sums; mm0 and mm1 are scratch. */
+    asm volatile (
+#if ORDER > 256
+        "pxor    %%mm2, %%mm2        \n"
+    "1:                              \n"
+#else
+        "movq    (%[v1]), %%mm2      \n"
+        "movq    %%mm2, %%mm0        \n"
+        "pmaddwd (%[f2]), %%mm2      \n"
+        "paddw   (%[s2]), %%mm0      \n"
+        "movq    %%mm0, (%[v1])      \n"
+#endif
+        /* One block handles the four elements at byte offset n. */
+#define SP_ADD_BLOCK(n)                      \
+        "movq    " #n "(%[v1]), %%mm1    \n" \
+        "movq    %%mm1, %%mm0            \n" \
+        "pmaddwd " #n "(%[f2]), %%mm1    \n" \
+        "paddw   " #n "(%[s2]), %%mm0    \n" \
+        "movq    %%mm0, " #n "(%[v1])    \n" \
+        "paddd   %%mm1, %%mm2            \n"
+        
+REPEAT_MB(SP_ADD_BLOCK)
+
+#if ORDER > 256
+        "add     $512, %[v1]         \n"
+        "add     $512, %[s2]         \n"
+        "add     $512, %[f2]         \n"
+        "dec     %[cnt]              \n"
+        "jne     1b                  \n"
+#endif
+        /* Add the two 32-bit halves of mm2 to form the result. */
+        "movd    %%mm2, %[t]         \n"
+        "psrlq   $32, %%mm2          \n"
+        "movd    %%mm2, %[res]       \n"
+        "add     %[t], %[res]        \n"
+        : /* outputs */
+#if ORDER > 256
+        [cnt]"+r"(cnt),
+        [s2] "+r"(s2),
+        [res]"=r"(res),
+        [t]  "=r"(t)
+        : /* inputs */
+        [v1]"2"(v1),  /* tied to operand 2: shares its register with res */
+        [f2]"3"(f2)   /* tied to operand 3: shares its register with t */
+#else
+        [res]"=r"(res),
+        [t]  "=r"(t)
+        : /* inputs */
+        [v1]"r"(v1),
+        [f2]"r"(f2),
+        [s2]"r"(s2)
+#endif
+        : /* clobbers ("memory": the asm stores to the v1 array) */
+        "mm0", "mm1", "mm2", "memory"
+    );
+    return res;
+}
+/* Scalar product of v1 (pre-update values) and f2, fused with v1 -= s2. */
+static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t *s2)
+{
+    int res, t;
+#if ORDER > 256
+    int cnt = ORDER>>8;  /* loop passes; each one advances 512 bytes */
+#endif
+    /* mm2 accumulates two 32-bit partial sums; mm0 and mm1 are scratch. */
+    asm volatile (
+#if ORDER > 256
+        "pxor    %%mm2, %%mm2        \n"
+    "1:                              \n"
+#else
+        "movq    (%[v1]), %%mm2      \n"
+        "movq    %%mm2, %%mm0        \n"
+        "pmaddwd (%[f2]), %%mm2      \n"
+        "psubw   (%[s2]), %%mm0      \n"
+        "movq    %%mm0, (%[v1])      \n"
+#endif
+        /* One block handles the four elements at byte offset n. */
+#define SP_SUB_BLOCK(n)                      \
+        "movq    " #n "(%[v1]), %%mm1    \n" \
+        "movq    %%mm1, %%mm0            \n" \
+        "pmaddwd " #n "(%[f2]), %%mm1    \n" \
+        "psubw   " #n "(%[s2]), %%mm0    \n" \
+        "movq    %%mm0, " #n "(%[v1])    \n" \
+        "paddd   %%mm1, %%mm2            \n"
+
+REPEAT_MB(SP_SUB_BLOCK)
+
+#if ORDER > 256
+        "add     $512, %[v1]         \n"
+        "add     $512, %[s2]         \n"
+        "add     $512, %[f2]         \n"
+        "dec     %[cnt]              \n"
+        "jne     1b                  \n"
+#endif
+        /* Add the two 32-bit halves of mm2 to form the result. */
+        "movd    %%mm2, %[t]         \n"
+        "psrlq   $32, %%mm2          \n"
+        "movd    %%mm2, %[res]       \n"
+        "add     %[t], %[res]        \n"
+        : /* outputs */
+#if ORDER > 256
+        [cnt]"+r"(cnt),
+        [s2] "+r"(s2),
+        [res]"=r"(res),
+        [t]  "=r"(t)
+        : /* inputs */
+        [v1]"2"(v1),  /* tied to operand 2: shares its register with res */
+        [f2]"3"(f2)   /* tied to operand 3: shares its register with t */
+#else
+        [res]"=r"(res),
+        [t]  "=r"(t)
+        : /* inputs */
+        [v1]"r"(v1),
+        [f2]"r"(f2),
+        [s2]"r"(s2)
+#endif
+        : /* clobbers ("memory": the asm stores to the v1 array) */
+        "mm0", "mm1", "mm2", "memory"
+    );
+    return res;
+}
+/* Returns the scalar product of the 16-bit vectors v1 and v2 (both only read). */
+static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
+{
+    int res, t;
+#if ORDER > 256
+    int cnt = ORDER>>8;  /* loop passes; each one advances 512 bytes */
+#endif
+    /* mm1 accumulates two 32-bit partial sums; mm0 is scratch. */
+    asm volatile (
+#if ORDER > 256
+        "pxor    %%mm1, %%mm1        \n"
+    "1:                              \n"
+#else
+        "movq    (%[v1]), %%mm1      \n"
+        "pmaddwd (%[v2]), %%mm1      \n"
+#endif
+        /* One block handles the four elements at byte offset n. */
+#define SP_BLOCK(n)                          \
+        "movq    " #n "(%[v1]), %%mm0    \n" \
+        "pmaddwd " #n "(%[v2]), %%mm0    \n" \
+        "paddd   %%mm0, %%mm1            \n"
+
+REPEAT_MB(SP_BLOCK)
+
+#if ORDER > 256
+        "add     $512, %[v1]         \n"
+        "add     $512, %[v2]         \n"
+        "dec     %[cnt]              \n"
+        "jne     1b                  \n"
+#endif
+        /* Add the two 32-bit halves of mm1 to form the result. */
+        "movd    %%mm1, %[t]         \n"
+        "psrlq   $32, %%mm1          \n"
+        "movd    %%mm1, %[res]       \n"
+        "add     %[t], %[res]        \n"
+        : /* outputs */
+#if ORDER > 256
+        [cnt]"+r"(cnt),
+        [res]"=r"(res),
+        [t]  "=r"(t)
+        : /* inputs */
+        [v1]"1"(v1),  /* tied to operand 1: shares its register with res */
+        [v2]"2"(v2)   /* tied to operand 2: shares its register with t */
+#else
+        [res]"=r"(res),
+        [t]  "=r"(t)
+        : /* inputs */
+        [v1]"r"(v1),
+        [v2]"r"(v2)
+#endif
+        : /* clobbers ("memory": both arrays are read through the pointers) */
+        "mm0", "mm1", "memory"
+    );
+    return res;
+}
--- a/lib/rbcodec/codecs/demac/libdemac/vector_math32_armv4.h
+++ b/lib/rbcodec/codecs/demac/libdemac/vector_math32_armv4.h
@ -0,0 +1,201 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id$
+
+Copyright (C) Dave Chapman 2007
+
+ARMv4 vector math copyright (C) 2008 Jens Arnold
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#define FUSED_VECTOR_MATH
+/* REPEAT_BLOCK(x) pastes x eight, seven or three times, depending on ORDER. */
+#if ORDER > 32
+#define REPEAT_BLOCK(x) x x x x x x x x
+#elif ORDER > 16
+#define REPEAT_BLOCK(x) x x x x x x x
+#else
+#define REPEAT_BLOCK(x) x x x
+#endif
+
+/* Scalar product of v1 (pre-update values) and f2, fused with v1 += s2. */
+static inline int32_t vector_sp_add(int32_t* v1, int32_t* f2, int32_t* s2)
+{
+    int res;
+#if ORDER > 32
+    int cnt = ORDER>>5;  /* number of loop passes */
+#endif
+    /* Per group of four words: r0-r3 = v1, r4-r7 = f2 and afterwards s2. */
+    asm volatile (
+#if ORDER > 32
+        "mov     %[res], #0              \n"
+    "1:                                  \n"
+#else
+        "ldmia   %[v1],  {r0-r3}         \n"
+        "ldmia   %[f2]!, {r4-r7}         \n"
+        "mul     %[res], r4, r0          \n"  /* first product initialises res */
+        "mla     %[res], r5, r1, %[res]  \n"
+        "mla     %[res], r6, r2, %[res]  \n"
+        "mla     %[res], r7, r3, %[res]  \n"
+        "ldmia   %[s2]!, {r4-r7}         \n"
+        "add     r0, r0, r4              \n"
+        "add     r1, r1, r5              \n"
+        "add     r2, r2, r6              \n"
+        "add     r3, r3, r7              \n"
+        "stmia   %[v1]!, {r0-r3}         \n"  /* write back and advance v1 */
+#endif
+        REPEAT_BLOCK(
+        "ldmia   %[v1],  {r0-r3}         \n"
+        "ldmia   %[f2]!, {r4-r7}         \n"
+        "mla     %[res], r4, r0, %[res]  \n"
+        "mla     %[res], r5, r1, %[res]  \n"
+        "mla     %[res], r6, r2, %[res]  \n"
+        "mla     %[res], r7, r3, %[res]  \n"
+        "ldmia   %[s2]!, {r4-r7}         \n"
+        "add     r0, r0, r4              \n"
+        "add     r1, r1, r5              \n"
+        "add     r2, r2, r6              \n"
+        "add     r3, r3, r7              \n"
+        "stmia   %[v1]!, {r0-r3}         \n"
+        )
+#if ORDER > 32
+        "subs    %[cnt], %[cnt], #1      \n"
+        "bne     1b                      \n"
+#endif
+        : /* outputs */
+#if ORDER > 32
+        [cnt]"+r"(cnt),
+#endif
+        [v1] "+r"(v1),
+        [f2] "+r"(f2),
+        [s2] "+r"(s2),
+        [res]"=r"(res)
+        : /* inputs */
+        : /* clobbers */
+        "r0", "r1", "r2", "r3", "r4",
+        "r5", "r6", "r7", "cc", "memory"
+    );
+    return res;
+}
+
+/* Scalar product of v1 (pre-update values) and f2, fused with v1 -= s2. */
+static inline int32_t vector_sp_sub(int32_t* v1, int32_t* f2, int32_t* s2)
+{
+    int res;
+#if ORDER > 32
+    int cnt = ORDER>>5;  /* number of loop passes */
+#endif
+    /* Per group of four words: r0-r3 = v1, r4-r7 = f2 and afterwards s2. */
+    asm volatile (
+#if ORDER > 32
+        "mov     %[res], #0              \n"
+    "1:                                  \n"
+#else
+        "ldmia   %[v1],  {r0-r3}         \n"
+        "ldmia   %[f2]!, {r4-r7}         \n"
+        "mul     %[res], r4, r0          \n"  /* first product initialises res */
+        "mla     %[res], r5, r1, %[res]  \n"
+        "mla     %[res], r6, r2, %[res]  \n"
+        "mla     %[res], r7, r3, %[res]  \n"
+        "ldmia   %[s2]!, {r4-r7}         \n"
+        "sub     r0, r0, r4              \n"
+        "sub     r1, r1, r5              \n"
+        "sub     r2, r2, r6              \n"
+        "sub     r3, r3, r7              \n"
+        "stmia   %[v1]!, {r0-r3}         \n"  /* write back and advance v1 */
+#endif
+        REPEAT_BLOCK(
+        "ldmia   %[v1],  {r0-r3}         \n"
+        "ldmia   %[f2]!, {r4-r7}         \n"
+        "mla     %[res], r4, r0, %[res]  \n"
+        "mla     %[res], r5, r1, %[res]  \n"
+        "mla     %[res], r6, r2, %[res]  \n"
+        "mla     %[res], r7, r3, %[res]  \n"
+        "ldmia   %[s2]!, {r4-r7}         \n"
+        "sub     r0, r0, r4              \n"
+        "sub     r1, r1, r5              \n"
+        "sub     r2, r2, r6              \n"
+        "sub     r3, r3, r7              \n"
+        "stmia   %[v1]!, {r0-r3}         \n"
+        )
+#if ORDER > 32
+        "subs    %[cnt], %[cnt], #1      \n"
+        "bne     1b                      \n"
+#endif
+        : /* outputs */
+#if ORDER > 32
+        [cnt]"+r"(cnt),
+#endif
+        [v1] "+r"(v1),
+        [f2] "+r"(f2),
+        [s2] "+r"(s2),
+        [res]"=r"(res)
+        : /* inputs */
+        : /* clobbers */
+        "r0", "r1", "r2", "r3", "r4",
+        "r5", "r6", "r7", "cc", "memory"
+    );
+    return res;
+}
+/* Returns the scalar product of the 32-bit vectors v1 and v2 (both only read). */
+static inline int32_t scalarproduct(int32_t* v1, int32_t* v2)
+{
+    int res;
+#if ORDER > 32
+    int cnt = ORDER>>5;  /* number of loop passes */
+#endif
+    /* Per group of four words: r0-r3 = v1 data, r4-r7 = v2 data. */
+    asm volatile (
+#if ORDER > 32
+        "mov     %[res], #0              \n"
+    "1:                                  \n"
+#else
+        "ldmia   %[v1]!, {r0-r3}         \n"
+        "ldmia   %[v2]!, {r4-r7}         \n"
+        "mul     %[res], r4, r0          \n"  /* first product initialises res */
+        "mla     %[res], r5, r1, %[res]  \n"
+        "mla     %[res], r6, r2, %[res]  \n"
+        "mla     %[res], r7, r3, %[res]  \n"
+#endif
+        REPEAT_BLOCK(
+        "ldmia   %[v1]!, {r0-r3}         \n"
+        "ldmia   %[v2]!, {r4-r7}         \n"
+        "mla     %[res], r4, r0, %[res]  \n"
+        "mla     %[res], r5, r1, %[res]  \n"
+        "mla     %[res], r6, r2, %[res]  \n"
+        "mla     %[res], r7, r3, %[res]  \n"
+        )
+#if ORDER > 32
+        "subs    %[cnt], %[cnt], #1      \n"
+        "bne     1b                      \n"
+#endif
+        : /* outputs */
+#if ORDER > 32
+        [cnt]"+r"(cnt),
+#endif
+        [v1] "+r"(v1),
+        [v2] "+r"(v2),
+        [res]"=r"(res)
+        : /* inputs */
+        : /* clobbers */
+        "r0", "r1", "r2", "r3",
+        "r4", "r5", "r6", "r7", "cc", "memory"
+    );
+    return res;
+}
--- a/lib/rbcodec/codecs/demac/libdemac/vector_math_generic.h
+++ b/lib/rbcodec/codecs/demac/libdemac/vector_math_generic.h
@ -0,0 +1,160 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id$
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#include "demac_config.h"
+
+/* Adds v2 to v1 element by element, in place (v1[i] += v2[i]).
+ *
+ * The work is fully unrolled: 16 elements are processed when ORDER <= 16,
+ * 32 elements when ORDER is 17..32, and (ORDER >> 5) passes of 32 elements
+ * when ORDER > 32. */
+static inline void vector_add(filter_int* v1, filter_int* v2)
+{
+#if ORDER > 32
+    int passes;
+    for (passes = (ORDER >> 5); passes != 0; passes--)
+#endif
+    {
+        v1[0]  += v2[0];   v1[1]  += v2[1];
+        v1[2]  += v2[2];   v1[3]  += v2[3];
+        v1[4]  += v2[4];   v1[5]  += v2[5];
+        v1[6]  += v2[6];   v1[7]  += v2[7];
+        v1[8]  += v2[8];   v1[9]  += v2[9];
+        v1[10] += v2[10];  v1[11] += v2[11];
+        v1[12] += v2[12];  v1[13] += v2[13];
+        v1[14] += v2[14];  v1[15] += v2[15];
+#if ORDER > 16
+        v1[16] += v2[16];  v1[17] += v2[17];
+        v1[18] += v2[18];  v1[19] += v2[19];
+        v1[20] += v2[20];  v1[21] += v2[21];
+        v1[22] += v2[22];  v1[23] += v2[23];
+        v1[24] += v2[24];  v1[25] += v2[25];
+        v1[26] += v2[26];  v1[27] += v2[27];
+        v1[28] += v2[28];  v1[29] += v2[29];
+        v1[30] += v2[30];  v1[31] += v2[31];
+        /* step both vectors past the 32 elements just handled */
+        v1 += 32;
+        v2 += 32;
+#else
+        /* step both vectors past the 16 elements just handled */
+        v1 += 16;
+        v2 += 16;
+#endif
+    }
+}
+
+/* Subtracts v2 from v1 element by element, in place (v1[i] -= v2[i]).
+ *
+ * The work is fully unrolled: 16 elements are processed when ORDER <= 16,
+ * 32 elements when ORDER is 17..32, and (ORDER >> 5) passes of 32 elements
+ * when ORDER > 32. */
+static inline void vector_sub(filter_int* v1, filter_int* v2)
+{
+#if ORDER > 32
+    int passes;
+    for (passes = (ORDER >> 5); passes != 0; passes--)
+#endif
+    {
+        v1[0]  -= v2[0];   v1[1]  -= v2[1];
+        v1[2]  -= v2[2];   v1[3]  -= v2[3];
+        v1[4]  -= v2[4];   v1[5]  -= v2[5];
+        v1[6]  -= v2[6];   v1[7]  -= v2[7];
+        v1[8]  -= v2[8];   v1[9]  -= v2[9];
+        v1[10] -= v2[10];  v1[11] -= v2[11];
+        v1[12] -= v2[12];  v1[13] -= v2[13];
+        v1[14] -= v2[14];  v1[15] -= v2[15];
+#if ORDER > 16
+        v1[16] -= v2[16];  v1[17] -= v2[17];
+        v1[18] -= v2[18];  v1[19] -= v2[19];
+        v1[20] -= v2[20];  v1[21] -= v2[21];
+        v1[22] -= v2[22];  v1[23] -= v2[23];
+        v1[24] -= v2[24];  v1[25] -= v2[25];
+        v1[26] -= v2[26];  v1[27] -= v2[27];
+        v1[28] -= v2[28];  v1[29] -= v2[29];
+        v1[30] -= v2[30];  v1[31] -= v2[31];
+        /* step both vectors past the 32 elements just handled */
+        v1 += 32;
+        v2 += 32;
+#else
+        /* step both vectors past the 16 elements just handled */
+        v1 += 16;
+        v2 += 16;
+#endif
+    }
+}
+
+/* Returns the dot product of v1 and v2, accumulated in an int.
+ *
+ * The work is fully unrolled: 16 products are summed when ORDER <= 16,
+ * 32 when ORDER is 17..32, and (ORDER >> 5) passes of 32 when ORDER > 32. */
+static inline int32_t scalarproduct(filter_int* v1, filter_int* v2)
+{
+    int acc = 0;
+
+#if ORDER > 32
+    int passes;
+    for (passes = (ORDER >> 5); passes != 0; passes--)
+#endif
+    {
+        acc += v1[0]  * v2[0];   acc += v1[1]  * v2[1];
+        acc += v1[2]  * v2[2];   acc += v1[3]  * v2[3];
+        acc += v1[4]  * v2[4];   acc += v1[5]  * v2[5];
+        acc += v1[6]  * v2[6];   acc += v1[7]  * v2[7];
+        acc += v1[8]  * v2[8];   acc += v1[9]  * v2[9];
+        acc += v1[10] * v2[10];  acc += v1[11] * v2[11];
+        acc += v1[12] * v2[12];  acc += v1[13] * v2[13];
+        acc += v1[14] * v2[14];  acc += v1[15] * v2[15];
+#if ORDER > 16
+        acc += v1[16] * v2[16];  acc += v1[17] * v2[17];
+        acc += v1[18] * v2[18];  acc += v1[19] * v2[19];
+        acc += v1[20] * v2[20];  acc += v1[21] * v2[21];
+        acc += v1[22] * v2[22];  acc += v1[23] * v2[23];
+        acc += v1[24] * v2[24];  acc += v1[25] * v2[25];
+        acc += v1[26] * v2[26];  acc += v1[27] * v2[27];
+        acc += v1[28] * v2[28];  acc += v1[29] * v2[29];
+        acc += v1[30] * v2[30];  acc += v1[31] * v2[31];
+        /* step both vectors past the 32 elements just handled */
+        v1 += 32;
+        v2 += 32;
+#else
+        /* step both vectors past the 16 elements just handled */
+        v1 += 16;
+        v2 += 16;
+#endif
+    }
+    return acc;
+}
--- a/lib/rbcodec/codecs/demac/wavwrite.c
+++ b/lib/rbcodec/codecs/demac/wavwrite.c
@ -0,0 +1,110 @@
+/*
+
+demac - A Monkey's Audio decoder
+
+$Id$
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#include <stdio.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include "inttypes.h"
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+
+#include "parser.h"
+
+#ifndef __WIN32__
+#define O_BINARY 0
+#endif
+
+// Template for the 44-byte canonical PCM WAV header.  open_wav() patches
+// the size, channel, rate and bit-depth fields in place before writing it;
+// multi-byte fields are stored little-endian.
+static unsigned char wav_header[44]={
+    'R','I','F','F',//  0 - ChunkID
+    0,0,0,0,        //  4 - ChunkSize (filesize-8)
+    'W','A','V','E',//  8 - Format
+    'f','m','t',' ',// 12 - Subchunk1ID
+    16,0,0,0,       // 16 - Subchunk1Size (16 for PCM)
+    1,0,            // 20 - AudioFormat (1=Uncompressed)
+    2,0,            // 22 - NumChannels
+    0,0,0,0,        // 24 - SampleRate in Hz
+    0,0,0,0,        // 28 - ByteRate (SampleRate*NumChannels*(BitsPerSample/8))
+    4,0,            // 32 - BlockAlign (== NumChannels * BitsPerSample/8)
+    16,0,           // 34 - BitsPerSample
+    'd','a','t','a',// 36 - Subchunk2ID
+    0,0,0,0         // 40 - Subchunk2Size
+};
+
+// Store the 32-bit value v at p in little-endian byte order.
+static void wav_put_le32(unsigned char* p, uint32_t v)
+{
+    p[0]=v&0xff;
+    p[1]=(v>>8)&0xff;
+    p[2]=(v>>16)&0xff;
+    p[3]=(v>>24)&0xff;
+}
+
+// Create (or truncate) filename and write a 44-byte PCM WAV header
+// describing the stream in ape_ctx (channels, samplerate, bps,
+// totalsamples).
+//
+// Returns the open, write-only file descriptor positioned just after the
+// header, or a negative value if the file could not be opened or the
+// header could not be written completely (the descriptor is closed in
+// that case).
+int open_wav(struct ape_ctx_t* ape_ctx, char* filename)
+{
+    int fd;
+    uint32_t bytespersample;
+    uint32_t blockalign;
+    uint32_t datasize;
+
+    fd=open(filename, O_CREAT|O_WRONLY|O_TRUNC|O_BINARY, 0644);
+    if (fd < 0)
+        return fd;
+
+    // All sizes are computed in uint32_t: the header fields are 32 bits
+    // wide, and unsigned arithmetic wraps instead of overflowing a signed
+    // int for very long streams.
+    bytespersample=ape_ctx->bps/8;
+    blockalign=bytespersample*ape_ctx->channels;
+    datasize=(uint32_t)ape_ctx->totalsamples*blockalign;
+
+    // ChunkSize: whole file (data + 44-byte header) minus the 8 bytes of
+    // ChunkID and ChunkSize themselves
+    wav_put_le32(&wav_header[4], datasize+44-8);
+
+    // Number of channels
+    wav_header[22]=ape_ctx->channels;
+
+    // Samplerate
+    wav_put_le32(&wav_header[24], ape_ctx->samplerate);
+
+    // ByteRate
+    wav_put_le32(&wav_header[28], (uint32_t)ape_ctx->samplerate*blockalign);
+
+    // BlockAlign
+    wav_header[32]=blockalign;
+
+    // Bits per sample
+    wav_header[34]=ape_ctx->bps;
+
+    // Subchunk2Size: size of the sample data only
+    wav_put_le32(&wav_header[40], datasize);
+
+    // A failed or short write would leave the caller with a file that has
+    // no valid header, so report it instead of returning a usable fd.
+    if (write(fd,wav_header,sizeof(wav_header)) != (ssize_t)sizeof(wav_header)) {
+        close(fd);
+        return -1;
+    }
+
+    return fd;
+}
--- a/lib/rbcodec/codecs/demac/wavwrite.h
+++ b/lib/rbcodec/codecs/demac/wavwrite.h
@ -0,0 +1,32 @@
+/*
+
+demac - A Monkey's Audio decoder
+
+$Id$
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#ifndef _APE_WAVWRITE_H
+#define _APE_WAVWRITE_H
+
+#include "parser.h"
+
+/* Creates (or truncates) filename, writes a 44-byte PCM WAV header built
+ * from the channels, samplerate, bps and totalsamples fields of ape_ctx,
+ * and returns the open file descriptor, or a negative value on failure. */
+int open_wav(struct ape_ctx_t* ape_ctx, char* filename);
+
+#endif
--- a/lib/rbcodec/codecs/flac.c
+++ b/lib/rbcodec/codecs/flac.c
@ -0,0 +1,536 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2005 Dave Chapman
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include "codeclib.h"
+#include <codecs/libffmpegFLAC/decoder.h>
+
+CODEC_HEADER
+
+static FLACContext fc IBSS_ATTR_FLAC;
+
+/* The output buffers containing the decoded samples (channels 0 and 1) */
+static int32_t decoded0[MAX_BLOCKSIZE] IBSS_ATTR_FLAC;
+static int32_t decoded1[MAX_BLOCKSIZE] IBSS_ATTR_FLAC;
+static int32_t decoded2[MAX_BLOCKSIZE] IBSS_ATTR_FLAC_LARGE_IRAM;
+static int32_t decoded3[MAX_BLOCKSIZE] IBSS_ATTR_FLAC_LARGE_IRAM;
+static int32_t decoded4[MAX_BLOCKSIZE] IBSS_ATTR_FLAC_XLARGE_IRAM;
+static int32_t decoded5[MAX_BLOCKSIZE] IBSS_ATTR_FLAC_XLARGE_IRAM;
+
+#define MAX_SUPPORTED_SEEKTABLE_SIZE 5000
+
+/* Notes about seeking:
+
+   The full seek table consists of:
+      uint64_t sample (only 36 bits are used)
+      uint64_t offset
+      uint16_t blocksize
+
+   We also limit the sample and offset values to 32-bits - Rockbox doesn't
+   support files bigger than 2GB on FAT32 filesystems.
+
+   The reference FLAC encoder produces a seek table with points every
+   10 seconds, but this can be overridden by the user when encoding a file.
+
+   With the default settings, a typical 4 minute track will contain
+   24 seek points.
+
+   Taking the extreme case of a Rockbox supported file to be a 2GB (compressed)
+   16-bit/44.1KHz mono stream with a likely uncompressed size of 4GB:
+      Total duration is: 48694 seconds (about 810 minutes - 13.5 hours)
+      Total number of seek points: 4869
+
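+   Therefore we limit the number of seek points to 5000.  This is a
+   very extreme case, and requires 5000*sizeof(struct FLACseekpoints)
+   bytes of storage (about 60000 bytes, assuming the 10 bytes of fields
+   in each entry are padded to 12).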
+
+   If we come across a FLAC file with more than this number of seekpoints, we
+   just use the first 5000.
+
+*/
+
+struct FLACseekpoints {
+    uint32_t sample;
+    uint32_t offset;
+    uint16_t blocksize;
+};
+
+static struct FLACseekpoints seekpoints[MAX_SUPPORTED_SEEKTABLE_SIZE];
+static int nseekpoints;
+
+static int8_t *bit_buffer;
+static size_t buff_size;
+
+/* Read a big-endian 32-bit value from p.  Each byte is widened to uint32_t
+ * before shifting so the top byte is never shifted into the sign bit of a
+ * promoted int. */
+static inline uint32_t flac_read_be32(const unsigned char* p)
+{
+    return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
+           ((uint32_t)p[2] << 8) | (uint32_t)p[3];
+}
+
+/* Reset the decoder context and parse the FLAC metadata blocks.
+ *
+ * fc                 - decoder context; zeroed and then filled from the
+ *                      STREAMINFO block (block sizes, frame sizes, sample
+ *                      rate, channels, bps, totalsamples, length, bitrate)
+ * first_frame_offset - file position at which the "fLaC" marker is expected
+ *
+ * SEEKTABLE entries whose sample number and offset fit in 32 bits are
+ * copied into seekpoints[] (at most MAX_SUPPORTED_SEEKTABLE_SIZE); all
+ * other metadata blocks are skipped.
+ *
+ * Returns true if a usable STREAMINFO block was found, false on a short
+ * read, a missing "fLaC" marker, a malformed or oversized STREAMINFO block,
+ * a zero sample rate, or a maximum blocksize above MAX_BLOCKSIZE. */
+static bool flac_init(FLACContext* fc, int first_frame_offset)
+{
+    unsigned char buf[255];
+    bool found_streaminfo=false;
+    uint32_t seekpoint_hi,seekpoint_lo;
+    uint32_t offset_hi,offset_lo;
+    uint16_t blocksize;
+    int endofmetadata=0;
+    uint32_t blocklength;
+
+    ci->memset(fc,0,sizeof(FLACContext));
+    nseekpoints=0;
+
+    fc->sample_skip = 0;
+
+    /* Reset sample buffers */
+    memset(decoded0, 0, sizeof(decoded0));
+    memset(decoded1, 0, sizeof(decoded1));
+    memset(decoded2, 0, sizeof(decoded2));
+    memset(decoded3, 0, sizeof(decoded3));
+    memset(decoded4, 0, sizeof(decoded4));
+    memset(decoded5, 0, sizeof(decoded5));
+
+    /* Set sample buffers in decoder structure */
+    fc->decoded[0] = decoded0;
+    fc->decoded[1] = decoded1;
+    fc->decoded[2] = decoded2;
+    fc->decoded[3] = decoded3;
+    fc->decoded[4] = decoded4;
+    fc->decoded[5] = decoded5;
+
+
+    /* Skip any foreign tags at start of file */
+    ci->seek_buffer(first_frame_offset);
+
+    fc->metadatalength = first_frame_offset;
+
+    if (ci->read_filebuf(buf, 4) < 4)
+    {
+        return false;
+    }
+
+    if (ci->memcmp(buf,"fLaC",4) != 0)
+    {
+        return false;
+    }
+    fc->metadatalength += 4;
+
+    while (!endofmetadata) {
+        /* 4-byte block header: last-block flag + type, 24-bit length */
+        if (ci->read_filebuf(buf, 4) < 4)
+        {
+            return false;
+        }
+
+        endofmetadata=(buf[0]&0x80);
+        blocklength = (buf[1] << 16) | (buf[2] << 8) | buf[3];
+        fc->metadatalength+=blocklength+4;
+
+        if ((buf[0] & 0x7f) == 0)       /* 0 is the STREAMINFO block */
+        {
+            /* The block is read into the fixed-size buf and bytes 0..17
+               are parsed below, so a length outside that range would
+               either overflow buf or leave stale bytes to be parsed. */
+            if (blocklength < 18 || blocklength > sizeof(buf))
+            {
+                LOGF("FLAC: Invalid STREAMINFO length (%lu)\n",
+                     (unsigned long)blocklength);
+                return false;
+            }
+
+            if (ci->read_filebuf(buf, blocklength) < blocklength) return false;
+
+            fc->filesize = ci->filesize;
+            fc->min_blocksize = (buf[0] << 8) | buf[1];
+            int max_blocksize = (buf[2] << 8) | buf[3];
+            if (max_blocksize > MAX_BLOCKSIZE)
+            {
+                LOGF("FLAC: Maximum blocksize is too large (%d > %d)\n",
+                     max_blocksize, MAX_BLOCKSIZE);
+                return false;
+            }
+            fc->max_blocksize = max_blocksize;
+            fc->min_framesize = (buf[4] << 16) | (buf[5] << 8) | buf[6];
+            fc->max_framesize = (buf[7] << 16) | (buf[8] << 8) | buf[9];
+            fc->samplerate = (buf[10] << 12) | (buf[11] << 4)
+                             | ((buf[12] & 0xf0) >> 4);
+            fc->channels = ((buf[12]&0x0e)>>1) + 1;
+            fc->bps = (((buf[12]&0x01) << 4) | ((buf[13]&0xf0)>>4) ) + 1;
+
+            /* The sample rate is used as a divisor below */
+            if (fc->samplerate == 0)
+            {
+                LOGF("FLAC: Invalid sample rate (0)\n");
+                return false;
+            }
+
+            /* totalsamples is a 36-bit field, but we assume <= 32 bits are
+               used */
+            fc->totalsamples = flac_read_be32(&buf[14]);
+
+            /* Calculate track length (in ms) and estimate the bitrate
+               (in kbit/s) */
+            fc->length = ((int64_t) fc->totalsamples * 1000) / fc->samplerate;
+
+            found_streaminfo=true;
+        } else if ((buf[0] & 0x7f) == 3) { /* 3 is the SEEKTABLE block */
+            /* Each seek point is 18 bytes: 64-bit sample number, 64-bit
+               offset, 16-bit blocksize */
+            while ((nseekpoints < MAX_SUPPORTED_SEEKTABLE_SIZE) &&
+                   (blocklength >= 18)) {
+                if (ci->read_filebuf(buf,18) < 18) return false;
+                blocklength-=18;
+
+                seekpoint_hi=flac_read_be32(&buf[0]);
+                seekpoint_lo=flac_read_be32(&buf[4]);
+                offset_hi=flac_read_be32(&buf[8]);
+                offset_lo=flac_read_be32(&buf[12]);
+
+                blocksize=(buf[16] << 8) | buf[17];
+
+                /* Only store seekpoints where the high 32 bits are zero */
+                if ((seekpoint_hi == 0) && (seekpoint_lo != 0xffffffff) &&
+                    (offset_hi == 0)) {
+                        seekpoints[nseekpoints].sample=seekpoint_lo;
+                        seekpoints[nseekpoints].offset=offset_lo;
+                        seekpoints[nseekpoints].blocksize=blocksize;
+                        nseekpoints++;
+                }
+            }
+            /* Skip any unread seekpoints */
+            if (blocklength > 0)
+                ci->advance_buffer(blocklength);
+        } else {
+          /* Skip to next metadata block */
+          ci->advance_buffer(blocklength);
+        }
+    }
+
+   if (!found_streaminfo)
+       return false;
+
+   /* length is 0 when STREAMINFO gives no usable sample count (or the
+      stream is shorter than 1 ms); report a bitrate of 0 rather than
+      dividing by zero. */
+   if (fc->length != 0) {
+       fc->bitrate = ((int64_t) (fc->filesize-fc->metadatalength) * 8)
+                     / fc->length;
+   } else {
+       fc->bitrate = 0;
+   }
+   return true;
+}
+
+/* Synchronize to next frame in stream - adapted from libFLAC 1.1.3b2
+ *
+ * Scans the bit reader fc->gb byte by byte for the sync pattern (0xff
+ * followed by a byte whose upper six bits are 0x3e), advances the codec
+ * input buffer to the first sync byte, re-requests the buffer and decodes
+ * the frame found there with flac_decode_frame().
+ *
+ * Returns true if a frame was found and decoded, false if fewer than 8
+ * bits remain before a sync pattern is found or if decoding fails.
+ */
+static bool frame_sync(FLACContext* fc) {
+    unsigned int x = 0;
+    bool cached = false;   /* true when x already holds the next byte */
+
+    /* Make sure we're byte aligned. */
+    align_get_bits(&fc->gb);
+
+    while(1) {
+        if(fc->gb.size_in_bits - get_bits_count(&fc->gb) < 8) {
+            /* Error, end of bitstream, a valid stream should never reach here
+             * since the buffer should contain at least one frame header.
+             */
+            return false;
+        }
+
+        if(cached)
+            cached = false;
+        else
+            x = get_bits(&fc->gb, 8);
+
+        if(x == 0xff) { /* MAGIC NUMBER for first 8 frame sync bits. */
+            x = get_bits(&fc->gb, 8);
+            /* We have to check if we just read two 0xff's in a row; the second
+             * may actually be the beginning of the sync code.
+             */
+            if(x == 0xff) { /* MAGIC NUMBER for first 8 frame sync bits. */
+                cached = true;
+            }
+            else if(x >> 2 == 0x3e) { /* MAGIC NUMBER for last 6 sync bits. */
+                /* Successfully synced. */
+                break;
+            }
+        }
+    }
+
+    /* Advance and init bit buffer to the new frame.  The 16 bits subtracted
+     * are the two sync bytes just read, so the buffer ends up positioned at
+     * the start of the sync code. */
+    ci->advance_buffer((get_bits_count(&fc->gb)-16)>>3); /* consumed bytes */
+    bit_buffer = ci->request_buffer(&buff_size, MAX_FRAMESIZE+16);
+    init_get_bits(&fc->gb, bit_buffer, buff_size*8);
+
+    /* Decode the frame to verify the frame crc and
+     * fill fc with its metadata.
+     */
+    if(flac_decode_frame(fc, 
+       bit_buffer, buff_size, ci->yield) < 0) {
+        return false;
+    }
+
+    return true;
+}
+
+/* Seek to sample - adapted from libFLAC 1.1.3b2+
+ *
+ * Positions the input stream on the frame that contains target_sample and
+ * stores the offset of that sample within the frame in fc->sample_skip.
+ *
+ * The search keeps a lower and an upper bound (byte position and sample
+ * number for each), optionally tightened from seekpoints[], and repeatedly
+ * interpolates a byte position between them, syncs to a frame there and
+ * narrows the bounds from that frame's sample number.
+ *
+ * Returns true on success.  Returns false if the bounds become
+ * inconsistent, if ci->seek_buffer() fails, if no frame can be synced, or
+ * if the stream looks corrupted; several (not all) of the failure paths
+ * first seek back to the position the stream had on entry.
+ */
+static bool flac_seek(FLACContext* fc, uint32_t target_sample) {
+    off_t orig_pos = ci->curpos;
+    off_t pos = -1;
+    unsigned long lower_bound, upper_bound;
+    unsigned long lower_bound_sample, upper_bound_sample;
+    int i;
+    unsigned approx_bytes_per_frame;
+    uint32_t this_frame_sample = fc->samplenumber;
+    unsigned this_block_size = fc->blocksize;
+    bool needs_seek = true, first_seek = true;
+
+    /* We are just guessing here. */
+    if(fc->max_framesize > 0)
+        approx_bytes_per_frame = (fc->max_framesize + fc->min_framesize)/2 + 1;
+    /* Check if it's a known fixed-blocksize stream. */
+    else if(fc->min_blocksize == fc->max_blocksize && fc->min_blocksize > 0)
+        approx_bytes_per_frame = fc->min_blocksize*fc->channels*fc->bps/8 + 64;
+    else
+        approx_bytes_per_frame = 4608 * fc->channels * fc->bps/8 + 64;
+
+    /* Set an upper and lower bound on where in the stream we will search. */
+    lower_bound = fc->metadatalength;
+    lower_bound_sample = 0;
+    upper_bound = fc->filesize;
+    upper_bound_sample = fc->totalsamples>0 ? fc->totalsamples : target_sample;
+
+    /* Refine the bounds if we have a seektable with suitable points. */
+    if(nseekpoints > 0) {
+        /* Find the closest seek point <= target_sample, if it exists. */
+        for(i = nseekpoints-1; i >= 0; i--) {
+            if(seekpoints[i].sample <= target_sample)
+                break;
+        }
+        if(i >= 0) { /* i.e. we found a suitable seek point... */
+            /* seek point offsets are added to the metadata length to get
+             * a file position */
+            lower_bound = fc->metadatalength + seekpoints[i].offset;
+            lower_bound_sample = seekpoints[i].sample;
+        }
+
+        /* Find the closest seek point > target_sample, if it exists. */
+        for(i = 0; i < nseekpoints; i++) {
+            if(seekpoints[i].sample > target_sample)
+                break;
+        }
+        if(i < nseekpoints) { /* i.e. we found a suitable seek point... */
+            upper_bound = fc->metadatalength + seekpoints[i].offset;
+            upper_bound_sample = seekpoints[i].sample;
+        }
+    }
+
+    while(1) {
+        /* Check if bounds are still ok. */
+        if(lower_bound_sample >= upper_bound_sample ||
+           lower_bound > upper_bound) {
+            return false;
+        }
+
+        /* Calculate new seek position: linear interpolation between the
+         * bounds, moved back by one estimated frame, then clamped to
+         * [lower_bound, upper_bound-1]. */
+        if(needs_seek) {
+            pos = (off_t)(lower_bound +
+              (((target_sample - lower_bound_sample) *
+              (int64_t)(upper_bound - lower_bound)) /
+              (upper_bound_sample - lower_bound_sample)) -
+              approx_bytes_per_frame);
+            
+            if(pos >= (off_t)upper_bound)
+                pos = (off_t)upper_bound-1;
+            if(pos < (off_t)lower_bound)
+                pos = (off_t)lower_bound;
+        }
+
+        if(!ci->seek_buffer(pos))
+            return false;
+
+        bit_buffer = ci->request_buffer(&buff_size, MAX_FRAMESIZE+16);
+        init_get_bits(&fc->gb, bit_buffer, buff_size*8);
+
+        /* Now we need to get a frame.  It is possible for our seek
+         * to land in the middle of audio data that looks exactly like
+         * a frame header from a future version of an encoder.  When
+         * that happens, frame_sync() will return false.
+         * But there is a remote possibility that it is properly
+         * synced at such a "future-codec frame", so to make sure,
+         * we wait to see several "unparseable" errors in a row before
+         * bailing out.
+         */
+        {
+            unsigned unparseable_count;
+            bool got_a_frame = false;
+            for(unparseable_count = 0; !got_a_frame
+                && unparseable_count < 10; unparseable_count++) {
+                if(frame_sync(fc))
+                    got_a_frame = true;
+            }
+            if(!got_a_frame) {
+                ci->seek_buffer(orig_pos);
+                return false;
+            }
+        }
+
+        /* frame_sync() decoded a frame, so fc now describes it */
+        this_frame_sample = fc->samplenumber;
+        this_block_size = fc->blocksize;
+
+        if(target_sample >= this_frame_sample
+           && target_sample < this_frame_sample+this_block_size) {
+            /* Found the frame containing the target sample. */
+            fc->sample_skip = target_sample - this_frame_sample;
+            break;
+        }
+
+        if(this_frame_sample + this_block_size >= upper_bound_sample &&
+           !first_seek) {
+            if(pos == (off_t)lower_bound || !needs_seek) {
+                ci->seek_buffer(orig_pos);
+                return false;
+            }
+            /* Our last move backwards wasn't big enough, try again. */
+            approx_bytes_per_frame *= 2;
+            continue;
+        }
+        /* Allow one seek over upper bound,
+         * required for streams with unknown total samples.
+         */
+        first_seek = false;
+
+        /* Make sure we are not seeking in a corrupted stream */
+        if(this_frame_sample < lower_bound_sample) {
+            ci->seek_buffer(orig_pos);
+            return false;
+        }
+
+        /* Re-estimate the frame size from the frame just decoded */
+        approx_bytes_per_frame = this_block_size*fc->channels*fc->bps/8 + 64;
+
+        /* We need to narrow the search. */
+        if(target_sample < this_frame_sample) {
+            upper_bound_sample = this_frame_sample;
+            upper_bound = ci->curpos;
+        }
+        else { /* Target is beyond this frame. */
+            /* We are close, continue in decoding next frames. */
+            if(target_sample < this_frame_sample + 4*this_block_size) {
+                pos = ci->curpos + fc->framesize;
+                needs_seek = false;
+            }
+
+            lower_bound_sample = this_frame_sample + this_block_size;
+            lower_bound = ci->curpos + fc->framesize;
+        }
+    }
+
+    return true;
+}
+
+/* Seek to a byte offset in the file and sync to the next decodable frame.
+ *
+ * Makes up to 10 frame_sync() attempts.  Returns true once a frame has
+ * been synced.  If the initial seek fails, returns false immediately; if
+ * no frame is found, seeks to fc->metadatalength and returns false.
+ */
+static bool flac_seek_offset(FLACContext* fc, uint32_t offset) {
+    unsigned attempts = 0;
+    bool synced = false;
+
+    if(!ci->seek_buffer(offset))
+        return false;
+
+    bit_buffer = ci->request_buffer(&buff_size, MAX_FRAMESIZE);
+    init_get_bits(&fc->gb, bit_buffer, buff_size*8);
+
+    while(!synced && attempts < 10) {
+        synced = frame_sync(fc);
+        attempts++;
+    }
+
+    if(synced)
+        return true;
+
+    /* Nothing decodable from here: fall back to the first frame position */
+    ci->seek_buffer(fc->metadatalength);
+    return false;
+}
+
+/* Codec entry point.  On CODEC_LOAD the DSP sample depth is configured
+ * to FLAC_OUTPUT_DEPTH-1; every call reason returns CODEC_OK. */
+enum codec_status codec_main(enum codec_entry_call_reason reason)
+{
+    if (reason != CODEC_LOAD)
+        return CODEC_OK;
+
+    /* Generic codec initialisation */
+    ci->configure(DSP_SET_SAMPLE_DEPTH, FLAC_OUTPUT_DEPTH-1);
+    return CODEC_OK;
+}
+
+/* this is called for each file to process
+ *
+ * Initialises the decoder from the file's metadata, resumes at the byte
+ * offset stored in ci->id3->offset, then decodes frame by frame until the
+ * input buffer is empty or a halt is requested, handling time-seek
+ * requests in between.
+ *
+ * Returns CODEC_OK when the loop ends normally, CODEC_ERROR if
+ * initialisation fails or a frame cannot be decoded.
+ */
+enum codec_status codec_run(void)
+{
+    int8_t *buf;
+    uint32_t samplesdone;
+    uint32_t elapsedtime;
+    size_t bytesleft;
+    int consumed;
+    int res;
+    int frame;
+    intptr_t param;
+
+    if (codec_init()) {
+        LOGF("FLAC: Error initialising codec\n");
+        return CODEC_ERROR;
+    }
+
+    /* Need to save offset for later use (cleared indirectly by flac_init) */
+    samplesdone = ci->id3->offset;
+    
+    if (!flac_init(&fc,ci->id3->first_frame_offset)) {
+        LOGF("FLAC: Error initialising codec\n");
+        return CODEC_ERROR;
+    }
+
+    ci->configure(DSP_SWITCH_FREQUENCY, ci->id3->frequency);
+    ci->configure(DSP_SET_STEREO_MODE, fc.channels == 1 ?
+                  STEREO_MONO : STEREO_NONINTERLEAVED);
+    codec_set_replaygain(ci->id3);
+
+    /* samplesdone holds the resume byte offset at this point */
+    flac_seek_offset(&fc, samplesdone);
+    samplesdone=fc.samplenumber+fc.blocksize;
+    /* 64-bit intermediate: samplesdone*1000 does not fit in 32 bits for
+       tracks longer than a few hours */
+    elapsedtime=(uint32_t)(((uint64_t)samplesdone*1000)/ci->id3->frequency);
+    ci->set_elapsed(elapsedtime);
+
+    /* The main decoding loop */
+    frame=0;
+    buf = ci->request_buffer(&bytesleft, MAX_FRAMESIZE);
+    while (bytesleft) {
+        enum codec_command_action action = ci->get_command(&param);
+
+        if (action == CODEC_ACTION_HALT)
+            break;
+
+        /* Deal with any pending seek requests */
+        if (action == CODEC_ACTION_SEEK_TIME) {
+            /* param is the target time in ms; convert it to a sample number */
+            if (flac_seek(&fc,(uint32_t)(((uint64_t)param
+                *ci->id3->frequency)/1000))) {
+                /* Refill the input buffer */
+                buf = ci->request_buffer(&bytesleft, MAX_FRAMESIZE);
+            }
+
+            ci->set_elapsed(param);
+            ci->seek_complete();
+        }
+
+        if((res=flac_decode_frame(&fc,buf,
+                             bytesleft,ci->yield)) < 0) {
+             LOGF("FLAC: Frame %d, error %d\n",frame,res);
+             return CODEC_ERROR;
+        }
+        consumed=fc.gb.index/8;
+        frame++;
+
+        ci->yield();
+        /* sample_skip is non-zero only for the first frame after a seek */
+        ci->pcmbuf_insert(&fc.decoded[0][fc.sample_skip], &fc.decoded[1][fc.sample_skip],
+                          fc.blocksize - fc.sample_skip);
+        
+        fc.sample_skip = 0;
+
+        /* Update the elapsed-time indicator */
+        samplesdone=fc.samplenumber+fc.blocksize;
+        elapsedtime=(uint32_t)(((uint64_t)samplesdone*1000)/ci->id3->frequency);
+        ci->set_elapsed(elapsedtime);
+
+        ci->advance_buffer(consumed);
+
+        buf = ci->request_buffer(&bytesleft, MAX_FRAMESIZE);
+    }
+
+    LOGF("FLAC: Decoded %lu samples\n",(unsigned long)samplesdone);
+    return CODEC_OK;
+}
--- a/lib/rbcodec/codecs/gbs.c
+++ b/lib/rbcodec/codecs/gbs.c
@ -0,0 +1,108 @@
+
+/* Ripped off from Game_Music_Emu 0.5.2. http://www.slack.net/~ant/ */
+
+#include <codecs/lib/codeclib.h>
+#include "libgme/gbs_emu.h" 
+
+CODEC_HEADER
+
+/* Maximum number of bytes to process in one iteration */
+#define CHUNK_SIZE (1024*2)
+
+static int16_t samples[CHUNK_SIZE] IBSS_ATTR;
+static struct Gbs_Emu gbs_emu;
+
+/****************** rockbox interface ******************/
+
+/* Start sub-track t in the GBS emulator and publish the track number to
+ * the UI through the elapsed-time field (t * 1000). */
+static void set_codec_track(int t)
+{
+    Gbs_start_track(&gbs_emu, t);
+
+    if (ci->loop_track()) {
+        /* loop mode: leave the track without a length limit */
+    } else {
+        /* otherwise fade out over 4000 once the track length is reached */
+        Track_set_fade(&gbs_emu, Track_get_length( &gbs_emu, t ), 4000);
+    }
+
+    /* t is the track number to display */
+    ci->set_elapsed(t*1000);
+}
+
+/* Codec entry point.  On CODEC_LOAD the DSP and the GBS emulator are set
+ * up once; every call reason returns CODEC_OK. */
+enum codec_status codec_main(enum codec_entry_call_reason reason)
+{
+    if (reason != CODEC_LOAD)
+        return CODEC_OK;
+
+    /* output is 16-bit, 44100 Hz, interleaved stereo */
+    ci->configure(DSP_SET_SAMPLE_DEPTH, 16);
+    ci->configure(DSP_SET_FREQUENCY, 44100);
+    ci->configure(DSP_SET_STEREO_MODE, STEREO_INTERLEAVED);
+
+    /* emulator runs at the same sample rate as the DSP */
+    Gbs_init(&gbs_emu);
+    Gbs_set_sample_rate(&gbs_emu, 44100);
+
+    return CODEC_OK;
+}
+
+/* Called for each file to process: loads the whole file into the GBS
+ * emulator and plays its sub-tracks in order, starting with track 0.
+ *
+ * A time seek selects sub-track param/1000.  Playback stops on a halt
+ * request or once the track number reaches the track count.  Returns
+ * CODEC_ERROR if the codec cannot be initialised or the file cannot be
+ * loaded, CODEC_OK otherwise. */
+enum codec_status codec_run(void)
+{
+    blargg_err_t err;
+    uint8_t *buf;
+    size_t n;
+    intptr_t param;
+    int track = 0;
+    int playing = 1;
+
+    DEBUGF("GBS: next_track\n");
+    if (codec_init()) {
+        return CODEC_ERROR;
+    }
+
+    codec_set_replaygain(ci->id3);
+
+    /* The emulator needs the complete file in memory */
+    DEBUGF("GBS: request file\n");
+    ci->seek_buffer(0);
+    buf = ci->request_buffer(&n, ci->filesize);
+    if (!buf || n < (size_t)ci->filesize) {
+        DEBUGF("GBS: file load failed\n");
+        return CODEC_ERROR;
+    }
+
+    err = Gbs_load_mem(&gbs_emu, buf, ci->filesize);
+    if (err) {
+        DEBUGF("GBS: Gbs_load_mem failed (%s)\n", err);
+        return CODEC_ERROR;
+    }
+
+    /* A non-empty m3u playlist overrides the internal track count */
+    if (gbs_emu.m3u.size > 0)
+        gbs_emu.track_count = gbs_emu.m3u.size;
+
+    /* Outer loop: one pass per started track */
+    while (playing) {
+        set_codec_track(track);
+
+        /* Inner loop: render the current track chunk by chunk */
+        for (;;) {
+            enum codec_command_action action = ci->get_command(&param);
+
+            if (action == CODEC_ACTION_HALT) {
+                playing = 0;
+                break;
+            }
+
+            if (action == CODEC_ACTION_SEEK_TIME) {
+                track = param/1000;
+                ci->seek_complete();
+                if (track >= gbs_emu.track_count)
+                    playing = 0;
+                break;
+            }
+
+            /* Generate audio buffer */
+            err = Gbs_play(&gbs_emu, CHUNK_SIZE, samples);
+            if (err || Track_ended(&gbs_emu)) {
+                track++;
+                if (track >= gbs_emu.track_count)
+                    playing = 0;
+                break;
+            }
+
+            ci->pcmbuf_insert(samples, NULL, CHUNK_SIZE >> 1);
+        }
+    }
+
+    return CODEC_OK;
+}
--- a/lib/rbcodec/codecs/hes.c
+++ b/lib/rbcodec/codecs/hes.c
@ -0,0 +1,108 @@
+/* Ripped off from Game_Music_Emu 0.5.2. http://www.slack.net/~ant/ */
+
+#include <string.h>
+#include "codeclib.h"
+#include "libgme/hes_emu.h"
+ 
+CODEC_HEADER
+
+/* Maximum number of bytes to process in one iteration */
+#define CHUNK_SIZE (1024*2)
+
+static int16_t samples[CHUNK_SIZE] IBSS_ATTR;
+static struct Hes_Emu hes_emu;
+
+/****************** rockbox interface ******************/
+
+/* Start sub-track t in the HES emulator and publish the track number to
+ * the UI through the elapsed-time field (t * 1000). */
+static void set_codec_track(int t)
+{
+    Hes_start_track(&hes_emu, t);
+
+    if (ci->loop_track()) {
+        /* loop mode: leave the track without a length limit */
+    } else {
+        /* otherwise fade out over 4000 once the track length is reached */
+        Track_set_fade(&hes_emu, Track_get_length( &hes_emu, t ), 4000);
+    }
+
+    /* t is the track number to display */
+    ci->set_elapsed(t*1000);
+}
+
+/* Codec entry point.  On CODEC_LOAD the DSP and the HES emulator are set
+ * up once; every call reason returns CODEC_OK. */
+enum codec_status codec_main(enum codec_entry_call_reason reason)
+{
+    if (reason != CODEC_LOAD)
+        return CODEC_OK;
+
+    /* output is 16-bit, 44100 Hz, interleaved stereo */
+    ci->configure(DSP_SET_SAMPLE_DEPTH, 16);
+    ci->configure(DSP_SET_FREQUENCY, 44100);
+    ci->configure(DSP_SET_STEREO_MODE, STEREO_INTERLEAVED);
+
+    /* emulator runs at the same sample rate as the DSP */
+    Hes_init(&hes_emu);
+    Hes_set_sample_rate(&hes_emu, 44100);
+
+    return CODEC_OK;
+}
+
+/* Called for each file to process: loads the whole file into the HES
+ * emulator and plays its sub-tracks in order, starting with track 0.
+ *
+ * A time seek selects sub-track param/1000.  Playback stops on a halt
+ * request or once the track number reaches the track count.  Returns
+ * CODEC_ERROR if the codec cannot be initialised or the file cannot be
+ * loaded, CODEC_OK otherwise. */
+enum codec_status codec_run(void)
+{
+    blargg_err_t err;
+    uint8_t *buf;
+    size_t n;
+    intptr_t param;
+    int track = 0;
+    int playing = 1;
+
+    DEBUGF("HES: next_track\n");
+    if (codec_init()) {
+        return CODEC_ERROR;
+    }
+
+    codec_set_replaygain(ci->id3);
+
+    /* The emulator needs the complete file in memory */
+    DEBUGF("HES: request file\n");
+    ci->seek_buffer(0);
+    buf = ci->request_buffer(&n, ci->filesize);
+    if (!buf || n < (size_t)ci->filesize) {
+        DEBUGF("HES: file load failed\n");
+        return CODEC_ERROR;
+    }
+
+    err = Hes_load_mem(&hes_emu, buf, ci->filesize);
+    if (err) {
+        DEBUGF("HES: Hes_load_mem failed (%s)\n", err);
+        return CODEC_ERROR;
+    }
+
+    /* A non-empty m3u playlist overrides the internal track count */
+    if (hes_emu.m3u.size > 0)
+        hes_emu.track_count = hes_emu.m3u.size;
+
+    /* Outer loop: one pass per started track */
+    while (playing) {
+        set_codec_track(track);
+
+        /* Inner loop: render the current track chunk by chunk */
+        for (;;) {
+            enum codec_command_action action = ci->get_command(&param);
+
+            if (action == CODEC_ACTION_HALT) {
+                playing = 0;
+                break;
+            }
+
+            if (action == CODEC_ACTION_SEEK_TIME) {
+                track = param/1000;
+                ci->seek_complete();
+                if (track >= hes_emu.track_count)
+                    playing = 0;
+                break;
+            }
+
+            /* Generate audio buffer */
+            err = Hes_play(&hes_emu, CHUNK_SIZE, samples);
+            if (err || Track_ended(&hes_emu)) {
+                track++;
+                if (track >= hes_emu.track_count)
+                    playing = 0;
+                break;
+            }
+
+            ci->pcmbuf_insert(samples, NULL, CHUNK_SIZE >> 1);
+        }
+    }
+
+    return CODEC_OK;
+}
--- a/lib/rbcodec/codecs/kss.c
+++ b/lib/rbcodec/codecs/kss.c
@ -0,0 +1,111 @@
+
+/* Ripped off from Game_Music_Emu 0.5.2. http://www.slack.net/~ant/ */
+
+#include <codecs/lib/codeclib.h>
+#include "libgme/kss_emu.h" 
+
+CODEC_HEADER
+
+/* Number of 16-bit samples (interleaved left/right) rendered per iteration */
+#define CHUNK_SIZE (1024*2)
+
+static int16_t samples[CHUNK_SIZE] IBSS_ATTR;
+static struct Kss_Emu kss_emu;
+
+/****************** rockbox interface ******************/
+/* Start sub-track t in the emulator and report it to the player through
+ * the elapsed-time field.
+ */
+static void set_codec_track(int t) {
+    Kss_start_track(&kss_emu, t); 
+
+    /* for REPEAT_ONE we disable track limits: the fade at the track's length
+     * is only armed when the player does not loop this track. The 4000 is
+     * presumably a fade duration in milliseconds -- confirm in libgme. */
+    if (!ci->loop_track()) {
+        Track_set_fade(&kss_emu, Track_get_length( &kss_emu, t ), 4000);
+    }
+    ci->set_elapsed(t*1000); /* t is track no to display */
+}
+
+/* this is the codec entry point
+ *
+ * On CODEC_LOAD the DSP is configured for 16-bit, 44100 Hz, interleaved
+ * stereo output and the KSS emulator is initialised at the same sample
+ * rate. Any other reason is ignored. Always returns CODEC_OK.
+ */
+enum codec_status codec_main(enum codec_entry_call_reason reason)
+{
+    if (reason == CODEC_LOAD) {
+        /* we only render 16 bits */
+        ci->configure(DSP_SET_SAMPLE_DEPTH, 16);
+
+        /* 44 Khz, Interleaved stereo */
+        ci->configure(DSP_SET_FREQUENCY, 44100);
+        ci->configure(DSP_SET_STEREO_MODE, STEREO_INTERLEAVED);
+
+        Kss_init(&kss_emu);
+        Kss_set_sample_rate(&kss_emu, 44100); /* must match DSP_SET_FREQUENCY above */
+    }
+
+    return CODEC_OK;
+}
+
+/* Decode one KSS file; this is called for each file to process.
+ *
+ * Resets the codec allocator via codec_init(), applies the replaygain
+ * settings, requests the complete file with ci->request_buffer() and passes
+ * it to Kss_load_mem(). Sub-tracks are then rendered one after another,
+ * starting with track 0, until the last one ends or a halt is requested.
+ *
+ * Returns CODEC_ERROR when init, the file request or Kss_load_mem() fails,
+ * CODEC_OK otherwise.
+ */
+enum codec_status codec_run(void)
+{
+    blargg_err_t err;
+    uint8_t *buf;
+    size_t n;
+    intptr_t param;
+    int track = 0;
+
+    DEBUGF("KSS: next_track\n");
+    if (codec_init()) {
+        return CODEC_ERROR;
+    }
+
+    codec_set_replaygain(ci->id3);
+
+    /* Read the entire file */
+    DEBUGF("KSS: request file\n");
+    ci->seek_buffer(0);
+    buf = ci->request_buffer(&n, ci->filesize);
+    if (!buf || n < (size_t)ci->filesize) {
+        /* a partial file is treated as a failure */
+        DEBUGF("KSS: file load failed\n");
+        return CODEC_ERROR;
+    }
+
+    if ((err = Kss_load_mem(&kss_emu, buf, ci->filesize))) {
+        /* name the function that actually failed */
+        DEBUGF("KSS: Kss_load_mem failed (%s)\n", err);
+        return CODEC_ERROR;
+    }
+
+    /* Update internal track count: a non-empty m3u list overrides the
+     * count the emulator currently holds */
+    if (kss_emu.m3u.size > 0)
+        kss_emu.track_count = kss_emu.m3u.size;
+
+next_track: /* re-entered for every track change (seek or end of track) */
+    set_codec_track(track);
+
+    /* The main decoder loop */
+    while (1) {
+        enum codec_command_action action = ci->get_command(&param);
+
+        if (action == CODEC_ACTION_HALT)
+            break;
+
+        if (action == CODEC_ACTION_SEEK_TIME) {
+            /* set_codec_track() reports track*1000 as elapsed time, so a
+             * seek target divided by 1000 selects the sub-track */
+            track = param/1000;
+            ci->seek_complete();
+            if (track >= kss_emu.track_count) break;
+            goto next_track;
+        }
+
+        /* Generate audio buffer */
+        err = Kss_play(&kss_emu, CHUNK_SIZE, samples);
+        if (err || Track_ended(&kss_emu)) {
+            /* an emulator error also advances to the next track */
+            track++;
+            if (track >= kss_emu.track_count) break;
+            goto next_track;
+        }
+
+        /* samples[] holds CHUNK_SIZE interleaved values; half of that is
+         * passed on as the count */
+        ci->pcmbuf_insert(samples, NULL, CHUNK_SIZE >> 1);
+    }
+
+    return CODEC_OK;
+}
--- a/lib/rbcodec/codecs/lib/SOURCES
+++ b/lib/rbcodec/codecs/lib/SOURCES
@ -0,0 +1,12 @@
+#if CONFIG_CODEC == SWCODEC /* software codec platforms */
+codeclib.c
+fixedpoint.c
+ffmpeg_bitstream.c
+
+mdct_lookup.c
+fft-ffmpeg.c
+mdct.c
+
+#elif (CONFIG_PLATFORM & PLATFORM_HOSTED) && defined(__APPLE__)
+osx.dummy.c
+#endif
--- a/lib/rbcodec/codecs/lib/asm_arm.h
+++ b/lib/rbcodec/codecs/lib/asm_arm.h
@ -0,0 +1,292 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE.   *
+ *                                                                  *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002    *
+ * BY THE Xiph.Org FOUNDATION http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+ function: arm7 and later wide math functions
+
+ ********************************************************************/
+#ifdef CPU_ARM
+/* MULT32(x, y): signed 32x32-bit multiply returning the upper 32 bits of
+ * the 64-bit product. ARMv6 and later use smmul, which produces the high
+ * word directly; older cores use smull and discard the low word.
+ * INCL_OPTIMIZED_MULT32 presumably tells the generic code to skip its own
+ * fallback -- confirm against the header that includes this file.
+ */
+#define INCL_OPTIMIZED_MULT32
+#if ARM_ARCH >= 6
+static inline int32_t MULT32(int32_t x, int32_t y) {
+  int32_t hi;
+  asm volatile("smmul %[hi], %[x], %[y] \n\t"
+               : [hi] "=&r" (hi)
+               : [x] "r" (x), [y] "r" (y) );
+  return(hi);
+}
+#else
+static inline int32_t MULT32(int32_t x, int32_t y) {
+  int32_t lo, hi; /* lo only receives the discarded low word */
+  asm volatile("smull\t%0, %1, %2, %3 \n\t"
+               : "=&r"(lo),"=&r"(hi)
+               : "r"(x),"r"(y) );
+  return(hi);
+}
+#endif
+
+#define INCL_OPTIMIZED_MULT31
+/* MULT31(x, y): upper 32 bits of the 64-bit product of x and y, shifted
+ * left by one (so the lowest result bit is always zero).
+ */
+static inline int32_t MULT31(int32_t x, int32_t y) {
+  /* Shift in the unsigned domain: left-shifting a negative signed value is
+     undefined behaviour in C, whereas this form yields the same bit pattern
+     with defined semantics. */
+  return (int32_t)((uint32_t)MULT32(x,y) << 1);
+}
+/* MULT31_SHIFT15(x, y): 64-bit product of x and y shifted right by 15 and
+ * truncated to 32 bits. movs leaves the last bit shifted out of the low
+ * word in the carry flag and adc adds it back, so the result is rounded.
+ */
+#define INCL_OPTIMIZED_MULT31_SHIFT15
+static inline int32_t MULT31_SHIFT15(int32_t x, int32_t y) {
+  int32_t lo,hi;
+  asm volatile("smull   %0, %1, %2, %3\n\t"     /* lo:hi = x * y */
+               "movs    %0, %0, lsr #15\n\t"    /* lo >>= 15, carry = bit 14 */
+               "adc %1, %0, %1, lsl #17\n\t"    /* hi = lo + (hi << 17) + carry */
+               : "=&r"(lo),"=&r"(hi)
+               : "r"(x),"r"(y)
+               : "cc" );
+  return(hi);
+}
+/* MULT31_SHIFT16(x, y): 64-bit product of x and y shifted right by 16 and
+ * truncated to 32 bits. As in MULT31_SHIFT15, the carry from movs is added
+ * back by adc, so the result is rounded.
+ */
+#define INCL_OPTIMIZED_MULT31_SHIFT16
+static inline int32_t MULT31_SHIFT16(int32_t x, int32_t y) {
+  int32_t lo,hi;
+  asm volatile("smull   %0, %1, %2, %3\n\t"     /* lo:hi = x * y */
+               "movs    %0, %0, lsr #16\n\t"    /* lo >>= 16, carry = bit 15 */
+               "adc %1, %0, %1, lsl #16\n\t"    /* hi = lo + (hi << 16) + carry */
+               : "=&r"(lo),"=&r"(hi)
+               : "r"(x),"r"(y)
+               : "cc" );
+  return(hi);
+}
+/* XPROD32(a, b, t, v, x, y): cross product with 64-bit accumulation.
+ *   x = high 32 bits of (a*t + b*v)
+ *   y = high 32 bits of (b*t - a*v)
+ * x and y must be lvalues. The low word is kept in the scratch variable l,
+ * and y temporarily holds -v between the rsb and the second smull.
+ */
+#define INCL_OPTIMIZED_XPROD32
+#define XPROD32(a, b, t, v, x, y) \
+{ \
+  int32_t l; \
+  asm("smull  %0, %1, %3, %5\n\t" \
+      "rsb    %2, %6, #0\n\t" \
+      "smlal  %0, %1, %4, %6\n\t" \
+      "smull  %0, %2, %3, %2\n\t" \
+      "smlal  %0, %2, %4, %5" \
+      : "=&r" (l), "=&r" (x), "=&r" (y) \
+      : "r" ((a)), "r" ((b)), "r" ((t)), "r" ((v)) ); \
+}
+/* XPROD31_R / XNPROD31_R: cross products whose high word is shifted left
+ * by one before being assigned to the lvalues _x and _y.
+ *   XPROD31_R:   _x ~ a*t + b*v    _y ~ b*t - a*v
+ *   XNPROD31_R:  _x ~ a*t - b*v    _y ~ b*t + a*v
+ * The macros declare locals named x1, y1 (and l), so callers must not pass
+ * variables with those names as _x or _y.
+ */
+#define INCL_OPTIMIZED_XPROD31_R
+#define INCL_OPTIMIZED_XNPROD31_R
+#if ARM_ARCH >= 6
+/* These may yield slightly different result from the macros below
+   because only the high 32 bits of the multiplications are accumulated while
+   the below macros use a 64 bit accumulator that is truncated to 32 bits.*/
+#define XPROD31_R(_a, _b, _t, _v, _x, _y)\
+{\
+  int32_t x1, y1;\
+  asm("smmul  %[x1], %[t], %[a] \n\t"\
+      "smmul  %[y1], %[t], %[b] \n\t"\
+      "smmla  %[x1], %[v], %[b], %[x1] \n\t"\
+      "smmls  %[y1], %[v], %[a], %[y1] \n\t"\
+      : [x1] "=&r" (x1), [y1] "=&r" (y1)\
+      : [a] "r" (_a), [b] "r" (_b), [t] "r" (_t), [v] "r" (_v) );\
+  _x = x1 << 1;\
+  _y = y1 << 1;\
+}
+
+#define XNPROD31_R(_a, _b, _t, _v, _x, _y)\
+{\
+  int32_t x1, y1;\
+  asm("smmul  %[x1], %[t], %[a] \n\t"\
+      "smmul  %[y1], %[t], %[b] \n\t"\
+      "smmls  %[x1], %[v], %[b], %[x1] \n\t"\
+      "smmla  %[y1], %[v], %[a], %[y1] \n\t"\
+      : [x1] "=&r" (x1), [y1] "=&r" (y1)\
+      : [a] "r" (_a), [b] "r" (_b), [t] "r" (_t), [v] "r" (_v) );\
+  _x = x1 << 1;\
+  _y = y1 << 1;\
+}
+#else
+#define XPROD31_R(_a, _b, _t, _v, _x, _y)\
+{\
+  int32_t x1, y1, l;\
+  asm("smull  %0, %1, %5, %3\n\t"\
+      "rsb    %2, %3, #0\n\t"\
+      "smlal  %0, %1, %6, %4\n\t"\
+      "smull  %0, %2, %6, %2\n\t"\
+      "smlal  %0, %2, %5, %4"\
+      : "=&r" (l), "=&r" (x1), "=&r" (y1)\
+      : "r" (_a), "r" (_b), "r" (_t), "r" (_v) );\
+  _x = x1 << 1;\
+  _y = y1 << 1;\
+}
+
+#define XNPROD31_R(_a, _b, _t, _v, _x, _y)\
+{\
+  int32_t x1, y1, l;\
+  asm("smull  %0, %1, %5, %3\n\t"\
+      "rsb    %2, %4, #0\n\t"\
+      "smlal  %0, %1, %6, %2\n\t"\
+      "smull  %0, %2, %5, %4\n\t"\
+      "smlal  %0, %2, %6, %3"\
+      : "=&r" (l), "=&r" (x1), "=&r" (y1)\
+      : "r" (_a), "r" (_b), "r" (_t), "r" (_v) );\
+  _x = x1 << 1;\
+  _y = y1 << 1;\
+}
+#endif
+
+#define INCL_OPTIMIZED_XPROD31
+/* Function form of XPROD31_R: evaluates the macro into two temporaries and
+ * stores them through x and y.
+ */
+static inline void XPROD31(int32_t  a, int32_t  b,
+                           int32_t  t, int32_t  v,
+                           int32_t *x, int32_t *y)
+{
+  /* Named so that they cannot clash with the x1/y1 locals the macro
+     declares internally. */
+  int32_t res_x, res_y;
+
+  XPROD31_R(a, b, t, v, res_x, res_y);
+
+  *x = res_x;
+  *y = res_y;
+}
+
+#define INCL_OPTIMIZED_XNPROD31
+/* Function form of XNPROD31_R: evaluates the macro into two temporaries and
+ * stores them through x and y.
+ */
+static inline void XNPROD31(int32_t  a, int32_t  b,
+                            int32_t  t, int32_t  v,
+                            int32_t *x, int32_t *y)
+{
+  /* Named so that they cannot clash with the x1/y1 locals the macro
+     declares internally. */
+  int32_t res_x, res_y;
+
+  XNPROD31_R(a, b, t, v, res_x, res_y);
+
+  *x = res_x;
+  *y = res_y;
+}
+
+
+#ifndef _V_VECT_OPS
+#define _V_VECT_OPS
+
+/* asm versions of vector operations for block.c, window.c
+ *
+ * vect_add: x[i] += y[i] for i in 0..n-1. Four elements per iteration are
+ * handled with load/store-multiple through r0-r7; the remaining 0..3
+ * elements are added in plain C.
+ */
+static inline
+void vect_add(int32_t *x, const int32_t *y, int n)
+{
+  while (n>=4) {
+    asm volatile ("ldmia %[x], {r0, r1, r2, r3};"     /* x is not advanced until the store */
+                  "ldmia %[y]!, {r4, r5, r6, r7};"
+                  "add r0, r0, r4;"
+                  "add r1, r1, r5;"
+                  "add r2, r2, r6;"
+                  "add r3, r3, r7;"
+                  "stmia %[x]!, {r0, r1, r2, r3};"
+                  : [x] "+r" (x), [y] "+r" (y)
+                  : : "r0", "r1", "r2", "r3",
+                  "r4", "r5", "r6", "r7",
+                  "memory");
+    n -= 4;
+  }
+  /* add final elements */
+  while (n>0) {
+    *x++ += *y++;
+    n--;
+  }
+}
+/* vect_copy: x[i] = y[i] for i in 0..n-1, four words at a time through
+ * r0-r3, with the remaining 0..3 elements copied in plain C.
+ */
+static inline
+void vect_copy(int32_t *x, const int32_t *y, int n)
+{
+  while (n>=4) {
+    asm volatile ("ldmia %[y]!, {r0, r1, r2, r3};"
+                  "stmia %[x]!, {r0, r1, r2, r3};"
+                  : [x] "+r" (x), [y] "+r" (y)
+                  : : "r0", "r1", "r2", "r3",
+                  "memory");
+    n -= 4;
+  }
+  /* copy final elements */
+  while (n>0) {
+    *x++ = *y++;
+    n--;
+  }
+}
+/* vect_mult_fw: data[i] = MULT31(data[i], window[i]) for i in 0..n-1, with
+ * the window read in ascending order. The asm path multiplies four pairs
+ * per iteration with smull and stores the high word shifted left by one;
+ * the remaining 0..3 elements go through MULT31().
+ */
+static inline
+void vect_mult_fw(int32_t *data, const int32_t *window, int n)
+{
+  while (n>=4) {
+    asm volatile ("ldmia %[d], {r0, r1, r2, r3};"
+                  "ldmia %[w]!, {r4, r5, r6, r7};"
+                  "smull r8, r9, r0, r4;"             /* r8 = low word (unused), r9 = high word */
+                  "mov   r0, r9, lsl #1;"
+                  "smull r8, r9, r1, r5;"
+                  "mov   r1, r9, lsl #1;"
+                  "smull r8, r9, r2, r6;"
+                  "mov   r2, r9, lsl #1;"
+                  "smull r8, r9, r3, r7;"
+                  "mov   r3, r9, lsl #1;"
+                  "stmia %[d]!, {r0, r1, r2, r3};"
+                  : [d] "+r" (data), [w] "+r" (window)
+                  : : "r0", "r1", "r2", "r3",
+                  "r4", "r5", "r6", "r7", "r8", "r9",
+                  "memory" );
+    n -= 4;
+  }
+  while(n>0) {
+    *data = MULT31(*data, *window);
+    data++;
+    window++;
+    n--;
+  }
+}
+/* vect_mult_bw: data[i] = MULT31(data[i], window[-i]) for i in 0..n-1; the
+ * window pointer designates the first coefficient and is walked downwards.
+ * ldmda loads the four words ending at the pointer, so r7 holds the
+ * coefficient at the pointer and r4 the one three words below it.
+ */
+static inline
+void vect_mult_bw(int32_t *data, const int32_t *window, int n)
+{
+  while (n>=4) {
+    asm volatile ("ldmia %[d], {r0, r1, r2, r3};"
+                  "ldmda %[w]!, {r4, r5, r6, r7};"
+                  "smull r8, r9, r0, r7;"             /* r8 = low word (unused), r9 = high word */
+                  "mov   r0, r9, lsl #1;"
+                  "smull r8, r9, r1, r6;"
+                  "mov   r1, r9, lsl #1;"
+                  "smull r8, r9, r2, r5;"
+                  "mov   r2, r9, lsl #1;"
+                  "smull r8, r9, r3, r4;"
+                  "mov   r3, r9, lsl #1;"
+                  "stmia %[d]!, {r0, r1, r2, r3};"
+                  : [d] "+r" (data), [w] "+r" (window)
+                  : : "r0", "r1", "r2", "r3",
+                  "r4", "r5", "r6", "r7", "r8", "r9",
+                  "memory" );
+    n -= 4;
+  }
+  while(n>0) {
+    *data = MULT31(*data, *window);
+    data++;
+    window--;
+    n--;
+  }
+}
+
+#endif
+
+/* not used anymore */
+/*
+#ifndef _V_CLIP_MATH
+#define _V_CLIP_MATH
+
+static inline int32_t CLIP_TO_15(int32_t x) {
+  int tmp;
+  asm volatile("subs    %1, %0, #32768\n\t"
+           "movpl   %0, #0x7f00\n\t"
+           "orrpl   %0, %0, #0xff\n"
+           "adds    %1, %0, #32768\n\t"
+           "movmi   %0, #0x8000"
+           : "+r"(x),"=r"(tmp)
+           :
+           : "cc");
+  return(x);
+}
+
+#endif
+*/
+
+#endif
+
--- a/lib/rbcodec/codecs/lib/asm_mcf5249.h
+++ b/lib/rbcodec/codecs/lib/asm_mcf5249.h
@ -0,0 +1,353 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ *
+ * Copyright (C) 2005 by Pedro Vasconcelos
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+/* asm routines for wide math on the MCF5249 */
+
+#if defined(CPU_COLDFIRE)
+/* MULT32(x, y): multiplies x and y through EMAC accumulator 0, reads the
+ * accumulator back (clearing it) and halves the value with an arithmetic
+ * shift. NOTE(review): this presumably relies on the EMAC being set up
+ * elsewhere so that acc0 holds (x*y)>>31, making the result (x*y)>>32 --
+ * confirm the EMAC mode used by the callers.
+ */
+#define INCL_OPTIMIZED_MULT32
+static inline int32_t MULT32(int32_t x, int32_t y) {
+
+  asm volatile ("mac.l %[x], %[y], %%acc0;"    /* multiply & shift  */
+                "movclr.l %%acc0, %[x];"       /* move & clear acc */
+                "asr.l #1, %[x];"              /* no overflow test */
+                : [x] "+&d" (x)
+                : [y] "r" (y)
+                : "cc");
+  return x;
+}
+/* MULT31(x, y): multiplies x and y through EMAC accumulator 0 and returns
+ * the value read back from the accumulator, which is cleared by the read.
+ * NOTE(review): presumably (x*y)>>31 given the EMAC mode -- confirm.
+ */
+#define INCL_OPTIMIZED_MULT31
+static inline int32_t MULT31(int32_t x, int32_t y) {
+  asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply */
+                "movclr.l %%acc0, %[x];"    /* move and clear */
+                : [x] "+&r" (x)
+                : [y] "r" (y)
+                : "cc");
+  return x;
+}
+/* MULT31_SHIFT15(x, y): builds the product shifted right by 15 from two
+ * halves. The upper 16 result bits come from the EMAC accumulator (moved
+ * into the top half of r by swap), the lower 16 bits from the low word of
+ * the integer product (mulu.l) shifted right by 15.
+ */
+#define INCL_OPTIMIZED_MULT31_SHIFT15
+/* NOTE: this requires that the emac is *NOT* rounding */
+static inline int32_t MULT31_SHIFT15(int32_t x, int32_t y) {
+  int32_t r;
+
+  asm volatile ("mac.l %[x], %[y], %%acc0;"  /* multiply */
+                "mulu.l %[y], %[x];"         /* get lower half, avoid emac stall */
+                "movclr.l %%acc0, %[r];"     /* get higher half */
+                "swap %[r];"                 /* hi<<16, plus one free */
+                "lsr.l #8, %[x];"            /* (unsigned)lo >> 15 */
+                "lsr.l #7, %[x];"
+                "move.w %[x], %[r];"         /* logical-or results */
+                : [r] "=&d" (r), [x] "+d" (x)
+                : [y] "d" (y)
+                : "cc");
+  return r;
+}
+/* MULT31_SHIFT16(x, y): builds the product shifted right by 16 from two
+ * halves. The low word of the integer product stays in x, its low 16 bits
+ * are replaced with the halved accumulator value, and swap exchanges the
+ * two halves. The result is returned in x; r is scratch only.
+ */
+#define INCL_OPTIMIZED_MULT31_SHIFT16
+static inline int32_t MULT31_SHIFT16(int32_t x, int32_t y) {
+  int32_t r;
+
+  asm volatile ("mac.l %[x], %[y], %%acc0;"  /* multiply */
+                "mulu.l %[y], %[x];"         /* get lower half, avoid emac stall */
+                "movclr.l %%acc0, %[r];"     /* get higher half */
+                "lsr.l #1, %[r];"            /* hi >> 1, to compensate emac shift */
+                "move.w %[r], %[x];"         /* x = x & 0xffff0000 | r & 0xffff */
+                "swap %[x];"                 /* x = (unsigned)x << 16 | (unsigned)x >> 16 */
+                : [r] "=&d" (r), [x] "+d" (x)
+                : [y] "d" (y)
+                : "cc");
+  return x;
+}
+/* XPROD31: cross product accumulated in the EMAC.
+ *   *x = acc0 = a*t + b*v
+ *   *y = acc1 = b*t - a*v
+ * Both accumulators are cleared when read back. a is reused as the scratch
+ * register for the stores, hence its read-write constraint.
+ */
+#define INCL_OPTIMIZED_XPROD31
+static inline
+void XPROD31(int32_t  a, int32_t  b,
+             int32_t  t, int32_t  v,
+             int32_t *x, int32_t *y)
+{
+  asm volatile ("mac.l %[a], %[t], %%acc0;"
+                "mac.l %[b], %[v], %%acc0;"
+                "mac.l %[b], %[t], %%acc1;"
+                "msac.l %[a], %[v], %%acc1;"
+                "movclr.l %%acc0, %[a];"
+                "move.l %[a], (%[x]);"
+                "movclr.l %%acc1, %[a];"
+                "move.l %[a], (%[y]);"
+                : [a] "+&r" (a)
+                : [x] "a" (x), [y] "a" (y),
+                  [b] "r" (b), [t] "r" (t), [v] "r" (v)
+                : "cc", "memory");
+}
+/* XNPROD31: cross product accumulated in the EMAC, with the signs swapped
+ * relative to XPROD31.
+ *   *x = acc0 = a*t - b*v
+ *   *y = acc1 = b*t + a*v
+ * Both accumulators are cleared when read back. a is reused as the scratch
+ * register for the stores, hence its read-write constraint.
+ */
+#define INCL_OPTIMIZED_XNPROD31
+static inline
+void XNPROD31(int32_t  a, int32_t  b,
+              int32_t  t, int32_t  v,
+              int32_t *x, int32_t *y)
+{
+  asm volatile ("mac.l %[a], %[t], %%acc0;"
+                "msac.l %[b], %[v], %%acc0;"
+                "mac.l %[b], %[t], %%acc1;"
+                "mac.l %[a], %[v], %%acc1;"
+                "movclr.l %%acc0, %[a];"
+                "move.l %[a], (%[x]);"
+                "movclr.l %%acc1, %[a];"
+                "move.l %[a], (%[y]);"
+                : [a] "+&r" (a)
+                : [x] "a" (x), [y] "a" (y),
+                  [b] "r" (b), [t] "r" (t), [v] "r" (v)
+                : "cc", "memory");
+}
+
+
+/* XPROD32(_a, _b, _t, _v, _x, _y): cross product accumulated in the EMAC,
+ * with each accumulator value halved after it is read back:
+ *   _x = (a*t + b*v) >> 1 of the accumulator value
+ *   _y = (b*t - a*v) >> 1 of the accumulator value
+ * This could lose the LSB by overflow, but I don't think it will ever
+ * happen. If anyone thinks they can hear a bug caused by this, please try
+ * the alternative version this note originally pointed to.
+ * NOTE(review): that alternative ("the above version") is not present in
+ * this file. The macro expands to a bare statement ending in ';', so it is
+ * not safe as the unbraced body of an if/else.
+ */
+#define INCL_OPTIMIZED_XPROD32
+#define XPROD32(_a, _b, _t, _v, _x, _y)     \
+  asm volatile ("mac.l %[a], %[t], %%acc0;" \
+                "mac.l %[b], %[v], %%acc0;" \
+                "mac.l %[b], %[t], %%acc1;" \
+                "msac.l %[a], %[v], %%acc1;" \
+                "movclr.l %%acc0, %[x];" \
+                "asr.l #1, %[x];" \
+                "movclr.l %%acc1, %[y];" \
+                "asr.l #1, %[y];" \
+                : [x] "=d" (_x), [y] "=d" (_y) \
+                : [a] "r" (_a), [b] "r" (_b), \
+                  [t] "r" (_t), [v] "r" (_v) \
+                : "cc");
+/* XPROD31_R(_a, _b, _t, _v, _x, _y): register-result form of XPROD31.
+ *   _x = acc0 = a*t + b*v
+ *   _y = acc1 = b*t - a*v
+ * _x and _y must be lvalues. The macro expands to a bare statement ending
+ * in ';'.
+ */
+#define INCL_OPTIMIZED_XPROD31_R
+#define XPROD31_R(_a, _b, _t, _v, _x, _y)   \
+  asm volatile ("mac.l %[a], %[t], %%acc0;" \
+                "mac.l %[b], %[v], %%acc0;" \
+                "mac.l %[b], %[t], %%acc1;" \
+                "msac.l %[a], %[v], %%acc1;" \
+                "movclr.l %%acc0, %[x];" \
+                "movclr.l %%acc1, %[y];" \
+                : [x] "=r" (_x), [y] "=r" (_y) \
+                : [a] "r" (_a), [b] "r" (_b), \
+                  [t] "r" (_t), [v] "r" (_v) \
+                : "cc");
+/* XNPROD31_R(_a, _b, _t, _v, _x, _y): register-result form of XNPROD31.
+ *   _x = acc0 = a*t - b*v
+ *   _y = acc1 = b*t + a*v
+ * _x and _y must be lvalues. The macro expands to a bare statement ending
+ * in ';'.
+ */
+#define INCL_OPTIMIZED_XNPROD31_R
+#define XNPROD31_R(_a, _b, _t, _v, _x, _y)  \
+  asm volatile ("mac.l %[a], %[t], %%acc0;" \
+                "msac.l %[b], %[v], %%acc0;" \
+                "mac.l %[b], %[t], %%acc1;" \
+                "mac.l %[a], %[v], %%acc1;" \
+                "movclr.l %%acc0, %[x];" \
+                "movclr.l %%acc1, %[y];" \
+                : [x] "=r" (_x), [y] "=r" (_y) \
+                : [a] "r" (_a), [b] "r" (_b), \
+                  [t] "r" (_t), [v] "r" (_v) \
+                : "cc");
+
+#ifndef _V_VECT_OPS
+#define _V_VECT_OPS
+
+/* asm versions of vector operations for block.c, window.c */
+/* assumes MAC is initialized & accumulators cleared
+ *
+ * vect_add: x[i] += y[i] for i in 0..n-1, in three phases: scalar adds
+ * until x is 16-byte aligned, an asm loop handling four elements at a time
+ * with movem.l, and a scalar tail for the last 0..3 elements.
+ */
+static inline
+void vect_add(int32_t *x, const int32_t *y, int n)
+{
+  /* align to 16 bytes */
+  while(n>0 && (int)x&15) {
+    *x++ += *y++;
+    n--;
+  }
+  asm volatile ("bra 1f;"                     /* test n before the first pass */
+                "0:"                          /* loop start */
+                "movem.l (%[x]), %%d0-%%d3;"  /* fetch values */
+                "movem.l (%[y]), %%a0-%%a3;"
+                /* add */
+                "add.l %%a0, %%d0;"
+                "add.l %%a1, %%d1;"
+                "add.l %%a2, %%d2;"
+                "add.l %%a3, %%d3;"
+                /* store and advance */
+                "movem.l %%d0-%%d3, (%[x]);"
+                "lea.l (4*4, %[x]), %[x];"
+                "lea.l (4*4, %[y]), %[y];"
+                "subq.l #4, %[n];"     /* done 4 elements */
+                "1: cmpi.l #4, %[n];"
+                "bge 0b;"
+                : [n] "+d" (n), [x] "+a" (x), [y] "+a" (y)
+                : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
+                    "cc", "memory");
+  /* add final elements */
+  while (n>0) {
+    *x++ += *y++;
+    n--;
+  }
+}
+/* vect_copy: x[i] = y[i] for i in 0..n-1, in three phases: scalar copies
+ * until x is 16-byte aligned, an asm loop moving four words at a time with
+ * movem.l, and a scalar tail for the last 0..3 elements.
+ */
+static inline
+void vect_copy(int32_t *x, const int32_t *y, int n)
+{
+  /* align to 16 bytes */
+  while(n>0 && (int)x&15) {
+    *x++ = *y++;
+    n--;
+  }
+  asm volatile ("bra 1f;"                               /* test n before the first pass */
+                "0:"                                    /* loop start */
+                "movem.l (%[y]), %%d0-%%d3;"            /* fetch values */
+                "movem.l %%d0-%%d3, (%[x]);"            /* store */
+                "lea.l (4*4, %[x]), %[x];"              /* advance */
+                "lea.l (4*4, %[y]), %[y];"
+                "subq.l #4, %[n];"                      /* done 4 elements */
+                "1: cmpi.l #4, %[n];"
+                "bge 0b;"
+                : [n] "+d" (n), [x] "+a" (x), [y] "+a" (y)
+                : : "%d0", "%d1", "%d2", "%d3", "cc", "memory");
+  /* copy final elements */
+  while (n>0) {
+    *x++ = *y++;
+    n--;
+  }
+}
+/* vect_mult_fw: data[i] = data[i] * window[i] through the EMAC for i in
+ * 0..n-1, with the window read in ascending order. Scalar MULT31() calls
+ * run until data is 16-byte aligned; the asm part then handles four
+ * elements per pass plus a 0..3 element tail.
+ *
+ * NOTE(review): the asm pre-fetches four words from both data and window
+ * before n is tested, and each loop pass loads the next four window and
+ * data words before the loop condition is re-checked, so up to four words
+ * past the processed range of each array are read (never written).
+ */
+static inline
+void vect_mult_fw(int32_t *data, const int32_t *window, int n)
+{
+  /* ensure data is aligned to 16-bytes */
+  while(n>0 && (int)data&15) {
+    *data = MULT31(*data, *window);
+    data++;
+    window++;
+    n--;
+  }
+  asm volatile ("movem.l (%[d]), %%d0-%%d3;"  /* loop start */
+                "movem.l (%[w]), %%a0-%%a3;"  /* pre-fetch registers */
+                "lea.l (4*4, %[w]), %[w];"
+                "bra 1f;"               /* jump to loop condition */
+                "0:" /* loop body */
+                /* multiply and load next window values */
+                "mac.l %%d0, %%a0, (%[w])+, %%a0, %%acc0;"
+                "mac.l %%d1, %%a1, (%[w])+, %%a1, %%acc1;"
+                "mac.l %%d2, %%a2, (%[w])+, %%a2, %%acc2;"
+                "mac.l %%d3, %%a3, (%[w])+, %%a3, %%acc3;"
+                "movclr.l %%acc0, %%d0;"  /* get the products */
+                "movclr.l %%acc1, %%d1;"
+                "movclr.l %%acc2, %%d2;"
+                "movclr.l %%acc3, %%d3;"
+                /* store and advance */
+                "movem.l %%d0-%%d3, (%[d]);"
+                "lea.l (4*4, %[d]), %[d];"
+                "movem.l (%[d]), %%d0-%%d3;"
+                "subq.l #4, %[n];"     /* done 4 elements */
+                "1: cmpi.l #4, %[n];"
+                "bge 0b;"
+                /* multiply final elements */
+                "tst.l %[n];"
+                "beq 1f;"      /* n=0 */
+                "mac.l %%d0, %%a0, %%acc0;"
+                "movclr.l %%acc0, %%d0;"
+                "move.l %%d0, (%[d])+;"
+                "subq.l #1, %[n];"
+                "beq 1f;"     /* n=1 */
+                "mac.l %%d1, %%a1, %%acc0;"
+                "movclr.l %%acc0, %%d1;"
+                "move.l %%d1, (%[d])+;"
+                "subq.l #1, %[n];"
+                "beq 1f;"     /* n=2 */
+                /* otherwise n = 3 */
+                "mac.l %%d2, %%a2, %%acc0;"
+                "movclr.l %%acc0, %%d2;"
+                "move.l %%d2, (%[d])+;"
+                "1:"
+                : [n] "+d" (n), [d] "+a" (data), [w] "+a" (window)
+                : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
+                    "cc", "memory");
+}
+/* vect_mult_bw: data[i] = data[i] * window[-i] through the EMAC for i in
+ * 0..n-1; window designates the first coefficient and is walked downwards.
+ * Scalar MULT31() calls run until data is 16-byte aligned; the asm part
+ * then handles four elements per pass plus a 0..3 element tail. After the
+ * initial lea/movem, a3 holds the coefficient at the window pointer and a0
+ * the one three words below it, which is why the pairing is d0*a3 .. d3*a0.
+ *
+ * NOTE(review): the asm pre-fetches four words from data and the four
+ * window words ending at the pointer before n is tested, and each loop
+ * pass loads four more of each before the loop condition is re-checked, so
+ * words outside the processed range of each array are read (never written).
+ */
+static inline
+void vect_mult_bw(int32_t *data, const int32_t *window, int n)
+{
+  /* ensure at least data is aligned to 16-bytes */
+  while(n>0 && (int)data&15) {
+    *data = MULT31(*data, *window);
+    data++;
+    window--;
+    n--;
+  }
+  asm volatile ("lea.l (-3*4, %[w]), %[w];"     /* loop start */
+                "movem.l (%[d]), %%d0-%%d3;"    /* pre-fetch registers */
+                "movem.l (%[w]), %%a0-%%a3;"
+                "bra 1f;"               /* jump to loop condition */
+                "0:" /* loop body */
+                /* multiply and load next window value */
+                "mac.l %%d0, %%a3, -(%[w]), %%a3, %%acc0;"
+                "mac.l %%d1, %%a2, -(%[w]), %%a2, %%acc1;"
+                "mac.l %%d2, %%a1, -(%[w]), %%a1, %%acc2;"
+                "mac.l %%d3, %%a0, -(%[w]), %%a0, %%acc3;"
+                "movclr.l %%acc0, %%d0;"  /* get the products */
+                "movclr.l %%acc1, %%d1;"
+                "movclr.l %%acc2, %%d2;"
+                "movclr.l %%acc3, %%d3;"
+                /* store and advance */
+                "movem.l %%d0-%%d3, (%[d]);"
+                "lea.l (4*4, %[d]), %[d];"
+                "movem.l (%[d]), %%d0-%%d3;"
+                "subq.l #4, %[n];"     /* done 4 elements */
+                "1: cmpi.l #4, %[n];"
+                "bge 0b;"
+                /* multiply final elements */
+                "tst.l %[n];"
+                "beq 1f;"      /* n=0 */
+                "mac.l %%d0, %%a3, %%acc0;"
+                "movclr.l %%acc0, %%d0;"
+                "move.l %%d0, (%[d])+;"
+                "subq.l #1, %[n];"
+                "beq 1f;"     /* n=1 */
+                "mac.l %%d1, %%a2, %%acc0;"
+                "movclr.l %%acc0, %%d1;"
+                "move.l %%d1, (%[d])+;"
+                "subq.l #1, %[n];"
+                "beq 1f;"     /* n=2 */
+                /* otherwise n = 3 */
+                "mac.l %%d2, %%a1, %%acc0;"
+                "movclr.l %%acc0, %%d2;"
+                "move.l %%d2, (%[d])+;"
+                "1:"
+                : [n] "+d" (n), [d] "+a" (data), [w] "+a" (window)
+                : : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
+                    "cc", "memory");
+}
+
+#endif
+
+/* not used anymore */
+/*
+#ifndef _V_CLIP_MATH
+#define _V_CLIP_MATH
+
+* this is portable C and simple; why not use this as default?
+static inline int32_t CLIP_TO_15(register int32_t x) {
+  register int32_t hi=32767, lo=-32768;
+  return (x>=hi ? hi : (x<=lo ? lo : x));
+}
+
+#endif
+*/
+#endif
--- a/lib/rbcodec/codecs/lib/codeclib.c
+++ b/lib/rbcodec/codecs/lib/codeclib.c
@ -0,0 +1,182 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2005 Dave Chapman
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+/* "helper functions" common to all codecs  */
+
+#include <string.h>
+#include "codecs.h"
+#include "dsp.h"
+#include "codeclib.h"
+#include "metadata.h"
+
+/* The following variables are used by codec_malloc() to make use of free RAM
+ * within the statically allocated codec buffer. */
+static size_t mem_ptr = 0;
+static size_t bufsize = 0;
+static unsigned char* mallocbuf = NULL;
+
+int codec_init(void)
+{
+    /* Fetch the codec's spare buffer and its size; codec_get_buffer()
+     * aligns the resulting pointer to CACHEALIGN_SIZE. */
+    mallocbuf = (unsigned char *)ci->codec_get_buffer(&bufsize);
+
+    /* Rewind the allocator so codec_malloc() starts from the beginning of
+     * that buffer again. */
+    mem_ptr = 0;
+
+    return 0;
+}
+/* Pass the track/album gain and peak values stored in the given metadata
+ * entry on to the DSP through ci->configure().
+ */
+void codec_set_replaygain(const struct mp3entry *id3)
+{
+    ci->configure(DSP_SET_TRACK_GAIN, id3->track_gain);
+    ci->configure(DSP_SET_ALBUM_GAIN, id3->album_gain);
+    ci->configure(DSP_SET_TRACK_PEAK, id3->track_peak);
+    ci->configure(DSP_SET_ALBUM_PEAK, id3->album_peak);
+}
+
+/* Various "helper functions" common to all the xxx2wav decoder plugins  */
+
+
+/* Bump allocator over the buffer obtained in codec_init().
+ *
+ * Returns a pointer to size bytes at the current allocation offset, or NULL
+ * when the request does not fit in the remaining space. Nothing is ever
+ * released; codec_init() resets the offset to the start of the buffer.
+ */
+void* codec_malloc(size_t size)
+{
+    void* x;
+
+    /* Compare against the space that is left instead of forming
+     * mem_ptr + size, which can wrap around for a huge request and then
+     * pass the bounds check. The first test covers mem_ptr having been
+     * rounded up past bufsize by an earlier allocation. */
+    if (mem_ptr > bufsize || size > bufsize - mem_ptr)
+        return NULL;
+
+    x=&mallocbuf[mem_ptr];
+
+    /* Keep memory aligned to CACHEALIGN_SIZE. */
+    mem_ptr += (size + (CACHEALIGN_SIZE-1)) & ~(CACHEALIGN_SIZE-1);
+
+    return(x);
+}
+
+/* Allocate a zero-filled array of nmemb elements of size bytes each with
+ * codec_malloc(). Returns NULL when the total byte count does not fit in
+ * size_t or the allocation fails.
+ */
+void* codec_calloc(size_t nmemb, size_t size)
+{
+    size_t total;
+    void* x;
+
+    /* nmemb*size would silently wrap here and yield a block that is far
+     * smaller than the caller expects. */
+    if (size != 0 && nmemb > (size_t)-1 / size)
+        return NULL;
+    total = nmemb*size;
+
+    x = codec_malloc(total);
+    if (x == NULL)
+        return NULL;
+    ci->memset(x,0,total);
+    return(x);
+}
+
+/* No-op: blocks handed out by codec_malloc() are never released
+ * individually. */
+void codec_free(void* ptr)
+{
+    (void)ptr;
+}
+
+/* Returns a fresh block of size bytes from codec_malloc().
+ * NOTE: unlike the standard realloc(), the old block is ignored entirely;
+ * its contents are NOT copied into the new block. */
+void* codec_realloc(void* ptr, size_t size)
+{
+    (void)ptr;
+
+    return codec_malloc(size);
+}
+
+/* The functions below give codecs the usual libc names; each one simply
+ * forwards to the implementation exported through the codec API (ci). */
+
+size_t strlen(const char *s)
+{
+    return ci->strlen(s);
+}
+
+char *strcpy(char *dest, const char *src)
+{
+    return ci->strcpy(dest, src);
+}
+
+char *strcat(char *dest, const char *src)
+{
+    return ci->strcat(dest, src);
+}
+
+int strcmp(const char *s1, const char *s2)
+{
+    return ci->strcmp(s1, s2);
+}
+
+void *memcpy(void *dest, const void *src, size_t n)
+{
+    return ci->memcpy(dest, src, n);
+}
+
+void *memset(void *s, int c, size_t n)
+{
+    return ci->memset(s, c, n);
+}
+
+int memcmp(const void *s1, const void *s2, size_t n)
+{
+    return ci->memcmp(s1, s2, n);
+}
+
+void* memchr(const void *s, int c, size_t n)
+{
+    return ci->memchr(s, c, n);
+}
+
+void *memmove(void *dest, const void *src, size_t n)
+{
+    return ci->memmove(dest, src, n);
+}
+
+void qsort(void *base, size_t nmemb, size_t size,
+           int(*compar)(const void *, const void *))
+{
+    ci->qsort(base, nmemb, size, compar);
+}
+
+/* From ffmpeg - libavutil/common.h
+ * bs_log2_tab[i] is floor(log2(i)) for i in 1..255; entry 0 is 0. */
+const uint8_t bs_log2_tab[256] ICONST_ATTR = {
+    0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+    5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+    6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+    6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+    7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+    7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+    7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+    7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
+};
+/* bs_clz_tab[i] is the number of leading zero bits in the 8-bit value i
+ * (8 for i == 0, 0 for i >= 128). */
+const uint8_t bs_clz_tab[256] ICONST_ATTR = {
+    8,7,6,6,5,5,5,5,4,4,4,4,4,4,4,4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+};
+/* Function entry/exit hooks, compiled only when RB_PROFILE is defined.
+ * Both forward to the profiling callbacks exported through the codec API.
+ * The __cyg_profile_* names are presumably the ones GCC calls under
+ * -finstrument-functions -- confirm against the build flags.
+ */
+#ifdef RB_PROFILE
+void __cyg_profile_func_enter(void *this_fn, void *call_site) {
+/* This workaround is required for coldfire gcc 3.4 but is broken for 4.4
+   and 4.5, but for those the other way works. */
+#if defined(CPU_COLDFIRE) && defined(__GNUC__) && __GNUC__ < 4
+    (void)call_site; /* replaced by the return address looked up below */
+    ci->profile_func_enter(this_fn, __builtin_return_address(1));
+#else
+    ci->profile_func_enter(this_fn, call_site);
+#endif
+}
+
+void __cyg_profile_func_exit(void *this_fn, void *call_site) {
+    ci->profile_func_exit(this_fn,call_site);
+}
+#endif
--- a/lib/rbcodec/codecs/lib/codeclib.h
+++ b/lib/rbcodec/codecs/lib/codeclib.h
@ -0,0 +1,163 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2005 Dave Chapman
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#ifndef __CODECLIB_H__
+#define __CODECLIB_H__
+
+#include <inttypes.h>
+#include <string.h>
+#include "config.h"
+#include "codecs.h"
+#include "mdct.h"
+#include "fft.h"
+
+extern struct codec_api *ci;
+
+/* Standard library functions that are used by the codecs follow here */
+
+/* Get these functions 'out of the way' of the standard functions. Not doing
+ * so confuses the cygwin linker, and maybe others. These functions need to
+ * be implemented elsewhere.
+ *
+ * After including this header, any call written as malloc()/calloc()/
+ * realloc()/free() in codec code is textually redirected to the codec_*
+ * functions declared below, and alloca() always maps to the compiler
+ * builtin. */
+#define malloc(x) codec_malloc(x)
+#define calloc(x,y) codec_calloc(x,y)
+#define realloc(x,y) codec_realloc(x,y)
+#define free(x) codec_free(x)
+#undef alloca
+#define alloca(x) __builtin_alloca(x)
+
+/* Codec-private allocator entry points (definitions live outside this
+ * header). Signatures mirror their libc counterparts. */
+void* codec_malloc(size_t size);
+void* codec_calloc(size_t nmemb, size_t size);
+void* codec_realloc(void* ptr, size_t size);
+void codec_free(void* ptr);
+
+/* Prototypes for the libc-style memory and string helpers the codecs use. */
+void *memcpy(void *dest, const void *src, size_t n);
+void *memset(void *s, int c, size_t n);
+int memcmp(const void *s1, const void *s2, size_t n);
+void *memmove(void *s1, const void *s2, size_t n);
+
+size_t strlen(const char *s);
+char *strcpy(char *dest, const char *src);
+char *strcat(char *dest, const char *src);
+
+/* on some platforms strcmp() seems to be a tricky define which
+ * breaks if we write down strcmp's prototype */
+#undef strcmp
+int strcmp(const char *s1, const char *s2);
+
+void qsort(void *base, size_t nmemb, size_t size, int(*compar)(const void *, const void *));
+
+/* MDCT library functions. All of these operate on 32-bit integer samples
+ * (FFTComplex for the bare FFT); the definitions live outside this header. */
+/* -1- Tremor mdct */
+extern void mdct_backward(int n, int32_t *in, int32_t *out);
+/* -2- ffmpeg fft-based mdct */
+extern void ff_imdct_half(unsigned int nbits, int32_t *output, const int32_t *input);
+extern void ff_imdct_calc(unsigned int nbits, int32_t *output, const int32_t *input);
+/* ffmpeg fft (can be used without mdct) */
+extern void ff_fft_calc_c(int nbits, FFTComplex *z);
+
+/* The byte lookup tables are only declared for targets where bs_generic()
+ * takes its table-driven path, i.e. everything except ARMv5 and later. */
+#if !defined(CPU_ARM) || ARM_ARCH < 5
+/* From libavutil/common.h */
+extern const uint8_t bs_log2_tab[256] ICONST_ATTR;
+extern const uint8_t bs_clz_tab[256] ICONST_ATTR;
+#endif
+
+/* Mode flags for bs_generic(); they are bit flags and may be OR-ed. */
+#define BS_LOG2  0 /* default personality, equivalent floor(log2(x)) */
+#define BS_CLZ   1 /* alternate personality, Count Leading Zeros */
+#define BS_SHORT 2 /* input guaranteed not to exceed 16 bits */
+#define BS_0_0   4 /* guarantee mapping of 0 input to 0 output */
+
+/* Generic bit-scanning function, used to wrap platform CLZ instruction or
+   scan-and-lookup code, and to provide control over output for 0 inputs.
+
+   v    - value to scan
+   mode - OR of the BS_* flags above:
+            BS_LOG2 (0) : return floor(log2(v))
+            BS_CLZ      : return the number of leading zero bits of v
+            BS_SHORT    : caller promises v fits in 16 bits (table path only;
+                          skips the test of the upper half-word)
+            BS_0_0      : force the result for v == 0 to be 0
+
+   Without BS_0_0 the result for v == 0 differs between the two
+   implementations below in log2 mode (the table path yields 0, the CLZ path
+   yields 31 - clz). */
+static inline unsigned int bs_generic(unsigned int v, int mode)
+{
+#if defined(CPU_ARM) && ARM_ARCH >= 5
+    /* NOTE(review): GCC documents __builtin_clz(0) as undefined; the BS_0_0
+       handling below appears to rely on it evaluating to 32 here (the value
+       the ARM CLZ instruction produces) - confirm. */
+    unsigned int r = __builtin_clz(v);
+    if (mode & BS_CLZ)
+    {
+        /* Only an all-zero input gives 32; masking folds that case to 0 and
+           leaves 0..31 untouched. */
+        if (mode & BS_0_0)
+            r &= 31;
+    } else {
+        /* Convert leading-zero count to the index of the highest set bit. */
+        r = 31 - r;
+        /* If mode is constant, this is a single conditional instruction */
+        if (mode & BS_0_0 && (signed)r < 0) 
+            r += 1;
+    }
+#else
+    const uint8_t *bs_tab;
+    unsigned int r;
+    unsigned int n = v;
+    int inc;
+    /* Set up table, increment, and initial result value based on
+       personality. */
+    if (mode & BS_CLZ)
+    {
+        /* Start from "24 leading zeros above the low byte" and subtract as
+           higher bytes turn out to be populated. */
+        bs_tab = bs_clz_tab;
+        r = 24;
+        inc = -16;
+    } else {
+        /* Start from bit 0 and add as higher bytes turn out to be
+           populated. */
+        bs_tab = bs_log2_tab;
+        r = 0;
+        inc = 16;
+    }
+    /* Narrow n down to its most significant non-zero byte, adjusting r by
+       16 and then 8 bit positions along the way. */
+    if (!(mode & BS_SHORT) && n >= 0x10000) {
+        n >>= 16;
+        r += inc;
+    }
+    if (n > 0xff) {
+        n >>= 8;
+        r += inc / 2;
+    }
+#ifdef CPU_COLDFIRE
+    /* The high 24 bits of n are guaranteed empty after the above, so a
+       superfluous ext.b instruction can be saved by loading the LUT value over
+       n with asm */
+    asm volatile (
+        "move.b (%1,%0.l),%0"
+        : "+d" (n)
+        : "a" (bs_tab)
+    );
+#else
+    n = bs_tab[n];
+#endif
+    r += n;
+    /* The CLZ table path yields 32 for a zero input; honour BS_0_0. (In log2
+       mode a zero input already produces 0 via bs_log2_tab[0].) */
+    if (mode & BS_CLZ && mode & BS_0_0 && v == 0)
+        r = 0;
+#endif
+    return r;
+}
+
+/* ffmpeg-compatible av_log2(): floor(log2(v)) via bs_generic(), with the
+   BS_0_0 flag so that av_log2(0) evaluates to 0.
+   TODO figure out if we really need to care about calculating
+   av_log2(0) */
+#define av_log2(v) bs_generic(v, BS_0_0)
+
+/* Various codec helper functions */
+
+int codec_init(void);
+void codec_set_replaygain(const struct mp3entry *id3);
+
+#ifdef RB_PROFILE
+void __cyg_profile_func_enter(void *this_fn, void *call_site)
+    NO_PROF_ATTR ICODE_ATTR;
+void __cyg_profile_func_exit(void *this_fn, void *call_site)
+    NO_PROF_ATTR ICODE_ATTR;
+#endif
+
+#endif /* __CODECLIB_H__ */
--- a/lib/rbcodec/codecs/lib/codeclib_misc.h
+++ b/lib/rbcodec/codecs/lib/codeclib_misc.h
@ -0,0 +1,310 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE.   *
+ *                                                                  *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002    *
+ * BY THE Xiph.Org FOUNDATION http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+ function: miscellaneous math and prototypes
+
+ ********************************************************************/
+
+#ifndef _CODECLIB_MISC_H_
+#define _CODECLIB_MISC_H_
+
+#include <stdint.h>
+#include "asm_arm.h"
+#include "asm_mcf5249.h"
+
+#ifndef  _LOW_ACCURACY_
+/* 64 bit multiply */
+
+/* 'union magic' overlays a 64-bit integer with its two 32-bit halves so the
+ * multiply helpers below can pick the upper or lower word of a 64-bit
+ * product without shifting. The member order of 'halves' is chosen per
+ * endianness so that 'hi' always aliases the most significant 32 bits and
+ * 'lo' the least significant 32 bits of 'whole'.
+ * Note: if neither endianness macro is defined the union is not declared. */
+#ifdef ROCKBOX_LITTLE_ENDIAN
+union magic {
+  struct {
+    int32_t lo;
+    int32_t hi;
+  } halves;
+  int64_t whole;
+};
+#elif defined(ROCKBOX_BIG_ENDIAN)
+union magic {
+  struct {
+    int32_t hi;
+    int32_t lo;
+  } halves;
+  int64_t whole;
+};
+#endif
+
+#ifndef INCL_OPTIMIZED_MULT32
+#define INCL_OPTIMIZED_MULT32
+/* Plain C version, used only when INCL_OPTIMIZED_MULT32 has not already
+ * been defined: returns the upper 32 bits of the signed 64-bit product
+ * x * y. */
+static inline int32_t MULT32(int32_t x, int32_t y)
+{
+  union magic product;
+
+  product.whole = (int64_t)x * y;
+  return product.halves.hi;
+}
+#endif
+
+#ifndef INCL_OPTIMIZED_MULT31
+#define INCL_OPTIMIZED_MULT31
+/* Plain C version: MULT32(x, y) shifted left by one bit, i.e. bits 62..31
+ * of the 64-bit product with the lowest result bit always zero. */
+static inline int32_t MULT31(int32_t x, int32_t y)
+{
+  int32_t upper = MULT32(x, y);
+
+  return upper << 1;
+}
+#endif
+
+#ifndef INCL_OPTIMIZED_MULT31_SHIFT15
+#define INCL_OPTIMIZED_MULT31_SHIFT15
+/* Plain C version: the 64-bit product x * y shifted right by 15 bits and
+ * truncated to 32 bits, assembled from the two halves of the product. */
+static inline int32_t MULT31_SHIFT15(int32_t x, int32_t y)
+{
+  union magic product;
+  uint32_t from_low;
+  int32_t from_high;
+
+  product.whole = (int64_t)x * y;
+  from_low  = (uint32_t)(product.halves.lo) >> 15;
+  from_high = (product.halves.hi) << 17;
+  return from_low | from_high;
+}
+#endif
+
+#ifndef INCL_OPTIMIZED_MULT31_SHIFT16
+#define INCL_OPTIMIZED_MULT31_SHIFT16
+/* Plain C version: the 64-bit product x * y shifted right by 16 bits and
+ * truncated to 32 bits, assembled from the two halves of the product. */
+static inline int32_t MULT31_SHIFT16(int32_t x, int32_t y)
+{
+  union magic product;
+  uint32_t from_low;
+  int32_t from_high;
+
+  product.whole = (int64_t)x * y;
+  from_low  = (uint32_t)(product.halves.lo) >> 16;
+  from_high = (product.halves.hi) << 16;
+  return from_low | from_high;
+}
+#endif
+
+#else
+/* Rockbox: unused */
+#if 0
+/* 32 bit multiply, more portable but less accurate */
+
+/*
+ * Note: Precision is biased towards the first argument therefore ordering
+ * is important.  Shift values were chosen for the best sound quality after
+ * many listening tests.
+ */
+
+/*
+ * For MULT32 and MULT31: The second argument is always a lookup table
+ * value already preshifted from 31 to 8 bits.  We therefore take the
+ * opportunity to save on text space and use unsigned char for those
+ * tables in this case.
+ */
+
+static inline int32_t MULT32(int32_t x, int32_t y) {
+  return (x >> 9) * y;  /* y preshifted >>23 */
+}
+
+static inline int32_t MULT31(int32_t x, int32_t y) {
+  return (x >> 8) * y;  /* y preshifted >>23 */
+}
+
+static inline int32_t MULT31_SHIFT15(int32_t x, int32_t y) {
+  return (x >> 6) * y;  /* y preshifted >>9 */
+}
+#endif
+#endif
+
+/*
+ * The XPROD functions are meant to optimize the cross products found all
+ * over the place in mdct.c by forcing memory operation ordering to avoid
+ * unnecessary register reloads as soon as memory is being written to.
+ * However this is only beneficial on CPUs with a sane number of general
+ * purpose registers which exclude the Intel x86.  On Intel, better let the
+ * compiler actually reload registers directly from original memory by using
+ * macros.
+ */
+
+#ifndef INCL_OPTIMIZED_XPROD32
+#define INCL_OPTIMIZED_XPROD32
+/* replaced XPROD32 with a macro to avoid memory reference
+   _x, _y are the results (must be l-values)
+
+   Computes, with MULT32 precision:
+     _x = _a*_t + _b*_v
+     _y = _b*_t - _a*_v
+   Each of _a, _b, _t and _v is expanded twice, so pass expressions without
+   side effects. _x is assigned before _y is computed, so _x must not alias
+   any of the inputs. */
+#define XPROD32(_a, _b, _t, _v, _x, _y)     \
+  { (_x)=MULT32(_a,_t)+MULT32(_b,_v);       \
+    (_y)=MULT32(_b,_t)-MULT32(_a,_v); }
+#endif
+
+/* Rockbox: Unused */
+/*
+#ifdef __i386__
+
+#define XPROD31(_a, _b, _t, _v, _x, _y)     \
+  { *(_x)=MULT31(_a,_t)+MULT31(_b,_v);      \
+    *(_y)=MULT31(_b,_t)-MULT31(_a,_v); }
+#define XNPROD31(_a, _b, _t, _v, _x, _y)    \
+  { *(_x)=MULT31(_a,_t)-MULT31(_b,_v);      \
+    *(_y)=MULT31(_b,_t)+MULT31(_a,_v); }
+
+#else
+*/
+
+#ifndef INCL_OPTIMIZED_XPROD31
+#define INCL_OPTIMIZED_XPROD31
+/* Cross product with MULT31 precision (plain C version):
+ *   *x = a*t + b*v
+ *   *y = b*t - a*v
+ */
+static inline void XPROD31(int32_t a, int32_t b,
+                           int32_t t, int32_t v,
+                           int32_t *x, int32_t *y)
+{
+  const int32_t a_t = MULT31(a, t);
+  const int32_t b_v = MULT31(b, v);
+  const int32_t b_t = MULT31(b, t);
+  const int32_t a_v = MULT31(a, v);
+
+  *x = a_t + b_v;
+  *y = b_t - a_v;
+}
+#endif
+
+#ifndef INCL_OPTIMIZED_XNPROD31
+#define INCL_OPTIMIZED_XNPROD31
+/* Cross product with MULT31 precision and the signs of XPROD31 swapped
+ * (plain C version):
+ *   *x = a*t - b*v
+ *   *y = b*t + a*v
+ */
+static inline void XNPROD31(int32_t a, int32_t b,
+                            int32_t t, int32_t v,
+                            int32_t *x, int32_t *y)
+{
+  const int32_t a_t = MULT31(a, t);
+  const int32_t b_v = MULT31(b, v);
+  const int32_t b_t = MULT31(b, t);
+  const int32_t a_v = MULT31(a, v);
+
+  *x = a_t - b_v;
+  *y = b_t + a_v;
+}
+#endif
+/*#endif*/
+
+#ifndef INCL_OPTIMIZED_XPROD31_R
+#define INCL_OPTIMIZED_XPROD31_R
+/* Macro form of XPROD31 that stores into l-values instead of through
+ * pointers:
+ *   _x = _a*_t + _b*_v
+ *   _y = _b*_t - _a*_v
+ * Inputs are expanded twice and _x is written before _y is computed, so
+ * pass side-effect-free expressions and do not let _x alias an input. */
+#define XPROD31_R(_a, _b, _t, _v, _x, _y)\
+{\
+  _x = MULT31(_a, _t) + MULT31(_b, _v);\
+  _y = MULT31(_b, _t) - MULT31(_a, _v);\
+}
+#endif
+
+#ifndef INCL_OPTIMIZED_XNPROD31_R
+#define INCL_OPTIMIZED_XNPROD31_R
+/* Macro form of XNPROD31 that stores into l-values instead of through
+ * pointers:
+ *   _x = _a*_t - _b*_v
+ *   _y = _b*_t + _a*_v
+ * Inputs are expanded twice and _x is written before _y is computed, so
+ * pass side-effect-free expressions and do not let _x alias an input. */
+#define XNPROD31_R(_a, _b, _t, _v, _x, _y)\
+{\
+  _x = MULT31(_a, _t) - MULT31(_b, _v);\
+  _y = MULT31(_b, _t) + MULT31(_a, _v);\
+}
+#endif
+
+#ifndef _V_VECT_OPS
+#define _V_VECT_OPS
+
+/* Element-wise x[i] += y[i] for the first n elements; n <= 0 is a no-op. */
+static inline
+void vect_add(int32_t *x, const int32_t *y, int n)
+{
+  int i;
+
+  for (i = 0; i < n; i++)
+    x[i] += y[i];
+}
+
+/* Copies the first n elements of y into x; n <= 0 is a no-op. */
+static inline
+void vect_copy(int32_t *x, const int32_t *y, int n)
+{
+  int i;
+
+  for (i = 0; i < n; i++)
+    x[i] = y[i];
+}
+
+/* Multiplies data[0..n-1] in place by window[0..n-1] (window walked
+ * forwards) using MULT31; n <= 0 is a no-op. */
+static inline
+void vect_mult_fw(int32_t *data, const int32_t *window, int n)
+{
+  int i;
+
+  for (i = 0; i < n; i++)
+    data[i] = MULT31(data[i], window[i]);
+}
+
+/* Multiplies data[0..n-1] in place by the window walked backwards: data[i]
+ * is scaled by window[-i], so 'window' must point at the LAST coefficient
+ * to use. Uses MULT31; n <= 0 is a no-op. */
+static inline
+void vect_mult_bw(int32_t *data, const int32_t *window, int n)
+{
+  for (; n > 0; n--, data++, window--)
+    *data = MULT31(*data, *window);
+}
+#endif
+
+/* not used anymore */
+/*
+#ifndef _V_CLIP_MATH
+#define _V_CLIP_MATH
+
+static inline int32_t CLIP_TO_15(int32_t x) {
+  int ret=x;
+  ret-= ((x<=32767)-1)&(x-32767);
+  ret-= ((x>=-32768)-1)&(x+32768);
+  return(ret);
+}
+
+#endif
+*/
+/* Multiplies two mantissa/exponent pairs (a, ap) and (b, bp).
+ * Returns the product mantissa and stores the product exponent in *p.
+ * When either mantissa is zero the result is 0 and *p is left untouched. */
+static inline int32_t VFLOAT_MULT(int32_t a,int32_t ap,
+                      int32_t b,int32_t bp,
+                      int32_t *p){
+  if(!a || !b)
+    return 0;
+
+#ifndef _LOW_ACCURACY_
+  *p=ap+bp+32;
+  return MULT32(a,b);
+#else
+  *p=ap+bp+31;
+  return (a>>15)*(b>>16);
+#endif
+}
+
+/*static inline int32_t VFLOAT_MULTI(int32_t a,int32_t ap,
+                      int32_t i,
+                      int32_t *p){
+
+  int ip=_ilog(abs(i))-31;
+  return VFLOAT_MULT(a,ap,i<<-ip,ip,p);
+}
+*/
+/* Adds two mantissa/exponent pairs (a, ap) and (b, bp).
+ * Returns the sum mantissa and stores its exponent in *p.
+ * A zero mantissa on either side returns the other operand unchanged.
+ * Otherwise the operand with the smaller exponent is shifted (with
+ * rounding) to the larger exponent plus one, the operands are added, and
+ * the sum is renormalised by one bit when its top two bits are equal. */
+static inline int32_t VFLOAT_ADD(int32_t a,int32_t ap,
+                      int32_t b,int32_t bp,
+                      int32_t *p){
+  int shift;
+  uint32_t top;
+
+  if(a==0){
+    *p=bp;
+    return b;
+  }
+  if(b==0){
+    *p=ap;
+    return a;
+  }
+
+  /* yes, this can leak a bit. */
+  if(ap>bp){
+    /* a has the larger exponent: halve a, align b to it */
+    shift=ap-bp+1;
+    *p=ap+1;
+    a>>=1;
+    b=(shift<32) ? (b+(1<<(shift-1)))>>shift : 0;
+  }else{
+    /* b has the larger (or equal) exponent: halve b, align a to it */
+    shift=bp-ap+1;
+    *p=bp+1;
+    b>>=1;
+    a=(shift<32) ? (a+(1<<(shift-1)))>>shift : 0;
+  }
+
+  a+=b;
+
+  /* top two bits equal: one bit of headroom left, so shift it out */
+  top=a&0xc0000000;
+  if(top==0xc0000000 || top==0){
+    a<<=1;
+    *p-=1;
+  }
+  return(a);
+}
+
+#endif
+
--- a/lib/rbcodec/codecs/lib/ffmpeg_bitstream.c
+++ b/lib/rbcodec/codecs/lib/ffmpeg_bitstream.c
@ -0,0 +1,374 @@
+/*
+ * Common bit i/o utils
+ * Copyright (c) 2000, 2001 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2010 Loren Merritt
+ *
+ * alternative bitstream reader & writer by Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * bitstream api.
+ */
+
+//#include "avcodec.h"
+#include "ffmpeg_get_bits.h"
+#include "ffmpeg_put_bits.h"
+#include "ffmpeg_intreadwrite.h"
+
+#define av_log(...)
+
+#ifdef ROCKBOX
+#undef DEBUGF
+#define DEBUGF(...)
+#endif
+
+/* 32-entry lookup table exported to other translation units; it is not
+ * referenced anywhere in this file. Values rise monotonically from 0 to 15.
+ * NOTE(review): presumably a run-length log2 table consumed by individual
+ * decoders - confirm against its users. */
+const uint8_t ff_log2_run[32]={
+ 0, 0, 0, 0, 1, 1, 1, 1,
+ 2, 2, 2, 2, 3, 3, 3, 3,
+ 4, 4, 5, 5, 6, 6, 7, 7,
+ 8, 9,10,11,12,13,14,15
+};
+
+#if 0 // unused in rockbox
+void align_put_bits(PutBitContext *s)
+{
+#ifdef ALT_BITSTREAM_WRITER
+    put_bits(s,(  - s->index) & 7,0);
+#else
+    put_bits(s,s->bit_left & 7,0);
+#endif
+}
+
+void ff_put_string(PutBitContext *pb, const char *string, int terminate_string)
+{
+    while(*string){
+        put_bits(pb, 8, *string);
+        string++;
+    }
+    if(terminate_string)
+        put_bits(pb, 8, 0);
+}
+#endif
+
+/* Appends 'length' bits, taken MSB-first from the byte array 'src', to the
+ * bit writer 'pb'.
+ *
+ * The input is handled as 'words' full 16-bit big-endian units followed by
+ * 'bits' (0..15) trailing bits.
+ *
+ * NOTE(review): the final put_bits() always evaluates
+ * AV_RB16(src + 2*words), i.e. it reads two bytes of 'src' even when fewer
+ * than 16 (or zero) trailing bits remain - callers presumably need to
+ * provide readable padding after the payload; confirm. */
+void ff_copy_bits(PutBitContext *pb, const uint8_t *src, int length)
+{
+    int words= length>>4;
+    int bits= length&15;
+    int i;
+
+    if(length==0) return;
+
+    if(words < 16 || put_bits_count(pb)&7){
+        /* short input, or writer not byte aligned: go word by word */
+        for(i=0; i<words; i++) put_bits(pb, 16, AV_RB16(src + 2*i));
+    }else{
+        /* byte-aligned writer and enough data: emit single bytes until the
+           writer's bit count is a multiple of 32, flush, then bulk-copy the
+           rest of the whole words with memcpy */
+        for(i=0; put_bits_count(pb)&31; i++)
+            put_bits(pb, 8, src[i]);
+        flush_put_bits(pb);
+        memcpy(put_bits_ptr(pb), src+i, 2*words-i);
+        skip_put_bytes(pb, 2*words-i);
+    }
+
+    /* trailing 0..15 bits, taken from the top of the next 16-bit unit */
+    put_bits(pb, bits, AV_RB16(src + 2*words)>>(16-bits));
+}
+
+/* VLC decoding */
+
+//#define DEBUG_VLC
+
+/* Loads element 'i' of a strided table into 'v'.
+ *   table - base address of the table
+ *   i     - element index
+ *   wrap  - distance in bytes between consecutive elements
+ *   size  - element width in bytes: 1 -> uint8_t, 2 -> uint16_t,
+ *           anything else is read as uint32_t
+ * The 2- and 4-byte cases dereference a casted byte pointer, so
+ * table + i*wrap is presumably expected to be suitably aligned for the
+ * element type - confirm for each caller. */
+#define GET_DATA(v, table, i, wrap, size) \
+{\
+    const uint8_t *ptr = (const uint8_t *)table + i * wrap;\
+    switch(size) {\
+    case 1:\
+        v = *(const uint8_t *)ptr;\
+        break;\
+    case 2:\
+        v = *(const uint16_t *)ptr;\
+        break;\
+    default:\
+        v = *(const uint32_t *)ptr;\
+        break;\
+    }\
+}
+
+
+/* Reserves 'size' consecutive entries at the end of vlc->table.
+ *
+ * vlc        - VLC whose table is being filled; table_size is advanced by
+ *              'size' (also when the reservation fails)
+ * size       - number of entries to reserve
+ * use_static - non-zero when the caller supplied a fixed, pre-allocated
+ *              table (INIT_VLC_USE_NEW_STATIC)
+ *
+ * Returns the index of the first reserved entry, or -1 when the request
+ * does not fit into vlc->table_allocated.
+ *
+ * Upstream ffmpeg grows dynamic tables with av_realloc() here. Rockbox has
+ * that growth disabled, so running out of room is always an error; the
+ * former "if (!vlc->table)" check only made sense after a realloc and would
+ * have handed out an out-of-bounds index for a non-NULL, undersized table.
+ */
+static int alloc_table(VLC *vlc, int size, int use_static)
+{
+    int index = vlc->table_size;
+
+    vlc->table_size += size;
+    if (vlc->table_size > vlc->table_allocated) {
+        if (use_static) {
+            DEBUGF("init_vlc() used with too little memory : table_size > allocated_memory\n");
+        } else {
+            DEBUGF("init_vlc() table is too small and cannot be grown\n");
+        }
+        return -1;
+    }
+    return index;
+}
+
+/* 
+static av_always_inline uint32_t bitswap_32(uint32_t x) {
+    return av_reverse[x&0xFF]<<24
+         | av_reverse[(x>>8)&0xFF]<<16
+         | av_reverse[(x>>16)&0xFF]<<8
+         | av_reverse[x>>24];
+}
+*/
+
+/* One variable-length code as used while building the decoding tables. */
+typedef struct {
+    /** length of the codeword in bits */
+    uint8_t bits;
+    /** value stored in the decoding table for this codeword */
+    uint16_t symbol;
+    /** codeword, with the first bit-to-be-read in the msb
+     * (even if intended for a little-endian bitstream reader) */
+    uint32_t code;
+} __attribute__((__packed__)) VLCcode; /* packed to save space */
+
+/* qsort() comparator for VLCcode entries: orders by the msb-aligned
+ * codeword with its lowest bit dropped (so the difference of two keys fits
+ * the int return value). */
+static int compare_vlcspec(const void *a, const void *b)
+{
+    const VLCcode *lhs = a;
+    const VLCcode *rhs = b;
+    const uint32_t lhs_key = lhs->code >> 1;
+    const uint32_t rhs_key = rhs->code >> 1;
+
+    return lhs_key - rhs_key;
+}
+
+/**
+ * Build VLC decoding tables suitable for use with get_vlc().
+ *
+ * Each table entry is a pair: [0] holds the symbol (or, for a prefix that
+ * continues in a subtable, the index of that subtable within vlc->table)
+ * and [1] holds the code length in bits (or minus the subtable's bit width).
+ * Unused entries keep [0] == -1 and [1] == 0.
+ *
+ * @param vlc            the context to be initted
+ *
+ * @param table_nb_bits  max length of vlc codes to store directly in this table
+ *                       (Longer codes are delegated to subtables.)
+ *
+ * @param nb_codes       number of elements in codes[]
+ *
+ * @param codes          descriptions of the vlc codes
+ *                       These must be ordered such that codes going into the same subtable are contiguous.
+ *                       Sorting by VLCcode.code is sufficient, though not necessary.
+ *                       Entries that end up in a subtable are modified in
+ *                       place (prefix bits stripped, length reduced).
+ *
+ * @param flags          INIT_VLC_* flags; only INIT_VLC_USE_NEW_STATIC is
+ *                       examined here (forwarded to alloc_table()).
+ *
+ * @return index of the table just built within vlc->table, or -1 on error
+ *         (out of table space, or two codes mapping to the same entry).
+ */
+static int build_table(VLC *vlc, int table_nb_bits, int nb_codes,
+                       VLCcode *codes, int flags)
+{
+    int table_size, table_index, index, symbol, subtable_bits;
+    int i, j, k, n, nb, inc;
+    uint32_t code, code_prefix;
+    VLC_TYPE (*table)[2];
+
+    table_size = 1 << table_nb_bits;
+    table_index = alloc_table(vlc, table_size, flags & INIT_VLC_USE_NEW_STATIC);
+#ifdef DEBUG_VLC
+    av_log(NULL,AV_LOG_DEBUG,"new table index=%d size=%d\n",
+           table_index, table_size);
+#endif
+    if (table_index < 0)
+        return -1;
+    table = &vlc->table[table_index];
+
+    /* mark every entry of the new table as unused */
+    for (i = 0; i < table_size; i++) {
+        table[i][1] = 0; //bits
+        table[i][0] = -1; //codes
+    }
+
+    /* first pass: map codes and compute auxiliary table sizes */
+    for (i = 0; i < nb_codes; i++) {
+        n = codes[i].bits;
+        code = codes[i].code;
+        symbol = codes[i].symbol;
+#if defined(DEBUG_VLC) && 0
+        av_log(NULL,AV_LOG_DEBUG,"i=%d n=%d code=0x%x\n", i, n, code);
+#endif
+        if (n <= table_nb_bits) {
+            /* no need to add another table */
+            /* the code occupies every entry whose top n index bits match
+               it: 2^(table_nb_bits - n) consecutive slots starting at j */
+            j = code >> (32 - table_nb_bits);
+            nb = 1 << (table_nb_bits - n);
+            inc = 1;
+/*            if (flags & INIT_VLC_LE) {
+                j = bitswap_32(code);
+                inc = 1 << n;
+            } */
+            for (k = 0; k < nb; k++) {
+#ifdef DEBUG_VLC
+                av_log(NULL, AV_LOG_DEBUG, "%4x: code=%d n=%d\n",
+                       j, i, n);
+#endif
+                /* a slot that is already taken means the code set is not
+                   prefix-free */
+                if (table[j][1] /*bits*/ != 0) {
+                    av_log(NULL, AV_LOG_ERROR, "incorrect codes\n");
+                    return -1;
+                }
+                table[j][1] = n; //bits
+                table[j][0] = symbol;
+                j += inc;
+            }
+        } else {
+            /* fill auxiliary table recursively */
+            /* strip the table_nb_bits prefix from this code ... */
+            n -= table_nb_bits;
+            code_prefix = code >> (32 - table_nb_bits);
+            subtable_bits = n;
+            codes[i].bits = n;
+            codes[i].code = code << table_nb_bits;
+            /* ... and from every following code that shares the prefix,
+               tracking the longest remaining length */
+            for (k = i+1; k < nb_codes; k++) {
+                n = codes[k].bits - table_nb_bits;
+                if (n <= 0)
+                    break;
+                code = codes[k].code;
+                if (code >> (32 - table_nb_bits) != code_prefix)
+                    break;
+                codes[k].bits = n;
+                codes[k].code = code << table_nb_bits;
+                subtable_bits = FFMAX(subtable_bits, n);
+            }
+            /* a subtable is never wider than its parent; longer codes nest
+               further */
+            subtable_bits = FFMIN(subtable_bits, table_nb_bits);
+            j = /*(flags & INIT_VLC_LE) ? bitswap_32(code_prefix) >> (32 - table_nb_bits) :*/ code_prefix;
+            /* negative length marks "continue in subtable of this width" */
+            table[j][1] = -subtable_bits;
+#ifdef DEBUG_VLC
+            av_log(NULL,AV_LOG_DEBUG,"%4x: n=%d (subtable)\n",
+                   j, codes[i].bits + table_nb_bits);
+#endif
+            index = build_table(vlc, subtable_bits, k-i, codes+i, flags);
+            if (index < 0)
+                return -1;
+            /* note: realloc has been done, so reload tables */
+            table = &vlc->table[table_index];
+            table[j][0] = index; //code
+            /* codes i..k-1 were consumed by the subtable */
+            i = k-1;
+        }
+    }
+    return table_index;
+}
+
+
+/* Build VLC decoding tables suitable for use with get_vlc().
+
+   'nb_bits' sets the decoding table size (2^nb_bits entries). The
+   bigger it is, the faster is the decoding. But it should not be too
+   big to save memory and L1 cache. '9' is a good compromise.
+
+   'nb_codes' : number of vlcs codes
+
+   'bits' : table which gives the size (in bits) of each vlc code.
+
+   'codes' : table which gives the bit pattern of each vlc code.
+
+   'symbols' : table which gives the values to be returned from get_vlc().
+
+   'xxx_wrap' : give the number of bytes between each entry of the
+   'bits' or 'codes' tables.
+
+   'xxx_size' : gives the number of bytes of each entry of the 'bits'
+   or 'codes' tables.
+
+   'wrap' and 'size' allows to use any memory configuration and types
+   (byte/word/long) to store the 'bits', 'codes', and 'symbols' tables.
+
+   'use_static' should be set to 1 for tables, which should be freed
+   with av_free_static(), 0 if free_vlc() will be used.
+*/
+
+/* Rockbox: support for INIT_VLC_LE is currently disabled since none of our
+   codecs use it, there's a LUT based bit reverse function for this commented
+   out above (bitswap_32) and an inline asm version in libtremor/codebook.c
+   if we ever want this */
+   
+/* Scratch array for the code list handed to build_table(). It is a single
+   file-scope buffer shared by every call, so init_vlc_sparse() is not
+   reentrant. */
+static VLCcode buf[1336+1]; /* worst case is wma, which has one table with 1336 entries */
+
+/* Builds the decoding table(s) for 'vlc' from separate bits/codes/symbols
+   arrays.
+
+   Returns 0 on success - including the case of a static table that is
+   already completely initialised - and -1 on failure (more codes than fit
+   in 'buf', a partially initialised static table, or build_table()
+   failing).
+
+   NOTE(review): without INIT_VLC_USE_NEW_STATIC the table pointer is reset
+   to NULL with zero capacity below, and table growth is disabled in
+   alloc_table(); that path therefore appears unable to succeed - callers
+   presumably always pass a static table. */
+int init_vlc_sparse(VLC *vlc, int nb_bits, int nb_codes,
+             const void *bits, int bits_wrap, int bits_size,
+             const void *codes, int codes_wrap, int codes_size,
+             const void *symbols, int symbols_wrap, int symbols_size,
+             int flags)
+{
+    if (nb_codes+1 > (int)(sizeof (buf)/ sizeof (VLCcode)))
+    {
+        DEBUGF("Table is larger than temp buffer!\n");
+        return -1;
+    }
+
+    int i, j, ret;
+
+    vlc->bits = nb_bits;
+    if(flags & INIT_VLC_USE_NEW_STATIC){
+        if(vlc->table_size && vlc->table_size == vlc->table_allocated){
+            /* static table already fully built by an earlier call */
+            return 0;
+        }else if(vlc->table_size){
+            DEBUGF("fatal error, we are called on a partially initialized table\n");
+            return -1;
+//            abort(); // fatal error, we are called on a partially initialized table
+        }
+    }else {
+        vlc->table = NULL;
+        vlc->table_allocated = 0;
+        vlc->table_size = 0;
+    }
+
+#ifdef DEBUG_VLC
+    av_log(NULL,AV_LOG_DEBUG,"build table nb_codes=%d\n", nb_codes);
+#endif
+
+//    buf = av_malloc((nb_codes+1)*sizeof(VLCcode));
+
+//    assert(symbols_size <= 2 || !symbols);
+    j = 0;
+/* COPY(condition): append to buf[j...] every input code i whose length
+   satisfies 'condition'. The length is read into buf[j].bits first so the
+   condition can test it; rejected entries are simply overwritten by the
+   next candidate. Codes are stored msb-aligned in 32 bits; the symbol is
+   taken from 'symbols' when given, otherwise it is the input index i. */
+#define COPY(condition)\
+    for (i = 0; i < nb_codes; i++) {\
+        GET_DATA(buf[j].bits, bits, i, bits_wrap, bits_size);\
+        if (!(condition))\
+            continue;\
+        GET_DATA(buf[j].code, codes, i, codes_wrap, codes_size);\
+/*        if (flags & INIT_VLC_LE)*/\
+/*            buf[j].code = bitswap_32(buf[j].code);*/\
+/*        else*/\
+            buf[j].code <<= 32 - buf[j].bits;\
+        if (symbols)\
+            GET_DATA(buf[j].symbol, symbols, i, symbols_wrap, symbols_size)\
+        else\
+            buf[j].symbol = i;\
+        j++;\
+    }
+    /* pass 1: codes longer than nb_bits (these go into subtables); sort
+       them so that codes sharing a prefix become contiguous */
+    COPY(buf[j].bits > nb_bits);
+    // qsort is the slowest part of init_vlc, and could probably be improved or avoided
+    qsort(buf, j, sizeof(VLCcode), compare_vlcspec);
+    /* pass 2: append the short codes (length 1..nb_bits) unsorted; codes
+       with length 0 are dropped */
+    COPY(buf[j].bits && buf[j].bits <= nb_bits);
+    nb_codes = j;
+
+    ret = build_table(vlc, nb_bits, nb_codes, buf, flags);
+
+//    av_free(buf);
+    if (ret < 0) {
+//        av_freep(&vlc->table);
+        return -1;
+    }
+    /* a static table whose declared size does not match what was needed is
+       only reported (av_log is compiled out in this file), not treated as
+       an error */
+    if((flags & INIT_VLC_USE_NEW_STATIC) && vlc->table_size != vlc->table_allocated) {
+        av_log(NULL, AV_LOG_ERROR, "needed %d had %d\n", vlc->table_size, vlc->table_allocated);
+    }
+    return 0;
+}
+
+/* not used in rockbox
+void free_vlc(VLC *vlc)
+{
+    av_freep(&vlc->table);
+}
+*/
+
--- a/lib/rbcodec/codecs/lib/ffmpeg_bswap.h
+++ b/lib/rbcodec/codecs/lib/ffmpeg_bswap.h
@ -0,0 +1,150 @@
+/**
+ * @file bswap.h
+ * byte swap.
+ */
+
+#ifndef __BSWAP_H__
+#define __BSWAP_H__
+
+#ifdef HAVE_BYTESWAP_H
+#include <byteswap.h>
+#else
+
+#ifdef ROCKBOX
+#include "codecs.h"
+
+/* rockbox' optimised inline functions */
+#define bswap_16(x) swap16(x)
+#define bswap_32(x) swap32(x)
+
+/* Reverses the byte order of a 64-bit value.
+ *
+ * Each 32-bit half is byte-swapped with bswap_32() and the halves trade
+ * places. The result is assembled with shifts rather than through a
+ * union of two 32-bit words: the union layout depends on host endianness
+ * and put the halves in the wrong order on little-endian targets. */
+static inline uint64_t ByteSwap64(uint64_t x)
+{
+    const uint32_t low_swapped  = bswap_32 ((uint32_t)x);
+    const uint32_t high_swapped = bswap_32 ((uint32_t)(x>>32));
+
+    return ((uint64_t)low_swapped << 32) | high_swapped;
+}
+#define bswap_64(x) ByteSwap64(x)
+
+#elif defined(ARCH_X86)
+/* x86: swaps the two bytes of a 16-bit value by exchanging the low (%b0)
+ * and high (%h0) byte registers of the operand; the "q" constraint keeps
+ * the operand in a register that has such byte halves. */
+static inline unsigned short ByteSwap16(unsigned short x)
+{
+  __asm("xchgb %b0,%h0" :
+        "=q" (x)    :
+        "0" (x));
+    return x;
+}
+#define bswap_16(x) ByteSwap16(x)
+
+/* x86: reverses the byte order of a 32-bit value. CPUs newer than the 386
+ * use the single 'bswap' instruction; otherwise the swap is built from a
+ * byte exchange of the low word, a 16-bit rotate, and another byte
+ * exchange. The preprocessor conditional selects the instruction text and
+ * output constraint; both variants share the input operand line. */
+static inline unsigned int ByteSwap32(unsigned int x)
+{
+#if __CPU__ > 386
+ __asm("bswap   %0":
+      "=r" (x)     :
+#else
+ __asm("xchgb   %b0,%h0\n"
+      " rorl    $16,%0\n"
+      " xchgb   %b0,%h0":
+      "=q" (x)      :
+#endif
+      "0" (x));
+  return x;
+}
+#define bswap_32(x) ByteSwap32(x)
+
+/* x86: reverses the byte order of a 64-bit value. Both 32-bit halves are
+ * byte-swapped with bswap_32() and loaded into two registers; 'xchgl' then
+ * exchanges the registers so each swapped half lands in the other half of
+ * the result. */
+static inline unsigned long long int ByteSwap64(unsigned long long int x)
+{
+  register union { __extension__ uint64_t __ll;
+          uint32_t __l[2]; } __x;
+  asm("xchgl    %0,%1":
+      "=r"(__x.__l[0]),"=r"(__x.__l[1]):
+      "0"(bswap_32((unsigned long)x)),"1"(bswap_32((unsigned long)(x>>32))));
+  return __x.__ll;
+}
+#define bswap_64(x) ByteSwap64(x)
+
+#elif defined(ARCH_SH4)
+
+/* SH4: swaps the two bytes of a 16-bit value with a single 'swap.b'. */
+static inline uint16_t ByteSwap16(uint16_t x) {
+    __asm__("swap.b %0,%0":"=r"(x):"0"(x));
+    return x;
+}
+
+/* SH4: reverses the byte order of a 32-bit value: swap the bytes of the
+ * low half-word, swap the two half-words, then swap the bytes of the (new)
+ * low half-word. */
+static inline uint32_t ByteSwap32(uint32_t x) {
+    __asm__(
+    "swap.b %0,%0\n"
+    "swap.w %0,%0\n"
+    "swap.b %0,%0\n"
+    :"=r"(x):"0"(x));
+    return x;
+}
+
+#define bswap_16(x) ByteSwap16(x)
+#define bswap_32(x) ByteSwap32(x)
+
+/* Reverses the byte order of a 64-bit value.
+ *
+ * Each 32-bit half is byte-swapped with bswap_32() and the halves trade
+ * places. The result is assembled with shifts rather than through a
+ * union of two 32-bit words: the union layout depends on host endianness
+ * and put the halves in the wrong order on little-endian configurations. */
+static inline uint64_t ByteSwap64(uint64_t x)
+{
+    const uint32_t low_swapped  = bswap_32 ((uint32_t)x);
+    const uint32_t high_swapped = bswap_32 ((uint32_t)(x>>32));
+
+    return ((uint64_t)low_swapped << 32) | high_swapped;
+}
+#define bswap_64(x) ByteSwap64(x)
+
+#else
+
+/* Portable fallbacks. The macros expand their argument more than once, so
+   pass expressions without side effects. */
+#define bswap_16(x) ((((x) & 0xff00) >> 8) | (((x) & 0x00ff) << 8))
+
+
+// code from bits/byteswap.h (C) 1997, 1998 Free Software Foundation, Inc.
+#define bswap_32(x) \
+     ((((x) & 0x000000ff) << 24) | (((x) & 0x0000ff00) <<  8) | \
+      (((x) & 0x00ff0000) >>  8) | (((x) & 0xff000000) >> 24))
+
+/* Reverses the byte order of a 64-bit value: each 32-bit half is
+   byte-swapped and the two halves trade places. */
+static inline uint64_t ByteSwap64(uint64_t x)
+{
+    const uint32_t low  = (uint32_t)x;
+    const uint32_t high = (uint32_t)(x >> 32);
+
+    return ((uint64_t)bswap_32(low) << 32) | bswap_32(high);
+}
+#define bswap_64(x) ByteSwap64(x)
+
+#endif  /* !ARCH_X86 */
+
+#endif  /* !HAVE_BYTESWAP_H */
+
+// be2me ... BigEndian to MachineEndian
+// le2me ... LittleEndian to MachineEndian
+//
+// On a big-endian build (ROCKBOX_BIG_ENDIAN defined) the be2me_* macros are
+// no-ops and the le2me_* macros byte-swap; in every other case it is the
+// other way round.
+
+#ifdef ROCKBOX_BIG_ENDIAN
+#define be2me_16(x) (x)
+#define be2me_32(x) (x)
+#define be2me_64(x) (x)
+#define le2me_16(x) bswap_16(x)
+#define le2me_32(x) bswap_32(x)
+#define le2me_64(x) bswap_64(x)
+#else
+#define be2me_16(x) bswap_16(x)
+#define be2me_32(x) bswap_32(x)
+#define be2me_64(x) bswap_64(x)
+#define le2me_16(x) (x)
+#define le2me_32(x) (x)
+#define le2me_64(x) (x)
+#endif
+
+#endif /* __BSWAP_H__ */
--- a/lib/rbcodec/codecs/lib/ffmpeg_get_bits.h
+++ b/lib/rbcodec/codecs/lib/ffmpeg_get_bits.h
@ -0,0 +1,743 @@
+/*
+ * copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * bitstream reader API header.
+ */
+
+#ifndef AVCODEC_GET_BITS_H
+#define AVCODEC_GET_BITS_H
+
+#include <stdint.h>
+#include <stdlib.h>
+#include "ffmpeg_intreadwrite.h"
+//#include <assert.h>
+//#include "libavutil/bswap.h"
+//#include "libavutil/common.h"
+//#include "libavutil/intreadwrite.h"
+//#include "libavutil/log.h"
+//#include "mathops.h"
+
+#include "codecs.h"
+
+/* rockbox' optimised inline functions */
+#define bswap_16(x) swap16(x)
+#define bswap_32(x) swap32(x)
+
+#ifdef ROCKBOX_BIG_ENDIAN
+#define be2me_16(x) (x)
+#define be2me_32(x) (x)
+#define le2me_16(x) bswap_16(x)
+#define le2me_32(x) bswap_32(x)
+#else
+#define be2me_16(x) bswap_16(x)
+#define be2me_32(x) bswap_32(x)
+#define le2me_16(x) (x)
+#define le2me_32(x) (x)
+#endif
+
+#define av_const __attribute__((const))
+#define av_always_inline inline __attribute__((always_inline))
+
+/* The following is taken from mathops.h */
+
+#ifndef sign_extend
+/**
+ * Sign-extend the low @p bits bits of @p val to a full int.
+ *
+ * The left shift is carried out on an unsigned copy of the value: shifting
+ * a set bit into or past the sign bit of a signed int is undefined
+ * behaviour. The right shift is still done on the signed view so that the
+ * sign bit is replicated.
+ *
+ * @param val  value whose low @p bits bits hold a two's-complement number
+ * @param bits width of that number, 1 .. 8*sizeof(int); 0 would shift by
+ *             the full width of int and is not supported
+ * @return the sign-extended value
+ */
+static inline av_const int sign_extend(int val, unsigned bits)
+{
+    const unsigned shift = (8 * sizeof(int)) - bits;
+    union { unsigned u; int s; } v = { (unsigned)val << shift };
+    return v.s >> shift;
+}
+#endif
+
+/* Keep the top s bits of a 32-bit value, moved down to bit 0, with sign
+ * extension. s must not be 0: that would shift by 32. */
+#ifndef NEG_SSR32
+#   define NEG_SSR32(a,s) ((( int32_t)(a))>>(32-(s)))
+#endif
+
+/* Same as NEG_SSR32 but zero-filled (unsigned shift). s must not be 0. */
+#ifndef NEG_USR32
+#   define NEG_USR32(a,s) (((uint32_t)(a))>>(32-(s)))
+#endif
+
+/* these 2 are from libavutil/common.h */
+
+/* Larger / smaller of two values. The selected argument is evaluated twice,
+ * so do not pass expressions with side effects. */
+#define FFMAX(a,b) ((a) > (b) ? (a) : (b))
+#define FFMIN(a,b) ((a) > (b) ? (b) : (a))
+
+#if defined(ALT_BITSTREAM_READER_LE) && !defined(ALT_BITSTREAM_READER)
+#   define ALT_BITSTREAM_READER
+#endif
+
+/*
+#if !defined(LIBMPEG2_BITSTREAM_READER) && !defined(A32_BITSTREAM_READER) && !defined(ALT_BITSTREAM_READER)
+#   if ARCH_ARM && !HAVE_FAST_UNALIGNED
+#       define A32_BITSTREAM_READER
+#   else
+*/
+#       define ALT_BITSTREAM_READER
+/*
+//#define LIBMPEG2_BITSTREAM_READER
+//#define A32_BITSTREAM_READER
+#   endif
+#endif
+*/
+
+/* bit input */
+/* buffer, buffer_end and size_in_bits must be present and used by every reader */
+typedef struct GetBitContext {
+    /* start of the bitstream and one past its last byte, as set by init_get_bits() */
+    const uint8_t *buffer, *buffer_end;
+#ifdef ALT_BITSTREAM_READER
+    /* current read position, in bits from the start of buffer */
+    int index;
+#elif defined LIBMPEG2_BITSTREAM_READER
+    /* state of the 16-bit-refill reader; maintained by its OPEN/CLOSE/UPDATE macros */
+    uint8_t *buffer_ptr;
+    uint32_t cache;
+    int bit_count;
+#elif defined A32_BITSTREAM_READER
+    /* state of the 32-bit-refill reader; cache0/cache1 form a two-word cache */
+    uint32_t *buffer_ptr;
+    uint32_t cache0;
+    uint32_t cache1;
+    int bit_count;
+#endif
+    /* size of the stream in bits as passed to init_get_bits() */
+    int size_in_bits;
+} GetBitContext;
+
+/* Element type of the VLC lookup tables. */
+#define VLC_TYPE int16_t
+
+/* Variable-length-code lookup table, read by GET_VLC() / get_vlc2(). */
+typedef struct VLC {
+    int bits;               /* lookup width handed to the decoder (see the get_vlc() trace macro) */
+    /* table[i][0] is the code (or sub-table offset), table[i][1] the bit
+     * count; GET_VLC() treats a negative count as "descend into a sub-table
+     * indexed with -count more bits" */
+    VLC_TYPE (*table)[2]; ///< code, bits
+    /* table_allocated is set to the static table size by INIT_VLC_STATIC();
+     * NOTE(review): table_size is presumably the number of used entries -- confirm */
+    int table_size, table_allocated;
+} VLC;
+
+/* One entry of a run/level VLC table as read by GET_RL_VLC(). */
+typedef struct RL_VLC_ELEM {
+    int16_t level;  /* decoded level, or the sub-table offset when len is negative */
+    int8_t len;     /* bits to consume; negative means -len more bits index a sub-table */
+    uint8_t run;    /* decoded run value */
+} RL_VLC_ELEM;
+
+/* Bitstream reader API docs:
+name
+    arbitrary name which is used as prefix for the internal variables
+
+gb
+    getbitcontext
+
+OPEN_READER(name, gb)
+    loads gb into local variables
+
+CLOSE_READER(name, gb)
+    stores local vars in gb
+
+UPDATE_CACHE(name, gb)
+    refills the internal cache from the bitstream
+    after this call at least MIN_CACHE_BITS will be available,
+
+GET_CACHE(name, gb)
+    will output the contents of the internal cache, next bit is MSB of 32 or 64 bit (FIXME 64bit)
+
+SHOW_UBITS(name, gb, num)
+    will return the next num bits
+
+SHOW_SBITS(name, gb, num)
+    will return the next num bits and do sign extension
+
+SKIP_BITS(name, gb, num)
+    will skip over the next num bits
+    note, this is equivalent to SKIP_CACHE; SKIP_COUNTER
+
+SKIP_CACHE(name, gb, num)
+    will remove the next num bits from the cache (note SKIP_COUNTER MUST be called before UPDATE_CACHE / CLOSE_READER)
+
+SKIP_COUNTER(name, gb, num)
+    will increment the internal bit counter (see SKIP_CACHE & SKIP_BITS)
+
+LAST_SKIP_CACHE(name, gb, num)
+    will remove the next num bits from the cache if it is needed for UPDATE_CACHE otherwise it will do nothing
+
+LAST_SKIP_BITS(name, gb, num)
+    is equivalent to LAST_SKIP_CACHE; SKIP_COUNTER
+
+for examples see get_bits, show_bits, skip_bits, get_vlc
+*/
+
+#ifdef ALT_BITSTREAM_READER
+/* UPDATE_CACHE loads 32 bits from a byte boundary and discards up to 7 of
+ * them to reach the current bit, leaving at least 25 valid bits. */
+#   define MIN_CACHE_BITS 25
+
+
+/* Declares the local bit index and cache word used by the other macros. */
+/* ROCKBOX: work around "set but not used" warning */
+#   define OPEN_READER(name, gb)\
+        unsigned int name##_index= (gb)->index;\
+        int name##_cache __attribute__((unused)) = 0;\
+
+/* Writes the local bit index back into the context. */
+#   define CLOSE_READER(name, gb)\
+        (gb)->index= name##_index;\
+
+/* UPDATE_CACHE reloads the cache from the byte containing the current bit;
+ * SKIP_CACHE drops num bits from the cache only (the index is untouched). */
+# ifdef ALT_BITSTREAM_READER_LE
+#   define UPDATE_CACHE(name, gb)\
+        name##_cache= AV_RL32( ((const uint8_t *)(gb)->buffer)+(name##_index>>3) ) >> (name##_index&0x07);\
+
+#   define SKIP_CACHE(name, gb, num)\
+        name##_cache >>= (num);
+# else
+#   define UPDATE_CACHE(name, gb)\
+        name##_cache= AV_RB32( ((const uint8_t *)(gb)->buffer)+(name##_index>>3) ) << (name##_index&0x07);\
+
+#   define SKIP_CACHE(name, gb, num)\
+        name##_cache <<= (num);
+# endif
+
+/* Advances the bit index by num without touching the cache. */
+// FIXME name?
+#   define SKIP_COUNTER(name, gb, num)\
+        name##_index += (num);\
+
+/* Drops num bits from both the cache and the index. */
+#   define SKIP_BITS(name, gb, num)\
+        {\
+            SKIP_CACHE(name, gb, num)\
+            SKIP_COUNTER(name, gb, num)\
+        }\
+
+/* For this reader the cache is rebuilt from the index on every
+ * UPDATE_CACHE, so a final skip only needs to move the index. */
+#   define LAST_SKIP_BITS(name, gb, num) SKIP_COUNTER(name, gb, num)
+#   define LAST_SKIP_CACHE(name, gb, num) ;
+
+/* Peek at the next num bits, unsigned / sign-extended.
+ * NOTE(review): zero_extend() is not defined in this header -- the LE
+ * variant needs it supplied by the includer; confirm. */
+# ifdef ALT_BITSTREAM_READER_LE
+#   define SHOW_UBITS(name, gb, num)\
+        zero_extend(name##_cache, num)
+
+#   define SHOW_SBITS(name, gb, num)\
+        sign_extend(name##_cache, num)
+# else
+#   define SHOW_UBITS(name, gb, num)\
+        NEG_USR32(name##_cache, num)
+
+#   define SHOW_SBITS(name, gb, num)\
+        NEG_SSR32(name##_cache, num)
+# endif
+
+/* Raw cache word as an unsigned 32-bit value. */
+#   define GET_CACHE(name, gb)\
+        ((uint32_t)name##_cache)
+
+/** Return the current read position in bits, counted from the buffer start. */
+static inline int get_bits_count(const GetBitContext *s)
+{
+    const int position = s->index;
+    return position;
+}
+
+/** Move the read position by n bits; no cache is involved for this reader. */
+static inline void skip_bits_long(GetBitContext *s, int n)
+{
+    s->index = s->index + n;
+}
+
+#elif defined LIBMPEG2_BITSTREAM_READER
+//libmpeg2 like reader
+
+/* Reader that refills its cache 16 bits at a time (see UPDATE_CACHE). */
+#   define MIN_CACHE_BITS 17
+
+/* Copies bit_count, cache and buffer_ptr from the context into locals. */
+#   define OPEN_READER(name, gb)\
+        int name##_bit_count=(gb)->bit_count;\
+        int name##_cache= (gb)->cache;\
+        uint8_t * name##_buffer_ptr=(gb)->buffer_ptr;\
+
+/* Stores the three locals back into the context. */
+#   define CLOSE_READER(name, gb)\
+        (gb)->bit_count= name##_bit_count;\
+        (gb)->cache= name##_cache;\
+        (gb)->buffer_ptr= name##_buffer_ptr;\
+
+/* When bit_count is non-negative, merges the next big-endian 16-bit word
+ * into the cache and advances the pointer by two bytes. */
+#   define UPDATE_CACHE(name, gb)\
+    if(name##_bit_count >= 0){\
+        name##_cache+= AV_RB16(name##_buffer_ptr) << name##_bit_count; \
+        name##_buffer_ptr+=2;\
+        name##_bit_count-= 16;\
+    }\
+
+/* Drops num bits from the cache only. */
+#   define SKIP_CACHE(name, gb, num)\
+        name##_cache <<= (num);\
+
+/* Accounts for num consumed bits in bit_count only. */
+#   define SKIP_COUNTER(name, gb, num)\
+        name##_bit_count += (num);\
+
+/* Drops num bits from both the cache and the counter. */
+#   define SKIP_BITS(name, gb, num)\
+        {\
+            SKIP_CACHE(name, gb, num)\
+            SKIP_COUNTER(name, gb, num)\
+        }\
+
+/* This reader keeps its cache across calls, so the final skip must update it too. */
+#   define LAST_SKIP_BITS(name, gb, num) SKIP_BITS(name, gb, num)
+#   define LAST_SKIP_CACHE(name, gb, num) SKIP_CACHE(name, gb, num)
+
+/* Peek at the next num bits, zero-filled. */
+#   define SHOW_UBITS(name, gb, num)\
+        NEG_USR32(name##_cache, num)
+
+/* Peek at the next num bits, sign-extended. */
+#   define SHOW_SBITS(name, gb, num)\
+        NEG_SSR32(name##_cache, num)
+
+/* Raw cache word as an unsigned 32-bit value. */
+#   define GET_CACHE(name, gb)\
+        ((uint32_t)name##_cache)
+
+/* Bits consumed so far: bytes fetched into the cache times 8, corrected by
+ * the 16-bit refill granularity and the current bit_count. */
+static inline int get_bits_count(const GetBitContext *s){
+    return (s->buffer_ptr - s->buffer)*8 - 16 + s->bit_count;
+}
+
+/* Skip n bits: advance the pointer by whole 16-bit words, keep the
+ * remainder in bit_count, then rebuild the cache from the two bytes just
+ * before the new pointer and refill it. */
+static inline void skip_bits_long(GetBitContext *s, int n){
+    OPEN_READER(re, s)
+    re_bit_count += n;
+    re_buffer_ptr += 2*(re_bit_count>>4);
+    re_bit_count &= 15;
+    re_cache = ((re_buffer_ptr[-2]<<8) + re_buffer_ptr[-1]) << (16+re_bit_count);
+    UPDATE_CACHE(re, s)
+    CLOSE_READER(re, s)
+}
+
+#elif defined A32_BITSTREAM_READER
+
+/* Reader that refills from aligned 32-bit words into a two-word cache.
+ * NOTE(review): av_be2ne32() used below is not defined in this header. */
+#   define MIN_CACHE_BITS 32
+
+/* Copies bit_count, both cache words and buffer_ptr into locals. */
+#   define OPEN_READER(name, gb)\
+        int name##_bit_count=(gb)->bit_count;\
+        uint32_t name##_cache0= (gb)->cache0;\
+        uint32_t name##_cache1= (gb)->cache1;\
+        uint32_t * name##_buffer_ptr=(gb)->buffer_ptr;\
+
+/* Stores the locals back into the context. */
+#   define CLOSE_READER(name, gb)\
+        (gb)->bit_count= name##_bit_count;\
+        (gb)->cache0= name##_cache0;\
+        (gb)->cache1= name##_cache1;\
+        (gb)->buffer_ptr= name##_buffer_ptr;\
+
+/* When bit_count is positive, splits the next 32-bit word across
+ * cache0/cache1 and advances the pointer by one word. */
+#   define UPDATE_CACHE(name, gb)\
+    if(name##_bit_count > 0){\
+        const uint32_t next= av_be2ne32( *name##_buffer_ptr );\
+        name##_cache0 |= NEG_USR32(next,name##_bit_count);\
+        name##_cache1 |= next<<name##_bit_count;\
+        name##_buffer_ptr++;\
+        name##_bit_count-= 32;\
+    }\
+
+/* Shifts the cache0:cache1 pair left by num bits (asm on x86, C elsewhere). */
+#if ARCH_X86
+#   define SKIP_CACHE(name, gb, num)\
+        __asm__(\
+            "shldl %2, %1, %0          \n\t"\
+            "shll %2, %1               \n\t"\
+            : "+r" (name##_cache0), "+r" (name##_cache1)\
+            : "Ic" ((uint8_t)(num))\
+           );
+#else
+#   define SKIP_CACHE(name, gb, num)\
+        name##_cache0 <<= (num);\
+        name##_cache0 |= NEG_USR32(name##_cache1,num);\
+        name##_cache1 <<= (num);
+#endif
+
+/* Accounts for num consumed bits in bit_count only. */
+#   define SKIP_COUNTER(name, gb, num)\
+        name##_bit_count += (num);\
+
+/* Drops num bits from both the cache and the counter. */
+#   define SKIP_BITS(name, gb, num)\
+        {\
+            SKIP_CACHE(name, gb, num)\
+            SKIP_COUNTER(name, gb, num)\
+        }\
+
+/* This reader keeps its cache across calls, so the final skip must update it too. */
+#   define LAST_SKIP_BITS(name, gb, num) SKIP_BITS(name, gb, num)
+#   define LAST_SKIP_CACHE(name, gb, num) SKIP_CACHE(name, gb, num)
+
+/* Peek at the next num bits of cache0, zero-filled. */
+#   define SHOW_UBITS(name, gb, num)\
+        NEG_USR32(name##_cache0, num)
+
+/* Peek at the next num bits of cache0, sign-extended. */
+#   define SHOW_SBITS(name, gb, num)\
+        NEG_SSR32(name##_cache0, num)
+
+/* First cache word. */
+#   define GET_CACHE(name, gb)\
+        (name##_cache0)
+
+/* Bits consumed so far: byte distance of buffer_ptr from the buffer start
+ * times 8, corrected by the 32-bit refill granularity and bit_count. */
+static inline int get_bits_count(const GetBitContext *s){
+    return ((uint8_t*)s->buffer_ptr - s->buffer)*8 - 32 + s->bit_count;
+}
+
+/* Skip n bits: advance the pointer by whole 32-bit words, keep the
+ * remainder in bit_count, then rebuild cache0 from the word just before the
+ * new pointer, clear cache1 and refill. */
+static inline void skip_bits_long(GetBitContext *s, int n){
+    OPEN_READER(re, s)
+    re_bit_count += n;
+    re_buffer_ptr += re_bit_count>>5;
+    re_bit_count &= 31;
+    re_cache0 = av_be2ne32( re_buffer_ptr[-1] ) << re_bit_count;
+    re_cache1 = 0;
+    UPDATE_CACHE(re, s)
+    CLOSE_READER(re, s)
+}
+
+#endif
+
+/**
+ * read mpeg1 dc style vlc (sign bit + mantissa with no MSB).
+ * if MSB not set it is negative
+ * @param n length in bits
+ * @author BERO
+ */
+static inline int get_xbits(GetBitContext *s, int n){
+    register int sign;
+    register int32_t cache;
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+    cache = GET_CACHE(re,s);
+    /* all ones when the leading bit is clear (negative value), otherwise 0 */
+    sign=(~cache)>>31;
+    LAST_SKIP_BITS(re, s, n)
+    CLOSE_READER(re, s)
+    /* take the n leading bits (inverted for negatives); "(v ^ sign) - sign"
+     * then negates v when sign is -1 and leaves it unchanged when sign is 0 */
+    return (NEG_USR32(sign ^ cache, n) ^ sign) - sign;
+}
+
+/**
+ * Read n bits and return them sign-extended (via SHOW_SBITS), advancing
+ * the reader by n bits.
+ */
+static inline int get_sbits(GetBitContext *s, int n){
+    register int tmp;
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+    tmp= SHOW_SBITS(re, s, n);
+    LAST_SKIP_BITS(re, s, n)
+    CLOSE_READER(re, s)
+    return tmp;
+}
+
+/**
+ * reads 1-17 bits.
+ * Note, the alt bitstream reader can read up to 25 bits, but the libmpeg2 reader can't
+ *
+ * n must not be 0: the big-endian SHOW_UBITS shifts by 32-n.
+ * @return the bits as an unsigned value; the reader advances by n bits
+ */
+static inline unsigned int get_bits(GetBitContext *s, int n){
+    register int tmp;
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+    tmp= SHOW_UBITS(re, s, n);
+    LAST_SKIP_BITS(re, s, n)
+    CLOSE_READER(re, s)
+    return tmp;
+}
+
+/**
+ * shows 1-17 bits.
+ * Note, the alt bitstream reader can read up to 25 bits, but the libmpeg2 reader can't
+ *
+ * Same as get_bits() except that the reader state is not written back, so
+ * the position is unchanged.
+ */
+static inline unsigned int show_bits(GetBitContext *s, int n){
+    register int tmp;
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+    tmp= SHOW_UBITS(re, s, n);
+//    CLOSE_READER(re, s)
+    return tmp;
+}
+
+/** Advance the reader by n bits without returning them. */
+static inline void skip_bits(GetBitContext *s, int n){
+ //Note gcc seems to optimize this to s->index+=n for the ALT_READER :))
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+    LAST_SKIP_BITS(re, s, n)
+    CLOSE_READER(re, s)
+}
+
+/**
+ * Read a single bit and advance the reader by one position.
+ * The ALT reader fetches the bit straight from the buffer byte that
+ * contains it; the other readers go through get_bits().
+ */
+static inline unsigned int get_bits1(GetBitContext *s){
+#ifdef ALT_BITSTREAM_READER
+    const unsigned int pos= s->index;
+    const unsigned int byte= s->buffer[ pos>>3 ];
+    const unsigned int bit_in_byte= pos&0x07;
+
+    s->index= pos + 1;
+#ifdef ALT_BITSTREAM_READER_LE
+    /* little-endian bit order: stream bit 0 is the byte's LSB */
+    return (byte >> bit_in_byte) & 1;
+#else
+    /* big-endian bit order: stream bit 0 is the byte's MSB */
+    return (byte >> (7 - bit_in_byte)) & 1;
+#endif
+#else
+    return get_bits(s, 1);
+#endif
+}
+
+/** Peek at the next bit without consuming it. */
+static inline unsigned int show_bits1(GetBitContext *s)
+{
+    const unsigned int next_bit = show_bits(s, 1);
+    return next_bit;
+}
+
+/** Advance the reader by exactly one bit. */
+static inline void skip_bits1(GetBitContext *s)
+{
+    skip_bits(s, 1);
+}
+
+/**
+ * reads 0-32 bits.
+ *
+ * Requests that fit the reader cache (n <= MIN_CACHE_BITS) are served by a
+ * single get_bits() call; wider ones are read as a 16-bit piece followed by
+ * the remaining n-16 bits and recombined.
+ *
+ * @param s bitstream reader state
+ * @param n number of bits to read, 0..32
+ * @return the bits as an unsigned value; 0 when n is 0
+ */
+static inline unsigned int get_bits_long(GetBitContext *s, int n){
+    /* get_bits() must not be asked for 0 bits: the big-endian SHOW_UBITS
+     * would shift a 32-bit value by 32, which is undefined */
+    if(!n) return 0;
+    if(n<=MIN_CACHE_BITS) return get_bits(s, n);
+    else{
+#ifdef ALT_BITSTREAM_READER_LE
+        unsigned int ret= get_bits(s, 16);
+        return ret | (get_bits(s, n-16) << 16);
+#else
+        /* kept unsigned: for n == 32 the first piece occupies bit 31 */
+        unsigned int ret= get_bits(s, 16) << (n-16);
+        return ret | get_bits(s, n-16);
+#endif
+    }
+}
+
+/**
+ * Read 0-32 bits and interpret them as a signed (two's-complement) number
+ * of that width.
+ */
+static inline int get_sbits_long(GetBitContext *s, int n)
+{
+    const unsigned int raw = get_bits_long(s, n);
+    return sign_extend(raw, n);
+}
+
+/**
+ * Peek at 0-32 bits without consuming them.
+ * Short requests go through show_bits(); longer ones are read from a
+ * throw-away copy of the context so the caller's position is untouched.
+ */
+static inline unsigned int show_bits_long(GetBitContext *s, int n)
+{
+    GetBitContext scratch;
+
+    if(n <= MIN_CACHE_BITS)
+        return show_bits(s, n);
+
+    scratch = *s;
+    return get_bits_long(&scratch, n);
+}
+
+/* not used
+static inline int check_marker(GetBitContext *s, const char *msg)
+{
+    int bit= get_bits1(s);
+    if(!bit)
+        av_log(NULL, AV_LOG_INFO, "Marker bit missing %s\n", msg);
+
+    return bit;
+}
+*/
+
+/**
+ * init GetBitContext.
+ * @param buffer bitstream buffer, must be FF_INPUT_BUFFER_PADDING_SIZE bytes larger than the actual read bits
+ * because some optimized bitstream readers read 32 or 64 bit at once and could read over the end
+ * @param bit_size the size of the buffer in bits
+ *
+ * A negative bit_size, or one so large that rounding it up to whole bytes
+ * does not fit in an int, yields an empty reader (NULL buffer, size 0).
+ *
+ * While GetBitContext stores the buffer size, for performance reasons you are
+ * responsible for checking for the buffer end yourself (take advantage of the padding)!
+ */
+static inline void init_get_bits(GetBitContext *s,
+                   const uint8_t *buffer, int bit_size)
+{
+    /* Round up to whole bytes in unsigned arithmetic: the signed expression
+     * bit_size+7 overflows (undefined behaviour) when bit_size is within 7
+     * of the largest int. A sum with its top bit set is exactly the case
+     * the signed version relied on wrapping negative, so it is rejected
+     * explicitly instead. */
+    const unsigned int padded_bits= (unsigned int)bit_size + 7u;
+    const unsigned int top_bit= 1u << (8*sizeof(unsigned int) - 1);
+    int buffer_size= padded_bits>>3;
+    if(bit_size < 0 || (padded_bits & top_bit)) {
+        buffer_size = bit_size = 0;
+        buffer = NULL;
+    }
+
+    s->buffer= buffer;
+    s->size_in_bits= bit_size;
+    s->buffer_end= buffer + buffer_size;
+#ifdef ALT_BITSTREAM_READER
+    s->index=0;
+#elif defined LIBMPEG2_BITSTREAM_READER
+    /* start from the enclosing 2-byte boundary and skip the extra byte, if any */
+    s->buffer_ptr = (uint8_t*)((intptr_t)buffer&(~1));
+    s->bit_count = 16 + 8*((intptr_t)buffer&1);
+    skip_bits_long(s, 0);
+#elif defined A32_BITSTREAM_READER
+    /* start from the enclosing 4-byte boundary and skip the extra bytes, if any */
+    s->buffer_ptr = (uint32_t*)((intptr_t)buffer&(~3));
+    s->bit_count = 32 + 8*((intptr_t)buffer&3);
+    skip_bits_long(s, 0);
+#endif
+}
+
+/** Skip forward to the next byte boundary; does nothing when already aligned. */
+static inline void align_get_bits(GetBitContext *s)
+{
+    const int padding = (-get_bits_count(s)) & 7;
+
+    if(padding != 0)
+        skip_bits(s, padding);
+}
+
+/* Convenience form of init_vlc_sparse() without a symbol table: the
+ * symbols arguments are passed as NULL, 0, 0. */
+#define init_vlc(vlc, nb_bits, nb_codes,\
+                 bits, bits_wrap, bits_size,\
+                 codes, codes_wrap, codes_size,\
+                 flags)\
+        init_vlc_sparse(vlc, nb_bits, nb_codes,\
+                 bits, bits_wrap, bits_size,\
+                 codes, codes_wrap, codes_size,\
+                 NULL, 0, 0, flags)
+
+/* Builds a VLC table; defined outside this header.
+ * NOTE(review): the *_wrap / *_size pairs presumably give the stride and
+ * element size of the bits, codes and symbols arrays -- confirm against
+ * the implementation. */
+int init_vlc_sparse(VLC *vlc, int nb_bits, int nb_codes,
+             const void *bits, int bits_wrap, int bits_size,
+             const void *codes, int codes_wrap, int codes_size,
+             const void *symbols, int symbols_wrap, int symbols_size,
+             int flags);
+/* Values for the flags argument; INIT_VLC_USE_NEW_STATIC is the one
+ * INIT_VLC_STATIC() passes for caller-provided static tables. */
+#define INIT_VLC_LE         2
+#define INIT_VLC_USE_NEW_STATIC 4
+void free_vlc(VLC *vlc);
+
+/* Initialises vlc on top of a function-local static table of static_size
+ * entries (declared with the extra attribute text attr), then fills it via
+ * init_vlc() with INIT_VLC_USE_NEW_STATIC. */
+#define INIT_VLC_STATIC(vlc, bits, a,b,c,d,e,f,g, static_size, attr)\
+{\
+    static VLC_TYPE table[static_size][2] attr;\
+    (vlc)->table= table;\
+    (vlc)->table_allocated= static_size;\
+    init_vlc(vlc, bits, a,b,c,d,e,f,g, INIT_VLC_USE_NEW_STATIC);\
+}
+
+
+/**
+ * Decode one VLC symbol into code using an already opened reader (name).
+ *
+ * The first lookup uses the next bits bits as table index. A negative bit
+ * count in the entry means the code continues in a sub-table: the entry's
+ * code is the sub-table offset and -n further bits index into it. Up to
+ * max_depth lookups (at most 3) are performed.
+ *
+ * If the vlc code is invalid and max_depth=1, then no bits will be removed.
+ * If the vlc code is invalid and max_depth>1, then the number of bits removed
+ * is undefined.
+ */
+#define GET_VLC(code, name, gb, table, bits, max_depth)\
+{\
+    int n, nb_bits;\
+    unsigned int index;\
+\
+    index= SHOW_UBITS(name, gb, bits);\
+    code = table[index][0];\
+    n    = table[index][1];\
+\
+    if(max_depth > 1 && n < 0){\
+        LAST_SKIP_BITS(name, gb, bits)\
+        UPDATE_CACHE(name, gb)\
+\
+        nb_bits = -n;\
+\
+        index= SHOW_UBITS(name, gb, nb_bits) + code;\
+        code = table[index][0];\
+        n    = table[index][1];\
+        if(max_depth > 2 && n < 0){\
+            LAST_SKIP_BITS(name, gb, nb_bits)\
+            UPDATE_CACHE(name, gb)\
+\
+            nb_bits = -n;\
+\
+            index= SHOW_UBITS(name, gb, nb_bits) + code;\
+            code = table[index][0];\
+            n    = table[index][1];\
+        }\
+    }\
+    SKIP_BITS(name, gb, n)\
+}
+
+/* Decode one run/level pair from an RL_VLC_ELEM table using an already
+ * opened reader (name). A negative len in the first entry means the code
+ * continues in a sub-table at offset level, indexed with -len more bits
+ * (only when max_depth > 1). need_update selects whether the cache is
+ * refilled before that second lookup. */
+#define GET_RL_VLC(level, run, name, gb, table, bits, max_depth, need_update)\
+{\
+    int n, nb_bits;\
+    unsigned int index;\
+\
+    index= SHOW_UBITS(name, gb, bits);\
+    level = table[index].level;\
+    n     = table[index].len;\
+\
+    if(max_depth > 1 && n < 0){\
+        SKIP_BITS(name, gb, bits)\
+        if(need_update){\
+            UPDATE_CACHE(name, gb)\
+        }\
+\
+        nb_bits = -n;\
+\
+        index= SHOW_UBITS(name, gb, nb_bits) + level;\
+        level = table[index].level;\
+        n     = table[index].len;\
+    }\
+    run= table[index].run;\
+    SKIP_BITS(name, gb, n)\
+}
+
+
+/**
+ * parses a vlc code, faster than get_vlc()
+ * @param bits is the number of bits which will be read at once, must be
+ *             identical to nb_bits in init_vlc()
+ * @param max_depth is the number of times "bits" bits must be read to
+ *                  completely read the longest vlc code
+ *                  = (max_vlc_length + bits - 1) / bits
+ * @return the decoded code from the table
+ */
+static av_always_inline int get_vlc2(GetBitContext *s, VLC_TYPE (*table)[2],
+                                  int bits, int max_depth)
+{
+    int code;
+
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+
+    /* GET_VLC also consumes the bits of the decoded code */
+    GET_VLC(code, re, s, table, bits, max_depth)
+
+    CLOSE_READER(re, s)
+    return code;
+}
+
+//#define TRACE
+
+#ifdef TRACE
+/* Log the low n bits of bits as binary digits, MSB first, then pad with
+ * spaces up to a width of 24. */
+static inline void print_bin(int bits, int n){
+    int i;
+
+    for(i=n-1; i>=0; i--){
+        av_log(NULL, AV_LOG_DEBUG, "%d", (bits>>i)&1);
+    }
+    for(i=n; i<24; i++)
+        av_log(NULL, AV_LOG_DEBUG, " ");
+}
+
+/* get_bits() wrapper that logs the value read, its width, the bit position
+ * it was read from and the call site. */
+static inline int get_bits_trace(GetBitContext *s, int n, char *file, const char *func, int line){
+    int r= get_bits(s, n);
+
+    print_bin(r, n);
+    av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d bit @%5d in %s %s:%d\n", r, n, r, get_bits_count(s)-n, file, func, line);
+    return r;
+}
+/* get_vlc2() wrapper that logs the raw bits consumed by the code (taken
+ * from a 24-bit peek made before decoding), their count and the result. */
+static inline int get_vlc_trace(GetBitContext *s, VLC_TYPE (*table)[2], int bits, int max_depth, char *file, const char *func, int line){
+    int show= show_bits(s, 24);
+    int pos= get_bits_count(s);
+    int r= get_vlc2(s, table, bits, max_depth);
+    int len= get_bits_count(s) - pos;
+    int bits2= show>>(24-len);
+
+    print_bin(bits2, len);
+
+    av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d vlc @%5d in %s %s:%d\n", bits2, len, r, pos, file, func, line);
+    return r;
+}
+/* get_xbits() wrapper that logs the raw bits (peeked first) and the
+ * decoded signed result. */
+static inline int get_xbits_trace(GetBitContext *s, int n, char *file, const char *func, int line){
+    int show= show_bits(s, n);
+    int r= get_xbits(s, n);
+
+    print_bin(show, n);
+    av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d xbt @%5d in %s %s:%d\n", show, n, r, get_bits_count(s)-n, file, func, line);
+    return r;
+}
+
+#define get_bits(s, n)  get_bits_trace(s, n, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+#define get_bits1(s)    get_bits_trace(s, 1, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+#define get_xbits(s, n) get_xbits_trace(s, n, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+#define get_vlc(s, vlc)            get_vlc_trace(s, (vlc)->table, (vlc)->bits, 3, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+#define get_vlc2(s, tab, bits, max) get_vlc_trace(s, tab, bits, max, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+
+#define tprintf(p, ...) av_log(p, AV_LOG_DEBUG, __VA_ARGS__)
+
+#else //TRACE
+#define tprintf(p, ...) {}
+#endif
+
+/**
+ * Decode a value in 0..2 from one or two bits:
+ * "0" -> 0, "10" -> 1, "11" -> 2.
+ */
+static inline int decode012(GetBitContext *gb)
+{
+    if (!get_bits1(gb))
+        return 0;
+
+    return get_bits1(gb) + 1;
+}
+
+/**
+ * Decode a value in 0..2 from one or two bits:
+ * "1" -> 0, "01" -> 1, "00" -> 2.
+ */
+static inline int decode210(GetBitContext *gb)
+{
+    /* the second bit is only read when the first one is 0 */
+    return get_bits1(gb) ? 0 : 2 - get_bits1(gb);
+}
+
+/**
+ * Number of bits between the current position and the stream size given to
+ * init_get_bits(); negative once the reader has run past the end.
+ */
+static inline int get_bits_left(GetBitContext *gb)
+{
+    const int consumed = get_bits_count(gb);
+
+    return gb->size_in_bits - consumed;
+}
+
+#endif /* AVCODEC_GET_BITS_H */
--- a/lib/rbcodec/codecs/lib/ffmpeg_intreadwrite.h
+++ b/lib/rbcodec/codecs/lib/ffmpeg_intreadwrite.h
@ -0,0 +1,484 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_INTREADWRITE_H
+#define AVUTIL_INTREADWRITE_H
+
+#include <stdint.h>
+/*
+ * Arch-specific headers can provide any combination of
+ * AV_[RW][BLN](16|24|32|64) and AV_(COPY|SWAP|ZERO)(64|128) macros.
+ * Preprocessor symbols must be defined, even if these are implemented
+ * as inline functions.
+ */
+
+/*
+ * Map AV_RNXX <-> AV_R[BL]XX for all variants provided by per-arch headers.
+ */
+/* NOTE(review): hard-coded to little-endian. With this value the
+ * native-order AV_RN / AV_WN fallbacks further down resolve to the
+ * little-endian accessors on every target -- confirm that no caller on a
+ * big-endian build relies on the native-order variants. */
+#define HAVE_BIGENDIAN 0
+#if HAVE_BIGENDIAN
+
+#   if    defined(AV_RN16) && !defined(AV_RB16)
+#       define AV_RB16(p) AV_RN16(p)
+#   elif !defined(AV_RN16) &&  defined(AV_RB16)
+#       define AV_RN16(p) AV_RB16(p)
+#   endif
+
+#   if    defined(AV_WN16) && !defined(AV_WB16)
+#       define AV_WB16(p, v) AV_WN16(p, v)
+#   elif !defined(AV_WN16) &&  defined(AV_WB16)
+#       define AV_WN16(p, v) AV_WB16(p, v)
+#   endif
+
+#   if    defined(AV_RN24) && !defined(AV_RB24)
+#       define AV_RB24(p) AV_RN24(p)
+#   elif !defined(AV_RN24) &&  defined(AV_RB24)
+#       define AV_RN24(p) AV_RB24(p)
+#   endif
+
+#   if    defined(AV_WN24) && !defined(AV_WB24)
+#       define AV_WB24(p, v) AV_WN24(p, v)
+#   elif !defined(AV_WN24) &&  defined(AV_WB24)
+#       define AV_WN24(p, v) AV_WB24(p, v)
+#   endif
+
+#   if    defined(AV_RN32) && !defined(AV_RB32)
+#       define AV_RB32(p) AV_RN32(p)
+#   elif !defined(AV_RN32) &&  defined(AV_RB32)
+#       define AV_RN32(p) AV_RB32(p)
+#   endif
+
+#   if    defined(AV_WN32) && !defined(AV_WB32)
+#       define AV_WB32(p, v) AV_WN32(p, v)
+#   elif !defined(AV_WN32) &&  defined(AV_WB32)
+#       define AV_WN32(p, v) AV_WB32(p, v)
+#   endif
+
+#   if    defined(AV_RN64) && !defined(AV_RB64)
+#       define AV_RB64(p) AV_RN64(p)
+#   elif !defined(AV_RN64) &&  defined(AV_RB64)
+#       define AV_RN64(p) AV_RB64(p)
+#   endif
+
+#   if    defined(AV_WN64) && !defined(AV_WB64)
+#       define AV_WB64(p, v) AV_WN64(p, v)
+#   elif !defined(AV_WN64) &&  defined(AV_WB64)
+#       define AV_WN64(p, v) AV_WB64(p, v)
+#   endif
+
+#else /* HAVE_BIGENDIAN */
+
+#   if    defined(AV_RN16) && !defined(AV_RL16)
+#       define AV_RL16(p) AV_RN16(p)
+#   elif !defined(AV_RN16) &&  defined(AV_RL16)
+#       define AV_RN16(p) AV_RL16(p)
+#   endif
+
+#   if    defined(AV_WN16) && !defined(AV_WL16)
+#       define AV_WL16(p, v) AV_WN16(p, v)
+#   elif !defined(AV_WN16) &&  defined(AV_WL16)
+#       define AV_WN16(p, v) AV_WL16(p, v)
+#   endif
+
+#   if    defined(AV_RN24) && !defined(AV_RL24)
+#       define AV_RL24(p) AV_RN24(p)
+#   elif !defined(AV_RN24) &&  defined(AV_RL24)
+#       define AV_RN24(p) AV_RL24(p)
+#   endif
+
+#   if    defined(AV_WN24) && !defined(AV_WL24)
+#       define AV_WL24(p, v) AV_WN24(p, v)
+#   elif !defined(AV_WN24) &&  defined(AV_WL24)
+#       define AV_WN24(p, v) AV_WL24(p, v)
+#   endif
+
+#   if    defined(AV_RN32) && !defined(AV_RL32)
+#       define AV_RL32(p) AV_RN32(p)
+#   elif !defined(AV_RN32) &&  defined(AV_RL32)
+#       define AV_RN32(p) AV_RL32(p)
+#   endif
+
+#   if    defined(AV_WN32) && !defined(AV_WL32)
+#       define AV_WL32(p, v) AV_WN32(p, v)
+#   elif !defined(AV_WN32) &&  defined(AV_WL32)
+#       define AV_WN32(p, v) AV_WL32(p, v)
+#   endif
+
+#   if    defined(AV_RN64) && !defined(AV_RL64)
+#       define AV_RL64(p) AV_RN64(p)
+#   elif !defined(AV_RN64) &&  defined(AV_RL64)
+#       define AV_RN64(p) AV_RL64(p)
+#   endif
+
+#   if    defined(AV_WN64) && !defined(AV_WL64)
+#       define AV_WL64(p, v) AV_WN64(p, v)
+#   elif !defined(AV_WN64) &&  defined(AV_WL64)
+#       define AV_WN64(p, v) AV_WL64(p, v)
+#   endif
+
+#endif /* !HAVE_BIGENDIAN */
+
+#define HAVE_ATTRIBUTE_PACKED 0
+#define HAVE_FAST_UNALIGNED 0
+/*
+ * Define AV_[RW]N helper macros to simplify definitions not provided
+ * by per-arch headers.
+ */
+
+#if   HAVE_ATTRIBUTE_PACKED
+
+union unaligned_64 { uint64_t l; } __attribute__((packed)) av_alias;
+union unaligned_32 { uint32_t l; } __attribute__((packed)) av_alias;
+union unaligned_16 { uint16_t l; } __attribute__((packed)) av_alias;
+
+#   define AV_RN(s, p) (((const union unaligned_##s *) (p))->l)
+#   define AV_WN(s, p, v) ((((union unaligned_##s *) (p))->l) = (v))
+
+#elif defined(__DECC)
+
+#   define AV_RN(s, p) (*((const __unaligned uint##s##_t*)(p)))
+#   define AV_WN(s, p, v) (*((__unaligned uint##s##_t*)(p)) = (v))
+
+#elif HAVE_FAST_UNALIGNED
+
+#   define AV_RN(s, p) (((const av_alias##s*)(p))->u##s)
+#   define AV_WN(s, p, v) (((av_alias##s*)(p))->u##s = (v))
+
+#else
+
+#ifndef AV_RB16
+#   define AV_RB16(x)                           \
+    ((((const uint8_t*)(x))[0] << 8) |          \
+      ((const uint8_t*)(x))[1])
+#endif
+#ifndef AV_WB16
+#   define AV_WB16(p, d) do {                   \
+        ((uint8_t*)(p))[1] = (d);               \
+        ((uint8_t*)(p))[0] = (d)>>8;            \
+    } while(0)
+#endif
+
+#ifndef AV_RL16
+#   define AV_RL16(x)                           \
+    ((((const uint8_t*)(x))[1] << 8) |          \
+      ((const uint8_t*)(x))[0])
+#endif
+#ifndef AV_WL16
+#   define AV_WL16(p, d) do {                   \
+        ((uint8_t*)(p))[0] = (d);               \
+        ((uint8_t*)(p))[1] = (d)>>8;            \
+    } while(0)
+#endif
+
+/* Read a big-endian 32-bit value from a possibly unaligned address. */
+#ifndef AV_RB32
+/* Coldfire and ARMv6 and above support unaligned long reads */
+#if defined CPU_COLDFIRE || (defined CPU_ARM && ARM_ARCH >= 6)
+#define AV_RB32(x) (htobe32(*(const uint32_t*)(x)))
+#else
+/* Byte-wise fallback. The top byte is widened to uint32_t before the shift:
+ * a uint8_t promotes to signed int, and shifting a byte >= 0x80 left by 24
+ * overflows it. The result is therefore unsigned, as on the path above. */
+#   define AV_RB32(x)                                   \
+    (((uint32_t)((const uint8_t*)(x))[0] << 24) |       \
+     (((const uint8_t*)(x))[1] << 16) |                 \
+     (((const uint8_t*)(x))[2] <<  8) |                 \
+      ((const uint8_t*)(x))[3])
+#endif
+#endif
+#ifndef AV_WB32
+#   define AV_WB32(p, d) do {                   \
+        ((uint8_t*)(p))[3] = (d);               \
+        ((uint8_t*)(p))[2] = (d)>>8;            \
+        ((uint8_t*)(p))[1] = (d)>>16;           \
+        ((uint8_t*)(p))[0] = (d)>>24;           \
+    } while(0)
+#endif
+
+/* Read a little-endian 32-bit value byte by byte from a possibly unaligned
+ * address. The top byte is widened to uint32_t before the shift: a uint8_t
+ * promotes to signed int, and shifting a byte >= 0x80 left by 24 overflows
+ * it. The result is therefore unsigned. */
+#ifndef AV_RL32
+#   define AV_RL32(x)                                   \
+    (((uint32_t)((const uint8_t*)(x))[3] << 24) |       \
+     (((const uint8_t*)(x))[2] << 16) |                 \
+     (((const uint8_t*)(x))[1] <<  8) |                 \
+      ((const uint8_t*)(x))[0])
+#endif
+#ifndef AV_WL32
+#   define AV_WL32(p, d) do {                   \
+        ((uint8_t*)(p))[0] = (d);               \
+        ((uint8_t*)(p))[1] = (d)>>8;            \
+        ((uint8_t*)(p))[2] = (d)>>16;           \
+        ((uint8_t*)(p))[3] = (d)>>24;           \
+    } while(0)
+#endif
+
+#ifndef AV_RB64
+#   define AV_RB64(x)                                   \
+    (((uint64_t)((const uint8_t*)(x))[0] << 56) |       \
+     ((uint64_t)((const uint8_t*)(x))[1] << 48) |       \
+     ((uint64_t)((const uint8_t*)(x))[2] << 40) |       \
+     ((uint64_t)((const uint8_t*)(x))[3] << 32) |       \
+     ((uint64_t)((const uint8_t*)(x))[4] << 24) |       \
+     ((uint64_t)((const uint8_t*)(x))[5] << 16) |       \
+     ((uint64_t)((const uint8_t*)(x))[6] <<  8) |       \
+      (uint64_t)((const uint8_t*)(x))[7])
+#endif
+#ifndef AV_WB64
+#   define AV_WB64(p, d) do {                   \
+        ((uint8_t*)(p))[7] = (d);               \
+        ((uint8_t*)(p))[6] = (d)>>8;            \
+        ((uint8_t*)(p))[5] = (d)>>16;           \
+        ((uint8_t*)(p))[4] = (d)>>24;           \
+        ((uint8_t*)(p))[3] = (d)>>32;           \
+        ((uint8_t*)(p))[2] = (d)>>40;           \
+        ((uint8_t*)(p))[1] = (d)>>48;           \
+        ((uint8_t*)(p))[0] = (d)>>56;           \
+    } while(0)
+#endif
+
+#ifndef AV_RL64
+#   define AV_RL64(x)                                   \
+    (((uint64_t)((const uint8_t*)(x))[7] << 56) |       \
+     ((uint64_t)((const uint8_t*)(x))[6] << 48) |       \
+     ((uint64_t)((const uint8_t*)(x))[5] << 40) |       \
+     ((uint64_t)((const uint8_t*)(x))[4] << 32) |       \
+     ((uint64_t)((const uint8_t*)(x))[3] << 24) |       \
+     ((uint64_t)((const uint8_t*)(x))[2] << 16) |       \
+     ((uint64_t)((const uint8_t*)(x))[1] <<  8) |       \
+      (uint64_t)((const uint8_t*)(x))[0])
+#endif
+#ifndef AV_WL64
+#   define AV_WL64(p, d) do {                   \
+        ((uint8_t*)(p))[0] = (d);               \
+        ((uint8_t*)(p))[1] = (d)>>8;            \
+        ((uint8_t*)(p))[2] = (d)>>16;           \
+        ((uint8_t*)(p))[3] = (d)>>24;           \
+        ((uint8_t*)(p))[4] = (d)>>32;           \
+        ((uint8_t*)(p))[5] = (d)>>40;           \
+        ((uint8_t*)(p))[6] = (d)>>48;           \
+        ((uint8_t*)(p))[7] = (d)>>56;           \
+    } while(0)
+#endif
+
+#if HAVE_BIGENDIAN
+#   define AV_RN(s, p)    AV_RB##s(p)
+#   define AV_WN(s, p, v) AV_WB##s(p, v)
+#else
+#   define AV_RN(s, p)    AV_RL##s(p)
+#   define AV_WN(s, p, v) AV_WL##s(p, v)
+#endif
+
+#endif /* HAVE_FAST_UNALIGNED */
+
+#ifndef AV_RN16
+#   define AV_RN16(p) AV_RN(16, p)
+#endif
+
+#ifndef AV_RN32
+#   define AV_RN32(p) AV_RN(32, p)
+#endif
+
+#ifndef AV_RN64
+#   define AV_RN64(p) AV_RN(64, p)
+#endif
+
+#ifndef AV_WN16
+#   define AV_WN16(p, v) AV_WN(16, p, v)
+#endif
+
+#ifndef AV_WN32
+#   define AV_WN32(p, v) AV_WN(32, p, v)
+#endif
+
+#ifndef AV_WN64
+#   define AV_WN64(p, v) AV_WN(64, p, v)
+#endif
+
+#if HAVE_BIGENDIAN
+#   define AV_RB(s, p)    AV_RN##s(p)
+#   define AV_WB(s, p, v) AV_WN##s(p, v)
+#   define AV_RL(s, p)    bswap_##s(AV_RN##s(p))
+#   define AV_WL(s, p, v) AV_WN##s(p, bswap_##s(v))
+#else
+#   define AV_RB(s, p)    bswap_##s(AV_RN##s(p))
+#   define AV_WB(s, p, v) AV_WN##s(p, bswap_##s(v))
+#   define AV_RL(s, p)    AV_RN##s(p)
+#   define AV_WL(s, p, v) AV_WN##s(p, v)
+#endif
+
+#define AV_RB8(x)     (((const uint8_t*)(x))[0])
+#define AV_WB8(p, d)  do { ((uint8_t*)(p))[0] = (d); } while(0)
+
+#define AV_RL8(x)     AV_RB8(x)
+#define AV_WL8(p, d)  AV_WB8(p, d)
+
+#ifndef AV_RB16
+#   define AV_RB16(p)    AV_RB(16, p)
+#endif
+#ifndef AV_WB16
+#   define AV_WB16(p, v) AV_WB(16, p, v)
+#endif
+
+#ifndef AV_RL16
+#   define AV_RL16(p)    AV_RL(16, p)
+#endif
+#ifndef AV_WL16
+#   define AV_WL16(p, v) AV_WL(16, p, v)
+#endif
+
+#ifndef AV_RB32
+#   define AV_RB32(p)    AV_RB(32, p)
+#endif
+#ifndef AV_WB32
+#   define AV_WB32(p, v) AV_WB(32, p, v)
+#endif
+
+#ifndef AV_RL32
+#   define AV_RL32(p)    AV_RL(32, p)
+#endif
+#ifndef AV_WL32
+#   define AV_WL32(p, v) AV_WL(32, p, v)
+#endif
+
+#ifndef AV_RB64
+#   define AV_RB64(p)    AV_RB(64, p)
+#endif
+#ifndef AV_WB64
+#   define AV_WB64(p, v) AV_WB(64, p, v)
+#endif
+
+#ifndef AV_RL64
+#   define AV_RL64(p)    AV_RL(64, p)
+#endif
+#ifndef AV_WL64
+#   define AV_WL64(p, v) AV_WL(64, p, v)
+#endif
+
+#ifndef AV_RB24
+#   define AV_RB24(x)                           \
+    ((((const uint8_t*)(x))[0] << 16) |         \
+     (((const uint8_t*)(x))[1] <<  8) |         \
+      ((const uint8_t*)(x))[2])
+#endif
+#ifndef AV_WB24
+#   define AV_WB24(p, d) do {                   \
+        ((uint8_t*)(p))[2] = (d);               \
+        ((uint8_t*)(p))[1] = (d)>>8;            \
+        ((uint8_t*)(p))[0] = (d)>>16;           \
+    } while(0)
+#endif
+
+#ifndef AV_RL24
+#   define AV_RL24(x)                           \
+    ((((const uint8_t*)(x))[2] << 16) |         \
+     (((const uint8_t*)(x))[1] <<  8) |         \
+      ((const uint8_t*)(x))[0])
+#endif
+#ifndef AV_WL24
+#   define AV_WL24(p, d) do {                   \
+        ((uint8_t*)(p))[0] = (d);               \
+        ((uint8_t*)(p))[1] = (d)>>8;            \
+        ((uint8_t*)(p))[2] = (d)>>16;           \
+    } while(0)
+#endif
+
+/*
+ * The AV_[RW]NA macros access naturally aligned data
+ * in a type-safe way.
+ */
+
+#define AV_RNA(s, p)    (((const av_alias##s*)(p))->u##s)
+#define AV_WNA(s, p, v) (((av_alias##s*)(p))->u##s = (v))
+
+#ifndef AV_RN16A
+#   define AV_RN16A(p) AV_RNA(16, p)
+#endif
+
+#ifndef AV_RN32A
+#   define AV_RN32A(p) AV_RNA(32, p)
+#endif
+
+#ifndef AV_RN64A
+#   define AV_RN64A(p) AV_RNA(64, p)
+#endif
+
+#ifndef AV_WN16A
+#   define AV_WN16A(p, v) AV_WNA(16, p, v)
+#endif
+
+#ifndef AV_WN32A
+#   define AV_WN32A(p, v) AV_WNA(32, p, v)
+#endif
+
+#ifndef AV_WN64A
+#   define AV_WN64A(p, v) AV_WNA(64, p, v)
+#endif
+
+/* Parameters for AV_COPY*, AV_SWAP*, AV_ZERO* must be
+ * naturally aligned. They may be implemented using MMX,
+ * so emms_c() must be called before using any float code
+ * afterwards.
+ */
+
+#define AV_COPY(n, d, s) \
+    (((av_alias##n*)(d))->u##n = ((const av_alias##n*)(s))->u##n)
+
+#ifndef AV_COPY16
+#   define AV_COPY16(d, s) AV_COPY(16, d, s)
+#endif
+
+#ifndef AV_COPY32
+#   define AV_COPY32(d, s) AV_COPY(32, d, s)
+#endif
+
+#ifndef AV_COPY64
+#   define AV_COPY64(d, s) AV_COPY(64, d, s)
+#endif
+
+#ifndef AV_COPY128
+#   define AV_COPY128(d, s)                    \
+    do {                                       \
+        AV_COPY64(d, s);                       \
+        AV_COPY64((char*)(d)+8, (char*)(s)+8); \
+    } while(0)
+#endif
+
+#define AV_SWAP(n, a, b) FFSWAP(av_alias##n, *(av_alias##n*)(a), *(av_alias##n*)(b))
+
+#ifndef AV_SWAP64
+#   define AV_SWAP64(a, b) AV_SWAP(64, a, b)
+#endif
+
+#define AV_ZERO(n, d) (((av_alias##n*)(d))->u##n = 0)
+
+#ifndef AV_ZERO16
+#   define AV_ZERO16(d) AV_ZERO(16, d)
+#endif
+
+#ifndef AV_ZERO32
+#   define AV_ZERO32(d) AV_ZERO(32, d)
+#endif
+
+#ifndef AV_ZERO64
+#   define AV_ZERO64(d) AV_ZERO(64, d)
+#endif
+
+#ifndef AV_ZERO128
+#   define AV_ZERO128(d)         \
+    do {                         \
+        AV_ZERO64(d);            \
+        AV_ZERO64((char*)(d)+8); \
+    } while(0)
+#endif
+
+#endif /* AVUTIL_INTREADWRITE_H */
--- a/lib/rbcodec/codecs/lib/ffmpeg_put_bits.h
+++ b/lib/rbcodec/codecs/lib/ffmpeg_put_bits.h
@ -0,0 +1,323 @@
+/*
+ * copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file libavcodec/put_bits.h
+ * bitstream writer API
+ */
+
+#ifndef AVCODEC_PUT_BITS_H
+#define AVCODEC_PUT_BITS_H
+
+#include <stdint.h>
+#include <stdlib.h>
+#include "ffmpeg_bswap.h"
+#include "ffmpeg_intreadwrite.h"
+
+/* Logging is compiled out: any av_log(...) call expands to nothing. */
+#define av_log(...)
+/* With this set to 0, put_bits() tests the alignment of buf_ptr and stores
+ * bytewise (AV_WL32/AV_WB32) when it is not a multiple of 4. */
+#define HAVE_FAST_UNALIGNED 0
+
+/* State of one bitstream writer.
+ * buf and buf_end must be present and used by every alternative writer. */
+typedef struct PutBitContext {
+#ifdef ALT_BITSTREAM_WRITER
+    uint8_t *buf, *buf_end;  /* start of the buffer / buf + size */
+    int index;               /* number of bits written so far */
+#else
+    uint32_t bit_buf;        /* bits accepted but not yet stored in buf */
+    int bit_left;            /* free bits in bit_buf; 32 means it is empty */
+    uint8_t *buf, *buf_ptr, *buf_end; /* start / next store position / buf + size */
+#endif
+    int size_in_bits;        /* 8 * buffer size, set by init_put_bits() */
+} PutBitContext;
+
+/**
+ * Prepares the PutBitContext s for writing into a caller-supplied buffer.
+ *
+ * A negative buffer_size is treated as "no buffer": the context is then
+ * set up with a NULL buffer of size 0.
+ *
+ * @param s           context to initialise
+ * @param buffer      the buffer where to put bits
+ * @param buffer_size the size in bytes of buffer
+ */
+static inline void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size)
+{
+    if (buffer_size < 0) {
+        buffer      = NULL;
+        buffer_size = 0;
+    }
+
+    s->buf          = buffer;
+    s->buf_end      = buffer + buffer_size;
+    s->size_in_bits = buffer_size * 8;
+#ifdef ALT_BITSTREAM_WRITER
+    s->index = 0;
+    /* this writer ORs bits into the buffer, so the first word starts at 0 */
+    ((uint32_t*)(s->buf))[0] = 0;
+#else
+    /* empty bit buffer: all 32 bits free, nothing stored yet */
+    s->bit_buf  = 0;
+    s->bit_left = 32;
+    s->buf_ptr  = buffer;
+#endif
+}
+
+/**
+ * Returns the total number of bits written to the bitstream so far.
+ *
+ * For the default writer this is the bytes already stored in the buffer
+ * plus the bits still pending in bit_buf.
+ */
+static inline int put_bits_count(PutBitContext *s)
+{
+#ifndef ALT_BITSTREAM_WRITER
+    return 8 * (s->buf_ptr - s->buf) + (32 - s->bit_left);
+#else
+    return s->index;
+#endif
+}
+
+/**
+ * Pads the end of the output stream with zeros.
+ *
+ * The default writer stores the bits still pending in bit_buf one byte at
+ * a time and leaves the bit buffer empty (bit_left == 32, bit_buf == 0).
+ * No check against buf_end is made while storing.
+ *
+ * @param s bitstream writer context
+ */
+static inline void flush_put_bits(PutBitContext *s)
+{
+#ifdef ALT_BITSTREAM_WRITER
+    align_put_bits(s);
+#else
+#ifndef BITSTREAM_WRITER_LE
+    /* Left-justify the pending bits so they leave through the top byte.
+     * Skipped when nothing is pending: bit_left is then 32, and shifting
+     * a 32-bit value by 32 is undefined behaviour. */
+    if (s->bit_left < 32)
+        s->bit_buf<<= s->bit_left;
+#endif
+    while (s->bit_left < 32) {
+        /* XXX: should test end of buffer */
+#ifdef BITSTREAM_WRITER_LE
+        *s->buf_ptr++=s->bit_buf;
+        s->bit_buf>>=8;
+#else
+        *s->buf_ptr++=s->bit_buf >> 24;
+        s->bit_buf<<=8;
+#endif
+        s->bit_left+=8;
+    }
+    s->bit_left=32;
+    s->bit_buf=0;
+#endif
+}
+
+/* With either alternative writer the three helpers below are not declared;
+ * their names are redirected to *_unsupported_here identifiers instead
+ * (presumably so that any use fails to build -- TODO confirm). */
+#if defined(ALT_BITSTREAM_WRITER) || defined(BITSTREAM_WRITER_LE)
+#define align_put_bits align_put_bits_unsupported_here
+#define ff_put_string ff_put_string_unsupported_here
+#define ff_copy_bits ff_copy_bits_unsupported_here
+#else
+/**
+ * Pads the bitstream with zeros up to the next byte boundary.
+ */
+void align_put_bits(PutBitContext *s);
+
+/**
+ * Puts the given string in the bitstream.
+ *
+ * @param terminate_string 0-terminates the written string if value is 1
+ */
+void ff_put_string(PutBitContext *pb, const char *string, int terminate_string);
+
+/**
+ * Copies the content of src to the bitstream.
+ *
+ * @param length the number of bits of src to copy
+ */
+void ff_copy_bits(PutBitContext *pb, const uint8_t *src, int length);
+#endif
+
+/**
+ * Writes up to 31 bits into a bitstream.
+ * Use put_bits32 to write 32 bits.
+ *
+ * @param s     bitstream writer context
+ * @param n     number of bits to write
+ * @param value the bits to write; per the disabled assert below it is
+ *              expected to fit in n bits
+ *
+ * No check against buf_end is made before storing.
+ */
+static inline void put_bits(PutBitContext *s, int n, unsigned int value)
+#ifndef ALT_BITSTREAM_WRITER
+{
+    unsigned int bit_buf;
+    int bit_left;
+
+    //    printf("put_bits=%d %x\n", n, value);
+    //assert(n <= 31 && value < (1U << n));
+
+    /* work on local copies; written back at the end */
+    bit_buf = s->bit_buf;
+    bit_left = s->bit_left;
+
+    //    printf("n=%d value=%x cnt=%d buf=%x\n", n, value, bit_cnt, bit_buf);
+    /* XXX: optimize */
+#ifdef BITSTREAM_WRITER_LE
+    /* little-endian: new bits go above the ones already pending */
+    bit_buf |= value << (32 - bit_left);
+    if (n >= bit_left) {
+        /* the 32-bit word is full: store it, bytewise if buf_ptr is not
+         * 4-byte aligned */
+#if !HAVE_FAST_UNALIGNED
+        if (3 & (intptr_t) s->buf_ptr) {
+            AV_WL32(s->buf_ptr, bit_buf);
+        } else
+#endif
+        *(uint32_t *)s->buf_ptr = le2me_32(bit_buf);
+        s->buf_ptr+=4;
+        /* keep the bits of value that did not fit; the bit_left==32 case
+         * avoids a shift by 32 */
+        bit_buf = (bit_left==32)?0:value >> bit_left;
+        bit_left+=32;
+    }
+    bit_left-=n;
+#else
+    if (n < bit_left) {
+        /* enough room: append value below the pending bits */
+        bit_buf = (bit_buf<<n) | value;
+        bit_left-=n;
+    } else {
+        /* fill the remaining bit_left bits with the top bits of value,
+         * then store the completed word in big-endian order (bytewise if
+         * buf_ptr is not 4-byte aligned) */
+        bit_buf<<=bit_left;
+        bit_buf |= value >> (n - bit_left);
+#if !HAVE_FAST_UNALIGNED
+        if (3 & (intptr_t) s->buf_ptr) {
+            AV_WB32(s->buf_ptr, bit_buf);
+        } else
+#endif
+        *(uint32_t *)s->buf_ptr = be2me_32(bit_buf);
+        //printf("bitbuf = %08x\n", bit_buf);
+        s->buf_ptr+=4;
+        /* value is kept whole; only its low (32 - bit_left) bits count
+         * as pending from here on */
+        bit_left+=32 - n;
+        bit_buf = value;
+    }
+#endif
+
+    s->bit_buf = bit_buf;
+    s->bit_left = bit_left;
+}
+#else  /* ALT_BITSTREAM_WRITER defined */
+{
+    /* This writer keeps a bit index and ORs the new bits straight into the
+     * buffer, clearing or overwriting the following 32-bit word. */
+#    ifdef ALIGNED_BITSTREAM_WRITER
+#        if ARCH_X86
+    __asm__ volatile(
+        "movl %0, %%ecx                 \n\t"
+        "xorl %%eax, %%eax              \n\t"
+        "shrdl %%cl, %1, %%eax          \n\t"
+        "shrl %%cl, %1                  \n\t"
+        "movl %0, %%ecx                 \n\t"
+        "shrl $3, %%ecx                 \n\t"
+        "andl $0xFFFFFFFC, %%ecx        \n\t"
+        "bswapl %1                      \n\t"
+        "orl %1, (%2, %%ecx)            \n\t"
+        "bswapl %%eax                   \n\t"
+        "addl %3, %0                    \n\t"
+        "movl %%eax, 4(%2, %%ecx)       \n\t"
+        : "=&r" (s->index), "=&r" (value)
+        : "r" (s->buf), "r" (n), "0" (s->index), "1" (value<<(-n))
+        : "%eax", "%ecx"
+    );
+#        else
+    int index= s->index;
+    /* 32-bit word that contains bit position index */
+    uint32_t *ptr= ((uint32_t *)s->buf)+(index>>5);
+
+    /* left-justify the n bits in a 32-bit word */
+    value<<= 32-n;
+
+    ptr[0] |= be2me_32(value>>(index&31));
+    /* NOTE(review): when (index&31) == 0 this shifts by 32 -- confirm */
+    ptr[1]  = be2me_32(value<<(32-(index&31)));
+//if(n>24) printf("%d %d\n", n, value);
+    index+= n;
+    s->index= index;
+#        endif
+#    else //ALIGNED_BITSTREAM_WRITER
+#        if ARCH_X86
+    __asm__ volatile(
+        "movl $7, %%ecx                 \n\t"
+        "andl %0, %%ecx                 \n\t"
+        "addl %3, %%ecx                 \n\t"
+        "negl %%ecx                     \n\t"
+        "shll %%cl, %1                  \n\t"
+        "bswapl %1                      \n\t"
+        "movl %0, %%ecx                 \n\t"
+        "shrl $3, %%ecx                 \n\t"
+        "orl %1, (%%ecx, %2)            \n\t"
+        "addl %3, %0                    \n\t"
+        "movl $0, 4(%%ecx, %2)          \n\t"
+        : "=&r" (s->index), "=&r" (value)
+        : "r" (s->buf), "r" (n), "0" (s->index), "1" (value)
+        : "%ecx"
+    );
+#        else
+    int index= s->index;
+    /* byte-granular position: the 32-bit access may be unaligned */
+    uint32_t *ptr= (uint32_t*)(((uint8_t *)s->buf)+(index>>3));
+
+    ptr[0] |= be2me_32(value<<(32-n-(index&7) ));
+    ptr[1] = 0;
+//if(n>24) printf("%d %d\n", n, value);
+    index+= n;
+    s->index= index;
+#        endif
+#    endif //!ALIGNED_BITSTREAM_WRITER
+}
+#endif
+
+/**
+ * Writes the n least significant bits of a signed value into the bitstream.
+ *
+ * @param pb    bitstream writer context
+ * @param n     number of bits to write, 0..31
+ * @param value signed value; only its low n bits are passed to put_bits()
+ */
+static inline void put_sbits(PutBitContext *pb, int n, int32_t value)
+{
+    //assert(n >= 0 && n <= 31);
+
+    /* Build the mask in unsigned arithmetic: for n == 31 the signed
+     * expression (1<<n)-1 would overflow int, which is undefined. */
+    put_bits(pb, n, value & ((1U<<n)-1));
+}
+
+/**
+ * Returns the address of the byte that will receive the next written bit.
+ *
+ * For the default writer this is simply buf_ptr, i.e. bits still pending
+ * in bit_buf are not taken into account.
+ */
+static inline uint8_t* put_bits_ptr(PutBitContext *s)
+{
+#ifndef ALT_BITSTREAM_WRITER
+    return s->buf_ptr;
+#else
+    return &s->buf[s->index >> 3];
+#endif
+}
+
+/**
+ * Skips the given number of bytes.
+ * PutBitContext must be flushed & aligned to a byte boundary before calling this.
+ *
+ * @param s bitstream writer context
+ * @param n number of bytes to advance the write position by
+ *
+ * NOTE(review): the "FIXME ..." line in the ALT_BITSTREAM_WRITER branch is
+ * not inside a comment, so it is not valid C when ALT_BITSTREAM_WRITER is
+ * defined. Unclear whether that is deliberate -- confirm before using the
+ * alternative writer.
+ */
+static inline void skip_put_bytes(PutBitContext *s, int n)
+{
+        //assert((put_bits_count(s)&7)==0);
+#ifdef ALT_BITSTREAM_WRITER
+        FIXME may need some cleaning of the buffer
+        s->index += n<<3;
+#else
+        //assert(s->bit_left==32);
+        /* the skipped bytes themselves are left untouched */
+        s->buf_ptr += n;
+#endif
+}
+
+/**
+ * Skips the given number of bits.
+ * Must only be used if the actual values in the bitstream do not matter.
+ * If n is 0 the behavior is undefined.
+ *
+ * @param s bitstream writer context
+ * @param n number of bits to skip
+ */
+static inline void skip_put_bits(PutBitContext *s, int n)
+{
+#ifdef ALT_BITSTREAM_WRITER
+    s->index += n;
+#else
+    /* bit_left may go negative here; the two lines below turn every
+     * multiple of 32 it is off by into a 4-byte move of buf_ptr and fold
+     * bit_left back into 0..31. This relies on >> of a negative int being
+     * an arithmetic shift. With n == 0 and an empty bit buffer (bit_left
+     * == 32) buf_ptr would move back by 4, hence the restriction above. */
+    s->bit_left -= n;
+    s->buf_ptr-= 4*(s->bit_left>>5);
+    s->bit_left &= 31;
+#endif
+}
+
+/**
+ * Changes the end of the buffer.
+ *
+ * Only buf_end is moved; size_in_bits is left as it was.
+ *
+ * @param s    bitstream writer context
+ * @param size the new size in bytes of the buffer where to put bits
+ */
+static inline void set_put_bits_buffer_size(PutBitContext *s, int size)
+{
+    s->buf_end = &s->buf[size];
+}
+
+#endif /* AVCODEC_PUT_BITS_H */
--- a/lib/rbcodec/codecs/lib/fft-ffmpeg.c
+++ b/lib/rbcodec/codecs/lib/fft-ffmpeg.c
@ -0,0 +1,473 @@
+/*
+ * FFT/IFFT transforms converted to integer precision
+ * Copyright (c) 2010 Dave Hooper, Mohamed Tarek, Michael Giacomelli
+ * Copyright (c) 2008 Loren Merritt
+ * Copyright (c) 2002 Fabrice Bellard
+ * Partly based on libdjbfft by D. J. Bernstein
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file libavcodec/fft.c
+ * FFT/IFFT transforms.
+ */
+
+
+#ifdef CPU_ARM
+// we definitely want CONFIG_SMALL undefined for ipod
+// so we get the inlined version of fft16 (which is measurably faster)
+#undef CONFIG_SMALL
+#else
+#undef CONFIG_SMALL 
+#endif
+ 
+#include "fft.h"
+#include <string.h>
+#include <stdlib.h>
+#include <math.h>
+#include <inttypes.h>
+#include <time.h>
+#include <codecs/lib/codeclib.h>
+
+#include "codeclib_misc.h"
+#include "mdct_lookup.h"
+
+/* constants for fft_16 (same constants as in mdct_arm.S ... ) */
+#define cPI1_8 (0x7641af3d) /* cos(pi/8) s.31 */
+#define cPI2_8 (0x5a82799a) /* cos(2pi/8) = 1/sqrt(2) s.31 */
+#define cPI3_8 (0x30fbc54d) /* cos(3pi/8) s.31 */
+
+/* asm-optimised functions and/or macros */
+#include "fft-ffmpeg_arm.h"
+#include "fft-ffmpeg_cf.h"
+
+#ifndef ICODE_ATTR_TREMOR_MDCT
+#define ICODE_ATTR_TREMOR_MDCT ICODE_ATTR
+#endif
+
+#if 0
+/* Disabled (#if 0). Recursively maps index i of an n-point transform to
+ * its position in the split-radix input ordering; inverse selects which
+ * of the two odd quarter-size branches receives +1 and which -1. */
+static int split_radix_permutation(int i, int n, int inverse)
+{
+    int m;
+    if(n <= 2) return i&1;
+    m = n >> 1;
+    /* bit m clear: the index belongs to the half-size sub-transform */
+    if(!(i&m))            return split_radix_permutation(i, m, inverse)*2;
+    m >>= 1;
+    /* otherwise it belongs to one of the two quarter-size sub-transforms */
+    if(inverse == !(i&m)) return split_radix_permutation(i, m, inverse)*4 + 1;
+    else                  return split_radix_permutation(i, m, inverse)*4 - 1;
+}
+
+/* Disabled (#if 0). Reorders z in place according to a permutation table.
+ * NOTE(review): the local revtab declaration is commented out, so revtab
+ * must come from elsewhere; the 12 in revtab_shift presumably matches a
+ * table built for 2^12 points -- confirm. */
+static void ff_fft_permute_c(FFTContext *s, FFTComplex *z)
+{
+    int j, k, np;
+    FFTComplex tmp;
+    //const uint16_t *revtab = s->revtab;
+    np = 1 << s->nbits;
+    
+    const int revtab_shift = (12 - s->nbits);
+
+    /* reverse */
+    for(j=0;j<np;j++) {
+        k = revtab[j]>>revtab_shift;
+        /* swap each pair only once */
+        if (k < j) {
+            tmp = z[k];
+            z[k] = z[j];
+            z[j] = tmp;
+        }
+    }
+}
+#endif
+
+/* Butterfly: x = a - b, y = a + b.
+ * x is assigned before y is computed, so x must not name the same object
+ * as a or b (y may). Arguments are not parenthesised in the expansion. */
+#define BF(x,y,a,b) {\
+    x = a - b;\
+    y = a + b;\
+}
+
+/* Same as BF with the roles swapped: x = a + b, y = a - b. */
+#define BF_REV(x,y,a,b) {\
+    x = a + b;\
+    y = a - b;\
+}
+
+/* Generic butterflies, used unless an included header has already provided
+ * its own and defined FFT_FFMPEG_INCL_OPTIMISED_BUTTERFLIES.
+ *
+ * Both macros read the variables t1, t2, t5 and t6 from the scope they are
+ * expanded in and update the four complex elements in place:
+ *   a0 += (t1+t5) + i(t2+t6)      a2 = old a0 - ((t1+t5) + i(t2+t6))
+ *   a1 += (t2-t6) - i(t1-t5)      a3 = old a1 - ((t2-t6) - i(t1-t5))
+ */
+#ifndef FFT_FFMPEG_INCL_OPTIMISED_BUTTERFLIES
+#define BUTTERFLIES(a0,a1,a2,a3) {\
+    {\
+        FFTSample temp1,temp2;\
+        BF(temp1, temp2, t5, t1);\
+        BF(a2.re, a0.re, a0.re, temp2);\
+        BF(a3.im, a1.im, a1.im, temp1);\
+    }\
+    {\
+        FFTSample temp1,temp2;\
+        BF(temp1, temp2, t2, t6);\
+        BF(a3.re, a1.re, a1.re, temp1);\
+        BF(a2.im, a0.im, a0.im, temp2);\
+    }\
+}
+
+// force loading all the inputs before storing any.
+// this is slightly slower for small data, but avoids store->load aliasing
+// for addresses separated by large powers of 2.
+#define BUTTERFLIES_BIG(a0,a1,a2,a3) {\
+    FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\
+    {\
+        FFTSample temp1, temp2;\
+        BF(temp1, temp2, t5, t1);\
+        BF(a2.re, a0.re, r0, temp2);\
+        BF(a3.im, a1.im, i1, temp1);\
+    }\
+    {\
+        FFTSample temp1, temp2;\
+        BF(temp1, temp2, t2, t6);\
+        BF(a3.re, a1.re, r1, temp1);\
+        BF(a2.im, a0.im, i0, temp2);\
+    }\
+}
+#endif
+
+/*
+  see conjugate pair description in
+  http://www.fftw.org/newsplit.pdf
+
+  a0 = z[k]
+  a1 = z[k+N/4]
+  a2 = z[k+2N/4]
+  a3 = z[k+3N/4]
+  
+  result:
+  y[k]      = z[k]+w(z[k+2N/4])+w'(z[k+3N/4])
+  y[k+N/4]  = z[k+N/4]-iw(z[k+2N/4])+iw'(z[k+3N/4])
+  y[k+2N/4] = z[k]-w(z[k+2N/4])-w'(z[k+3N/4])
+  y[k+3N/4] = z[k+N/4]+iw(z[k+2N/4])-iw'(z[k+3N/4])
+  
+  i.e.
+  
+  a0        = a0 +  (w.a2 + w'.a3)
+  a1        = a1 - i(w.a2 - w'.a3)
+  a2        = a0 -  (w.a2 + w'.a3)
+  a3        = a1 + i(w.a2 - w'.a3)
+  
+  note re(w') = re(w) and im(w') = -im(w)
+  
+  so therefore
+  
+  re(a0)   = re(a0) + re(w.a2) + re(w.a3)
+  im(a0)   = im(a0) + im(w.a2) - im(w.a3) etc
+
+  and remember also that  
+  Re([s+it][u+iv]) = su-tv
+  Im([s+it][u+iv]) = sv+tu
+  
+  so
+  Re(w'.(s+it)) = Re(w').s - Im(w').t = Re(w).s + Im(w).t
+  Im(w'.(s+it)) = Re(w').t + Im(w').s = Re(w).t - Im(w).s
+
+  For inverse dft we take the complex conjugate of all twiddle factors.
+  Hence 
+  
+  a0        = a0 +  (w'.a2 + w.a3)
+  a1        = a1 - i(w'.a2 - w.a3)
+  a2        = a0 -  (w'.a2 + w.a3)
+  a3        = a1 + i(w'.a2 - w.a3)
+  
+  Define t1 = Re(w'.a2)  =  Re(w)*Re(a2) + Im(w)*Im(a2)
+         t2 = Im(w'.a2)  =  Re(w)*Im(a2) - Im(w)*Re(a2)
+         t5 = Re(w.a3)   =  Re(w)*Re(a3) - Im(w)*Im(a3)
+         t6 = Im(w.a3)   =  Re(w)*Im(a3) + Im(w)*Re(a3)
+         
+  Then we just output:
+  a0.re = a0.re + ( t1 + t5 )
+  a0.im = a0.im + ( t2 + t6 )
+  a1.re = a1.re + ( t2 - t6 )   // since we multiply by -i and i(-i) = 1
+  a1.im = a1.im - ( t1 - t5 )   // since we multiply by -i and 1(-i) = -i
+  a2.re = a0.re - ( t1 + t5 )
+  a2.im = a0.im - ( t2 + t6 )
+  a3.re = a1.re - ( t2 - t6 )   // since we multiply by +i and i(+i) = -1
+  a3.im = a1.im + ( t1 - t5 )   // since we multiply by +i and 1(+i) = i
+    
+    
+*/
+
+#ifndef FFT_FFMPEG_INCL_OPTIMISED_TRANSFORM
+/* One butterfly step on z[0], z[n], z[2n] and z[3n] with the twiddle
+ * factor passed by value as (wre, wim). Returns z+1 so that calls can be
+ * chained along the array.
+ * t1,t2 / t5,t6 must keep these names: BUTTERFLIES reads them. */
+static inline FFTComplex* TRANSFORM(FFTComplex * z, unsigned int n, FFTSample wre, FFTSample wim)
+{
+    register FFTSample t1,t2,t5,t6,r_re,r_im;
+    r_re = z[n*2].re;
+    r_im = z[n*2].im;
+    /* product of z[2n] with the twiddle (XPROD31_R is defined elsewhere) */
+    XPROD31_R(r_re, r_im, wre, wim, t1,t2);
+    r_re = z[n*3].re;
+    r_im = z[n*3].im;
+    /* product of z[3n] with the twiddle (XNPROD31_R is defined elsewhere) */
+    XNPROD31_R(r_re, r_im, wre, wim, t5,t6);
+    BUTTERFLIES(z[0],z[n],z[n*2],z[n*3]);
+    return z+1;
+}
+
+/* Same step as TRANSFORM, with the twiddle read from a table entry in the
+ * order w[0] = real part, w[1] = imaginary part. Returns z+1.
+ * t1,t2 / t5,t6 must keep these names: BUTTERFLIES reads them. */
+static inline FFTComplex* TRANSFORM_W01(FFTComplex * z, unsigned int n, const FFTSample * w)
+{
+    register const FFTSample wre=w[0],wim=w[1];
+    register FFTSample t1,t2,t5,t6,r_re,r_im;
+    r_re = z[n*2].re;
+    r_im = z[n*2].im;
+    XPROD31_R(r_re, r_im, wre, wim, t1,t2);
+    r_re = z[n*3].re;
+    r_im = z[n*3].im;
+    XNPROD31_R(r_re, r_im, wre, wim, t5,t6);
+    BUTTERFLIES(z[0],z[n],z[n*2],z[n*3]);
+    return z+1;
+}
+
+/* Same step as TRANSFORM, with the twiddle read from a table entry in the
+ * opposite order: w[0] = imaginary part, w[1] = real part. Returns z+1.
+ * t1,t2 / t5,t6 must keep these names: BUTTERFLIES reads them. */
+static inline FFTComplex* TRANSFORM_W10(FFTComplex * z, unsigned int n, const FFTSample * w)
+{
+    register const FFTSample wim=w[0],wre=w[1];
+    register FFTSample t1,t2,t5,t6,r_re,r_im;
+    r_re = z[n*2].re;
+    r_im = z[n*2].im;
+    XPROD31_R(r_re, r_im, wre, wim, t1,t2);
+    r_re = z[n*3].re;
+    r_im = z[n*3].im;
+    XNPROD31_R(r_re, r_im, wre, wim, t5,t6);
+    BUTTERFLIES(z[0],z[n],z[n*2],z[n*3]);
+    return z+1;
+}
+
+/* Butterfly step for the twiddle whose real and imaginary parts are both
+ * cPI2_8 (cos(2pi/8)): four multiplies by that one constant followed by
+ * sums and differences. Returns z+1.
+ * The array is walked as a flat FFTSample array: each complex element is
+ * taken to occupy two samples, real part first. */
+static inline FFTComplex* TRANSFORM_EQUAL(FFTComplex * z, unsigned int n)
+{
+    register FFTSample t1,t2,t5,t6,temp1,temp2;
+    register FFTSample * my_z = (FFTSample *)(z);
+    my_z += n*4;                            /* -> z[2n] */
+    t2    = MULT31(my_z[0], cPI2_8);
+    temp1 = MULT31(my_z[1], cPI2_8);
+    my_z += n*2;                            /* -> z[3n] */
+    temp2 = MULT31(my_z[0], cPI2_8);
+    t5    = MULT31(my_z[1], cPI2_8);
+    t1 = ( temp1 + t2 );
+    t2 = ( temp1 - t2 );
+    t6 = ( temp2 + t5 );
+    t5 = ( temp2 - t5 );
+    /* back to z[0]; my_z is not read again after this line */
+    my_z -= n*6;
+    BUTTERFLIES(z[0],z[n],z[n*2],z[n*3]);
+    return z+1;
+}
+
+/* Butterfly step without any multiplication: z[2n] and z[3n] are passed
+ * to BUTTERFLIES unchanged (the trivial-twiddle case). Returns z+1.
+ * t1,t2 / t5,t6 must keep these names: BUTTERFLIES reads them. */
+static inline FFTComplex* TRANSFORM_ZERO(FFTComplex * z, unsigned int n)
+{
+    FFTSample t1,t2,t5,t6;
+    t1 = z[n*2].re;
+    t2 = z[n*2].im;
+    t5 = z[n*3].re;
+    t6 = z[n*3].im;
+    BUTTERFLIES(z[0],z[n],z[n*2],z[n*3]);
+    return z+1;
+}
+#endif
+
+/* z[0...8n-1], w[1...2n-1] */
+/* Combining pass of one transform level: applies a butterfly step to each
+ * group z[k], z[k+n], z[k+2n], z[k+3n], taking the twiddles from
+ * sincos_lookup0 in strides of STEP samples.
+ *
+ * z_arg    start of the block to combine
+ * STEP_arg distance, in FFTSample units, between successive twiddles
+ * n_arg    distance between the four elements of one group
+ */
+static void pass(FFTComplex *z_arg, unsigned int STEP_arg, unsigned int n_arg) ICODE_ATTR_TREMOR_MDCT;
+static void pass(FFTComplex *z_arg, unsigned int STEP_arg, unsigned int n_arg)
+{
+    register FFTComplex * z = z_arg;
+    register unsigned int STEP = STEP_arg;
+    register unsigned int n = n_arg;
+
+    register const FFTSample *w = sincos_lookup0+STEP;
+    /* wre = *(wim+1) .  ordering is sin,cos */
+    register const FFTSample *w_end = sincos_lookup0+1024;
+
+    /* first two are special (well, first one is special, but we need to do pairs) */
+    z = TRANSFORM_ZERO(z,n);
+    z = TRANSFORM_W10(z,n,w);
+    w += STEP;
+    /* first pass forwards through sincos_lookup0*/
+    do {
+        z = TRANSFORM_W10(z,n,w);
+        w += STEP;
+        z = TRANSFORM_W10(z,n,w);
+        w += STEP;
+    } while(LIKELY(w < w_end));
+    /* second half: pass backwards through sincos_lookup0*/
+    /* wim and wre are now in opposite places so ordering now [0],[1] */
+    /* NOTE(review): the first step below reads w[0] and w[1] where the
+     * forward walk stopped (offset 1024 or beyond), so the table is
+     * assumed to extend that far -- confirm in mdct_lookup.h */
+    w_end=sincos_lookup0;
+    while(LIKELY(w>w_end))
+    {
+        z = TRANSFORM_W01(z,n,w);
+        w -= STEP;
+        z = TRANSFORM_W01(z,n,w);
+        w -= STEP;
+    }
+}
+
+/* what is STEP?
+   sincos_lookup0 has sin,cos pairs for 1/4 cycle, in 1024 points
+   so half cycle would be 2048 points
+   ff_cos_16 has 8 elements corresponding to 4 cos points and 4 sin points
+   so each of the 4 points pairs corresponds to a 256*2-byte jump in sincos_lookup0
+   8192/16 (from "ff_cos_16") is 512 bytes.
+   i.e.  for fft16, STEP = 8192/16 */
+/* Defines fft<n>(): an n-point transform built from one half-size
+ * transform on the first half of z, two quarter-size transforms on the
+ * third and fourth quarters, and a combining pass() with a twiddle stride
+ * of 8192/n. n2 and n4 are pasted into function names, so they must be
+ * the literal values n/2 and n/4. */
+#define DECL_FFT(n,n2,n4)\
+static void fft##n(FFTComplex *z) ICODE_ATTR_TREMOR_MDCT;\
+static void fft##n(FFTComplex *z)\
+{\
+    fft##n2(z);\
+    fft##n4(z+n4*2);\
+    fft##n4(z+n4*3);\
+    pass(z,8192/n,n4);\
+}
+
+#ifndef FFT_FFMPEG_INCL_OPTIMISED_FFT4
+/* 4-point transform of z[0..3], in place, using only additions and
+ * subtractions. In the trailing comments r1..r8 denote z[0].re, z[0].im,
+ * z[1].re, z[1].im, ... in memory order. The statement order matters:
+ * z[0].re and z[2].re are overwritten before the remaining BF lines run. */
+static inline void fft4(FFTComplex *z)
+{
+    FFTSample t1, t2, t3, t4, t5, t6, t7, t8;
+
+    BF(t3, t1, z[0].re, z[1].re); // t3=r1-r3 ; t1 = r1+r3
+    BF(t8, t6, z[3].re, z[2].re); // t8=r7-r5 ; t6 = r7+r5
+
+    BF(z[2].re, z[0].re, t1, t6); // r5=t1-t6 ; r1 = t1+t6
+
+    BF(t4, t2, z[0].im, z[1].im); // t4=r2-r4 ; t2 = r2+r4
+    BF(t7, t5, z[2].im, z[3].im); // t7=r6-r8 ; t5 = r6+r8
+
+    BF(z[3].im, z[1].im, t4, t8); // r8=t4-t8 ; r4 = t4+t8
+    BF(z[3].re, z[1].re, t3, t7); // r7=t3-t7 ; r3 = t3+t7
+    BF(z[2].im, z[0].im, t2, t5); // r6=t2-t5 ; r2 = t2+t5
+}
+#endif
+
+/* Plain (non-inline) wrapper around fft4(); this is the function whose
+ * address is stored in fft_dispatch[]. */
+static void fft4_dispatch(FFTComplex *z)
+{
+    fft4(z);
+}
+
+#ifndef FFT_FFMPEG_INCL_OPTIMISED_FFT8
+/* 8-point transform of z[0..7], in place: a 4-point transform of z[0..3],
+ * then the butterflies that combine it with z[4..7]. */
+static inline void fft8(FFTComplex *z)
+{
+    fft4(z);
+    FFTSample t1,t2,t3,t4,t7,t8;
+    
+    /* BF with a negated last argument: first output = sum of the pair,
+     * second output (stored back into z[5] / z[7]) = difference */
+    BF(t1, z[5].re, z[4].re, -z[5].re);
+    BF(t2, z[5].im, z[4].im, -z[5].im);
+    BF(t3, z[7].re, z[6].re, -z[7].re);
+    BF(t4, z[7].im, z[6].im, -z[7].im);
+    BF(t8, t1, t3, t1);
+    BF(t7, t2, t2, t4);
+    BF(z[4].re, z[0].re, z[0].re, t1);
+    BF(z[4].im, z[0].im, z[0].im, t2);
+    BF(z[6].re, z[2].re, z[2].re, t7);
+    BF(z[6].im, z[2].im, z[2].im, t8);
+
+    /* remaining group z[1], z[3], z[5], z[7] uses the cPI2_8 twiddle */
+    z++;
+    TRANSFORM_EQUAL(z,2);
+}
+#endif
+
+/* Plain (non-inline) wrapper around fft8(); this is the function whose
+ * address is stored in fft_dispatch[]. */
+static void fft8_dispatch(FFTComplex *z)
+{
+    fft8(z);
+}
+
+#ifndef CONFIG_SMALL
+/* 16-point transform with the combining pass unrolled by hand: instead of
+ * calling pass(), the four butterfly groups are processed directly with
+ * constant twiddles. Used when CONFIG_SMALL is not defined. */
+static void fft16(FFTComplex *z) ICODE_ATTR_TREMOR_MDCT;
+static void fft16(FFTComplex *z)
+{
+    fft8(z);
+    fft4(z+8);
+    fft4(z+12);
+
+    /* groups are visited in the order k = 0, 2, 1, 3 (group k is
+     * z[k], z[k+4], z[k+8], z[k+12]); the return values of the
+     * TRANSFORM helpers are ignored and z is stepped explicitly */
+    TRANSFORM_ZERO(z,4);
+    z+=2;
+    TRANSFORM_EQUAL(z,4);
+    z-=1;
+    TRANSFORM(z,4,cPI1_8,cPI3_8);
+    z+=2;
+    TRANSFORM(z,4,cPI3_8,cPI1_8);
+}
+#else
+DECL_FFT(16,8,4)
+#endif
+DECL_FFT(32,16,8)
+DECL_FFT(64,32,16)
+DECL_FFT(128,64,32)
+DECL_FFT(256,128,64)
+DECL_FFT(512,256,128)
+DECL_FFT(1024,512,256)
+DECL_FFT(2048,1024,512)
+DECL_FFT(4096,2048,1024)
+
+/* Transform routines indexed by (nbits - 2): entry 0 is the 4-point
+ * transform, the last entry (index 10) the 4096-point one. */
+static void (*fft_dispatch[])(FFTComplex*) = {
+    fft4_dispatch, fft8_dispatch, fft16, fft32, fft64, fft128, fft256, fft512, fft1024,
+    fft2048, fft4096
+};
+
+/**
+ * Runs the in-place transform for a block of 2^nbits complex points.
+ *
+ * @param nbits log2 of the transform size; must select an entry of
+ *              fft_dispatch[] (2..12), no range check is performed
+ * @param z     the data block, transformed in place
+ */
+void ff_fft_calc_c(int nbits, FFTComplex *z)
+{
+    void (*transform)(FFTComplex*) = fft_dispatch[nbits-2];
+
+    transform(z);
+}
+
+#if 0
+/* Disabled (#if 0) stand-alone test driver: fills z with a cosine, runs
+ * the transform and prints the magnitude of every output bin.
+ * NOTE(review): this no longer matches the code above -- it calls
+ * ff_fft_calc_c(&s, z) although the function takes (int nbits, z), relies
+ * on ff_fft_permute_c which is also disabled, and uses ff_fft_init,
+ * fixtof32, muls and adds, none of which are defined in this file. */
+int main (void)
+{
+#define PRECISION       16
+#define FFT_SIZE 1024
+#define ftofix32(x)       ((fixed32)((x) * (float)(1 << PRECISION) + ((x) < 0 ? -0.5 : 0.5)))
+#define itofix32(x)       ((x) << PRECISION)
+#define fixtoi32(x)       ((x) >> PRECISION)
+
+    int             j;
+    const long      N = FFT_SIZE;
+    double          r[FFT_SIZE] = {0.0}, i[FFT_SIZE] = {0.0};
+    long            n;
+    double          t;
+    double          amp, phase;
+    clock_t         start, end;
+    double          exec_time = 0;
+    FFTContext      s;
+    FFTComplex      z[FFT_SIZE];
+    /* only the first 64 of the FFT_SIZE elements are cleared here */
+    memset(z, 0, 64*sizeof(FFTComplex));
+
+    /* Generate saw-tooth test data */
+    for (n = 0; n < FFT_SIZE; n++)
+    {
+        t = (2 * M_PI * n)/N;
+        /*z[n].re =  1.1      + sin(      t) +                
+                   0.5      * sin(2.0 * t) +
+                  (1.0/3.0) * sin(3.0 * t) +
+                   0.25     * sin(4.0 * t) +
+                   0.2      * sin(5.0 * t) +
+                  (1.0/6.0) * sin(6.0 * t) +
+                  (1.0/7.0) * sin(7.0 * t) ;*/
+        /* active test signal: a cosine with a period of 64 samples */
+        z[n].re  =  ftofix32(cos(2*M_PI*n/64));
+        //printf("z[%d] = %f\n", n, z[n].re);
+        //getchar();
+    }
+
+    ff_fft_init(&s, 10, 1);
+//start = clock();
+//for(n = 0; n < 1000000; n++)
+    ff_fft_permute_c(&s, z);
+    ff_fft_calc_c(&s, z);
+//end   = clock();
+//exec_time = (((double)end-(double)start)/CLOCKS_PER_SEC);
+    for(j = 0; j < FFT_SIZE; j++)
+    {
+        printf("%8.4f\n", sqrt(pow(fixtof32(z[j].re),2)+ pow(fixtof32(z[j].im), 2)));   
+        //getchar();
+    }
+    printf("muls = %d, adds = %d\n", muls, adds);
+//printf(" Time elapsed = %f\n", exec_time);
+    //ff_fft_end(&s);
+
+}
+#endif
--- a/lib/rbcodec/codecs/lib/fft-ffmpeg_arm.h
+++ b/lib/rbcodec/codecs/lib/fft-ffmpeg_arm.h
@ -0,0 +1,456 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2010 Dave Hooper
+ *
+ * ARM optimisations for ffmpeg's fft (used in fft-ffmpeg.c)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#ifdef CPU_ARM
+
+/* Start off with optimised variants of the butterflies that work
+   nicely on arm.
+   All three compute the second result from the first one and (b<<1)
+   instead of from a and b again, so an output may share storage with an
+   input as described per variant. b is expanded twice. */
+/* 1.  where y and a share the same variable/register */
+/*     result: y = a + b, x = a - b */
+#define BF_OPT(x,y,a,b) {\
+    y = a + b;\
+    x = y - (b<<1);\
+}
+
+/* 2.  where y and b share the same variable/register */
+/*     result: x = a - b, y = a + b */
+#define BF_OPT2(x,y,a,b) {\
+    x = a - b;\
+    y = x + (b<<1);\
+}
+
+/* 3.  where y and b share the same variable/register (but y=(-b)) */
+/*     result: x = a + b, y = a - b */
+#define BF_OPT2_REV(x,y,a,b) {\
+    x = a + b;\
+    y = x - (b<<1);\
+}
+
+
+/* standard BUTTERFLIES package.  Note, we actually manually inline this
+   in all the TRANSFORM macros below anyway.
+   Defining FFT_FFMPEG_INCL_OPTIMISED_BUTTERFLIES makes fft-ffmpeg.c skip
+   its generic version. Unlike that one, this macro overwrites t1, t2, t5
+   and t6 in the enclosing scope while it works. */
+#define FFT_FFMPEG_INCL_OPTIMISED_BUTTERFLIES
+#define BUTTERFLIES(a0,a1,a2,a3) {\
+    {\
+        BF_OPT(t1, t5, t5, t1);\
+        BF_OPT(t6, t2, t2, t6);\
+        BF_OPT(a2.re, a0.re, a0.re, t5);\
+        BF_OPT(a2.im, a0.im, a0.im, t2);\
+        BF_OPT(a3.re, a1.re, a1.re, t6);\
+        BF_OPT(a3.im, a1.im, a1.im, t1);\
+    }\
+}
+
+#define FFT_FFMPEG_INCL_OPTIMISED_TRANSFORM
+
+/* ARM version of the butterfly step on z[0], z[n], z[2n], z[3n] with the
+ * twiddle passed by value as (wre, wim). Returns the incoming z plus one
+ * element.
+ * Each complex element is moved with a single ldmia/stmia of two
+ * registers, so the temporaries are pinned to ascending registers
+ * (r4..r9) to give the register lists a known order. */
+static inline FFTComplex* TRANSFORM( FFTComplex* z, int n, FFTSample wre, FFTSample wim )
+{
+    register FFTSample t1,t2 asm("r5"),t5 asm("r6"),t6 asm("r7"),r_re asm("r8"),r_im asm("r9");
+    z += n*2; /* z[o2] */
+    asm volatile( "ldmia %[my_z], {%[r_re],%[r_im]}\n\t":[r_re] "=r" (r_re), [r_im] "=r" (r_im):[my_z] "r" (z));
+    XPROD31_R(r_re, r_im, wre, wim, t1,t2);
+    
+    z += n; /* z[o3] */
+    asm volatile( "ldmia %[my_z], {%[r_re],%[r_im]}\n\t":[r_re] "=r" (r_re), [r_im] "=r" (r_im):[my_z] "r" (z));
+    XNPROD31_R(r_re, r_im, wre, wim, t5,t6);
+    
+    /* after these two lines: t5 = t5+t1, t1 = old t5 - old t1,
+     *                        t2 = t2+t6, t6 = old t2 - old t6 */
+    BF_OPT(t1, t5, t5, t1);
+    BF_OPT(t6, t2, t2, t6);
+
+    {    
+        register FFTSample rt0temp asm("r4");
+        /*{*/
+        /*   BF_OPT(t1, t5, t5, t1);*/
+        /*    BF_OPT(t6, t2, t2, t6);*/
+        /*    BF_OPT(a2.re, a0.re, a0.re, t5);*/
+        /*    BF_OPT(a2.im, a0.im, a0.im, t2);*/
+        /*    BF_OPT(a3.re, a1.re, a1.re, t6);*/
+        /*    BF_OPT(a3.im, a1.im, a1.im, t1);*/
+        /*}*/
+        z -= n*3;
+        /* r_re = my_z[0]; r_im = my_z[1]; */
+        asm volatile( "ldmia %[my_z], {%[r_re],%[r_im]}\n\t":[r_re] "=r" (r_re), [r_im] "=r" (r_im):[my_z] "r" (z));
+        BF_OPT(rt0temp, r_re, r_re, t5);
+        BF_OPT(t2,      r_im, r_im, t2);
+        /* my_z[0] = r_re; my_z[1] = r_im; */
+        asm volatile( "stmia %[my_z], {%[r_re],%[r_im]}\n\t"::[my_z] "r" (z), [r_re] "r" (r_re), [r_im] "r" (r_im):"memory" );
+        z += n;
+        /* r_re = my_z[0]; r_im = my_z[1]; */
+        asm volatile( "ldmia %[my_z], {%[r_re],%[r_im]}\n\t":[r_re] "=r" (r_re), [r_im] "=r" (r_im):[my_z] "r" (z));
+        BF_OPT(t5, r_re, r_re, t6);
+        BF_OPT(t6, r_im, r_im, t1);
+        /* my_z[0] = r_re; my_z[1] = r_im; */
+        asm volatile( "stmia %[my_z], {%[r_re],%[r_im]}\n\t"::[my_z] "r" (z), [r_re] "r" (r_re), [r_im] "r" (r_im):"memory");
+        z += n;
+        /* my_z[0] = rt0temp; my_z[1] = t2; */
+        asm volatile( "stmia %[my_z], {%[rt0temp],%[t2]}\n\t"::[my_z] "r" (z), [rt0temp] "r" (rt0temp), [t2] "r" (t2):"memory");
+    }
+    z += n;
+   
+    /* my_z[0] = t5; my_z[1] = t6; */
+    /* the "!" writes the advanced address back into z, which supplies the
+     * "+1 element" of the return value (two stored words are assumed to
+     * be exactly one FFTComplex) */
+    asm volatile( "stmia %[my_z]!, {%[t5],%[t6]}\n\t":[my_z] "+r" (z) : [t5] "r" (t5), [t6] "r" (t6):"memory");
+    z -= n*3;
+    return(z);
+}
+
+/* ARM version of the butterfly step with the twiddle read from a table
+ * entry in the order w[0] = real part, w[1] = imaginary part. Returns the
+ * incoming z plus one element.
+ * t5/t6 first hold the twiddle and are then reused as outputs of
+ * XNPROD31_R, which therefore has to tolerate outputs that alias its
+ * twiddle inputs (the macro is defined elsewhere -- TODO confirm). */
+static inline FFTComplex* TRANSFORM_W01( FFTComplex* z, int n, const FFTSample* w )
+{
+    register FFTSample t1,t2 asm("r5"),t5 asm("r6"),t6 asm("r7"),r_re asm("r8"),r_im asm("r9");
+    
+    /* load wre,wim into t5,t6 */
+    asm volatile( "ldmia %[w], {%[wre], %[wim]}\n\t":[wre] "=r" (t5), [wim] "=r" (t6):[w] "r" (w));
+    z += n*2; /* z[o2] -- 2n * 2 since complex numbers */
+    asm volatile( "ldmia %[my_z], {%[r_re],%[r_im]}\n\t":[r_re] "=r" (r_re), [r_im] "=r" (r_im):[my_z] "r" (z));
+    XPROD31_R(r_re, r_im, t5 /*wre*/, t6 /*wim*/, t1,t2);
+
+    z += n; /* z[o3] */
+    asm volatile( "ldmia %[my_z], {%[r_re],%[r_im]}\n\t":[r_re] "=r" (r_re), [r_im] "=r" (r_im):[my_z] "r" (z));
+    XNPROD31_R(r_re, r_im, t5 /*wre*/, t6 /*wim*/, t5,t6);
+    
+    /* after these two lines: t5 = t5+t1, t1 = old t5 - old t1,
+     *                        t2 = t2+t6, t6 = old t2 - old t6 */
+    BF_OPT(t1, t5, t5, t1);
+    BF_OPT(t6, t2, t2, t6);
+    {
+        register FFTSample rt0temp asm("r4");
+        /*{*/
+        /*   BF_OPT(t1, t5, t5, t1);*/
+        /*    BF_OPT(t6, t2, t2, t6);*/
+        /*    BF_OPT(a2.re, a0.re, a0.re, t5);*/
+        /*    BF_OPT(a2.im, a0.im, a0.im, t2);*/
+        /*    BF_OPT(a3.re, a1.re, a1.re, t6);*/
+        /*    BF_OPT(a3.im, a1.im, a1.im, t1);*/
+        /*}*/
+        z -= n*3;
+        /* r_re = my_z[0]; r_im = my_z[1]; */
+        asm volatile( "ldmia %[my_z], {%[r_re],%[r_im]}\n\t":[r_re] "=r" (r_re), [r_im] "=r" (r_im):[my_z] "r" (z));
+        BF_OPT(rt0temp, r_re, r_re, t5);
+        BF_OPT(t2,      r_im, r_im, t2);
+        /* my_z[0] = r_re; my_z[1] = r_im; */
+        asm volatile( "stmia %[my_z], {%[r_re],%[r_im]}\n\t"::[my_z] "r" (z), [r_re] "r" (r_re), [r_im] "r" (r_im):"memory");
+        z += n;
+        /* r_re = my_z[0]; r_im = my_z[1]; */
+        asm volatile( "ldmia %[my_z], {%[r_re],%[r_im]}\n\t":[r_re] "=r" (r_re), [r_im] "=r" (r_im):[my_z] "r" (z));
+        BF_OPT(t5, r_re, r_re, t6);
+        BF_OPT(t6, r_im, r_im, t1);
+        /* my_z[0] = r_re; my_z[1] = r_im; */
+        asm volatile( "stmia %[my_z], {%[r_re],%[r_im]}\n\t"::[my_z] "r" (z), [r_re] "r" (r_re), [r_im] "r" (r_im):"memory");
+        z += n;
+        /* my_z[0] = rt0temp; my_z[1] = t2; */
+        asm volatile( "stmia %[my_z], {%[rt0temp],%[t2]}\n\t"::[my_z] "r" (z), [rt0temp] "r" (rt0temp), [t2] "r" (t2):"memory");
+    }
+    z += n;
+
+    /* my_z[0] = t5; my_z[1] = t6; */
+    /* the "!" writes the advanced address back into z, which supplies the
+     * "+1 element" of the return value */
+    asm volatile( "stmia %[my_z]!, {%[t5],%[t6]}\n\t":[my_z] "+r" (z) : [t5] "r" (t5), [t6] "r" (t6):"memory");
+    z -= n*3;
+    return(z);
+}
+
+/* ARM version of the butterfly step with the twiddle read from a table
+ * entry in the order w[0] = imaginary part, w[1] = real part. Returns the
+ * incoming z plus one element.
+ * t5/t6 first hold the twiddle and are then reused as outputs of
+ * XNPROD31_R, which therefore has to tolerate outputs that alias its
+ * twiddle inputs (the macro is defined elsewhere -- TODO confirm). */
+static inline FFTComplex* TRANSFORM_W10( FFTComplex* z, int n, const FFTSample* w )
+{
+    register FFTSample t1,t2 asm("r5"),t5 asm("r6"),t6 asm("r7"),r_re asm("r8"),r_im asm("r9");
+    
+    /* load wim,wre into t5,t6 */
+    asm volatile( "ldmia %[w], {%[wim], %[wre]}\n\t":[wim] "=r" (t5), [wre] "=r" (t6):[w] "r" (w));
+    z += n*2; /* z[o2] -- 2n * 2 since complex numbers */
+    asm volatile( "ldmia %[my_z], {%[r_re],%[r_im]}\n\t":[r_re] "=r" (r_re), [r_im] "=r" (r_im):[my_z] "r" (z));
+    XPROD31_R(r_re, r_im, t6 /*wre*/, t5 /*wim*/, t1,t2);
+
+    z += n; /* z[o3] */
+    asm volatile( "ldmia %[my_z], {%[r_re],%[r_im]}\n\t":[r_re] "=r" (r_re), [r_im] "=r" (r_im):[my_z] "r" (z));
+    XNPROD31_R(r_re, r_im, t6 /*wre*/, t5 /*wim*/, t5,t6);
+    
+    /* after these two lines: t5 = t5+t1, t1 = old t5 - old t1,
+     *                        t2 = t2+t6, t6 = old t2 - old t6 */
+    BF_OPT(t1, t5, t5, t1);
+    BF_OPT(t6, t2, t2, t6);
+    {
+        register FFTSample rt0temp asm("r4");
+        /*{*/
+        /*   BF_OPT(t1, t5, t5, t1);*/
+        /*    BF_OPT(t6, t2, t2, t6);*/
+        /*    BF_OPT(a2.re, a0.re, a0.re, t5);*/
+        /*    BF_OPT(a2.im, a0.im, a0.im, t2);*/
+        /*    BF_OPT(a3.re, a1.re, a1.re, t6);*/
+        /*    BF_OPT(a3.im, a1.im, a1.im, t1);*/
+        /*}*/
+        z -= n*3;
+        /* r_re = my_z[0]; r_im = my_z[1]; */
+        asm volatile( "ldmia %[my_z], {%[r_re],%[r_im]}\n\t":[r_re] "=r" (r_re), [r_im] "=r" (r_im):[my_z] "r" (z));
+        BF_OPT(rt0temp, r_re, r_re, t5);
+        BF_OPT(t2,      r_im, r_im, t2);
+        /* my_z[0] = r_re; my_z[1] = r_im; */
+        asm volatile( "stmia %[my_z], {%[r_re],%[r_im]}\n\t"::[my_z] "r" (z), [r_re] "r" (r_re), [r_im] "r" (r_im):"memory");
+        z += n;
+        /* r_re = my_z[0]; r_im = my_z[1]; */
+        asm volatile( "ldmia %[my_z], {%[r_re],%[r_im]}\n\t":[r_re] "=r" (r_re), [r_im] "=r" (r_im):[my_z] "r" (z));
+        BF_OPT(t5, r_re, r_re, t6);
+        BF_OPT(t6, r_im, r_im, t1);
+        /* my_z[0] = r_re; my_z[1] = r_im; */
+        asm volatile( "stmia %[my_z], {%[r_re],%[r_im]}\n\t"::[my_z] "r" (z), [r_re] "r" (r_re), [r_im] "r" (r_im):"memory");
+        z += n;
+        /* my_z[0] = rt0temp; my_z[1] = t2; */
+        asm volatile( "stmia %[my_z], {%[rt0temp],%[t2]}\n\t"::[my_z] "r" (z), [rt0temp] "r" (rt0temp), [t2] "r" (t2):"memory");
+    }
+    z += n;
+
+    /* my_z[0] = t5; my_z[1] = t6; */
+    /* the "!" writes the advanced address back into z, which supplies the
+     * "+1 element" of the return value */
+    asm volatile( "stmia %[my_z]!, {%[t5],%[t6]}\n\t":[my_z] "+r" (z) : [t5] "r" (t5), [t6] "r" (t6):"memory");
+    z -= n*3;
+    return(z);
+}
+
+/*
+ * ARM butterfly step that uses the single constant cPI2_8 for every multiply.
+ *
+ * Loads z[2n] and z[3n], scales all four components by cPI2_8 with MULT31,
+ * forms their sums and differences, then combines them with z[0] and z[n]
+ * through BF_OPT and stores results back to z[0], z[n], z[2n] and z[3n].
+ * Per the fft8 notes elsewhere in this file, BF_OPT(x,y,a,b) computes
+ * y=a+b; x=y-(b<<1).
+ *
+ * z: first of the four elements touched (stride n, in FFTComplex units).
+ * n: element stride between the four inputs.
+ *
+ * Returns the incoming z advanced by one element: the final stmia uses
+ * write-back before z is rewound by 3*n (this relies on an FFTComplex being
+ * exactly the two words that stmia writes).
+ *
+ * The temporaries are pinned to r4-r9, presumably so that every ldmia/stmia
+ * register list is in ascending register order and therefore maps .re to the
+ * lower address - NOTE(review): confirm before changing the register choices.
+ */
+static inline FFTComplex* TRANSFORM_EQUAL( FFTComplex* z, int n )
+{
+    register FFTSample t1,t2 asm("r5"),t5 asm("r6"),t6 asm("r7"),r_re asm("r8"),r_im asm("r9");
+
+    z += n*2; /* z[o2] -- 2n * 2 since complex numbers */
+    asm volatile( "ldmia %[my_z], {%[t5],%[t6]}\n\t":[t5] "=r" (t5), [t6] "=r" (t6):[my_z] "r" (z));
+    z += n; /* z[o3] */
+    asm volatile( "ldmia %[my_z], {%[r_re],%[r_im]}\n\t":[r_re] "=r" (r_re), [r_im] "=r" (r_im):[my_z] "r" (z));
+
+/**/
+/*t2 = MULT32(cPI2_8, t5);*/
+/*t1 = MULT31(cPI2_8, t6);*/
+/*t6 = MULT31(cPI2_8, r_re);*/
+/*t5 = MULT32(cPI2_8, r_im);*/
+
+/*t1 = ( t1 + (t2<<1) );*/
+/*t2 = ( t1 - (t2<<2) );*/
+/*t6 = ( t6 + (t5<<1) );*/
+/*t5 = ( t6 - (t5<<2) );*/
+/**/
+    /* scale the four loaded components by cPI2_8 */
+    t2   = MULT31(cPI2_8, t5);
+    t6   = MULT31(cPI2_8, t6);
+    r_re = MULT31(cPI2_8, r_re);
+    t5   = MULT31(cPI2_8, r_im);
+    
+    /* sums and differences of the scaled values */
+    t1 = ( t6 + t2 );
+    t2 = ( t6 - t2 );
+    t6 = ( r_re + t5 );
+    t5 = ( r_re - t5 );
+    
+    BF_OPT(t1, t5, t5, t1);
+    BF_OPT(t6, t2, t2, t6);
+    {
+        register FFTSample rt0temp asm("r4");
+        /*{*/
+        /*   BF_OPT(t1, t5, t5, t1);*/
+        /*    BF_OPT(t6, t2, t2, t6);*/
+        /*    BF_OPT(a2.re, a0.re, a0.re, t5);*/
+        /*    BF_OPT(a2.im, a0.im, a0.im, t2);*/
+        /*    BF_OPT(a3.re, a1.re, a1.re, t6);*/
+        /*    BF_OPT(a3.im, a1.im, a1.im, t1);*/
+        /*}*/
+        z -= n*3; /* back to the first element */
+        /* r_re = my_z[0]; r_im = my_z[1]; */
+        asm volatile( "ldmia %[my_z], {%[r_re],%[r_im]}\n\t":[r_re] "=r" (r_re), [r_im] "=r" (r_im):[my_z] "r" (z));
+        BF_OPT(rt0temp, r_re, r_re, t5);
+        BF_OPT(t2,      r_im, r_im, t2);
+        /* my_z[0] = r_re; my_z[1] = r_im; */
+        asm volatile( "stmia %[my_z], {%[r_re],%[r_im]}\n\t"::[my_z] "r" (z), [r_re] "r" (r_re), [r_im] "r" (r_im):"memory");
+        z += n; /* second element */
+        /* r_re = my_z[0]; r_im = my_z[1]; */
+        asm volatile( "ldmia %[my_z], {%[r_re],%[r_im]}\n\t":[r_re] "=r" (r_re), [r_im] "=r" (r_im):[my_z] "r" (z));
+        BF_OPT(t5, r_re, r_re, t6);
+        BF_OPT(t6, r_im, r_im, t1);
+        /* my_z[0] = r_re; my_z[1] = r_im; */
+        asm volatile( "stmia %[my_z], {%[r_re],%[r_im]}\n\t"::[my_z] "r" (z), [r_re] "r" (r_re), [r_im] "r" (r_im):"memory");
+        z += n; /* third element */
+        /* my_z[0] = rt0temp; my_z[1] = t2; */
+        asm volatile( "stmia %[my_z], {%[rt0temp],%[t2]}\n\t"::[my_z] "r" (z), [rt0temp] "r" (rt0temp), [t2] "r" (t2):"memory");
+    }
+    z += n; /* fourth element */
+
+    /* my_z[0] = t5; my_z[1] = t6; */
+    /* write-back form: z is left pointing just past the stored pair */
+    asm volatile( "stmia %[my_z]!, {%[t5],%[t6]}\n\t":[my_z] "+r" (z) : [t5] "r" (t5), [t6] "r" (t6):"memory");
+    z -= n*3;
+    return(z);
+}
+
+/*
+ * ARM butterfly step that needs no multiplies.
+ *
+ * Loads z[2n] and z[3n], combines them directly with BF_OPT, then combines
+ * the results with z[0] and z[n] and stores back to z[0], z[n], z[2n] and
+ * z[3n].  Per the fft8 notes elsewhere in this file, BF_OPT(x,y,a,b)
+ * computes y=a+b; x=y-(b<<1).
+ *
+ * z: first of the four elements touched (stride n, in FFTComplex units).
+ * n: element stride between the four inputs.
+ *
+ * Returns the incoming z advanced by one element: the final stmia uses
+ * write-back before z is rewound by 3*n.
+ */
+static inline FFTComplex* TRANSFORM_ZERO( FFTComplex* z, int n )
+{
+    register FFTSample t1,t2 asm("r5"),t5 asm("r6"),t6 asm("r7"), r_re asm("r8"), r_im asm("r9");
+
+    z += n*2; /* z[o2] -- 2n * 2 since complex numbers */
+    asm volatile( "ldmia %[my_z], {%[r_re],%[r_im]}\n\t":[r_re] "=r" (r_re), [r_im] "=r" (r_im):[my_z] "r" (z));
+    z += n; /* z[o3] */
+    asm volatile( "ldmia %[my_z], {%[t5],%[t6]}\n\t":[t5] "=r" (t5), [t6] "=r" (t6):[my_z] "r" (z));
+
+    /* t1 and t2 receive their first values here, as BF_OPT outputs */
+    BF_OPT(t1, t5, t5, r_re);
+    BF_OPT(t6, t2, r_im, t6);
+    {
+        register FFTSample rt0temp asm("r4");
+        /*{*/
+        /*   BF_OPT(t1, t5, t5, t1);*/
+        /*    BF_OPT(t6, t2, t2, t6);*/
+        /*    BF_OPT(a2.re, a0.re, a0.re, t5);*/
+        /*    BF_OPT(a2.im, a0.im, a0.im, t2);*/
+        /*    BF_OPT(a3.re, a1.re, a1.re, t6);*/
+        /*    BF_OPT(a3.im, a1.im, a1.im, t1);*/
+        /*}*/
+        z -= n*3; /* back to the first element */
+        /* r_re = my_z[0]; r_im = my_z[1]; */
+        asm volatile( "ldmia %[my_z], {%[r_re],%[r_im]}\n\t":[r_re] "=r" (r_re), [r_im] "=r" (r_im):[my_z] "r" (z));
+        BF_OPT(rt0temp, r_re, r_re, t5);
+        BF_OPT(t2,      r_im, r_im, t2);
+        /* my_z[0] = r_re; my_z[1] = r_im; */
+        asm volatile( "stmia %[my_z], {%[r_re],%[r_im]}\n\t"::[my_z] "r" (z), [r_re] "r" (r_re), [r_im] "r" (r_im):"memory");
+        z += n; /* second element */
+        /* r_re = my_z[0]; r_im = my_z[1]; */
+        asm volatile( "ldmia %[my_z], {%[r_re],%[r_im]}\n\t":[r_re] "=r" (r_re), [r_im] "=r" (r_im):[my_z] "r" (z));
+        BF_OPT(t5, r_re, r_re, t6);
+        BF_OPT(t6, r_im, r_im, t1);
+        /* my_z[0] = r_re; my_z[1] = r_im; */
+        asm volatile( "stmia %[my_z], {%[r_re],%[r_im]}\n\t"::[my_z] "r" (z), [r_re] "r" (r_re), [r_im] "r" (r_im):"memory");
+        z += n; /* third element */
+        /* my_z[0] = rt0temp; my_z[1] = t2; */
+        asm volatile( "stmia %[my_z], {%[rt0temp],%[t2]}\n\t"::[my_z] "r" (z), [rt0temp] "r" (rt0temp), [t2] "r" (t2):"memory");
+    }
+    z += n; /* fourth element */
+
+    /* my_z[0] = t5; my_z[1] = t6; */
+    /* write-back form: z is left pointing just past the stored pair */
+    asm volatile( "stmia %[my_z]!, {%[t5],%[t6]}\n\t":[my_z] "+r" (z) : [t5] "r" (t5), [t6] "r" (t6):"memory");
+    z -= n*3;
+    return(z);
+}
+
+#define FFT_FFMPEG_INCL_OPTIMISED_FFT4
+/*
+ * ARM 4-point FFT performed in place on z[0..3].
+ *
+ * All eight words are loaded into r1-r8 with one ldmia, combined using
+ * add/sub pairs (the "sub x,y,x,lsl #1" form yields y-2x, i.e. the
+ * difference once y already holds the sum), and written back with one stmia.
+ *
+ * Returns z advanced past the eight words just written (the stmia uses
+ * write-back), so the result points at the element following z[3].
+ */
+static inline FFTComplex* fft4(FFTComplex * z)
+{
+    FFTSample temp;
+    
+    /* input[0..7] -> output[0..7] */
+    /* load r1=z[0],r2=z[1],...,r8=z[7] */
+    asm volatile(
+      "ldmia %[z], {r1-r8}\n\t"
+      "add r1,r1,r3\n\t"         /* r1 :=t1 */
+      "sub r3,r1,r3, lsl #1\n\t" /* r3 :=t3 */
+      "sub r7,r7,r5\n\t"         /* r7 :=t8 */
+      "add r5,r7,r5, lsl #1\n\t" /* r5 :=t6 */
+      
+      "add r1,r1,r5\n\t"                 /* r1 = o[0] */
+      "sub r5,r1,r5, lsl #1\n\t"         /* r5 = o[4] */
+      
+      "add r2,r2,r4\n\t"         /* r2 :=t2 */
+      "sub r4,r2,r4, lsl #1\n\t" /* r4 :=t4 */
+      
+      "add %[temp],r6,r8\n\t"        /* temp:=t5 */
+      "sub r6,r6,r8\n\t"         /* r6 :=t7 */
+      
+      "sub r8,r4,r7\n\t"                 /* r8 = o[7]*/ 
+      "add r4,r4,r7\n\t"                 /* r4 = o[3]*/ 
+      "sub r7,r3,r6\n\t"                 /* r7 = o[6]*/ 
+      "add r3,r3,r6\n\t"                 /* r3 = o[2]*/ 
+      "sub r6,r2,%[temp]\n\t"                /* r6 = o[5]*/ 
+      "add r2,r2,%[temp]\n\t"                /* r2 = o[1]*/ 
+      
+      "stmia %[z]!, {r1-r8}\n\t"
+      : /* outputs */ [z] "+r" (z), [temp] "=r" (temp)
+      : /* inputs */
+      : /* clobbers */
+      "r1","r2","r3","r4","r5","r6","r7","r8","memory"
+   );
+   return z;
+}
+
+#define FFT_FFMPEG_INCL_OPTIMISED_FFT8
+        /* The chunk of asm below is equivalent to the following:
+        
+        // first load in z[4].re thru z[7].im into local registers
+        // ...
+        BF_OPT2_REV(z[4].re, z[5].re, z[4].re, z[5].re); // x=a+b; y=x-(b<<1)
+        BF_OPT2_REV(z[4].im, z[5].im, z[4].im, z[5].im);
+        BF_REV     (temp, z[7].re, z[6].re, z[7].re);  // x=a+b; y=a-b;
+        BF_REV     (z[6].re, z[7].im, z[6].im, z[7].im);
+        // save z[7].re and z[7].im as those are complete now
+        // z[5].re and z[5].im are also complete now but save these later on
+        
+        BF(z[6].im, z[4].re, temp, z[4].re);        // x=a-b; y=a+b
+        BF_OPT(z[6].re, z[4].im, z[4].im, z[6].re); // y=a+b; x=y-(b<<1)
+        // now load z[2].re and z[2].im
+        // ...        
+        BF_OPT(z[6].re, z[2].re, z[2].re, z[6].re); // y=a+b; x=y-(b<<1)
+        BF_OPT(z[6].im, z[2].im, z[2].im, z[6].im); // y=a+b; x=y-(b<<1)
+        // Now save z[6].re and z[6].im, along with z[5].re and z[5].im
+        // for efficiency.  Also save z[2].re and z[2].im.
+        // Now load z[0].re and z[0].im
+        // ...
+        
+        BF_OPT(z[4].re, z[0].re, z[0].re, z[4].re); // y=a+b; x=y-(b<<1)
+        BF_OPT(z[4].im, z[0].im, z[0].im, z[4].im); // y=a+b; x=y-(b<<1)
+        // Finally save out z[4].re, z[4].im, z[0].re and z[0].im
+        // ...
+        */
+/*
+ * ARM 8-point FFT performed in place on z[0..7].
+ *
+ * Runs fft4 on z[0..3]; its return value (m4) is used as the pointer to
+ * z[4].  The asm block then processes z[4..7] in r1-r8 and folds the results
+ * into z[0] and z[2], storing z[7], then z[5]/z[6], z[2], z[0] and z[4].
+ * The write-back on the z[0] store leaves z pointing at z[1], which is where
+ * the closing TRANSFORM_EQUAL(z,2) call starts.
+ */
+static inline void fft8(FFTComplex * z)
+{
+    FFTComplex* m4 = fft4(z);
+    {
+        /* note that we increment z_ptr on the final stmia, which 
+           leaves z_ptr pointing to z[1].re ready for the Transform step */
+           
+        register FFTSample temp;
+
+        asm volatile(
+            /* read in z[4].re thru z[7].im */
+            "ldmia %[z4_ptr]!, {r1-r8}\n\t"
+            /* (now points one word past &z[7].im) */
+            "add r1,r1,r3\n\t"
+            "sub r3,r1,r3,lsl #1\n\t"
+            "add r2,r2,r4\n\t"
+            "sub r4,r2,r4,lsl #1\n\t"
+            "add %[temp],r5,r7\n\t"
+            "sub r7,r5,r7\n\t"
+            "add r5,r6,r8\n\t"
+            "sub r8,r6,r8\n\t"
+
+            "stmdb %[z4_ptr]!, {r7,r8}\n\t" /* write z[7].re,z[7].im  straight away */
+                                            /* Note, registers r7 & r8 now free */
+
+            "sub r6,%[temp],r1\n\t"
+            "add r1,%[temp],r1\n\t"
+            "add r2,r2,r5\n\t"
+            "sub r5,r2,r5,lsl #1\n\t"
+            "add %[temp], %[z_ptr], #16\n\t"  /* point to &z[2].re */
+            "ldmia %[temp],{r7,r8}\n\t"  /* load z[2].re and z[2].im */
+            "add r7,r7,r5\n\t"
+            "sub r5,r7,r5,lsl #1\n\t"
+            "add r8,r8,r6\n\t"
+            "sub r6,r8,r6,lsl #1\n\t"
+
+            /* write out z[5].re, z[5].im, z[6].re, z[6].im in one go*/
+            "stmdb %[z4_ptr]!, {r3-r6}\n\t"
+            "stmia %[temp],{r7,r8}\n\t" /* write out z[2].re, z[2].im */
+            "ldmia %[z_ptr],{r7,r8}\n\t" /* load z[0].re, z[0].im */
+
+            "add r7,r7,r1\n\t"
+            "sub r1,r7,r1,lsl #1\n\t"
+            "add r8,r8,r2\n\t"
+            "sub r2,r8,r2,lsl #1\n\t"
+
+            "stmia %[z_ptr]!,{r7,r8}\n\t" /* write out z[0].re, z[0].im */
+            "stmdb %[z4_ptr], {r1,r2}\n\t" /* write out z[4].re, z[4].im */
+            : [z4_ptr] "+r" (m4), [temp] "=r" (temp), [z_ptr] "+r" (z)
+            :
+            : "r1","r2","r3","r4","r5","r6","r7","r8","memory"
+        );
+    }
+
+    /* z now points at z[1] (see the write-back note above) */
+    TRANSFORM_EQUAL(z,2);
+}
+
+#endif // CPU_ARM
--- a/lib/rbcodec/codecs/lib/fft-ffmpeg_cf.h
+++ b/lib/rbcodec/codecs/lib/fft-ffmpeg_cf.h
@ -0,0 +1,370 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2010 Nils Wallménius
+ *
+ * Coldfire v2 optimisations for ffmpeg's fft (used in fft-ffmpeg.c)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#ifdef CPU_COLDFIRE
+#define FFT_FFMPEG_INCL_OPTIMISED_FFT4
+/*
+ * ColdFire 4-point FFT performed in place on z[0..3].
+ *
+ * The eight words of z[0..3] are loaded into d0-d7 with one movem.l,
+ * combined with add/sub using a0 as the only scratch register, and stored
+ * back to the same location with one movem.l.  Nothing is returned and z
+ * itself is not modified (it is an input-only operand).
+ */
+static inline void fft4(FFTComplex * z)
+{
+    asm volatile ("movem.l (%[z]), %%d0-%%d7\n\t"
+                  "move.l  %%d0, %%a0\n\t"
+                  "add.l   %%d2, %%d0\n\t" /* d0 == t1 */
+                  "neg.l   %%d2\n\t"
+                  "add.l   %%a0, %%d2\n\t" /* d2 == t3, a0 free */
+                  "move.l  %%d6, %%a0\n\t"
+                  "sub.l   %%d4, %%d6\n\t" /* d6 == t8 */
+                  "add.l   %%d4, %%a0\n\t" /* a0 == t6 */
+
+                  "move.l  %%d0, %%d4\n\t"
+                  "sub.l   %%a0, %%d4\n\t" /* z[2].re done */
+                  "add.l   %%a0, %%d0\n\t" /* z[0].re done, a0 free */
+
+                  "move.l  %%d5, %%a0\n\t"
+                  "sub.l   %%d7, %%d5\n\t" /* d5 == t7 */
+                  "add.l   %%d7, %%a0\n\t" /* a0 == t5 */
+
+                  "move.l  %%d1, %%d7\n\t"
+                  "sub.l   %%d3, %%d7\n\t" /* d7 == t4 */
+                  "add.l   %%d3, %%d1\n\t" /* d1 == t2 */
+
+                  "move.l  %%d7, %%d3\n\t"
+                  "sub.l   %%d6, %%d7\n\t" /* z[3].im done */
+                  "add.l   %%d6, %%d3\n\t" /* z[1].im done */
+
+                  "move.l  %%d2, %%d6\n\t"
+                  "sub.l   %%d5, %%d6\n\t" /* z[3].re done */
+                  "add.l   %%d5, %%d2\n\t" /* z[1].re done */
+
+                  "move.l  %%d1, %%d5\n\t"
+                  "sub.l   %%a0, %%d5\n\t" /* z[2].im done */
+                  "add.l   %%a0, %%d1\n\t" /* z[0].im done */
+
+                  "movem.l %%d0-%%d7, (%[z])\n\t"
+                  : :[z] "a" (z)
+                  : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
+                    "a0", "cc", "memory");
+                  
+}
+
+#define FFT_FFMPEG_INCL_OPTIMISED_FFT8
+/*
+ * ColdFire 8-point FFT performed in place on z[0..7], as one asm block.
+ *
+ * Order of work, following the inline notes:
+ *   1. load z[4..7] and form the partial sums/differences (kept in a1-a4,
+ *      d0, d1, d4, d5);
+ *   2. start the four cPI2_8 multiplies on the EMAC accumulators acc0-acc3
+ *      so z[5] and z[7] never have to be spilled;
+ *   3. run the fft4 sequence on z[0..3] in d0-d7 without storing;
+ *   4. combine and store the even elements z[0], z[2], z[4], z[6];
+ *   5. collect the accumulator results (the TRANSFORM_EQUAL part) and store
+ *      the odd elements z[1], z[3], z[5], z[7].
+ *
+ * cPI2_8 is passed as an immediate operand.  The accumulators are used but
+ * are not named in the clobber list - NOTE(review): callers presumably must
+ * not keep live values in acc0-acc3 across this call; confirm.
+ */
+static inline void fft8(FFTComplex *z)
+{
+    asm volatile ("movem.l (4*8, %[z]), %%d0-%%d7\n\t"
+                  "move.l  %%d0, %%a1\n\t"
+                  "add.l   %%d2, %%a1\n\t" /* a1 == t1 */
+                  "sub.l   %%d2, %%d0\n\t" /* d0 == z[5].re */
+                  
+                  "move.l  %%d1, %%a2\n\t"
+                  "add.l   %%d3, %%a2\n\t" /* a2 == t2 */
+                  "sub.l   %%d3, %%d1\n\t" /* d1 == z[5].im */
+                  
+                  "move.l  %%d4, %%d2\n\t"
+                  "add.l   %%d6, %%d2\n\t" /* d2 == t3 */
+                  "sub.l   %%d6, %%d4\n\t" /* d4 == z[7].re */
+                  
+                  "move.l  %%d5, %%d3\n\t"
+                  "add.l   %%d7, %%d3\n\t" /* d3 == t4 */
+                  "sub.l   %%d7, %%d5\n\t" /* d5 == z[7].im */
+                  
+                  "move.l  %%d2, %%a4\n\t"
+                  "sub.l   %%a1, %%a4\n\t" /* a4 == t8 */
+                  "add.l   %%d2, %%a1\n\t" /* a1 == t1, d2 free */
+                  
+                  "move.l  %%a2, %%a3\n\t"
+                  "sub.l   %%d3, %%a3\n\t" /* a3 == t7 */
+                  "add.l   %%d3, %%a2\n\t" /* a2 == t2, d3 free */
+                  
+                  /* emac block from TRANSFORM_EQUAL, do this now
+                     so we don't need to store and load z[5] and z[7] */
+                  "move.l  %[_cPI2_8], %%d2\n\t"
+                  "mac.l   %%d2, %%d0, %%acc0\n\t"
+                  "mac.l   %%d2, %%d1, %%acc1\n\t"
+                  "mac.l   %%d2, %%d4, %%acc2\n\t"
+                  "mac.l   %%d2, %%d5, %%acc3\n\t"
+                  
+                  /* fft4, clobbers all d regs and a0 */
+                  "movem.l (%[z]), %%d0-%%d7\n\t"
+                  "move.l  %%d0, %%a0\n\t"
+                  "add.l   %%d2, %%d0\n\t" /* d0 == t1 */
+                  "neg.l   %%d2\n\t"
+                  "add.l   %%a0, %%d2\n\t" /* d2 == t3, a0 free */
+                  "move.l  %%d6, %%a0\n\t"
+                  "sub.l   %%d4, %%d6\n\t" /* d6 == t8 */
+                  "add.l   %%d4, %%a0\n\t" /* a0 == t6 */
+
+                  "move.l  %%d0, %%d4\n\t"
+                  "sub.l   %%a0, %%d4\n\t" /* z[2].re done */
+                  "add.l   %%a0, %%d0\n\t" /* z[0].re done, a0 free */
+
+                  "move.l  %%d5, %%a0\n\t"
+                  "sub.l   %%d7, %%d5\n\t" /* d5 == t7 */
+                  "add.l   %%d7, %%a0\n\t" /* a0 == t5 */
+
+                  "move.l  %%d1, %%d7\n\t"
+                  "sub.l   %%d3, %%d7\n\t" /* d7 == t4 */
+                  "add.l   %%d3, %%d1\n\t" /* d1 == t2 */
+
+                  "move.l  %%d7, %%d3\n\t"
+                  "sub.l   %%d6, %%d7\n\t" /* z[3].im done */
+                  "add.l   %%d6, %%d3\n\t" /* z[1].im done */
+
+                  "move.l  %%d2, %%d6\n\t"
+                  "sub.l   %%d5, %%d6\n\t" /* z[3].re done */
+                  "add.l   %%d5, %%d2\n\t" /* z[1].re done */
+
+                  "move.l  %%d1, %%d5\n\t"
+                  "sub.l   %%a0, %%d5\n\t" /* z[2].im done */
+                  "add.l   %%a0, %%d1\n\t" /* z[0].im done */
+                  /* end of fft4, but don't store yet */
+
+                  "move.l  %%d0, %%a0\n\t"
+                  "add.l   %%a1, %%d0\n\t"
+                  "sub.l   %%a1, %%a0\n\t" /* z[4].re, z[0].re done, a1 free */
+
+                  "move.l  %%d1, %%a1\n\t"
+                  "add.l   %%a2, %%d1\n\t"
+                  "sub.l   %%a2, %%a1\n\t" /* z[4].im, z[0].im done, a2 free */
+
+                  "move.l  %%d4, %%a2\n\t"
+                  "add.l   %%a3, %%d4\n\t"
+                  "sub.l   %%a3, %%a2\n\t" /* z[6].re, z[2].re done, a3 free */
+
+                  "move.l  %%d5, %%a3\n\t"
+                  "add.l   %%a4, %%d5\n\t"
+                  "sub.l   %%a4, %%a3\n\t" /* z[6].im, z[2].im done, a4 free */
+
+                  "movem.l %%d0-%%d1, (%[z])\n\t"      /* save z[0] */
+                  "movem.l %%d4-%%d5, (2*8, %[z])\n\t" /* save z[2] */
+                  "movem.l %%a0-%%a1, (4*8, %[z])\n\t" /* save z[4] */
+                  "movem.l %%a2-%%a3, (6*8, %[z])\n\t" /* save z[6] */
+
+                  /* TRANSFORM_EQUAL */
+                  "movclr.l %%acc0, %%d0\n\t"
+                  "movclr.l %%acc1, %%d1\n\t"
+                  "movclr.l %%acc2, %%d4\n\t"
+                  "movclr.l %%acc3, %%d5\n\t"
+
+                  "move.l  %%d1, %%a0\n\t"
+                  "add.l   %%d0, %%a0\n\t" /* a0 == t1 */
+                  "sub.l   %%d0, %%d1\n\t" /* d1 == t2 */
+
+                  "move.l  %%d4, %%d0\n\t"
+                  "add.l   %%d5, %%d0\n\t" /* d0 == t6 */
+                  "sub.l   %%d5, %%d4\n\t" /* d4 == t5 */
+
+                  "move.l  %%d4, %%a1\n\t"
+                  "sub.l   %%a0, %%a1\n\t" /* a1 == temp1 */
+                  "add.l   %%a0, %%d4\n\t" /* d4 == temp2 */
+
+                  "move.l  %%d2, %%a2\n\t"
+                  "sub.l   %%d4, %%a2\n\t" /* a2 == z[5].re */
+                  "add.l   %%d4, %%d2\n\t" /* z[1].re done */
+
+                  "move.l  %%d7, %%d5\n\t"
+                  "sub.l   %%a1, %%d5\n\t" /* d5 == z[7].im */
+                  "add.l   %%a1, %%d7\n\t" /* z[3].im done */
+
+                  "move.l  %%d1, %%a0\n\t"
+                  "sub.l   %%d0, %%a0\n\t" /* a0 == temp1 */
+                  "add.l   %%d0, %%d1\n\t" /* d1 == temp2 */
+
+                  "move.l  %%d6, %%d4\n\t"
+                  "sub.l   %%a0, %%d4\n\t" /* d4 == z[7].re */
+                  "add.l   %%a0, %%d6\n\t" /* z[3].re done */
+
+                  "move.l  %%d3, %%a3\n\t"
+                  "sub.l   %%d1, %%a3\n\t" /* a3 == z[5].im */
+                  "add.l   %%d1, %%d3\n\t" /* z[1].im done */
+
+                  "movem.l %%d2-%%d3, (1*8, %[z])\n\t" /* save z[1] */
+                  "movem.l %%d6-%%d7, (3*8, %[z])\n\t" /* save z[3] */
+                  "movem.l %%a2-%%a3, (5*8, %[z])\n\t" /* save z[5] */
+                  "movem.l %%d4-%%d5, (7*8, %[z])\n\t" /* save z[7] */
+                  : :[z] "a" (z), [_cPI2_8] "i" (cPI2_8)
+                  : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
+                    "a0", "a1", "a2", "a3", "a4", "cc", "memory");
+}
+
+#define FFT_FFMPEG_INCL_OPTIMISED_TRANSFORM
+
+/*
+ * ColdFire butterfly step with an explicit twiddle factor (wre, wim).
+ *
+ * Multiplies z[2n] and z[3n] by the twiddle on the EMAC accumulators
+ * acc0-acc3 (mac.l/msac.l with parallel loads), then combines the products
+ * with z[0] and z[n] and stores all four elements back in place.
+ *
+ * z:   first of the four elements touched (stride n).
+ * n:   element stride between the four inputs.
+ * wre, wim: twiddle components; both registers are reused as scratch once
+ *      the multiplies have been issued.
+ *
+ * Returns z advanced by one element: z[0] is read with two post-increment
+ * loads, which is also why z[0] is stored back at offset -8.
+ */
+static inline FFTComplex* TRANSFORM(FFTComplex * z, unsigned int n, FFTSample wre, FFTSample wim)
+{
+    asm volatile ("move.l   (%[z2]),   %%d5\n\t"
+                  "mac.l    %%d5,      %[wre], (4, %[z2]), %%d4, %%acc0\n\t"
+                  "mac.l    %%d4,      %[wim], %%acc0\n\t"
+                  "mac.l    %%d4,      %[wre], (%[z3]), %%d6, %%acc1\n\t"
+                  "msac.l   %%d5,      %[wim], (4, %[z3]), %%d7, %%acc1\n\t"
+                  "mac.l    %%d6,      %[wre], (%[z])+, %%d4, %%acc2\n\t"
+                  "msac.l   %%d7,      %[wim], (%[z])+, %%d5, %%acc2\n\t"
+                  "mac.l    %%d7,      %[wre], %%acc3\n\t"
+                  "mac.l    %%d6,      %[wim], %%acc3\n\t"
+
+                  "movclr.l %%acc0,    %[wre]\n\t"     /* t1 */
+                  "movclr.l %%acc2,    %[wim]\n\t"     /* t5 */
+
+                  "move.l   %%d4,      %%d6\n\t"
+                  "move.l   %[wim],    %%d7\n\t"
+                  "sub.l    %[wre],    %[wim]\n\t"     /* t5 = t5-t1 */
+                  "add.l    %[wre],    %%d7\n\t"
+                  "sub.l    %%d7,      %%d6\n\t"       /* d6 = a0re - (t5+t1) => a2re */
+                  "add.l    %%d7,      %%d4\n\t"       /* d4 = a0re + (t5+t1) => a0re */
+
+                  "movclr.l %%acc3,    %%d7\n\t"       /* t6 */
+                  "movclr.l %%acc1,    %%d3\n\t"       /* t2 */
+                  
+                  "move.l   %%d3,      %[wre]\n\t"
+                  "add.l    %%d7,      %[wre]\n\t"
+                  "sub.l    %%d7,      %%d3\n\t"       /* t2 = t6-t2 */
+                  "move.l   %%d5,      %%d7\n\t"
+                  "sub.l    %[wre],    %%d7\n\t"       /* d7 = a0im - (t2+t6) => a2im */
+
+                  "movem.l  %%d6-%%d7, (%[z2])\n\t"    /* store z2 */
+                  "add.l    %[wre],    %%d5\n\t"       /* d5 = a0im + (t2+t6) => a0im */
+                  "movem.l  %%d4-%%d5, (-8, %[z])\n\t"     /* store z0 */
+
+                  "movem.l  (%[z1]),   %%d4-%%d5\n\t"  /* load z1 */
+                  "move.l   %%d4,      %%d6\n\t"
+
+                  "sub.l    %%d3,      %%d6\n\t"       /* d6 = a1re - (t2-t6) => a3re */
+                  "add.l    %%d3,      %%d4\n\t"       /* d4 = a1re + (t2-t6) => a1re */
+
+                  "move.l   %%d5,      %%d7\n\t"
+                  "sub.l    %[wim],    %%d7\n\t"
+                  "movem.l  %%d6-%%d7, (%[z3])\n\t"    /* store z3 */
+                  "add.l    %[wim],    %%d5\n\t"
+                  "movem.l  %%d4-%%d5, (%[z1])\n\t"    /* store z1 */
+
+                  : [wre] "+r" (wre), [wim] "+r" (wim), /* we clobber these after using them */
+                    [z] "+a" (z)
+                  : [z1] "a" (&z[n]), [z2] "a" (&z[2*n]), [z3] "a" (&z[3*n])
+                  : "d3", "d4", "d5", "d6", "d7", "cc", "memory");
+    return z;
+}
+
+/* TRANSFORM with the twiddle taken from w in stored order:
+   w[0] is passed as wre and w[1] as wim. */
+static inline FFTComplex* TRANSFORM_W01(FFTComplex * z, unsigned int n, const FFTSample * w)
+{
+    const FFTSample wre = w[0];
+    const FFTSample wim = w[1];
+
+    return TRANSFORM(z, n, wre, wim);
+}
+
+/* TRANSFORM with the twiddle taken from w in swapped order:
+   w[1] is passed as wre and w[0] as wim. */
+static inline FFTComplex* TRANSFORM_W10(FFTComplex * z, unsigned int n, const FFTSample * w)
+{
+    const FFTSample wre = w[1];
+    const FFTSample wim = w[0];
+
+    return TRANSFORM(z, n, wre, wim);
+}
+
+/*
+ * ColdFire butterfly step that needs no multiplies.
+ *
+ * Loads z[0], z[2n] and z[3n], combines them with add/sub only, stores
+ * z[2n] and z[0], then loads z[n] and stores z[3n] and z[n].
+ *
+ * z: first of the four elements touched (stride n).
+ * n: element stride between the four inputs.
+ *
+ * Returns z+1; z is an input-only operand and is not changed by the asm.
+ */
+static inline FFTComplex* TRANSFORM_ZERO(FFTComplex * z, unsigned int n)
+{
+    asm volatile("movem.l (%[z]), %%d4-%%d5\n\t" /* load z0 */
+                 "move.l  %%d4, %%d6\n\t"
+                 "movem.l (%[z2]), %%d2-%%d3\n\t" /* load z2 */
+                 "movem.l (%[z3]), %%d0-%%d1\n\t" /* load z3 */
+                 "move.l  %%d0, %%d7\n\t"
+                 "sub.l   %%d2, %%d0\n\t"
+                 "add.l   %%d2, %%d7\n\t"
+                 "sub.l   %%d7, %%d6\n\t" /* d6 = a0re - (t5+t1) => a2re */
+                 "add.l   %%d7, %%d4\n\t" /* d4 = a0re + (t5+t1) => a0re */
+
+                 "move.l  %%d5, %%d7\n\t"
+                 "move.l  %%d3, %%d2\n\t"
+                 "add.l   %%d1, %%d2\n\t"
+                 "sub.l   %%d2, %%d7\n\t" /* d7 = a0im - (t2+t6) => a2im */
+                 "movem.l %%d6-%%d7, (%[z2])\n\t" /* store z2 */
+                 "add.l   %%d2, %%d5\n\t" /* d5 = a0im + (t2+t6) => a0im */
+                 "movem.l %%d4-%%d5, (%[z])\n\t" /* store z0 */
+
+                 "movem.l (%[z1]), %%d4-%%d5\n\t" /* load z1 */
+                 "move.l  %%d4, %%d6\n\t"
+                 "sub.l   %%d1, %%d3\n\t"
+                 "sub.l   %%d3, %%d6\n\t" /* d6 = a1re - (t2-t6) => a3re */
+                 "add.l   %%d3, %%d4\n\t" /* d4 = a1re + (t2-t6) => a1re */
+
+                 "move.l  %%d5, %%d7\n\t"
+                 "sub.l   %%d0, %%d7\n\t"
+                 "movem.l %%d6-%%d7, (%[z3])\n\t" /* store z3 */
+                 "add.l   %%d0, %%d5\n\t"
+
+                 "movem.l %%d4-%%d5, (%[z1])\n\t" /* store z1 */
+
+                 :
+                 : [z] "a" (z), [z1] "a" (&z[n]), [z2] "a" (&z[2*n]), [z3] "a" (&z[3*n])
+                 : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "cc", "memory");
+    return z+1;
+}
+
+/*
+ * ColdFire butterfly step that uses the single constant cPI2_8 for every
+ * multiply.
+ *
+ * The components of z[2n] and z[3n] are multiplied by cPI2_8 on the EMAC
+ * accumulators acc0-acc3 (the mac.l instructions also fetch the next
+ * operands, ending with z[0]), then the products are combined with z[0] and
+ * z[n] and all four elements are stored back in place.
+ *
+ * z: first of the four elements touched (stride n).
+ * n: element stride between the four inputs.
+ *
+ * Returns z+1; z is an input-only operand and is not changed by the asm.
+ */
+static inline FFTComplex* TRANSFORM_EQUAL(FFTComplex * z, unsigned int n)
+{
+    asm volatile ("movem.l  (%[z2]),    %%d0-%%d1\n\t"
+                  "move.l   %[_cPI2_8], %%d2\n\t"
+                  "mac.l    %%d0,       %%d2, (%[z3]),    %%d0, %%acc0\n\t"
+                  "mac.l    %%d1,       %%d2, (4, %[z3]), %%d1, %%acc1\n\t"
+                  "mac.l    %%d0,       %%d2, (%[z]),     %%d4, %%acc2\n\t"
+                  "mac.l    %%d1,       %%d2, (4, %[z]),  %%d5, %%acc3\n\t"
+
+                  "movclr.l %%acc0,    %%d0\n\t"
+                  "movclr.l %%acc1,    %%d1\n\t"
+                  "movclr.l %%acc2,    %%d2\n\t"
+                  "movclr.l %%acc3,    %%d3\n\t"
+                  
+                  "move.l   %%d0, %%d7\n\t"
+                  "add.l    %%d1, %%d0\n\t"            /* d0 == t1 */
+                  "sub.l    %%d7, %%d1\n\t"            /* d1 == t2 */
+
+                  "move.l   %%d3, %%d7\n\t"
+                  "add.l    %%d2, %%d3\n\t"            /* d3 == t6 */
+                  "sub.l    %%d7, %%d2\n\t"            /* d2 == t5 */
+
+                  "move.l   %%d4,      %%d6\n\t"
+                  "move.l   %%d2,      %%d7\n\t"
+                  "sub.l    %%d0,      %%d2\n\t"       /* t5 = t5-t1 */
+                  "add.l    %%d0,      %%d7\n\t"
+                  "sub.l    %%d7,      %%d6\n\t"       /* d6 = a0re - (t5+t1) => a2re */
+                  "add.l    %%d7,      %%d4\n\t"       /* d4 = a0re + (t5+t1) => a0re */
+
+                  "move.l   %%d1,      %%d0\n\t"
+                  "add.l    %%d3,      %%d0\n\t"
+                  "sub.l    %%d3,      %%d1\n\t"       /* t2 = t6-t2 */
+                  "move.l   %%d5,      %%d7\n\t"
+                  "sub.l    %%d0,    %%d7\n\t"         /* d7 = a0im - (t2+t6) => a2im */
+
+                  "movem.l  %%d6-%%d7, (%[z2])\n\t"    /* store z2 */
+                  "add.l    %%d0,    %%d5\n\t"         /* d5 = a0im + (t2+t6) => a0im */
+                  "movem.l  %%d4-%%d5, (%[z])\n\t"     /* store z0 */
+
+                  "movem.l  (%[z1]),   %%d4-%%d5\n\t"  /* load z1 */
+                  "move.l   %%d4,      %%d6\n\t"
+
+                  "sub.l    %%d1,      %%d6\n\t"       /* d6 = a1re - (t2-t6) => a3re */
+                  "add.l    %%d1,      %%d4\n\t"       /* d4 = a1re + (t2-t6) => a1re */
+
+                  "move.l   %%d5,      %%d7\n\t"
+                  "sub.l    %%d2,    %%d7\n\t"
+                  "movem.l  %%d6-%%d7, (%[z3])\n\t"    /* store z3 */
+                  "add.l    %%d2,    %%d5\n\t"
+                  "movem.l  %%d4-%%d5, (%[z1])\n\t"    /* store z1 */
+
+                  :: [z] "a" (z), [z1] "a" (&z[n]), [z2] "a" (&z[2*n]), [z3] "a" (&z[3*n]),
+                    [_cPI2_8] "i" (cPI2_8)
+                  : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "cc", "memory");
+
+    return z+1;
+}
+
+#endif /* CPU_COLDFIRE */
--- a/lib/rbcodec/codecs/lib/fft.h
+++ b/lib/rbcodec/codecs/lib/fft.h
@ -0,0 +1,64 @@
+/*
+ * WMA compatible decoder
+ * Copyright (c) 2002 The FFmpeg Project.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#ifndef CODECLIB_FFT_H_INCLUDED
+#define CODECLIB_FFT_H_INCLUDED
+ 
+#include <inttypes.h>
+/* Fixed-point scalar types: plain 32- and 64-bit signed integers. */
+typedef int32_t fixed32; 
+typedef int64_t fixed64;
+
+/* FFT_FIXED is defined unconditionally right here, so the test below always
+   selects the fixed32 branch and the float branch is never used from this
+   header. */
+#define FFT_FIXED
+
+#ifdef FFT_FIXED
+typedef fixed32 FFTSample;
+#else /* FFT_FIXED */
+typedef float   FFTSample;
+#endif /* FFT_FIXED */
+
+/* One complex FFT value: real part first, imaginary part second. */
+typedef struct FFTComplex {
+    FFTSample re;
+    FFTSample im;
+} FFTComplex;
+
+/*
+ * FFT/MDCT context: transform sizes, lookup tables and function hooks.
+ *
+ * NOTE(review): the only function declared in this header, ff_fft_calc_c,
+ * takes nbits directly and no FFTContext; which of these fields are
+ * actually consumed has to be checked in the fft/mdct sources.
+ */
+typedef struct FFTContext {
+    int nbits;
+    int inverse;
+    uint16_t *revtab;
+    int mdct_size; /* size of MDCT (i.e. number of input data * 2) */
+    int mdct_bits; /* n = 2^nbits */
+    /* pre/post rotation tables */
+    FFTSample *tcos;
+    FFTSample *tsin;
+    /* per-context function hooks; each receives the context itself */
+    void (*fft_permute)(struct FFTContext *s, FFTComplex *z);
+    void (*fft_calc)(struct FFTContext *s, FFTComplex *z);
+    void (*imdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
+    void (*imdct_half)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
+    void (*mdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
+    int split_radix;
+    /* presumably one of the FF_MDCT_PERM_* values below - TODO confirm */
+    int permutation;
+#define FF_MDCT_PERM_NONE       0
+#define FF_MDCT_PERM_INTERLEAVE 1
+} FFTContext;
+
+// internal api  (fft<->mdct)
+//int fft_calc_unscaled(FFTContext *s, FFTComplex *z);
+//void ff_fft_permute_c(FFTContext *s, FFTComplex *z); // internal only?
+/* FFT entry point shared with the MDCT code.  NOTE(review): presumably
+   transforms 2^nbits complex values in place in z - confirm against the
+   definition, which is not part of this header. */
+void ff_fft_calc_c(int nbits, FFTComplex *z);
+
+#endif // CODECLIB_FFT_H_INCLUDED
+
--- a/lib/rbcodec/codecs/lib/fixedpoint.c
+++ b/lib/rbcodec/codecs/lib/fixedpoint.c
@ -0,0 +1 @@
+#include "../../../apps/fixedpoint.c"
--- a/lib/rbcodec/codecs/lib/fixedpoint.h
+++ b/lib/rbcodec/codecs/lib/fixedpoint.h
@ -0,0 +1,49 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2006 Jens Arnold
+ *
+ * Fixed point library for plugins
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+ /** CODECS - FIXED POINT MATH ROUTINES - USAGE
+ *
+ *  - x and y arguments are fixed point integers
+ *  - fracbits is the number of fractional bits in the argument(s)
+ *  - functions return long fixed point integers with the specified number
+ *    of fractional bits unless otherwise specified
+ *
+ *  Calculate sin and cos of an angle:
+ *      fp_sincos(phase, *cos)
+ *          where phase is a 32 bit unsigned integer with 0 representing 0
+ *          and 0xFFFFFFFF representing 2*pi, and *cos is the address to
+ *          a long signed integer.  Value returned is a long signed integer
+ *          from -0x80000000 to 0x7fffffff, representing -1 to 1 respectively.
+ *          That is, value is a fixed point integer with 31 fractional bits.
+ *
+ *  Take square root of a fixed point number:
+ *      fp_sqrt(x, fracbits)
+ *
+ */
+#ifndef _FIXEDPOINT_H_CODECS
+#define _FIXEDPOINT_H_CODECS
+
+/* Sine and cosine of phase (0 .. 0xFFFFFFFF spans 0 .. 2*pi).  The cosine is
+   written through *cos; the return value, presumably the sine, is a fixed
+   point number with 31 fractional bits as described in the usage notes. */
+long fp_sincos(unsigned long phase, long *cos);
+/* Square root of the fixed point value a, where fracbits is the number of
+   fractional bits in a; per the usage notes the result uses the same number
+   of fractional bits. */
+long fp_sqrt(long a, unsigned int fracbits);
+
+#endif
--- a/lib/rbcodec/codecs/lib/libcodec.make
+++ b/lib/rbcodec/codecs/lib/libcodec.make
@ -0,0 +1,37 @@
+#             __________               __   ___.
+#   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+#   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+#   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+#   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+#                     \/            \/     \/    \/            \/
+# $Id$
+# Build rules for libcodec.a, assembled from the files listed in codecs/lib/SOURCES
+
+CODECLIB := $(CODECDIR)/libcodec.a
+CODECLIB_SRC := $(call preprocess, $(RBCODECLIB_DIR)/codecs/lib/SOURCES)
+CODECLIB_OBJ := $(call c2obj, $(CODECLIB_SRC))
+OTHER_SRC += $(CODECLIB_SRC)
+# Archive every library object; any existing archive file is removed first
+$(CODECLIB): $(CODECLIB_OBJ)
+	$(SILENT)$(shell rm -f $@)
+	$(call PRINTS,AR $(@F))$(AR) rcs $@ $^ >/dev/null
+
+CODECLIBFLAGS = $(filter-out -O%,$(CODECFLAGS))
+# Any -O option inherited from CODECFLAGS was stripped above; choose one per target here
+ifeq ($(MEMORYSIZE),2)
+    CODECLIBFLAGS += -Os
+else ifeq ($(ARCH),arch_m68k)
+    CODECLIBFLAGS += -O2
+else
+    CODECLIBFLAGS += -O1
+endif
+
+# Add '-ffunction-sections' unless APP_TYPE contains sdl-sim (not used when compiling sdl-sim)
+ifneq ($(findstring sdl-sim, $(APP_TYPE)), sdl-sim)
+    CODECLIBFLAGS += -ffunction-sections
+endif
+# Compile each library source with CODECLIBFLAGS; -I adds the source file's own directory
+$(CODECDIR)/lib/%.o: $(RBCODECLIB_DIR)/codecs/lib/%.c
+	$(SILENT)mkdir -p $(dir $@)
+	$(call PRINTS,CC $(subst $(ROOTDIR)/,,$<))$(CC) \
+		-I$(dir $<) $(CODECLIBFLAGS) -c $< -o $@
--- a/lib/rbcodec/codecs/lib/mdct.c
+++ b/lib/rbcodec/codecs/lib/mdct.c
@ -0,0 +1,644 @@
+/*
+ * Fixed Point IMDCT 
+ * Copyright (c) 2002 The FFmpeg Project.
+ * Copyright (c) 2010 Dave Hooper, Mohamed Tarek, Michael Giacomelli
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "codeclib.h"
+#include "mdct.h"
+#include "codeclib_misc.h"
+#include "mdct_lookup.h"
+
+#ifndef ICODE_ATTR_TREMOR_MDCT
+#define ICODE_ATTR_TREMOR_MDCT ICODE_ATTR
+#endif /* ICODE_ATTR_TREMOR_MDCT: falls back to plain ICODE_ATTR unless it was already defined */
+
+/**
+ * Compute the middle half of the inverse MDCT of size N = 2^nbits,
+ * thus excluding the parts that can be derived by symmetry.
+ * @param nbits  log2 of the transform size N
+ * @param output N/2 samples; also used as the work buffer for the FFT
+ * @param input  N/2 samples
+ * NOTE - CANNOT CURRENTLY OPERATE IN PLACE (input and output must
+ *                                          not overlap or intersect at all)
+ */
+void ff_imdct_half(unsigned int nbits, fixed32 *output, const fixed32 *input) ICODE_ATTR_TREMOR_MDCT;
+void ff_imdct_half(unsigned int nbits, fixed32 *output, const fixed32 *input)
+{
+    int n8, n4, n2, n, j;
+    const fixed32 *in1, *in2;
+    (void)j; /* j is only used by the plain-C loops; this silences the warning on the asm path */
+    n = 1 << nbits;
+
+    n2 = n >> 1;
+    n4 = n >> 2;
+    n8 = n >> 3;
+
+    FFTComplex *z = (FFTComplex *)output; /* the output buffer is treated as an array of complex values */
+
+    /* pre rotation */
+    in1 = input;
+    in2 = input + n2 - 1; /* last input sample; in2 is walked backwards */
+    
+    /* revtab comes from the fft; revtab table is sized for N=4096 size fft = 2^12.
+       The fft is size N/4 so s->nbits-2, so our shift needs to be (12-(nbits-2)) */
+    const int revtab_shift = (14- nbits);
+    
+    /* bitreverse reorder the input and rotate;   result here is in OUTPUT ... */
+    /* (note that when using the current split radix, the bitreverse ordering is
+        complex, meaning that this reordering cannot easily be done in-place) */
+    /* Using the following pdf, you can see that it is possible to rearrange
+       the 'classic' pre/post rotate with an alternative one that enables
+       us to use fewer distinct twiddle factors.
+       http://www.eurasip.org/Proceedings/Eusipco/Eusipco2006/papers/1568980508.pdf
+       
+       For prerotation, the factors are just sin,cos(2PI*i/N)
+       For postrotation, the factors are sin,cos(2PI*(i+1/4)/N)
+       
+       Therefore, prerotation can immediately reuse the same twiddles as fft
+       (for postrotation it's still a bit complex, we reuse the fft trig tables
+        where we can, or a special table for N=2048, or interpolate between
+        trig tables for N>2048)
+       */
+    const int32_t *T = sincos_lookup0;
+    const int step = 2<<(12-nbits); /* NOTE(review): the shift count is negative when nbits == 13, a size the switch below handles - confirm this is intended */
+    const uint16_t * p_revtab=revtab;
+    { /* first n8 revtab entries: T steps forward through the table */
+        const uint16_t * const p_revtab_end = p_revtab + n8;
+#ifdef CPU_COLDFIRE
+        asm volatile ("move.l (%[in2]), %%d0\n\t"
+                      "move.l (%[in1]), %%d1\n\t"
+                      "bra.s 1f\n\t"
+                      "0:\n\t"
+                      "movem.l (%[T]), %%d2-%%d3\n\t"
+
+                      "addq.l #8, %[in1]\n\t"
+                      "subq.l #8, %[in2]\n\t"
+
+                      "lea (%[step]*4, %[T]), %[T]\n\t"
+
+                      "mac.l %%d0, %%d3, (%[T]), %%d4, %%acc0;"
+                      "msac.l %%d1, %%d2, (4, %[T]), %%d5, %%acc0;"
+                      "mac.l %%d1, %%d3, (%[in1]), %%d1, %%acc1;"
+                      "mac.l %%d0, %%d2, (%[in2]), %%d0, %%acc1;"
+
+                      "addq.l #8, %[in1]\n\t"
+                      "subq.l #8, %[in2]\n\t"
+
+                      "mac.l %%d0, %%d5, %%acc2;"
+                      "msac.l %%d1, %%d4, (%[p_revtab])+, %%d2, %%acc2;"
+                      "mac.l %%d1, %%d5, (%[in1]), %%d1, %%acc3;"
+                      "mac.l %%d0, %%d4, (%[in2]), %%d0, %%acc3;"
+
+                      "clr.l %%d3\n\t"
+                      "move.w %%d2, %%d3\n\t"
+                      "eor.l %%d3, %%d2\n\t"
+                      "swap %%d2\n\t"
+                      "lsr.l %[revtab_shift], %%d2\n\t"
+
+                      "movclr.l %%acc0, %%d4;"
+                      "movclr.l %%acc1, %%d5;"
+                      "lsl.l #3, %%d2\n\t"
+                      "lea (%%d2, %[z]), %%a1\n\t"
+                      "movem.l %%d4-%%d5, (%%a1)\n\t"
+
+                      "lsr.l %[revtab_shift], %%d3\n\t"
+
+                      "movclr.l %%acc2, %%d4;"
+                      "movclr.l %%acc3, %%d5;"
+                      "lsl.l #3, %%d3\n\t"
+                      "lea (%%d3, %[z]), %%a1\n\t"
+                      "movem.l %%d4-%%d5, (%%a1)\n\t"
+                          
+                      "lea (%[step]*4, %[T]), %[T]\n\t"
+
+                      "1:\n\t"
+                      "cmp.l %[p_revtab_end], %[p_revtab]\n\t"
+                      "bcs.s 0b\n\t"
+                      : [in1] "+a" (in1), [in2] "+a" (in2), [T] "+a" (T),
+                        [p_revtab] "+a" (p_revtab)
+                      : [z] "a" (z), [step] "d" (step), [revtab_shift] "d" (revtab_shift),
+                        [p_revtab_end] "r" (p_revtab_end)
+                      : "d0", "d1", "d2", "d3", "d4", "d5", "a1", "cc", "memory");
+#else
+        while(LIKELY(p_revtab < p_revtab_end)) /* two revtab entries are consumed per pass */
+        {
+            j = (*p_revtab)>>revtab_shift;
+            XNPROD31(*in2, *in1, T[1], T[0], &z[j].re, &z[j].im );
+            T += step;
+            in1 += 2;
+            in2 -= 2;
+            p_revtab++;
+            j = (*p_revtab)>>revtab_shift;
+            XNPROD31(*in2, *in1, T[1], T[0], &z[j].re, &z[j].im );
+            T += step;
+            in1 += 2;
+            in2 -= 2;
+            p_revtab++;
+        }
+#endif
+    }
+    { /* next n8 revtab entries: T steps backward and the two table operands swap roles */
+        const uint16_t * const p_revtab_end = p_revtab + n8;
+#ifdef CPU_COLDFIRE
+        asm volatile ("move.l (%[in2]), %%d0\n\t"
+                      "move.l (%[in1]), %%d1\n\t"
+                      "bra.s 1f\n\t"
+                      "0:\n\t"
+                      "movem.l (%[T]), %%d2-%%d3\n\t"
+
+                      "addq.l #8, %[in1]\n\t"
+                      "subq.l #8, %[in2]\n\t"
+
+                      "lea (%[step]*4, %[T]), %[T]\n\t"
+
+                      "mac.l %%d0, %%d2, (%[T]), %%d4, %%acc0;"
+                      "msac.l %%d1, %%d3, (4, %[T]), %%d5, %%acc0;"
+                      "mac.l %%d1, %%d2, (%[in1]), %%d1, %%acc1;"
+                      "mac.l %%d0, %%d3, (%[in2]), %%d0, %%acc1;"
+
+                      "addq.l #8, %[in1]\n\t"
+                      "subq.l #8, %[in2]\n\t"
+
+                      "mac.l %%d0, %%d4, %%acc2;"
+                      "msac.l %%d1, %%d5, (%[p_revtab])+, %%d2, %%acc2;"
+                      "mac.l %%d1, %%d4, (%[in1]), %%d1, %%acc3;"
+                      "mac.l %%d0, %%d5, (%[in2]), %%d0, %%acc3;"
+
+                      "clr.l %%d3\n\t"
+                      "move.w %%d2, %%d3\n\t"
+                      "eor.l %%d3, %%d2\n\t"
+                      "swap %%d2\n\t"
+                      "lsr.l %[revtab_shift], %%d2\n\t"
+
+                      "movclr.l %%acc0, %%d4;"
+                      "movclr.l %%acc1, %%d5;"
+                      "lsl.l #3, %%d2\n\t"
+                      "lea (%%d2, %[z]), %%a1\n\t"
+                      "movem.l %%d4-%%d5, (%%a1)\n\t"
+
+                      "lsr.l %[revtab_shift], %%d3\n\t"
+
+                      "movclr.l %%acc2, %%d4;"
+                      "movclr.l %%acc3, %%d5;"
+                      "lsl.l #3, %%d3\n\t"
+                      "lea (%%d3, %[z]), %%a1\n\t"
+                      "movem.l %%d4-%%d5, (%%a1)\n\t"
+                          
+                      "lea (%[step]*4, %[T]), %[T]\n\t"
+
+                      "1:\n\t"
+                      "cmp.l %[p_revtab_end], %[p_revtab]\n\t"
+                      "bcs.s 0b\n\t"
+                      : [in1] "+a" (in1), [in2] "+a" (in2), [T] "+a" (T),
+                        [p_revtab] "+a" (p_revtab)
+                      : [z] "a" (z), [step] "d" (-step), [revtab_shift] "d" (revtab_shift),
+                        [p_revtab_end] "r" (p_revtab_end)
+                      : "d0", "d1", "d2", "d3", "d4", "d5", "a1", "cc", "memory");
+#else
+        while(LIKELY(p_revtab < p_revtab_end)) /* two revtab entries are consumed per pass */
+        {
+            j = (*p_revtab)>>revtab_shift;
+            XNPROD31(*in2, *in1, T[0], T[1], &z[j].re, &z[j].im);
+            T -= step;
+            in1 += 2;
+            in2 -= 2;
+            p_revtab++;
+            j = (*p_revtab)>>revtab_shift;
+            XNPROD31(*in2, *in1, T[0], T[1], &z[j].re, &z[j].im);
+            T -= step;
+            in1 += 2;
+            in2 -= 2;
+            p_revtab++;
+        }
+#endif
+    }
+
+
+    /* ... and so fft runs in OUTPUT buffer */
+    ff_fft_calc_c(nbits-2, z);
+
+    /* post rotation + reordering.  now keeps the result within the OUTPUT buffer */
+    switch( nbits )
+    {
+        default: /* every size other than n=4096 and n=8192, which have their own cases below */
+        {
+            fixed32 * z1 = (fixed32 *)(&z[0]);
+            int magic_step = step>>2;
+            int newstep;
+            if(n<=1024)
+            {
+                T = sincos_lookup0 + magic_step;
+                newstep = step>>1;
+            }
+            else
+            {   
+                T = sincos_lookup1;
+                newstep = 2;
+            }
+
+#ifdef CPU_COLDFIRE
+            fixed32 * z2 = (fixed32 *)(&z[n4]);
+            int c = n4;
+            if (newstep == 2)
+            {
+                asm volatile ("movem.l (%[z1]), %%d0-%%d1\n\t"
+                              "addq.l #8, %[z1]\n\t"
+                              "movem.l (%[T]), %%d2-%%d3\n\t"
+                              "addq.l #8, %[T]\n\t"
+                              "bra.s 1f\n\t"
+                              "0:\n\t"
+                              "msac.l %%d1, %%d2, (%[T])+, %%a3, %%acc0\n\t"
+                              "mac.l  %%d0, %%d3, (%[T])+, %%a4, %%acc0\n\t"
+                              
+                              "msac.l %%d1, %%d3, -(%[z2]), %%d1, %%acc1\n\t"
+                              "msac.l %%d0, %%d2, -(%[z2]), %%d0, %%acc1\n\t"
+
+                              "msac.l %%d1, %%a4, (%[T])+, %%d2, %%acc2\n\t"
+                              "mac.l  %%d0, %%a3, (%[T])+, %%d3, %%acc2\n\t"
+                              "msac.l %%d0, %%a4, (%[z1])+, %%d0, %%acc3\n\t"
+                              "msac.l %%d1, %%a3, (%[z1])+, %%d1, %%acc3\n\t"
+
+                              "movclr.l %%acc0, %%a3\n\t"
+                              "movclr.l %%acc3, %%a4\n\t"
+                              "movem.l %%a3-%%a4, (-16, %[z1])\n\t"
+
+                              "movclr.l %%acc1, %%a4\n\t"
+                              "movclr.l %%acc2, %%a3\n\t"
+                              "movem.l %%a3-%%a4, (%[z2])\n\t"
+
+                              "subq.l #2, %[n]\n\t"
+                              "1:\n\t"
+                              "bhi.s 0b\n\t"
+                              : [z1] "+a" (z1), [z2] "+a" (z2), [T] "+a" (T), [n] "+d" (c)
+                              :
+                              : "d0", "d1", "d2", "d3", "a3", "a4", "cc", "memory");
+            }
+            else
+            {
+                asm volatile ("movem.l (%[z1]), %%d0-%%d1\n\t"
+                              "addq.l #8, %[z1]\n\t"
+                              "movem.l (%[T]), %%d2-%%d3\n\t"
+                              "lea (%[newstep]*4, %[T]), %[T]\n\t"
+                              "bra.s 1f\n\t"
+                              "0:\n\t"
+                              "msac.l %%d1, %%d2, (%[T]), %%a3, %%acc0\n\t"
+                              "mac.l  %%d0, %%d3, (4, %[T]), %%a4, %%acc0\n\t"
+                              "msac.l %%d1, %%d3, -(%[z2]), %%d1, %%acc1\n\t"
+                              "msac.l %%d0, %%d2, -(%[z2]), %%d0, %%acc1\n\t"
+
+                              "lea (%[newstep]*4, %[T]), %[T]\n\t"
+                              "msac.l %%d1, %%a4, (%[T]), %%d2, %%acc2\n\t"
+                              "mac.l  %%d0, %%a3, (4, %[T]), %%d3, %%acc2\n\t"
+                              "msac.l %%d0, %%a4, (%[z1])+, %%d0, %%acc3\n\t"
+                              "msac.l %%d1, %%a3, (%[z1])+, %%d1, %%acc3\n\t"
+
+                              "lea (%[newstep]*4, %[T]), %[T]\n\t"
+
+                              "movclr.l %%acc0, %%a3\n\t"
+                              "movclr.l %%acc3, %%a4\n\t"
+                              "movem.l %%a3-%%a4, (-16, %[z1])\n\t"
+
+                              "movclr.l %%acc1, %%a4\n\t"
+                              "movclr.l %%acc2, %%a3\n\t"
+                              "movem.l %%a3-%%a4, (%[z2])\n\t"
+
+                              "subq.l #2, %[n]\n\t"
+                              "1:\n\t"
+                              "bhi.s 0b\n\t"
+                              : [z1] "+a" (z1), [z2] "+a" (z2), [T] "+a" (T), [n] "+d" (c)
+                              : [newstep] "d" (newstep)
+                              : "d0", "d1", "d2", "d3", "a3", "a4", "cc", "memory");
+            }
+#else
+            fixed32 * z2 = (fixed32 *)(&z[n4-1]);
+            while(z1<z2) /* z1 climbs from the first element, z2 descends from the last, until they meet */
+            {
+                fixed32 r0,i0,r1,i1;
+                XNPROD31_R(z1[1], z1[0], T[0], T[1], r0, i1 ); T+=newstep;
+                XNPROD31_R(z2[1], z2[0], T[1], T[0], r1, i0 ); T+=newstep;
+                z1[0] = -r0;
+                z1[1] = -i0;
+                z2[0] = -r1;
+                z2[1] = -i1;
+                z1+=2;
+                z2-=2;
+            }
+#endif 
+            break;
+        }
+
+        case 12: /* n=4096 */
+        {
+            /* linear interpolation (50:50) between sincos_lookup0 and sincos_lookup1 */
+            const int32_t * V = sincos_lookup1;
+            T = sincos_lookup0;
+            int32_t t0,t1,v0,v1;
+            fixed32 * z1 = (fixed32 *)(&z[0]);
+            fixed32 * z2 = (fixed32 *)(&z[n4-1]);
+
+            t0 = T[0]>>1; t1=T[1]>>1;
+        
+            while(z1<z2)
+            {
+                fixed32 r0,i0,r1,i1;
+                t0 += (v0 = (V[0]>>1));
+                t1 += (v1 = (V[1]>>1));
+                XNPROD31_R(z1[1], z1[0], t0, t1, r0, i1 );
+                T+=2;
+                v0 += (t0 = (T[0]>>1));
+                v1 += (t1 = (T[1]>>1));
+                XNPROD31_R(z2[1], z2[0], v1, v0, r1, i0 );
+                z1[0] = -r0;
+                z1[1] = -i0;
+                z2[0] = -r1;
+                z2[1] = -i1;
+                z1+=2;
+                z2-=2;
+                V+=2;
+            }
+            
+            break;
+        }
+        
+        case 13: /* n = 8192 */
+        {
+            /* weight linear interpolation between sincos_lookup0 and sincos_lookup1
+               specifically: 25:75 for first twiddle and 75:25 for second twiddle */
+            const int32_t * V = sincos_lookup1;
+            T = sincos_lookup0;
+            int32_t t0,t1,v0,v1,q0,q1;
+            fixed32 * z1 = (fixed32 *)(&z[0]);
+            fixed32 * z2 = (fixed32 *)(&z[n4-1]);
+
+            t0 = T[0]; t1=T[1];
+        
+            while(z1<z2)
+            {
+                fixed32 r0,i0,r1,i1;
+                v0 = V[0]; v1 = V[1];
+                t0 += (q0 = (v0-t0)>>1);
+                t1 += (q1 = (v1-t1)>>1);
+                XNPROD31_R(z1[1], z1[0], t0, t1, r0, i1 );
+                t0 = v0-q0;
+                t1 = v1-q1;
+                XNPROD31_R(z2[1], z2[0], t1, t0, r1, i0 );
+                z1[0] = -r0;
+                z1[1] = -i0;
+                z2[0] = -r1;
+                z2[1] = -i1;
+                z1+=2;
+                z2-=2;
+                T+=2;
+                
+                t0 = T[0]; t1 = T[1];
+                v0 += (q0 = (t0-v0)>>1);
+                v1 += (q1 = (t1-v1)>>1);
+                XNPROD31_R(z1[1], z1[0], v0, v1, r0, i1 );
+                v0 = t0-q0;
+                v1 = t1-q1;
+                XNPROD31_R(z2[1], z2[0], v1, v0, r1, i0 );
+                z1[0] = -r0;
+                z1[1] = -i0;
+                z2[0] = -r1;
+                z2[1] = -i1;
+                z1+=2;
+                z2-=2;
+                V+=2;
+            }
+               
+            break;
+        }
+    }
+} 
+
+/**
+ * Compute inverse MDCT of size N = 2^nbits
+ * @param output N samples
+ * @param input N/2 samples
+ * "In-place" processing can be achieved provided that:
+ *            [0  ..  N/2-1 | N/2  ..  N-1 ]
+ *            <----input---->
+ *            <-----------output----------->
+ *
+ * The result of ff_imdct_half is to put the 'half' imdct here
+ *
+ *                          N/2          N-1
+ *                          <--half imdct-->
+ *
+ * We want it here for the full imdct:
+ *                   N/4      3N/4-1
+ *                   <-------------->
+ *
+ * In addition we need to apply two symmetries to get the full imdct:
+ *
+ *           <AAAAAA>                <DDDDDD>
+ *                   <BBBBBB><CCCCCC>
+ *
+ *           D is a reflection of C
+ *           A is a reflection of B (but with sign flipped)
+ *
+ * We process the symmetries at the same time as we 'move' the half imdct
+ * from [N/2,N-1] to [N/4,3N/4-1]
+ *
+ * TODO: find a way to make ff_imdct_half put the result in [N/4..3N/4-1]
+ * This would require being able to use revtab 'inplace' (since the input
+ * and output of imdct_half would then overlap somewhat)
+ */
+void ff_imdct_calc(unsigned int nbits, fixed32 *output, const fixed32 *input) ICODE_ATTR_TREMOR_MDCT;
+#ifndef CPU_ARM
+void ff_imdct_calc(unsigned int nbits, fixed32 *output, const fixed32 *input)
+{
+    const int n = (1<<nbits);
+    const int n2 = (n>>1);
+    const int n4 = (n>>2);
+    
+    /* tell imdct_half to put its N/2 output samples in [N/2..N-1] i.e. output+n2 */
+    ff_imdct_half(nbits,output+n2,input);
+
+    fixed32 * in_r, * in_r2, * out_r, * out_r2;
+
+    /* Copy BBBB to AAAA, reflected and sign-flipped.
+       Also copy BBBB to its correct destination (from [N/2..3N/4-1] to [N/4..N/2-1]) */
+    out_r = output;
+    out_r2 = output+n2-8;
+    in_r  = output+n2+n4-8;
+    while(out_r<out_r2) /* 8 words per pass; out_r climbs and out_r2 descends until they cross. NOTE(review): presumably requires n4 to be a multiple of 8 - confirm */
+    {
+#if defined CPU_COLDFIRE
+        asm volatile( 
+            "movem.l (%[in_r]), %%d0-%%d7\n\t"
+            "movem.l %%d0-%%d7, (%[out_r2])\n\t"
+            "neg.l %%d7\n\t"
+            "move.l %%d7, (%[out_r])+\n\t"
+            "neg.l %%d6\n\t"
+            "move.l %%d6, (%[out_r])+\n\t"
+            "neg.l %%d5\n\t"
+            "move.l %%d5, (%[out_r])+\n\t"
+            "neg.l %%d4\n\t"
+            "move.l %%d4, (%[out_r])+\n\t"
+            "neg.l %%d3\n\t"
+            "move.l %%d3, (%[out_r])+\n\t"
+            "neg.l %%d2\n\t"
+            "move.l %%d2, (%[out_r])+\n\t"
+            "lea.l (-8*4, %[in_r]), %[in_r]\n\t"
+            "neg.l %%d1\n\t"
+            "move.l %%d1, (%[out_r])+\n\t"
+            "lea.l (-8*4, %[out_r2]), %[out_r2]\n\t"
+            "neg.l %%d0\n\t"
+            "move.l %%d0, (%[out_r])+\n\t"
+            : [in_r] "+a" (in_r), [out_r] "+a" (out_r), [out_r2] "+a" (out_r2)
+            :
+            : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "cc", "memory" );
+#else
+        out_r[0]     = -(out_r2[7] = in_r[7]);
+        out_r[1]     = -(out_r2[6] = in_r[6]);
+        out_r[2]     = -(out_r2[5] = in_r[5]);
+        out_r[3]     = -(out_r2[4] = in_r[4]);
+        out_r[4]     = -(out_r2[3] = in_r[3]);
+        out_r[5]     = -(out_r2[2] = in_r[2]);
+        out_r[6]     = -(out_r2[1] = in_r[1]);
+        out_r[7]     = -(out_r2[0] = in_r[0]);
+        in_r -= 8;
+        out_r += 8;
+        out_r2 -= 8;
+#endif
+    }
+    in_r = output + n2+n4;
+    in_r2 = output + n-4;
+    out_r = output + n2;
+    out_r2 = output + n2 + n4 - 4;
+    while(in_r<in_r2) /* 4 words from each end of [n2+n4 .. n-1] per pass, working inwards */
+    {
+#if defined CPU_COLDFIRE
+        asm volatile(
+            "movem.l (%[in_r]), %%d0-%%d3\n\t"
+            "movem.l %%d0-%%d3, (%[out_r])\n\t"
+            "movem.l (%[in_r2]), %%d4-%%d7\n\t"
+            "movem.l %%d4-%%d7, (%[out_r2])\n\t"
+            "move.l %%d0, %%a3\n\t"
+            "move.l %%d3, %%d0\n\t"
+            "move.l %%d1, %%d3\n\t"
+            "movem.l %%d0/%%d2-%%d3/%%a3, (%[in_r2])\n\t"
+            "move.l %%d7, %%d1\n\t"
+            "move.l %%d6, %%d2\n\t"
+            "move.l %%d5, %%d3\n\t"
+            "movem.l %%d1-%%d4, (%[in_r])\n\t"
+            "lea.l (4*4, %[in_r]), %[in_r]\n\t"
+            "lea.l (-4*4, %[in_r2]), %[in_r2]\n\t"
+            "lea.l (4*4, %[out_r]), %[out_r]\n\t"
+            "lea.l (-4*4, %[out_r2]), %[out_r2]\n\t"
+            : [in_r] "+a" (in_r), [in_r2] "+a" (in_r2),
+              [out_r] "+a" (out_r), [out_r2] "+a" (out_r2)
+            :
+            : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "a3", "memory", "cc" );
+#else
+        register fixed32 t0,t1,t2,t3;
+        register fixed32 s0,s1,s2,s3;
+
+        /* Copy and reflect CCCC to DDDD.  Because CCCC is already where
+           we actually want to put DDDD this is a bit complicated.
+         * So simultaneously do the following things:
+         * 1. copy range from [n2+n4 .. n-1] to range[n2 .. n2+n4-1]
+         * 2. reflect range from [n2+n4 .. n-1] inplace
+         *
+         *  [                      |                        ]
+         *   ^a ->            <- ^b ^c ->               <- ^d
+         *
+         *  #1: copy from ^c to ^a
+         *  #2: copy from ^d to ^b
+         *  #3: swap ^c and ^d in place
+         */
+        /* #1 pt1 : load 4 words from ^c. */
+        t0=in_r[0]; t1=in_r[1]; t2=in_r[2]; t3=in_r[3];
+        /* #1 pt2 : write to ^a */
+        out_r[0]=t0;out_r[1]=t1;out_r[2]=t2;out_r[3]=t3;
+        /* #2 pt1 : load 4 words from ^d */
+        s0=in_r2[0];s1=in_r2[1];s2=in_r2[2];s3=in_r2[3];
+        /* #2 pt2 : write to ^b */
+        out_r2[0]=s0;out_r2[1]=s1;out_r2[2]=s2;out_r2[3]=s3;
+        /* #3 pt1 : write words from #2 to ^c, in reversed order */
+        in_r[0]=s3;in_r[1]=s2;in_r[2]=s1;in_r[3]=s0;
+        /* #3 pt2 : write words from #1 to ^d, in reversed order */
+        in_r2[0]=t3;in_r2[1]=t2;in_r2[2]=t1;in_r2[3]=t0;
+
+        in_r += 4;
+        in_r2 -= 4;
+        out_r += 4;
+        out_r2 -= 4;
+#endif
+    }
+}
+#else
+/* ARM version: follows the same structure as the canonical version above, using load/store-multiple instructions */
+void ff_imdct_calc(unsigned int nbits, fixed32 *output, const fixed32 *input)
+{
+    const int n = (1<<nbits);
+    const int n2 = (n>>1);
+    const int n4 = (n>>2);
+    
+    ff_imdct_half(nbits,output+n2,input);
+
+    fixed32 * in_r, * in_r2, * out_r, * out_r2;
+
+    out_r = output;
+    out_r2 = output+n2;   /* one past the region: stmdb pre-decrements before storing */
+    in_r  = output+n2+n4; /* one past the region: ldmdb pre-decrements before loading */
+    while(out_r<out_r2)
+    {
+        asm volatile( 
+            "ldmdb %[in_r]!, {r0-r7}\n\t"
+            "stmdb %[out_r2]!, {r0-r7}\n\t"
+            "rsb r8,r0,#0\n\t" /* this and the following rsb lines negate the 8 words and reverse their order across r0-r3,r5-r8 */
+            "rsb r0,r7,#0\n\t"
+            "rsb r7,r1,#0\n\t"
+            "rsb r1,r6,#0\n\t"
+            "rsb r6,r2,#0\n\t"
+            "rsb r2,r5,#0\n\t"
+            "rsb r5,r3,#0\n\t"
+            "rsb r3,r4,#0\n\t"
+            "stmia %[out_r]!, {r0-r3,r5-r8}\n\t"
+            : [in_r] "+r" (in_r), [out_r] "+r" (out_r), [out_r2] "+r" (out_r2)
+            :
+            : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "memory" );
+    }
+    in_r = output + n2+n4;
+    in_r2 = output + n;   /* one past the end: read with ldmdb, written with stmdb */
+    out_r = output + n2;
+    out_r2 = output + n2 + n4;
+    while(in_r<in_r2)
+    {
+        asm volatile(
+            "ldmia %[in_r], {r0-r3}\n\t"
+            "stmia %[out_r]!, {r0-r3}\n\t"
+            "ldmdb %[in_r2], {r5-r8}\n\t"
+            "stmdb %[out_r2]!, {r5-r8}\n\t"
+            "mov r4,r0\n\t" /* reorder so that {r0,r2,r3,r4} holds the 4 words from in_r reversed */
+            "mov r0,r3\n\t"
+            "mov r3,r1\n\t"
+            "stmdb %[in_r2]!, {r0,r2,r3,r4}\n\t"
+            "mov r4,r8\n\t" /* reorder so that {r4,r5,r6,r8} holds the 4 words from in_r2 reversed */
+            "mov r8,r5\n\t"
+            "mov r5,r7\n\t"
+            "stmia %[in_r]!, {r4,r5,r6,r8}\n\t"
+            :
+            [in_r] "+r" (in_r), [in_r2] "+r" (in_r2), [out_r] "+r" (out_r), [out_r2] "+r" (out_r2)
+            :
+            : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "memory" );
+    }
+}
+#endif
--- a/lib/rbcodec/codecs/lib/mdct.h
+++ b/lib/rbcodec/codecs/lib/mdct.h
@ -0,0 +1,139 @@
+/*
+ * WMA compatible decoder
+ * Copyright (c) 2002 The FFmpeg Project.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef CODECLIB_MDCT_H_INCLUDED
+#define CODECLIB_MDCT_H_INCLUDED
+
+//#include "types.h"
+#include "fft.h"
+
+void ff_imdct_calc(unsigned int nbits, fixed32 *output, const fixed32 *input); /* inverse MDCT of size N = 2^nbits: N output samples from N/2 input samples */
+void ff_imdct_half(unsigned int nbits, fixed32 *output, const fixed32 *input); /* middle half only: N/2 output samples; input and output must not overlap */
+
+#ifdef CPU_ARM
+
+/* ARM version: high word of the 64-bit product shifted left by one, i.e. (x*y)>>31 with bit 0 always clear. NOTE(review): this was labelled "Sign-15.16 format", but the 31-bit scaling suggests operands with 31 fractional bits - confirm */
+#define fixmul32b(x, y)  \
+    ({ int32_t __hi;  \
+       uint32_t __lo;  \
+       int32_t __result;  \
+       asm ("smull   %0, %1, %3, %4\n\t"  \
+            "mov     %2, %1, lsl #1"  \
+            : "=&r" (__lo), "=&r" (__hi), "=r" (__result)  \
+            : "%r" (x), "r" (y)  \
+            : "cc" );  \
+       __result;  \
+    })
+
+#elif defined(CPU_COLDFIRE)
+
+static inline int32_t fixmul32b(int32_t x, int32_t y)
+{ /* Coldfire version: x*y goes through accumulator acc0 and is read back into x. NOTE(review): mac.l adds to acc0, so acc0 is presumably expected to be clear on entry - confirm */
+    asm (
+        "mac.l   %[x], %[y], %%acc0  \n" /* multiply */
+        "movclr.l %%acc0, %[x]  \n"     /* get higher half */
+        : [x] "+d" (x)
+        : [y] "d"  (y)
+    );
+    return x;
+}
+
+#else
+
+static inline fixed32 fixmul32b(fixed32 x, fixed32 y)
+{
+    /* Portable C version: form the product in the wider fixed64 type,
+     * scale it down by 31 bits, then narrow it back to fixed32. */
+    const fixed64 wide_x  = x;
+    const fixed64 product = wide_x * y;
+    const fixed64 scaled  = product >> 31;
+
+    /* Truncating conversion back to the 32-bit fixed point type. */
+    return (fixed32)scaled;
+}
+#endif
+
+
+#ifdef CPU_ARM
+static inline
+void CMUL(fixed32 *x, fixed32 *y,
+          fixed32  a, fixed32  b,
+          fixed32  t, fixed32  v)
+{ /* ARM version: *x = a*t - b*v and *y = b*t + a*v, each taken from the high product word and shifted left by one */
+    /* This version loses one bit of precision. Could be solved at the cost
+     * of 2 extra cycles if it becomes an issue. */
+    int x1, y1, l;
+    asm(
+        "smull    %[l], %[y1], %[b], %[t] \n" /* y1:l  = b*t */
+        "smlal    %[l], %[y1], %[a], %[v] \n" /* y1:l += a*v */
+        "rsb      %[b], %[b], #0          \n" /* b = -b */
+        "smull    %[l], %[x1], %[a], %[t] \n" /* x1:l  = a*t */
+        "smlal    %[l], %[x1], %[b], %[v] \n" /* x1:l += (-b)*v */
+        : [l] "=&r" (l), [x1]"=&r" (x1), [y1]"=&r" (y1), [b] "+r" (b)
+        : [a] "r" (a),   [t] "r" (t),    [v] "r" (v)
+        : "cc"
+    );
+    *x = x1 << 1;
+    *y = y1 << 1;
+}
+#elif defined CPU_COLDFIRE
+static inline
+void CMUL(fixed32 *x, fixed32 *y,
+          fixed32  a, fixed32  b,
+          fixed32  t, fixed32  v)
+{ /* Coldfire version: acc0 collects a*t - b*v for *x, acc1 collects b*t + a*v for *y; a is reused as scratch */
+  asm volatile ("mac.l %[a], %[t], %%acc0;"
+                "msac.l %[b], %[v], %%acc0;"
+                "mac.l %[b], %[t], %%acc1;"
+                "mac.l %[a], %[v], %%acc1;"
+                "movclr.l %%acc0, %[a];"
+                "move.l %[a], (%[x]);"
+                "movclr.l %%acc1, %[a];"
+                "move.l %[a], (%[y]);"
+                : [a] "+&r" (a)
+                : [x] "a" (x), [y] "a" (y),
+                  [b] "r" (b), [t] "r" (t), [v] "r" (v)
+                : "cc", "memory");
+}
+#else
+/* Portable C complex multiply, built from four fixmul32b() products:
+ *   *pre = bre*are - bim*aim   (real part)
+ *   *pim = bre*aim + bim*are   (imaginary part)
+ */
+static inline
+void CMUL(fixed32 *pre,
+          fixed32 *pim,
+          fixed32 are,
+          fixed32 aim,
+          fixed32 bre,
+          fixed32 bim)
+{
+    /* All four partial products are taken before anything is stored. */
+    const fixed32 re_re = fixmul32b(bre, are);
+    const fixed32 im_im = fixmul32b(bim, aim);
+    const fixed32 re_im = fixmul32b(bre, aim);
+    const fixed32 im_re = fixmul32b(bim, are);
+
+    *pre = re_re - im_im;
+    *pim = re_im + im_re;
+}
+#endif
+
+
+#endif // CODECLIB_MDCT_H_INCLUDED
--- a/lib/rbcodec/codecs/lib/mdct_lookup.c
+++ b/lib/rbcodec/codecs/lib/mdct_lookup.c
@ -0,0 +1,872 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2009 Michael Giacomelli
+ *
+ * 
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#ifdef ROCKBOX
+#include <codecs/lib/codeclib.h>
+#else
+#include <stdlib.h>
+#include <stdint.h>
+#endif /* ROCKBOX */
+
+/* {sin(2*i*PI/4096), cos(2*i*PI/4096)} pairs scaled by 2^31, with i = 0 to 512 */
+const int32_t sincos_lookup0[1026] ICONST_ATTR = {
+  0x00000000, 0x7fffffff, 0x003243f5, 0x7ffff621,
+  0x006487e3, 0x7fffd886, 0x0096cbc1, 0x7fffa72c,
+  0x00c90f88, 0x7fff6216, 0x00fb5330, 0x7fff0943,
+  0x012d96b1, 0x7ffe9cb2, 0x015fda03, 0x7ffe1c65,
+  0x01921d20, 0x7ffd885a, 0x01c45ffe, 0x7ffce093,
+  0x01f6a297, 0x7ffc250f, 0x0228e4e2, 0x7ffb55ce,
+  0x025b26d7, 0x7ffa72d1, 0x028d6870, 0x7ff97c18,
+  0x02bfa9a4, 0x7ff871a2, 0x02f1ea6c, 0x7ff75370,
+  0x03242abf, 0x7ff62182, 0x03566a96, 0x7ff4dbd9,
+  0x0388a9ea, 0x7ff38274, 0x03bae8b2, 0x7ff21553,
+  0x03ed26e6, 0x7ff09478, 0x041f6480, 0x7feeffe1,
+  0x0451a177, 0x7fed5791, 0x0483ddc3, 0x7feb9b85,
+  0x04b6195d, 0x7fe9cbc0, 0x04e8543e, 0x7fe7e841,
+  0x051a8e5c, 0x7fe5f108, 0x054cc7b1, 0x7fe3e616,
+  0x057f0035, 0x7fe1c76b, 0x05b137df, 0x7fdf9508,
+  0x05e36ea9, 0x7fdd4eec, 0x0615a48b, 0x7fdaf519,
+  0x0647d97c, 0x7fd8878e, 0x067a0d76, 0x7fd6064c,
+  0x06ac406f, 0x7fd37153, 0x06de7262, 0x7fd0c8a3,
+  0x0710a345, 0x7fce0c3e, 0x0742d311, 0x7fcb3c23,
+  0x077501be, 0x7fc85854, 0x07a72f45, 0x7fc560cf,
+  0x07d95b9e, 0x7fc25596, 0x080b86c2, 0x7fbf36aa,
+  0x083db0a7, 0x7fbc040a, 0x086fd947, 0x7fb8bdb8,
+  0x08a2009a, 0x7fb563b3, 0x08d42699, 0x7fb1f5fc,
+  0x09064b3a, 0x7fae7495, 0x09386e78, 0x7faadf7c,
+  0x096a9049, 0x7fa736b4, 0x099cb0a7, 0x7fa37a3c,
+  0x09cecf89, 0x7f9faa15, 0x0a00ece8, 0x7f9bc640,
+  0x0a3308bd, 0x7f97cebd, 0x0a6522fe, 0x7f93c38c,
+  0x0a973ba5, 0x7f8fa4b0, 0x0ac952aa, 0x7f8b7227,
+  0x0afb6805, 0x7f872bf3, 0x0b2d7baf, 0x7f82d214,
+  0x0b5f8d9f, 0x7f7e648c, 0x0b919dcf, 0x7f79e35a,
+  0x0bc3ac35, 0x7f754e80, 0x0bf5b8cb, 0x7f70a5fe,
+  0x0c27c389, 0x7f6be9d4, 0x0c59cc68, 0x7f671a05,
+  0x0c8bd35e, 0x7f62368f, 0x0cbdd865, 0x7f5d3f75,
+  0x0cefdb76, 0x7f5834b7, 0x0d21dc87, 0x7f531655,
+  0x0d53db92, 0x7f4de451, 0x0d85d88f, 0x7f489eaa,
+  0x0db7d376, 0x7f434563, 0x0de9cc40, 0x7f3dd87c,
+  0x0e1bc2e4, 0x7f3857f6, 0x0e4db75b, 0x7f32c3d1,
+  0x0e7fa99e, 0x7f2d1c0e, 0x0eb199a4, 0x7f2760af,
+  0x0ee38766, 0x7f2191b4, 0x0f1572dc, 0x7f1baf1e,
+  0x0f475bff, 0x7f15b8ee, 0x0f7942c7, 0x7f0faf25,
+  0x0fab272b, 0x7f0991c4, 0x0fdd0926, 0x7f0360cb,
+  0x100ee8ad, 0x7efd1c3c, 0x1040c5bb, 0x7ef6c418,
+  0x1072a048, 0x7ef05860, 0x10a4784b, 0x7ee9d914,
+  0x10d64dbd, 0x7ee34636, 0x11082096, 0x7edc9fc6,
+  0x1139f0cf, 0x7ed5e5c6, 0x116bbe60, 0x7ecf1837,
+  0x119d8941, 0x7ec8371a, 0x11cf516a, 0x7ec14270,
+  0x120116d5, 0x7eba3a39, 0x1232d979, 0x7eb31e78,
+  0x1264994e, 0x7eabef2c, 0x1296564d, 0x7ea4ac58,
+  0x12c8106f, 0x7e9d55fc, 0x12f9c7aa, 0x7e95ec1a,
+  0x132b7bf9, 0x7e8e6eb2, 0x135d2d53, 0x7e86ddc6,
+  0x138edbb1, 0x7e7f3957, 0x13c0870a, 0x7e778166,
+  0x13f22f58, 0x7e6fb5f4, 0x1423d492, 0x7e67d703,
+  0x145576b1, 0x7e5fe493, 0x148715ae, 0x7e57dea7,
+  0x14b8b17f, 0x7e4fc53e, 0x14ea4a1f, 0x7e47985b,
+  0x151bdf86, 0x7e3f57ff, 0x154d71aa, 0x7e37042a,
+  0x157f0086, 0x7e2e9cdf, 0x15b08c12, 0x7e26221f,
+  0x15e21445, 0x7e1d93ea, 0x16139918, 0x7e14f242,
+  0x16451a83, 0x7e0c3d29, 0x1676987f, 0x7e0374a0,
+  0x16a81305, 0x7dfa98a8, 0x16d98a0c, 0x7df1a942,
+  0x170afd8d, 0x7de8a670, 0x173c6d80, 0x7ddf9034,
+  0x176dd9de, 0x7dd6668f, 0x179f429f, 0x7dcd2981,
+  0x17d0a7bc, 0x7dc3d90d, 0x1802092c, 0x7dba7534,
+  0x183366e9, 0x7db0fdf8, 0x1864c0ea, 0x7da77359,
+  0x18961728, 0x7d9dd55a, 0x18c7699b, 0x7d9423fc,
+  0x18f8b83c, 0x7d8a5f40, 0x192a0304, 0x7d808728,
+  0x195b49ea, 0x7d769bb5, 0x198c8ce7, 0x7d6c9ce9,
+  0x19bdcbf3, 0x7d628ac6, 0x19ef0707, 0x7d58654d,
+  0x1a203e1b, 0x7d4e2c7f, 0x1a517128, 0x7d43e05e,
+  0x1a82a026, 0x7d3980ec, 0x1ab3cb0d, 0x7d2f0e2b,
+  0x1ae4f1d6, 0x7d24881b, 0x1b161479, 0x7d19eebf,
+  0x1b4732ef, 0x7d0f4218, 0x1b784d30, 0x7d048228,
+  0x1ba96335, 0x7cf9aef0, 0x1bda74f6, 0x7ceec873,
+  0x1c0b826a, 0x7ce3ceb2, 0x1c3c8b8c, 0x7cd8c1ae,
+  0x1c6d9053, 0x7ccda169, 0x1c9e90b8, 0x7cc26de5,
+  0x1ccf8cb3, 0x7cb72724, 0x1d00843d, 0x7cabcd28,
+  0x1d31774d, 0x7ca05ff1, 0x1d6265dd, 0x7c94df83,
+  0x1d934fe5, 0x7c894bde, 0x1dc4355e, 0x7c7da505,
+  0x1df5163f, 0x7c71eaf9, 0x1e25f282, 0x7c661dbc,
+  0x1e56ca1e, 0x7c5a3d50, 0x1e879d0d, 0x7c4e49b7,
+  0x1eb86b46, 0x7c4242f2, 0x1ee934c3, 0x7c362904,
+  0x1f19f97b, 0x7c29fbee, 0x1f4ab968, 0x7c1dbbb3,
+  0x1f7b7481, 0x7c116853, 0x1fac2abf, 0x7c0501d2,
+  0x1fdcdc1b, 0x7bf88830, 0x200d888d, 0x7bebfb70,
+  0x203e300d, 0x7bdf5b94, 0x206ed295, 0x7bd2a89e,
+  0x209f701c, 0x7bc5e290, 0x20d0089c, 0x7bb9096b,
+  0x21009c0c, 0x7bac1d31, 0x21312a65, 0x7b9f1de6,
+  0x2161b3a0, 0x7b920b89, 0x219237b5, 0x7b84e61f,
+  0x21c2b69c, 0x7b77ada8, 0x21f3304f, 0x7b6a6227,
+  0x2223a4c5, 0x7b5d039e, 0x225413f8, 0x7b4f920e,
+  0x22847de0, 0x7b420d7a, 0x22b4e274, 0x7b3475e5,
+  0x22e541af, 0x7b26cb4f, 0x23159b88, 0x7b190dbc,
+  0x2345eff8, 0x7b0b3d2c, 0x23763ef7, 0x7afd59a4,
+  0x23a6887f, 0x7aef6323, 0x23d6cc87, 0x7ae159ae,
+  0x24070b08, 0x7ad33d45, 0x243743fa, 0x7ac50dec,
+  0x24677758, 0x7ab6cba4, 0x2497a517, 0x7aa8766f,
+  0x24c7cd33, 0x7a9a0e50, 0x24f7efa2, 0x7a8b9348,
+  0x25280c5e, 0x7a7d055b, 0x2558235f, 0x7a6e648a,
+  0x2588349d, 0x7a5fb0d8, 0x25b84012, 0x7a50ea47,
+  0x25e845b6, 0x7a4210d8, 0x26184581, 0x7a332490,
+  0x26483f6c, 0x7a24256f, 0x26783370, 0x7a151378,
+  0x26a82186, 0x7a05eead, 0x26d809a5, 0x79f6b711,
+  0x2707ebc7, 0x79e76ca7, 0x2737c7e3, 0x79d80f6f,
+  0x27679df4, 0x79c89f6e, 0x27976df1, 0x79b91ca4,
+  0x27c737d3, 0x79a98715, 0x27f6fb92, 0x7999dec4,
+  0x2826b928, 0x798a23b1, 0x2856708d, 0x797a55e0,
+  0x288621b9, 0x796a7554, 0x28b5cca5, 0x795a820e,
+  0x28e5714b, 0x794a7c12, 0x29150fa1, 0x793a6361,
+  0x2944a7a2, 0x792a37fe, 0x29743946, 0x7919f9ec,
+  0x29a3c485, 0x7909a92d, 0x29d34958, 0x78f945c3,
+  0x2a02c7b8, 0x78e8cfb2, 0x2a323f9e, 0x78d846fb,
+  0x2a61b101, 0x78c7aba2, 0x2a911bdc, 0x78b6fda8,
+  0x2ac08026, 0x78a63d11, 0x2aefddd8, 0x789569df,
+  0x2b1f34eb, 0x78848414, 0x2b4e8558, 0x78738bb3,
+  0x2b7dcf17, 0x786280bf, 0x2bad1221, 0x7851633b,
+  0x2bdc4e6f, 0x78403329, 0x2c0b83fa, 0x782ef08b,
+  0x2c3ab2b9, 0x781d9b65, 0x2c69daa6, 0x780c33b8,
+  0x2c98fbba, 0x77fab989, 0x2cc815ee, 0x77e92cd9,
+  0x2cf72939, 0x77d78daa, 0x2d263596, 0x77c5dc01,
+  0x2d553afc, 0x77b417df, 0x2d843964, 0x77a24148,
+  0x2db330c7, 0x7790583e, 0x2de2211e, 0x777e5cc3,
+  0x2e110a62, 0x776c4edb, 0x2e3fec8b, 0x775a2e89,
+  0x2e6ec792, 0x7747fbce, 0x2e9d9b70, 0x7735b6af,
+  0x2ecc681e, 0x77235f2d, 0x2efb2d95, 0x7710f54c,
+  0x2f29ebcc, 0x76fe790e, 0x2f58a2be, 0x76ebea77,
+  0x2f875262, 0x76d94989, 0x2fb5fab2, 0x76c69647,
+  0x2fe49ba7, 0x76b3d0b4, 0x30133539, 0x76a0f8d2,
+  0x3041c761, 0x768e0ea6, 0x30705217, 0x767b1231,
+  0x309ed556, 0x76680376, 0x30cd5115, 0x7654e279,
+  0x30fbc54d, 0x7641af3d, 0x312a31f8, 0x762e69c4,
+  0x3158970e, 0x761b1211, 0x3186f487, 0x7607a828,
+  0x31b54a5e, 0x75f42c0b, 0x31e39889, 0x75e09dbd,
+  0x3211df04, 0x75ccfd42, 0x32401dc6, 0x75b94a9c,
+  0x326e54c7, 0x75a585cf, 0x329c8402, 0x7591aedd,
+  0x32caab6f, 0x757dc5ca, 0x32f8cb07, 0x7569ca99,
+  0x3326e2c3, 0x7555bd4c, 0x3354f29b, 0x75419de7,
+  0x3382fa88, 0x752d6c6c, 0x33b0fa84, 0x751928e0,
+  0x33def287, 0x7504d345, 0x340ce28b, 0x74f06b9e,
+  0x343aca87, 0x74dbf1ef, 0x3468aa76, 0x74c7663a,
+  0x34968250, 0x74b2c884, 0x34c4520d, 0x749e18cd,
+  0x34f219a8, 0x7489571c, 0x351fd918, 0x74748371,
+  0x354d9057, 0x745f9dd1, 0x357b3f5d, 0x744aa63f,
+  0x35a8e625, 0x74359cbd, 0x35d684a6, 0x74208150,
+  0x36041ad9, 0x740b53fb, 0x3631a8b8, 0x73f614c0,
+  0x365f2e3b, 0x73e0c3a3, 0x368cab5c, 0x73cb60a8,
+  0x36ba2014, 0x73b5ebd1, 0x36e78c5b, 0x73a06522,
+  0x3714f02a, 0x738acc9e, 0x37424b7b, 0x73752249,
+  0x376f9e46, 0x735f6626, 0x379ce885, 0x73499838,
+  0x37ca2a30, 0x7333b883, 0x37f76341, 0x731dc70a,
+  0x382493b0, 0x7307c3d0, 0x3851bb77, 0x72f1aed9,
+  0x387eda8e, 0x72db8828, 0x38abf0ef, 0x72c54fc1,
+  0x38d8fe93, 0x72af05a7, 0x39060373, 0x7298a9dd,
+  0x3932ff87, 0x72823c67, 0x395ff2c9, 0x726bbd48,
+  0x398cdd32, 0x72552c85, 0x39b9bebc, 0x723e8a20,
+  0x39e6975e, 0x7227d61c, 0x3a136712, 0x7211107e,
+  0x3a402dd2, 0x71fa3949, 0x3a6ceb96, 0x71e35080,
+  0x3a99a057, 0x71cc5626, 0x3ac64c0f, 0x71b54a41,
+  0x3af2eeb7, 0x719e2cd2, 0x3b1f8848, 0x7186fdde,
+  0x3b4c18ba, 0x716fbd68, 0x3b78a007, 0x71586b74,
+  0x3ba51e29, 0x71410805, 0x3bd19318, 0x7129931f,
+  0x3bfdfecd, 0x71120cc5, 0x3c2a6142, 0x70fa74fc,
+  0x3c56ba70, 0x70e2cbc6, 0x3c830a50, 0x70cb1128,
+  0x3caf50da, 0x70b34525, 0x3cdb8e09, 0x709b67c0,
+  0x3d07c1d6, 0x708378ff, 0x3d33ec39, 0x706b78e3,
+  0x3d600d2c, 0x70536771, 0x3d8c24a8, 0x703b44ad,
+  0x3db832a6, 0x7023109a, 0x3de4371f, 0x700acb3c,
+  0x3e10320d, 0x6ff27497, 0x3e3c2369, 0x6fda0cae,
+  0x3e680b2c, 0x6fc19385, 0x3e93e950, 0x6fa90921,
+  0x3ebfbdcd, 0x6f906d84, 0x3eeb889c, 0x6f77c0b3,
+  0x3f1749b8, 0x6f5f02b2, 0x3f430119, 0x6f463383,
+  0x3f6eaeb8, 0x6f2d532c, 0x3f9a5290, 0x6f1461b0,
+  0x3fc5ec98, 0x6efb5f12, 0x3ff17cca, 0x6ee24b57,
+  0x401d0321, 0x6ec92683, 0x40487f94, 0x6eaff099,
+  0x4073f21d, 0x6e96a99d, 0x409f5ab6, 0x6e7d5193,
+  0x40cab958, 0x6e63e87f, 0x40f60dfb, 0x6e4a6e66,
+  0x4121589b, 0x6e30e34a, 0x414c992f, 0x6e174730,
+  0x4177cfb1, 0x6dfd9a1c, 0x41a2fc1a, 0x6de3dc11,
+  0x41ce1e65, 0x6dca0d14, 0x41f93689, 0x6db02d29,
+  0x42244481, 0x6d963c54, 0x424f4845, 0x6d7c3a98,
+  0x427a41d0, 0x6d6227fa, 0x42a5311b, 0x6d48047e,
+  0x42d0161e, 0x6d2dd027, 0x42faf0d4, 0x6d138afb,
+  0x4325c135, 0x6cf934fc, 0x4350873c, 0x6cdece2f,
+  0x437b42e1, 0x6cc45698, 0x43a5f41e, 0x6ca9ce3b,
+  0x43d09aed, 0x6c8f351c, 0x43fb3746, 0x6c748b3f,
+  0x4425c923, 0x6c59d0a9, 0x4450507e, 0x6c3f055d,
+  0x447acd50, 0x6c242960, 0x44a53f93, 0x6c093cb6,
+  0x44cfa740, 0x6bee3f62, 0x44fa0450, 0x6bd3316a,
+  0x452456bd, 0x6bb812d1, 0x454e9e80, 0x6b9ce39b,
+  0x4578db93, 0x6b81a3cd, 0x45a30df0, 0x6b66536b,
+  0x45cd358f, 0x6b4af279, 0x45f7526b, 0x6b2f80fb,
+  0x4621647d, 0x6b13fef5, 0x464b6bbe, 0x6af86c6c,
+  0x46756828, 0x6adcc964, 0x469f59b4, 0x6ac115e2,
+  0x46c9405c, 0x6aa551e9, 0x46f31c1a, 0x6a897d7d,
+  0x471cece7, 0x6a6d98a4, 0x4746b2bc, 0x6a51a361,
+  0x47706d93, 0x6a359db9, 0x479a1d67, 0x6a1987b0,
+  0x47c3c22f, 0x69fd614a, 0x47ed5be6, 0x69e12a8c,
+  0x4816ea86, 0x69c4e37a, 0x48406e08, 0x69a88c19,
+  0x4869e665, 0x698c246c, 0x48935397, 0x696fac78,
+  0x48bcb599, 0x69532442, 0x48e60c62, 0x69368bce,
+  0x490f57ee, 0x6919e320, 0x49389836, 0x68fd2a3d,
+  0x4961cd33, 0x68e06129, 0x498af6df, 0x68c387e9,
+  0x49b41533, 0x68a69e81, 0x49dd282a, 0x6889a4f6,
+  0x4a062fbd, 0x686c9b4b, 0x4a2f2be6, 0x684f8186,
+  0x4a581c9e, 0x683257ab, 0x4a8101de, 0x68151dbe,
+  0x4aa9dba2, 0x67f7d3c5, 0x4ad2a9e2, 0x67da79c3,
+  0x4afb6c98, 0x67bd0fbd, 0x4b2423be, 0x679f95b7,
+  0x4b4ccf4d, 0x67820bb7, 0x4b756f40, 0x676471c0,
+  0x4b9e0390, 0x6746c7d8, 0x4bc68c36, 0x67290e02,
+  0x4bef092d, 0x670b4444, 0x4c177a6e, 0x66ed6aa1,
+  0x4c3fdff4, 0x66cf8120, 0x4c6839b7, 0x66b187c3,
+  0x4c9087b1, 0x66937e91, 0x4cb8c9dd, 0x6675658c,
+  0x4ce10034, 0x66573cbb, 0x4d092ab0, 0x66390422,
+  0x4d31494b, 0x661abbc5, 0x4d595bfe, 0x65fc63a9,
+  0x4d8162c4, 0x65ddfbd3, 0x4da95d96, 0x65bf8447,
+  0x4dd14c6e, 0x65a0fd0b, 0x4df92f46, 0x65826622,
+  0x4e210617, 0x6563bf92, 0x4e48d0dd, 0x6545095f,
+  0x4e708f8f, 0x6526438f, 0x4e984229, 0x65076e25,
+  0x4ebfe8a5, 0x64e88926, 0x4ee782fb, 0x64c99498,
+  0x4f0f1126, 0x64aa907f, 0x4f369320, 0x648b7ce0,
+  0x4f5e08e3, 0x646c59bf, 0x4f857269, 0x644d2722,
+  0x4faccfab, 0x642de50d, 0x4fd420a4, 0x640e9386,
+  0x4ffb654d, 0x63ef3290, 0x50229da1, 0x63cfc231,
+  0x5049c999, 0x63b0426d, 0x5070e92f, 0x6390b34a,
+  0x5097fc5e, 0x637114cc, 0x50bf031f, 0x635166f9,
+  0x50e5fd6d, 0x6331a9d4, 0x510ceb40, 0x6311dd64,
+  0x5133cc94, 0x62f201ac, 0x515aa162, 0x62d216b3,
+  0x518169a5, 0x62b21c7b, 0x51a82555, 0x6292130c,
+  0x51ced46e, 0x6271fa69, 0x51f576ea, 0x6251d298,
+  0x521c0cc2, 0x62319b9d, 0x524295f0, 0x6211557e,
+  0x5269126e, 0x61f1003f, 0x528f8238, 0x61d09be5,
+  0x52b5e546, 0x61b02876, 0x52dc3b92, 0x618fa5f7,
+  0x53028518, 0x616f146c, 0x5328c1d0, 0x614e73da,
+  0x534ef1b5, 0x612dc447, 0x537514c2, 0x610d05b7,
+  0x539b2af0, 0x60ec3830, 0x53c13439, 0x60cb5bb7,
+  0x53e73097, 0x60aa7050, 0x540d2005, 0x60897601,
+  0x5433027d, 0x60686ccf, 0x5458d7f9, 0x604754bf,
+  0x547ea073, 0x60262dd6, 0x54a45be6, 0x6004f819,
+  0x54ca0a4b, 0x5fe3b38d, 0x54efab9c, 0x5fc26038,
+  0x55153fd4, 0x5fa0fe1f, 0x553ac6ee, 0x5f7f8d46,
+  0x556040e2, 0x5f5e0db3, 0x5585adad, 0x5f3c7f6b,
+  0x55ab0d46, 0x5f1ae274, 0x55d05faa, 0x5ef936d1,
+  0x55f5a4d2, 0x5ed77c8a, 0x561adcb9, 0x5eb5b3a2,
+  0x56400758, 0x5e93dc1f, 0x566524aa, 0x5e71f606,
+  0x568a34a9, 0x5e50015d, 0x56af3750, 0x5e2dfe29,
+  0x56d42c99, 0x5e0bec6e, 0x56f9147e, 0x5de9cc33,
+  0x571deefa, 0x5dc79d7c, 0x5742bc06, 0x5da5604f,
+  0x57677b9d, 0x5d8314b1, 0x578c2dba, 0x5d60baa7,
+  0x57b0d256, 0x5d3e5237, 0x57d5696d, 0x5d1bdb65,
+  0x57f9f2f8, 0x5cf95638, 0x581e6ef1, 0x5cd6c2b5,
+  0x5842dd54, 0x5cb420e0, 0x58673e1b, 0x5c9170bf,
+  0x588b9140, 0x5c6eb258, 0x58afd6bd, 0x5c4be5b0,
+  0x58d40e8c, 0x5c290acc, 0x58f838a9, 0x5c0621b2,
+  0x591c550e, 0x5be32a67, 0x594063b5, 0x5bc024f0,
+  0x59646498, 0x5b9d1154, 0x598857b2, 0x5b79ef96,
+  0x59ac3cfd, 0x5b56bfbd, 0x59d01475, 0x5b3381ce,
+  0x59f3de12, 0x5b1035cf, 0x5a1799d1, 0x5aecdbc5,
+  0x5a3b47ab, 0x5ac973b5, 0x5a5ee79a, 0x5aa5fda5,
+  0x5a82799a, 0x5a82799a
+  };
+
+/* {sin((2*i+1)*PI/4096), cos((2*i+1)*PI/4096)} pairs scaled by 2^31, with i = 0 to 511 */
+const int32_t sincos_lookup1[1024] ICONST_ATTR = {
+  0x001921fb, 0x7ffffd88, 0x004b65ee, 0x7fffe9cb,
+  0x007da9d4, 0x7fffc251, 0x00afeda8, 0x7fff8719,
+  0x00e23160, 0x7fff3824, 0x011474f6, 0x7ffed572,
+  0x0146b860, 0x7ffe5f03, 0x0178fb99, 0x7ffdd4d7,
+  0x01ab3e97, 0x7ffd36ee, 0x01dd8154, 0x7ffc8549,
+  0x020fc3c6, 0x7ffbbfe6, 0x024205e8, 0x7ffae6c7,
+  0x027447b0, 0x7ff9f9ec, 0x02a68917, 0x7ff8f954,
+  0x02d8ca16, 0x7ff7e500, 0x030b0aa4, 0x7ff6bcf0,
+  0x033d4abb, 0x7ff58125, 0x036f8a51, 0x7ff4319d,
+  0x03a1c960, 0x7ff2ce5b, 0x03d407df, 0x7ff1575d,
+  0x040645c7, 0x7fefcca4, 0x04388310, 0x7fee2e30,
+  0x046abfb3, 0x7fec7c02, 0x049cfba7, 0x7feab61a,
+  0x04cf36e5, 0x7fe8dc78, 0x05017165, 0x7fe6ef1c,
+  0x0533ab20, 0x7fe4ee06, 0x0565e40d, 0x7fe2d938,
+  0x05981c26, 0x7fe0b0b1, 0x05ca5361, 0x7fde7471,
+  0x05fc89b8, 0x7fdc247a, 0x062ebf22, 0x7fd9c0ca,
+  0x0660f398, 0x7fd74964, 0x06932713, 0x7fd4be46,
+  0x06c5598a, 0x7fd21f72, 0x06f78af6, 0x7fcf6ce8,
+  0x0729bb4e, 0x7fcca6a7, 0x075bea8c, 0x7fc9ccb2,
+  0x078e18a7, 0x7fc6df08, 0x07c04598, 0x7fc3dda9,
+  0x07f27157, 0x7fc0c896, 0x08249bdd, 0x7fbd9fd0,
+  0x0856c520, 0x7fba6357, 0x0888ed1b, 0x7fb7132b,
+  0x08bb13c5, 0x7fb3af4e, 0x08ed3916, 0x7fb037bf,
+  0x091f5d06, 0x7facac7f, 0x09517f8f, 0x7fa90d8e,
+  0x0983a0a7, 0x7fa55aee, 0x09b5c048, 0x7fa1949e,
+  0x09e7de6a, 0x7f9dbaa0, 0x0a19fb04, 0x7f99ccf4,
+  0x0a4c1610, 0x7f95cb9a, 0x0a7e2f85, 0x7f91b694,
+  0x0ab0475c, 0x7f8d8de1, 0x0ae25d8d, 0x7f895182,
+  0x0b147211, 0x7f850179, 0x0b4684df, 0x7f809dc5,
+  0x0b7895f0, 0x7f7c2668, 0x0baaa53b, 0x7f779b62,
+  0x0bdcb2bb, 0x7f72fcb4, 0x0c0ebe66, 0x7f6e4a5e,
+  0x0c40c835, 0x7f698461, 0x0c72d020, 0x7f64aabf,
+  0x0ca4d620, 0x7f5fbd77, 0x0cd6da2d, 0x7f5abc8a,
+  0x0d08dc3f, 0x7f55a7fa, 0x0d3adc4e, 0x7f507fc7,
+  0x0d6cda53, 0x7f4b43f2, 0x0d9ed646, 0x7f45f47b,
+  0x0dd0d01f, 0x7f409164, 0x0e02c7d7, 0x7f3b1aad,
+  0x0e34bd66, 0x7f359057, 0x0e66b0c3, 0x7f2ff263,
+  0x0e98a1e9, 0x7f2a40d2, 0x0eca90ce, 0x7f247ba5,
+  0x0efc7d6b, 0x7f1ea2dc, 0x0f2e67b8, 0x7f18b679,
+  0x0f604faf, 0x7f12b67c, 0x0f923546, 0x7f0ca2e7,
+  0x0fc41876, 0x7f067bba, 0x0ff5f938, 0x7f0040f6,
+  0x1027d784, 0x7ef9f29d, 0x1059b352, 0x7ef390ae,
+  0x108b8c9b, 0x7eed1b2c, 0x10bd6356, 0x7ee69217,
+  0x10ef377d, 0x7edff570, 0x11210907, 0x7ed94538,
+  0x1152d7ed, 0x7ed28171, 0x1184a427, 0x7ecbaa1a,
+  0x11b66dad, 0x7ec4bf36, 0x11e83478, 0x7ebdc0c6,
+  0x1219f880, 0x7eb6aeca, 0x124bb9be, 0x7eaf8943,
+  0x127d7829, 0x7ea85033, 0x12af33ba, 0x7ea1039b,
+  0x12e0ec6a, 0x7e99a37c, 0x1312a230, 0x7e922fd6,
+  0x13445505, 0x7e8aa8ac, 0x137604e2, 0x7e830dff,
+  0x13a7b1bf, 0x7e7b5fce, 0x13d95b93, 0x7e739e1d,
+  0x140b0258, 0x7e6bc8eb, 0x143ca605, 0x7e63e03b,
+  0x146e4694, 0x7e5be40c, 0x149fe3fc, 0x7e53d462,
+  0x14d17e36, 0x7e4bb13c, 0x1503153a, 0x7e437a9c,
+  0x1534a901, 0x7e3b3083, 0x15663982, 0x7e32d2f4,
+  0x1597c6b7, 0x7e2a61ed, 0x15c95097, 0x7e21dd73,
+  0x15fad71b, 0x7e194584, 0x162c5a3b, 0x7e109a24,
+  0x165dd9f0, 0x7e07db52, 0x168f5632, 0x7dff0911,
+  0x16c0cef9, 0x7df62362, 0x16f2443e, 0x7ded2a47,
+  0x1723b5f9, 0x7de41dc0, 0x17552422, 0x7ddafdce,
+  0x17868eb3, 0x7dd1ca75, 0x17b7f5a3, 0x7dc883b4,
+  0x17e958ea, 0x7dbf298d, 0x181ab881, 0x7db5bc02,
+  0x184c1461, 0x7dac3b15, 0x187d6c82, 0x7da2a6c6,
+  0x18aec0db, 0x7d98ff17, 0x18e01167, 0x7d8f4409,
+  0x19115e1c, 0x7d85759f, 0x1942a6f3, 0x7d7b93da,
+  0x1973ebe6, 0x7d719eba, 0x19a52ceb, 0x7d679642,
+  0x19d669fc, 0x7d5d7a74, 0x1a07a311, 0x7d534b50,
+  0x1a38d823, 0x7d4908d9, 0x1a6a0929, 0x7d3eb30f,
+  0x1a9b361d, 0x7d3449f5, 0x1acc5ef6, 0x7d29cd8c,
+  0x1afd83ad, 0x7d1f3dd6, 0x1b2ea43a, 0x7d149ad5,
+  0x1b5fc097, 0x7d09e489, 0x1b90d8bb, 0x7cff1af5,
+  0x1bc1ec9e, 0x7cf43e1a, 0x1bf2fc3a, 0x7ce94dfb,
+  0x1c240786, 0x7cde4a98, 0x1c550e7c, 0x7cd333f3,
+  0x1c861113, 0x7cc80a0f, 0x1cb70f43, 0x7cbcccec,
+  0x1ce80906, 0x7cb17c8d, 0x1d18fe54, 0x7ca618f3,
+  0x1d49ef26, 0x7c9aa221, 0x1d7adb73, 0x7c8f1817,
+  0x1dabc334, 0x7c837ad8, 0x1ddca662, 0x7c77ca65,
+  0x1e0d84f5, 0x7c6c06c0, 0x1e3e5ee5, 0x7c602fec,
+  0x1e6f342c, 0x7c5445e9, 0x1ea004c1, 0x7c4848ba,
+  0x1ed0d09d, 0x7c3c3860, 0x1f0197b8, 0x7c3014de,
+  0x1f325a0b, 0x7c23de35, 0x1f63178f, 0x7c179467,
+  0x1f93d03c, 0x7c0b3777, 0x1fc4840a, 0x7bfec765,
+  0x1ff532f2, 0x7bf24434, 0x2025dcec, 0x7be5ade6,
+  0x205681f1, 0x7bd9047c, 0x208721f9, 0x7bcc47fa,
+  0x20b7bcfe, 0x7bbf7860, 0x20e852f6, 0x7bb295b0,
+  0x2118e3dc, 0x7ba59fee, 0x21496fa7, 0x7b989719,
+  0x2179f64f, 0x7b8b7b36, 0x21aa77cf, 0x7b7e4c45,
+  0x21daf41d, 0x7b710a49, 0x220b6b32, 0x7b63b543,
+  0x223bdd08, 0x7b564d36, 0x226c4996, 0x7b48d225,
+  0x229cb0d5, 0x7b3b4410, 0x22cd12bd, 0x7b2da2fa,
+  0x22fd6f48, 0x7b1feee5, 0x232dc66d, 0x7b1227d3,
+  0x235e1826, 0x7b044dc7, 0x238e646a, 0x7af660c2,
+  0x23beab33, 0x7ae860c7, 0x23eeec78, 0x7ada4dd8,
+  0x241f2833, 0x7acc27f7, 0x244f5e5c, 0x7abdef25,
+  0x247f8eec, 0x7aafa367, 0x24afb9da, 0x7aa144bc,
+  0x24dfdf20, 0x7a92d329, 0x250ffeb7, 0x7a844eae,
+  0x25401896, 0x7a75b74f, 0x25702cb7, 0x7a670d0d,
+  0x25a03b11, 0x7a584feb, 0x25d0439f, 0x7a497feb,
+  0x26004657, 0x7a3a9d0f, 0x26304333, 0x7a2ba75a,
+  0x26603a2c, 0x7a1c9ece, 0x26902b39, 0x7a0d836d,
+  0x26c01655, 0x79fe5539, 0x26effb76, 0x79ef1436,
+  0x271fda96, 0x79dfc064, 0x274fb3ae, 0x79d059c8,
+  0x277f86b5, 0x79c0e062, 0x27af53a6, 0x79b15435,
+  0x27df1a77, 0x79a1b545, 0x280edb23, 0x79920392,
+  0x283e95a1, 0x79823f20, 0x286e49ea, 0x797267f2,
+  0x289df7f8, 0x79627e08, 0x28cd9fc1, 0x79528167,
+  0x28fd4140, 0x79427210, 0x292cdc6d, 0x79325006,
+  0x295c7140, 0x79221b4b, 0x298bffb2, 0x7911d3e2,
+  0x29bb87bc, 0x790179cd, 0x29eb0957, 0x78f10d0f,
+  0x2a1a847b, 0x78e08dab, 0x2a49f920, 0x78cffba3,
+  0x2a796740, 0x78bf56f9, 0x2aa8ced3, 0x78ae9fb0,
+  0x2ad82fd2, 0x789dd5cb, 0x2b078a36, 0x788cf94c,
+  0x2b36ddf7, 0x787c0a36, 0x2b662b0e, 0x786b088c,
+  0x2b957173, 0x7859f44f, 0x2bc4b120, 0x7848cd83,
+  0x2bf3ea0d, 0x7837942b, 0x2c231c33, 0x78264849,
+  0x2c52478a, 0x7814e9df, 0x2c816c0c, 0x780378f1,
+  0x2cb089b1, 0x77f1f581, 0x2cdfa071, 0x77e05f91,
+  0x2d0eb046, 0x77ceb725, 0x2d3db928, 0x77bcfc3f,
+  0x2d6cbb10, 0x77ab2ee2, 0x2d9bb5f6, 0x77994f11,
+  0x2dcaa9d5, 0x77875cce, 0x2df996a3, 0x7775581d,
+  0x2e287c5a, 0x776340ff, 0x2e575af3, 0x77511778,
+  0x2e863267, 0x773edb8b, 0x2eb502ae, 0x772c8d3a,
+  0x2ee3cbc1, 0x771a2c88, 0x2f128d99, 0x7707b979,
+  0x2f41482e, 0x76f5340e, 0x2f6ffb7a, 0x76e29c4b,
+  0x2f9ea775, 0x76cff232, 0x2fcd4c19, 0x76bd35c7,
+  0x2ffbe95d, 0x76aa670d, 0x302a7f3a, 0x76978605,
+  0x30590dab, 0x768492b4, 0x308794a6, 0x76718d1c,
+  0x30b61426, 0x765e7540, 0x30e48c22, 0x764b4b23,
+  0x3112fc95, 0x76380ec8, 0x31416576, 0x7624c031,
+  0x316fc6be, 0x76115f63, 0x319e2067, 0x75fdec60,
+  0x31cc7269, 0x75ea672a, 0x31fabcbd, 0x75d6cfc5,
+  0x3228ff5c, 0x75c32634, 0x32573a3f, 0x75af6a7b,
+  0x32856d5e, 0x759b9c9b, 0x32b398b3, 0x7587bc98,
+  0x32e1bc36, 0x7573ca75, 0x330fd7e1, 0x755fc635,
+  0x333debab, 0x754bafdc, 0x336bf78f, 0x7537876c,
+  0x3399fb85, 0x75234ce8, 0x33c7f785, 0x750f0054,
+  0x33f5eb89, 0x74faa1b3, 0x3423d78a, 0x74e63108,
+  0x3451bb81, 0x74d1ae55, 0x347f9766, 0x74bd199f,
+  0x34ad6b32, 0x74a872e8, 0x34db36df, 0x7493ba34,
+  0x3508fa66, 0x747eef85, 0x3536b5be, 0x746a12df,
+  0x356468e2, 0x74552446, 0x359213c9, 0x744023bc,
+  0x35bfb66e, 0x742b1144, 0x35ed50c9, 0x7415ece2,
+  0x361ae2d3, 0x7400b69a, 0x36486c86, 0x73eb6e6e,
+  0x3675edd9, 0x73d61461, 0x36a366c6, 0x73c0a878,
+  0x36d0d746, 0x73ab2ab4, 0x36fe3f52, 0x73959b1b,
+  0x372b9ee3, 0x737ff9ae, 0x3758f5f2, 0x736a4671,
+  0x37864477, 0x73548168, 0x37b38a6d, 0x733eaa96,
+  0x37e0c7cc, 0x7328c1ff, 0x380dfc8d, 0x7312c7a5,
+  0x383b28a9, 0x72fcbb8c, 0x38684c19, 0x72e69db7,
+  0x389566d6, 0x72d06e2b, 0x38c278d9, 0x72ba2cea,
+  0x38ef821c, 0x72a3d9f7, 0x391c8297, 0x728d7557,
+  0x39497a43, 0x7276ff0d, 0x39766919, 0x7260771b,
+  0x39a34f13, 0x7249dd86, 0x39d02c2a, 0x72333251,
+  0x39fd0056, 0x721c7580, 0x3a29cb91, 0x7205a716,
+  0x3a568dd4, 0x71eec716, 0x3a834717, 0x71d7d585,
+  0x3aaff755, 0x71c0d265, 0x3adc9e86, 0x71a9bdba,
+  0x3b093ca3, 0x71929789, 0x3b35d1a5, 0x717b5fd3,
+  0x3b625d86, 0x7164169d, 0x3b8ee03e, 0x714cbbeb,
+  0x3bbb59c7, 0x71354fc0, 0x3be7ca1a, 0x711dd220,
+  0x3c143130, 0x7106430e, 0x3c408f03, 0x70eea28e,
+  0x3c6ce38a, 0x70d6f0a4, 0x3c992ec0, 0x70bf2d53,
+  0x3cc5709e, 0x70a7589f, 0x3cf1a91c, 0x708f728b,
+  0x3d1dd835, 0x70777b1c, 0x3d49fde1, 0x705f7255,
+  0x3d761a19, 0x70475839, 0x3da22cd7, 0x702f2ccd,
+  0x3dce3614, 0x7016f014, 0x3dfa35c8, 0x6ffea212,
+  0x3e262bee, 0x6fe642ca, 0x3e52187f, 0x6fcdd241,
+  0x3e7dfb73, 0x6fb5507a, 0x3ea9d4c3, 0x6f9cbd79,
+  0x3ed5a46b, 0x6f841942, 0x3f016a61, 0x6f6b63d8,
+  0x3f2d26a0, 0x6f529d40, 0x3f58d921, 0x6f39c57d,
+  0x3f8481dd, 0x6f20dc92, 0x3fb020ce, 0x6f07e285,
+  0x3fdbb5ec, 0x6eeed758, 0x40074132, 0x6ed5bb10,
+  0x4032c297, 0x6ebc8db0, 0x405e3a16, 0x6ea34f3d,
+  0x4089a7a8, 0x6e89ffb9, 0x40b50b46, 0x6e709f2a,
+  0x40e064ea, 0x6e572d93, 0x410bb48c, 0x6e3daaf8,
+  0x4136fa27, 0x6e24175c, 0x416235b2, 0x6e0a72c5,
+  0x418d6729, 0x6df0bd35, 0x41b88e84, 0x6dd6f6b1,
+  0x41e3abbc, 0x6dbd1f3c, 0x420ebecb, 0x6da336dc,
+  0x4239c7aa, 0x6d893d93, 0x4264c653, 0x6d6f3365,
+  0x428fbabe, 0x6d551858, 0x42baa4e6, 0x6d3aec6e,
+  0x42e584c3, 0x6d20afac, 0x43105a50, 0x6d066215,
+  0x433b2585, 0x6cec03af, 0x4365e65b, 0x6cd1947c,
+  0x43909ccd, 0x6cb71482, 0x43bb48d4, 0x6c9c83c3,
+  0x43e5ea68, 0x6c81e245, 0x44108184, 0x6c67300b,
+  0x443b0e21, 0x6c4c6d1a, 0x44659039, 0x6c319975,
+  0x449007c4, 0x6c16b521, 0x44ba74bd, 0x6bfbc021,
+  0x44e4d71c, 0x6be0ba7b, 0x450f2edb, 0x6bc5a431,
+  0x45397bf4, 0x6baa7d49, 0x4563be60, 0x6b8f45c7,
+  0x458df619, 0x6b73fdae, 0x45b82318, 0x6b58a503,
+  0x45e24556, 0x6b3d3bcb, 0x460c5cce, 0x6b21c208,
+  0x46366978, 0x6b0637c1, 0x46606b4e, 0x6aea9cf8,
+  0x468a624a, 0x6acef1b2, 0x46b44e65, 0x6ab335f4,
+  0x46de2f99, 0x6a9769c1, 0x470805df, 0x6a7b8d1e,
+  0x4731d131, 0x6a5fa010, 0x475b9188, 0x6a43a29a,
+  0x478546de, 0x6a2794c1, 0x47aef12c, 0x6a0b7689,
+  0x47d8906d, 0x69ef47f6, 0x48022499, 0x69d3090e,
+  0x482badab, 0x69b6b9d3, 0x48552b9b, 0x699a5a4c,
+  0x487e9e64, 0x697dea7b, 0x48a805ff, 0x69616a65,
+  0x48d16265, 0x6944da10, 0x48fab391, 0x6928397e,
+  0x4923f97b, 0x690b88b5, 0x494d341e, 0x68eec7b9,
+  0x49766373, 0x68d1f68f, 0x499f8774, 0x68b5153a,
+  0x49c8a01b, 0x689823bf, 0x49f1ad61, 0x687b2224,
+  0x4a1aaf3f, 0x685e106c, 0x4a43a5b0, 0x6840ee9b,
+  0x4a6c90ad, 0x6823bcb7, 0x4a957030, 0x68067ac3,
+  0x4abe4433, 0x67e928c5, 0x4ae70caf, 0x67cbc6c0,
+  0x4b0fc99d, 0x67ae54ba, 0x4b387af9, 0x6790d2b6,
+  0x4b6120bb, 0x677340ba, 0x4b89badd, 0x67559eca,
+  0x4bb24958, 0x6737ecea, 0x4bdacc28, 0x671a2b20,
+  0x4c034345, 0x66fc596f, 0x4c2baea9, 0x66de77dc,
+  0x4c540e4e, 0x66c0866d, 0x4c7c622d, 0x66a28524,
+  0x4ca4aa41, 0x66847408, 0x4ccce684, 0x6666531d,
+  0x4cf516ee, 0x66482267, 0x4d1d3b7a, 0x6629e1ec,
+  0x4d455422, 0x660b91af, 0x4d6d60df, 0x65ed31b5,
+  0x4d9561ac, 0x65cec204, 0x4dbd5682, 0x65b0429f,
+  0x4de53f5a, 0x6591b38c, 0x4e0d1c30, 0x657314cf,
+  0x4e34ecfc, 0x6554666d, 0x4e5cb1b9, 0x6535a86b,
+  0x4e846a60, 0x6516dacd, 0x4eac16eb, 0x64f7fd98,
+  0x4ed3b755, 0x64d910d1, 0x4efb4b96, 0x64ba147d,
+  0x4f22d3aa, 0x649b08a0, 0x4f4a4f89, 0x647bed3f,
+  0x4f71bf2e, 0x645cc260, 0x4f992293, 0x643d8806,
+  0x4fc079b1, 0x641e3e38, 0x4fe7c483, 0x63fee4f8,
+  0x500f0302, 0x63df7c4d, 0x50363529, 0x63c0043b,
+  0x505d5af1, 0x63a07cc7, 0x50847454, 0x6380e5f6,
+  0x50ab814d, 0x63613fcd, 0x50d281d5, 0x63418a50,
+  0x50f975e6, 0x6321c585, 0x51205d7b, 0x6301f171,
+  0x5147388c, 0x62e20e17, 0x516e0715, 0x62c21b7e,
+  0x5194c910, 0x62a219aa, 0x51bb7e75, 0x628208a1,
+  0x51e22740, 0x6261e866, 0x5208c36a, 0x6241b8ff,
+  0x522f52ee, 0x62217a72, 0x5255d5c5, 0x62012cc2,
+  0x527c4bea, 0x61e0cff5, 0x52a2b556, 0x61c06410,
+  0x52c91204, 0x619fe918, 0x52ef61ee, 0x617f5f12,
+  0x5315a50e, 0x615ec603, 0x533bdb5d, 0x613e1df0,
+  0x536204d7, 0x611d66de, 0x53882175, 0x60fca0d2,
+  0x53ae3131, 0x60dbcbd1, 0x53d43406, 0x60bae7e1,
+  0x53fa29ed, 0x6099f505, 0x542012e1, 0x6078f344,
+  0x5445eedb, 0x6057e2a2, 0x546bbdd7, 0x6036c325,
+  0x54917fce, 0x601594d1, 0x54b734ba, 0x5ff457ad,
+  0x54dcdc96, 0x5fd30bbc, 0x5502775c, 0x5fb1b104,
+  0x55280505, 0x5f90478a, 0x554d858d, 0x5f6ecf53,
+  0x5572f8ed, 0x5f4d4865, 0x55985f20, 0x5f2bb2c5,
+  0x55bdb81f, 0x5f0a0e77, 0x55e303e6, 0x5ee85b82,
+  0x5608426e, 0x5ec699e9, 0x562d73b2, 0x5ea4c9b3,
+  0x565297ab, 0x5e82eae5, 0x5677ae54, 0x5e60fd84,
+  0x569cb7a8, 0x5e3f0194, 0x56c1b3a1, 0x5e1cf71c,
+  0x56e6a239, 0x5dfade20, 0x570b8369, 0x5dd8b6a7,
+  0x5730572e, 0x5db680b4, 0x57551d80, 0x5d943c4e,
+  0x5779d65b, 0x5d71e979, 0x579e81b8, 0x5d4f883b,
+  0x57c31f92, 0x5d2d189a, 0x57e7afe4, 0x5d0a9a9a,
+  0x580c32a7, 0x5ce80e41, 0x5830a7d6, 0x5cc57394,
+  0x58550f6c, 0x5ca2ca99, 0x58796962, 0x5c801354,
+  0x589db5b3, 0x5c5d4dcc, 0x58c1f45b, 0x5c3a7a05,
+  0x58e62552, 0x5c179806, 0x590a4893, 0x5bf4a7d2,
+  0x592e5e19, 0x5bd1a971, 0x595265df, 0x5bae9ce7,
+  0x59765fde, 0x5b8b8239, 0x599a4c12, 0x5b68596d,
+  0x59be2a74, 0x5b452288, 0x59e1faff, 0x5b21dd90,
+  0x5a05bdae, 0x5afe8a8b, 0x5a29727b, 0x5adb297d,
+  0x5a4d1960, 0x5ab7ba6c, 0x5a70b258, 0x5a943d5e,
+};
+
+/* Split-radix bit-reverse table for FFT of size up to 2048 */
+
+const uint16_t revtab[1<<12] = {
+0, 3072, 1536, 2816, 768, 3840, 1408, 2432, 384, 3456, 1920, 2752, 704, 
+3776, 1216, 2240, 192, 3264, 1728, 3008, 960, 4032, 1376, 2400, 352, 3424, 
+1888, 2656, 608, 3680, 1120, 2144, 96, 3168, 1632, 2912, 864, 3936, 1504, 
+2528, 480, 3552, 2016, 2736, 688, 3760, 1200, 2224, 176, 3248, 1712, 2992, 
+944, 4016, 1328, 2352, 304, 3376, 1840, 2608, 560, 3632, 1072, 2096, 48, 
+3120, 1584, 2864, 816, 3888, 1456, 2480, 432, 3504, 1968, 2800, 752, 3824, 
+1264, 2288, 240, 3312, 1776, 3056, 1008, 4080, 1368, 2392, 344, 3416, 1880, 
+2648, 600, 3672, 1112, 2136, 88, 3160, 1624, 2904, 856, 3928, 1496, 2520, 
+472, 3544, 2008, 2712, 664, 3736, 1176, 2200, 152, 3224, 1688, 2968, 920, 
+3992, 1304, 2328, 280, 3352, 1816, 2584, 536, 3608, 1048, 2072, 24, 3096, 
+1560, 2840, 792, 3864, 1432, 2456, 408, 3480, 1944, 2776, 728, 3800, 1240, 
+2264, 216, 3288, 1752, 3032, 984, 4056, 1400, 2424, 376, 3448, 1912, 2680, 
+632, 3704, 1144, 2168, 120, 3192, 1656, 2936, 888, 3960, 1528, 2552, 504, 
+3576, 2040, 2732, 684, 3756, 1196, 2220, 172, 3244, 1708, 2988, 940, 4012, 
+1324, 2348, 300, 3372, 1836, 2604, 556, 3628, 1068, 2092, 44, 3116, 1580, 
+2860, 812, 3884, 1452, 2476, 428, 3500, 1964, 2796, 748, 3820, 1260, 2284, 
+236, 3308, 1772, 3052, 1004, 4076, 1356, 2380, 332, 3404, 1868, 2636, 588, 
+3660, 1100, 2124, 76, 3148, 1612, 2892, 844, 3916, 1484, 2508, 460, 3532, 
+1996, 2700, 652, 3724, 1164, 2188, 140, 3212, 1676, 2956, 908, 3980, 1292, 
+2316, 268, 3340, 1804, 2572, 524, 3596, 1036, 2060, 12, 3084, 1548, 2828, 
+780, 3852, 1420, 2444, 396, 3468, 1932, 2764, 716, 3788, 1228, 2252, 204, 
+3276, 1740, 3020, 972, 4044, 1388, 2412, 364, 3436, 1900, 2668, 620, 3692, 
+1132, 2156, 108, 3180, 1644, 2924, 876, 3948, 1516, 2540, 492, 3564, 2028, 
+2748, 700, 3772, 1212, 2236, 188, 3260, 1724, 3004, 956, 4028, 1340, 2364, 
+316, 3388, 1852, 2620, 572, 3644, 1084, 2108, 60, 3132, 1596, 2876, 828, 
+3900, 1468, 2492, 444, 3516, 1980, 2812, 764, 3836, 1276, 2300, 252, 3324, 
+1788, 3068, 1020, 4092, 1366, 2390, 342, 3414, 1878, 2646, 598, 3670, 1110, 
+2134, 86, 3158, 1622, 2902, 854, 3926, 1494, 2518, 470, 3542, 2006, 2710, 
+662, 3734, 1174, 2198, 150, 3222, 1686, 2966, 918, 3990, 1302, 2326, 278, 
+3350, 1814, 2582, 534, 3606, 1046, 2070, 22, 3094, 1558, 2838, 790, 3862, 
+1430, 2454, 406, 3478, 1942, 2774, 726, 3798, 1238, 2262, 214, 3286, 1750, 
+3030, 982, 4054, 1398, 2422, 374, 3446, 1910, 2678, 630, 3702, 1142, 2166, 
+118, 3190, 1654, 2934, 886, 3958, 1526, 2550, 502, 3574, 2038, 2726, 678, 
+3750, 1190, 2214, 166, 3238, 1702, 2982, 934, 4006, 1318, 2342, 294, 3366, 
+1830, 2598, 550, 3622, 1062, 2086, 38, 3110, 1574, 2854, 806, 3878, 1446, 
+2470, 422, 3494, 1958, 2790, 742, 3814, 1254, 2278, 230, 3302, 1766, 3046, 
+998, 4070, 1350, 2374, 326, 3398, 1862, 2630, 582, 3654, 1094, 2118, 70, 
+3142, 1606, 2886, 838, 3910, 1478, 2502, 454, 3526, 1990, 2694, 646, 3718, 
+1158, 2182, 134, 3206, 1670, 2950, 902, 3974, 1286, 2310, 262, 3334, 1798, 
+2566, 518, 3590, 1030, 2054, 6, 3078, 1542, 2822, 774, 3846, 1414, 2438, 
+390, 3462, 1926, 2758, 710, 3782, 1222, 2246, 198, 3270, 1734, 3014, 966, 
+4038, 1382, 2406, 358, 3430, 1894, 2662, 614, 3686, 1126, 2150, 102, 3174, 
+1638, 2918, 870, 3942, 1510, 2534, 486, 3558, 2022, 2742, 694, 3766, 1206, 
+2230, 182, 3254, 1718, 2998, 950, 4022, 1334, 2358, 310, 3382, 1846, 2614, 
+566, 3638, 1078, 2102, 54, 3126, 1590, 2870, 822, 3894, 1462, 2486, 438, 
+3510, 1974, 2806, 758, 3830, 1270, 2294, 246, 3318, 1782, 3062, 1014, 4086, 
+1374, 2398, 350, 3422, 1886, 2654, 606, 3678, 1118, 2142, 94, 3166, 1630, 
+2910, 862, 3934, 1502, 2526, 478, 3550, 2014, 2718, 670, 3742, 1182, 2206, 
+158, 3230, 1694, 2974, 926, 3998, 1310, 2334, 286, 3358, 1822, 2590, 542, 
+3614, 1054, 2078, 30, 3102, 1566, 2846, 798, 3870, 1438, 2462, 414, 3486, 
+1950, 2782, 734, 3806, 1246, 2270, 222, 3294, 1758, 3038, 990, 4062, 1406, 
+2430, 382, 3454, 1918, 2686, 638, 3710, 1150, 2174, 126, 3198, 1662, 2942, 
+894, 3966, 1534, 2558, 510, 3582, 2046, 2731, 683, 3755, 1195, 2219, 171, 
+3243, 1707, 2987, 939, 4011, 1323, 2347, 299, 3371, 1835, 2603, 555, 3627, 
+1067, 2091, 43, 3115, 1579, 2859, 811, 3883, 1451, 2475, 427, 3499, 1963, 
+2795, 747, 3819, 1259, 2283, 235, 3307, 1771, 3051, 1003, 4075, 1355, 2379, 
+331, 3403, 1867, 2635, 587, 3659, 1099, 2123, 75, 3147, 1611, 2891, 843, 
+3915, 1483, 2507, 459, 3531, 1995, 2699, 651, 3723, 1163, 2187, 139, 3211, 
+1675, 2955, 907, 3979, 1291, 2315, 267, 3339, 1803, 2571, 523, 3595, 1035, 
+2059, 11, 3083, 1547, 2827, 779, 3851, 1419, 2443, 395, 3467, 1931, 2763, 
+715, 3787, 1227, 2251, 203, 3275, 1739, 3019, 971, 4043, 1387, 2411, 363, 
+3435, 1899, 2667, 619, 3691, 1131, 2155, 107, 3179, 1643, 2923, 875, 3947, 
+1515, 2539, 491, 3563, 2027, 2747, 699, 3771, 1211, 2235, 187, 3259, 1723, 
+3003, 955, 4027, 1339, 2363, 315, 3387, 1851, 2619, 571, 3643, 1083, 2107, 
+59, 3131, 1595, 2875, 827, 3899, 1467, 2491, 443, 3515, 1979, 2811, 763, 
+3835, 1275, 2299, 251, 3323, 1787, 3067, 1019, 4091, 1363, 2387, 339, 3411, 
+1875, 2643, 595, 3667, 1107, 2131, 83, 3155, 1619, 2899, 851, 3923, 1491, 
+2515, 467, 3539, 2003, 2707, 659, 3731, 1171, 2195, 147, 3219, 1683, 2963, 
+915, 3987, 1299, 2323, 275, 3347, 1811, 2579, 531, 3603, 1043, 2067, 19, 
+3091, 1555, 2835, 787, 3859, 1427, 2451, 403, 3475, 1939, 2771, 723, 3795, 
+1235, 2259, 211, 3283, 1747, 3027, 979, 4051, 1395, 2419, 371, 3443, 1907, 
+2675, 627, 3699, 1139, 2163, 115, 3187, 1651, 2931, 883, 3955, 1523, 2547, 
+499, 3571, 2035, 2723, 675, 3747, 1187, 2211, 163, 3235, 1699, 2979, 931, 
+4003, 1315, 2339, 291, 3363, 1827, 2595, 547, 3619, 1059, 2083, 35, 3107, 
+1571, 2851, 803, 3875, 1443, 2467, 419, 3491, 1955, 2787, 739, 3811, 1251, 
+2275, 227, 3299, 1763, 3043, 995, 4067, 1347, 2371, 323, 3395, 1859, 2627, 
+579, 3651, 1091, 2115, 67, 3139, 1603, 2883, 835, 3907, 1475, 2499, 451, 
+3523, 1987, 2691, 643, 3715, 1155, 2179, 131, 3203, 1667, 2947, 899, 3971, 
+1283, 2307, 259, 3331, 1795, 2563, 515, 3587, 1027, 2051, 3, 3075, 1539, 
+2819, 771, 3843, 1411, 2435, 387, 3459, 1923, 2755, 707, 3779, 1219, 2243, 
+195, 3267, 1731, 3011, 963, 4035, 1379, 2403, 355, 3427, 1891, 2659, 611, 
+3683, 1123, 2147, 99, 3171, 1635, 2915, 867, 3939, 1507, 2531, 483, 3555, 
+2019, 2739, 691, 3763, 1203, 2227, 179, 3251, 1715, 2995, 947, 4019, 1331, 
+2355, 307, 3379, 1843, 2611, 563, 3635, 1075, 2099, 51, 3123, 1587, 2867, 
+819, 3891, 1459, 2483, 435, 3507, 1971, 2803, 755, 3827, 1267, 2291, 243, 
+3315, 1779, 3059, 1011, 4083, 1371, 2395, 347, 3419, 1883, 2651, 603, 3675, 
+1115, 2139, 91, 3163, 1627, 2907, 859, 3931, 1499, 2523, 475, 3547, 2011, 
+2715, 667, 3739, 1179, 2203, 155, 3227, 1691, 2971, 923, 3995, 1307, 2331, 
+283, 3355, 1819, 2587, 539, 3611, 1051, 2075, 27, 3099, 1563, 2843, 795, 
+3867, 1435, 2459, 411, 3483, 1947, 2779, 731, 3803, 1243, 2267, 219, 3291, 
+1755, 3035, 987, 4059, 1403, 2427, 379, 3451, 1915, 2683, 635, 3707, 1147, 
+2171, 123, 3195, 1659, 2939, 891, 3963, 1531, 2555, 507, 3579, 2043, 2735, 
+687, 3759, 1199, 2223, 175, 3247, 1711, 2991, 943, 4015, 1327, 2351, 303, 
+3375, 1839, 2607, 559, 3631, 1071, 2095, 47, 3119, 1583, 2863, 815, 3887, 
+1455, 2479, 431, 3503, 1967, 2799, 751, 3823, 1263, 2287, 239, 3311, 1775, 
+3055, 1007, 4079, 1359, 2383, 335, 3407, 1871, 2639, 591, 3663, 1103, 2127, 
+79, 3151, 1615, 2895, 847, 3919, 1487, 2511, 463, 3535, 1999, 2703, 655, 
+3727, 1167, 2191, 143, 3215, 1679, 2959, 911, 3983, 1295, 2319, 271, 3343, 
+1807, 2575, 527, 3599, 1039, 2063, 15, 3087, 1551, 2831, 783, 3855, 1423, 
+2447, 399, 3471, 1935, 2767, 719, 3791, 1231, 2255, 207, 3279, 1743, 3023, 
+975, 4047, 1391, 2415, 367, 3439, 1903, 2671, 623, 3695, 1135, 2159, 111, 
+3183, 1647, 2927, 879, 3951, 1519, 2543, 495, 3567, 2031, 2751, 703, 3775, 
+1215, 2239, 191, 3263, 1727, 3007, 959, 4031, 1343, 2367, 319, 3391, 1855, 
+2623, 575, 3647, 1087, 2111, 63, 3135, 1599, 2879, 831, 3903, 1471, 2495, 
+447, 3519, 1983, 2815, 767, 3839, 1279, 2303, 255, 3327, 1791, 3071, 1023, 
+4095, 1365, 2389, 341, 3413, 1877, 2645, 597, 3669, 1109, 2133, 85, 3157, 
+1621, 2901, 853, 3925, 1493, 2517, 469, 3541, 2005, 2709, 661, 3733, 1173, 
+2197, 149, 3221, 1685, 2965, 917, 3989, 1301, 2325, 277, 3349, 1813, 2581, 
+533, 3605, 1045, 2069, 21, 3093, 1557, 2837, 789, 3861, 1429, 2453, 405, 
+3477, 1941, 2773, 725, 3797, 1237, 2261, 213, 3285, 1749, 3029, 981, 4053, 
+1397, 2421, 373, 3445, 1909, 2677, 629, 3701, 1141, 2165, 117, 3189, 1653, 
+2933, 885, 3957, 1525, 2549, 501, 3573, 2037, 2725, 677, 3749, 1189, 2213, 
+165, 3237, 1701, 2981, 933, 4005, 1317, 2341, 293, 3365, 1829, 2597, 549, 
+3621, 1061, 2085, 37, 3109, 1573, 2853, 805, 3877, 1445, 2469, 421, 3493, 
+1957, 2789, 741, 3813, 1253, 2277, 229, 3301, 1765, 3045, 997, 4069, 1349, 
+2373, 325, 3397, 1861, 2629, 581, 3653, 1093, 2117, 69, 3141, 1605, 2885, 
+837, 3909, 1477, 2501, 453, 3525, 1989, 2693, 645, 3717, 1157, 2181, 133, 
+3205, 1669, 2949, 901, 3973, 1285, 2309, 261, 3333, 1797, 2565, 517, 3589, 
+1029, 2053, 5, 3077, 1541, 2821, 773, 3845, 1413, 2437, 389, 3461, 1925, 
+2757, 709, 3781, 1221, 2245, 197, 3269, 1733, 3013, 965, 4037, 1381, 2405, 
+357, 3429, 1893, 2661, 613, 3685, 1125, 2149, 101, 3173, 1637, 2917, 869, 
+3941, 1509, 2533, 485, 3557, 2021, 2741, 693, 3765, 1205, 2229, 181, 3253, 
+1717, 2997, 949, 4021, 1333, 2357, 309, 3381, 1845, 2613, 565, 3637, 1077, 
+2101, 53, 3125, 1589, 2869, 821, 3893, 1461, 2485, 437, 3509, 1973, 2805, 
+757, 3829, 1269, 2293, 245, 3317, 1781, 3061, 1013, 4085, 1373, 2397, 349, 
+3421, 1885, 2653, 605, 3677, 1117, 2141, 93, 3165, 1629, 2909, 861, 3933, 
+1501, 2525, 477, 3549, 2013, 2717, 669, 3741, 1181, 2205, 157, 3229, 1693, 
+2973, 925, 3997, 1309, 2333, 285, 3357, 1821, 2589, 541, 3613, 1053, 2077, 
+29, 3101, 1565, 2845, 797, 3869, 1437, 2461, 413, 3485, 1949, 2781, 733, 
+3805, 1245, 2269, 221, 3293, 1757, 3037, 989, 4061, 1405, 2429, 381, 3453, 
+1917, 2685, 637, 3709, 1149, 2173, 125, 3197, 1661, 2941, 893, 3965, 1533, 
+2557, 509, 3581, 2045, 2729, 681, 3753, 1193, 2217, 169, 3241, 1705, 2985, 
+937, 4009, 1321, 2345, 297, 3369, 1833, 2601, 553, 3625, 1065, 2089, 41, 
+3113, 1577, 2857, 809, 3881, 1449, 2473, 425, 3497, 1961, 2793, 745, 3817, 
+1257, 2281, 233, 3305, 1769, 3049, 1001, 4073, 1353, 2377, 329, 3401, 1865, 
+2633, 585, 3657, 1097, 2121, 73, 3145, 1609, 2889, 841, 3913, 1481, 2505, 
+457, 3529, 1993, 2697, 649, 3721, 1161, 2185, 137, 3209, 1673, 2953, 905, 
+3977, 1289, 2313, 265, 3337, 1801, 2569, 521, 3593, 1033, 2057, 9, 3081, 
+1545, 2825, 777, 3849, 1417, 2441, 393, 3465, 1929, 2761, 713, 3785, 1225, 
+2249, 201, 3273, 1737, 3017, 969, 4041, 1385, 2409, 361, 3433, 1897, 2665, 
+617, 3689, 1129, 2153, 105, 3177, 1641, 2921, 873, 3945, 1513, 2537, 489, 
+3561, 2025, 2745, 697, 3769, 1209, 2233, 185, 3257, 1721, 3001, 953, 4025, 
+1337, 2361, 313, 3385, 1849, 2617, 569, 3641, 1081, 2105, 57, 3129, 1593, 
+2873, 825, 3897, 1465, 2489, 441, 3513, 1977, 2809, 761, 3833, 1273, 2297, 
+249, 3321, 1785, 3065, 1017, 4089, 1361, 2385, 337, 3409, 1873, 2641, 593, 
+3665, 1105, 2129, 81, 3153, 1617, 2897, 849, 3921, 1489, 2513, 465, 3537, 
+2001, 2705, 657, 3729, 1169, 2193, 145, 3217, 1681, 2961, 913, 3985, 1297, 
+2321, 273, 3345, 1809, 2577, 529, 3601, 1041, 2065, 17, 3089, 1553, 2833, 
+785, 3857, 1425, 2449, 401, 3473, 1937, 2769, 721, 3793, 1233, 2257, 209, 
+3281, 1745, 3025, 977, 4049, 1393, 2417, 369, 3441, 1905, 2673, 625, 3697, 
+1137, 2161, 113, 3185, 1649, 2929, 881, 3953, 1521, 2545, 497, 3569, 2033, 
+2721, 673, 3745, 1185, 2209, 161, 3233, 1697, 2977, 929, 4001, 1313, 2337, 
+289, 3361, 1825, 2593, 545, 3617, 1057, 2081, 33, 3105, 1569, 2849, 801, 
+3873, 1441, 2465, 417, 3489, 1953, 2785, 737, 3809, 1249, 2273, 225, 3297, 
+1761, 3041, 993, 4065, 1345, 2369, 321, 3393, 1857, 2625, 577, 3649, 1089, 
+2113, 65, 3137, 1601, 2881, 833, 3905, 1473, 2497, 449, 3521, 1985, 2689, 
+641, 3713, 1153, 2177, 129, 3201, 1665, 2945, 897, 3969, 1281, 2305, 257, 
+3329, 1793, 2561, 513, 3585, 1025, 2049, 1, 3073, 1537, 2817, 769, 3841, 
+1409, 2433, 385, 3457, 1921, 2753, 705, 3777, 1217, 2241, 193, 3265, 1729, 
+3009, 961, 4033, 1377, 2401, 353, 3425, 1889, 2657, 609, 3681, 1121, 2145, 
+97, 3169, 1633, 2913, 865, 3937, 1505, 2529, 481, 3553, 2017, 2737, 689, 
+3761, 1201, 2225, 177, 3249, 1713, 2993, 945, 4017, 1329, 2353, 305, 3377, 
+1841, 2609, 561, 3633, 1073, 2097, 49, 3121, 1585, 2865, 817, 3889, 1457, 
+2481, 433, 3505, 1969, 2801, 753, 3825, 1265, 2289, 241, 3313, 1777, 3057, 
+1009, 4081, 1369, 2393, 345, 3417, 1881, 2649, 601, 3673, 1113, 2137, 89, 
+3161, 1625, 2905, 857, 3929, 1497, 2521, 473, 3545, 2009, 2713, 665, 3737, 
+1177, 2201, 153, 3225, 1689, 2969, 921, 3993, 1305, 2329, 281, 3353, 1817, 
+2585, 537, 3609, 1049, 2073, 25, 3097, 1561, 2841, 793, 3865, 1433, 2457, 
+409, 3481, 1945, 2777, 729, 3801, 1241, 2265, 217, 3289, 1753, 3033, 985, 
+4057, 1401, 2425, 377, 3449, 1913, 2681, 633, 3705, 1145, 2169, 121, 3193, 
+1657, 2937, 889, 3961, 1529, 2553, 505, 3577, 2041, 2733, 685, 3757, 1197, 
+2221, 173, 3245, 1709, 2989, 941, 4013, 1325, 2349, 301, 3373, 1837, 2605, 
+557, 3629, 1069, 2093, 45, 3117, 1581, 2861, 813, 3885, 1453, 2477, 429, 
+3501, 1965, 2797, 749, 3821, 1261, 2285, 237, 3309, 1773, 3053, 1005, 4077, 
+1357, 2381, 333, 3405, 1869, 2637, 589, 3661, 1101, 2125, 77, 3149, 1613, 
+2893, 845, 3917, 1485, 2509, 461, 3533, 1997, 2701, 653, 3725, 1165, 2189, 
+141, 3213, 1677, 2957, 909, 3981, 1293, 2317, 269, 3341, 1805, 2573, 525, 
+3597, 1037, 2061, 13, 3085, 1549, 2829, 781, 3853, 1421, 2445, 397, 3469, 
+1933, 2765, 717, 3789, 1229, 2253, 205, 3277, 1741, 3021, 973, 4045, 1389, 
+2413, 365, 3437, 1901, 2669, 621, 3693, 1133, 2157, 109, 3181, 1645, 2925, 
+877, 3949, 1517, 2541, 493, 3565, 2029, 2749, 701, 3773, 1213, 2237, 189, 
+3261, 1725, 3005, 957, 4029, 1341, 2365, 317, 3389, 1853, 2621, 573, 3645, 
+1085, 2109, 61, 3133, 1597, 2877, 829, 3901, 1469, 2493, 445, 3517, 1981, 
+2813, 765, 3837, 1277, 2301, 253, 3325, 1789, 3069, 1021, 4093, 1367, 2391, 
+343, 3415, 1879, 2647, 599, 3671, 1111, 2135, 87, 3159, 1623, 2903, 855, 
+3927, 1495, 2519, 471, 3543, 2007, 2711, 663, 3735, 1175, 2199, 151, 3223, 
+1687, 2967, 919, 3991, 1303, 2327, 279, 3351, 1815, 2583, 535, 3607, 1047, 
+2071, 23, 3095, 1559, 2839, 791, 3863, 1431, 2455, 407, 3479, 1943, 2775, 
+727, 3799, 1239, 2263, 215, 3287, 1751, 3031, 983, 4055, 1399, 2423, 375, 
+3447, 1911, 2679, 631, 3703, 1143, 2167, 119, 3191, 1655, 2935, 887, 3959, 
+1527, 2551, 503, 3575, 2039, 2727, 679, 3751, 1191, 2215, 167, 3239, 1703, 
+2983, 935, 4007, 1319, 2343, 295, 3367, 1831, 2599, 551, 3623, 1063, 2087, 
+39, 3111, 1575, 2855, 807, 3879, 1447, 2471, 423, 3495, 1959, 2791, 743, 
+3815, 1255, 2279, 231, 3303, 1767, 3047, 999, 4071, 1351, 2375, 327, 3399, 
+1863, 2631, 583, 3655, 1095, 2119, 71, 3143, 1607, 2887, 839, 3911, 1479, 
+2503, 455, 3527, 1991, 2695, 647, 3719, 1159, 2183, 135, 3207, 1671, 2951, 
+903, 3975, 1287, 2311, 263, 3335, 1799, 2567, 519, 3591, 1031, 2055, 7, 
+3079, 1543, 2823, 775, 3847, 1415, 2439, 391, 3463, 1927, 2759, 711, 3783, 
+1223, 2247, 199, 3271, 1735, 3015, 967, 4039, 1383, 2407, 359, 3431, 1895, 
+2663, 615, 3687, 1127, 2151, 103, 3175, 1639, 2919, 871, 3943, 1511, 2535, 
+487, 3559, 2023, 2743, 695, 3767, 1207, 2231, 183, 3255, 1719, 2999, 951, 
+4023, 1335, 2359, 311, 3383, 1847, 2615, 567, 3639, 1079, 2103, 55, 3127, 
+1591, 2871, 823, 3895, 1463, 2487, 439, 3511, 1975, 2807, 759, 3831, 1271, 
+2295, 247, 3319, 1783, 3063, 1015, 4087, 1375, 2399, 351, 3423, 1887, 2655, 
+607, 3679, 1119, 2143, 95, 3167, 1631, 2911, 863, 3935, 1503, 2527, 479, 
+3551, 2015, 2719, 671, 3743, 1183, 2207, 159, 3231, 1695, 2975, 927, 3999, 
+1311, 2335, 287, 3359, 1823, 2591, 543, 3615, 1055, 2079, 31, 3103, 1567, 
+2847, 799, 3871, 1439, 2463, 415, 3487, 1951, 2783, 735, 3807, 1247, 2271, 
+223, 3295, 1759, 3039, 991, 4063, 1407, 2431, 383, 3455, 1919, 2687, 639, 
+3711, 1151, 2175, 127, 3199, 1663, 2943, 895, 3967, 1535, 2559, 511, 3583, 
+2047, 2730, 682, 3754, 1194, 2218, 170, 3242, 1706, 2986, 938, 4010, 1322, 
+2346, 298, 3370, 1834, 2602, 554, 3626, 1066, 2090, 42, 3114, 1578, 2858, 
+810, 3882, 1450, 2474, 426, 3498, 1962, 2794, 746, 3818, 1258, 2282, 234, 
+3306, 1770, 3050, 1002, 4074, 1354, 2378, 330, 3402, 1866, 2634, 586, 3658, 
+1098, 2122, 74, 3146, 1610, 2890, 842, 3914, 1482, 2506, 458, 3530, 1994, 
+2698, 650, 3722, 1162, 2186, 138, 3210, 1674, 2954, 906, 3978, 1290, 2314, 
+266, 3338, 1802, 2570, 522, 3594, 1034, 2058, 10, 3082, 1546, 2826, 778, 
+3850, 1418, 2442, 394, 3466, 1930, 2762, 714, 3786, 1226, 2250, 202, 3274, 
+1738, 3018, 970, 4042, 1386, 2410, 362, 3434, 1898, 2666, 618, 3690, 1130, 
+2154, 106, 3178, 1642, 2922, 874, 3946, 1514, 2538, 490, 3562, 2026, 2746, 
+698, 3770, 1210, 2234, 186, 3258, 1722, 3002, 954, 4026, 1338, 2362, 314, 
+3386, 1850, 2618, 570, 3642, 1082, 2106, 58, 3130, 1594, 2874, 826, 3898, 
+1466, 2490, 442, 3514, 1978, 2810, 762, 3834, 1274, 2298, 250, 3322, 1786, 
+3066, 1018, 4090, 1362, 2386, 338, 3410, 1874, 2642, 594, 3666, 1106, 2130, 
+82, 3154, 1618, 2898, 850, 3922, 1490, 2514, 466, 3538, 2002, 2706, 658, 
+3730, 1170, 2194, 146, 3218, 1682, 2962, 914, 3986, 1298, 2322, 274, 3346, 
+1810, 2578, 530, 3602, 1042, 2066, 18, 3090, 1554, 2834, 786, 3858, 1426, 
+2450, 402, 3474, 1938, 2770, 722, 3794, 1234, 2258, 210, 3282, 1746, 3026, 
+978, 4050, 1394, 2418, 370, 3442, 1906, 2674, 626, 3698, 1138, 2162, 114, 
+3186, 1650, 2930, 882, 3954, 1522, 2546, 498, 3570, 2034, 2722, 674, 3746, 
+1186, 2210, 162, 3234, 1698, 2978, 930, 4002, 1314, 2338, 290, 3362, 1826, 
+2594, 546, 3618, 1058, 2082, 34, 3106, 1570, 2850, 802, 3874, 1442, 2466, 
+418, 3490, 1954, 2786, 738, 3810, 1250, 2274, 226, 3298, 1762, 3042, 994, 
+4066, 1346, 2370, 322, 3394, 1858, 2626, 578, 3650, 1090, 2114, 66, 3138, 
+1602, 2882, 834, 3906, 1474, 2498, 450, 3522, 1986, 2690, 642, 3714, 1154, 
+2178, 130, 3202, 1666, 2946, 898, 3970, 1282, 2306, 258, 3330, 1794, 2562, 
+514, 3586, 1026, 2050, 2, 3074, 1538, 2818, 770, 3842, 1410, 2434, 386, 
+3458, 1922, 2754, 706, 3778, 1218, 2242, 194, 3266, 1730, 3010, 962, 4034, 
+1378, 2402, 354, 3426, 1890, 2658, 610, 3682, 1122, 2146, 98, 3170, 1634, 
+2914, 866, 3938, 1506, 2530, 482, 3554, 2018, 2738, 690, 3762, 1202, 2226, 
+178, 3250, 1714, 2994, 946, 4018, 1330, 2354, 306, 3378, 1842, 2610, 562, 
+3634, 1074, 2098, 50, 3122, 1586, 2866, 818, 3890, 1458, 2482, 434, 3506, 
+1970, 2802, 754, 3826, 1266, 2290, 242, 3314, 1778, 3058, 1010, 4082, 1370, 
+2394, 346, 3418, 1882, 2650, 602, 3674, 1114, 2138, 90, 3162, 1626, 2906, 
+858, 3930, 1498, 2522, 474, 3546, 2010, 2714, 666, 3738, 1178, 2202, 154, 
+3226, 1690, 2970, 922, 3994, 1306, 2330, 282, 3354, 1818, 2586, 538, 3610, 
+1050, 2074, 26, 3098, 1562, 2842, 794, 3866, 1434, 2458, 410, 3482, 1946, 
+2778, 730, 3802, 1242, 2266, 218, 3290, 1754, 3034, 986, 4058, 1402, 2426, 
+378, 3450, 1914, 2682, 634, 3706, 1146, 2170, 122, 3194, 1658, 2938, 890, 
+3962, 1530, 2554, 506, 3578, 2042, 2734, 686, 3758, 1198, 2222, 174, 3246, 
+1710, 2990, 942, 4014, 1326, 2350, 302, 3374, 1838, 2606, 558, 3630, 1070, 
+2094, 46, 3118, 1582, 2862, 814, 3886, 1454, 2478, 430, 3502, 1966, 2798, 
+750, 3822, 1262, 2286, 238, 3310, 1774, 3054, 1006, 4078, 1358, 2382, 334, 
+3406, 1870, 2638, 590, 3662, 1102, 2126, 78, 3150, 1614, 2894, 846, 3918, 
+1486, 2510, 462, 3534, 1998, 2702, 654, 3726, 1166, 2190, 142, 3214, 1678, 
+2958, 910, 3982, 1294, 2318, 270, 3342, 1806, 2574, 526, 3598, 1038, 2062, 
+14, 3086, 1550, 2830, 782, 3854, 1422, 2446, 398, 3470, 1934, 2766, 718, 
+3790, 1230, 2254, 206, 3278, 1742, 3022, 974, 4046, 1390, 2414, 366, 3438, 
+1902, 2670, 622, 3694, 1134, 2158, 110, 3182, 1646, 2926, 878, 3950, 1518, 
+2542, 494, 3566, 2030, 2750, 702, 3774, 1214, 2238, 190, 3262, 1726, 3006, 
+958, 4030, 1342, 2366, 318, 3390, 1854, 2622, 574, 3646, 1086, 2110, 62, 
+3134, 1598, 2878, 830, 3902, 1470, 2494, 446, 3518, 1982, 2814, 766, 3838, 
+1278, 2302, 254, 3326, 1790, 3070, 1022, 4094, 1364, 2388, 340, 3412, 1876, 
+2644, 596, 3668, 1108, 2132, 84, 3156, 1620, 2900, 852, 3924, 1492, 2516, 
+468, 3540, 2004, 2708, 660, 3732, 1172, 2196, 148, 3220, 1684, 2964, 916, 
+3988, 1300, 2324, 276, 3348, 1812, 2580, 532, 3604, 1044, 2068, 20, 3092, 
+1556, 2836, 788, 3860, 1428, 2452, 404, 3476, 1940, 2772, 724, 3796, 1236, 
+2260, 212, 3284, 1748, 3028, 980, 4052, 1396, 2420, 372, 3444, 1908, 2676, 
+628, 3700, 1140, 2164, 116, 3188, 1652, 2932, 884, 3956, 1524, 2548, 500, 
+3572, 2036, 2724, 676, 3748, 1188, 2212, 164, 3236, 1700, 2980, 932, 4004, 
+1316, 2340, 292, 3364, 1828, 2596, 548, 3620, 1060, 2084, 36, 3108, 1572, 
+2852, 804, 3876, 1444, 2468, 420, 3492, 1956, 2788, 740, 3812, 1252, 2276, 
+228, 3300, 1764, 3044, 996, 4068, 1348, 2372, 324, 3396, 1860, 2628, 580, 
+3652, 1092, 2116, 68, 3140, 1604, 2884, 836, 3908, 1476, 2500, 452, 3524, 
+1988, 2692, 644, 3716, 1156, 2180, 132, 3204, 1668, 2948, 900, 3972, 1284, 
+2308, 260, 3332, 1796, 2564, 516, 3588, 1028, 2052, 4, 3076, 1540, 2820, 
+772, 3844, 1412, 2436, 388, 3460, 1924, 2756, 708, 3780, 1220, 2244, 196, 
+3268, 1732, 3012, 964, 4036, 1380, 2404, 356, 3428, 1892, 2660, 612, 3684, 
+1124, 2148, 100, 3172, 1636, 2916, 868, 3940, 1508, 2532, 484, 3556, 2020, 
+2740, 692, 3764, 1204, 2228, 180, 3252, 1716, 2996, 948, 4020, 1332, 2356, 
+308, 3380, 1844, 2612, 564, 3636, 1076, 2100, 52, 3124, 1588, 2868, 820, 
+3892, 1460, 2484, 436, 3508, 1972, 2804, 756, 3828, 1268, 2292, 244, 3316, 
+1780, 3060, 1012, 4084, 1372, 2396, 348, 3420, 1884, 2652, 604, 3676, 1116, 
+2140, 92, 3164, 1628, 2908, 860, 3932, 1500, 2524, 476, 3548, 2012, 2716, 
+668, 3740, 1180, 2204, 156, 3228, 1692, 2972, 924, 3996, 1308, 2332, 284, 
+3356, 1820, 2588, 540, 3612, 1052, 2076, 28, 3100, 1564, 2844, 796, 3868, 
+1436, 2460, 412, 3484, 1948, 2780, 732, 3804, 1244, 2268, 220, 3292, 1756, 
+3036, 988, 4060, 1404, 2428, 380, 3452, 1916, 2684, 636, 3708, 1148, 2172, 
+124, 3196, 1660, 2940, 892, 3964, 1532, 2556, 508, 3580, 2044, 2728, 680, 
+3752, 1192, 2216, 168, 3240, 1704, 2984, 936, 4008, 1320, 2344, 296, 3368, 
+1832, 2600, 552, 3624, 1064, 2088, 40, 3112, 1576, 2856, 808, 3880, 1448, 
+2472, 424, 3496, 1960, 2792, 744, 3816, 1256, 2280, 232, 3304, 1768, 3048, 
+1000, 4072, 1352, 2376, 328, 3400, 1864, 2632, 584, 3656, 1096, 2120, 72, 
+3144, 1608, 2888, 840, 3912, 1480, 2504, 456, 3528, 1992, 2696, 648, 3720, 
+1160, 2184, 136, 3208, 1672, 2952, 904, 3976, 1288, 2312, 264, 3336, 1800, 
+2568, 520, 3592, 1032, 2056, 8, 3080, 1544, 2824, 776, 3848, 1416, 2440, 
+392, 3464, 1928, 2760, 712, 3784, 1224, 2248, 200, 3272, 1736, 3016, 968, 
+4040, 1384, 2408, 360, 3432, 1896, 2664, 616, 3688, 1128, 2152, 104, 3176, 
+1640, 2920, 872, 3944, 1512, 2536, 488, 3560, 2024, 2744, 696, 3768, 1208, 
+2232, 184, 3256, 1720, 3000, 952, 4024, 1336, 2360, 312, 3384, 1848, 2616, 
+568, 3640, 1080, 2104, 56, 3128, 1592, 2872, 824, 3896, 1464, 2488, 440, 
+3512, 1976, 2808, 760, 3832, 1272, 2296, 248, 3320, 1784, 3064, 1016, 4088, 
+1360, 2384, 336, 3408, 1872, 2640, 592, 3664, 1104, 2128, 80, 3152, 1616, 
+2896, 848, 3920, 1488, 2512, 464, 3536, 2000, 2704, 656, 3728, 1168, 2192, 
+144, 3216, 1680, 2960, 912, 3984, 1296, 2320, 272, 3344, 1808, 2576, 528, 
+3600, 1040, 2064, 16, 3088, 1552, 2832, 784, 3856, 1424, 2448, 400, 3472, 
+1936, 2768, 720, 3792, 1232, 2256, 208, 3280, 1744, 3024, 976, 4048, 1392, 
+2416, 368, 3440, 1904, 2672, 624, 3696, 1136, 2160, 112, 3184, 1648, 2928, 
+880, 3952, 1520, 2544, 496, 3568, 2032, 2720, 672, 3744, 1184, 2208, 160, 
+3232, 1696, 2976, 928, 4000, 1312, 2336, 288, 3360, 1824, 2592, 544, 3616, 
+1056, 2080, 32, 3104, 1568, 2848, 800, 3872, 1440, 2464, 416, 3488, 1952, 
+2784, 736, 3808, 1248, 2272, 224, 3296, 1760, 3040, 992, 4064, 1344, 2368, 
+320, 3392, 1856, 2624, 576, 3648, 1088, 2112, 64, 3136, 1600, 2880, 832, 
+3904, 1472, 2496, 448, 3520, 1984, 2688, 640, 3712, 1152, 2176, 128, 3200, 
+1664, 2944, 896, 3968, 1280, 2304, 256, 3328, 1792, 2560, 512, 3584, 1024, 
+2048};
+
+
--- a/lib/rbcodec/codecs/lib/mdct_lookup.h
+++ b/lib/rbcodec/codecs/lib/mdct_lookup.h
@ -0,0 +1,24 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE.   *
+ *                                                                  *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002    *
+ * BY THE Xiph.Org FOUNDATION http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+ function: sin,cos lookup tables
+
+ ********************************************************************/
+
+
+extern const int32_t sincos_lookup0[1026]; /* sin/cos lookup table, 1026 int32_t entries (declaration only). This header includes nothing itself, so the includer must already provide int32_t/uint16_t. */
+extern const int32_t sincos_lookup1[1024]; /* second sin/cos lookup table, 1024 int32_t entries. NOTE(review): how it relates to sincos_lookup0 is not visible here -- confirm against the definition. */
+extern const uint16_t revtab[1<<12]; /* 4096 uint16_t entries. NOTE(review): presumably the FFT bit-reversal index table -- confirm against the definition. */
+
+
+
--- a/lib/rbcodec/codecs/lib/osx.dummy.c
+++ b/lib/rbcodec/codecs/lib/osx.dummy.c
--- a/lib/rbcodec/codecs/liba52/AUTHORS
+++ b/lib/rbcodec/codecs/liba52/AUTHORS
@ -0,0 +1,23 @@
+Aaron Holtzman <aholtzma@ess.engr.uvic.ca> started the project and
+made the initial working implementation.
+
+Michel Lespinasse <walken@zoy.org> did major changes for speed and
+conformance and is the current maintainer.
+
+Other contributors include:
+	Gildas Bazin <gbazin@netcourrier.com> - mingw32 port
+	Billy Biggs <vektor@div8.net> - most of liba52.txt
+	Jeroen Dobbelaere <jeroen.dobbelaere@acunia.com> - fixed point version
+	Eduard Hasenleithner <eduardh@aon.at> - gcc 3.0 fixes
+	Håkan Hjort <d95hjort@dtek.chalmers.se> - Solaris output, mlib code
+	Charles M. Hannum <root@ihack.net> - fixes
+	Chris Hodges <hodges@stradis.com> - made the library reentrant
+	Michael Holzt <kju@flummi.de> - OSS output.c and misc errata
+	Angelos Keromytis <angelos@dsl.cis.upenn.edu> - OpenBSD fixes
+	David I. Lehn <dlehn@vt.edu> - API cleanup suggestion
+	Don Mahurin <dmahurin@dma.org> - stdin support for extract_a52
+	Jim Miller <jmiller@heli.engr.sgi.com> - IRIX output.c
+	Takefumi Sayo <stake@niagara.shiojiri.ne.jp> - FreeBSD tweak
+	Shoji Tokunaga <toku@mac.com> - aif file output
+
+(let me know if I forgot anyone)
--- a/lib/rbcodec/codecs/liba52/COPYING
+++ b/lib/rbcodec/codecs/liba52/COPYING
@ -0,0 +1,340 @@
+		    GNU GENERAL PUBLIC LICENSE
+		       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+                       59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+			    Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+		    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+			    NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+		     END OF TERMS AND CONDITIONS
+
+	    How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) 19yy  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) 19yy name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Library General
+Public License instead of this License.
--- a/lib/rbcodec/codecs/liba52/ChangeLog
+++ b/lib/rbcodec/codecs/liba52/ChangeLog
@ -0,0 +1,97 @@
+a52dec-0.7.4 Sat Jul 27 20:44:00 PDT 2002
+-The library is now fully reentrant.
+-Added win32 output module, al file output, gain control.
+-A few additional portability enhancements.
+
+a52dec-0.7.3 Wed Feb 20 23:38:22 PST 2002
+-rewrite of the imdct code, making a52dec 40% to 80% faster than version 0.7.2
+-fixed one memory corruption problem in parse.c
+-small liba52 portability fixes
+-byte order and CRLF bugfixes in wav file output
+-aif file output
+-IRIX al sound output (untested, tell us if it works)
+
+a52dec-0.7.2 Sun Dec 16 14:39:56 PST 2001
+-demuxer improvements, with support for TS streams.
+-smaller demux buffer, making it easier to use a52dec as a pipe
+-wav output
+-avoid -fPIC when possible (-prefer-non-pic)
+-support for vc++ and TenDRA
+-portability fixes
+	
+a52dec-0.7.1b Fri Aug 31 02:37:23 PDT 2001
+-removed an #include <stdint.h> that was only breaking libc5 builds
+
+a52dec-0.7.1 Thu Aug 30 02:13:23 PDT 2001
+-gcc 3.0 fixes
+-mlib fixes (now passes the test suite)
+-in a52dec.c, made sample_data and flags static
+-removed a few statics in liba52/parse.c (still not reentrant, but closer)
+
+a52dec-0.7.0 Thu Aug 23 23:18:00 PDT 2001
+-Downmix to arbitrary speaker configurations
+-Dynamic range compression
+-Major speedups: 2x for 2.0 streams with 2.0 output,
+                 3x for 5.1 streams with 2.0 output.
+-New library interface
+-Rematrixing bugfix
+-Higher precision
+
+ac3dec-0.6.1 Mon Mar 27 20:27:06 EST 2000
+-Fix another 2.0 problem (rematrix was wrong).
+-Fix the never resync on a bad crc bug.
+
+ac3dec-0.6.0 Sat Mar 18 19:43:25 EST 2000
+-New library interface 
+-Fix bug wrt coupling channels that was causing sound quality problems. 
+-Fix 2.0 mode problems (aka the I forgot to implement the phase flags bug). 
+-All around speed improvements (almost twice as fast) 
+-Improved robustness when fed bad data. The entire frame is checksummed before playback.
+
+ac3dec-0.5.6 Tue Nov 16 00:37:34 EST 1999
+-Irix support 
+-Alpha fixes 
+-Minor performance enhancements to downmix and imdct
+-OpenBSD fixes 
+-extract_ac3 can now read from stdin
+-Change output_linux to block on write instead of using the
+ ring buffer. Let me know if this causes/fixes any problems
+
+ac3dec-0.5.5 Wed Aug 25 15:36:44 EDT 1999
+-Fixed a cut and paste bug (argh!) in parse.c which potentially 
+ screwed up delta bit allocation info.
+-Martin Mueller <mamueller@topmail.de> informed me that I was missing
+ some corrections from the AC-3 errata document. It turns out that 
+ I used an earlier version of the errata when I initially wrote ac3dec.
+ Fortunately the errata fix the outstanding bugs that I was pulling
+ my hair out on for a long time. Woohoo! Thanks Martin. Kudos to Dolby
+ Labs for keeping their documentation up to date as well.
+-stereo downmixing (downmix.c) is now in. Matrix encoded surround 
+ (Dolby Prologic Surround) should work too.
+-clipping due to high level signals has been fixed. We normalize a
+ block of samples by its maximum absolute value if the max exceeds 
+ the 100% digital level. This shouldn't be a problem, but for some
+ reason some channels have a dynamic range that exceeds [-1.0,1.0].
+ I blame the encoder :)
+-Multiple track support in extract_ac3. Simply just give it the track
+ number you want [1,8] after the filename.
+
+ac3dec-0.5.4 Thu Jul 29 16:55:10 PDT 1999
+-Fixed a stupid bug with the coupling channel that was causing
+ high frequencies to be attenuated.
+-Re-wrote the extract_ac3 tool.
+-Added a tool to verify the checksums on a given AC3 stream.
+ (tools/verify_ac3)
+
+ac3dec-0.5.3  Mon Jul 12 10:45:56 PDT 1999
+-Fixed problems related to streams with coupling channel enabled.
+-Minor performance enhancements
+
+ac3dec-0.5.2  Sun Jul  4 12:00:25 PDT 1999
+-output_linux.c patch provided by Michael Holzt <kju@flummi.de>
+
+ac3dec-0.5.1  Wed Jun 30 17:48:52 PDT 1999
+-Compiles and dies gracefully under Linux now.
+
+ac3dec-0.5.0  Wed Jun 23 11:06:06 EDT 1999
+-First public release of ac3dec.
--- a/lib/rbcodec/codecs/liba52/HISTORY
+++ b/lib/rbcodec/codecs/liba52/HISTORY
@ -0,0 +1,28 @@
+changes that affected the PCM output:
+
+2001/05/14 05:48:59 - aaron's 0.6.1 was cutting before the end
+
+2001/06/04 01:42:47 - slightly adjusted some values (volume, downmix adjustment, clev/slev tables)
+2001/06/04 05:48:31 - added adjust_level to the downmix
+
+2001/06/12
+2001/06/13          - changed the order of the dither() calls
+                      also introduced bug with thx_2_0 stream
+
+2001/06/22 08:23:37 - fixed bug with thx_2_0 stream (see parse.c)
+
+2001/07/02 08:44:55 - changed float-to-int conversion (+ added saturation)
+
+2001/07/06 06:53:01 - more precise imdct init, more precise q_* coefficients
+
+2001/07/06 08:52:30 - rematrixing fixes
+
+2001/07/26 21:31:39 - do 3dB dither reduction in coeff_get() not dither_gen()
+
+2001/08/09 08:11:31 - implemented dynamic range compression
+
+2002/02/24 08:54:49 - window function computed at runtime, with more precision
+
+2003/01/28 06:57:37 - switched to integer q_* coefficients
+
+2003/01/28 07:39:35 - use level of 0.75 for dithering instead of 0.707
--- a/lib/rbcodec/codecs/liba52/INSTALL
+++ b/lib/rbcodec/codecs/liba52/INSTALL
@ -0,0 +1,58 @@
+Unix build instructions
+-----------------------
+
+./configure
+make
+make install
+
+If you install from CVS you'll have to run ./bootstrap first
+
+
+Building for win32
+------------------
+
+There are at least three ways to do it:
+
+- natively on Windows using Microsoft VC++ and the vc++ project
+  included in this distribution.
+
+- natively on Windows using MSYS + MINGW (www.mingw.org) (MSYS is a
+  minimal build environment to compile unixish projects under
+  windows. It provides all the common unix tools like sh, gmake...)
+
+- or on Linux, using the mingw32 cross-compiler
+
+
+Building using MSYS + MINGW on windows
+--------------------------------------
+
+First you will need to download and install the latest MSYS (version
+1.0.7 as of now) and MINGW. The installation is really easy. Begin
+with the MSYS auto-installer and once this is done, extract MINGW into
+c:\msys\1.0\mingw. You also have to remember to remove the make
+utility included with MINGW as it conflicts with the one from MSYS
+(just rename or remove c:\msys\1.0\mingw\bin\make.exe).
+
+http://prdownloads.sourceforge.net/mingw/MSYS-1.0.7-i686-2002.04.24-1.exe
+http://prdownloads.sourceforge.net/mingw/MinGW-1.1.tar.gz
+
+Then you can build the package using:
+# ./configure
+# make
+
+
+Building using the mingw32 cross-compiler
+-----------------------------------------
+
+You need to install mingw32 first. For Debian GNU/Linux users, there
+is a mingw32 package. Otherwise you might get it from the mingw site
+at http://www.mingw.org/download.shtml.
+
+The videolan project also keeps precompiled mingw32 binaries at
+http://www.videolan.org/vlc/windows.html . If you install these,
+you'll have to set your PATH accordingly to include
+/usr/local/cross-tools/bin too.
+
+The build should then proceed using something like:
+# CC=i586-mingw32msvc-gcc ./configure --host=i586-mingw32msvc
+# make
--- a/lib/rbcodec/codecs/liba52/NEWS
+++ b/lib/rbcodec/codecs/liba52/NEWS
@ -0,0 +1,46 @@
+a52dec-0.7.4 Sat Jul 27 20:44:00 PDT 2002
+
+The library is now fully reentrant.
+
+Added win32 output module, al file output, gain control.
+
+A few additional portability enhancements.
+
+
+a52dec-0.7.3 Wed Feb 20 23:38:22 PST 2002 
+
+Performance enhancements, from 40% to 80% depending on streams.
+
+Fixed a few embarrassing bugs in liba52: one memory corruption issue
+and a few minor portability problems.
+
+Several new output modules, and fixes in the existing .wav file output.
+
+
+a52dec-0.7.2 Sun Dec 16 14:39:56 PST 2001
+
+Minor bugfixes, performance and portability enhancements.
+
+Also added wav format output, and reduced the demux buffer size which
+makes it easier to use a52dec as a pipe.
+
+
+a52dec-0.7.1 Thu Aug 30 02:13:23 PDT 2001
+
+Minor release for bugfixes. Looks like 0.7.0 was a bit rushed out.
+
+Now compiles with gcc 3.0, made sure mlib implementation works, and
+fixed a small bug in the a52dec test program.
+
+
+a52dec-0.7.0 Thu Aug 23 23:18:00 PDT 2001 
+
+First release in more than a year!
+
+The most user-noticeable additions are the downmix to arbitrary
+speaker configurations, and the implementation of dynamic range
+compression.
+
+The speed has been improved by a factor of 2 to 3, the conformance and
+precision should be higher, and we also fixed a small bug when playing
+stereo rematrixed streams.
--- a/lib/rbcodec/codecs/liba52/README
+++ b/lib/rbcodec/codecs/liba52/README
@ -0,0 +1,180 @@
+
+
+ABOUT LIBA52
+
+liba52 is a free library for decoding ATSC A/52 streams. It is
+released under the terms of the GPL license. The A/52 standard is used
+in a variety of applications, including digital television and DVD. It
+is also known as AC-3.
+
+The main goals in liba52 development are:
+
+      *	Portability - Currently all of the code is written in C, and
+	when we write platform-specific optimizations we will always
+	keep a generic C routine to fall back on.
+
+      *	Reusability - we do not want liba52 to include any
+	project-specific code, but it should still include enough
+	features to be used by very diverse projects.
+
+      *	Precision - We are trying to implement all of the A/52
+	standard, and to have a very precise output by doing all the
+	calculations in floating point. We have a test suite that
+	detects any deviation in the output when compared to previous
+	versions. We do not have access to official A/52 test vectors
+	though, so we have to use our judgement to ensure that such
+	deviations are only introduced when we fix bugs !
+
+      *	Speed - liba52 is really fast, on any modern PC it should take
+	only a few percent of CPU time.
+
+The project homepage is at http://liba52.sourceforge.net/
+
+
+A52DEC
+
+a52dec is a test program for liba52. It decodes ATSC A/52 streams, and
+also includes a demultiplexer for mpeg-1 and mpeg-2 program streams.
+
+The liba52 source code is always distributed in the a52dec package, to
+make it easier for people to test it.
+
+The basic usage is to just type "a52dec file" where file is an ATSC
+A/52 file.
+
+The "-s" option must be used for multiplexed (audio and video) mpeg-2
+files. These files are usually found on the internet or on unencrypted
+DVDs.
+
+The "-o" option is used to select a given output layer. By default
+a52dec does a stereo downmix and outputs to your speakers, but you can
+try other choices using this option. This is also used for performance
+testing and conformance testing.
+
+The "-c" option is used to disable all optimizations (currently only djbfft).
+
+The "-r" option is used to disable the dynamic range compression.
+
+
+OTHER PROJECTS USING LIBA52
+
+liba52 (and its ancestor libac3) is being used by various other
+projects, including:
+
+      *	xine (http://xine.sourceforge.net/) - started as a simple
+	mpeg-2 audio and video decoder, but it since became a
+	full-featured DVD and video media player.
+
+      * VideoLAN (http://www.videolan.org/) - video streaming over an
+	ethernet network, can also be used as a standalone player.
+
+      *	MPlayer (http://www.MPlayerHQ.hu) - another good player, it is
+	also very robust against damaged streams.
+
+      *	movietime (http://movietime.sourceforge.net/) - still quite
+	young, but it looks very promising !
+
+      *	ffmpeg (http://ffmpeg.sourceforge.net/) - a nice audio/video
+	encoder and transcoder, uses liba52 for decoding A/52 streams.
+
+      *	Ogle (http://www.dtek.chalmers.se/groups/dvd/) - a good DVD
+	player with menu support
+
+      *	a52decX (http://homepage1.nifty.com/~toku/software_en.html) -
+	a graphical interface for a52dec in macintosh osX.
+
+      *	TCVP (http://tcvp.sf.net) - video and music player for unix.
+
+      *	bd4go (http://denisx.dyndns.org/bd4go/) - another graphical
+	interface for macintosh osX.
+
+      *	drip (http://drip.sourceforge.net/) - a DVD to DIVX transcoder.
+
+      *	OMS (http://www.linuxvideo.org/oms/)
+
+      *	XMPS (http://xmps.sourceforge.net/)
+
+      *	GStreamer (http://www.gstreamer.net/) - a framework for
+	streaming media; it has an A/52 decoding plugin based on liba52.
+
+      *	mpeglib (http://mpeglib.sourceforge.net/) - a video decoding
+	library that uses liba52 when decoding A/52 streams.
+
+If you use liba52 in another project, let us know !
+
+
+TASKS
+
+There are several places where we could easily use some help:
+
+      *	Web design: This site sucks ! at the very least, we'd like to
+	come up with a nicer background picture and a logo.
+
+      *	Testing: If you find any stream that does not decode right
+	with liba52, let us know ! The best thing would be to mail to
+	the liba52-devel mailing list. Also if you have access to
+	encoders, we'd love to get test streams that would be free of
+	rights - so that we can put them on this server.
+
+      *	Coding: you can have a look in the TODO file first ! The most
+	important item is probably to make the code fully reentrant.
+
+      *	Porting: If you're porting to a new architecture, you might
+	want to experiment with the compile flags defined in
+	configure.in . When you figure out what's fastest on your
+	platform, send us a patch !
+
+
+REFERENCES
+
+The A/52 standard, as published by the ATSC, is available at
+http://www.atsc.org/standards/a_52a.pdf
+
+
+CVS SNAPSHOTS
+
+A daily snapshot is created using "make distcheck" every night and
+uploaded to http://liba52.sourceforge.net/files/a52dec-snapshot.tar.gz .
+It is easier to use than the CVS repository, because you do not need
+to have the right versions of automake, autoconf and libtool
+installed. It might be convenient when working on a liba52 port for
+example.
+
+
+CVS REPOSITORY
+
+The latest liba52 and a52dec source code can always be found by
+anonymous CVS:
+
+# export CVSROOT=:pserver:anonymous@cvs.liba52.sourceforge.net:/cvsroot/liba52
+# cvs login (Just press Return when prompted for a password)
+# cvs checkout a52dec
+
+You can also browse the latest changes online at
+http://cvs.sourceforge.net/cgi-bin/viewcvs.cgi/liba52/a52dec/
+
+The other CVS modules are ac3dec-livid for the CVS history of the
+project while it was still hosted on the linuxvideo.org servers, and
+ac3dec for the CVS history of the project while the linuxvideo.org
+servers were down and before the library switched its name to liba52.
+
+
+MAILING LISTS
+
+See the subscription information at http://liba52.sourceforge.net/lists.html
+
+liba52-devel
+
+This is the main mailing list for technical discussion about
+liba52. Anyone wanting to work on liba52, or maybe just stay informed
+about the development process, should probably subscribe to this list.
+
+liba52-checkins
+
+All liba52 checkins are announced there. This is a good way to keep
+track of what goes into CVS.
+
+liba52-announce
+
+This is a very low traffic mailing list, only for announcements of new
+versions of liba52. Only project administrators can post there.
--- a/lib/rbcodec/codecs/liba52/README.rockbox
+++ b/lib/rbcodec/codecs/liba52/README.rockbox
@ -0,0 +1,27 @@
+Library: liba52-0.7.5 (CVS version 2005-02-16)
+Imported: 2005-02-16 by Dave Chapman
+
+
+This directory contains a local version of liba52 for decoding ATSC
+A/52 (aka AC-3) audio streams.  A/52 is commonly used in digital TV and
+on DVDs.
+
+LICENSING INFORMATION
+
+liba52 is released under the GNU General Public License as described
+in the COPYING file in this directory.
+
+
+IMPORT DETAILS
+
+The base version first imported into Rockbox was the CVS version of
+liba52-0.7.5 (0.7.4 was the latest official release at the time), which was
+checked out of sourceforge on 2005-02-16.
+
+The .[ch] files from a52dec/liba52/ and a52dec/include/ as well as the
+documentation files in a52dec/ were imported into Rockbox.  The other
+files in the archive relate to the test player (a52dec) and were not
+imported.
+
+A simple config.h file was added to enable liba52's fixed-point
+integer-only mode and to specify the endianness of the target CPU.
--- a/lib/rbcodec/codecs/liba52/SOURCES
+++ b/lib/rbcodec/codecs/liba52/SOURCES
@ -0,0 +1,5 @@
+bit_allocate.c
+bitstream.c
+downmix.c
+imdct.c
+parse.c
--- a/lib/rbcodec/codecs/liba52/TODO
+++ b/lib/rbcodec/codecs/liba52/TODO
@ -0,0 +1,17 @@
+* look at possible overflow/precision issues in integer port
+
+* redo all bit allocation if previous frame had zero_snr_offsets
+* make dynrng work in dual-channel streams
+
+* implement A/52a downmix extensions
+* reduce size of delay buffer by 50%
+* include float->s16 conversion in liba52 API ?
+* include up/downsampling 44100<->48000 in liba52 API ?
+* include audio dithering in liba52 API ?
+* API extensions might be at a different level (base vs. extended)
+
+* use restrict pointers where appropriate
+* avoid overflows, including reading the a52 stream !!!
+* faster bitstream parsing ?
+* make dither code faster (generate dither table in advance ?)
+* SIMD optimizations
--- a/lib/rbcodec/codecs/liba52/a52.h
+++ b/lib/rbcodec/codecs/liba52/a52.h
@ -0,0 +1,67 @@
+/*
+ * a52.h
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of a52dec, a free ATSC A-52 stream decoder.
+ * See http://liba52.sourceforge.net/ for updates.
+ *
+ * a52dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * a52dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef A52_H
+#define A52_H
+
+#if defined(LIBA52_FIXED) /* fixed-point build: samples and levels are 32-bit integers */
+typedef int32_t sample_t; /* this header includes nothing itself, so the includer must already provide int32_t */
+typedef int32_t level_t;
+#elif defined(LIBA52_DOUBLE) /* double-precision floating-point build */
+typedef double sample_t;
+typedef double level_t;
+#else /* neither macro defined: single-precision floating point */
+typedef float sample_t;
+typedef float level_t;
+#endif
+
+typedef struct a52_state_s a52_state_t; /* handle type used by the API below; struct a52_state_s itself is defined in a52_internal.h */
+
+#define A52_CHANNEL 0 /* codes 0-10 are mutually exclusive values that all fit within A52_CHANNEL_MASK */
+#define A52_MONO 1
+#define A52_STEREO 2
+#define A52_3F 3 /* NOTE(review): in these names F/R presumably count front/rear channels - confirm against liba52 docs */
+#define A52_2F1R 4
+#define A52_3F1R 5
+#define A52_2F2R 6
+#define A52_3F2R 7
+#define A52_CHANNEL1 8
+#define A52_CHANNEL2 9
+#define A52_DOLBY 10
+#define A52_CHANNEL_MASK 15 /* 0x0f: covers the low four bits, enough for every code above */
+
+#define A52_LFE 16 /* 0x10: single bit outside A52_CHANNEL_MASK, so it can be OR-ed with a code above */
+#define A52_ADJUST_LEVEL 32 /* 0x20: single bit outside A52_CHANNEL_MASK */
+
+a52_state_t * a52_init (uint32_t mm_accel); /* returns a state handle; NOTE(review): presumably released with a52_free() - confirm */
+sample_t * a52_samples (a52_state_t * state); /* returns a sample_t pointer associated with state */
+int a52_syncinfo (uint8_t * buf, int * flags, /* NOTE(review): flags, sample_rate and bit_rate look like outputs - confirm */
+                  int * sample_rate, int * bit_rate);
+int a52_frame (a52_state_t * state, uint8_t * buf, int * flags, /* level and bias use the build-selected level_t / sample_t types */
+               level_t * level, sample_t bias);
+void a52_dynrng (a52_state_t * state, /* takes a callback of type level_t (level_t, void *) plus a void * data argument */
+                 level_t (* call) (level_t, void *), void * data);
+int a52_block (a52_state_t * state);
+void a52_free (a52_state_t * state);
+
+#endif /* A52_H */
--- a/lib/rbcodec/codecs/liba52/a52_internal.h
+++ b/lib/rbcodec/codecs/liba52/a52_internal.h
@ -0,0 +1,215 @@
+/*
+ * a52_internal.h
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of a52dec, a free ATSC A-52 stream decoder.
+ * See http://liba52.sourceforge.net/ for updates.
+ *
+ * a52dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * a52dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*
+ * Bit allocation parameters for one channel.  a52_bit_allocate() reads
+ * the low three bits of `bai` as the fast gain code and the remaining
+ * upper bits (bai >> 3) as the fine SNR offset; `deltba` is only consulted
+ * when `deltbae` is not DELTA_BIT_NONE.
+ */
+typedef struct {
+    uint8_t bai;                /* fine SNR offset, fast gain */
+    uint8_t deltbae;            /* delta bit allocation exists */
+    int8_t deltba[50];          /* per-band delta bit allocation */
+} ba_t;
+
+/*
+ * Per-channel exponent / bit-allocation pair, one entry per frequency bin.
+ * a52_bit_allocate() reads `exp` and writes the matching `bap` entries.
+ */
+typedef struct {
+    uint8_t exp[256];           /* decoded channel exponents */
+    int8_t bap[256];            /* derived channel bit allocation */
+} expbap_t;
+
+/*
+ * Complete decoder state.  Callers of the public API only ever see this
+ * through the opaque a52_state_t typedef declared in a52.h.
+ */
+struct a52_state_s {
+    uint8_t fscod;              /* sample rate */
+    uint8_t halfrate;           /* halfrate factor */
+    uint8_t acmod;              /* coded channels */
+    uint8_t lfeon;              /* coded lfe channel */
+    level_t clev;               /* centre channel mix level */
+    level_t slev;               /* surround channels mix level */
+
+    int output;                 /* type of output */
+    level_t level;              /* output level */
+    sample_t bias;              /* output bias */
+
+    int dynrnge;                /* apply dynamic range */
+    level_t dynrng;             /* dynamic range */
+    void * dynrngdata;          /* dynamic range callback function and data */
+    level_t (* dynrngcall) (level_t range, void * dynrngdata);
+
+    uint8_t chincpl;            /* channel coupled */
+    uint8_t phsflginu;          /* phase flags in use (stereo only) */
+    uint8_t cplstrtmant;        /* coupling channel start mantissa */
+    uint8_t cplendmant;         /* coupling channel end mantissa */
+    uint32_t cplbndstrc;        /* coupling band structure */
+    level_t cplco[5][18];       /* coupling coordinates */
+
+    /* derived information */
+    uint8_t cplstrtbnd;         /* coupling start band (for bit allocation) */
+    uint8_t ncplbnd;            /* number of coupling bands */
+
+    uint8_t rematflg;           /* stereo rematrixing */
+
+    uint8_t endmant[5];         /* channel end mantissa */
+
+    uint16_t bai;               /* bit allocation information */
+
+    /*
+     * Bit reader state, maintained by bitstream.c / bitstream.h:
+     * buffer_start is the next 32-bit word to load, current_word the word
+     * being consumed and bits_left the number of unread bits in it.
+     */
+    uint32_t * buffer_start;
+    uint16_t lfsr_state;        /* dither state */
+    uint32_t bits_left;
+    uint32_t current_word;
+
+    uint8_t csnroffst;          /* coarse SNR offset */
+    ba_t cplba;                 /* coupling bit allocation parameters */
+    ba_t ba[5];                 /* channel bit allocation parameters */
+    ba_t lfeba;                 /* lfe bit allocation parameters */
+
+    uint8_t cplfleak;           /* coupling fast leak init */
+    uint8_t cplsleak;           /* coupling slow leak init */
+
+    /* exponents and bit allocation: coupling, full-bandwidth (x5), lfe */
+    expbap_t cpl_expbap;
+    expbap_t fbw_expbap[5];
+    expbap_t lfe_expbap;
+
+    sample_t * samples;
+    int downmixed;
+};
+
+/*
+ * Linear gain factors named after their approximate level in decibels:
+ * 2, sqrt(2), 1/sqrt(2), 2^(-3/4) and 1/2.  They are floating-point
+ * literals; the fixed-point build converts them with LEVEL().
+ */
+#define LEVEL_PLUS6DB 2.0
+#define LEVEL_PLUS3DB 1.4142135623730951
+#define LEVEL_3DB 0.7071067811865476
+#define LEVEL_45DB 0.5946035575013605
+#define LEVEL_6DB 0.5
+
+/* Exponent strategy codes (values 0..3). */
+#define EXP_REUSE (0)
+#define EXP_D15   (1)
+#define EXP_D25   (2)
+#define EXP_D45   (3)
+
+/*
+ * Values of ba_t.deltbae.  a52_bit_allocate() treats DELTA_BIT_NONE as
+ * "use an all-zero delta table" and otherwise reads ba_t.deltba.
+ */
+#define DELTA_BIT_REUSE (0)
+#define DELTA_BIT_NEW (1)
+#define DELTA_BIT_NONE (2)
+#define DELTA_BIT_RESERVED (3)
+
+/*
+ * Library-internal entry points shared between the liba52 source files.
+ */
+
+/* Bit allocation for one channel; implemented in bit_allocate.c. */
+void a52_bit_allocate (a52_state_t * state, ba_t * ba, int bndstart,
+                       int start, int end, int fastleak, int slowleak,
+                       expbap_t * expbap);
+
+/* Downmix / upmix helpers; a52_downmix_init() is implemented in downmix.c. */
+int a52_downmix_init (int input, int flags, level_t * level,
+                      level_t clev, level_t slev);
+int a52_downmix_coeff (level_t * coeff, int acmod, int output, level_t level,
+                       level_t clev, level_t slev);
+void a52_downmix (sample_t * samples, int acmod, int output, 
+                  level_t clev, level_t slev);
+void a52_upmix (sample_t * samples, int acmod, int output);
+
+/* Inverse MDCT routines operating on a block of samples and a delay buffer. */
+void a52_imdct_init (uint32_t mm_accel);
+void a52_imdct_256 (sample_t * data, sample_t * delay);
+void a52_imdct_512 (sample_t * data, sample_t * delay);
+
+/*
+ * Round a floating-point value to the nearest int, halves away from zero
+ * (adds +0.5 for positive x, -0.5 otherwise, then truncates).
+ * The argument is evaluated twice, so it must not have side effects.
+ */
+#define ROUND(x) ((int)((x) + ((x) > 0 ? 0.5 : -0.5)))
+
+/*
+ * Arithmetic helpers used throughout the decoder.  Two families exist:
+ * plain floating-point operators when LIBA52_FIXED is not defined, and
+ * fixed-point versions (with per-CPU variants) when it is.
+ *
+ *   SAMPLE(x)  convert a constant to sample_t
+ *   LEVEL(x)   convert a constant to level_t
+ *   MUL(a,b)   multiply two sample-scaled values
+ *   MUL_L(a,b) multiply by a level-scaled value
+ *   MUL_C(a,b) multiply by a constant given as a plain number
+ *   DIV(a,b)   divide a constant by a level-scaled value
+ *   BIAS(x)    apply the output bias
+ */
+#ifndef LIBA52_FIXED
+
+/*
+ * Floating-point build: everything maps to the native operators.
+ * BIAS() refers to a variable named `bias` that must be in scope wherever
+ * the macro is expanded.
+ */
+typedef sample_t quantizer_t;
+#define SAMPLE(x) (x)
+#define LEVEL(x) (x)
+#define MUL(a,b) ((a) * (b))
+#define MUL_L(a,b) ((a) * (b))
+#define MUL_C(a,b) ((a) * (b))
+#define DIV(a,b) ((a) / (b))
+#define BIAS(x) ((x) + bias)
+
+#else /* LIBA52_FIXED */
+
+/*
+ * Fixed-point build: SAMPLE() scales by 2^30 and LEVEL() by 2^26, so
+ * MUL() has to yield (a * b) >> 30 and MUL_L() (a * b) >> 26.
+ */
+typedef int16_t quantizer_t;
+#define SAMPLE(x) (sample_t)((x) * (1 << 30))
+#define LEVEL(x) (level_t)((x) * (1 << 26))
+
+/*
+ * Disabled reference implementation: exact 64-bit product with rounding.
+ * The variants below trade accuracy for speed.
+ */
+#if 0
+#define MUL(a,b) ((int)(((int64_t)(a) * (b) + (1 << 29)) >> 30))
+#define MUL_L(a,b) ((int)(((int64_t)(a) * (b) + (1 << 25)) >> 26))
+#elif defined(CPU_COLDFIRE)
+/*
+ * Coldfire: multiply-accumulate into %acc0, move the accumulator to a data
+ * register while clearing it, then shift left by 1 (MUL) or 5 (MUL_L).
+ * NOTE(review): presumably relies on the MAC unit being configured
+ * elsewhere so that the value read back is the product >> 31 -- confirm.
+ */
+/* loses 1 bit of accuracy */
+#define MUL(a, b) \
+({ \
+    int32_t t; \
+    asm volatile ( \
+        "mac.l %[A], %[B], %%acc0\n\t" \
+        "movclr.l %%acc0, %[t]\n\t" \
+        "asl.l #1, %[t]" \
+        : [t] "=d" (t) \
+        : [A] "r" ((a)), [B] "r" ((b))); \
+    t; \
+})
+/* loses 5 bits of accuracy */
+#define MUL_L(a, b) \
+({ \
+    int32_t t; \
+    asm volatile ( \
+        "mac.l %[A], %[B], %%acc0\n\t" \
+        "movclr.l %%acc0, %[t]\n\t" \
+        "asl.l #5, %[t]" \
+        : [t] "=d" (t) \
+        : [A] "r" ((a)), [B] "r" ((b))); \
+    t; \
+})
+
+#elif defined(CPU_ARM)
+/*
+ * ARM MUL: smull produces the 64-bit product in __lo/__hi; the result is
+ * __hi << 2, i.e. the product >> 30 with the low 32 bits discarded.
+ * movs updates the flags, hence the "cc" clobber.
+ */
+#define MUL(x, y)  \
+    ({ int32_t __hi;  \
+       uint32_t __lo;  \
+       int32_t __result;  \
+       asm ("smull   %0, %1, %3, %4\n\t"  \
+            "movs    %2, %1, lsl #2"  \
+            : "=&r" (__lo), "=&r" (__hi), "=r" (__result)  \
+            : "%r" (x), "r" (y)  \
+            : "cc");  \
+       __result;  \
+    })
+
+
+/*
+ * ARM MUL_L: 64-bit product >> 26.  The low word is shifted right by 26
+ * (the last bit shifted out lands in the carry flag) and then added, with
+ * carry, to the high word shifted left by 6 -- so the result is rounded.
+ */
+#define MUL_L(x, y)  \
+    ({ int32_t __hi;  \
+       uint32_t __lo;  \
+       int32_t __result;  \
+       asm ("smull   %0, %1, %3, %4\n\t"  \
+            "movs    %0, %0, lsr %5\n\t"  \
+            "adc    %2, %0, %1, lsl %6"  \
+            : "=&r" (__lo), "=&r" (__hi), "=r" (__result)  \
+            : "%r" (x), "r" (y),  \
+              "M" (26), "M" (32 - 26)  \
+            : "cc");  \
+       __result;  \
+    })
+    
+
+/*
+ * Portable fallback: split both operands into 16-bit halves, sum the two
+ * cross products (shifted right by 14 or 10) and add the high*high product
+ * (shifted left by 2 or 6).  The low*low partial product is dropped.
+ * Each argument is evaluated exactly once.
+ */
+#elif 1
+#define MUL(a,b) \
+({ int32_t _ta=(a), _tb=(b), _tc; \
+   _tc=(_ta & 0xffff)*(_tb >> 16)+(_ta >> 16)*(_tb & 0xffff); (int32_t)(((_tc >> 14))+ (((_ta >> 16)*(_tb >> 16)) << 2 )); })
+#define MUL_L(a,b) \
+({ int32_t _ta=(a), _tb=(b), _tc; \
+   _tc=(_ta & 0xffff)*(_tb >> 16)+(_ta >> 16)*(_tb & 0xffff); (int32_t)((_tc >> 10) + (((_ta >> 16)*(_tb >> 16)) << 6)); })
+#else
+/* Never selected (the branch above is "#elif 1"): coarse pre-shifted multiply. */
+#define MUL(a,b) (((a) >> 15) * ((b) >> 15))
+#define MUL_L(a,b) (((a) >> 13) * ((b) >> 13))
+#endif
+
+/*
+ * MUL_C converts its second argument with LEVEL() first; DIV converts the
+ * numerator with LEVEL(), widens to 64 bits and pre-shifts by 26 so the
+ * quotient is again level-scaled.  BIAS() is a no-op in fixed point.
+ */
+#define MUL_C(a,b) MUL_L (a, LEVEL (b))
+#define DIV(a,b) ((((int64_t)LEVEL (a)) << 26) / (b))
+#define BIAS(x) ((x))
+
+#endif
--- a/lib/rbcodec/codecs/liba52/attributes.h
+++ b/lib/rbcodec/codecs/liba52/attributes.h
@ -0,0 +1,37 @@
+/*
+ * attributes.h
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of a52dec, a free ATSC A-52 stream decoder.
+ * See http://liba52.sourceforge.net/ for updates.
+ *
+ * a52dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * a52dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*
+ * use gcc attribs to align critical data structures
+ *
+ * ATTR_ALIGN(align) requests `align`-byte alignment, capped at
+ * ATTRIBUTE_ALIGNED_MAX.  When ATTRIBUTE_ALIGNED_MAX is not defined the
+ * macro expands to nothing.  Arguments are parenthesized so that an
+ * expression argument cannot rebind inside the conditional.
+ */
+#ifdef ATTRIBUTE_ALIGNED_MAX
+#define ATTR_ALIGN(align) __attribute__ ((__aligned__ (((ATTRIBUTE_ALIGNED_MAX) < (align)) ? (ATTRIBUTE_ALIGNED_MAX) : (align))))
+#else
+#define ATTR_ALIGN(align)
+#endif
+
+/*
+ * Branch-prediction hints.  With HAVE_BUILTIN_EXPECT they wrap
+ * __builtin_expect and evaluate to the 0/1 result of (x) != 0; without it
+ * they evaluate to x itself, so only use them as truth values.
+ */
+#ifdef HAVE_BUILTIN_EXPECT
+#define likely(x) __builtin_expect ((x) != 0, 1)
+#define unlikely(x) __builtin_expect ((x) != 0, 0)
+#else
+#define likely(x) (x)
+#define unlikely(x) (x)
+#endif
--- a/lib/rbcodec/codecs/liba52/bit_allocate.c
+++ b/lib/rbcodec/codecs/liba52/bit_allocate.c
@ -0,0 +1,265 @@
+/*
+ * bit_allocate.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of a52dec, a free ATSC A-52 stream decoder.
+ * See http://liba52.sourceforge.net/ for updates.
+ *
+ * a52dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * a52dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config-a52.h"
+
+#include <inttypes.h>
+
+#include "a52.h"
+#include "a52_internal.h"
+
+/*
+ * Upper limit applied to the masking value in COMPUTE_MASK().  The row is
+ * chosen by state->fscod and the column by the band index shifted right by
+ * the halfrate factor (see a52_bit_allocate()).
+ * NOTE(review): IDATA_ATTR is defined outside this file; presumably it
+ * places the table in fast on-chip RAM -- confirm.
+ */
+static int hthtab[3][50] IDATA_ATTR = {
+    {0x730, 0x730, 0x7c0, 0x800, 0x820, 0x840, 0x850, 0x850, 0x860, 0x860,
+     0x860, 0x860, 0x860, 0x870, 0x870, 0x870, 0x880, 0x880, 0x890, 0x890,
+     0x8a0, 0x8a0, 0x8b0, 0x8b0, 0x8c0, 0x8c0, 0x8d0, 0x8e0, 0x8f0, 0x900,
+     0x910, 0x910, 0x910, 0x910, 0x900, 0x8f0, 0x8c0, 0x870, 0x820, 0x7e0,
+     0x7a0, 0x770, 0x760, 0x7a0, 0x7c0, 0x7c0, 0x6e0, 0x400, 0x3c0, 0x3c0},
+    {0x710, 0x710, 0x7a0, 0x7f0, 0x820, 0x830, 0x840, 0x850, 0x850, 0x860,
+     0x860, 0x860, 0x860, 0x860, 0x870, 0x870, 0x870, 0x880, 0x880, 0x880,
+     0x890, 0x890, 0x8a0, 0x8a0, 0x8b0, 0x8b0, 0x8c0, 0x8c0, 0x8e0, 0x8f0,
+     0x900, 0x910, 0x910, 0x910, 0x910, 0x900, 0x8e0, 0x8b0, 0x870, 0x820,
+     0x7e0, 0x7b0, 0x760, 0x770, 0x7a0, 0x7c0, 0x780, 0x5d0, 0x3c0, 0x3c0},
+    {0x680, 0x680, 0x750, 0x7b0, 0x7e0, 0x810, 0x820, 0x830, 0x840, 0x850,
+     0x850, 0x850, 0x860, 0x860, 0x860, 0x860, 0x860, 0x860, 0x860, 0x860,
+     0x870, 0x870, 0x870, 0x870, 0x880, 0x880, 0x880, 0x890, 0x8a0, 0x8b0,
+     0x8c0, 0x8d0, 0x8e0, 0x8f0, 0x900, 0x910, 0x910, 0x910, 0x900, 0x8f0,
+     0x8d0, 0x8b0, 0x840, 0x7f0, 0x790, 0x760, 0x7a0, 0x7c0, 0x7b0, 0x720}
+};
+
+/*
+ * Maps (mask + 4 * exponent) to a bit allocation value.  The table is
+ * always addressed through baptab+156 so that negative offsets are valid:
+ * 93 leading padding entries, 64 real entries, 148 trailing zero entries
+ * (93 + 64 + 148 = 305).  Everything from baptab+156 onwards is zero,
+ * which a52_bit_allocate() also reuses as an all-zero deltba[] array.
+ */
+static int8_t baptab[305] IDATA_ATTR = {
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, /* 93 padding elems */
+
+    16, 16, 16, 16, 16, 16, 16, 16, 16, 14, 14, 14, 14, 14, 14, 14,
+    14, 12, 12, 12, 12, 11, 11, 11, 11, 10, 10, 10, 10,  9,  9,  9,
+     9,  8,  8,  8,  8,  7,  7,  7,  7,  6,  6,  6,  6,  5,  5,  5,
+     5,  4,  4, -3, -3,  3,  3,  3, -2, -2, -1, -1, -1, -1, -1,  0,
+
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+     0,  0,  0,  0                                      /* 148 padding elems */
+};
+
+/*
+ * bndtab[k] is the exclusive end bin of band 20 + k: a52_bit_allocate()
+ * indexes it with (band - 20) and clips the value to `end`.
+ */
+static int bndtab[30] IDATA_ATTR = {21, 22,  23,  24,  25,  26,  27,  28,  31,  34,
+                         37, 40,  43,  46,  49,  55,  61,  67,  73,  79,
+                         85, 97, 109, 121, 133, 157, 181, 205, 229, 253};
+
+/*
+ * Correction added while a52_bit_allocate() merges the psd values of the
+ * bins in one band.  It is indexed by half the absolute difference of two
+ * psd values; entries run from -64 up to 0.
+ * NOTE(review): presumably the log-addition table of the A/52
+ * specification -- confirm.
+ */
+static int8_t latab[256] IDATA_ATTR = {
+    -64, -63, -62, -61, -60, -59, -58, -57, -56, -55, -54, -53,
+    -52, -52, -51, -50, -49, -48, -47, -47, -46, -45, -44, -44,
+    -43, -42, -41, -41, -40, -39, -38, -38, -37, -36, -36, -35,
+    -35, -34, -33, -33, -32, -32, -31, -30, -30, -29, -29, -28,
+    -28, -27, -27, -26, -26, -25, -25, -24, -24, -23, -23, -22,
+    -22, -21, -21, -21, -20, -20, -19, -19, -19, -18, -18, -18,
+    -17, -17, -17, -16, -16, -16, -15, -15, -15, -14, -14, -14,
+    -13, -13, -13, -13, -12, -12, -12, -12, -11, -11, -11, -11,
+    -10, -10, -10, -10, -10,  -9,  -9,  -9,  -9,  -9,  -8,  -8,
+     -8,  -8,  -8,  -8,  -7,  -7,  -7,  -7,  -7,  -7,  -6,  -6,
+     -6,  -6,  -6,  -6,  -6,  -6,  -5,  -5,  -5,  -5,  -5,  -5,
+     -5,  -5,  -4,  -4,  -4,  -4,  -4,  -4,  -4,  -4,  -4,  -4,
+     -4,  -3,  -3,  -3,  -3,  -3,  -3,  -3,  -3,  -3,  -3,  -3,
+     -3,  -3,  -3,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,
+     -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -1,  -1,
+     -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
+     -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,
+     -1,  -1,  -1,  -1,  -1,  -1,   0,   0,   0,   0,   0,   0,
+      0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+      0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+      0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+      0,   0,   0,   0
+};
+
+/*
+ * Advance the fast and slow leak terms by their decay steps and clamp each
+ * to at most psd plus its gain.  Operates directly on the caller's locals
+ * fastleak, slowleak, fdecay, sdecay, fgain, sgain and psd.
+ */
+#define UPDATE_LEAK()           \
+do {                            \
+    fastleak += fdecay;         \
+    if (fastleak > psd + fgain) \
+        fastleak = psd + fgain; \
+    slowleak += sdecay;         \
+    if (slowleak > psd + sgain) \
+        slowleak = psd + sgain; \
+} while (0)
+
+/*
+ * Turn the excitation value held in `mask` into the offset used to index
+ * baptab (the caller still adds 4 * exponent):
+ *   1. reduce it by a quarter of the amount psd exceeds dbknee, if any;
+ *   2. cap it at the hth[] entry for band i (index scaled by halfrate);
+ *   3. subtract the SNR offset and 128 * the band's delta allocation;
+ *   4. map positive values to 0, otherwise negate and divide by 32;
+ *   5. subtract floor.
+ * Operates on the caller's locals mask, psd, dbknee, hth, i, halfrate,
+ * snroffset, deltba and floor.
+ */
+#define COMPUTE_MASK()                          \
+do {                                            \
+    if (psd > dbknee)                           \
+        mask -= (psd - dbknee) >> 2;            \
+    if (mask > hth [i >> halfrate])             \
+        mask = hth [i >> halfrate];             \
+    mask -= snroffset + 128 * deltba[i];        \
+    mask = (mask > 0) ? 0 : ((-mask) >> 5);     \
+    mask -= floor;                              \
+} while (0)
+
+/*
+ * Compute the bit allocation (expbap->bap[]) for the bins [start, end) of
+ * one channel from its exponents (expbap->exp[]).
+ *
+ *   state     supplies halfrate, fscod, csnroffst and the packed `bai`
+ *             field holding the decay, gain, knee and floor codes
+ *   ba        per-channel parameters (fast gain, fine SNR offset, deltas)
+ *   bndstart  band index matching `start`
+ *   start     first bin; 0 selects the path with low-frequency handling,
+ *             any other value goes straight to the per-band loop
+ *   end       one past the last bin; 7 is treated as the lfe channel
+ *   fastleak, slowleak
+ *             initial leak values; only used as passed in when start != 0,
+ *             because the start == 0 path overwrites both
+ *   expbap    exponents in, bit allocation out
+ *
+ * NOTE(review): when start != 0 the per-band loop indexes bndtab with
+ * (bndstart - 20), so bndstart is presumably >= 20 on that path -- confirm
+ * against the callers.
+ */
+void a52_bit_allocate (a52_state_t * state, ba_t * ba, int bndstart,
+                       int start, int end, int fastleak, int slowleak,
+                       expbap_t * expbap)
+{
+    /* lookup tables indexed by the 2- and 3-bit codes unpacked below */
+    static int slowgain[4] = {0x540, 0x4d8, 0x478, 0x410};
+    static int dbpbtab[4]  = {0xc00, 0x500, 0x300, 0x100};
+    static int floortab[8] = {0x910, 0x950, 0x990, 0x9d0,
+                              0xa10, 0xa90, 0xb10, 0x1400};
+
+    int i, j;
+    uint8_t * exp;
+    int8_t * bap;
+    int fdecay, fgain, sdecay, sgain, dbknee, floor, snroffset;
+    int psd, mask;
+    int8_t * deltba;
+    int * hth;
+    int halfrate;
+
+    /* unpack the bit allocation parameters; decay steps scale with halfrate */
+    halfrate = state->halfrate;
+    fdecay = (63 + 20 * ((state->bai >> 7) & 3)) >> halfrate;   /* fdcycod */
+    fgain = 128 + 128 * (ba->bai & 7);                          /* fgaincod */
+    sdecay = (15 + 2 * (state->bai >> 9)) >> halfrate;          /* sdcycod */
+    sgain = slowgain[(state->bai >> 5) & 3];                    /* sgaincod */
+    dbknee = dbpbtab[(state->bai >> 3) & 3];                    /* dbpbcod */
+    hth = hthtab[state->fscod];
+    /*
+     * if there is no delta bit allocation, make deltba point to an area
+     * known to contain zeroes. baptab+156 here.
+     */
+    deltba = (ba->deltbae == DELTA_BIT_NONE) ? baptab + 156 : ba->deltba;
+    floor = floortab[state->bai & 7];                           /* floorcod */
+    /* snroffset uses the unscaled floor; floor itself is then divided by 32 */
+    snroffset = 960 - 64 * state->csnroffst - 4 * (ba->bai >> 3) + floor;
+    floor >>= 5;
+
+    exp = expbap->exp;
+    bap = expbap->bap;
+
+    /* i counts bands, j counts bins; for bands below 20 the two coincide */
+    i = bndstart;
+    j = start;
+    if (start == 0) {   /* not the coupling channel */
+        int lowcomp;
+
+        lowcomp = 0;
+        /* j temporarily holds the last valid bin, guarding the exp[i+1] reads */
+        j = end - 1;
+        /*
+         * Leading bins: no leak terms yet, the mask comes straight from
+         * psd + fgain + lowcomp.  Runs for at least three bins and keeps
+         * going (up to bin 7) while the exponents are still increasing.
+         */
+        do {
+            if (i < j) {
+                if (exp[i+1] == exp[i] - 2)
+                    lowcomp = 384;
+                else if (lowcomp && (exp[i+1] > exp[i]))
+                    lowcomp -= 64;
+            }
+            psd = 128 * exp[i];
+            mask = psd + fgain + lowcomp;
+            COMPUTE_MASK ();
+            bap[i] = (baptab+156)[mask + 4 * exp[i]];
+            i++;
+        } while ((i < 3) || ((i < 7) && (exp[i] > exp[i-1])));
+        /* seed the leak terms from the last bin handled above */
+        fastleak = psd + fgain;
+        slowleak = psd + sgain;
+
+        /* remaining bins below 7: leak terms active, lowcomp reset value 384 */
+        while (i < 7) {
+            if (i < j) {
+                if (exp[i+1] == exp[i] - 2)
+                    lowcomp = 384;
+                else if (lowcomp && (exp[i+1] > exp[i]))
+                    lowcomp -= 64;
+            }
+            psd = 128 * exp[i];
+            UPDATE_LEAK ();
+            mask = ((fastleak + lowcomp < slowleak) ?
+                    fastleak + lowcomp : slowleak);
+            COMPUTE_MASK ();
+            bap[i] = (baptab+156)[mask + 4 * exp[i]];
+            i++;
+        }
+
+        if (end == 7)   /* lfe channel */
+            return;
+
+        /*
+         * Bins 7..19: same scheme with a lowcomp reset value of 320.
+         * exp[i+1] is read here without the i < j guard used above.
+         */
+        do {
+            if (exp[i+1] == exp[i] - 2)
+                lowcomp = 320;
+            else if (lowcomp && (exp[i+1] > exp[i]))
+                lowcomp -= 64;
+            psd = 128 * exp[i];
+            UPDATE_LEAK ();
+            mask = ((fastleak + lowcomp < slowleak) ?
+                    fastleak + lowcomp : slowleak);
+            COMPUTE_MASK ();
+            bap[i] = (baptab+156)[mask + 4 * exp[i]];
+            i++;
+        } while (i < 20);
+
+        /* let any remaining lowcomp decay in steps of 128 over the next bins */
+        while (lowcomp > 128) {         /* two iterations maximum */
+            lowcomp -= 128;
+            psd = 128 * exp[i];
+            UPDATE_LEAK ();
+            mask = ((fastleak + lowcomp < slowleak) ?
+                    fastleak + lowcomp : slowleak);
+            COMPUTE_MASK ();
+            bap[i] = (baptab+156)[mask + 4 * exp[i]];
+            i++;
+        }
+        /* bands and bins are still one-to-one here, so resume bins at i */
+        j = i;
+    }
+
+    /*
+     * Per-band loop: each pass handles band i, which covers the bins from
+     * j up to bndtab[i-20] (clipped to end).  The bins' psd values are
+     * merged into one value, a single mask is computed for the band and
+     * then applied to every bin in it.
+     */
+    do {
+        int startband, endband;
+
+        startband = j;
+        endband = (bndtab[i-20] < end) ? bndtab[i-20] : end;
+        psd = 128 * exp[j++];
+        while (j < endband) {
+            int next, delta;
+
+            next = 128 * exp[j++];
+            delta = next - psd;
+            /*
+             * delta >> 9 buckets the difference in steps of 512:
+             *   below -512    take the new value as is
+             *   -512 .. -1    new value plus a latab correction
+             *   0 .. 511      keep psd, add a latab correction
+             *   512 and up    keep psd unchanged (no matching case)
+             */
+            switch (delta >> 9) {
+            case -6: case -5: case -4: case -3: case -2:
+                psd = next;
+                break;
+            case -1:
+                psd = next + latab[(-delta) >> 1];
+                break;
+            case 0:
+                psd += latab[delta >> 1];
+                break;
+            }
+        }
+        /* minpsd = -289 */
+        UPDATE_LEAK ();
+        mask = (fastleak < slowleak) ? fastleak : slowleak;
+        COMPUTE_MASK ();
+        i++;
+        /* rewind to the first bin of the band and fill in its bap entries */
+        j = startband;
+        do {
+            /* max(mask+4*exp)=147=-(minpsd+fgain-deltba-snroffset)>>5+4*exp */
+            /* min(mask+4*exp)=-156=-(sgain-deltba-snroffset)>>5 */
+            bap[j] = (baptab+156)[mask + 4 * exp[j]];
+        } while (++j < endband);
+    } while (j < end);
+}
--- a/lib/rbcodec/codecs/liba52/bitstream.c
+++ b/lib/rbcodec/codecs/liba52/bitstream.c
@ -0,0 +1,97 @@
+/*
+ * bitstream.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of a52dec, a free ATSC A-52 stream decoder.
+ * See http://liba52.sourceforge.net/ for updates.
+ *
+ * a52dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * a52dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config-a52.h"
+
+#include <inttypes.h>
+
+#include "a52.h"
+#include "a52_internal.h"
+#include "bitstream.h"
+
+/* Not referenced anywhere else in this file. */
+#define BUFFER_SIZE 4096
+
+/*
+ * Point the bit reader at `buf` and prime it.
+ *
+ * Words are always loaded from a 4-byte aligned address, so the pointer is
+ * rounded down and the `align` leading bytes that precede `buf` inside the
+ * first word are skipped by reading (and discarding) align * 8 bits.  That
+ * read also loads the first word, since bits_left starts at 0.
+ */
+void a52_bitstream_set_ptr (a52_state_t * state, uint8_t * buf)
+{
+    int align;
+
+    align = (long)buf & 3;
+    state->buffer_start = (uint32_t *) (buf - align);
+    state->bits_left = 0;
+    state->current_word = 0;
+    bitstream_get (state, align * 8);
+    /*
+     * Reference bitstream_get_2 so the compiler sees it as used.  It is
+     * deliberately not called: a zero-bit read would take the inline fast
+     * path and shift by 32 bits, which is undefined for a 32-bit operand.
+     */
+    (void) bitstream_get_2;
+}
+
+/*
+ * Load the next 32-bit word of the stream into state->current_word,
+ * converting it with swab32(), and advance the read pointer by one word.
+ */
+static inline void bitstream_fill_current (a52_state_t * state)
+{
+    /* Fetch into a local first so swab32() gets a side-effect-free operand. */
+    uint32_t raw_word = *state->buffer_start;
+
+    state->buffer_start++;
+    state->current_word = swab32 (raw_word);
+}
+
+/*
+ * The fast paths for _get are in the
+ * bitstream.h header file so it can be inlined.
+ *
+ * The "bottom half" of this routine is suffixed _bh
+ *
+ * -ah
+ */
+
+/*
+ * Slow path of bitstream_get(): used when the request needs every bit
+ * still left in current_word (and possibly more).
+ *
+ * The remaining low `bits_left` bits of the current word form the top of
+ * the result, the next word is loaded, and the bits still missing are
+ * taken from the top of that new word.  Returns the `num_bits` requested
+ * bits, right-aligned and zero-extended.
+ */
+uint32_t a52_bitstream_get_bh (a52_state_t * state, uint32_t num_bits)
+{
+    uint32_t result = 0;
+
+    num_bits -= state->bits_left;
+    /*
+     * bits_left is 0 on the first read after a52_bitstream_set_ptr();
+     * shifting a 32-bit value by 32 is undefined, so skip the extraction
+     * when there is nothing left to extract.
+     */
+    if (state->bits_left != 0)
+        result = ((state->current_word << (32 - state->bits_left)) >>
+                  (32 - state->bits_left));
+
+    bitstream_fill_current (state);
+
+    /* num_bits now counts the bits still owed from the fresh word */
+    if (num_bits != 0)
+        result = (result << num_bits) | (state->current_word >> (32 - num_bits));
+
+    state->bits_left = 32 - num_bits;
+
+    return result;
+}
+
+/*
+ * Slow path of bitstream_get_2(): same as a52_bitstream_get_bh() except
+ * that the bits left in the current word are sign-extended, so the result
+ * is a signed value.
+ *
+ * All left shifts are done on unsigned operands; only the sign-extending
+ * right shift operates on a signed value.  This yields the same bits as
+ * shifting the signed value directly on two's-complement targets without
+ * left-shifting a negative number.
+ */
+int32_t a52_bitstream_get_bh_2 (a52_state_t * state, uint32_t num_bits)
+{
+    int32_t result = 0;
+
+    num_bits -= state->bits_left;
+    /* a shift by 32 is undefined, so skip it when no bits are left */
+    if (state->bits_left != 0)
+        result = ((int32_t)(state->current_word << (32 - state->bits_left))) >>
+                 (32 - state->bits_left);
+
+    bitstream_fill_current(state);
+
+    /* num_bits now counts the bits still owed from the fresh word */
+    if (num_bits != 0)
+        result = (int32_t)(((uint32_t)result << num_bits) |
+                           (state->current_word >> (32 - num_bits)));
+
+    state->bits_left = 32 - num_bits;
+
+    return result;
+}
--- a/lib/rbcodec/codecs/liba52/bitstream.h
+++ b/lib/rbcodec/codecs/liba52/bitstream.h
@ -0,0 +1,54 @@
+/*
+ * bitstream.h
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of a52dec, a free ATSC A-52 stream decoder.
+ * See http://liba52.sourceforge.net/ for updates.
+ *
+ * a52dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * a52dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*
+ * Converts a word loaded from the stream before its bits are consumed.
+ * NOTE(review): betoh32 is defined outside this file; presumably it maps
+ * big-endian to host byte order -- confirm.
+ */
+#define swab32(x) (betoh32(x))
+
+/*
+ * Out-of-line parts of the bit reader, implemented in bitstream.c.  The
+ * _bh functions are the slow paths of the inline readers below.
+ */
+void a52_bitstream_set_ptr (a52_state_t * state, uint8_t * buf);
+uint32_t a52_bitstream_get_bh (a52_state_t * state, uint32_t num_bits);
+int32_t a52_bitstream_get_bh_2 (a52_state_t * state, uint32_t num_bits);
+
+/*
+ * Read `num_bits` bits from the stream as an unsigned value.
+ *
+ * When the current word holds strictly more unread bits than requested
+ * the bits are extracted inline; otherwise the out-of-line slow path
+ * a52_bitstream_get_bh() finishes the word and loads the next one.
+ */
+static inline uint32_t bitstream_get (a52_state_t * state, uint32_t num_bits)
+{
+    uint32_t top_aligned;
+
+    /* the current word would be used up: defer to the slow path */
+    if (num_bits >= state->bits_left)
+        return a52_bitstream_get_bh (state, num_bits);
+
+    /* drop the bits already consumed, then keep the top num_bits */
+    top_aligned = state->current_word << (32 - state->bits_left);
+    state->bits_left -= num_bits;
+    return top_aligned >> (32 - num_bits);
+}
+
+/*
+ * Read `num_bits` bits from the stream as a signed (sign-extended) value.
+ *
+ * When the current word holds strictly more unread bits than requested
+ * the bits are extracted inline; otherwise the out-of-line slow path
+ * a52_bitstream_get_bh_2() is used.
+ *
+ * The left shift is performed on the unsigned word and only the result is
+ * converted to signed for the sign-extending right shift, which avoids
+ * left-shifting a negative value.  With num_bits == 0 the fast path would
+ * shift right by 32, which is undefined, so such a call must not rely on
+ * the returned value.
+ */
+static inline int32_t bitstream_get_2 (a52_state_t * state, uint32_t num_bits)
+{
+    int32_t result;
+
+    if (num_bits < state->bits_left) {
+        result = ((int32_t)(state->current_word << (32 - state->bits_left))) >> (32 - num_bits);
+        state->bits_left -= num_bits;
+        return result;
+    }
+
+    return a52_bitstream_get_bh_2 (state, num_bits);
+}
--- a/lib/rbcodec/codecs/liba52/config-a52.h
+++ b/lib/rbcodec/codecs/liba52/config-a52.h
@ -0,0 +1,26 @@
+/*
+ * Build configuration for liba52; every liba52 source file includes this
+ * header first.
+ */
+#include "codeclib.h"
+
+/* a52dec profiling */
+/* #undef A52DEC_GPROF */
+
+/* Define to 1 if you have the `memalign' function. */
+/* #undef HAVE_MEMALIGN 1 */
+
+/* liba52 djbfft support */
+/* #undef LIBA52_DJBFFT */
+
+/* a52 sample precision */
+/* #undef LIBA52_DOUBLE */
+
+/* use fixed-point arithmetic (selects int32_t sample_t/level_t in a52.h) */
+#define LIBA52_FIXED
+
+/* Define to 1 if your processor stores words with the most significant byte
+   first (like Motorola and SPARC, unlike Intel and VAX). */
+
+/* Not used by the bitstream.h in this directory, which calls betoh32(). */
+
+#ifdef ROCKBOX_BIG_ENDIAN
+#define WORDS_BIGENDIAN 1
+#endif
+
--- a/lib/rbcodec/codecs/liba52/downmix.c
+++ b/lib/rbcodec/codecs/liba52/downmix.c
@ -0,0 +1,688 @@
+/*
+ * downmix.c
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of a52dec, a free ATSC A-52 stream decoder.
+ * See http://liba52.sourceforge.net/ for updates.
+ *
+ * a52dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * a52dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "config-a52.h"
+
+#include <string.h>
+#include <inttypes.h>
+
+#include "a52.h"
+#include "a52_internal.h"
+
+#define CONVERT(acmod,output) (((output) << 3) + (acmod))  /* one switch key per (acmod, output) pair: output above bit 3, acmod (callers pass 0..7) in the low three bits */
+/*
+ * Choose the output channel mode for a stream and, on request, rescale the
+ * caller's output level to go with the chosen conversion.
+ *
+ *   input  - channel mode of the stream; the table lookup and the level
+ *            switch use only (input & 7)
+ *   flags  - requested output: the mode is (flags & A52_CHANNEL_MASK), and
+ *            A52_ADJUST_LEVEL enables the rescaling of *level
+ *   level  - in/out; multiplied (MUL_L) by an adjustment factor when
+ *            A52_ADJUST_LEVEL is set and the (input, output) pair has one
+ *   clev, slev - presumably the center and surround mix levels (going by
+ *            the names; their encoding is defined outside this function)
+ *
+ * Returns the output mode that was selected, or -1 when the requested mode
+ * is greater than A52_DOLBY.
+ */
+int a52_downmix_init (int input, int flags, level_t * level,
+                      level_t clev, level_t slev)
+{
+    static uint8_t table[11][8] = {    /* [requested output mode][input & 7] -> output mode to use */
+        {A52_CHANNEL,   A52_DOLBY,      A52_STEREO,     A52_STEREO,
+         A52_STEREO,    A52_STEREO,     A52_STEREO,     A52_STEREO},
+        {A52_MONO,      A52_MONO,       A52_MONO,       A52_MONO,
+         A52_MONO,      A52_MONO,       A52_MONO,       A52_MONO},
+        {A52_CHANNEL,   A52_DOLBY,      A52_STEREO,     A52_STEREO,
+         A52_STEREO,    A52_STEREO,     A52_STEREO,     A52_STEREO},
+        {A52_CHANNEL,   A52_DOLBY,      A52_STEREO,     A52_3F,
+         A52_STEREO,    A52_3F,         A52_STEREO,     A52_3F},
+        {A52_CHANNEL,   A52_DOLBY,      A52_STEREO,     A52_STEREO,
+         A52_2F1R,      A52_2F1R,       A52_2F1R,       A52_2F1R},
+        {A52_CHANNEL,   A52_DOLBY,      A52_STEREO,     A52_STEREO,
+         A52_2F1R,      A52_3F1R,       A52_2F1R,       A52_3F1R},
+        {A52_CHANNEL,   A52_DOLBY,      A52_STEREO,     A52_3F,
+         A52_2F2R,      A52_2F2R,       A52_2F2R,       A52_2F2R},
+        {A52_CHANNEL,   A52_DOLBY,      A52_STEREO,     A52_3F,
+         A52_2F2R,      A52_3F2R,       A52_2F2R,       A52_3F2R},
+        {A52_CHANNEL1,  A52_MONO,       A52_MONO,       A52_MONO,
+         A52_MONO,      A52_MONO,       A52_MONO,       A52_MONO},
+        {A52_CHANNEL2,  A52_MONO,       A52_MONO,       A52_MONO,
+         A52_MONO,      A52_MONO,       A52_MONO,       A52_MONO},
+        {A52_CHANNEL,   A52_DOLBY,      A52_STEREO,     A52_DOLBY,
+         A52_DOLBY,     A52_DOLBY,      A52_DOLBY,      A52_DOLBY}
+    };
+    int output;
+
+    output = flags & A52_CHANNEL_MASK;
+    if (output > A52_DOLBY)    /* NOTE(review): the table has 11 rows, presumably A52_DOLBY == 10 - confirm in a52.h */
+        return -1;
+
+    output = table[output][input & 7];
+
+    if (output == A52_STEREO &&
+        (input == A52_DOLBY || (input == A52_3F && clev == LEVEL (LEVEL_3DB))))    /* compares the whole of input, not input & 7 */
+        output = A52_DOLBY;    /* a stereo result is replaced by A52_DOLBY for these inputs */
+
+    if (flags & A52_ADJUST_LEVEL) {
+        level_t adjust;
+
+        switch (CONVERT (input & 7, output)) {
+
+        case CONVERT (A52_3F, A52_MONO):
+            adjust = DIV (LEVEL_3DB, LEVEL (1) + clev);
+            break;
+
+        case CONVERT (A52_STEREO, A52_MONO):
+        case CONVERT (A52_2F2R, A52_2F1R):
+        case CONVERT (A52_3F2R, A52_3F1R):
+        level_3db:    /* also entered by goto from the 3F2R -> 2F1R case below */
+            adjust = LEVEL (LEVEL_3DB);
+            break;
+
+        case CONVERT (A52_3F2R, A52_2F1R):
+            if (clev < LEVEL (LEVEL_PLUS3DB - 1))
+                goto level_3db;
+            /* break thru (intentional fall-through into the next case group) */
+        case CONVERT (A52_3F, A52_STEREO):
+        case CONVERT (A52_3F1R, A52_2F1R):
+        case CONVERT (A52_3F1R, A52_2F2R):
+        case CONVERT (A52_3F2R, A52_2F2R):
+            adjust = DIV (1, LEVEL (1) + clev);
+            break;
+
+        case CONVERT (A52_2F1R, A52_MONO):
+            adjust = DIV (LEVEL_PLUS3DB, LEVEL (2) + slev);
+            break;
+
+        case CONVERT (A52_2F1R, A52_STEREO):
+        case CONVERT (A52_3F1R, A52_3F):
+            adjust = DIV (1, LEVEL (1) + MUL_C (slev, LEVEL_3DB));
+            break;
+
+        case CONVERT (A52_3F1R, A52_MONO):
+            adjust = DIV (LEVEL_3DB, LEVEL (1) + clev + MUL_C (slev, 0.5));
+            break;
+
+        case CONVERT (A52_3F1R, A52_STEREO):
+            adjust = DIV (1, LEVEL (1) + clev + MUL_C (slev, LEVEL_3DB));
+            break;
+
+        case CONVERT (A52_2F2R, A52_MONO):
+            adjust = DIV (LEVEL_3DB, LEVEL (1) + slev);
+            break;
+
+        case CONVERT (A52_2F2R, A52_STEREO):
+        case CONVERT (A52_3F2R, A52_3F):
+            adjust = DIV (1, LEVEL (1) + slev);
+            break;
+
+        case CONVERT (A52_3F2R, A52_MONO):
+            adjust = DIV (LEVEL_3DB, LEVEL (1) + clev + slev);
+            break;
+
+        case CONVERT (A52_3F2R, A52_STEREO):
+            adjust = DIV (1, LEVEL (1) + clev + slev);
+            break;
+
+        case CONVERT (A52_MONO, A52_DOLBY):
+            adjust = LEVEL (LEVEL_PLUS3DB);
+            break;
+
+        case CONVERT (A52_3F, A52_DOLBY):
+        case CONVERT (A52_2F1R, A52_DOLBY):
+            adjust = LEVEL (1 / (1 + LEVEL_3DB));
+            break;
+
+        case CONVERT (A52_3F1R, A52_DOLBY):
+        case CONVERT (A52_2F2R, A52_DOLBY):
+            adjust = LEVEL (1 / (1 + 2 * LEVEL_3DB));
+            break;
+
+        case CONVERT (A52_3F2R, A52_DOLBY):
+            adjust = LEVEL (1 / (1 + 3 * LEVEL_3DB));
+            break;
+
+        default:
+            return output;    /* no factor for this pair: *level is left as it was */
+        }
+
+        *level = MUL_L (*level, adjust);
+    }
+
+    return output;
+}
+/*
+ * Fill coeff[] with the gains for converting from the acmod channel layout
+ * to the (output & A52_CHANNEL_MASK) layout.
+ *
+ * Each gain written is level itself, level scaled by a LEVEL_* constant
+ * and/or by clev or slev, or 0.  Only the entries a given conversion needs
+ * are written (never beyond coeff[4]); the remaining entries are left
+ * untouched.
+ *
+ * Returns an integer that depends only on the conversion.
+ * NOTE(review): the values used (0, 3, 7, 12, 13, 15, 24, 29, 31) look like
+ * a bitmask over coeff[] indices - confirm the meaning against the caller.
+ * Returns -1 when the (acmod, output) pair matches no case.
+ */
+int a52_downmix_coeff (level_t * coeff, int acmod, int output, level_t level,
+                       level_t clev, level_t slev)
+{
+    level_t level_3db;
+
+    level_3db = MUL_C (level, LEVEL_3DB);    /* level scaled by LEVEL_3DB, reused by many cases below */
+
+    switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
+
+    case CONVERT (A52_CHANNEL, A52_CHANNEL):
+    case CONVERT (A52_MONO, A52_MONO):
+    case CONVERT (A52_STEREO, A52_STEREO):
+    case CONVERT (A52_3F, A52_3F):
+    case CONVERT (A52_2F1R, A52_2F1R):
+    case CONVERT (A52_3F1R, A52_3F1R):
+    case CONVERT (A52_2F2R, A52_2F2R):
+    case CONVERT (A52_3F2R, A52_3F2R):
+    case CONVERT (A52_STEREO, A52_DOLBY):
+        coeff[0] = coeff[1] = coeff[2] = coeff[3] = coeff[4] = level;    /* same layout in and out (plus stereo -> Dolby): one uniform gain */
+        return 0;
+
+    case CONVERT (A52_CHANNEL, A52_MONO):
+        coeff[0] = coeff[1] = MUL_C (level, LEVEL_6DB);
+        return 3;
+
+    case CONVERT (A52_STEREO, A52_MONO):
+        coeff[0] = coeff[1] = level_3db;
+        return 3;
+
+    case CONVERT (A52_3F, A52_MONO):
+        coeff[0] = coeff[2] = level_3db;
+        coeff[1] = MUL_C (MUL_L (level_3db, clev), LEVEL_PLUS6DB);
+        return 7;
+
+    case CONVERT (A52_2F1R, A52_MONO):
+        coeff[0] = coeff[1] = level_3db;
+        coeff[2] = MUL_L (level_3db, slev);
+        return 7;
+
+    case CONVERT (A52_2F2R, A52_MONO):
+        coeff[0] = coeff[1] = level_3db;
+        coeff[2] = coeff[3] = MUL_L (level_3db, slev);
+        return 15;
+
+    case CONVERT (A52_3F1R, A52_MONO):
+        coeff[0] = coeff[2] = level_3db;
+        coeff[1] = MUL_C (MUL_L (level_3db, clev), LEVEL_PLUS6DB);
+        coeff[3] = MUL_L (level_3db, slev);
+        return 15;
+
+    case CONVERT (A52_3F2R, A52_MONO):
+        coeff[0] = coeff[2] = level_3db;
+        coeff[1] = MUL_C (MUL_L (level_3db, clev), LEVEL_PLUS6DB);
+        coeff[3] = coeff[4] = MUL_L (level_3db, slev);
+        return 31;
+
+    case CONVERT (A52_MONO, A52_DOLBY):
+        coeff[0] = level_3db;
+        return 0;
+
+    case CONVERT (A52_3F, A52_DOLBY):
+        coeff[0] = coeff[2] = coeff[3] = coeff[4] = level;
+        coeff[1] = level_3db;
+        return 7;
+
+    case CONVERT (A52_3F, A52_STEREO):
+    case CONVERT (A52_3F1R, A52_2F1R):
+    case CONVERT (A52_3F2R, A52_2F2R):
+        coeff[0] = coeff[2] = coeff[3] = coeff[4] = level;
+        coeff[1] = MUL_L (level, clev);
+        return 7;
+
+    case CONVERT (A52_2F1R, A52_DOLBY):
+        coeff[0] = coeff[1] = level;
+        coeff[2] = level_3db;
+        return 7;
+
+    case CONVERT (A52_2F1R, A52_STEREO):
+        coeff[0] = coeff[1] = level;
+        coeff[2] = MUL_L (level_3db, slev);
+        return 7;
+
+    case CONVERT (A52_3F1R, A52_DOLBY):
+        coeff[0] = coeff[2] = level;
+        coeff[1] = coeff[3] = level_3db;
+        return 15;
+
+    case CONVERT (A52_3F1R, A52_STEREO):
+        coeff[0] = coeff[2] = level;
+        coeff[1] = MUL_L (level, clev);
+        coeff[3] = MUL_L (level_3db, slev);
+        return 15;
+
+    case CONVERT (A52_2F2R, A52_DOLBY):
+        coeff[0] = coeff[1] = level;
+        coeff[2] = coeff[3] = level_3db;
+        return 15;
+
+    case CONVERT (A52_2F2R, A52_STEREO):
+        coeff[0] = coeff[1] = level;
+        coeff[2] = coeff[3] = MUL_L (level, slev);
+        return 15;
+
+    case CONVERT (A52_3F2R, A52_DOLBY):
+        coeff[0] = coeff[2] = level;
+        coeff[1] = coeff[3] = coeff[4] = level_3db;
+        return 31;
+
+    case CONVERT (A52_3F2R, A52_2F1R):
+        coeff[0] = coeff[2] = level;
+        coeff[1] = MUL_L (level, clev);
+        coeff[3] = coeff[4] = level_3db;
+        return 31;
+
+    case CONVERT (A52_3F2R, A52_STEREO):
+        coeff[0] = coeff[2] = level;
+        coeff[1] = MUL_L (level, clev);
+        coeff[3] = coeff[4] = MUL_L (level, slev);
+        return 31;
+
+    case CONVERT (A52_3F1R, A52_3F):
+        coeff[0] = coeff[1] = coeff[2] = level;
+        coeff[3] = MUL_L (level_3db, slev);
+        return 13;
+
+    case CONVERT (A52_3F2R, A52_3F):
+        coeff[0] = coeff[1] = coeff[2] = level;
+        coeff[3] = coeff[4] = MUL_L (level, slev);
+        return 29;
+
+    case CONVERT (A52_2F2R, A52_2F1R):
+        coeff[0] = coeff[1] = level;
+        coeff[2] = coeff[3] = level_3db;
+        return 12;
+
+    case CONVERT (A52_3F2R, A52_3F1R):
+        coeff[0] = coeff[1] = coeff[2] = level;
+        coeff[3] = coeff[4] = level_3db;
+        return 24;
+
+    case CONVERT (A52_2F1R, A52_2F2R):
+        coeff[0] = coeff[1] = level;
+        coeff[2] = level_3db;
+        return 0;
+
+    case CONVERT (A52_3F1R, A52_2F2R):
+        coeff[0] = coeff[2] = level;
+        coeff[1] = MUL_L (level, clev);
+        coeff[3] = level_3db;
+        return 7;
+
+    case CONVERT (A52_3F1R, A52_3F2R):
+        coeff[0] = coeff[1] = coeff[2] = level;
+        coeff[3] = level_3db;
+        return 0;
+
+    case CONVERT (A52_CHANNEL, A52_CHANNEL1):
+        coeff[0] = level;
+        coeff[1] = 0;
+        return 0;
+
+    case CONVERT (A52_CHANNEL, A52_CHANNEL2):
+        coeff[0] = 0;
+        coeff[1] = level;
+        return 0;
+    }
+
+    return -1;  /* NOTREACHED (only taken when no case above matched) */
+}
+
+/*
+ * Accumulate one 256-sample block into another:
+ * dest[n] += BIAS (src[n]) for each of the 256 samples.
+ */
+static void mix2to1 (sample_t * dest, sample_t * src)
+{
+    int n = 0;
+
+    while (n < 256) {
+        dest[n] += BIAS (src[n]);
+        n++;
+    }
+}
+
+/*
+ * Fold the second and third 256-sample blocks of samples[] into the first
+ * one.  The two blocks are summed, passed through BIAS() and added to
+ * block 0.
+ */
+static void mix3to1 (sample_t * samples)
+{
+    sample_t *blk1 = samples + 256;
+    sample_t *blk2 = samples + 512;
+    int n;
+
+    for (n = 0; n < 256; n++)
+        samples[n] += BIAS (blk1[n] + blk2[n]);
+}
+
+/*
+ * Fold blocks 1..3 of samples[] (256 samples each) into block 0.  The three
+ * blocks are summed left to right, passed through BIAS() and added.
+ */
+static void mix4to1 (sample_t * samples)
+{
+    sample_t *blk1 = samples + 256;
+    sample_t *blk2 = samples + 512;
+    sample_t *blk3 = samples + 768;
+    int n;
+
+    for (n = 0; n < 256; n++)
+        samples[n] += BIAS (blk1[n] + blk2[n] + blk3[n]);
+}
+
+/*
+ * Fold blocks 1..4 of samples[] (256 samples each) into block 0.  The four
+ * blocks are summed left to right, passed through BIAS() and added.
+ */
+static void mix5to1 (sample_t * samples)
+{
+    sample_t *blk1 = samples + 256;
+    sample_t *blk2 = samples + 512;
+    sample_t *blk3 = samples + 768;
+    sample_t *blk4 = samples + 1024;
+    int n;
+
+    for (n = 0; n < 256; n++)
+        samples[n] += BIAS (blk1[n] + blk2[n] + blk3[n] + blk4[n]);
+}
+
+/*
+ * Reduce three 256-sample blocks to two, in place: the BIAS()-adjusted
+ * block 1 is added to block 0, and block 1 is then overwritten with
+ * block 2 plus that same value.
+ */
+static void mix3to2 (sample_t * samples)
+{
+    sample_t *blk1 = samples + 256;
+    sample_t *blk2 = samples + 512;
+    int n;
+
+    for (n = 0; n < 256; n++) {
+        sample_t shared = BIAS (blk1[n]);
+
+        samples[n] += shared;
+        blk1[n] = blk2[n] + shared;
+    }
+}
+
+/*
+ * Add the BIAS()-adjusted block that starts 256 samples after right[] to
+ * both left[] and right[].
+ */
+static void mix21to2 (sample_t * left, sample_t * right)
+{
+    sample_t *extra = right + 256;
+    int n;
+
+    for (n = 0; n < 256; n++) {
+        sample_t shared = BIAS (extra[n]);
+
+        left[n] += shared;
+        right[n] += shared;
+    }
+}
+
+/*
+ * Mix block 2 of samples[] into blocks 0 and 1 with opposite signs:
+ * block 0 gets BIAS (-x), block 1 gets BIAS (x).
+ */
+static void mix21toS (sample_t * samples)
+{
+    sample_t *blk1 = samples + 256;
+    sample_t *blk2 = samples + 512;
+    int n;
+
+    for (n = 0; n < 256; n++) {
+        sample_t rear = blk2[n];
+
+        samples[n] += BIAS (-rear);
+        blk1[n] += BIAS (rear);
+    }
+}
+
+/*
+ * Reduce four 256-sample blocks to two, in place: BIAS (block 1 + block 3)
+ * is added to block 0, and block 1 is then overwritten with block 2 plus
+ * that same value.
+ */
+static void mix31to2 (sample_t * samples)
+{
+    sample_t *blk1 = samples + 256;
+    sample_t *blk2 = samples + 512;
+    sample_t *blk3 = samples + 768;
+    int n;
+
+    for (n = 0; n < 256; n++) {
+        sample_t shared = BIAS (blk1[n] + blk3[n]);
+
+        samples[n] += shared;
+        blk1[n] = blk2[n] + shared;
+    }
+}
+
+/*
+ * Reduce four 256-sample blocks to two, in place.  With c = BIAS (block 1)
+ * and s = block 3: block 0 gains (c - s), and block 1 is overwritten with
+ * block 2 + c + s.
+ */
+static void mix31toS (sample_t * samples)
+{
+    sample_t *blk1 = samples + 256;
+    sample_t *blk2 = samples + 512;
+    sample_t *blk3 = samples + 768;
+    int n;
+
+    for (n = 0; n < 256; n++) {
+        sample_t shared = BIAS (blk1[n]);
+        sample_t rear = blk3[n];
+
+        samples[n] += shared - rear;
+        blk1[n] = blk2[n] + shared + rear;
+    }
+}
+
+/*
+ * Mix the sum of blocks 2 and 3 of samples[] into blocks 0 and 1 with
+ * opposite signs: block 0 gets BIAS (-x), block 1 gets BIAS (x).
+ */
+static void mix22toS (sample_t * samples)
+{
+    sample_t *blk1 = samples + 256;
+    sample_t *blk2 = samples + 512;
+    sample_t *blk3 = samples + 768;
+    int n;
+
+    for (n = 0; n < 256; n++) {
+        sample_t rear = blk2[n] + blk3[n];
+
+        samples[n] += BIAS (-rear);
+        blk1[n] += BIAS (rear);
+    }
+}
+
+/*
+ * Reduce five 256-sample blocks to two, in place.  With c = BIAS (block 1):
+ * block 0 gains c + block 3, and block 1 is overwritten with
+ * c + block 2 + block 4.
+ */
+static void mix32to2 (sample_t * samples)
+{
+    sample_t *blk1 = samples + 256;
+    sample_t *blk2 = samples + 512;
+    sample_t *blk3 = samples + 768;
+    sample_t *blk4 = samples + 1024;
+    int n;
+
+    for (n = 0; n < 256; n++) {
+        sample_t shared = BIAS (blk1[n]);
+
+        samples[n] += shared + blk3[n];
+        blk1[n] = shared + blk2[n] + blk4[n];
+    }
+}
+
+/*
+ * Reduce five 256-sample blocks to two, in place.  With c = BIAS (block 1)
+ * and s = block 3 + block 4: block 0 gains (c - s), and block 1 is
+ * overwritten with block 2 + c + s.
+ */
+static void mix32toS (sample_t * samples)
+{
+    sample_t *blk1 = samples + 256;
+    sample_t *blk2 = samples + 512;
+    sample_t *blk3 = samples + 768;
+    sample_t *blk4 = samples + 1024;
+    int n;
+
+    for (n = 0; n < 256; n++) {
+        sample_t shared = BIAS (blk1[n]);
+        sample_t rear = blk3[n] + blk4[n];
+
+        samples[n] += shared - rear;
+        blk1[n] = blk2[n] + shared + rear;
+    }
+}
+
+/*
+ * Write BIAS (src block 0 + src block 1) into dest, 256 samples in all.
+ * dest is overwritten rather than accumulated into.
+ */
+static void move2to1 (sample_t * src, sample_t * dest)
+{
+    sample_t *second = src + 256;
+    int n = 0;
+
+    while (n < 256) {
+        dest[n] = BIAS (src[n] + second[n]);
+        n++;
+    }
+}
+
+/* Set one 256-sample block to 0. */
+static void zero (sample_t * samples)
+{
+    sample_t *end = samples + 256;
+
+    while (samples != end)
+        *samples++ = 0;
+}
+/*
+ * Downmix samples in place from the acmod channel layout to the
+ * (output & A52_CHANNEL_MASK) layout.
+ *
+ * samples holds the channels back to back, 256 samples each, so channel k
+ * starts at samples + 256 * k.  The work is done by the mix*(), move2to1()
+ * and memcpy() calls below, which add, negate and copy whole blocks.
+ *
+ * clev is not used.  Where a case tests slev == 0, a cheaper mix that leaves
+ * out the trailing block(s) is used instead, or nothing is done at all.
+ * Combinations with no case leave samples untouched.
+ */
+void a52_downmix (sample_t * samples, int acmod, int output,
+                  level_t clev, level_t slev)
+{
+    /* avoid compiler warning */
+    (void)clev;
+
+    switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
+
+    case CONVERT (A52_CHANNEL, A52_CHANNEL2):
+        memcpy (samples, samples + 256, 256 * sizeof (sample_t));    /* block 1 replaces block 0 */
+        break;
+
+    case CONVERT (A52_CHANNEL, A52_MONO):
+    case CONVERT (A52_STEREO, A52_MONO):
+    mix_2to1:
+        mix2to1 (samples, samples + 256);
+        break;
+
+    case CONVERT (A52_2F1R, A52_MONO):
+        if (slev == 0)
+            goto mix_2to1;
+    case CONVERT (A52_3F, A52_MONO):    /* 2F1R with slev != 0 falls through to here */
+    mix_3to1:
+        mix3to1 (samples);
+        break;
+
+    case CONVERT (A52_3F1R, A52_MONO):
+        if (slev == 0)
+            goto mix_3to1;
+    case CONVERT (A52_2F2R, A52_MONO):    /* 3F1R with slev != 0 falls through to here */
+        if (slev == 0)
+            goto mix_2to1;
+        mix4to1 (samples);
+        break;
+
+    case CONVERT (A52_3F2R, A52_MONO):
+        if (slev == 0)
+            goto mix_3to1;
+        mix5to1 (samples);
+        break;
+
+    case CONVERT (A52_MONO, A52_DOLBY):
+        memcpy (samples + 256, samples, 256 * sizeof (sample_t));    /* duplicate block 0 into block 1 */
+        break;
+
+    case CONVERT (A52_3F, A52_STEREO):
+    case CONVERT (A52_3F, A52_DOLBY):
+    mix_3to2:
+        mix3to2 (samples);
+        break;
+
+    case CONVERT (A52_2F1R, A52_STEREO):
+        if (slev == 0)
+            break;
+        mix21to2 (samples, samples + 256);
+        break;
+
+    case CONVERT (A52_2F1R, A52_DOLBY):
+        mix21toS (samples);
+        break;
+
+    case CONVERT (A52_3F1R, A52_STEREO):
+        if (slev == 0)
+            goto mix_3to2;
+        mix31to2 (samples);
+        break;
+
+    case CONVERT (A52_3F1R, A52_DOLBY):
+        mix31toS (samples);
+        break;
+
+    case CONVERT (A52_2F2R, A52_STEREO):
+        if (slev == 0)
+            break;
+        mix2to1 (samples, samples + 512);          /* block 2 into block 0 */
+        mix2to1 (samples + 256, samples + 768);    /* block 3 into block 1 */
+        break;
+
+    case CONVERT (A52_2F2R, A52_DOLBY):
+        mix22toS (samples);
+        break;
+
+    case CONVERT (A52_3F2R, A52_STEREO):
+        if (slev == 0)
+            goto mix_3to2;
+        mix32to2 (samples);
+        break;
+
+    case CONVERT (A52_3F2R, A52_DOLBY):
+        mix32toS (samples);
+        break;
+
+    case CONVERT (A52_3F1R, A52_3F):
+        if (slev == 0)
+            break;
+        mix21to2 (samples, samples + 512);    /* block 3 into blocks 0 and 2 */
+        break;
+
+    case CONVERT (A52_3F2R, A52_3F):
+        if (slev == 0)
+            break;
+        mix2to1 (samples, samples + 768);           /* block 3 into block 0 */
+        mix2to1 (samples + 512, samples + 1024);    /* block 4 into block 2 */
+        break;
+
+    case CONVERT (A52_3F1R, A52_2F1R):
+        mix3to2 (samples);
+        memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));    /* move block 3 down to block 2 */
+        break;
+
+    case CONVERT (A52_2F2R, A52_2F1R):
+        mix2to1 (samples + 512, samples + 768);    /* block 3 into block 2 */
+        break;
+
+    case CONVERT (A52_3F2R, A52_2F1R):
+        mix3to2 (samples);
+        move2to1 (samples + 768, samples + 512);    /* blocks 3 + 4 written to block 2 */
+        break;
+
+    case CONVERT (A52_3F2R, A52_3F1R):
+        mix2to1 (samples + 768, samples + 1024);    /* block 4 into block 3 */
+        break;
+
+    case CONVERT (A52_2F1R, A52_2F2R):
+        memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));    /* duplicate block 2 into block 3 */
+        break;
+
+    case CONVERT (A52_3F1R, A52_2F2R):
+        mix3to2 (samples);
+        memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));    /* move block 3 down to block 2 */
+        break;
+
+    case CONVERT (A52_3F2R, A52_2F2R):
+        mix3to2 (samples);
+        memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));     /* move block 3 down to block 2 */
+        memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t));    /* move block 4 down to block 3 */
+        break;
+
+    case CONVERT (A52_3F1R, A52_3F2R):
+        memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));    /* duplicate block 3 into block 4 */
+        break;
+    }
+}
+/*
+ * Rearrange the 256-sample channel blocks of samples for the
+ * (acmod, output & A52_CHANNEL_MASK) pair.  Blocks are only zeroed or
+ * copied here; nothing is summed.
+ *
+ * NOTE(review): this looks like the counterpart of a52_downmix(), spreading
+ * an already reduced signal back over the acmod layout with the unused
+ * channels silenced - confirm against the caller.
+ *
+ * Most case groups fall through on purpose, so a layout with more channels
+ * performs its own step and then every step of the smaller layouts below
+ * it.  Combinations with no case leave samples untouched.
+ */
+void a52_upmix (sample_t * samples, int acmod, int output)
+{
+    switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
+
+    case CONVERT (A52_CHANNEL, A52_CHANNEL2):
+        memcpy (samples + 256, samples, 256 * sizeof (sample_t));    /* duplicate block 0 into block 1 */
+        break;
+
+    case CONVERT (A52_3F2R, A52_MONO):
+        zero (samples + 1024);    /* fall through */
+    case CONVERT (A52_3F1R, A52_MONO):
+    case CONVERT (A52_2F2R, A52_MONO):
+        zero (samples + 768);     /* fall through */
+    case CONVERT (A52_3F, A52_MONO):
+    case CONVERT (A52_2F1R, A52_MONO):
+        zero (samples + 512);     /* fall through */
+    case CONVERT (A52_CHANNEL, A52_MONO):
+    case CONVERT (A52_STEREO, A52_MONO):
+        zero (samples + 256);
+        break;
+
+    case CONVERT (A52_3F2R, A52_STEREO):
+    case CONVERT (A52_3F2R, A52_DOLBY):
+        zero (samples + 1024);    /* fall through */
+    case CONVERT (A52_3F1R, A52_STEREO):
+    case CONVERT (A52_3F1R, A52_DOLBY):
+        zero (samples + 768);     /* fall through */
+    case CONVERT (A52_3F, A52_STEREO):
+    case CONVERT (A52_3F, A52_DOLBY):
+    mix_3to2:    /* also entered by goto from the 2F1R-output cases below */
+        memcpy (samples + 512, samples + 256, 256 * sizeof (sample_t));    /* move block 1 up to block 2 ... */
+        zero (samples + 256);                                              /* ... and clear block 1 */
+        break;
+
+    case CONVERT (A52_2F2R, A52_STEREO):
+    case CONVERT (A52_2F2R, A52_DOLBY):
+        zero (samples + 768);     /* fall through */
+    case CONVERT (A52_2F1R, A52_STEREO):
+    case CONVERT (A52_2F1R, A52_DOLBY):
+        zero (samples + 512);
+        break;
+
+    case CONVERT (A52_3F2R, A52_3F):
+        zero (samples + 1024);    /* fall through */
+    case CONVERT (A52_3F1R, A52_3F):
+    case CONVERT (A52_2F2R, A52_2F1R):
+        zero (samples + 768);
+        break;
+
+    case CONVERT (A52_3F2R, A52_3F1R):
+        zero (samples + 1024);
+        break;
+
+    case CONVERT (A52_3F2R, A52_2F1R):
+        zero (samples + 1024);    /* fall through */
+    case CONVERT (A52_3F1R, A52_2F1R):
+    mix_31to21:    /* also entered by goto from the 3F2R -> 2F2R case below */
+        memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));    /* move block 2 up to block 3 */
+        goto mix_3to2;
+
+    case CONVERT (A52_3F2R, A52_2F2R):
+        memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));    /* move block 3 up to block 4 */
+        goto mix_31to21;
+    }
+}
--- a/Show more
+++ b/Show more