diff --git a/apps/codec_thread.c b/apps/codec_thread.c
index 39db741054..17ca980e41 100644
--- a/apps/codec_thread.c
+++ b/apps/codec_thread.c
@@ -213,49 +213,41 @@ void codec_thread_do_callback(void (*fn)(void), unsigned int *id)
 static void codec_pcmbuf_insert_callback(
         const void *ch1, const void *ch2, int count)
 {
-    const char *src[2] = { ch1, ch2 };
+    struct dsp_buffer src;
 
-    while (count > 0)
+    src.remcount  = count;
+    src.pin[0]    = ch1;
+    src.pin[1]    = ch2;
+    src.proc_mask = 0;
+
+    while (1)
     {
-        int out_count = dsp_output_count(ci.dsp, count);
-        int inp_count;
-        char *dest;
+        struct dsp_buffer dst;
+        dst.remcount = 0;
+        dst.bufcount = MAX(src.remcount, 1024); /* Arbitrary min request */
 
-        while (1)
+        while ((dst.p16out = pcmbuf_request_buffer(&dst.bufcount)) == NULL)
         {
-            if ((dest = pcmbuf_request_buffer(&out_count)) != NULL)
-                break;
-
             cancel_cpu_boost();
 
-            /* It will be awhile before space is available but we want
+            /* It may be a while before space is available but we want
                "instant" response to any message */
             queue_wait_w_tmo(&codec_queue, NULL, HZ/20);
 
             if (!queue_empty(&codec_queue) &&
                 codec_check_queue__have_msg() < 0)
+            {
+                dsp_configure(ci.dsp, DSP_FLUSH, 0); /* Discontinuity */
                 return;
+            }
         }
 
-        /* Get the real input_size for output_size bytes, guarding
-         * against resampling buffer overflows. */
-        inp_count = dsp_input_count(ci.dsp, out_count);
+        dsp_process(ci.dsp, &src, &dst);
 
-        if (inp_count <= 0)
-            return;
-
-        /* Input size has grown, no error, just don't write more than length */
-        if (inp_count > count)
-            inp_count = count;
-
-        out_count = dsp_process(ci.dsp, dest, src, inp_count);
-
-        if (out_count <= 0)
-            return;
-
-        pcmbuf_write_complete(out_count, ci.id3->elapsed, ci.id3->offset);
-
-        count -= inp_count;
+        if (dst.remcount > 0)
+            pcmbuf_write_complete(dst.remcount, ci.id3->elapsed, ci.id3->offset);
+        else if (src.remcount <= 0)
+            break; /* No input remains and DSP purged */
     }
 }
 
@@ -352,10 +344,7 @@ static void codec_seek_complete_callback(void)
 
 static void codec_configure_callback(int setting, intptr_t value)
 {
-    if (!dsp_configure(ci.dsp, setting, value))
-    {
-        logf("Illegal key: %d", setting);
-    }
+    dsp_configure(ci.dsp, setting, value);
 }
 
 static enum codec_command_action
@@ -611,8 +600,7 @@ static void NORETURN_ATTR codec_thread(void)
 void codec_thread_init(void)
 {
     /* Init API */
-    ci.dsp              = (struct dsp_config *)dsp_configure(NULL, DSP_MYDSP,
-                                                             CODEC_IDX_AUDIO);
+    ci.dsp              = dsp_get_config(CODEC_IDX_AUDIO);
     ci.codec_get_buffer = codec_get_buffer_callback;
     ci.pcmbuf_insert    = codec_pcmbuf_insert_callback;
     ci.set_elapsed      = audio_codec_update_elapsed;
diff --git a/apps/codecs.c b/apps/codecs.c
index fafe4ac7a3..69204b7c4f 100644
--- a/apps/codecs.c
+++ b/apps/codecs.c
@@ -118,6 +118,7 @@ struct codec_api ci = {
 
     commit_dcache,
     commit_discard_dcache,
+    commit_discard_idcache,
 
     /* strings and memory */
     strcpy,
@@ -166,7 +167,6 @@ struct codec_api ci = {
     /* new stuff at the end, sort into place next time
        the API gets incompatible */
 
-    commit_discard_idcache,
 };
 
 void codec_get_full_path(char *path, const char *codec_root_fn)
diff --git a/apps/gui/bitmap/list-skinned.c b/apps/gui/bitmap/list-skinned.c
index 95430ae278..7d3620ed81 100644
--- a/apps/gui/bitmap/list-skinned.c
+++ b/apps/gui/bitmap/list-skinned.c
@@ -20,13 +20,13 @@
  ****************************************************************************/
 
 #include "config.h"
+#include "system.h"
 #include "lcd.h"
 #include "font.h"
 #include "button.h"
 #include "string.h"
 #include "settings.h"
 #include "kernel.h"
-#include "system.h"
 #include "file.h"
 
 #include "action.h"
diff --git a/apps/gui/bitmap/list.c b/apps/gui/bitmap/list.c
index 0581050091..49479c1cb3 100644
--- a/apps/gui/bitmap/list.c
+++ b/apps/gui/bitmap/list.c
@@ -22,13 +22,13 @@
 /* This file contains the code to draw the list widget on BITMAP LCDs. */
 
 #include "config.h"
+#include "system.h"
 #include "lcd.h"
 #include "font.h"
 #include "button.h"
 #include "string.h"
 #include "settings.h"
 #include "kernel.h"
-#include "system.h"
 #include "file.h"
 
 #include "action.h"
diff --git a/apps/gui/option_select.c b/apps/gui/option_select.c
index 7c3d34f024..ca206b86da 100644
--- a/apps/gui/option_select.c
+++ b/apps/gui/option_select.c
@@ -22,6 +22,7 @@
 #include <stdlib.h>
 #include "string-extra.h"
 #include "config.h"
+#include "system.h"
 #include "option_select.h"
 #include "kernel.h"
 #include "lang.h"
diff --git a/apps/main.c b/apps/main.c
index 631236852e..0fff0846a6 100644
--- a/apps/main.c
+++ b/apps/main.c
@@ -384,6 +384,9 @@ static void init(void)
     viewportmanager_init();
 
     storage_init();
+#if CONFIG_CODEC == SWCODEC
+    dsp_init();
+#endif
     settings_reset();
     settings_load(SETTINGS_ALL);
     settings_apply(true);
@@ -632,6 +635,10 @@ static void init(void)
         }
     }
 
+#if CONFIG_CODEC == SWCODEC
+    dsp_init();
+#endif
+
 #if defined(SETTINGS_RESET) || (CONFIG_KEYPAD == IPOD_4G_PAD) || \
     (CONFIG_KEYPAD == IRIVER_H10_PAD)
 #ifdef SETTINGS_RESET
diff --git a/apps/menus/eq_menu.c b/apps/menus/eq_menu.c
index 2cfb80f76a..60b2687050 100644
--- a/apps/menus/eq_menu.c
+++ b/apps/menus/eq_menu.c
@@ -70,13 +70,11 @@ const char* eq_precut_format(char* buffer, size_t buffer_size, int value, const
  */
 static void eq_apply(void)
 {
-    dsp_set_eq(global_settings.eq_enabled); 
+    dsp_eq_enable(global_settings.eq_enabled); 
     dsp_set_eq_precut(global_settings.eq_precut);    
     /* Update all bands */
-    for(int i = 0; i < 5; i++) {
-        dsp_set_eq_coefs(i, global_settings.eq_band_settings[i].cutoff,
-                         global_settings.eq_band_settings[i].q,
-                         global_settings.eq_band_settings[i].gain);
+    for(int i = 0; i < EQ_NUM_BANDS; i++) {
+        dsp_set_eq_coefs(i, &global_settings.eq_band_settings[i]);
     }
 }
 
@@ -580,9 +578,7 @@ bool eq_menu_graphical(void)
         /* Update the filter if the user changed something */
         if (has_changed) {
             dsp_set_eq_coefs(current_band,
-                             global_settings.eq_band_settings[current_band].cutoff,
-                             global_settings.eq_band_settings[current_band].q,
-                             global_settings.eq_band_settings[current_band].gain);
+                &global_settings.eq_band_settings[current_band]);
             has_changed = false;
         }
     }
diff --git a/apps/pcmbuf.c b/apps/pcmbuf.c
index d36883fc5b..9cedae0b67 100644
--- a/apps/pcmbuf.c
+++ b/apps/pcmbuf.c
@@ -47,16 +47,23 @@
    smaller math - must be < 65536 bytes */
 #define PCMBUF_CHUNK_SIZE    8192u
 
-/* Massive size is a nasty temp fix */
-#define PCMBUF_GUARD_SIZE    (1024u*12*((NATIVE_FREQUENCY+7999)/8000))
+/* Small guard buf to give decent space near end */
+#define PCMBUF_GUARD_SIZE    (PCMBUF_CHUNK_SIZE / 8)
 
 /* Mnemonics for common data commit thresholds */
 #define COMMIT_CHUNKS        PCMBUF_CHUNK_SIZE
 #define COMMIT_ALL_DATA      1u
 
- /* Size of the crossfade buffer where codec data is written to be faded
-    on commit */
-#define CROSSFADE_BUFSIZE    8192u
+/* Size of the crossfade buffer where codec data is written to be faded
+   on commit */
+#define CROSSFADE_BUFSIZE    PCMBUF_CHUNK_SIZE
+
+/* Maximum contiguous space that PCM buffer will allow (to avoid excessive
+   draining between inserts and observe low-latency mode) */
+#define PCMBUF_MAX_BUFFER    (PCMBUF_CHUNK_SIZE * 4)
+
+/* Forced buffer insert constraint can thus be from 1KB to 32KB using 8KB
+   chunks */
 
 /* Return data level in 1/4-second increments */
 #define DATA_LEVEL(quarter_secs) (NATIVE_FREQUENCY * (quarter_secs))
@@ -383,7 +390,11 @@ void * pcmbuf_request_buffer(int *count)
     /* If crossfade has begun, put the new track samples in crossfade_buffer */
     if (crossfade_status != CROSSFADE_INACTIVE && size > CROSSFADE_BUFSIZE)
         size = CROSSFADE_BUFSIZE;
-#endif
+    else
+#endif /* HAVE_CROSSFADE */
+
+    if (size > PCMBUF_MAX_BUFFER)
+        size = PCMBUF_MAX_BUFFER; /* constrain */
 
     enum channel_status status = mixer_channel_status(PCM_MIXER_CHAN_PLAYBACK);
     size_t remaining = pcmbuf_unplayed_bytes();
@@ -432,11 +443,22 @@ void * pcmbuf_request_buffer(int *count)
             pcmbuf_play_start();
     }
 
-    void *buf =
+    void *buf;
+
 #ifdef HAVE_CROSSFADE
-        crossfade_status != CROSSFADE_INACTIVE ? crossfade_buffer :
+    if (crossfade_status != CROSSFADE_INACTIVE)
+    {
+        buf = crossfade_buffer; /* always CROSSFADE_BUFSIZE */
+    }
+    else
 #endif
-        get_write_buffer(&size);
+    {
+        /* Give the maximum amount available if there's more */
+        if (size + PCMBUF_CHUNK_SIZE < freespace)
+            size = freespace - PCMBUF_CHUNK_SIZE;
+
+        buf = get_write_buffer(&size);
+    }
 
     *count = size / 4;
     return buf;
diff --git a/apps/plugin.c b/apps/plugin.c
index 5406610e23..129fd6954e 100644
--- a/apps/plugin.c
+++ b/apps/plugin.c
@@ -565,13 +565,15 @@ static const struct plugin_api rockbox_api = {
     audio_set_output_source,
     audio_set_input_source,
 #endif
-    dsp_set_crossfeed,
-    dsp_set_eq,
+    dsp_crossfeed_enable,
+    dsp_eq_enable,
     dsp_dither_enable,
+#ifdef HAVE_PITCHSCREEN
+    dsp_set_timestretch,
+#endif
     dsp_configure,
+    dsp_get_config,
     dsp_process,
-    dsp_input_count,
-    dsp_output_count,
 
     mixer_channel_status,
     mixer_channel_get_buffer,
@@ -584,7 +586,7 @@ static const struct plugin_api rockbox_api = {
 
     system_sound_play,
     keyclick_click,
-#endif
+#endif /* CONFIG_CODEC == SWCODEC */
     /* playback control */
     playlist_amount,
     playlist_resume,
diff --git a/apps/plugin.h b/apps/plugin.h
index 8123414ff0..3820c7ede6 100644
--- a/apps/plugin.h
+++ b/apps/plugin.h
@@ -153,12 +153,12 @@ void* plugin_get_buffer(size_t *buffer_size);
 #define PLUGIN_MAGIC 0x526F634B /* RocK */
 
 /* increase this every time the api struct changes */
-#define PLUGIN_API_VERSION 218
+#define PLUGIN_API_VERSION 219
 
 /* update this to latest version if a change to the api struct breaks
    backwards compatibility (and please take the opportunity to sort in any
    new function which are "waiting" at the end of the function table) */
-#define PLUGIN_MIN_API_VERSION 218
+#define PLUGIN_MIN_API_VERSION 219
 
 /* plugin return codes */
 /* internal returns start at 0x100 to make exit(1..255) work */
@@ -680,15 +680,17 @@ struct plugin_api {
     void (*audio_set_output_source)(int monitor);
     void (*audio_set_input_source)(int source, unsigned flags);
 #endif
-    void (*dsp_set_crossfeed)(bool enable);
-    void (*dsp_set_eq)(bool enable);
+    void (*dsp_crossfeed_enable)(bool enable);
+    void (*dsp_eq_enable)(bool enable);
     void (*dsp_dither_enable)(bool enable);
-    intptr_t (*dsp_configure)(struct dsp_config *dsp, int setting,
-                              intptr_t value);
-    int (*dsp_process)(struct dsp_config *dsp, char *dest,
-                       const char *src[], int count);
-    int (*dsp_input_count)(struct dsp_config *dsp, int count);
-    int (*dsp_output_count)(struct dsp_config *dsp, int count);
+#ifdef HAVE_PITCHSCREEN
+    void (*dsp_set_timestretch)(int32_t percent);
+#endif
+    intptr_t (*dsp_configure)(struct dsp_config *dsp,
+                              unsigned int setting, intptr_t value);
+    struct dsp_config * (*dsp_get_config)(enum dsp_ids id);
+    void (*dsp_process)(struct dsp_config *dsp, struct dsp_buffer *src,
+                        struct dsp_buffer *dst);
 
     enum channel_status (*mixer_channel_status)(enum pcm_mixer_channel channel);
     const void * (*mixer_channel_get_buffer)(enum pcm_mixer_channel channel,
diff --git a/apps/plugins/SOURCES b/apps/plugins/SOURCES
index db690a638a..e5f026c5b4 100644
--- a/apps/plugins/SOURCES
+++ b/apps/plugins/SOURCES
@@ -33,6 +33,7 @@ flipit.c
 shopper.c
 resistor.c
 
+test_codec.c
 
 #ifdef USB_ENABLE_HID
 remote_control.c
diff --git a/apps/plugins/mpegplayer/audio_thread.c b/apps/plugins/mpegplayer/audio_thread.c
index f976fd6007..b06727f759 100644
--- a/apps/plugins/mpegplayer/audio_thread.c
+++ b/apps/plugins/mpegplayer/audio_thread.c
@@ -36,6 +36,7 @@ struct audio_thread_data
     unsigned samplerate;    /* Current stream sample rate */
     int nchannels;          /* Number of audio channels */
     struct dsp_config *dsp; /* The DSP we're using */
+    struct dsp_buffer src;  /* Current audio data for DSP processing */
 };
 
 /* The audio thread is stolen from the core codec thread */
@@ -479,12 +480,13 @@ static void audio_thread(void)
     /* We need this here to init the EMAC for Coldfire targets */
     init_mad();
 
-    td.dsp = (struct dsp_config *)rb->dsp_configure(NULL, DSP_MYDSP,
-                                                    CODEC_IDX_AUDIO);
+    td.dsp = rb->dsp_get_config(CODEC_IDX_AUDIO);
 #ifdef HAVE_PITCHSCREEN
     rb->sound_set_pitch(PITCH_SPEED_100);
+    rb->dsp_set_timestretch(PITCH_SPEED_100);
 #endif
     rb->dsp_configure(td.dsp, DSP_RESET, 0);
+    rb->dsp_configure(td.dsp, DSP_FLUSH, 0);
     rb->dsp_configure(td.dsp, DSP_SET_SAMPLE_DEPTH, MAD_F_FRACBITS);
 
     goto message_wait;
@@ -631,43 +633,53 @@ static void audio_thread(void)
                                 STEREO_MONO : STEREO_NONINTERLEAVED);
         }
 
+        td.src.remcount  = synth.pcm.length;
+        td.src.pin[0]    = synth.pcm.samples[0];
+        td.src.pin[1]    = synth.pcm.samples[1];
+        td.src.proc_mask = 0;
+
         td.state  = TSTATE_RENDER_WAIT;
 
         /* Add a frame of audio to the pcm buffer. Maximum is 1152 samples. */
     render_wait:
-        if (synth.pcm.length > 0)
+        rb->yield();
+
+        while (1)
         {
-            const char *src[2] =
-                { (char *)synth.pcm.samples[0], (char *)synth.pcm.samples[1] };
-            int out_count = (synth.pcm.length * CLOCK_RATE
-                                + (td.samplerate - 1)) / td.samplerate;
-            unsigned char *out_buf;
-            ssize_t size = out_count*4;
+            struct dsp_buffer dst;
+            dst.remcount = 0;
+            dst.bufcount = MAX(td.src.remcount, 1024);
+
+            ssize_t size = dst.bufcount * 2 * sizeof(int16_t);
 
             /* Wait for required amount of free buffer space */
-            while ((out_buf = pcm_output_get_buffer(&size)) == NULL)
+            while ((dst.p16out = pcm_output_get_buffer(&size)) == NULL)
             {
                 /* Wait one frame */
-                int timeout = out_count*HZ / td.samplerate;
+                int timeout = dst.bufcount*HZ / td.samplerate;
                 str_get_msg_w_tmo(&audio_str, &td.ev, MAX(timeout, 1));
                 if (td.ev.id != SYS_TIMEOUT)
                     goto message_process;
             }
 
-            out_count = rb->dsp_process(td.dsp, out_buf, src, synth.pcm.length);
+            dst.bufcount = size / (2 * sizeof (int16_t));
+            rb->dsp_process(td.dsp, &td.src, &dst);
 
-            if (out_count <= 0)
+            if (dst.remcount > 0)
+            {
+                /* Make this data available to DMA */
+                pcm_output_commit_data(dst.remcount * 2 * sizeof(int16_t),
+                                       audio_queue.curr->time);
+
+                /* As long as we're on this timestamp, the time is just
+                   incremented by the number of samples */
+                audio_queue.curr->time += dst.remcount;
+            }
+            else if (td.src.remcount <= 0)
+            {
                 break;
-
-            /* Make this data available to DMA */
-            pcm_output_commit_data(out_count*4, audio_queue.curr->time);
-
-            /* As long as we're on this timestamp, the time is just
-               incremented by the number of samples */
-            audio_queue.curr->time += out_count;
+            }
         }
-
-        rb->yield();
     } /* end decoding loop */
 }
 
diff --git a/apps/plugins/mpegplayer/mpeg_settings.c b/apps/plugins/mpegplayer/mpeg_settings.c
index 1c3f3c0b92..7f92fb7c69 100644
--- a/apps/plugins/mpegplayer/mpeg_settings.c
+++ b/apps/plugins/mpegplayer/mpeg_settings.c
@@ -457,13 +457,13 @@ static void sync_audio_setting(int setting, bool global)
         break;
 
     case MPEG_AUDIO_CROSSFEED:
-        rb->dsp_set_crossfeed((global || settings.crossfeed) ?
-                              rb->global_settings->crossfeed : false);
+        rb->dsp_crossfeed_enable((global || settings.crossfeed) ?
+                                 rb->global_settings->crossfeed : false);
         break;
 
     case MPEG_AUDIO_EQUALIZER:
-        rb->dsp_set_eq((global || settings.equalizer) ?
-                       rb->global_settings->eq_enabled : false);
+        rb->dsp_eq_enable((global || settings.equalizer) ?
+                          rb->global_settings->eq_enabled : false);
         break;
 
     case MPEG_AUDIO_DITHERING:
diff --git a/apps/plugins/test_codec.c b/apps/plugins/test_codec.c
index dafcf35710..920be54d56 100644
--- a/apps/plugins/test_codec.c
+++ b/apps/plugins/test_codec.c
@@ -164,6 +164,7 @@ static inline void int2le16(unsigned char* buf, int16_t x)
 
 static unsigned char *wavbuffer;
 static unsigned char *dspbuffer;
+static int dspbuffer_count;
 
 void init_wav(char* filename)
 {
@@ -215,34 +216,31 @@ static void* codec_get_buffer(size_t *size)
 
 static int process_dsp(const void *ch1, const void *ch2, int count)
 {
-    const char *src[2] = { ch1, ch2 };
-    int written_count = 0;
-    char *dest = dspbuffer;
-    
-    while (count > 0)
+    struct dsp_buffer src;
+    src.remcount = count;
+    src.pin[0] = ch1;
+    src.pin[1] = ch2;
+    src.proc_mask = 0;
+
+    struct dsp_buffer dst;
+    dst.remcount = 0;
+    dst.p16out = (int16_t *)dspbuffer;
+    dst.bufcount = dspbuffer_count;
+
+    while (1)
     {
-        int out_count = rb->dsp_output_count(ci.dsp, count);
+        int old_remcount = dst.remcount;
+        rb->dsp_process(ci.dsp, &src, &dst);
         
-        int inp_count = rb->dsp_input_count(ci.dsp, out_count);
-        
-        if (inp_count <= 0)
+        if (dst.bufcount <= 0 ||
+            (src.remcount <= 0 && dst.remcount <= old_remcount))
+        {
+            /* Dest is full or no input left and DSP purged */
             break;
-        
-        if (inp_count > count)
-            inp_count = count;
-        
-        out_count = rb->dsp_process(ci.dsp, dest, src, inp_count);
-        
-        if (out_count <= 0)
-            break;
-        
-        written_count += out_count;
-        dest += out_count * 4;
-        
-        count -= inp_count;
+        }
     }
     
-    return written_count;
+    return dst.remcount;
 }
 
 /* Null output */
@@ -502,7 +500,6 @@ static void configure(int setting, intptr_t value)
         rb->dsp_configure(ci.dsp, setting, value);
     switch(setting)
     {
-        case DSP_SWITCH_FREQUENCY:
         case DSP_SET_FREQUENCY:
             DEBUGF("samplerate=%d\n",(int)value);
             wavinfo.samplerate = use_dsp ? NATIVE_FREQUENCY : (int)value;
@@ -525,9 +522,7 @@ static void init_ci(void)
 {
     /* --- Our "fake" implementations of the codec API functions. --- */
 
-    ci.dsp = (struct dsp_config *)rb->dsp_configure(NULL, DSP_MYDSP,
-                                                    CODEC_IDX_AUDIO);
-
+    ci.dsp = rb->dsp_get_config(CODEC_IDX_AUDIO);
     ci.codec_get_buffer = codec_get_buffer;
 
     if (wavinfo.fd >= 0 || checksum) {
@@ -849,6 +844,8 @@ enum plugin_status plugin_start(const void* parameter)
 
     wavbuffer = rb->plugin_get_buffer(&buffer_size);
     dspbuffer = wavbuffer + buffer_size / 2;
+    dspbuffer_count = (buffer_size - (dspbuffer - wavbuffer)) /
+                        (2 * sizeof (int16_t));
 
     codec_mallocbuf = rb->plugin_get_audio_buffer(&audiosize);
     /* Align codec_mallocbuf to pointer size, tlsf wants that */
diff --git a/apps/settings.c b/apps/settings.c
index acc38c2388..49d239a2c1 100644
--- a/apps/settings.c
+++ b/apps/settings.c
@@ -979,20 +979,17 @@ void settings_apply(bool read_disk)
     audio_set_crossfade(global_settings.crossfade);
 #endif
     dsp_set_replaygain();
-    dsp_set_crossfeed(global_settings.crossfeed);
+    dsp_crossfeed_enable(global_settings.crossfeed);
     dsp_set_crossfeed_direct_gain(global_settings.crossfeed_direct_gain);
     dsp_set_crossfeed_cross_params(global_settings.crossfeed_cross_gain,
                                    global_settings.crossfeed_hf_attenuation,
                                    global_settings.crossfeed_hf_cutoff);
 
     /* Configure software equalizer, hardware eq is handled in audio_init() */
-    dsp_set_eq(global_settings.eq_enabled);
+    dsp_eq_enable(global_settings.eq_enabled);
     dsp_set_eq_precut(global_settings.eq_precut);
-
-    for(int i = 0; i < 5; i++) {
-        dsp_set_eq_coefs(i, global_settings.eq_band_settings[i].cutoff,
-                         global_settings.eq_band_settings[i].q,
-                         global_settings.eq_band_settings[i].gain);
+    for(int i = 0; i < EQ_NUM_BANDS; i++) {
+        dsp_set_eq_coefs(i, &global_settings.eq_band_settings[i]);
     }
 
     dsp_dither_enable(global_settings.dithering_enabled);
diff --git a/apps/settings.h b/apps/settings.h
index b312c1e784..4d94ca8ba8 100644
--- a/apps/settings.h
+++ b/apps/settings.h
@@ -32,6 +32,7 @@
 #include "button.h"
 #if CONFIG_CODEC == SWCODEC
 #include "audio.h"
+#include "dsp.h"
 #endif
 #include "rbpaths.h"
 
@@ -339,13 +340,7 @@ struct user_settings
     /* EQ */
     bool eq_enabled;            /* Enable equalizer */
     unsigned int eq_precut;     /* dB */
-
-    struct eq_band_setting
-    {
-        int cutoff;        /* Hz */
-        int q;
-        int gain;          /* +/- dB */
-    } eq_band_settings[5];
+    struct eq_band_setting eq_band_settings[EQ_NUM_BANDS]; /* for each band */
 
     /* Misc. swcodec */
     int  beep;              /* system beep volume when changing tracks etc. */
@@ -772,14 +767,7 @@ struct user_settings
 #endif
 
 #if CONFIG_CODEC == SWCODEC
-    struct compressor_settings
-    {
-        int threshold;
-        int makeup_gain;
-        int ratio;
-        int knee;
-        int release_time;
-    } compressor_settings;
+    struct compressor_settings compressor_settings;
 #endif
 
     int sleeptimer_duration; /* In minutes; 0=off */
diff --git a/apps/settings_list.c b/apps/settings_list.c
index af48d11c85..82cccd891f 100644
--- a/apps/settings_list.c
+++ b/apps/settings_list.c
@@ -1398,7 +1398,7 @@ const struct settings_list settings[] = {
 
     /* crossfeed */
     OFFON_SETTING(F_SOUNDSETTING, crossfeed, LANG_CROSSFEED, false,
-                  "crossfeed", dsp_set_crossfeed),
+                  "crossfeed", dsp_crossfeed_enable),
     INT_SETTING_NOWRAP(F_SOUNDSETTING, crossfeed_direct_gain,
                        LANG_CROSSFEED_DIRECT_GAIN, -15,
                        "crossfeed direct gain", UNIT_DB, -60, 0, 5,
diff --git a/apps/voice_thread.c b/apps/voice_thread.c
index 07a67256c4..cff703adfa 100644
--- a/apps/voice_thread.c
+++ b/apps/voice_thread.c
@@ -133,9 +133,8 @@ struct voice_thread_data
     SpeexBits bits;         /* Bit cursor */
     struct dsp_config *dsp; /* DSP used for voice output */
     struct voice_info vi;   /* Copy of clip data */
-    const char *src[2];     /* Current output buffer pointers */
     int lookahead;          /* Number of samples to drop at start of clip */
-    int count;              /* Count of samples remaining to send to PCM */
+    struct dsp_buffer src;  /* Speex output buffer/input to DSP */
 };
 
 /* Functions called in their repective state that return the next state to
@@ -264,9 +263,7 @@ void voice_wait(void)
  * setup the DSP parameters */
 static void voice_data_init(struct voice_thread_data *td)
 {
-    td->dsp = (struct dsp_config *)dsp_configure(NULL, DSP_MYDSP,
-                                                 CODEC_IDX_VOICE);
-
+    td->dsp = dsp_get_config(CODEC_IDX_VOICE);
     dsp_configure(td->dsp, DSP_RESET, 0);
     dsp_configure(td->dsp, DSP_SET_FREQUENCY, VOICE_SAMPLE_RATE);
     dsp_configure(td->dsp, DSP_SET_SAMPLE_DEPTH, VOICE_SAMPLE_DEPTH);
@@ -378,7 +375,8 @@ static enum voice_state voice_decode(struct voice_thread_data *td)
         else
         {
             /* If all clips are done and not playing, force pcm playback. */
-            voice_start_playback();
+            if (voice_unplayed_frames() > 0)
+                voice_start_playback();
             return VOICE_STATE_MESSAGE;
         }
     }
@@ -387,12 +385,14 @@ static enum voice_state voice_decode(struct voice_thread_data *td)
         yield();
 
         /* Output the decoded frame */
-        td->count = VOICE_FRAME_COUNT - td->lookahead;
-        td->src[0] = (const char *)&voice_output_buf[td->lookahead];
-        td->src[1] = NULL;
+        td->src.remcount  = VOICE_FRAME_COUNT - td->lookahead;
+        td->src.pin[0]    = &voice_output_buf[td->lookahead];
+        td->src.pin[1]    = NULL;
+        td->src.proc_mask = 0;
+
         td->lookahead -= MIN(VOICE_FRAME_COUNT, td->lookahead);
 
-        if (td->count > 0)
+        if (td->src.remcount > 0)
             return VOICE_STATE_BUFFER_INSERT;
     }
 
@@ -405,12 +405,21 @@ static enum voice_state voice_buffer_insert(struct voice_thread_data *td)
     if (!queue_empty(&voice_queue))
         return VOICE_STATE_MESSAGE;
 
-    char *dest = (char *)voice_buf_get();
+    struct dsp_buffer dst;
 
-    if (dest != NULL)
+    if ((dst.p16out = voice_buf_get()) != NULL)
     {
-        voice_buf_commit(dsp_process(td->dsp, dest, td->src, td->count));
-        return VOICE_STATE_DECODE;
+        dst.remcount = 0;
+        dst.bufcount = VOICE_PCM_FRAME_COUNT;
+
+        dsp_process(td->dsp, &td->src, &dst);
+
+        voice_buf_commit(dst.remcount);
+
+        /* Unless other effects are introduced to voice that have delays,
+           all output should have been purged to dst in one call */
+        return td->src.remcount > 0 ?
+            VOICE_STATE_BUFFER_INSERT : VOICE_STATE_DECODE;
     }
 
     sleep(0);
diff --git a/firmware/export/system.h b/firmware/export/system.h
index d93d10c9e2..b1959c496d 100644
--- a/firmware/export/system.h
+++ b/firmware/export/system.h
@@ -300,6 +300,12 @@ static inline uint32_t swaw32_hw(uint32_t value)
 #define BIT_N(n) (1U << (n))
 #endif
 
+#ifndef MASK_N
+/* Make a mask of n contiguous bits, shifted left by 'shift'; n < type width */
+#define MASK_N(type, n, shift) \
+    ((type)((((type)1 << (n)) - (type)1) << (shift)))
+#endif
+
 /* Declare this as HIGHEST_IRQ_LEVEL if they don't differ */
 #ifndef DISABLE_INTERRUPTS
 #define DISABLE_INTERRUPTS  HIGHEST_IRQ_LEVEL
@@ -352,7 +358,7 @@ static inline uint32_t swaw32_hw(uint32_t value)
 
 /* Define MEM_ALIGN_ATTR which may be used to align e.g. buffers for faster
  * access. */
-#if   defined(CPU_ARM)
+#if defined(CPU_ARM)
     /* Use ARMs cache alignment. */
     #define MEM_ALIGN_ATTR CACHEALIGN_ATTR
     #define MEM_ALIGN_SIZE CACHEALIGN_SIZE
@@ -361,12 +367,16 @@ static inline uint32_t swaw32_hw(uint32_t value)
     #define MEM_ALIGN_ATTR __attribute__((aligned(16)))
     #define MEM_ALIGN_SIZE 16
 #else
-    /* Do nothing. */
-    #define MEM_ALIGN_ATTR
     /* Align pointer size */
+    #define MEM_ALIGN_ATTR __attribute__((aligned(sizeof(intptr_t))))
     #define MEM_ALIGN_SIZE sizeof(intptr_t)
 #endif
 
+#define MEM_ALIGN_UP(x) \
+    ((typeof (x))ALIGN_UP((uintptr_t)(x), MEM_ALIGN_SIZE))
+#define MEM_ALIGN_DOWN(x) \
+    ((typeof (x))ALIGN_DOWN((uintptr_t)(x), MEM_ALIGN_SIZE))
+
 #ifdef STORAGE_WANTS_ALIGN
     #define STORAGE_ALIGN_ATTR __attribute__((aligned(CACHEALIGN_SIZE)))
     #define STORAGE_ALIGN_DOWN(x) \
diff --git a/lib/rbcodec/SOURCES b/lib/rbcodec/SOURCES
index 3ac2660a38..c293f3c028 100644
--- a/lib/rbcodec/SOURCES
+++ b/lib/rbcodec/SOURCES
@@ -3,22 +3,31 @@ metadata/id3tags.c
 metadata/mp3.c
 metadata/mp3data.c
 #if CONFIG_CODEC == SWCODEC
+dsp/channel_mode.c
 dsp/compressor.c
-dsp/dsp.c
+dsp/crossfeed.c
+dsp/dsp_core.c
+dsp/dsp_filter.c
+dsp/dsp_misc.c
+dsp/dsp_sample_input.c
+dsp/dsp_sample_output.c
 dsp/eq.c
+dsp/lin_resample.c
+dsp/pga.c
+# ifdef HAVE_PITCHSCREEN
+dsp/tdspeed.c
+# endif
+# ifdef HAVE_SW_TONE_CONTROLS
+dsp/tone_controls.c
+# endif
 # if defined(CPU_COLDFIRE)
 dsp/dsp_cf.S
-dsp/eq_cf.S
 # elif defined(CPU_ARM)
 dsp/dsp_arm.S
-dsp/eq_arm.S
 #  if ARM_ARCH >= 6
 dsp/dsp_arm_v6.S
 #  endif
 # endif
-# ifdef HAVE_PITCHSCREEN
-dsp/tdspeed.c
-# endif
 metadata/replaygain.c
 metadata/metadata_common.c
 metadata/a52.c
diff --git a/lib/rbcodec/codecs/codecs.h b/lib/rbcodec/codecs/codecs.h
index bad8cdd469..d4a51a4aba 100644
--- a/lib/rbcodec/codecs/codecs.h
+++ b/lib/rbcodec/codecs/codecs.h
@@ -75,12 +75,12 @@
 #define CODEC_ENC_MAGIC 0x52454E43 /* RENC */
 
 /* increase this every time the api struct changes */
-#define CODEC_API_VERSION 44
+#define CODEC_API_VERSION 45
 
 /* update this to latest version if a change to the api struct breaks
    backwards compatibility (and please take the opportunity to sort in any
    new function which are "waiting" at the end of the function table) */
-#define CODEC_MIN_API_VERSION 43
+#define CODEC_MIN_API_VERSION 45
 
 /* reasons for calling codec main entrypoint */
 enum codec_entry_call_reason {
@@ -171,6 +171,7 @@ struct codec_api {
 
     void (*commit_dcache)(void);
     void (*commit_discard_dcache)(void);
+    void (*commit_discard_idcache)(void);
 
     /* strings and memory */
     char* (*strcpy)(char *dst, const char *src);
@@ -223,7 +224,6 @@ struct codec_api {
 
     /* new stuff at the end, sort into place next time
        the API gets incompatible */
-    void (*commit_discard_idcache)(void);
 };
 
 /* codec header */
diff --git a/lib/rbcodec/codecs/lib/codeclib.c b/lib/rbcodec/codecs/lib/codeclib.c
index 36f4279941..4ca6c8c993 100644
--- a/lib/rbcodec/codecs/lib/codeclib.c
+++ b/lib/rbcodec/codecs/lib/codeclib.c
@@ -26,6 +26,7 @@
 #include "dsp.h"
 #include "codeclib.h"
 #include "metadata.h"
+#include "dsp_proc_entry.h"
 
 /* The following variables are used by codec_malloc() to make use of free RAM
  * within the statically allocated codec buffer. */
@@ -44,10 +45,15 @@ int codec_init(void)
 
 void codec_set_replaygain(const struct mp3entry *id3)
 {
-    ci->configure(DSP_SET_TRACK_GAIN, id3->track_gain);
-    ci->configure(DSP_SET_ALBUM_GAIN, id3->album_gain);
-    ci->configure(DSP_SET_TRACK_PEAK, id3->track_peak);
-    ci->configure(DSP_SET_ALBUM_PEAK, id3->album_peak);
+    struct dsp_replay_gains gains =
+    {
+        .track_gain = id3->track_gain,
+        .album_gain = id3->album_gain,
+        .track_peak = id3->track_peak,
+        .album_peak = id3->album_peak,
+    };
+
+    ci->configure(REPLAYGAIN_SET_GAINS, (intptr_t)&gains);
 }
 
 /* Various "helper functions" common to all the xxx2wav decoder plugins  */
diff --git a/lib/rbcodec/dsp/channel_mode.c b/lib/rbcodec/dsp/channel_mode.c
new file mode 100644
index 0000000000..5b678887c2
--- /dev/null
+++ b/lib/rbcodec/dsp/channel_mode.c
@@ -0,0 +1,264 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2006 Thom Johansen
+ * Copyright (C) 2012 Michael Sevakis
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+#include "config.h"
+#include "system.h"
+#include "dsp.h"
+#include "settings.h"
+#include "sound.h"
+#include "fixedpoint.h"
+#include "fracmul.h"
+#include "dsp_proc_entry.h"
+
+#if 0
+/* SOUND_CHAN_STEREO mode is a noop so has no function - just outline one for
+ * completeness. */
+void channel_mode_proc_stereo(struct dsp_proc_entry *this,
+                              struct dsp_buffer **buf_p);
+#endif
+void channel_mode_proc_mono(struct dsp_proc_entry *this,
+                            struct dsp_buffer **buf_p);
+void channel_mode_proc_mono_left(struct dsp_proc_entry *this,
+                                 struct dsp_buffer **buf_p);
+void channel_mode_proc_mono_right(struct dsp_proc_entry *this,
+                                  struct dsp_buffer **buf_p);
+void channel_mode_proc_custom(struct dsp_proc_entry *this,
+                              struct dsp_buffer **buf_p);
+void channel_mode_proc_karaoke(struct dsp_proc_entry *this,
+                               struct dsp_buffer **buf_p);
+
+static struct channel_mode_data
+{
+    long  sw_gain;   /* 00h: for mode: custom - same-channel (straight) gain */
+    long  sw_cross;  /* 04h: for mode: custom - opposite-channel gain */
+    struct dsp_config *dsp; /* DSP given at DSP_PROC_INIT; NULL after close */
+    int   mode;      /* Current SOUND_CHAN_* mode; index into fns[] */
+    const dsp_proc_fn_type fns[SOUND_CHAN_NUM_MODES]; /* Handler per mode */
+} channel_mode_data =
+{
+    .sw_gain = 0,
+    .sw_cross = 0,
+    .mode = SOUND_CHAN_STEREO,
+    .fns =
+    {
+        [SOUND_CHAN_STEREO]     = NULL, /* No-op mode: no handler needed */
+        [SOUND_CHAN_MONO]       = channel_mode_proc_mono,
+        [SOUND_CHAN_CUSTOM]     = channel_mode_proc_custom,
+        [SOUND_CHAN_MONO_LEFT]  = channel_mode_proc_mono_left,
+        [SOUND_CHAN_MONO_RIGHT] = channel_mode_proc_mono_right,
+        [SOUND_CHAN_KARAOKE]    = channel_mode_proc_karaoke,
+    },
+};
+
+static dsp_proc_fn_type get_process_fn(void)
+{   /* Handler for the currently selected channel mode */
+    return channel_mode_data.fns[channel_mode_data.mode]; /* NULL if stereo */
+}
+
+#if 0
+/* SOUND_CHAN_STEREO mode is a no-op and so has no function - this disabled
+ * outline is kept just for completeness. */
+void channel_mode_proc_stereo(struct dsp_proc_entry *this,
+                              struct dsp_buffer **buf_p)
+{
+    /* The channels are each just themselves */
+    (void)this; (void)buf_p;
+}
+#endif
+
+#if !defined(CPU_COLDFIRE) && !defined(CPU_ARM)
+/* Unoptimized routines */
+void channel_mode_proc_mono(struct dsp_proc_entry *this,
+                            struct dsp_buffer **buf_p)
+{   /* Mono: overwrite both channels in place with the same mixed value */
+    struct dsp_buffer *buf = *buf_p;
+    int32_t *sl = buf->p32[0];
+    int32_t *sr = buf->p32[1];
+    int count = buf->remcount;
+
+    do
+    {
+        int32_t lr = *sl / 2 + *sr / 2; /* Halved first: sum fits int32_t */
+        *sl++ = lr;
+        *sr++ = lr;
+    }
+    while (--count > 0); /* NOTE(review): runs once even if remcount <= 0 */
+
+    (void)this;
+}
+
+void channel_mode_proc_custom(struct dsp_proc_entry *this,
+                              struct dsp_buffer **buf_p)
+{   /* Custom mode: cross-mix the two channels in place */
+    struct channel_mode_data *data = (void *)this->data;
+    struct dsp_buffer *buf = *buf_p;
+
+    int32_t *sl = buf->p32[0];
+    int32_t *sr = buf->p32[1];
+    int count = buf->remcount;
+
+    const int32_t gain  = data->sw_gain;  /* Applied to the same channel */
+    const int32_t cross = data->sw_cross; /* Applied to the other channel */
+
+    do
+    {
+        int32_t l = *sl;
+        int32_t r = *sr;
+        *sl++ = FRACMUL(l, gain) + FRACMUL(r, cross);
+        *sr++ = FRACMUL(r, gain) + FRACMUL(l, cross);
+    }
+    while (--count > 0); /* NOTE(review): runs once even if remcount <= 0 */
+}
+
+void channel_mode_proc_karaoke(struct dsp_proc_entry *this,
+                               struct dsp_buffer **buf_p)
+{   /* Karaoke: replace the channels in place with +/- their half-difference */
+    struct dsp_buffer *buf = *buf_p;
+    int32_t *sl = buf->p32[0];
+    int32_t *sr = buf->p32[1];
+    int count = buf->remcount;
+
+    do
+    {
+        int32_t ch = *sl / 2 - *sr / 2; /* Each input is halved first */
+        *sl++ = ch;
+        *sr++ = -ch;                    /* Right gets the negated value */
+    }
+    while (--count > 0); /* NOTE(review): runs once even if remcount <= 0 */
+
+    (void)this;
+}
+#endif /* CPU */
+
+void channel_mode_proc_mono_left(struct dsp_proc_entry *this,
+                                 struct dsp_buffer **buf_p)
+{
+    /* Copy remcount 32-bit samples of channel 0 over channel 1 */
+    struct dsp_buffer *buf = *buf_p;
+    memcpy(buf->p32[1], buf->p32[0], buf->remcount * sizeof (int32_t));
+    (void)this;
+}
+
+void channel_mode_proc_mono_right(struct dsp_proc_entry *this,
+                                  struct dsp_buffer **buf_p)
+{
+    /* Copy remcount 32-bit samples of channel 1 over channel 0 */
+    struct dsp_buffer *buf = *buf_p;
+    memcpy(buf->p32[0], buf->p32[1], buf->remcount * sizeof (int32_t));
+    (void)this;
+}
+
+/* This is the initial function pointer when first enabled/changed in order
+ * to facilitate verification of the format compatibility (at least two
+ * channels). This gets called for changes even if stage is inactive. */
+static void channel_mode_process_new_format(struct dsp_proc_entry *this,
+                                            struct dsp_buffer **buf_p)
+{
+    struct channel_mode_data *data = (void *)this->data;
+    struct dsp_buffer *buf = *buf_p;
+
+    DSP_PRINT_FORMAT(DSP_PROC_CHANNEL_MODE, DSP_PROC_CHANNEL_MODE,
+                     buf->format);
+
+    bool active = buf->format.num_channels >= 2; /* Handlers use p32[0..1] */
+    dsp_proc_activate(data->dsp, DSP_PROC_CHANNEL_MODE, active);
+
+    if (!active)
+    {
+        /* Can't do this. Sleep until next change. */
+        DEBUGF("  DSP_PROC_CHANNEL_MODE- deactivated\n");
+        return;
+    }
+
+    /* Switch to the real function for the current mode and call it once */
+    this->process[0] = get_process_fn();
+    dsp_proc_call(this, buf_p, (unsigned)buf->format.changed - 1);
+}
+
+void channel_mode_set_config(int value)
+{   /* Select the channel mode (a SOUND_CHAN_* value) on the audio DSP */
+    if (value < 0 || value >= SOUND_CHAN_NUM_MODES)
+        value = SOUND_CHAN_STEREO; /* Out of range */
+
+    if (value == channel_mode_data.mode)
+        return; /* No change */
+
+    channel_mode_data.mode = value;
+    dsp_proc_enable(dsp_get_config(CODEC_IDX_AUDIO), DSP_PROC_CHANNEL_MODE,
+                    value != SOUND_CHAN_STEREO); /* Stereo disables stage */
+}
+
+void channel_mode_custom_set_width(int value)
+{   /* Compute the custom-mode gains (sw_gain/sw_cross) from a width in % */
+    long width, straight, cross;
+
+    width = value * 0x7fffff / 100; /* 100 maps to 0x7fffff ("1" below) */
+
+    if (value <= 100)
+    {
+        straight = (0x7fffff + width) / 2; /* (1 + width) / 2 */
+        cross = straight - width;          /* (1 - width) / 2 */
+    }
+    else
+    {
+        /* straight = (1 + width) / (2 * width) */
+        straight = fp_div(0x7fffff + width, width, 22);
+        cross = straight - 0x7fffff;       /* straight - 1 */
+    }
+
+    channel_mode_data.sw_gain  = straight << 8; /* Read by the custom proc */
+    channel_mode_data.sw_cross = cross << 8;
+}
+
+/* DSP message hook: handles DSP_PROC_INIT and DSP_PROC_CLOSE; returns 1 */
+static intptr_t channel_mode_configure(struct dsp_proc_entry *this,
+                                       struct dsp_config *dsp,
+                                       unsigned int setting,
+                                       intptr_t value)
+{
+    switch (setting)
+    {
+    case DSP_PROC_INIT:
+        if (value == 0)
+        {
+            /* New object: bind the shared state and remember the DSP */
+            this->data = (intptr_t)&channel_mode_data;
+            this->process[1] = channel_mode_process_new_format;
+            ((struct channel_mode_data *)this->data)->dsp = dsp;
+        }
+
+        /* Force format change call each time (also when value != 0) */
+        this->process[0] = channel_mode_process_new_format;
+        dsp_proc_activate(dsp, DSP_PROC_CHANNEL_MODE, true);
+        break;
+
+    case DSP_PROC_CLOSE:
+        ((struct channel_mode_data *)this->data)->dsp = NULL;
+        break;
+    }
+
+    return 1;
+}
+
+/* Database entry */
+DSP_PROC_DB_ENTRY(
+    CHANNEL_MODE,
+    channel_mode_configure);
diff --git a/lib/rbcodec/dsp/channel_mode.h b/lib/rbcodec/dsp/channel_mode.h
new file mode 100644
index 0000000000..7ca0f74204
--- /dev/null
+++ b/lib/rbcodec/dsp/channel_mode.h
@@ -0,0 +1,27 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2006 Thom Johansen
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+#ifndef CHANNEL_MODE_H
+#define CHANNEL_MODE_H
+
+void channel_mode_set_config(int value);
+void channel_mode_custom_set_width(int value);
+
+#endif /* CHANNEL_MODE_H */
diff --git a/lib/rbcodec/dsp/compressor.c b/lib/rbcodec/dsp/compressor.c
index a6c1ac1018..1816bfef9c 100644
--- a/lib/rbcodec/dsp/compressor.c
+++ b/lib/rbcodec/dsp/compressor.c
@@ -19,16 +19,18 @@
  *
  ****************************************************************************/
 #include "config.h"
+#include "system.h"
 #include "fixedpoint.h"
 #include "fracmul.h"
-#include "settings.h"
 #include "dsp.h"
-#include "compressor.h"
+#include <string.h>
 
 /* Define LOGF_ENABLE to enable logf output in this file */
 /*#define LOGF_ENABLE*/
 #include "logf.h"
 
+#include "dsp_proc_entry.h"
+
 static struct compressor_settings curr_set; /* Cached settings */
 
 static int32_t comp_rel_slope IBSS_ATTR;   /* S7.24 format */
@@ -251,10 +253,10 @@ bool compressor_update(const struct compressor_settings *settings)
  *  Returns the required gain factor in S7.24 format in order to compress the
  *  sample in accordance with the compression curve.  Always 1 or less.
  */
-static inline int32_t get_compression_gain(struct dsp_data *data,
+static inline int32_t get_compression_gain(struct sample_format *format,
                                            int32_t sample)
 {
-    const int frac_bits_offset = data->frac_bits - 15;
+    const int frac_bits_offset = format->frac_bits - 15;
     
     /* sample must be positive */
     if (sample < 0)
@@ -292,24 +294,40 @@ static inline int32_t get_compression_gain(struct dsp_data *data,
     return -1;
 }
 
+/** DSP interface **/
+
+/** SET COMPRESSOR
+ *  Enable or disable the audio DSP's compressor stage based upon the settings
+ */
+void dsp_set_compressor(const struct compressor_settings *settings)
+{
+    /* the result of compressor_update() decides enable vs. disable */
+    bool enable = compressor_update(settings);
+    struct dsp_config *dsp = dsp_get_config(CODEC_IDX_AUDIO);
+    dsp_proc_enable(dsp, DSP_PROC_COMPRESSOR, enable);
+    dsp_proc_activate(dsp, DSP_PROC_COMPRESSOR, true); /* NOTE(review): always */
+}
+
 /** COMPRESSOR PROCESS
  *  Changes the gain of the samples according to the compressor curve
  */
-void compressor_process(int count, struct dsp_data *data, int32_t *buf[])
+static void compressor_process(struct dsp_proc_entry *this,
+                               struct dsp_buffer **buf_p)
 {
-    const int num_chan = data->num_channels;
-    int32_t *in_buf[2] = {buf[0], buf[1]};
-    
+    struct dsp_buffer *buf = *buf_p;
+    int count = buf->remcount;
+    int32_t *in_buf[2] = { buf->p32[0], buf->p32[1] };
+    const int num_chan = buf->format.num_channels;
+
     while (count-- > 0)
     {
-        int ch;
         /* use lowest (most compressed) gain factor of the output buffer
            sample pair for both samples (mono is also handled correctly here)
          */
         int32_t sample_gain = UNITY;
-        for (ch = 0; ch < num_chan; ch++)
+        for (int ch = 0; ch < num_chan; ch++)
         {
-            int32_t this_gain = get_compression_gain(data, *in_buf[ch]);
+            int32_t this_gain = get_compression_gain(&buf->format, *in_buf[ch]);
             if (this_gain < sample_gain)
                 sample_gain = this_gain;
         }
@@ -345,7 +363,7 @@ void compressor_process(int count, struct dsp_data *data, int32_t *buf[])
            output buffer sample pair/mono sample */
         if (total_gain != UNITY)
         {
-            for (ch = 0; ch < num_chan; ch++)
+            for (int ch = 0; ch < num_chan; ch++)
             {
                 *in_buf[ch] = FRACMUL_SHL(total_gain, *in_buf[ch], 7);
             }
@@ -353,9 +371,33 @@ void compressor_process(int count, struct dsp_data *data, int32_t *buf[])
         in_buf[0]++;
         in_buf[1]++;
     }
+
+    (void)this;
 }
 
-void compressor_reset(void)
+/* DSP message hook: installs compressor_process and resets release_gain */
+static intptr_t compressor_configure(struct dsp_proc_entry *this,
+                                     struct dsp_config *dsp,
+                                     unsigned int setting,
+                                     intptr_t value)
 {
-    release_gain = UNITY;
+    switch (setting)
+    {
+    case DSP_PROC_INIT:
+        if (value != 0)
+            break; /* Already enabled */
+        this->process[0] = compressor_process; /* Falls through to reset */
+    case DSP_RESET:
+    case DSP_FLUSH:
+        release_gain = UNITY;
+        break;
+    }
+
+    return 1;
+    (void)dsp; /* Unreachable; only references the otherwise unused dsp */
 }
+
+/* Database entry */
+DSP_PROC_DB_ENTRY(
+    COMPRESSOR,
+    compressor_configure);
diff --git a/lib/rbcodec/dsp/compressor.h b/lib/rbcodec/dsp/compressor.h
index d0e33f6e2c..e41950926e 100644
--- a/lib/rbcodec/dsp/compressor.h
+++ b/lib/rbcodec/dsp/compressor.h
@@ -18,12 +18,18 @@
  * KIND, either express or implied.
  *
  ****************************************************************************/
-
 #ifndef COMPRESSOR_H
 #define COMPRESSOR_H
 
-void compressor_process(int count, struct dsp_data *data, int32_t *buf[]);
-bool compressor_update(const struct compressor_settings *settings);
-void compressor_reset(void);
+struct compressor_settings
+{
+    int threshold;
+    int makeup_gain;
+    int ratio;
+    int knee;
+    int release_time;
+};
+
+void dsp_set_compressor(const struct compressor_settings *settings);
 
 #endif /* COMPRESSOR_H */
diff --git a/lib/rbcodec/dsp/crossfeed.c b/lib/rbcodec/dsp/crossfeed.c
new file mode 100644
index 0000000000..ecb55644ee
--- /dev/null
+++ b/lib/rbcodec/dsp/crossfeed.c
@@ -0,0 +1,214 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2006 Thom Johansen
+ * Copyright (C) 2012 Michael Sevakis
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+#include "config.h"
+#include "system.h"
+#include "dsp.h"
+#include "dsp_filter.h"
+#include "fixedpoint.h"
+#include "fracmul.h"
+#include "replaygain.h"
+#include <string.h>
+#include "dsp_proc_entry.h"
+
+/* Implemented here or in target assembly code */
+void crossfeed_process(struct dsp_proc_entry *this, struct dsp_buffer **buf_p);
+
+/**
+ * Applies crossfeed to the stereo signal.
+ *
+ * Crossfeed is a process where listening over speakers is simulated. This
+ * is good for old hard panned stereo records, which might be quite fatiguing
+ * to listen to on headphones with no crossfeed.
+ */
+
+/* Crossfeed state: one shared instance, reached through this->data */
+static struct crossfeed_state
+{
+    int32_t gain;           /* 00h: Direct path gain */
+    int32_t coefs[3];       /* 04h: Coefficients for the shelving filter */
+    int32_t history[4];     /* 10h: Format is x[n - 1], y[n - 1] (L + R) */
+    int32_t delay[13*2];    /* 20h: Delay line buffer (L + R interleaved) */
+    int32_t *index;         /* 88h: Current pointer into the delay line */
+    struct dsp_config *dsp; /* 8ch: Current DSP; NULL after DSP_PROC_CLOSE */
+                            /* 90h */
+} crossfeed_state IBSS_ATTR;
+
+/* Discard the sample histories: zero history[] and delay[], rewind index */
+static void crossfeed_flush(struct dsp_proc_entry *this)
+{
+    struct crossfeed_state *state = (void *)this->data;
+    memset(state->history, 0, sizeof (state->history));
+    memset(state->delay, 0, sizeof (state->delay));
+    state->index = state->delay; /* Back to the start of the delay line */
+}
+
+
+/** DSP interface **/
+
+/* Crossfeed boot/format change function: requires at least two channels */
+static void crossfeed_process_new_format(struct dsp_proc_entry *this,
+                                         struct dsp_buffer **buf_p)
+{
+    struct crossfeed_state *state = (void *)this->data;
+    struct dsp_buffer *buf = *buf_p;
+
+    DSP_PRINT_FORMAT(DSP_PROC_CROSSFEED, DSP_PROC_CROSSFEED, buf->format);
+
+    bool active = buf->format.num_channels >= 2; /* Process uses p32[0..1] */
+    dsp_proc_activate(state->dsp, DSP_PROC_CROSSFEED, active);
+
+    if (!active)
+    {
+        /* Can't do this. Drop the histories and sleep until next change */
+        crossfeed_flush(this);
+        DEBUGF("  DSP_PROC_CROSSFEED- deactivated\n");
+        return;
+    }
+
+    /* Switch to the real function and call it once */
+    this->process[0] = crossfeed_process;
+    dsp_proc_call(this, buf_p, (unsigned)buf->format.changed - 1);
+}
+
+/* Enable or disable the crossfeed on the audio DSP */
+void dsp_crossfeed_enable(bool enable)
+{
+    if (enable != !crossfeed_state.dsp)
+        return; /* Already in requested state: dsp is set by DSP_PROC_INIT */
+
+    struct dsp_config *dsp = dsp_get_config(CODEC_IDX_AUDIO);
+    dsp_proc_enable(dsp, DSP_PROC_CROSSFEED, enable);
+}
+
+/* Set the gain of the dry mix; the << 7 result saturates at 0x7fffffff */
+void dsp_set_crossfeed_direct_gain(int gain)
+{
+    uint32_t gain32 = get_replaygain_int(gain * 10);
+    crossfeed_state.gain =
+        gain32 >= (0x80000000ul >> 7) ? 0x7ffffffful: (gain32 << 7);
+}
+
+/* Both gains should be below 0 dB */
+void dsp_set_crossfeed_cross_params(long lf_gain, long hf_gain, long cutoff)
+{
+    int32_t *c = crossfeed_state.coefs;
+    long scaler = get_replaygain_int(lf_gain * 10) << 7;
+    /* NOTE(review): unlike the direct gain, the << 7 above is not saturated */
+    cutoff = 0xffffffff / NATIVE_FREQUENCY * cutoff;
+    hf_gain -= lf_gain;
+    /* Divide cutoff by sqrt(10^(hf_gain/20)) to place cutoff at the -3 dB
+     * point instead of shelf midpoint. This is for compatibility with the old
+     * crossfeed shelf filter and should be removed if crossfeed settings are
+     * ever made incompatible for any other good reason.
+     */
+    cutoff = fp_div(cutoff, get_replaygain_int(hf_gain*5), 24);
+    filter_shelf_coefs(cutoff, hf_gain, false, c);
+    /* Scale coefs by LF gain and shift them to s0.31 format. We have no gains
+     * over 1 and can do this safely
+     */
+    c[0] = FRACMUL_SHL(c[0], scaler, 4);
+    c[1] = FRACMUL_SHL(c[1], scaler, 4);
+    c[2] <<= 4;
+}
+
+#if !defined(CPU_COLDFIRE) && !defined(CPU_ARM)
+/* Apply the crossfeed to the buffer in place */
+void crossfeed_process(struct dsp_proc_entry *this, struct dsp_buffer **buf_p)
+{
+    struct crossfeed_state *state = (void *)this->data;
+    struct dsp_buffer *buf = *buf_p;
+    /* hist_l/hist_r each hold { x[n - 1], y[n - 1] } for one channel */
+    int32_t *hist_l = &state->history[0];
+    int32_t *hist_r = &state->history[2];
+    int32_t *delay = state->delay;
+    int32_t *coefs = &state->coefs[0];
+    int32_t gain = state->gain;
+    int32_t *di = state->index;
+
+    int count = buf->remcount;
+
+    for (int i = 0; i < count; i++)
+    {
+        int32_t left = buf->p32[0][i];
+        int32_t right = buf->p32[1][i];
+
+        /* Filter delayed sample from left speaker */
+        int32_t acc = FRACMUL(*di, coefs[0]);
+        acc += FRACMUL(hist_l[0], coefs[1]);
+        acc += FRACMUL(hist_l[1], coefs[2]);
+        /* Save filter history for left speaker */
+        hist_l[1] = acc;
+        hist_l[0] = *di;
+        *di++ = left; /* Delay slot is read above, then overwritten */
+        /* Filter delayed sample from right speaker */
+        acc = FRACMUL(*di, coefs[0]);
+        acc += FRACMUL(hist_r[0], coefs[1]);
+        acc += FRACMUL(hist_r[1], coefs[2]);
+        /* Save filter history for right speaker */
+        hist_r[1] = acc;
+        hist_r[0] = *di;
+        *di++ = right;
+        /* Now add the attenuated direct sound and write to outputs */
+        buf->p32[0][i] = FRACMUL(left, gain) + hist_r[1];  /* + filtered R */
+        buf->p32[1][i] = FRACMUL(right, gain) + hist_l[1]; /* + filtered L */
+
+        /* Wrap delay line index if bigger than delay line size */
+        if (di >= delay + 13*2)
+            di = delay;
+    }
+
+    /* Write back local copies of data we've modified */
+    state->index = di;
+}
+#endif /* CPU */
+
+/* DSP message hook: handles init, flush and close; always returns 1 */
+static intptr_t crossfeed_configure(struct dsp_proc_entry *this,
+                                    struct dsp_config *dsp,
+                                    unsigned int setting,
+                                    intptr_t value)
+{
+    switch (setting)
+    {
+    case DSP_PROC_INIT:
+        this->data = (intptr_t)&crossfeed_state;
+        this->process[0] = crossfeed_process_new_format;
+        this->process[1] = crossfeed_process_new_format;
+        ((struct crossfeed_state *)this->data)->dsp = dsp;
+        dsp_proc_activate(dsp, DSP_PROC_CROSSFEED, true);
+    case DSP_FLUSH: /* DSP_PROC_INIT falls through to here */
+        crossfeed_flush(this);
+        break;
+
+    case DSP_PROC_CLOSE:
+        ((struct crossfeed_state *)this->data)->dsp = NULL;
+        break;
+    }
+
+    return 1;
+    (void)value; /* Unreachable; only references the otherwise unused value */
+}
+
+/* Database entry */
+DSP_PROC_DB_ENTRY(
+    CROSSFEED,
+    crossfeed_configure);
diff --git a/lib/rbcodec/dsp/crossfeed.h b/lib/rbcodec/dsp/crossfeed.h
new file mode 100644
index 0000000000..63261bde9f
--- /dev/null
+++ b/lib/rbcodec/dsp/crossfeed.h
@@ -0,0 +1,28 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2006 Thom Johansen
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+#ifndef CROSSFEED_H
+#define CROSSFEED_H
+
+void dsp_crossfeed_enable(bool enable);
+void dsp_set_crossfeed_direct_gain(int gain);
+void dsp_set_crossfeed_cross_params(long lf_gain, long hf_gain, long cutoff);
+
+#endif /* CROSSFEED_H */
diff --git a/lib/rbcodec/dsp/dsp.c b/lib/rbcodec/dsp/dsp.c
deleted file mode 100644
index de647dc0dd..0000000000
--- a/lib/rbcodec/dsp/dsp.c
+++ /dev/null
@@ -1,1568 +0,0 @@
-/***************************************************************************
- *             __________               __   ___.
- *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
- *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
- *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
- *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
- *                     \/            \/     \/    \/            \/
- * $Id$
- *
- * Copyright (C) 2005 Miika Pekkarinen
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
- * KIND, either express or implied.
- *
- ****************************************************************************/
-#include "config.h"
-#include "system.h"
-#include <sound.h>
-#include "dsp.h"
-#include "dsp-util.h"
-#include "eq.h"
-#include "compressor.h"
-#include "kernel.h"
-#include "settings.h"
-#include "replaygain.h"
-#include "tdspeed.h"
-#include "core_alloc.h"
-#include "fixedpoint.h"
-#include "fracmul.h"
-
-/* Define LOGF_ENABLE to enable logf output in this file */
-/*#define LOGF_ENABLE*/
-#include "logf.h"
-
-/* 16-bit samples are scaled based on these constants. The shift should be
- * no more than 15.
- */
-#define WORD_SHIFT              12
-#define WORD_FRACBITS           27
-
-#define NATIVE_DEPTH            16
-#define SMALL_SAMPLE_BUF_COUNT  128 /* Per channel */
-#define DEFAULT_GAIN            0x01000000
-
-/* enums to index conversion properly with stereo mode and other settings */
-enum
-{
-    SAMPLE_INPUT_LE_NATIVE_I_STEREO  = STEREO_INTERLEAVED,
-    SAMPLE_INPUT_LE_NATIVE_NI_STEREO = STEREO_NONINTERLEAVED,
-    SAMPLE_INPUT_LE_NATIVE_MONO      = STEREO_MONO,
-    SAMPLE_INPUT_GT_NATIVE_I_STEREO  = STEREO_INTERLEAVED + STEREO_NUM_MODES,
-    SAMPLE_INPUT_GT_NATIVE_NI_STEREO = STEREO_NONINTERLEAVED + STEREO_NUM_MODES,
-    SAMPLE_INPUT_GT_NATIVE_MONO      = STEREO_MONO + STEREO_NUM_MODES,
-    SAMPLE_INPUT_GT_NATIVE_1ST_INDEX = STEREO_NUM_MODES
-};
-
-enum
-{
-    SAMPLE_OUTPUT_MONO = 0,
-    SAMPLE_OUTPUT_STEREO,
-    SAMPLE_OUTPUT_DITHERED_MONO,
-    SAMPLE_OUTPUT_DITHERED_STEREO
-};
-
-/* No asm...yet */
-struct dither_data
-{
-    long error[3];  /* 00h */
-    long random;    /* 0ch */
-                    /* 10h */
-};
-
-struct crossfeed_data
-{
-    int32_t gain;           /* 00h - Direct path gain */
-    int32_t coefs[3];       /* 04h - Coefficients for the shelving filter */
-    int32_t history[4];     /* 10h - Format is x[n - 1], y[n - 1] for both channels */
-    int32_t delay[13][2];   /* 20h */
-    int32_t *index;         /* 88h - Current pointer into the delay line */
-                            /* 8ch */
-};
-
-/* Current setup is one lowshelf filters three peaking filters and one
- *  highshelf filter. Varying the number of shelving filters make no sense,
- *  but adding peaking filters is possible.
- */
-struct eq_state
-{
-    char enabled[5];            /* 00h - Flags for active filters */
-    struct eqfilter filters[5]; /* 08h - packing is 4? */
-                                /* 10ch */
-};
-
-/* Include header with defines which functions are implemented in assembly
-   code for the target */
-#include <dsp_asm.h>
-
-/* Typedefs keep things much neater in this case */
-typedef void (*sample_input_fn_type)(int count, const char *src[],
-                                     int32_t *dst[]);
-typedef int (*resample_fn_type)(int count, struct dsp_data *data,
-                                const int32_t *src[], int32_t *dst[]);
-typedef void (*sample_output_fn_type)(int count, struct dsp_data *data,
-                                      const int32_t *src[], int16_t *dst);
-
-/* Single-DSP channel processing in place */
-typedef void (*channels_process_fn_type)(int count, int32_t *buf[]);
-/* DSP local channel processing in place */
-typedef void (*channels_process_dsp_fn_type)(int count, struct dsp_data *data,
-                                             int32_t *buf[]);
-
-/*
- ***************************************************************************/
-
-struct dsp_config
-{
-    struct dsp_data data; /* Config members for use in external routines */
-    long codec_frequency; /* Sample rate of data coming from the codec */
-    long frequency;       /* Effective sample rate after pitch shift (if any) */
-    int  sample_depth;
-    int  sample_bytes;
-    int  stereo_mode;
-    int32_t  tdspeed_percent; /* Speed% * PITCH_SPEED_PRECISION */
-#ifdef HAVE_PITCHSCREEN
-    bool tdspeed_active;  /* Timestretch is in use */
-#endif
-#ifdef HAVE_SW_TONE_CONTROLS
-    /* Filter struct for software bass/treble controls */
-    struct eqfilter tone_filter;
-#endif
-    /* Functions that change depending upon settings - NULL if stage is
-       disabled */
-    sample_input_fn_type         input_samples;
-    resample_fn_type             resample;
-    sample_output_fn_type        output_samples;
-    /* These will be NULL for the voice codec and is more economical that
-       way */
-    channels_process_dsp_fn_type apply_gain;
-    channels_process_fn_type     apply_crossfeed;
-    channels_process_fn_type     eq_process;
-    channels_process_fn_type     channels_process;
-    channels_process_dsp_fn_type compressor_process;
-};
-
-/* General DSP config */
-static struct dsp_config dsp_conf[2] IBSS_ATTR;     /* 0=A, 1=V */
-/* Dithering */
-static struct dither_data dither_data[2] IBSS_ATTR; /* 0=left, 1=right */
-static long   dither_mask IBSS_ATTR;
-static long   dither_bias IBSS_ATTR;
-/* Crossfeed */
-struct crossfeed_data crossfeed_data IDATA_ATTR =    /* A */
-{
-    .index = (int32_t *)crossfeed_data.delay
-};
-
-/* Equalizer */
-static struct eq_state eq_data;                     /* A */
-
-/* Software tone controls */
-#ifdef HAVE_SW_TONE_CONTROLS
-static int prescale;                                /* A/V */
-static int bass;                                    /* A/V */
-static int treble;                                  /* A/V */
-#endif
-
-/* Settings applicable to audio codec only */
-#ifdef HAVE_PITCHSCREEN
-static int32_t  pitch_ratio = PITCH_SPEED_100;
-static int  big_sample_locks;
-#endif
-static int  channels_mode;
-       long dsp_sw_gain;
-       long dsp_sw_cross;
-static bool dither_enabled;
-static long eq_precut;
-static long track_gain;
-static bool new_gain;
-static long album_gain;
-static long track_peak;
-static long album_peak;
-static long replaygain;
-static bool crossfeed_enabled;
-
-#define AUDIO_DSP (dsp_conf[CODEC_IDX_AUDIO])
-#define VOICE_DSP (dsp_conf[CODEC_IDX_VOICE])
-
-/* The internal format is 32-bit samples, non-interleaved, stereo. This
- * format is similar to the raw output from several codecs, so the amount
- * of copying needed is minimized for that case.
- */
-
-#define RESAMPLE_RATIO              4 /* Enough for 11,025 Hz -> 44,100 Hz */
-#define SMALL_RESAMPLE_BUF_COUNT    (SMALL_SAMPLE_BUF_COUNT * RESAMPLE_RATIO)
-#define BIG_SAMPLE_BUF_COUNT        SMALL_RESAMPLE_BUF_COUNT
-#define BIG_RESAMPLE_BUF_COUNT      (BIG_SAMPLE_BUF_COUNT * RESAMPLE_RATIO)
-
-static int32_t small_sample_buf[2][SMALL_SAMPLE_BUF_COUNT] IBSS_ATTR;
-static int32_t small_resample_buf[2][SMALL_RESAMPLE_BUF_COUNT] IBSS_ATTR;
-
-#ifdef HAVE_PITCHSCREEN
-static int32_t (* big_sample_buf)[BIG_SAMPLE_BUF_COUNT] = NULL;
-static int32_t (* big_resample_buf)[BIG_RESAMPLE_BUF_COUNT] = NULL;
-#endif
-
-static int sample_buf_count = SMALL_SAMPLE_BUF_COUNT;
-static int32_t *sample_buf[2] = { small_sample_buf[0], small_sample_buf[1] };
-static int resample_buf_count = SMALL_RESAMPLE_BUF_COUNT;
-static int32_t *resample_buf[2] = { small_resample_buf[0], small_resample_buf[1] };
-
-#ifdef HAVE_PITCHSCREEN
-int32_t sound_get_pitch(void)
-{
-    return pitch_ratio;
-}
-
-void sound_set_pitch(int32_t percent)
-{
-    pitch_ratio = percent;
-    dsp_configure(&AUDIO_DSP, DSP_SWITCH_FREQUENCY,
-                  AUDIO_DSP.codec_frequency);
-}
-
-static void tdspeed_set_pointers( bool time_stretch_active )
-{
-    if( time_stretch_active )
-    {
-        sample_buf_count = BIG_SAMPLE_BUF_COUNT;
-        resample_buf_count = BIG_RESAMPLE_BUF_COUNT;
-        sample_buf[0] = big_sample_buf[0];
-        sample_buf[1] = big_sample_buf[1];
-        resample_buf[0] = big_resample_buf[0];
-        resample_buf[1] = big_resample_buf[1];
-    }
-    else
-    {
-        sample_buf_count = SMALL_SAMPLE_BUF_COUNT;
-        resample_buf_count = SMALL_RESAMPLE_BUF_COUNT;
-        sample_buf[0] = small_sample_buf[0];
-        sample_buf[1] = small_sample_buf[1];
-        resample_buf[0] = small_resample_buf[0];
-        resample_buf[1] = small_resample_buf[1];
-    }
-}
- 
-static void tdspeed_setup(struct dsp_config *dspc)
-{
-    /* Assume timestretch will not be used */
-    dspc->tdspeed_active = false;
-
-    tdspeed_set_pointers( false );
-
-    if (!dsp_timestretch_available())
-        return; /* Timestretch not enabled or buffer not allocated */
-
-    if (dspc->tdspeed_percent == 0)
-        dspc->tdspeed_percent = PITCH_SPEED_100;
-
-    if (!tdspeed_config(
-        dspc->codec_frequency == 0 ? NATIVE_FREQUENCY : dspc->codec_frequency,
-        dspc->stereo_mode != STEREO_MONO,
-        dspc->tdspeed_percent))
-        return; /* Timestretch not possible or needed with these parameters */
-
-    /* Timestretch is to be used */
-    dspc->tdspeed_active = true;
-
-    tdspeed_set_pointers( true );
-}
-
-
-static int move_callback(int handle, void* current, void* new)
-{
-    (void)handle;(void)current;
-
-    if ( big_sample_locks > 0 )
-        return BUFLIB_CB_CANNOT_MOVE;
-    
-    big_sample_buf = new;
-    
-    /* no allocation without timestretch enabled */
-    tdspeed_set_pointers( true );
-    return BUFLIB_CB_OK;
-}
-
-static void lock_sample_buf( bool lock )
-{
-    if ( lock )
-        big_sample_locks++;
-    else
-        big_sample_locks--;
-}
-
-static struct buflib_callbacks ops = {
-    .move_callback = move_callback,
-    .shrink_callback = NULL,
-};
-
-
-void dsp_timestretch_enable(bool enabled)
-{
-    /* Hook to set up timestretch buffer on first call to settings_apply() */
-    static int handle = -1;
-    if (enabled)
-    {
-        if (big_sample_buf)
-            return; /* already allocated and enabled */
-
-        /* Set up timestretch buffers */
-        big_sample_buf = &small_resample_buf[0];
-        handle = core_alloc_ex("resample buf",
-                               2 * BIG_RESAMPLE_BUF_COUNT * sizeof(int32_t),
-                               &ops);
-        big_sample_locks = 0;
-        enabled = handle >= 0;
-
-        if (enabled)
-        {
-            /* success, now setup tdspeed */
-            big_resample_buf = core_get_data(handle);
-
-            tdspeed_init();
-            tdspeed_setup(&AUDIO_DSP);
-        }
-    }
-
-    if (!enabled)
-    {
-        dsp_set_timestretch(PITCH_SPEED_100);
-        tdspeed_finish();
-
-        if (handle >= 0)
-            core_free(handle);
-
-        handle = -1;
-        big_sample_buf = NULL;
-    }
-}
-
-void dsp_set_timestretch(int32_t percent)
-{
-    AUDIO_DSP.tdspeed_percent = percent;
-    tdspeed_setup(&AUDIO_DSP);
-}
-
-int32_t dsp_get_timestretch()
-{
-    return AUDIO_DSP.tdspeed_percent;
-}
-
-bool dsp_timestretch_available()
-{
-    return (global_settings.timestretch_enabled && big_sample_buf);
-}
-#endif /* HAVE_PITCHSCREEN */
-
-/* Convert count samples to the internal format, if needed.  Updates src
- * to point past the samples "consumed" and dst is set to point to the
- * samples to consume. Note that for mono, dst[0] equals dst[1], as there
- * is no point in processing the same data twice.
- */
-
-/* convert count 16-bit mono to 32-bit mono */
-static void sample_input_lte_native_mono(
-    int count, const char *src[], int32_t *dst[])
-{
-    const int16_t *s = (int16_t *) src[0];
-    const int16_t * const send = s + count;
-    int32_t *d = dst[0] = dst[1] = sample_buf[0];
-    int scale = WORD_SHIFT;
-
-    while (s < send)
-    {
-        *d++ = *s++ << scale;
-    }
-
-    src[0] = (char *)s;
-}
-
-/* convert count 16-bit interleaved stereo to 32-bit noninterleaved */
-static void sample_input_lte_native_i_stereo(
-    int count, const char *src[], int32_t *dst[])
-{
-    const int32_t *s = (int32_t *) src[0];
-    const int32_t * const send = s + count;
-    int32_t *dl = dst[0] = sample_buf[0];
-    int32_t *dr = dst[1] = sample_buf[1];
-    int scale = WORD_SHIFT;
-
-    while (s < send)
-    {
-        int32_t slr = *s++;
-#ifdef ROCKBOX_LITTLE_ENDIAN
-        *dl++ = (slr >> 16) << scale;
-        *dr++ = (int32_t)(int16_t)slr << scale;
-#else  /* ROCKBOX_BIG_ENDIAN */
-        *dl++ = (int32_t)(int16_t)slr << scale;
-        *dr++ = (slr >> 16) << scale;
-#endif
-    }
-
-    src[0] = (char *)s;
-}
-
-/* convert count 16-bit noninterleaved stereo to 32-bit noninterleaved */
-static void sample_input_lte_native_ni_stereo(
-    int count, const char *src[], int32_t *dst[])
-{
-    const int16_t *sl = (int16_t *) src[0];
-    const int16_t *sr = (int16_t *) src[1];
-    const int16_t * const slend = sl + count;
-    int32_t *dl = dst[0] = sample_buf[0];
-    int32_t *dr = dst[1] = sample_buf[1];
-    int scale = WORD_SHIFT;
-
-    while (sl < slend)
-    {
-        *dl++ = *sl++ << scale;
-        *dr++ = *sr++ << scale;
-    }
-
-    src[0] = (char *)sl;
-    src[1] = (char *)sr;
-}
-
-/* convert count 32-bit mono to 32-bit mono */
-static void sample_input_gt_native_mono(
-    int count, const char *src[], int32_t *dst[])
-{
-    dst[0] = dst[1] = (int32_t *)src[0];
-    src[0] = (char *)(dst[0] + count);
-}
-
-/* convert count 32-bit interleaved stereo to 32-bit noninterleaved stereo */
-static void sample_input_gt_native_i_stereo(
-    int count, const char *src[], int32_t *dst[])
-{
-    const int32_t *s = (int32_t *)src[0];
-    const int32_t * const send = s + 2*count;
-    int32_t *dl = dst[0] = sample_buf[0];
-    int32_t *dr = dst[1] = sample_buf[1];
-
-    while (s < send)
-    {
-        *dl++ = *s++;
-        *dr++ = *s++;
-    }
-
-    src[0] = (char *)send;
-}
-
-/* convert 32 bit-noninterleaved stereo to 32-bit noninterleaved stereo */
-static void sample_input_gt_native_ni_stereo(
-    int count, const char *src[], int32_t *dst[])
-{
-    dst[0] = (int32_t *)src[0];
-    dst[1] = (int32_t *)src[1];
-    src[0] = (char *)(dst[0] + count);
-    src[1] = (char *)(dst[1] + count);
-}
-
-/**
- * sample_input_new_format()
- *
- * set the to-native sample conversion function based on dsp sample parameters
- *
- * !DSPPARAMSYNC
- * needs syncing with changes to the following dsp parameters:
- *  * dsp->stereo_mode (A/V)
- *  * dsp->sample_depth (A/V)
- */
-static void sample_input_new_format(struct dsp_config *dsp)
-{
-    static const sample_input_fn_type sample_input_functions[] =
-    {
-        [SAMPLE_INPUT_LE_NATIVE_I_STEREO]  = sample_input_lte_native_i_stereo,
-        [SAMPLE_INPUT_LE_NATIVE_NI_STEREO] = sample_input_lte_native_ni_stereo,
-        [SAMPLE_INPUT_LE_NATIVE_MONO]      = sample_input_lte_native_mono,
-        [SAMPLE_INPUT_GT_NATIVE_I_STEREO]  = sample_input_gt_native_i_stereo,
-        [SAMPLE_INPUT_GT_NATIVE_NI_STEREO] = sample_input_gt_native_ni_stereo,
-        [SAMPLE_INPUT_GT_NATIVE_MONO]      = sample_input_gt_native_mono,
-    };
-
-    int convert = dsp->stereo_mode;
-
-    if (dsp->sample_depth > NATIVE_DEPTH)
-        convert += SAMPLE_INPUT_GT_NATIVE_1ST_INDEX;
-
-    dsp->input_samples = sample_input_functions[convert];
-}
-
-
-#ifndef DSP_HAVE_ASM_SAMPLE_OUTPUT_MONO
-/* write mono internal format to output format */
-static void sample_output_mono(int count, struct dsp_data *data,
-                               const int32_t *src[], int16_t *dst)
-{
-    const int32_t *s0 = src[0];
-    const int scale = data->output_scale;
-    const int dc_bias = 1 << (scale - 1);
-
-    while (count-- > 0)
-    {
-        int32_t lr = clip_sample_16((*s0++ + dc_bias) >> scale);
-        *dst++ = lr;
-        *dst++ = lr;
-    }
-}
-#endif /* DSP_HAVE_ASM_SAMPLE_OUTPUT_MONO */
-
-/* write stereo internal format to output format */
-#ifndef DSP_HAVE_ASM_SAMPLE_OUTPUT_STEREO
-static void sample_output_stereo(int count, struct dsp_data *data,
-                                 const int32_t *src[], int16_t *dst)
-{
-    const int32_t *s0 = src[0];
-    const int32_t *s1 = src[1];
-    const int scale = data->output_scale;
-    const int dc_bias = 1 << (scale - 1);
-
-    while (count-- > 0)
-    {
-        *dst++ = clip_sample_16((*s0++ + dc_bias) >> scale);
-        *dst++ = clip_sample_16((*s1++ + dc_bias) >> scale);
-    }
-}
-#endif /* DSP_HAVE_ASM_SAMPLE_OUTPUT_STEREO */
-
-/**
- * The "dither" code to convert the 24-bit samples produced by libmad was
- * taken from the coolplayer project - coolplayer.sourceforge.net
- *
- * This function handles mono and stereo outputs.
- */
-static void sample_output_dithered(int count, struct dsp_data *data,
-                                   const int32_t *src[], int16_t *dst)
-{
-    const int32_t mask = dither_mask;
-    const int32_t bias = dither_bias;
-    const int scale = data->output_scale;
-    const int32_t min = data->clip_min;
-    const int32_t max = data->clip_max;
-    const int32_t range = max - min;
-    int ch;
-    int16_t *d;
-
-    for (ch = 0; ch < data->num_channels; ch++)
-    {
-        struct dither_data * const dither = &dither_data[ch];
-        const int32_t *s = src[ch];
-        int i;
-
-        for (i = 0, d = &dst[ch]; i < count; i++, s++, d += 2)
-        {
-            int32_t output, sample;
-            int32_t random;
-
-            /* Noise shape and bias (for correct rounding later) */
-            sample = *s;
-            sample += dither->error[0] - dither->error[1] + dither->error[2];
-            dither->error[2] = dither->error[1];
-            dither->error[1] = dither->error[0]/2;
-
-            output = sample + bias;
-
-            /* Dither, highpass triangle PDF */
-            random = dither->random*0x0019660dL + 0x3c6ef35fL;
-            output += (random & mask) - (dither->random & mask);
-            dither->random = random;
-
-            /* Round sample to output range */
-            output &= ~mask;
-
-            /* Error feedback */
-            dither->error[0] = sample - output;
-
-            /* Clip */
-            if ((uint32_t)(output - min) > (uint32_t)range)
-            {
-                int32_t c = min;
-                if (output > min)
-                    c += range;
-                output = c;
-            }
-
-            /* Quantize and store */
-            *d = output >> scale;
-        }
-    }
-
-    if (data->num_channels == 2)
-        return;
-
-    /* Have to duplicate left samples into the right channel since
-       pcm buffer and hardware is interleaved stereo */
-    d = &dst[0];
-
-    while (count-- > 0)
-    {
-        int16_t s = *d++;
-        *d++ = s;
-    }
-}
-
-/**
- * sample_output_new_format()
- *
- * set the from-native to ouput sample conversion routine
- *
- * !DSPPARAMSYNC
- * needs syncing with changes to the following dsp parameters:
- *  * dsp->stereo_mode (A/V)
- *  * dither_enabled (A)
- */
-static void sample_output_new_format(struct dsp_config *dsp)
-{
-    static const sample_output_fn_type sample_output_functions[] =
-    {
-        sample_output_mono,
-        sample_output_stereo,
-        sample_output_dithered,
-        sample_output_dithered
-    };
-
-    int out = dsp->data.num_channels - 1;
-
-    if (dsp == &AUDIO_DSP && dither_enabled)
-        out += 2;
-
-    dsp->output_samples = sample_output_functions[out];
-}
-
-/**
- * Linear interpolation resampling that introduces a one sample delay because
- * of our inability to look into the future at the end of a frame.
- */
-#ifndef DSP_HAVE_ASM_RESAMPLING
-static int dsp_downsample(int count, struct dsp_data *data,
-                          const int32_t *src[], int32_t *dst[])
-{
-    int ch = data->num_channels - 1;
-    uint32_t delta = data->resample_data.delta;
-    uint32_t phase, pos;
-    int32_t *d;
-
-    /* Rolled channel loop actually showed slightly faster. */
-    do
-    {
-        /* Just initialize things and not worry too much about the relatively
-         * uncommon case of not being able to spit out a sample for the frame.
-         */
-        const int32_t *s = src[ch];
-        int32_t last = data->resample_data.last_sample[ch];
-
-        data->resample_data.last_sample[ch] = s[count - 1];
-        d = dst[ch];
-        phase = data->resample_data.phase;
-        pos = phase >> 16;
-
-        /* Do we need last sample of previous frame for interpolation? */
-        if (pos > 0)
-            last = s[pos - 1];
-
-        while (pos < (uint32_t)count)
-        {
-            *d++ = last + FRACMUL((phase & 0xffff) << 15, s[pos] - last);
-            phase += delta;
-            pos = phase >> 16;
-            last = s[pos - 1];
-        }
-    }
-    while (--ch >= 0);
-
-    /* Wrap phase accumulator back to start of next frame. */
-    data->resample_data.phase = phase - (count << 16);
-    return d - dst[0];
-}
-
-static int dsp_upsample(int count, struct dsp_data *data,
-                        const int32_t *src[], int32_t *dst[])
-{
-    int  ch = data->num_channels - 1;
-    uint32_t delta = data->resample_data.delta;
-    uint32_t phase, pos;
-    int32_t *d;
-
-    /* Rolled channel loop actually showed slightly faster. */
-    do
-    {
-        /* Should always be able to output a sample for a ratio up to RESAMPLE_RATIO */
-        const int32_t *s = src[ch];
-        int32_t last = data->resample_data.last_sample[ch];
-
-        data->resample_data.last_sample[ch] = s[count - 1];
-        d = dst[ch];
-        phase = data->resample_data.phase;
-        pos = phase >> 16;
-
-        while (pos == 0)
-        {
-            *d++ = last + FRACMUL((phase & 0xffff) << 15, s[0] - last);
-            phase += delta;
-            pos = phase >> 16;
-        }
-
-        while (pos < (uint32_t)count)
-        {
-            last = s[pos - 1];
-            *d++ = last + FRACMUL((phase & 0xffff) << 15, s[pos] - last);
-            phase += delta;
-            pos = phase >> 16;
-        }
-    }
-    while (--ch >= 0);
-
-    /* Wrap phase accumulator back to start of next frame. */
-    data->resample_data.phase = phase & 0xffff;
-    return d - dst[0];
-}
-#endif /* DSP_HAVE_ASM_RESAMPLING */
-
-static void resampler_new_delta(struct dsp_config *dsp)
-{
-    dsp->data.resample_data.delta = (unsigned long)
-        dsp->frequency * 65536LL / NATIVE_FREQUENCY;
-
-    if (dsp->frequency == NATIVE_FREQUENCY)
-    {
-        /* NOTE: If fully glitch-free transistions from no resampling to
-           resampling are desired, last_sample history should be maintained
-           even when not resampling. */
-        dsp->resample = NULL;
-        dsp->data.resample_data.phase = 0;
-        dsp->data.resample_data.last_sample[0] = 0;
-        dsp->data.resample_data.last_sample[1] = 0;
-    }
-    else if (dsp->frequency < NATIVE_FREQUENCY)
-        dsp->resample = dsp_upsample;
-    else
-        dsp->resample = dsp_downsample;
-}
-
-/* Resample count stereo samples. Updates the src array, if resampling is
- * done, to refer to the resampled data. Returns number of stereo samples
- * for further processing.
- */
-static inline int resample(struct dsp_config *dsp, int count, int32_t *src[])
-{
-    int32_t *dst[2] =
-    {
-        resample_buf[0],
-        resample_buf[1]
-    };
-    lock_sample_buf( true );
-    count = dsp->resample(count, &dsp->data, (const int32_t **)src, dst);
-
-    src[0] = dst[0];
-    src[1] = dst[dsp->data.num_channels - 1];
-    lock_sample_buf( false );
-    return count;
-}
-
-static void dither_init(struct dsp_config *dsp)
-{
-    memset(dither_data, 0, sizeof (dither_data));
-    dither_bias = (1L << (dsp->data.frac_bits - NATIVE_DEPTH));
-    dither_mask = (1L << (dsp->data.frac_bits + 1 - NATIVE_DEPTH)) - 1;
-}
-
-void dsp_dither_enable(bool enable)
-{
-    struct dsp_config *dsp = &AUDIO_DSP;
-    dither_enabled = enable;
-    sample_output_new_format(dsp);
-}
-
-/* Applies crossfeed to the stereo signal in src.
- * Crossfeed is a process where listening over speakers is simulated. This
- * is good for old hard panned stereo records, which might be quite fatiguing
- * to listen to on headphones with no crossfeed.
- */
-#ifndef DSP_HAVE_ASM_CROSSFEED
-static void apply_crossfeed(int count, int32_t *buf[])
-{
-    int32_t *hist_l = &crossfeed_data.history[0];
-    int32_t *hist_r = &crossfeed_data.history[2];
-    int32_t *delay = &crossfeed_data.delay[0][0];
-    int32_t *coefs = &crossfeed_data.coefs[0];
-    int32_t gain = crossfeed_data.gain;
-    int32_t *di = crossfeed_data.index;
-
-    int32_t acc;
-    int32_t left, right;
-    int i;
-
-    for (i = 0; i < count; i++)
-    {
-        left = buf[0][i];
-        right = buf[1][i];
-
-        /* Filter delayed sample from left speaker */
-        acc = FRACMUL(*di, coefs[0]);
-        acc += FRACMUL(hist_l[0], coefs[1]);
-        acc += FRACMUL(hist_l[1], coefs[2]);
-        /* Save filter history for left speaker */
-        hist_l[1] = acc;
-        hist_l[0] = *di;
-        *di++ = left;
-        /* Filter delayed sample from right speaker */
-        acc = FRACMUL(*di, coefs[0]);
-        acc += FRACMUL(hist_r[0], coefs[1]);
-        acc += FRACMUL(hist_r[1], coefs[2]);
-        /* Save filter history for right speaker */
-        hist_r[1] = acc;
-        hist_r[0] = *di;
-        *di++ = right;
-        /* Now add the attenuated direct sound and write to outputs */
-        buf[0][i] = FRACMUL(left, gain) + hist_r[1];
-        buf[1][i] = FRACMUL(right, gain) + hist_l[1];
-
-        /* Wrap delay line index if bigger than delay line size */
-        if (di >= delay + 13*2)
-            di = delay;
-    }
-    /* Write back local copies of data we've modified */
-    crossfeed_data.index = di;
-}
-#endif /* DSP_HAVE_ASM_CROSSFEED */
-
-/**
- * dsp_set_crossfeed(bool enable)
- *
- * !DSPPARAMSYNC
- * needs syncing with changes to the following dsp parameters:
- *  * dsp->stereo_mode (A)
- */
-void dsp_set_crossfeed(bool enable)
-{
-    crossfeed_enabled = enable;
-    AUDIO_DSP.apply_crossfeed = (enable && AUDIO_DSP.data.num_channels > 1)
-                                    ? apply_crossfeed : NULL;
-}
-
-void dsp_set_crossfeed_direct_gain(int gain)
-{
-    crossfeed_data.gain = get_replaygain_int(gain * 10) << 7;
-    /* If gain is negative, the calculation overflowed and we need to clamp */
-    if (crossfeed_data.gain < 0)
-        crossfeed_data.gain = 0x7fffffff;
-}
-
-/* Both gains should be below 0 dB */
-void dsp_set_crossfeed_cross_params(long lf_gain, long hf_gain, long cutoff)
-{
-    int32_t *c = crossfeed_data.coefs;
-    long scaler = get_replaygain_int(lf_gain * 10) << 7;
-
-    cutoff = 0xffffffff/NATIVE_FREQUENCY*cutoff;
-    hf_gain -= lf_gain;
-    /* Divide cutoff by sqrt(10^(hf_gain/20)) to place cutoff at the -3 dB
-     * point instead of shelf midpoint. This is for compatibility with the old
-     * crossfeed shelf filter and should be removed if crossfeed settings are
-     * ever made incompatible for any other good reason.
-     */
-    cutoff = fp_div(cutoff, get_replaygain_int(hf_gain*5), 24);
-    filter_shelf_coefs(cutoff, hf_gain, false, c);
-    /* Scale coefs by LF gain and shift them to s0.31 format. We have no gains
-     * over 1 and can do this safely
-     */
-    c[0] = FRACMUL_SHL(c[0], scaler, 4);
-    c[1] = FRACMUL_SHL(c[1], scaler, 4);
-    c[2] <<= 4;
-}
-
-/* Apply a constant gain to the samples (e.g., for ReplayGain).
- * Note that this must be called before the resampler.
- */
-#ifndef DSP_HAVE_ASM_APPLY_GAIN
-static void dsp_apply_gain(int count, struct dsp_data *data, int32_t *buf[])
-{
-    const int32_t gain = data->gain;
-    int ch;
-
-    for (ch = 0; ch < data->num_channels; ch++)
-    {
-        int32_t *d = buf[ch];
-        int i;
-
-        for (i = 0; i < count; i++)
-            d[i] = FRACMUL_SHL(d[i], gain, 8);
-    }
-}
-#endif /* DSP_HAVE_ASM_APPLY_GAIN */
-
-/* Combine all gains to a global gain. */
-static void set_gain(struct dsp_config *dsp)
-{
-    /* gains are in S7.24 format */
-    dsp->data.gain = DEFAULT_GAIN;
-
-    /* Replay gain not relevant to voice */
-    if (dsp == &AUDIO_DSP && replaygain)
-    {
-        dsp->data.gain = replaygain;
-    }
-
-    if (dsp->eq_process && eq_precut)
-    {
-        dsp->data.gain = fp_mul(dsp->data.gain, eq_precut, 24);
-    }
-
-#ifdef HAVE_SW_VOLUME_CONTROL
-    if (global_settings.volume < SW_VOLUME_MAX ||
-        global_settings.volume > SW_VOLUME_MIN)
-    {
-        int vol_gain = get_replaygain_int(global_settings.volume * 100);
-        dsp->data.gain = (long) (((int64_t) dsp->data.gain * vol_gain) >> 24);
-    }
-#endif
-
-    if (dsp->data.gain == DEFAULT_GAIN)
-    {
-        dsp->data.gain = 0;
-    }
-    else
-    {
-        dsp->data.gain >>= 1;   /* convert gain to S8.23 format */
-    }
-
-    dsp->apply_gain = dsp->data.gain != 0 ? dsp_apply_gain : NULL;
-}
-
-/**
- * Update the amount to cut the audio before applying the equalizer.
- *
- * @param precut to apply in decibels (multiplied by 10)
- */
-void dsp_set_eq_precut(int precut)
-{
-    eq_precut = get_replaygain_int(precut * -10);
-    set_gain(&AUDIO_DSP);
-}
-
-/**
- * Synchronize the equalizer filter coefficients with the global settings.
- *
- * @param band the equalizer band to synchronize
- */
-void dsp_set_eq_coefs(int band, int cutoff, int q, int gain)
-{
-    /* Convert user settings to format required by coef generator functions */
-    cutoff = 0xffffffff / NATIVE_FREQUENCY * cutoff;
-
-    if (q == 0)
-        q = 1;
-
-    /* NOTE: The coef functions assume the EMAC unit is in fractional mode,
-       which it should be, since we're executed from the main thread. */
-
-    /* Assume a band is disabled if the gain is zero */
-    if (gain == 0)
-    {
-        eq_data.enabled[band] = 0;
-    }
-    else
-    {
-        if (band == 0)
-            eq_ls_coefs(cutoff, q, gain, eq_data.filters[band].coefs);
-        else if (band == 4)
-            eq_hs_coefs(cutoff, q, gain, eq_data.filters[band].coefs);
-        else
-            eq_pk_coefs(cutoff, q, gain, eq_data.filters[band].coefs);
-
-        eq_data.enabled[band] = 1;
-    }
-}
-
-/* Apply EQ filters to those bands that have got it switched on. */
-static void eq_process(int count, int32_t *buf[])
-{
-    static const int shifts[] =
-    {
-        EQ_SHELF_SHIFT,  /* low shelf  */
-        EQ_PEAK_SHIFT,   /* peaking    */
-        EQ_PEAK_SHIFT,   /* peaking    */
-        EQ_PEAK_SHIFT,   /* peaking    */
-        EQ_SHELF_SHIFT,  /* high shelf */
-    };
-    unsigned int channels = AUDIO_DSP.data.num_channels;
-    int i;
-
-    /* filter configuration currently is 1 low shelf filter, 3 band peaking
-       filters and 1 high shelf filter, in that order. we need to know this
-       so we can choose the correct shift factor.
-     */
-    for (i = 0; i < 5; i++)
-    {
-        if (!eq_data.enabled[i])
-            continue;
-        eq_filter(buf, &eq_data.filters[i], count, channels, shifts[i]);
-    }
-}
-
-/**
- * Use to enable the equalizer.
- *
- * @param enable true to enable the equalizer
- */
-void dsp_set_eq(bool enable)
-{
-    AUDIO_DSP.eq_process = enable ? eq_process : NULL;
-    set_gain(&AUDIO_DSP);
-}
-
-static void dsp_set_stereo_width(int value)
-{
-    long width, straight, cross;
-
-    width = value * 0x7fffff / 100;
-
-    if (value <= 100)
-    {
-        straight = (0x7fffff + width) / 2;
-        cross = straight - width;
-    }
-    else
-    {
-        /* straight = (1 + width) / (2 * width) */
-        straight = ((int64_t)(0x7fffff + width) << 22) / width;
-        cross = straight - 0x7fffff;
-    }
-
-    dsp_sw_gain  = straight << 8;
-    dsp_sw_cross = cross << 8;
-}
-
-/**
- * Implements the different channel configurations and stereo width.
- */
-
-/* SOUND_CHAN_STEREO mode is a noop so has no function - just outline one for
- * completeness. */
-#if 0
-static void channels_process_sound_chan_stereo(int count, int32_t *buf[])
-{
-    /* The channels are each just themselves */
-    (void)count; (void)buf;
-}
-#endif
-
-#ifndef DSP_HAVE_ASM_SOUND_CHAN_MONO
-static void channels_process_sound_chan_mono(int count, int32_t *buf[])
-{
-    int32_t *sl = buf[0], *sr = buf[1];
-
-    while (count-- > 0)
-    {
-        int32_t lr = *sl/2 + *sr/2;
-        *sl++ = lr;
-        *sr++ = lr;
-    }
-}
-#endif /* DSP_HAVE_ASM_SOUND_CHAN_MONO */
-
-#ifndef DSP_HAVE_ASM_SOUND_CHAN_CUSTOM
-static void channels_process_sound_chan_custom(int count, int32_t *buf[])
-{
-    const int32_t gain  = dsp_sw_gain;
-    const int32_t cross = dsp_sw_cross;
-    int32_t *sl = buf[0], *sr = buf[1];
-
-    while (count-- > 0)
-    {
-        int32_t l = *sl;
-        int32_t r = *sr;
-        *sl++ = FRACMUL(l, gain) + FRACMUL(r, cross);
-        *sr++ = FRACMUL(r, gain) + FRACMUL(l, cross);
-    }
-}
-#endif /* DSP_HAVE_ASM_SOUND_CHAN_CUSTOM */
-
-static void channels_process_sound_chan_mono_left(int count, int32_t *buf[])
-{
-    /* Just copy over the other channel */
-    memcpy(buf[1], buf[0], count * sizeof (*buf));
-}
-
-static void channels_process_sound_chan_mono_right(int count, int32_t *buf[])
-{
-    /* Just copy over the other channel */
-    memcpy(buf[0], buf[1], count * sizeof (*buf));
-}
-
-#ifndef DSP_HAVE_ASM_SOUND_CHAN_KARAOKE
-static void channels_process_sound_chan_karaoke(int count, int32_t *buf[])
-{
-    int32_t *sl = buf[0], *sr = buf[1];
-
-    while (count-- > 0)
-    {
-        int32_t ch = *sl/2 - *sr/2;
-        *sl++ = ch;
-        *sr++ = -ch;
-    }
-}
-#endif /* DSP_HAVE_ASM_SOUND_CHAN_KARAOKE */
-
-static void dsp_set_channel_config(int value)
-{
-    static const channels_process_fn_type channels_process_functions[] =
-    {
-        /* SOUND_CHAN_STEREO = All-purpose index for no channel processing */
-        [SOUND_CHAN_STEREO]     = NULL,
-        [SOUND_CHAN_MONO]       = channels_process_sound_chan_mono,
-        [SOUND_CHAN_CUSTOM]     = channels_process_sound_chan_custom,
-        [SOUND_CHAN_MONO_LEFT]  = channels_process_sound_chan_mono_left,
-        [SOUND_CHAN_MONO_RIGHT] = channels_process_sound_chan_mono_right,
-        [SOUND_CHAN_KARAOKE]    = channels_process_sound_chan_karaoke,
-    };
-
-    if ((unsigned)value >= ARRAYLEN(channels_process_functions) ||
-        AUDIO_DSP.stereo_mode == STEREO_MONO)
-    {
-        value = SOUND_CHAN_STEREO;
-    }
-
-    /* This doesn't apply to voice */
-    channels_mode = value;
-    AUDIO_DSP.channels_process = channels_process_functions[value];
-}
-
-#if CONFIG_CODEC == SWCODEC
-
-#ifdef HAVE_SW_TONE_CONTROLS
-static void set_tone_controls(void)
-{
-    filter_bishelf_coefs(0xffffffff/NATIVE_FREQUENCY*200,
-                         0xffffffff/NATIVE_FREQUENCY*3500,
-                         bass, treble, -prescale,
-                         AUDIO_DSP.tone_filter.coefs);
-    /* Sync the voice dsp coefficients */
-    memcpy(&VOICE_DSP.tone_filter.coefs, AUDIO_DSP.tone_filter.coefs,
-           sizeof (VOICE_DSP.tone_filter.coefs));
-}
-#endif
-
-/* Hook back from firmware/ part of audio, which can't/shouldn't call apps/
- * code directly.
- */
-int dsp_callback(int msg, intptr_t param)
-{
-    switch (msg)
-    {
-#ifdef HAVE_SW_TONE_CONTROLS
-    case DSP_CALLBACK_SET_PRESCALE:
-        prescale = param;
-        set_tone_controls();
-        break;
-    /* prescaler is always set after calling any of these, so we wait with
-     * calculating coefs until the above case is hit.
-     */
-    case DSP_CALLBACK_SET_BASS:
-        bass = param;
-        break;
-    case DSP_CALLBACK_SET_TREBLE:
-        treble = param;
-        break;
-#ifdef HAVE_SW_VOLUME_CONTROL
-    case DSP_CALLBACK_SET_SW_VOLUME:
-        set_gain(&AUDIO_DSP);
-        break;
-#endif
-#endif
-    case DSP_CALLBACK_SET_CHANNEL_CONFIG:
-        dsp_set_channel_config(param);
-        break;
-    case DSP_CALLBACK_SET_STEREO_WIDTH:
-        dsp_set_stereo_width(param);
-        break;
-    default:
-        break;
-    }
-    return 0;
-}
-#endif
-
-/* Process and convert src audio to dst based on the DSP configuration,
- * reading count number of audio samples. dst is assumed to be large
- * enough; use dsp_output_count() to get the required number. src is an
- * array of pointers; for mono and interleaved stereo, it contains one
- * pointer to the start of the audio data and the other is ignored; for
- * non-interleaved stereo, it contains two pointers, one for each audio
- * channel. Returns number of bytes written to dst.
- */
-int dsp_process(struct dsp_config *dsp, char *dst, const char *src[], int count)
-{
-    static int32_t *tmp[2]; /* tdspeed_doit() needs it static */
-    static long last_yield;
-    long tick;
-    int written = 0;
-
-#if defined(CPU_COLDFIRE)
-    /* set emac unit for dsp processing, and save old macsr, we're running in
-       codec thread context at this point, so can't clobber it */
-    unsigned long old_macsr = coldfire_get_macsr();
-    coldfire_set_macsr(EMAC_FRACTIONAL | EMAC_SATURATE);
-#endif
-
-    if (new_gain)
-        dsp_set_replaygain(); /* Gain has changed */
-
-    /* Perform at least one yield before starting */
-    last_yield = current_tick;
-    yield();
-
-    /* Testing function pointers for NULL is preferred since the pointer
-       will be preloaded to be used for the call if not. */
-    while (count > 0)
-    {
-        int samples = MIN(sample_buf_count, count);
-        count -= samples;
-
-        dsp->input_samples(samples, src, tmp);
-
-#ifdef HAVE_PITCHSCREEN
-        if (dsp->tdspeed_active)
-            samples = tdspeed_doit(tmp, samples);
-#endif
-        
-        int chunk_offset = 0;
-        while (samples > 0)
-        {
-            int32_t *t2[2];
-            t2[0] = tmp[0]+chunk_offset;
-            t2[1] = tmp[1]+chunk_offset;
-
-            int chunk = MIN(sample_buf_count, samples);
-            chunk_offset += chunk;
-            samples -= chunk;
-
-            if (dsp->apply_gain)
-                dsp->apply_gain(chunk, &dsp->data, t2);
-
-            if (dsp->resample && (chunk = resample(dsp, chunk, t2)) <= 0)
-                break; /* I'm pretty sure we're downsampling here */
-
-            if (dsp->apply_crossfeed)
-                dsp->apply_crossfeed(chunk, t2);
-
-            if (dsp->eq_process)
-                dsp->eq_process(chunk, t2);
-
-#ifdef HAVE_SW_TONE_CONTROLS
-            if ((bass | treble) != 0)
-                eq_filter(t2, &dsp->tone_filter, chunk,
-                      dsp->data.num_channels, FILTER_BISHELF_SHIFT);
-#endif
-
-            if (dsp->channels_process)
-                dsp->channels_process(chunk, t2);
-            
-            if (dsp->compressor_process)
-                dsp->compressor_process(chunk, &dsp->data, t2);
-
-            dsp->output_samples(chunk, &dsp->data, (const int32_t **)t2, (int16_t *)dst);
-
-            written += chunk;
-            dst += chunk * sizeof (int16_t) * 2;
-
-            /* yield at least once each tick */
-            tick = current_tick;
-            if (TIME_AFTER(tick, last_yield))
-            {
-                last_yield = tick;
-                yield();
-            }
-        }
-    }
-
-#if defined(CPU_COLDFIRE)
-    /* set old macsr again */
-    coldfire_set_macsr(old_macsr);
-#endif
-    return written;
-}
-
-/* Given count number of input samples, calculate the maximum number of
- * samples of output data that would be generated (the calculation is not
- * entirely exact and rounds upwards to be on the safe side; during
- * resampling, the number of samples generated depends on the current state
- * of the resampler).
- */
-/* dsp_input_size MUST be called afterwards */
-int dsp_output_count(struct dsp_config *dsp, int count)
-{
-#ifdef HAVE_PITCHSCREEN
-    if (dsp->tdspeed_active)
-        count = tdspeed_est_output_size();
-#endif
-    if (dsp->resample)
-    {
-        count = (int)(((unsigned long)count * NATIVE_FREQUENCY
-                    + (dsp->frequency - 1)) / dsp->frequency);
-    }
-
-    /* Now we have the resampled sample count which must not exceed
-     * resample_buf_count to avoid resample buffer overflow. One
-     * must call dsp_input_count() to get the correct input sample
-     * count.
-     */
-    if (count > resample_buf_count)
-        count = resample_buf_count;
-        
-    return count;
-}
-
-/* Given count output samples, calculate number of input samples
- * that would be consumed in order to fill the output buffer.
- */
-int dsp_input_count(struct dsp_config *dsp, int count)
-{
-    /* count is now the number of resampled input samples. Convert to
-       original input samples. */
-    if (dsp->resample)
-    {
-        /* Use the real resampling delta =
-         * dsp->frequency * 65536 / NATIVE_FREQUENCY, and
-         * round towards zero to avoid buffer overflows. */
-        count = (int)(((unsigned long)count *
-                      dsp->data.resample_data.delta) >> 16);
-    }
-
-#ifdef HAVE_PITCHSCREEN
-    if (dsp->tdspeed_active)
-        count = tdspeed_est_input_size(count);
-#endif
-
-    return count;
-}
-
-static void dsp_set_gain_var(long *var, long value)
-{
-    *var = value;
-    new_gain = true;
-}
-
-static void dsp_update_functions(struct dsp_config *dsp)
-{
-    sample_input_new_format(dsp);
-    sample_output_new_format(dsp);
-    if (dsp == &AUDIO_DSP)
-        dsp_set_crossfeed(crossfeed_enabled);
-}
-
-intptr_t dsp_configure(struct dsp_config *dsp, int setting, intptr_t value)
-{
-    switch (setting)
-    {
-    case DSP_MYDSP:
-        switch (value)
-        {
-        case CODEC_IDX_AUDIO:
-            return (intptr_t)&AUDIO_DSP;
-        case CODEC_IDX_VOICE:
-            return (intptr_t)&VOICE_DSP;
-        default:
-            return (intptr_t)NULL;
-        }
-
-    case DSP_SET_FREQUENCY:
-        memset(&dsp->data.resample_data, 0, sizeof (dsp->data.resample_data));
-        /* Fall through!!! */
-    case DSP_SWITCH_FREQUENCY:
-        dsp->codec_frequency = (value == 0) ? NATIVE_FREQUENCY : value;
-        /* Account for playback speed adjustment when setting dsp->frequency
-           if we're called from the main audio thread. Voice UI thread should
-           not need this feature.
-         */
-#ifdef HAVE_PITCHSCREEN
-        if (dsp == &AUDIO_DSP)
-            dsp->frequency = pitch_ratio * dsp->codec_frequency / PITCH_SPEED_100;
-        else
-#endif
-            dsp->frequency = dsp->codec_frequency;
-
-        resampler_new_delta(dsp);
-#ifdef HAVE_PITCHSCREEN
-        tdspeed_setup(dsp);
-#endif
-        break;
-
-    case DSP_SET_SAMPLE_DEPTH:
-        dsp->sample_depth = value;
-
-        if (dsp->sample_depth <= NATIVE_DEPTH)
-        {
-            dsp->data.frac_bits = WORD_FRACBITS;
-            dsp->sample_bytes = sizeof (int16_t); /* samples are 16 bits */
-            dsp->data.clip_max =  ((1 << WORD_FRACBITS) - 1);
-            dsp->data.clip_min = -((1 << WORD_FRACBITS));
-        }
-        else
-        {
-            dsp->data.frac_bits = value;
-            dsp->sample_bytes = sizeof (int32_t); /* samples are 32 bits */
-            dsp->data.clip_max = (1 << value) - 1;
-            dsp->data.clip_min = -(1 << value);
-        }
-
-        dsp->data.output_scale = dsp->data.frac_bits + 1 - NATIVE_DEPTH;
-        sample_input_new_format(dsp);
-        dither_init(dsp);
-        break;
-
-    case DSP_SET_STEREO_MODE:
-        dsp->stereo_mode = value;
-        dsp->data.num_channels = value == STEREO_MONO ? 1 : 2;
-        dsp_update_functions(dsp);
-#ifdef HAVE_PITCHSCREEN
-        tdspeed_setup(dsp);
-#endif
-        break;
-
-    case DSP_RESET:
-        dsp->stereo_mode = STEREO_NONINTERLEAVED;
-        dsp->data.num_channels = 2;
-        dsp->sample_depth = NATIVE_DEPTH;
-        dsp->data.frac_bits = WORD_FRACBITS;
-        dsp->sample_bytes = sizeof (int16_t);
-        dsp->data.output_scale = dsp->data.frac_bits + 1 - NATIVE_DEPTH;
-        dsp->data.clip_max =  ((1 << WORD_FRACBITS) - 1);
-        dsp->data.clip_min = -((1 << WORD_FRACBITS));
-        dsp->codec_frequency = dsp->frequency = NATIVE_FREQUENCY;
-
-        if (dsp == &AUDIO_DSP)
-        {
-            track_gain = 0;
-            album_gain = 0;
-            track_peak = 0;
-            album_peak = 0;
-            new_gain   = true;
-        }
-
-        dsp_update_functions(dsp);
-        resampler_new_delta(dsp);
-#ifdef HAVE_PITCHSCREEN
-        tdspeed_setup(dsp);
-#endif
-        if (dsp == &AUDIO_DSP)
-            compressor_reset();
-        break;
-
-    case DSP_FLUSH:
-        memset(&dsp->data.resample_data, 0,
-               sizeof (dsp->data.resample_data));
-        resampler_new_delta(dsp);
-        dither_init(dsp);
-#ifdef HAVE_PITCHSCREEN
-        tdspeed_setup(dsp);
-#endif
-        if (dsp == &AUDIO_DSP)
-            compressor_reset();
-        break;
-
-    case DSP_SET_TRACK_GAIN:
-        if (dsp == &AUDIO_DSP)
-            dsp_set_gain_var(&track_gain, value);
-        break;
-
-    case DSP_SET_ALBUM_GAIN:
-        if (dsp == &AUDIO_DSP)
-            dsp_set_gain_var(&album_gain, value);
-        break;
-
-    case DSP_SET_TRACK_PEAK:
-        if (dsp == &AUDIO_DSP)
-            dsp_set_gain_var(&track_peak, value);
-        break;
-
-    case DSP_SET_ALBUM_PEAK:
-        if (dsp == &AUDIO_DSP)
-            dsp_set_gain_var(&album_peak, value);
-        break;
-
-    default:
-        return 0;
-    }
-
-    return 1;
-}
-
-int get_replaygain_mode(bool have_track_gain, bool have_album_gain)
-{
-    int type;
-
-    bool track = ((global_settings.replaygain_type == REPLAYGAIN_TRACK)
-        || ((global_settings.replaygain_type == REPLAYGAIN_SHUFFLE)
-            && global_settings.playlist_shuffle));
-
-    type = (!track && have_album_gain) ? REPLAYGAIN_ALBUM 
-        : have_track_gain ? REPLAYGAIN_TRACK : -1;
-    
-    return type;
-}
-
-void dsp_set_replaygain(void)
-{
-    long gain = 0;
-
-    new_gain = false;
-
-    if ((global_settings.replaygain_type != REPLAYGAIN_OFF) ||
-            global_settings.replaygain_noclip)
-    {
-        bool track_mode = get_replaygain_mode(track_gain != 0,
-            album_gain != 0) == REPLAYGAIN_TRACK;
-        long peak = (track_mode || !album_peak) ? track_peak : album_peak;
-
-        if (global_settings.replaygain_type != REPLAYGAIN_OFF)
-        {
-            gain = (track_mode || !album_gain) ? track_gain : album_gain;
-
-            if (global_settings.replaygain_preamp)
-            {
-                long preamp = get_replaygain_int(
-                    global_settings.replaygain_preamp * 10);
-
-                gain = (long) (((int64_t) gain * preamp) >> 24);
-            }
-        }
-
-        if (gain == 0)
-        {
-            /* So that noclip can work even with no gain information. */
-            gain = DEFAULT_GAIN;
-        }
-
-        if (global_settings.replaygain_noclip && (peak != 0)
-            && ((((int64_t) gain * peak) >> 24) >= DEFAULT_GAIN))
-        {
-            gain = (((int64_t) DEFAULT_GAIN << 24) / peak);
-        }
-
-        if (gain == DEFAULT_GAIN)
-        {
-            /* Nothing to do, disable processing. */
-            gain = 0;
-        }
-    }
-
-    /* Store in S7.24 format to simplify calculations. */
-    replaygain = gain;
-    set_gain(&AUDIO_DSP);
-}
-
-/** SET COMPRESSOR
- *  Called by the menu system to configure the compressor process */
-void dsp_set_compressor(const struct compressor_settings *settings)
-{
-    /* enable/disable the compressor */
-    AUDIO_DSP.compressor_process = compressor_update(settings) ?
-                                        compressor_process : NULL;
-}
diff --git a/lib/rbcodec/dsp/dsp.h b/lib/rbcodec/dsp/dsp.h
index a99df17468..feac4aa845 100644
--- a/lib/rbcodec/dsp/dsp.h
+++ b/lib/rbcodec/dsp/dsp.h
@@ -18,109 +18,159 @@
  * KIND, either express or implied.
  *
  ****************************************************************************/
-
 #ifndef _DSP_H
 #define _DSP_H
 
-#include <stdlib.h>
-#include <stdbool.h>
+struct dsp_config;
 
-#define NATIVE_FREQUENCY       44100
+/* Include all this junk here for now */
+#include "dsp_proc_settings.h"
 
-enum
+enum dsp_ids
 {
-    STEREO_INTERLEAVED = 0,
+    CODEC_IDX_AUDIO,
+    CODEC_IDX_VOICE,
+    DSP_COUNT,
+};
+
+enum dsp_settings
+{
+    DSP_INIT, /* For dsp_init */
+    DSP_RESET,
+    DSP_SET_FREQUENCY,
+    DSP_SWITCH_FREQUENCY = DSP_SET_FREQUENCY, /* deprecated */
+    DSP_SET_SAMPLE_DEPTH,
+    DSP_SET_STEREO_MODE,
+    DSP_FLUSH,
+    DSP_PROC_INIT,
+    DSP_PROC_CLOSE,
+    DSP_PROC_SETTING, /* stage-specific should be this + id */
+};
+
+#define NATIVE_FREQUENCY   44100 /* internal/output sample rate */
+
+enum dsp_stereo_modes
+{
+    STEREO_INTERLEAVED,
     STEREO_NONINTERLEAVED,
     STEREO_MONO,
     STEREO_NUM_MODES,
 };
 
-enum
+/* Format info for the buffer (if .valid == true) */
+struct sample_format
 {
-    CODEC_IDX_AUDIO = 0,
-    CODEC_IDX_VOICE,
+    uint8_t changed;         /* 00h: 0=no change, 1=changed (is also index) */
+    uint8_t num_channels;    /* 01h: number of channels of data */
+    uint8_t frac_bits;       /* 02h: number of fractional bits */
+    uint8_t output_scale;    /* 03h: output scaling shift */
+    int32_t frequency;       /* 04h: pitch-adjusted sample rate */
+    int32_t codec_frequency; /* 08h: codec-specified sample rate */
+                             /* 0ch */
 };
 
-enum
+/* Compare format data only */
+#define EQU_SAMPLE_FORMAT(f1, f2) \
+    (!memcmp(&(f1).num_channels, &(f2).num_channels, \
+             sizeof (f1) - sizeof ((f1).changed)))
+
+static inline void format_change_set(struct sample_format *f)
+    { f->changed = 1; }
+static inline void format_change_ack(struct sample_format *f)
+    { f->changed = 0; }
+
+/* Used by ASM routines - keep field order or else fix the functions */
+struct dsp_buffer
 {
-    DSP_MYDSP = 1,
-    DSP_SET_FREQUENCY,
-    DSP_SWITCH_FREQUENCY,
-    DSP_SET_SAMPLE_DEPTH,
-    DSP_SET_STEREO_MODE,
-    DSP_RESET,
-    DSP_FLUSH,
-    DSP_SET_TRACK_GAIN,
-    DSP_SET_ALBUM_GAIN,
-    DSP_SET_TRACK_PEAK,
-    DSP_SET_ALBUM_PEAK,
-    DSP_CROSSFEED
+    int32_t remcount;       /* 00h: Samples in buffer (In, Int, Out) */
+    union
+    {
+        const void *pin[2]; /* 04h: Channel pointers (In) */
+        int32_t *p32[2];    /* 04h: Channel pointers (Int) */
+        int16_t *p16out;    /* 04h: DSP output buffer (Out) */
+    };
+    union
+    {
+        uint32_t proc_mask; /* 0Ch: In-place effects already applied to buffer
+                                    in order to avoid double-processing. Set
+                                    to zero on new buffer before passing to
+                                    DSP. */
+        int bufcount;       /* 0Ch: Buffer length/dest buffer remaining
+                                    Basically, pay no attention unless it's
+                                    *your* new buffer and is used internally
+                                    or is specifically the final output
+                                    buffer. */
+    };
+    struct sample_format format; /* 10h: Buffer format data */
+                                 /* 1ch */
 };
 
-
-/****************************************************************************
- * NOTE: Any assembly routines that use these structures must be updated
- * if current data members are moved or changed.
- */
-struct resample_data
+/* Remove samples from input buffer (In). Sample size is specified.
+   Provided to dsp_process(). */
+static inline void dsp_advance_buffer_input(struct dsp_buffer *buf,
+                                            int by_count,
+                                            size_t size_each)
 {
-    uint32_t delta;                     /* 00h */
-    uint32_t phase;                     /* 04h */
-    int32_t last_sample[2];             /* 08h */
-                                        /* 10h */
-};
+    buf->remcount -= by_count;
+    buf->pin[0] += by_count * size_each;
+    buf->pin[1] += by_count * size_each;
+}
 
-/* This is for passing needed data to external dsp routines. If another
- * dsp parameter needs to be passed, add to the end of the structure
- * and remove from dsp_config.
- * If another function type becomes assembly/external and requires dsp
- * config info, add a pointer paramter of type "struct dsp_data *".
- * If removing something from other than the end, reserve the spot or
- * else update every implementation for every target.
- * Be sure to add the offset of the new member for easy viewing as well. :)
- * It is the first member of dsp_config and all members can be accessesed
- * through the main aggregate but this is intended to make a safe haven
- * for these items whereas the c part can be rearranged at will. dsp_data
- * could even moved within dsp_config without disurbing the order.
- */
-struct dsp_data
+/* Add samples to output buffer and update remaining space (Out).
+   Provided to dsp_process() */
+static inline void dsp_advance_buffer_output(struct dsp_buffer *buf,
+                                             int by_count)
 {
-    int output_scale;                   /* 00h */
-    int num_channels;                   /* 04h */
-    struct resample_data resample_data; /* 08h */
-    int32_t clip_min;                   /* 18h */
-    int32_t clip_max;                   /* 1ch */
-    int32_t gain;                       /* 20h - Note that this is in S8.23 format. */
-    int frac_bits;                      /* 24h */
-                                        /* 28h */
-};
+    buf->bufcount -= by_count;
+    buf->remcount += by_count;
+    buf->p16out += 2 * by_count; /* Interleaved stereo */
+}
 
-struct dsp_config;
+/* Remove samples from internal input buffer (In, Int).
+   Provided to dsp_process() or by another processing stage. */
+static inline void dsp_advance_buffer32(struct dsp_buffer *buf,
+                                        int by_count)
+{
+    buf->remcount -= by_count;
+    buf->p32[0] += by_count;
+    buf->p32[1] += by_count;
+}
 
-int dsp_process(struct dsp_config *dsp, char *dest,
-                const char *src[], int count);
-int dsp_input_count(struct dsp_config *dsp, int count);
-int dsp_output_count(struct dsp_config *dsp, int count);
-intptr_t dsp_configure(struct dsp_config *dsp, int setting,
+/** For use by processing stages **/
+
+#define DSP_PRINT_FORMAT(name, id, format) \
+    DEBUGF("DSP format- " #name "\n"                          \
+           "  id:%d chg:%c ch:%u fb:%u os:%u hz:%u chz:%u\n", \
+           (int)id,                                           \
+           (format).changed ? 'y' : 'n',                      \
+           (unsigned int)(format).num_channels,               \
+           (unsigned int)(format).frac_bits,                  \
+           (unsigned int)(format).output_scale,               \
+           (unsigned int)(format).frequency,                  \
+           (unsigned int)(format).codec_frequency);
+
+/* Get DSP pointer */
+struct dsp_config * dsp_get_config(enum dsp_ids id);
+
+/* Get DSP id */
+enum dsp_ids dsp_get_id(const struct dsp_config *dsp);
+
+#if 0 /* Not needed now but enable if something must know this */
+/* Is the DSP processing a buffer? */
+bool dsp_is_busy(const struct dsp_config *dsp);
+#endif /* 0 */
+
+/** General DSP processing **/
+
+/* Process the given buffer - see implementation in dsp.c for more */
+void dsp_process(struct dsp_config *dsp, struct dsp_buffer *src,
+                 struct dsp_buffer *dst);
+
+/* Change DSP settings */
+intptr_t dsp_configure(struct dsp_config *dsp, unsigned int setting,
                        intptr_t value);
-int get_replaygain_mode(bool have_track_gain, bool have_album_gain);
-void dsp_set_replaygain(void);
-void dsp_set_crossfeed(bool enable);
-void dsp_set_crossfeed_direct_gain(int gain);
-void dsp_set_crossfeed_cross_params(long lf_gain, long hf_gain,
-                                    long cutoff);
-void dsp_set_eq(bool enable);
-void dsp_set_eq_precut(int precut);
-void dsp_set_eq_coefs(int band, int cutoff, int q, int gain);
-void dsp_dither_enable(bool enable);
-void dsp_timestretch_enable(bool enable);
-bool dsp_timestretch_available(void);
-void sound_set_pitch(int32_t r);
-int32_t sound_get_pitch(void);
-void dsp_set_timestretch(int32_t percent);
-int32_t dsp_get_timestretch(void);
-int dsp_callback(int msg, intptr_t param);
-struct compressor_settings;
-void dsp_set_compressor(const struct compressor_settings *settings);
 
-#endif
+/* One-time startup init that must come before settings reset/apply */
+void dsp_init(void);
+
+#endif /* _DSP_H */
diff --git a/lib/rbcodec/dsp/dsp_arm.S b/lib/rbcodec/dsp/dsp_arm.S
index 685aca411c..9fd19ae108 100644
--- a/lib/rbcodec/dsp/dsp_arm.S
+++ b/lib/rbcodec/dsp/dsp_arm.S
@@ -21,20 +21,19 @@
  #include "config.h"
 
 /****************************************************************************
- *  void channels_process_sound_chan_mono(int count, int32_t *buf[])
+ *  void channel_mode_proc_mono(struct dsp_proc_entry *this,
+ *                              struct dsp_buffer **buf_p)
  */
-
-#include "config.h"
-
-    .section .icode, "ax", %progbits
-    .align  2
-    .global channels_process_sound_chan_mono
-    .type   channels_process_sound_chan_mono, %function
-channels_process_sound_chan_mono:
-    @ input: r0 = count, r1 = buf
+    .section .icode
+    .global channel_mode_proc_mono
+    .type   channel_mode_proc_mono, %function
+channel_mode_proc_mono:
+    @ input: r0 = this, r1 = buf_p
+    ldr     r1, [r1]                   @ r1 = buf = *buf_p;
     stmfd   sp!, { r4, lr }            @
                                        @
-    ldmia   r1, { r1, r2 }             @ r1 = buf[0], r2 = buf[1]
+    ldmia   r1, { r0-r2 }              @ r0 = buf->remcount, r1 = buf->p32[0],
+                                       @ r2 = buf->p32[1]
     subs    r0, r0, #1                 @ odd: end at 0; even: end at -1
     beq     .mono_singlesample         @ Zero? Only one sample!
                                        @
@@ -61,25 +60,26 @@ channels_process_sound_chan_mono:
     str     r12, [r2]                  @ store Mo
                                        @
     ldmpc   regs=r4                    @
-    .size   channels_process_sound_chan_mono, \
-                .-channels_process_sound_chan_mono
+    .size   channel_mode_proc_mono, .-channel_mode_proc_mono
 
 /****************************************************************************
- * void channels_process_sound_chan_custom(int count, int32_t *buf[])
+ * void channel_mode_proc_custom(struct dsp_proc_entry *this,
+ *                               struct dsp_buffer **buf_p)
  */
-    .section .icode, "ax", %progbits
-    .align  2
-    .global channels_process_sound_chan_custom
-    .type   channels_process_sound_chan_custom, %function
-channels_process_sound_chan_custom:
+    .section .icode
+    .global channel_mode_proc_custom
+    .type   channel_mode_proc_custom, %function
+channel_mode_proc_custom:
+    @ input: r0 = this, r1 = buf_p
+    ldr     r2, [r0]                   @ r2 = &channel_mode_data = this->data
+    ldr     r1, [r1]                   @ r1 = buf = *buf_p;
+
     stmfd   sp!, { r4-r10, lr }
 
-    ldr     r3, =dsp_sw_gain
-    ldr     r4, =dsp_sw_cross
+    ldmia   r2, { r3, r4 }             @ r3 = sw_gain, r4 = sw_cross
 
-    ldmia   r1, { r1, r2 }             @ r1 = buf[0], r2 = buf[1]
-    ldr     r3, [r3]                   @ r3 = dsp_sw_gain
-    ldr     r4, [r4]                   @ r4 = dsp_sw_cross
+    ldmia   r1, { r0-r2 }              @ r0 = buf->remcount, r1 = buf->p32[0],
+                                       @ r2 = buf->p32[1]
 
     subs    r0, r0, #1
     beq     .custom_single_sample      @ Zero? Only one sample!
@@ -135,21 +135,22 @@ channels_process_sound_chan_custom:
     str     r7, [r2]                   @ Store Rc0
 
     ldmpc   regs=r4-r10
-    .size   channels_process_sound_chan_custom, \
-                .-channels_process_sound_chan_custom
+    .size   channel_mode_proc_custom, .-channel_mode_proc_custom
 
 /****************************************************************************
- *  void channels_process_sound_chan_karaoke(int count, int32_t *buf[])
+ *  void channel_mode_proc_karaoke(struct dsp_proc_entry *this,
+ *                                 struct dsp_buffer **buf_p)
  */
-    .section .icode, "ax", %progbits
-    .align  2
-    .global channels_process_sound_chan_karaoke
-    .type   channels_process_sound_chan_karaoke, %function
-channels_process_sound_chan_karaoke:
-    @ input: r0 = count, r1 = buf
+    .section .icode
+    .global channel_mode_proc_karaoke
+    .type   channel_mode_proc_karaoke, %function
+channel_mode_proc_karaoke:
+    @ input: r0 = this, r1 = buf_p
+    ldr     r1, [r1]                   @ r1 = buf = *buf_p;
     stmfd   sp!, { r4, lr }            @
                                        @
-    ldmia   r1, { r1, r2 }             @ r1 = buf[0], r2 = buf[1]
+    ldmia   r1, { r0-r2 }              @ r0 = buf->remcount, r1 = buf->p32[0],
+                                       @ r2 = buf->p32[1]
     subs    r0, r0, #1                 @ odd: end at 0; even: end at -1
     beq     .karaoke_singlesample      @ Zero? Only one sample!
                                        @
@@ -179,24 +180,313 @@ channels_process_sound_chan_karaoke:
     str     r12, [r2]                  @ store Ro
                                        @
     ldmpc   regs=r4                    @
-    .size   channels_process_sound_chan_karaoke, \
-                .-channels_process_sound_chan_karaoke
+    .size   channel_mode_proc_karaoke, .-channel_mode_proc_karaoke
+
+/****************************************************************************
+ * void crossfeed_process(struct dsp_proc_entry *this,
+ *                        struct dsp_buffer **buf_p)
+ */
+    .section .text
+    .global crossfeed_process
+crossfeed_process:
+    @ input: r0 = this, r1 = buf_p
+    @ unfortunately, we ended up in a bit of a register squeeze here, and need
+    @ to keep the count on the stack :/
+    ldr     r1, [r1]                   @ r1 = buf = *buf_p;
+    stmfd   sp!, { r4-r11, lr }        @ stack modified regs
+    ldr     r12, [r1]                  @ r12 = buf->remcount
+    ldr     r14, [r0]                  @ r14 = this->data = &crossfeed_state
+    ldmib   r1, { r2-r3 }              @ r2 = buf->p32[0], r3 = buf->p32[1]
+    ldmia   r14!, { r4-r11 }           @ load direct gain and filter data
+    add     r0, r14, #13*2*4           @ calculate end of delay
+    stmfd   sp!, { r0, r12 }           @ stack end of delay adr, count and state
+    ldr     r0, [r0]                   @ fetch current delay line address
+
+    /* Register usage in loop:
+     * r0 = &delay[index][0], r1 = accumulator high, r2 = buf->p32[0],
+     * r3 = buf->p32[1], r4 = direct gain, r5-r7 = b0, b1, a1 (filter coefs),
+     * r8-r11 = filter history, r12 = temp, r14 = accumulator low
+     */
+.cfloop:
+    smull   r14, r1, r6, r8            @ acc = b1*dr[n - 1]
+    smlal   r14, r1, r7, r9            @ acc += a1*y_l[n - 1]
+    ldr     r8, [r0, #4]               @ r8 = dr[n]
+    smlal   r14, r1, r5, r8            @ acc += b0*dr[n]
+    mov     r9, r1, lsl #1             @ fix format for filter history
+    ldr     r12, [r2]                  @ load left input
+    smlal   r14, r1, r4, r12           @ acc += gain*x_l[n]
+    mov     r1, r1, lsl #1             @ fix format
+    str     r1, [r2], #4               @ save result
+
+    smull   r14, r1, r6, r10           @ acc = b1*dl[n - 1]
+    smlal   r14, r1, r7, r11           @ acc += a1*y_r[n - 1]
+    ldr     r10, [r0]                  @ r10 = dl[n]
+    str     r12, [r0], #4              @ save left input to delay line
+    smlal   r14, r1, r5, r10           @ acc += b0*dl[n]
+    mov     r11, r1, lsl #1            @ fix format for filter history
+    ldr     r12, [r3]                  @ load right input
+    smlal   r14, r1, r4, r12           @ acc += gain*x_r[n]
+    str     r12, [r0], #4              @ save right input to delay line
+    mov     r1, r1, lsl #1             @ fix format
+    ldmia   sp, { r12, r14 }           @ fetch delay line end addr and count from stack
+    str     r1, [r3], #4               @ save result
+
+    cmp     r0, r12                    @ need to wrap to start of delay?
+    subhs   r0, r12, #13*2*4           @ wrap back delay line ptr to start
+
+    subs    r14, r14, #1               @ are we finished?
+    strgt   r14, [sp, #4]              @ nope, save count back to stack
+    bgt     .cfloop
+
+    @ save data back to struct
+    str     r0, [r12]                  @ save delay line index
+    sub     r12, r12, #13*2*4 + 4*4    @ r12 = data->history
+    stmia   r12, { r8-r11 }            @ save filter history
+    add     sp, sp, #8                 @ remove temp variables from stack
+    ldmpc   regs=r4-r11
+    .size   crossfeed_process, .-crossfeed_process
+
+/****************************************************************************
+ * int lin_resample_resample(struct resample_data *data,
+ *                           struct dsp_buffer *src,
+ *                           struct dsp_buffer *dst)
+ */
+    .section    .text
+    .global     lin_resample_resample
+lin_resample_resample:
+    @input: r0 = data, r1 = src, r2 = dst
+    stmfd   sp!, { r4-r11, lr }     @ stack modified regs
+    ldr     r4, [r0]                @ r4 = data->delta
+    add     r10, r0, #4             @ r10 = &data->phase
+    ldrb    r3, [r1, #17]           @ r3 = num_channels,
+    stmfd   sp!, { r1, r10 }        @ stack src, &data->phase
+.lrs_channel_loop:
+    ldr     r5, [r10]               @ r5 = data->phase
+    ldr     r6, [r1]                @ r6 = srcrem = src->remcount
+    ldr     r7, [r1, r3, lsl #2]    @ r7 = src->p32[ch]
+    ldr     r8, [r2, r3, lsl #2]    @ r8 = dst->p32[ch]
+    ldr     r9, [r2, #12]           @ r9 = dstrem = dst->bufcount
+
+    cmp     r6, #0x8000             @ srcrem = MIN(srcrem, 0x8000)
+    movgt   r6, #0x8000             @
+    mov     r0, r5, lsr #16         @ pos = MIN(pos, srcrem)
+    cmp     r0, r6                  @
+    movgt   r0, r6                  @ r0 = pos = phase >> 16
+    cmp     r0, #0                  @
+    ldrle   r11, [r10, r3, lsl #2]  @ pos <= 0? r11 = last = last_sample[ch]
+    addgt   r12, r7, r0, lsl #2     @ pos > 0? r11 = last = s[pos - 1]
+    ldrgt   r11, [r12, #-4]         @
+    cmp     r0, r6                  @
+    bge     .lrs_channel_done       @ pos >= count? channel complete
+
+    cmp     r4, #0x10000            @ delta >= 1.0?
+    ldrhs   r12, [r7, r0, lsl #2]   @ yes? r12 = s[pos]
+    bhs     .lrs_dsstart            @ yes? is downsampling
+
+    /** Upsampling **/
+    mov     r5, r5, lsl #16         @ Move phase into high halfword
+    add     r7, r7, r0, lsl #2      @ r7 = &s[pos]
+    sub     r0, r6, r0              @ r0 = dte = srcrem - pos
+.lrs_usloop_1:
+    ldr     r12, [r7], #4           @ r12 = s[pos]
+    sub     r14, r12, r11           @ r14 = diff = s[pos] - s[pos - 1]
+.lrs_usloop_0:
+    mov     r1, r5, lsr #16         @ r1 = frac = phase >> 16
+    @ keep frac in Rs to take advantage of multiplier early termination
+    smull   r1, r10, r14, r1        @ r1, r10 = diff * frac (lo, hi)
+    add     r1, r11, r1, lsr #16    @ r1 = out = last + frac*diff
+    add     r1, r1, r10, lsl #16    @
+    str     r1, [r8], #4            @ *d++ = out
+    subs    r9, r9, #1              @ destination full?
+    bls     .lrs_usfull             @ yes? channel is done
+    adds    r5, r5, r4, lsl #16     @ phase += delta << 16
+    bcc     .lrs_usloop_0           @ if carry is set, pos is incremented
+    subs    r0, r0, #1              @ if srcrem > 0, do another sample
+    mov     r11, r12                @ r11 = last = s[pos-1] (pos changed)
+    bgt     .lrs_usloop_1
+    b       .lrs_usdone
+
+.lrs_usfull:
+    adds    r5, r5, r4, lsl #16     @ do missed phase increment
+    subcs   r0, r0, #1              @ do missed srcrem decrement
+    movcs   r11, r12                @ r11 = s[pos-1] (pos changed)
+
+.lrs_usdone:
+    sub     r0, r6, r0              @ r0 = pos = srcrem - dte
+    orr     r5, r5, r0              @ reconstruct swapped phase
+    mov     r5, r5, ror #16         @ swap pos and frac for phase
+    b       .lrs_channel_done       @
+
+    /** Downsampling **/
+.lrs_dsloop:
+    add     r10, r7, r0, lsl #2     @ r10 = &s[pos]
+    ldmda   r10, { r11, r12 }       @ r11 = last, r12 = s[pos]
+.lrs_dsstart:
+    sub     r14, r12, r11           @ r14 = diff = s[pos] - s[pos - 1]
+    @ keep frac in Rs to take advantage of multiplier early termination
+    bic     r1, r5, r0, lsl #16     @ frac = phase & 0xffff
+    smull   r1, r10, r14, r1        @ r1, r10 = diff * frac (lo, hi)
+    add     r5, r5, r4              @ phase += delta
+    subs    r9, r9, #1              @ destination full? ...
+    mov     r0, r5, lsr #16         @ pos = phase >> 16
+    add     r1, r11, r1, lsr #16    @ r1 = out = last + frac*diff
+    add     r1, r1, r10, lsl #16    @
+    str     r1, [r8], #4            @ *d++ = out
+    cmpgt   r6, r0                  @ ... || pos >= srcrem? ...
+    bgt     .lrs_dsloop             @ ... no, do more samples
+
+    cmp     r0, r6                  @ pos = MIN(pos, srcrem)
+    movgt   r0, r6                  @
+    sub     r1, r0, #1              @ pos must always be > 0 since step >= 1.0
+    ldr     r11, [r7, r1, lsl #2]   @ r11 = s[pos - 1]
+
+.lrs_channel_done:
+    ldmia   sp, { r1, r10 }         @ recover src, &data->phase
+    str     r11, [r10, r3, lsl #2]  @ last_sample[ch] = last
+    subs    r3, r3, #1              @
+    bgt     .lrs_channel_loop       @
+
+    ldr     r6, [r2, #12]           @ r6 = dst->bufcount
+    sub     r5, r5, r0, lsl #16     @ r5 = phase - (pos << 16)
+    str     r5, [r10]               @ data->phase = r5
+    sub     r6, r6, r9              @ r6 = dst->bufcount - dstrem = dstcount
+    str     r6, [r2]                @ dst->remcount = dstcount
+    add     sp, sp, #8              @ adjust stack for temp variables
+    ldmpc   regs=r4-r11             @ ... and we're out
+    .size   lin_resample_resample, .-lin_resample_resample
+
+/****************************************************************************
+ *  void pga_process(struct dsp_proc_entry *this, struct dsp_buffer **buf_p)
+ *  Multiplies buf->remcount samples of each channel by data->gain, in place. */
+    .section .icode
+    .global pga_process
+    .type   pga_process, %function
+pga_process:
+    @ input: r0 = this, r1 = buf_p
+    ldr     r0, [r0]                @ r0 = data = this->data (&pga_data)
+    ldr     r1, [r1]                @ r1 = buf = *buf_p
+    stmfd   sp!, { r4-r8, lr }      @ save clobbered regs and return address
+
+    ldr     r4, [r0]                @ r4 = data->gain
+    ldr     r0, [r1], #4            @ r0 = buf->remcount, r1 = buf->p32
+    ldrb    r3, [r1, #13]           @ r3 = buf->format.num_channels
+
+.pga_channelloop:
+    ldr     r2, [r1], #4            @ r2 = buf->p32[ch] and inc index of p32
+    subs    r12, r0, #1             @ r12 = count - 1
+    beq     .pga_singlesample       @ Zero? Only one sample!
+
+.pga_loop:
+    ldmia   r2, { r5, r6 }          @ load r5, r6 from r2 (*p32[ch])
+    smull   r7, r8, r5, r4          @ r7 = FRACMUL_SHL(r5, r4, 8)
+    smull   r14, r5, r6, r4         @ r14 = FRACMUL_SHL(r6, r4, 8)
+    subs    r12, r12, #2            @ two samples done; flags survive to bgt/blt
+    mov     r7, r7, lsr #23         @ (low word of product >> 23) ...
+    mov     r14, r14, lsr #23       @
+    orr     r7, r7, r8, asl #9      @ ... | (high word << 9) = product >> 23
+    orr     r14, r14, r5, asl #9    @
+    stmia   r2!, { r7, r14 }        @ save r7, r14 to *p32[ch] and increment
+    bgt     .pga_loop               @ > 0? at least two samples remain
+
+    blt     .pga_evencount          @ < 0? even count; == 0 leaves one sample
+
+.pga_singlesample:
+    ldr     r5, [r2]                @ handle odd sample
+    smull   r7, r8, r5, r4          @ r7 = FRACMUL_SHL(r5, r4, 8)
+    mov     r7, r7, lsr #23         @ same product >> 23 recombination as above
+    orr     r7, r7, r8, asl #9      @
+    str     r7, [r2]                @ write the scaled sample back in place
+
+.pga_evencount:
+    subs    r3, r3, #1              @ one channel finished
+    bgt     .pga_channelloop        @ end of channel loop
+
+    ldmpc   regs=r4-r8              @ restore saved regs and return
+    .size   pga_process, .-pga_process
+
+/****************************************************************************
+ * void filter_process(struct dsp_filter *f, int32_t *buf[], int count,
+ *                     unsigned int channels)
+ *
+ * Filters count samples of each channel in buf[] in place using the coefs and
+ * per-channel history in f. Define HIGH_PRECISION as '1' to also calculate the
+ * lower bits after shifting; without it "shift" - 1 of the lower bits are lost.
+ */
+#define HIGH_PRECISION 0
+
+#if CONFIG_CPU == PP5002
+    .section    .icode,"ax",%progbits
+#else
+    .text
+#endif
+    .global filter_process
+filter_process:
+    @ input: r0 = f, r1 = buf, r2 = count, r3 = channels
+    stmfd   sp!, { r4-r11, lr }     @ save all clobbered regs
+    ldmia   r0!, { r4-r8 }          @ r4-r8 = b0, b1, b2, a1, a2; r0 = f->history
+    sub     r3, r3, #1              @ r3 = ch = channels - 1
+    stmfd   sp!, { r0-r3 }          @ save adjusted params: history, buf, count, ch
+    ldrb    r14, [r0, #32]          @ r14 = shift
+
+    @ Channels are processed high to low while history is saved low to high
+    @ It's really no one's business how we do this
+.fp_channelloop:
+    ldmia   r0, { r9-r12 }          @ load history, r0 = history[channels-ch-1]
+    ldr     r3, [r1, r3, lsl #2]    @ r3 = buf[ch]
+
+    @ r9-r12 = history (x[i - 1], x[i - 2], y[i - 1], y[i - 2]), r4-r8 = coefs,
+    @ r0..r1 = accumulator, r2 = number of samples, r3 = buf[ch], r14 = shift
+.fp_loop:
+    @ Direct form 1 filtering code.
+    @ y[i] = b0*x[i] + b1*x[i - 1] + b2*x[i - 2] + a1*y[i - 1] + a2*y[i - 2],
+    @ where y[] is output and x[] is input. This is performed out of order to
+    @ reuse registers, we're pretty short on regs.
+    smull   r0, r1, r5, r9          @ acc = b1*x[i - 1]
+    smlal   r0, r1, r6, r10         @ acc += b2*x[i - 2]
+    mov     r10, r9                 @ fix input history
+    ldr     r9, [r3]                @ load input and fix history
+    smlal   r0, r1, r7, r11         @ acc += a1*y[i - 1]
+    smlal   r0, r1, r8, r12         @ acc += a2*y[i - 2]
+    smlal   r0, r1, r4, r9          @ acc += b0*x[i] /* avoid stall on arm9 */
+    mov     r12, r11                @ fix output history
+    mov     r11, r1, asl r14        @ get upper part of result and shift left
+#if HIGH_PRECISION
+    rsb     r1, r14, #32            @ get shift amount for lower part
+    orr     r11, r11, r0, lsr r1    @ then mix in correctly shifted lower part
+#endif
+    str     r11, [r3], #4           @ save result over the input sample
+    subs    r2, r2, #1              @ are we done with this channel?
+    bgt     .fp_loop                @
+
+    ldr     r3, [sp, #12]           @ r3 = ch
+    ldr     r0, [sp]                @ r0 = history[channels-ch-1]
+    subs    r3, r3, #1              @ all channels processed?
+    stmia   r0!, { r9-r12 }         @ save back history, history++
+    ldmhsib sp, { r1-r2 }           @ r1 = buf, r2 = count
+    strhs   r3, [sp, #12]           @ store ch
+    strhs   r0, [sp]                @ store history[channels-ch-1]
+    bhs     .fp_channelloop         @ no borrow from subs: ch >= 0, next channel
+
+    add     sp, sp, #16             @ compensate for temp storage
+    ldmpc   regs=r4-r11             @ restore saved regs and return
+    .size   filter_process, .-filter_process
 
 #if ARM_ARCH < 6
 /****************************************************************************
- *  void sample_output_mono(int count, struct dsp_data *data,
- *                          const int32_t *src[], int16_t *dst)
+ *  void sample_output_mono(struct sample_io_data *this,
+ *                          struct dsp_buffer *src,
+ *                          struct dsp_buffer *dst)
  */
-    .section .icode, "ax", %progbits
-    .align  2
+    .section .icode
     .global sample_output_mono
     .type   sample_output_mono, %function
 sample_output_mono:
-    @ input: r0 = count, r1 = data, r2 = src, r3 = dst
+    @ input: r0 = this, r1 = src, r2 = dst
     stmfd   sp!, { r4-r6, lr }
 
-    ldr     r1, [r1]                   @ lr = data->output_scale
-    ldr     r2, [r2]                   @ r2 = src[0]
+    ldr     r0, [r0]                   @ r0 = this->outcount
+    ldr     r3, [r2, #4]               @ r3 = dst->p16out
+    ldr     r2, [r1, #4]               @ r2 = src->p32[0]
+    ldrb    r1, [r1, #19]              @ r1 = src->format.output_scale
 
     mov     r4, #1
     mov     r4, r4, lsl r1             @ r4 = 1 << (scale-1)
@@ -246,19 +536,21 @@ sample_output_mono:
     .size   sample_output_mono, .-sample_output_mono
 
 /****************************************************************************
- * void sample_output_stereo(int count, struct dsp_data *data,
- *                           const int32_t *src[], int16_t *dst)
+ *  void sample_output_stereo(struct sample_io_data *this,
+ *                          struct dsp_buffer *src,
+ *                          struct dsp_buffer *dst)
  */
-    .section .icode, "ax", %progbits
-    .align  2
+    .section .icode
     .global sample_output_stereo
     .type   sample_output_stereo, %function
 sample_output_stereo:
-    @ input: r0 = count, r1 = data, r2 = src, r3 = dst
+    @ input: r0 = this, r1 = src, r2 = dst
     stmfd   sp!, { r4-r9, lr }
 
-    ldr     r1, [r1]                   @ r1 = data->output_scale
-    ldmia   r2, { r2, r5 }             @ r2 = src[0], r5 = src[1]
+    ldr     r0, [r0]                   @ r0 = this->outcount
+    ldr     r3, [r2, #4]               @ r3 = dst->p16out
+    ldmib   r1, { r2, r5 }             @ r2 = src->p32[0], r5 = src->p32[1]
+    ldrb    r1, [r1, #19]              @ r1 = src->format.output_scale
 
     mov     r4, #1
     mov     r4, r4, lsl r1             @ r4 = 1 << (scale-1)
@@ -330,232 +622,3 @@ sample_output_stereo:
     ldmpc   regs=r4-r9
     .size   sample_output_stereo, .-sample_output_stereo
 #endif /* ARM_ARCH < 6 */
-
-/****************************************************************************
- * void apply_crossfeed(int count, int32_t* src[])
- */
-    .section .text
-    .global apply_crossfeed
-apply_crossfeed:
-    @ unfortunately, we ended up in a bit of a register squeeze here, and need
-    @ to keep the count on the stack :/
-    stmdb   sp!, { r4-r11, lr }        @ stack modified regs
-    ldmia   r1, { r2-r3 }              @ r2 = src[0], r3 = src[1]
-
-    ldr     r1, =crossfeed_data
-    ldmia   r1!, { r4-r11 }            @ load direct gain and filter data
-    mov     r12, r0                    @ better to ldm delay + count later
-    add     r0, r1, #13*4*2            @ calculate end of delay
-    stmdb   sp!, { r0, r12 }           @ stack end of delay adr and count
-    ldr     r0, [r1, #13*4*2]          @ fetch current delay line address
-
-    /* Register usage in loop:
-     * r0 = &delay[index][0], r1 = accumulator high, r2 = src[0], r3 = src[1],
-     * r4 = direct gain, r5-r7 = b0, b1, a1 (filter coefs),
-     * r8-r11 = filter history, r12 = temp, r14 = accumulator low
-     */
-.cfloop:
-    smull   r14, r1, r6, r8            @ acc = b1*dr[n - 1]
-    smlal   r14, r1, r7, r9            @ acc += a1*y_l[n - 1]
-    ldr     r8, [r0, #4]               @ r8 = dr[n]
-    smlal   r14, r1, r5, r8            @ acc += b0*dr[n]
-    mov     r9, r1, lsl #1             @ fix format for filter history
-    ldr     r12, [r2]                  @ load left input
-    smlal   r14, r1, r4, r12           @ acc += gain*x_l[n]
-    mov     r1, r1, lsl #1             @ fix format
-    str     r1, [r2], #4               @ save result
-
-    smull   r14, r1, r6, r10           @ acc = b1*dl[n - 1]
-    smlal   r14, r1, r7, r11           @ acc += a1*y_r[n - 1]
-    ldr     r10, [r0]                  @ r10 = dl[n]
-    str     r12, [r0], #4              @ save left input to delay line
-    smlal   r14, r1, r5, r10           @ acc += b0*dl[n]
-    mov     r11, r1, lsl #1            @ fix format for filter history
-    ldr     r12, [r3]                  @ load right input
-    smlal   r14, r1, r4, r12           @ acc += gain*x_r[n]
-    str     r12, [r0], #4              @ save right input to delay line
-    mov     r1, r1, lsl #1             @ fix format
-    ldmia   sp, { r12, r14 }           @ fetch delay line end addr and count from stack
-    str     r1, [r3], #4               @ save result
-
-    cmp     r0, r12                    @ need to wrap to start of delay?
-    subeq   r0, r0, #13*4*2            @ wrap back delay line ptr to start
-
-    subs    r14, r14, #1               @ are we finished?
-    strne   r14, [sp, #4]              @ nope, save count back to stack
-    bne     .cfloop
-
-    @ save data back to struct
-    ldr     r12, =crossfeed_data + 4*4
-    stmia   r12, { r8-r11 }            @ save filter history
-    str     r0, [r12, #30*4]           @ save delay line index
-    add     sp, sp, #8                 @ remove temp variables from stack
-    ldmpc   regs=r4-r11
-    .size   apply_crossfeed, .-apply_crossfeed
-
-/****************************************************************************
- * int dsp_downsample(int count, struct dsp_data *data,
- *                    in32_t *src[], int32_t *dst[])
- */
-    .section    .text
-    .global     dsp_downsample
-dsp_downsample:
-    stmdb   sp!, { r4-r11, lr }     @ stack modified regs
-    ldmib   r1, { r5-r6 }           @ r5 = num_channels,r6 = resample_data.delta
-    sub     r5, r5, #1              @ pre-decrement num_channels for use
-    add     r4, r1, #12             @ r4 = &resample_data.phase
-    mov     r12, #0xff
-    orr     r12, r12, #0xff00       @ r12 = 0xffff
-.dschannel_loop:
-    ldr     r1, [r4]                @ r1 = resample_data.phase
-    ldr     r7, [r2, r5, lsl #2]    @ r7 = s = src[ch - 1]
-    ldr     r8, [r3, r5, lsl #2]    @ r8 = d = dst[ch - 1]
-    add     r9, r4, #4              @ r9 = &last_sample[0]
-    ldr     r10, [r9, r5, lsl #2]   @ r10 = last_sample[ch - 1]
-    sub     r11, r0, #1
-    ldr     r14, [r7, r11, lsl #2]  @ load last sample in s[] ...
-    str     r14, [r9, r5, lsl #2]   @ and write as next frame's last_sample
-    movs    r9, r1, lsr #16         @ r9 = pos = phase >> 16
-    ldreq   r11, [r7]               @ if pos = 0, load src[0] and jump into loop
-    beq     .dsuse_last_start
-    cmp     r9, r0                  @ if pos >= count, we're already done
-    bge     .dsloop_skip
-
-    @ Register usage in loop:
-    @ r0 = count, r1 = phase, r4 = &resample_data.phase, r5 = cur_channel,
-    @ r6 = delta, r7 = s, r8 = d, r9 = pos, r10 = s[pos - 1], r11 = s[pos]
-.dsloop:
-    add     r9, r7, r9, lsl #2      @ r9 = &s[pos]
-    ldmda   r9, { r10, r11 }        @ r10 = s[pos - 1], r11 = s[pos]
-.dsuse_last_start:
-    sub     r11, r11, r10           @ r11 = diff = s[pos] - s[pos - 1]
-    @ keep frac in lower bits to take advantage of multiplier early termination
-    and     r9, r1, r12             @ frac = phase & 0xffff
-    smull   r9, r14, r11, r9
-    add     r1, r1, r6              @ phase += delta
-    add     r10, r10, r9, lsr #16   @ r10 = out = s[pos - 1] + frac*diff
-    add     r10, r10, r14, lsl #16
-    str     r10, [r8], #4           @ *d++ = out
-    mov     r9, r1, lsr #16         @ pos = phase >> 16
-    cmp     r9, r0                  @ pos < count?
-    blt     .dsloop                 @ yup, do more samples
-.dsloop_skip:
-    subs    r5, r5, #1
-    bpl     .dschannel_loop         @ if (--ch) >= 0, do another channel
-    sub     r1, r1, r0, lsl #16     @ wrap phase back to start
-    str     r1, [r4]                @ store back
-    ldr     r1, [r3]                @ r1 = &dst[0]
-    sub     r8, r8, r1              @ dst - &dst[0]
-    mov     r0, r8, lsr #2          @ convert bytes->samples
-    ldmpc   regs=r4-r11             @ ... and we're out
-    .size   dsp_downsample, .-dsp_downsample
-
-/****************************************************************************
- * int dsp_upsample(int count, struct dsp_data *dsp,
- *                  in32_t *src[], int32_t *dst[])
- */
-    .section    .text
-    .global     dsp_upsample
-dsp_upsample:
-    stmfd   sp!, { r4-r11, lr }     @ stack modified regs
-    ldmib   r1, { r5-r6 }           @ r5 = num_channels,r6 = resample_data.delta
-    sub     r5, r5, #1              @ pre-decrement num_channels for use
-    add     r4, r1, #12             @ r4 = &resample_data.phase
-    mov     r6, r6, lsl #16         @ we'll use carry to detect pos increments
-    stmfd   sp!, { r0, r4 }         @ stack count and &resample_data.phase
-.uschannel_loop:
-    ldr     r12, [r4]               @ r12 = resample_data.phase
-    ldr     r7, [r2, r5, lsl #2]    @ r7 = s = src[ch - 1]
-    ldr     r8, [r3, r5, lsl #2]    @ r8 = d = dst[ch - 1]
-    add     r9, r4, #4              @ r9 = &last_sample[0]
-    mov     r1, r12, lsl #16        @ we'll use carry to detect pos increments
-    sub     r11, r0, #1
-    ldr     r14, [r7, r11, lsl #2]  @ load last sample in s[] ...
-    ldr     r10, [r9, r5, lsl #2]   @ r10 = last_sample[ch - 1]
-    str     r14, [r9, r5, lsl #2]   @ and write as next frame's last_sample
-    movs    r14, r12, lsr #16       @ pos = resample_data.phase >> 16
-    beq     .usstart_0              @ pos = 0
-    cmp     r14, r0                 @ if pos >= count, we're already done
-    bge     .usloop_skip
-    add     r7, r7, r14, lsl #2     @ r7 = &s[pos]
-    ldr     r10, [r7, #-4]          @ r11 = s[pos - 1]
-    b       .usstart_0
-
-    @ Register usage in loop:
-    @ r0 = count, r1 = phase, r4 = &resample_data.phase, r5 = cur_channel,
-    @ r6 = delta, r7 = s, r8 = d, r9 = diff, r10 = s[pos - 1], r11 = s[pos]
-.usloop_1:
-    mov     r10, r11                @ r10 = previous sample
-.usstart_0:
-    ldr     r11, [r7], #4           @ r11 = next sample
-    mov     r4, r1, lsr #16         @ r4 = frac = phase >> 16
-    sub     r9, r11, r10            @ r9 = diff = s[pos] - s[pos - 1]
-.usloop_0:
-    smull   r12, r14, r4, r9
-    adds    r1, r1, r6              @ phase += delta << 16
-    mov     r4, r1, lsr #16         @ r4 = frac = phase >> 16
-    add     r14, r10, r14, lsl #16
-    add     r14, r14, r12, lsr #16  @ r14 = out = s[pos - 1] + frac*diff
-    str     r14, [r8], #4           @ *d++ = out
-    bcc     .usloop_0               @ if carry is set, pos is incremented
-    subs    r0, r0, #1              @ if count > 0, do another sample
-    bgt     .usloop_1
-.usloop_skip:
-    subs    r5, r5, #1
-    ldmfd   sp, { r0, r4 }          @ reload count and &resample_data.phase
-    bpl     .uschannel_loop         @ if (--ch) >= 0, do another channel
-    mov     r1, r1, lsr #16         @ wrap phase back to start of next frame
-    ldr     r2, [r3]                @ r1 = &dst[0]
-    str     r1, [r4]                @ store phase
-    sub     r8, r8, r2              @ dst - &dst[0]
-    mov     r0, r8, lsr #2          @ convert bytes->samples
-    add     sp, sp, #8              @ adjust stack for temp variables
-    ldmpc   regs=r4-r11             @ ... and we're out
-    .size       dsp_upsample, .-dsp_upsample
-
-/****************************************************************************
- *  void dsp_apply_gain(int count, struct dsp_data *data, int32_t *buf[])
- */
-    .section .icode, "ax", %progbits
-    .align  2
-    .global dsp_apply_gain
-    .type   dsp_apply_gain, %function
-dsp_apply_gain:
-    @ input: r0 = count, r1 = data, r2 = buf[]
-    stmfd   sp!, { r4-r8, lr }
-
-    ldr     r3, [r1,  #4]           @ r3 = data->num_channels
-    ldr     r4, [r1, #32]           @ r5 = data->gain
-
-.dag_outerloop:
-    ldr     r1, [r2], #4            @ r1 = buf[0] and increment index of buf[]
-    subs    r12, r0, #1             @ r12 = r0 = count - 1
-    beq     .dag_singlesample       @ Zero? Only one sample!
-
-.dag_innerloop:
-    ldmia   r1, { r5, r6 }          @ load r5, r6 from r1
-    smull   r7, r8, r5, r4          @ r7 = FRACMUL_SHL(r5, r4, 8)
-    smull   r14, r5, r6, r4         @ r14 = FRACMUL_SHL(r6, r4, 8)
-    subs    r12, r12, #2
-    mov     r7, r7, lsr #23
-    mov     r14, r14, lsr #23
-    orr     r7, r7, r8, asl #9
-    orr     r14, r14, r5, asl #9
-    stmia   r1!, { r7, r14 }        @ save r7, r14 to [r1] and increment r1
-    bgt     .dag_innerloop          @ end of inner loop
-
-    blt     .dag_evencount          @ < 0? even count
-
-.dag_singlesample:
-    ldr     r5, [r1]                @ handle odd sample
-    smull   r7, r8, r5, r4          @ r7 = FRACMUL_SHL(r5, r4, 8)
-    mov     r7, r7, lsr #23
-    orr     r7, r7, r8, asl #9
-    str     r7, [r1]
-
-.dag_evencount:
-    subs    r3, r3, #1
-    bgt     .dag_outerloop          @ end of outer loop
-
-    ldmpc   regs=r4-r8
-    .size   dsp_apply_gain, .-dsp_apply_gain
diff --git a/lib/rbcodec/dsp/dsp_arm_v6.S b/lib/rbcodec/dsp/dsp_arm_v6.S
index a9a88ce5bf..a36760f744 100644
--- a/lib/rbcodec/dsp/dsp_arm_v6.S
+++ b/lib/rbcodec/dsp/dsp_arm_v6.S
@@ -20,19 +20,21 @@
  ****************************************************************************/
 
 /****************************************************************************
- *  void sample_output_mono(int count, struct dsp_data *data,
- *                          const int32_t *src[], int16_t *dst)
+ *  void sample_output_mono(struct sample_io_data *this,
+ *                          struct dsp_buffer *src,
+ *                          struct dsp_buffer *dst)
  */
-    .section .text, "ax", %progbits
-    .align  2
+    .section .text
     .global sample_output_mono
     .type   sample_output_mono, %function
 sample_output_mono:
-    @ input: r0 = count, r1 = data, r2 = src, r3 = dst
+    @ input: r0 = this, r1 = src, r2 = dst
     stmfd   sp!, { r4, lr }            @
                                        @
-    ldr     r1, [r1]                   @ r1 = data->output_scale
-    ldr     r2, [r2]                   @ r2 = src[0]
+    ldr     r0, [r0]                   @ r0 = this->outcount
+    ldr     r3, [r2, #4]               @ r3 = dst->p16out
+    ldr     r2, [r1, #4]               @ r2 = src->p32[0]
+    ldrb    r1, [r1, #19]              @ r1 = src->format.output_scale
                                        @
     mov     r4, #1                     @ r4 = 1 << (scale - 1)
     mov     r4, r4, lsl r1             @
@@ -68,19 +70,21 @@ sample_output_mono:
     .size   sample_output_mono, .-sample_output_mono
 
 /****************************************************************************
- * void sample_output_stereo(int count, struct dsp_data *data,
- *                           const int32_t *src[], int16_t *dst)
+ * void sample_output_stereo(struct sample_io_data *this,
+ *                           struct dsp_buffer *src,
+ *                           struct dsp_buffer *dst)
  */
-    .section .text, "ax", %progbits
-    .align  2
+    .section .text
     .global sample_output_stereo
     .type   sample_output_stereo, %function
 sample_output_stereo:
-    @ input: r0 = count, r1 = data, r2 = src, r3 = dst
+    @ input: r0 = this, r1 = src, r2 = dst
     stmfd   sp!, { r4-r7, lr }         @
                                        @
-    ldr     r1, [r1]                   @ r1 = data->output_scale
-    ldmia   r2, { r2, r4 }             @ r2 = src[0], r4 = src[1]
+    ldr     r0, [r0]                   @ r0 = this->outcount
+    ldr     r3, [r2, #4]               @ r3 = dst->p16out
+    ldmib   r1, { r2, r4 }             @ r2 = src->p32[0], r4 = src->p32[1]
+    ldrb    r1, [r1, #19]              @ r1 = src->format.output_scale
                                        @
     mov     r5, #1                     @ r5 = 1 << (scale - 1)
     mov     r5, r5, lsl r1             @
diff --git a/lib/rbcodec/dsp/dsp_asm.h b/lib/rbcodec/dsp/dsp_asm.h
deleted file mode 100644
index 7bf18370a3..0000000000
--- a/lib/rbcodec/dsp/dsp_asm.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/***************************************************************************
- *             __________               __   ___.
- *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
- *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
- *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
- *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
- *                     \/            \/     \/    \/            \/
- * $Id$
- *
- * Copyright (C) 2006 Thom Johansen
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
- * KIND, either express or implied.
- *
- ****************************************************************************/
-
-#include <config.h>
-
-#ifndef _DSP_ASM_H
-#define _DSP_ASM_H
-
-/* Set the appropriate #defines based on CPU or whatever matters */
-#if defined(CPU_ARM)
-#define DSP_HAVE_ASM_APPLY_GAIN
-#define DSP_HAVE_ASM_RESAMPLING
-#define DSP_HAVE_ASM_CROSSFEED
-#define DSP_HAVE_ASM_SOUND_CHAN_MONO
-#define DSP_HAVE_ASM_SOUND_CHAN_CUSTOM
-#define DSP_HAVE_ASM_SOUND_CHAN_KARAOKE
-#define DSP_HAVE_ASM_SAMPLE_OUTPUT_MONO
-#define DSP_HAVE_ASM_SAMPLE_OUTPUT_STEREO
-#elif defined (CPU_COLDFIRE)
-#define DSP_HAVE_ASM_APPLY_GAIN
-#define DSP_HAVE_ASM_RESAMPLING
-#define DSP_HAVE_ASM_CROSSFEED
-#define DSP_HAVE_ASM_SOUND_CHAN_MONO
-#define DSP_HAVE_ASM_SOUND_CHAN_CUSTOM
-#define DSP_HAVE_ASM_SOUND_CHAN_KARAOKE
-#define DSP_HAVE_ASM_SAMPLE_OUTPUT_MONO
-#define DSP_HAVE_ASM_SAMPLE_OUTPUT_STEREO
-#endif /* CPU_COLDFIRE */
-
-/* Declare prototypes based upon what's #defined above */
-#ifdef DSP_HAVE_ASM_CROSSFEED
-void apply_crossfeed(int count, int32_t *buf[]);
-#endif
-
-#ifdef DSP_HAVE_ASM_APPLY_GAIN
-void dsp_apply_gain(int count, struct dsp_data *data, int32_t *buf[]);
-#endif /* DSP_HAVE_ASM_APPLY_GAIN* */
-
-#ifdef DSP_HAVE_ASM_RESAMPLING
-int dsp_upsample(int count, struct dsp_data *data,
-                 const int32_t *src[], int32_t *dst[]);
-int dsp_downsample(int count, struct dsp_data *data,
-                   const int32_t *src[], int32_t *dst[]);
-#endif /* DSP_HAVE_ASM_RESAMPLING */
-
-#ifdef DSP_HAVE_ASM_SOUND_CHAN_MONO
-void channels_process_sound_chan_mono(int count, int32_t *buf[]);
-#endif
-
-#ifdef DSP_HAVE_ASM_SOUND_CHAN_CUSTOM
-void channels_process_sound_chan_custom(int count, int32_t *buf[]);
-#endif
-
-#ifdef DSP_HAVE_ASM_SOUND_CHAN_KARAOKE
-void channels_process_sound_chan_karaoke(int count, int32_t *buf[]);
-#endif
-
-#ifdef DSP_HAVE_ASM_SAMPLE_OUTPUT_STEREO
-void sample_output_stereo(int count, struct dsp_data *data,
-                          const int32_t *src[], int16_t *dst);
-#endif
-
-#ifdef DSP_HAVE_ASM_SAMPLE_OUTPUT_MONO
-void sample_output_mono(int count, struct dsp_data *data,
-                        const int32_t *src[], int16_t *dst);
-#endif
-
-#endif /* _DSP_ASM_H */
diff --git a/lib/rbcodec/dsp/dsp_cf.S b/lib/rbcodec/dsp/dsp_cf.S
index 15ec7eb383..c710df5177 100644
--- a/lib/rbcodec/dsp/dsp_cf.S
+++ b/lib/rbcodec/dsp/dsp_cf.S
@@ -19,23 +19,27 @@
  * KIND, either express or implied.
  *
  ****************************************************************************/
+#include "config.h"
 
 /****************************************************************************
- * void dsp_apply_gain(int count, struct dsp_data *data, int32_t *buf[])
+ * void pga_process(struct dsp_proc_entry *this, struct dsp_buffer **buf_p)
  */
     .section    .text
     .align      2
-    .global     dsp_apply_gain
-dsp_apply_gain:
+    .global     pga_process
+pga_process:
+    | input: 4(sp) = this, 8(sp) = buf_p
+    movem.l     4(%sp), %a0-%a1         | %a0 = this, %a1 = buf_p
+    move.l      (%a0), %a0              | %a0 = this->data = &pga_data
+    move.l      (%a0), %a0              | %a0 = data->gain
+    move.l      (%a1), %a1              | %a1 = buf = *buf_p
     lea.l       -20(%sp), %sp           | save registers
     movem.l     %d2-%d4/%a2-%a3, (%sp)  |
-    movem.l     28(%sp), %a0-%a1        | %a0 = data,
-                                        | %a1 = buf
-    move.l      4(%a0), %d1             | %d1 = data->num_channels
-    move.l      32(%a0), %a0            | %a0 = data->gain (in s8.23)
+    clr.l       %d1                     | %d1 = buf->format.num_channels
+    move.b      17(%a1), %d1            |
 10: | channel loop                      |
-    move.l      24(%sp), %d0            | %d0 = count
-    move.l      -4(%a1, %d1.l*4), %a2   | %a2 = s = buf[ch-1]
+    move.l      (%a1), %d0              | %d0 = buf->remcount
+    move.l      (%a1, %d1.l*4), %a2     | %a2 = s = buf->p32[ch-1]
     move.l      %a2, %a3                | %a3 = d = s
     move.l      (%a2)+, %d2             | %d2 = *s++,
     mac.l       %a0, %d2, (%a2)+, %d2, %acc0 | %acc0 = S(n)*gain, load S(n+1)
@@ -61,25 +65,29 @@ dsp_apply_gain:
     movem.l     (%sp), %d2-%d4/%a2-%a3  | restore registers
     lea.l       20(%sp), %sp            | cleanup stack
     rts                                 |
-    .size       dsp_apply_gain,.-dsp_apply_gain
+    .size       pga_process, .-pga_process
 
 /****************************************************************************
- * void apply_crossfeed(int count, int32_t *buf[])
+ * void crossfeed_process(struct dsp_proc_entry *this,
+ *                        struct dsp_buffer **buf_p)
  */
     .section    .text
     .align      2
-    .global     apply_crossfeed
-apply_crossfeed:
+    .global     crossfeed_process
+crossfeed_process:
+    | input: 4(sp) = this, 8(sp) = buf_p
     lea.l       -44(%sp), %sp           |
     movem.l     %d2-%d7/%a2-%a6, (%sp)  | save all regs
-    movem.l     48(%sp), %d7/%a4        | %d7 = count, %a4 = src
-    movem.l     (%a4), %a4-%a5          | %a4 = src[0], %a5 = src[1]
-    lea.l       crossfeed_data, %a1     | %a1 = &crossfeed_data
+    movem.l     48(%sp), %a1/%a4        | %a1 = this, %a4 = buf_p
+    move.l      (%a4), %a4              | %a4 = buf = *buf_p
+    movem.l     (%a4), %d7/%a4-%a5      | %d7 = buf->remcount, %a4 = buf->p32[0],
+                                        | %a5 = buf->p32[1]
+    move.l      (%a1), %a1              | %a1 = &crossfeed_state
     move.l      (%a1)+, %d6             | %d6 = direct gain
     movem.l     12(%a1), %d0-%d3        | fetch filter history samples
-    move.l      132(%a1), %a0           | fetch delay line address
+    lea.l       132(%a1), %a6           | %a6 = delay line wrap limit
+    move.l      (%a6), %a0              | fetch delay line address
     movem.l     (%a1), %a1-%a3          | load filter coefs
-    lea.l       crossfeed_data+136, %a6 | %a6 = delay line wrap limit
     bra.b       20f | loop start        | go to loop start point
     /* Register usage in loop:
      * %a0 = delay_p, %a1..%a3 = b0, b1, a1 (filter coefs),
@@ -109,174 +117,181 @@ apply_crossfeed:
     mac.l       %d6, %d5, %acc1         | %acc1 += gain*x_r[n]
     cmp.l       %a6, %a0                | wrap %a0 if passed end
     bhs.b       30f | wrap buffer       |
-    .word       0x51fb | tpf.l          | trap the buffer wrap
+    tpf.l                               | trap the buffer wrap
 30: | wrap buffer                       | ...fwd taken branches more costly
-    lea.l       -104(%a0), %a0          | wrap it up
+    lea.l       -104(%a6), %a0          | wrap it up
     subq.l      #1, %d7                 | --count > 0 ?
     bgt.b       10b | loop              | yes? do more
     movclr.l    %acc0, %d4              | write last outputs
     move.l      %d4, (%a4)              | .
     movclr.l    %acc1, %d5              | .
     move.l      %d5, (%a5)              | .
-    lea.l       crossfeed_data+16, %a1  | save data back to struct
-    movem.l     %d0-%d3, (%a1)          | ...history
-    move.l      %a0, 120(%a1)           | ...delay_p
+    movem.l     %d0-%d3, -120(%a6)      | ...history
+    move.l      %a0, (%a6)              | ...delay_p
     movem.l     (%sp), %d2-%d7/%a2-%a6  | restore all regs
     lea.l       44(%sp), %sp            |
     rts                                 |
-    .size       apply_crossfeed,.-apply_crossfeed
+    .size       crossfeed_process,.-crossfeed_process
 
 /****************************************************************************
- * int dsp_downsample(int count, struct dsp_data *data,
- *                    in32_t *src[], int32_t *dst[])
+ * int lin_resample_resample(struct resample_data *data,
+ *                           struct dsp_buffer *src,
+ *                           struct dsp_buffer *dst)
  */
     .section    .text
     .align      2
-    .global     dsp_downsample
-dsp_downsample:
-    lea.l       -40(%sp), %sp           | save non-clobberables
-    movem.l     %d2-%d7/%a2-%a5, (%sp)  |
-    movem.l     44(%sp), %d2/%a0-%a2    | %d2 = count
-                                        | %a0 = data
+    .global     lin_resample_resample
+lin_resample_resample:
+    | input: 4(sp) = data, 8(sp) = src, 12(sp) = dst
+    lea.l       -44(%sp), %sp           | save non-volatiles
+    movem.l     %d2-%d7/%a2-%a6, (%sp)  |
+    movem.l     48(%sp), %a0-%a2        | %a0 = data
                                         | %a1 = src
                                         | %a2 = dst
-    movem.l     4(%a0), %d3-%d4         | %d3 = ch = data->num_channels
-                                        | %d4 = delta = data->resample_data.delta
+    clr.l       %d1                     | %d1 = ch = src->format.num_channels
+    move.b      17(%a1), %d1            |
     moveq.l     #16, %d7                | %d7 = shift
-10: | channel loop                      |
-    move.l      12(%a0), %d5            | %d5 = phase = data->resample_data.phase
-    move.l      -4(%a1, %d3.l*4), %a3   | %a3 = s = src[ch-1]
-    move.l      -4(%a2, %d3.l*4), %a4   | %a4 = d = dst[ch-1]
-    lea.l       12(%a0, %d3.l*4), %a5   | %a5 = &data->resample_data.ast_sample[ch-1]
-    move.l      (%a5), %d0              | %d0 = last = data->resample_data.last_sample[ch-1]
-    move.l      -4(%a3, %d2.l*4), (%a5) | data->resample_data.last_sample[ch-1] = s[count-1]
-    move.l      %d5, %d6                | %d6 = pos = phase >> 16
-    lsr.l       %d7, %d6                |
-    cmp.l       %d2, %d6                | past end of samples?
-    bge.b       40f | skip resample loop| yes? skip loop
-    tst.l       %d6                     | need last sample of prev. frame?
-    bne.b       20f | resample loop     | no? start main loop
-    move.l      (%a3, %d6.l*4), %d1     | %d1 = s[pos]
-    bra.b       30f | resample start last | start with last (last in %d0)
-20: | resample loop                     |
-    lea.l       -4(%a3, %d6.l*4), %a5   | load s[pos-1] and s[pos]
-    movem.l     (%a5), %d0-%d1          |
-30: | resample start last               |
-    sub.l       %d0, %d1                | %d1 = diff = s[pos] - s[pos-1]
-    move.l      %d0, %acc0              | %acc0 = previous sample
-    move.l      %d5, %d0                | frac = (phase << 16) >> 1
+.lrs_channel_loop:                      |
+    movem.l     (%a0), %d2-%d3          | %d2 = delta = data->delta,
+                                        | %d3 = phase = data->phase
+    move.l      (%a1), %d4              | %d4 = srcrem = src->remcount
+    move.l      12(%a2), %d5            | %d5 = dstrem = dst->bufcount
+    cmp.l       #0x8000, %d4            | %d4 = MIN(srcrem, 0x8000)
+    ble.b       10f                     |
+    move.l      #0x8000, %d4            |
+10:                                     |
+    move.l      (%a1, %d1.l*4), %a3     | %a3 = s = src->p32[ch - 1]
+    move.l      (%a2, %d1.l*4), %a4     | %a4 = d = dst->p32[ch - 1]
+    move.l      %d3, %d0                | %d0 = pos
+    lsr.l       %d7, %d0                | ...
+    beq.b       11f                     | pos == 0?
+    cmp.l       %d4, %d0                | pos = MIN(pos, srcrem)
+    blt.b       12f                     |
+    move.l      %d4, %d0                | pos = srcrem
+    move.l      -4(%a3, %d0.l*4), %d6   | %d6 = last = s[pos - 1]
+    bra.w       .lrs_channel_complete   | at limit; nothing to do but next
+11:                                     |
+    move.l      4(%a0, %d1.l*4), %d6    | %d6 = last = last_sample[ch - 1]
+    tpf.l                               | trap next move.l (last = s[pos - 1])
+12:                                     |
+    move.l      -4(%a3, %d0.l*4), %d6   | %d6 = last = s[pos - 1]
+    cmp.l       #0x10000, %d2           | delta >= 1.0?
+    bhs.b       .lrs_downsample         | yes? downsampling
+                                        |
+    /** Upsampling **/                  |
+    lea.l       (%a3, %d0.l*4), %a3     | %a3 = &s[pos]
+    sub.l       %d4, %d0                | %d0 = pos - srcrem = -dte
+    lsl.l       %d7, %d2                | move delta to bits 30..15
+    lsr.l       #1, %d2                 |
+    lsl.l       %d7, %d3                | move phase to bits 30..15
+    lsr.l       #1, %d3                 |
+    move.l      (%a3)+, %a5             | %a5 = s[pos]
+    move.l      %a5, %a6                | %a6 = diff = s[pos] - last
+    sub.l       %d6, %a6                |
+    bra.b       22f                     |
+    /* Funky loop structure is to avoid emac latency stalls */
+20:                                     |
+    move.l      (%a3)+, %a5             | %a5 = s[pos]
+    move.l      %a5, %a6                | %a6 = diff = s[pos] - last
+    sub.l       %d6, %a6                |
+21:                                     |
+    movclr.l    %acc0, %d7              | *d++ = %d7 = result
+    move.l      %d7, (%a4)+             |
+22:                                     |
+    move.l      %d6, %acc0              | %acc0 = last
+    mac.l       %d3, %a6, %acc0         | %acc0 += frac * diff
+    subq.l      #1, %d5                 | dstrem <= 0?
+    ble.b       23f                     | yes? stop
+    add.l       %d2, %d3                | phase += delta
+    bpl.b       21b                     | load next values?
+    move.l      %a5, %d6                |
+    bclr.l      #31, %d3                | clear sign bit
+    addq.l      #1, %d0                 | dte > 0?
+    bmi.b       20b                     | yes? continue resampling
+    tpf.w                               | trap next add.l (phase += delta)
+23:                                     |
+    add.l       %d2, %d3                | phase += delta
+    lsl.l       #1, %d3                 | frac -> phase
+    bcs.b       24f                     | was sign bit set?
+    tpf.l                               |
+24:                                     |
+    move.l      %a5, %d6                | yes? was going to move to new s[pos]
+    addq.l      #1, %d0                 |
+    movclr.l    %acc0, %d7              | *d = %d7 = result
+    move.l      %d7, (%a4)              |
+    add.l       %d4, %d0                | %d0 = -dte + srcrem = pos
+    or.l        %d0, %d3                | restore phase
+    swap.w      %d3                     |
+    moveq.l     #16, %d7                | %d7 = shift
+    bra.b       .lrs_channel_complete   |
+                                        |
+    /** Downsampling **/                |
+.lrs_downsample:                        |
+    move.l      (%a3, %d0.l*4), %a5     | %a5 = s[pos]
+    bra.b       31f                     |
+30:                                     |
+    lea.l       -4(%a3, %d0.l*4), %a5   | %d6 = s[pos - 1], %a5 = s[pos]
+    movem.l     (%a5), %d6/%a5          |
+31:                                     |
+    move.l      %d6, %acc0              | %acc0 = last
+    sub.l       %d6, %a5                | %a5 = diff = s[pos] - s[pos - 1]
+    move.l      %d3, %d0                | frac = (phase << 16) >> 1
     lsl.l       %d7, %d0                |
     lsr.l       #1, %d0                 |
-    mac.l       %d0, %d1, %acc0         | %acc0 += frac * diff
-    add.l       %d4, %d5                | phase += delta
-    move.l      %d5, %d6                | pos = phase >> 16
-    lsr.l       %d7, %d6                |
-    movclr.l    %acc0, %d0              |
-    move.l      %d0, (%a4)+             | *d++ = %d0
-    cmp.l       %d2, %d6                | pos < count?
-    blt.b       20b | resample loop     | yes? continue resampling
-40: | skip resample loop                |
-    subq.l      #1, %d3                 | ch > 0?
-    bgt.b       10b | channel loop      | yes? process next channel
-    lsl.l       %d7, %d2                | wrap phase to start of next frame
-    sub.l       %d2, %d5                | data->resample_data.phase =
-    move.l      %d5, 12(%a0)            | ... phase - (count << 16)
-    move.l      %a4, %d0                | return d - d[0]
-    sub.l       (%a2), %d0              |
-    asr.l       #2, %d0                 | convert bytes->samples
-    movem.l     (%sp), %d2-%d7/%a2-%a5  | restore non-clobberables
-    lea.l       40(%sp), %sp            | cleanup stack
+    mac.l       %d0, %a5, %acc0         | %acc0 += frac * diff
+    add.l       %d2, %d3                | phase += delta
+    move.l      %d3, %d0                | pos = phase >> 16
+    lsr.l       %d7, %d0                |
+    movclr.l    %acc0, %a5              |
+    move.l      %a5, (%a4)+             | *d++ = %a5
+    subq.l      #1, %d5                 | dst full?
+    ble.b       32f                     | yes? stop
+    cmp.l       %d4, %d0                | pos < srcrem?
+    blt.b       30b                     | yes? continue resampling
+    tpf.l                               | trap cmp.l and ble.b
+32:                                     |
+    cmp.l       %d4, %d0                | pos = MIN(pos, srcrem)
+    ble.b       33f                     |
+    move.l      %d4, %d0                |
+33:                                     |
+    move.l      -4(%a3, %d0.l*4), %d6   | %d6 = s[pos - 1]
+                                        |
+.lrs_channel_complete:                  |
+    move.l      %d6, 4(%a0, %d1.l*4)    | last_sample[ch - 1] = last
+    subq.l      #1, %d1                 | ch > 0?
+    bgt.w       .lrs_channel_loop       | yes? process next channel
+                                        |
+    move.l      12(%a2), %d1            | %d1 = dst->bufcount
+    sub.l       %d5, %d1                | written = dst->bufcount - dstrem
+    move.l      %d1, (%a2)              | dst->remcount = written
+    move.l      %d0, %d1                | wrap phase to position in next frame
+    lsl.l       %d7, %d1                | data->phase = phase - (pos << 16)
+    sub.l       %d1, %d3                | ...
+    move.l      %d3, 4(%a0)             | ...
+    movem.l     (%sp), %d2-%d7/%a2-%a6  | restore non-volatiles
+    lea.l       44(%sp), %sp            | cleanup stack
     rts                                 | buh-bye
-    .size       dsp_downsample,.-dsp_downsample
+
+    .size       lin_resample_resample, .-lin_resample_resample
+
 
 /****************************************************************************
- * int dsp_upsample(int count, struct dsp_data *dsp,
- *                  const int32_t *src[], int32_t *dst[])
- */
-    .section    .text
-    .align      2
-    .global     dsp_upsample
-dsp_upsample:
-    lea.l       -40(%sp), %sp           | save non-clobberables
-    movem.l     %d2-%d7/%a2-%a5, (%sp)  |
-    movem.l     44(%sp), %d2/%a0-%a2    | %d2 = count
-                                        | %a0 = data
-                                        | %a1 = src
-                                        | %a2 = dst
-    movem.l      4(%a0), %d3-%d4        | %d3 = ch = channels
-                                        | %d4 = delta = data->resample_data.delta
-    swap        %d4                     | swap delta to high word to use...
-                                        | ...carries to increment position
-10: | channel loop                      |
-    move.l      12(%a0), %d5            | %d5 = phase = data->resample_data.phase
-    move.l      -4(%a1, %d3.l*4), %a3   | %a3 = s = src[ch-1]
-    lea.l       12(%a0, %d3.l*4), %a4   | %a4 = &data->resample_data.last_sample[ch-1]
-    lea.l       -4(%a3, %d2.l*4), %a5   | %a5 = src_end = &src[count-1]
-    move.l      (%a4), %d0              | %d0 = last = data->resample_data.last_sample[ch-1]
-    move.l      (%a5), (%a4)            | data->resample_data.last_sample[ch-1] = s[count-1]
-    move.l      -4(%a2, %d3.l*4), %a4   | %a4 = d = dst[ch-1]
-    move.l      (%a3)+, %d1             | fetch first sample - might throw this...
-                                        | ...away later but we'll be preincremented
-    move.l      %d1, %d6                | save sample value
-    sub.l       %d0, %d1                | %d1 = diff = s[0] - last
-    swap        %d5                     | swap phase to high word to use
-                                        | carries to increment position
-    move.l      %d5, %d7                | %d7 = pos = phase >> 16
-    clr.w       %d5                     |
-    eor.l       %d5, %d7                | pos == 0?
-    beq.b       40f | loop start        | yes? start loop
-    cmp.l       %d2, %d7                | past end of samples?
-    bge.b       50f | skip resample loop| yes? go to next channel and collect info
-    lea.l       (%a3, %d7.l*4), %a3     | %a3 = s = &s[pos+1]
-    movem.l     -8(%a3), %d0-%d1        | %d0 = s[pos-1], %d1 = s[pos]
-    move.l      %d1, %d6                | save sample value
-    sub.l       %d0, %d1                | %d1 = diff = s[pos] - s[pos-1]
-    bra.b       40f | loop start        |
-20: | next sample loop                  |
-    move.l      %d6, %d0                | move previous sample to %d0
-    move.l      (%a3)+, %d1             | fetch next sample
-    move.l      %d1, %d6                | save sample value
-    sub.l       %d0, %d1                | %d1 = diff = s[pos] - s[pos-1]
-30: | same sample loop                  |
-    movclr.l    %acc0, %d7              | %d7 = result
-    move.l      %d7, (%a4)+             | *d++ = %d7
-40: | loop start                        |
-    lsr.l       #1, %d5                 | make phase into frac
-    move.l      %d0, %acc0              | %acc0 = s[pos-1]
-    mac.l       %d1, %d5, %acc0         | %acc0 = diff * frac
-    lsl.l       #1, %d5                 | restore frac to phase
-    add.l       %d4, %d5                | phase += delta
-    bcc.b       30b | same sample loop  | load next values?
-    cmp.l       %a5, %a3                | src <= src_end?
-    bls.b       20b | next sample loop  | yes? continue resampling
-    movclr.l    %acc0, %d7              | %d7 = result
-    move.l      %d7, (%a4)+             | *d++ = %d7
-50: | skip resample loop                |
-    subq.l      #1, %d3                 | ch > 0?
-    bgt.b       10b | channel loop      | yes? process next channel
-    swap        %d5                     | wrap phase to start of next frame
-    move.l      %d5, 12(%a0)            | ...and save in data->resample_data.phase
-    move.l      %a4, %d0                | return d - d[0]
-    sub.l       (%a2), %d0              |
-    movem.l     (%sp), %d2-%d7/%a2-%a5  | restore non-clobberables
-    asr.l       #2, %d0                 | convert bytes->samples
-    lea.l       40(%sp), %sp            | cleanup stack
-    rts                                 | buh-bye
-    .size       dsp_upsample,.-dsp_upsample
-
-/****************************************************************************
- * void channels_process_sound_chan_mono(int count, int32_t *buf[])
+ * void channel_mode_proc_mono(struct dsp_proc_entry *this,
+ *                             struct dsp_buffer **buf_p)
  *
  * Mix left and right channels 50/50 into a center channel.
  */
     .section    .text
     .align      2
-    .global     channels_process_sound_chan_mono
-channels_process_sound_chan_mono:
-    movem.l     4(%sp), %d0/%a0         | %d0 = count, %a0 = buf
+    .global     channel_mode_proc_mono
+channel_mode_proc_mono:
+    | input: 4(sp) = this, 8(sp) = buf_p
+    move.l      8(%sp), %a0             | %a0 = buf_p
+    move.l      (%a0), %a0              | %a0 = buf = *buf_p
     lea.l       -20(%sp), %sp           | save registers
     movem.l     %d2-%d4/%a2-%a3, (%sp)  |
-    movem.l     (%a0), %a0-%a1          | get channel pointers
+    movem.l     (%a0), %d0/%a0-%a1      | %d0 = buf->remcount, %a0 = buf->p32[0],
+                                        | %a1 = buf->p32[1]
     move.l      %a0, %a2                | use separate dst pointers since read
     move.l      %a1, %a3                | pointers run one ahead of write
     move.l      #0x40000000, %d3        | %d3 = 0.5
@@ -301,26 +316,29 @@ channels_process_sound_chan_mono:
     movem.l     (%sp), %d2-%d4/%a2-%a3  | restore registers
     lea.l       20(%sp), %sp            | cleanup
     rts                                 |
-    .size       channels_process_sound_chan_mono, \
-                .-channels_process_sound_chan_mono
+    .size       channel_mode_proc_mono, .-channel_mode_proc_mono
 
 /****************************************************************************
- * void channels_process_sound_chan_custom(int count, int32_t *buf[])
+ * void channel_mode_proc_custom(struct dsp_proc_entry *this,
+ *                               struct dsp_buffer **buf_p)
  *
  * Apply stereo width (narrowing/expanding) effect.
  */
     .section    .text
     .align      2
-    .global     channels_process_sound_chan_custom
-channels_process_sound_chan_custom:
-    movem.l     4(%sp), %d0/%a0         | %d0 = count, %a0 = buf
+    .global     channel_mode_proc_custom
+channel_mode_proc_custom:
+    | input: 4(sp) = this, 8(sp) = buf_p
     lea.l       -28(%sp), %sp           | save registers
     movem.l     %d2-%d6/%a2-%a3, (%sp)  |
-    movem.l     (%a0), %a0-%a1          | get channel pointers
+    movem.l     32(%sp), %a0-%a1        | %a0 = this, %a1 = buf_p
+    move.l      (%a1), %a1              | %a1 = buf = *buf_p
+    move.l      (%a0), %a2              | %a2 = this->data = &channel_mode_data
+    movem.l     (%a1), %d0/%a0-%a1      | %d0 = buf->remcount, %a0 = buf->p32[0],
+                                        | %a1 = buf->p32[1]
+    movem.l     (%a2), %d3-%d4          | %d3 = sw_gain, %d4 = sw_cross
     move.l      %a0, %a2                | use separate dst pointers since read
     move.l      %a1, %a3                | pointers run one ahead of write
-    move.l      dsp_sw_gain, %d3        | load straight (mid) gain
-    move.l      dsp_sw_cross, %d4       | load cross (side) gain
     move.l      (%a0)+, %d1             | prime the input registers
     move.l      (%a1)+, %d2             |
     mac.l       %d1, %d3             , %acc0 |  L = l*gain + r*cross
@@ -348,22 +366,25 @@ channels_process_sound_chan_custom:
     movem.l     (%sp), %d2-%d6/%a2-%a3  | restore registers
     lea.l       28(%sp), %sp            | cleanup
     rts                                 |
-    .size       channels_process_sound_chan_custom, \
-                .-channels_process_sound_chan_custom
+    .size       channel_mode_proc_custom, .-channel_mode_proc_custom
 
 /****************************************************************************
- *  void channels_process_sound_chan_karaoke(int count, int32_t *buf[])
+ *  void channel_mode_proc_karaoke(struct dsp_proc_entry *this,
+ *                                 struct dsp_buffer **buf_p)
  *
  *  Separate channels into side channels.
  */
     .section    .text
     .align      2
-    .global     channels_process_sound_chan_karaoke
-channels_process_sound_chan_karaoke:
-    movem.l     4(%sp), %d0/%a0         | %d0 = count, %a0 = buf
+    .global     channel_mode_proc_karaoke
+channel_mode_proc_karaoke:
+    | input: 4(sp) = this, 8(sp) = buf_p
+    move.l      8(%sp), %a0             | %a0 = buf_p
+    move.l      (%a0), %a0              | %a0 = buf = *buf_p
     lea.l       -20(%sp), %sp           | save registers
     movem.l     %d2-%d4/%a2-%a3, (%sp)  |
-    movem.l     (%a0), %a0-%a1          | get channel src pointers
+    movem.l     (%a0), %d0/%a0-%a1      | %d0 = buf->remcount, %a0 = buf->p32[0],
+                                        | %a1 = buf->p32[1]
     move.l      %a0, %a2                | use separate dst pointers since read
     move.l      %a1, %a3                | pointers run one ahead of write
     move.l      #0x40000000, %d3        | %d3 = 0.5
@@ -390,12 +411,90 @@ channels_process_sound_chan_karaoke:
     movem.l     (%sp), %d2-%d4/%a2-%a3  | restore registers
     lea.l       20(%sp), %sp            | cleanup
     rts                                 |
-    .size       channels_process_sound_chan_karaoke, \
-                .-channels_process_sound_chan_karaoke
+    .size       channel_mode_proc_karaoke, .-channel_mode_proc_karaoke
 
 /****************************************************************************
- * void sample_output_stereo(int count, struct dsp_data *data,
- *                           const int32_t *src[], int16_t *dst)
+ * void filter_process(struct dsp_filter *f, int32_t *buf[], int count,
+ *                     unsigned int channels)
+ *
+ * define HIGH_PRECISION as '1' to make filtering calculate lower bits after
+ * shifting. without this, "shift" - 1 of the lower bits will be lost here.
+ */
+#define HIGH_PRECISION 0
+    .text
+    .global filter_process
+filter_process:
+    | input: 4(sp) = f, 8(sp) = buf, 12(sp) = count, 16(sp) = channels
+    lea.l       -44(%sp), %sp           | save clobbered regs
+#if HIGH_PRECISION
+    movem.l     %d2-%d7/%a2-%a6, (%sp)  | .
+#else
+    movem.l     %d2-%d6/%a2-%a6, (%sp)  |
+#endif
+    move.l      48(%sp), %a5            | fetch filter structure address
+    clr.l       %d6                     | load shift count
+    move.b      52(%a5), %d6            | .
+    subq.l      #1, %d6                 | EMAC gives us one free shift
+#if HIGH_PRECISION
+    moveq.l     #8, %d7
+    sub.l       %d6, %d7                | shift for lower part of accumulator
+#endif
+    movem.l     (%a5), %a0-%a4          | load coefs
+    lea.l       20(%a5), %a5            | point to filter history
+
+10: | channel loop
+    move.l      52(%sp), %a6            | load input channel pointer
+    addq.l      #4, 52(%sp)             | point x to next channel
+    move.l      (%a6), %a6              |
+    move.l      56(%sp), %d5            | number of samples
+    movem.l     (%a5), %d0-%d3          | load filter history
+
+    | d0-d3 = history, d4 = temp, d5 = sample count, d6 = upper shift amount,
+    | d7 = lower shift amount, a0-a4 = coefs, a5 = history pointer, a6 = buf[ch]
+20: | loop
+    | Direct form 1 filtering code. We assume DSP has put EMAC in frac mode.
+    | y[n] = b0*x[i] + b1*x[i - 1] + b2*x[i - 2] + a1*y[i - 1] + a2*y[i - 2],
+    | where y[] is output and x[] is input. This is performed out of order
+    | to do parallel load of input value.
+    mac.l       %a2, %d1, %acc0         | acc = b2*x[i - 2]
+    move.l      %d0, %d1                | fix input history
+    mac.l       %a1, %d0, (%a6), %d0, %acc0 | acc += b1*x[i - 1], x[i] -> d0
+    mac.l       %a0, %d0, %acc0         | acc += b0*x[i]
+    mac.l       %a3, %d2, %acc0         | acc += a1*y[i - 1]
+    mac.l       %a4, %d3, %acc0         | acc += a2*y[i - 2]
+    move.l      %d2, %d3                | fix output history
+#if HIGH_PRECISION
+    move.l      %accext01, %d2          | fetch lower part of accumulator
+    move.b      %d2, %d4                | clear upper three bytes
+    lsr.l       %d7, %d4                | shift lower bits
+#endif
+    movclr.l    %acc0, %d2              | fetch upper part of result
+    asl.l       %d6, %d2                | restore fixed point format
+#if HIGH_PRECISION
+    or.l        %d2, %d4                | combine lower and upper parts
+#endif
+    move.l      %d2, (%a6)+             | save result
+    subq.l      #1, %d5                 | are we done with this channel?
+    bgt         20b | loop
+
+    movem.l     %d0-%d3, (%a5)          | save history back to struct
+    lea.l       16(%a5), %a5            | point to next channel's history
+    subq.l      #1, 60(%sp)             | have we processed both channels?
+    bhi         10b | channel loop
+
+#if HIGH_PRECISION
+    movem.l     (%sp), %d2-%d7/%a2-%a6
+#else
+    movem.l     (%sp), %d2-%d6/%a2-%a6
+#endif
+    lea.l       44(%sp), %sp
+    rts
+    .size       filter_process, .-filter_process
+
+/****************************************************************************
+ * void sample_output_stereo(struct sample_io_data *this,
+ *                           struct dsp_buffer *src,
+ *                           struct dsp_buffer *dst)
  *
  * Framework based on the ubiquitous Rockbox line transfer logic for
  * Coldfire CPUs.
@@ -417,20 +516,24 @@ channels_process_sound_chan_karaoke:
     .align      2
     .global    sample_output_stereo
 sample_output_stereo:
+    | input: 4(sp) = this, 8(sp) = src, 12(sp) = dst
     lea.l       -48(%sp), %sp             | save registers
     move.l      %macsr, %d1               | do it now as at many lines will
     movem.l     %d1-%d7/%a2-%a6, (%sp)    | be the far more common condition
     move.l      #0x80, %macsr             | put emac unit in signed int mode
-    movem.l     52(%sp), %a0-%a2/%a4      |
-    lea.l       (%a4, %a0.l*4), %a0       | %a0 = end address
-    move.l      (%a1), %d1                | %a1 = multiplier: (1 << (16 - scale))
+    movem.l     52(%sp), %a0-%a2          | %a0 = this, %a1 = src, %a2 = dst
+    move.l      (%a0), %a0                | %a0 = this->outcount
+    move.l      4(%a2), %a4               | %a4 = dst->p16out
+    lea.l       (%a4, %a0.l*4), %a0       | %a0 = count -> end address
+    movem.l     4(%a1), %a2-%a3           | %a2 = src->p32[0], %a3 = src->p32[1]
+    clr.l       %d1                       | %a1 = multiplier: (1 << (16 - scale))
+    move.b      19(%a1), %d1              | %d1 = src->format.output_scale
     sub.l       #16, %d1                  |
     neg.l       %d1                       |
     moveq.l     #1, %d0                   |
     asl.l       %d1, %d0                  |
     move.l      %d0, %a1                  |
     move.l      #0x8000, %a6              | %a6 = rounding term
-    movem.l     (%a2), %a2-%a3            | get L/R channel pointers
     moveq.l     #28, %d0                  | %d0 = second line bound
     add.l       %a4, %d0                  |
     and.l       #0xfffffff0, %d0          |
@@ -447,7 +550,7 @@ sample_output_stereo:
     mac.l       %d2, %a1, %acc1           | shift R to high word
     movclr.l    %acc0, %d1                | get possibly saturated results
     movclr.l    %acc1, %d2                |
-    swap        %d2                       | move R to low word
+    swap.w      %d2                       | move R to low word
     move.w      %d2, %d1                  | interleave MS 16 bits of each
     move.l      %d1, (%a4)+               | ...and write both
     cmp.l       %a4, %d0                  |
@@ -477,10 +580,10 @@ sample_output_stereo:
     mac.l       %d1, %a1, (%a2)+, %d2, %acc1 | with saturation
     mac.l       %d2, %a1, (%a2)+, %d3, %acc2 |
     mac.l       %d3, %a1             , %acc3 |
-    swap        %d4                       | a) interleave most significant...
-    swap        %d5                       |
-    swap        %d6                       |
-    swap        %d7                       |
+    swap.w      %d4                       | a) interleave most significant...
+    swap.w      %d5                       |
+    swap.w      %d6                       |
+    swap.w      %d7                       |
     movclr.l    %acc0, %d0                | obtain L results
     movclr.l    %acc1, %d1                |
     movclr.l    %acc2, %d2                |
@@ -503,7 +606,7 @@ sample_output_stereo:
     mac.l       %d2, %a1, %acc1           |
     movclr.l    %acc0, %d1                |
     movclr.l    %acc1, %d2                |
-    swap        %d2                       |
+    swap.w      %d2                       |
     move.w      %d2, %d1                  |
     move.l      %d1, (%a4)+               |
     cmp.l       %a4, %a0                  |
@@ -516,8 +619,9 @@ sample_output_stereo:
     .size      sample_output_stereo, .-sample_output_stereo
 
 /****************************************************************************
- * void sample_output_mono(int count, struct dsp_data *data,
- *                         const int32_t *src[], int16_t *dst)
+ * void sample_output_mono(struct sample_io_data *this,
+ *                         struct dsp_buffer *src,
+ *                         struct dsp_buffer *dst)
  *
  * Same treatment as sample_output_stereo but for one channel.
  */
@@ -525,19 +629,23 @@ sample_output_stereo:
     .align      2
     .global    sample_output_mono
 sample_output_mono:
+    | input: 4(sp) = this, 8(sp) = src, 12(sp) = dst
     lea.l       -32(%sp), %sp             | save registers
     move.l      %macsr, %d1               | do it now as at many lines will
     movem.l     %d1-%d5/%a2-%a4, (%sp)    | be the far more common condition
     move.l      #0x80, %macsr             | put emac unit in signed int mode
-    movem.l     36(%sp), %a0-%a3          |
-    lea.l       (%a3, %a0.l*4), %a0       | %a0 = end address
-    move.l      (%a1), %d1                | %d5 = multiplier: (1 << (16 - scale))
+    movem.l     36(%sp), %a0-%a2          | %a0 = this, %a1 = src, %a2 = dst
+    move.l      (%a0), %a0                | %a0 = this->outcount
+    move.l      4(%a2), %a3               | %a3 = dst->p16out
+    movem.l     4(%a1), %a2               | %a2 = src->p32[0]
+    lea.l       (%a3, %a0.l*4), %a0       | %a0 = count -> end address
+    clr.l       %d1                       | %d5 = multiplier: (1 << (16 - scale))
+    move.b      19(%a1), %d1              | %d1 = src->format.output_scale
     sub.l       #16, %d1                  |
     neg.l       %d1                       |
     moveq.l     #1, %d5                   |
     asl.l       %d1, %d5                  |
     move.l      #0x8000, %a4              | %a4 = rounding term
-    movem.l     (%a2), %a2                | get source channel pointer
     moveq.l     #28, %d0                  | %d0 = second line bound
     add.l       %a3, %d0                  |
     and.l       #0xfffffff0, %d0          |
@@ -552,7 +660,7 @@ sample_output_mono:
     mac.l       %d1, %d5, %acc0           | shift L to high word
     movclr.l    %acc0, %d1                | get possibly saturated results
     move.l      %d1, %d2                  |
-    swap        %d2                       | move R to low word
+    swap.w      %d2                       | move R to low word
     move.w      %d2, %d1                  | duplicate single channel into
     move.l      %d1, (%a3)+               | L and R
     cmp.l       %a3, %d0                  |
@@ -575,16 +683,16 @@ sample_output_mono:
     movclr.l    %acc2, %d2                |
     movclr.l    %acc3, %d3                |
     move.l      %d0, %d4                  | duplicate single channel
-    swap        %d4                       | into L and R
+    swap.w      %d4                       | into L and R
     move.w      %d4, %d0                  |
     move.l      %d1, %d4                  |
-    swap        %d4                       |
+    swap.w      %d4                       |
     move.w      %d4, %d1                  |
     move.l      %d2, %d4                  |
-    swap        %d4                       |
+    swap.w      %d4                       |
     move.w      %d4, %d2                  |
     move.l      %d3, %d4                  |
-    swap        %d4                       |
+    swap.w      %d4                       |
     move.w      %d4, %d3                  |
     movem.l     %d0-%d3, -16(%a3)         | write four stereo samples
     cmp.l       %a3, %a1                  |
@@ -598,7 +706,7 @@ sample_output_mono:
     mac.l       %d1, %d5, %acc0           | the same way as leading ones
     movclr.l    %acc0, %d1                |
     move.l      %d1, %d2                  |
-    swap        %d2                       |
+    swap.w      %d2                       |
     move.w      %d2, %d1                  |
     move.l      %d1, (%a3)+               |
     cmp.l       %a3, %a0                  |
diff --git a/lib/rbcodec/dsp/dsp_core.c b/lib/rbcodec/dsp/dsp_core.c
new file mode 100644
index 0000000000..84fe64adb0
--- /dev/null
+++ b/lib/rbcodec/dsp/dsp_core.c
@@ -0,0 +1,554 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2005 Miika Pekkarinen
+ * Copyright (C) 2012 Michael Sevakis
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+#include "config.h"
+#include "system.h"
+#include "dsp.h"
+#include "dsp_sample_io.h"
+#include <sys/types.h>
+
+/* Define LOGF_ENABLE to enable logf output in this file */
+/*#define LOGF_ENABLE*/
+#include "logf.h"
+
+/* Actually generate the database of stages */
+#define DSP_PROC_DB_CREATE
+#include "dsp_proc_entry.h"
+
+/* Linked lists give fewer loads in processing loop compared to some index
+ * list, which is more important than keeping occasionally executed code
+ * simple */
+
+struct dsp_config
+{
+    /** General DSP-local data **/
+    struct sample_io_data io_data; /* Sample input-output data (first) */
+    uint32_t slot_free_mask;       /* Mask of free slots for this DSP */
+    uint32_t proc_masks[2];        /* [0]=active, [1]=enabled stage masks */
+    struct dsp_proc_slot
+    {
+        struct dsp_proc_entry proc_entry; /* This enabled stage */
+        struct dsp_proc_slot *next[2]; /* [0]=active next, [1]=enabled next */
+        const struct dsp_proc_db_entry *db_entry; /* Stage's database entry */
+    } *proc_slots[2];              /* Heads of the lists of [0]=active and
+                                      [1]=enabled stages */
+
+    /** Misc. extra stuff **/
+#ifdef CPU_COLDFIRE
+    unsigned long old_macsr;       /* macsr saved while processing runs */
+#endif
+#if 0 /* Not needed now but enable if something must know this */
+    bool processing;               /* DSP is processing (to thwart inopportune
+                                      buffer moves) */
+#endif
+};
+
+/* Slot pool shared by all DSPs; 32-bit free masks limit it to 32 slots */
+static struct dsp_proc_slot
+dsp_proc_slot_arr[DSP_NUM_PROC_STAGES+DSP_VOICE_NUM_PROC_STAGES] IBSS_ATTR;
+
+/* One configuration per DSP instance, indexed by DSP id */
+static struct dsp_config dsp_conf[DSP_COUNT] IBSS_ATTR;
+
+/** Processing stages support functions **/
+
+/* Look up the slot of an enabled stage by id; NULL if it isn't enabled */
+static struct dsp_proc_slot * find_proc_slot(struct dsp_config *dsp,
+                                             unsigned int id)
+{
+    const uint32_t id_bit = BIT_N(id);
+
+    if (!(dsp->proc_masks[1] & id_bit))
+        return NULL; /* Stage isn't in the enabled mask */
+
+    /* The mask bit is set, so by invariant the stage is somewhere on the
+     * enabled list and the walk below needs no end-of-list check */
+    struct dsp_proc_slot *slot = dsp->proc_slots[1];
+
+    while (BIT_N(slot->db_entry->id) != id_bit)
+    {
+        slot = slot->next[1];
+    }
+    return slot;
+}
+
+/* Deliver a setting to every enabled stage, or, for settings at or above
+ * DSP_PROC_SETTING, only to the stage the setting is addressed to */
+static intptr_t proc_broadcast(struct dsp_config *dsp, unsigned int setting,
+                               intptr_t value)
+{
+    struct dsp_proc_slot *slot;
+
+    if (setting >= DSP_PROC_SETTING)
+    {
+        /* Targeted: the stage's own result, or 0 if it isn't enabled */
+        slot = find_proc_slot(dsp, setting - DSP_PROC_SETTING);
+        return slot == NULL ? 0 :
+            slot->db_entry->configure(&slot->proc_entry, dsp, setting, value);
+    }
+
+    /* Broadcast: results of the individual stages are not reported */
+    for (slot = dsp->proc_slots[1]; slot != NULL; slot = slot->next[1])
+        slot->db_entry->configure(&slot->proc_entry, dsp, setting, value);
+
+    return 1;
+}
+
+/* Default this->process[0]: a no-op that leaves the buffer untouched */
+static void dsp_process_null(struct dsp_proc_entry *this,
+                             struct dsp_buffer **buf_p)
+{
+    (void)buf_p; (void)this;
+}
+
+/* Default this->process[1]: call process[0] by the rules if stage active */
+static void dsp_format_change_process(struct dsp_proc_entry *this,
+                                      struct dsp_buffer **buf_p)
+{
+    enum dsp_proc_ids id =
+        TYPE_FROM_MEMBER(struct dsp_proc_slot, this, proc_entry)->db_entry->id;
+
+    DSP_PRINT_FORMAT(<Default Handler>, id, (*buf_p)->format);
+
+    /* We don't keep back references to the DSP, so just search for it */
+    struct dsp_config *dsp;
+    for (int i = 0; (dsp = dsp_get_config(i)); i++)
+    {
+        struct dsp_proc_slot *slot = find_proc_slot(dsp, id);
+        /* NULL if id isn't enabled on this DSP; else check it's this one */
+        if (slot && &slot->proc_entry == this && dsp_proc_active(dsp, id))
+        {
+            dsp_proc_call(this, buf_p, 0);
+            break;
+        }
+    }
+}
+
+/* Add the stage given by mask to the enabled list; NULL on failure */
+static struct dsp_proc_slot *
+dsp_proc_enable_enlink(struct dsp_config *dsp, uint32_t mask)
+{
+    /* Use the lowest-indexed available slot */
+    int slot = find_first_set_bit(dsp->slot_free_mask);
+
+    if (slot == 32) /* Treated as "no free slot" (presumably no bit set) */
+    {
+        /* Should NOT happen, ever, unless called before init */
+        DEBUGF("DSP %d: no slots!\n", (int)dsp_get_id(dsp));
+        return NULL;
+    }
+
+    const struct dsp_proc_db_entry *db_entry_prev = NULL;
+    const struct dsp_proc_db_entry *db_entry;
+
+    /* Order of enabled list is same as DB array */
+    for (unsigned int i = 0;; i++)
+    {
+        if (i >= DSP_NUM_PROC_STAGES)
+            return NULL; /* No DB entry matches mask */
+
+        db_entry = dsp_proc_database[i];
+
+        uint32_t m = BIT_N(db_entry->id);
+
+        if (m == mask)
+            break; /* This is the one */
+
+        if (dsp->proc_masks[1] & m)
+            db_entry_prev = db_entry; /* Latest enabled stage before it */
+    }
+
+    struct dsp_proc_slot *s = &dsp_proc_slot_arr[slot];
+
+    if (db_entry_prev != NULL) /* Insert after the preceding enabled stage */
+    {
+        struct dsp_proc_slot *prev = find_proc_slot(dsp, db_entry_prev->id);
+        s->next[0] = prev->next[0];
+        s->next[1] = prev->next[1];
+        prev->next[1] = s;
+    }
+    else /* Nothing enabled precedes it: becomes the enabled list head */
+    {
+        s->next[0] = dsp->proc_slots[0];
+        s->next[1] = dsp->proc_slots[1];
+        dsp->proc_slots[1] = s;
+    }
+
+    s->db_entry = db_entry; /* record DB entry */
+    dsp->proc_masks[1] |= mask;
+    dsp->slot_free_mask &= ~BIT_N(slot); /* Slot is now taken */
+
+    return s;
+}
+
+/* Remove an item from the enabled list; returns its (now free) slot */
+static struct dsp_proc_slot *
+dsp_proc_enable_delink(struct dsp_config *dsp, uint32_t mask)
+{
+    struct dsp_proc_slot *s = dsp->proc_slots[1];
+    struct dsp_proc_slot *prev = NULL;
+
+    while (1) /* In proc_masks == it must be there */
+    {
+        if (BIT_N(s->db_entry->id) == mask)
+        {
+            if (prev)
+                prev->next[1] = s->next[1]; /* Unlink from within the list */
+            else
+                dsp->proc_slots[1] = s->next[1]; /* Unlink the list head */
+
+            dsp->proc_masks[1] &= ~mask;
+            dsp->slot_free_mask |= BIT_N(s - dsp_proc_slot_arr); /* Free it */
+            return s;
+        }
+
+        prev = s;
+        s = s->next[1];
+    }
+}
+
+void dsp_proc_enable(struct dsp_config *dsp, enum dsp_proc_ids id,
+                     bool enable) /* Enable (and init) or disable a stage */
+{
+    uint32_t mask = BIT_N(id);
+    bool enabled = dsp->proc_masks[1] & mask; /* State before this call */
+
+    if (enable)
+    {
+        /* If enabled, just find it in list, if not, link a new one */
+        struct dsp_proc_slot *s = enabled ? find_proc_slot(dsp, id) :
+                                            dsp_proc_enable_enlink(dsp, mask);
+
+        if (s == NULL)
+        {
+            DEBUGF("DSP- proc id not valid: %d\n", (int)id);
+            return;
+        }
+
+        if (!enabled)
+        {
+            /* New entry - set defaults */
+            s->proc_entry.data = 0;
+            s->proc_entry.ip_mask = mask;
+            s->proc_entry.process[0] = dsp_process_null;
+            s->proc_entry.process[1] = dsp_format_change_process;
+        }
+
+        /* The stage is told whether it was already enabled; < 0 is failure */
+        enabled = s->db_entry->configure(&s->proc_entry, dsp, DSP_PROC_INIT,
+                                         enabled) >= 0;
+        if (enabled)
+            return;
+
+        DEBUGF("DSP- proc init failed: %d\n", (int)id);
+        /* Cleanup below */
+    }
+    else if (!enabled)
+    {
+        return; /* No change */
+    }
+
+    dsp_proc_activate(dsp, id, false); /* Deactivate it first */
+    struct dsp_proc_slot *s = dsp_proc_enable_delink(dsp, mask);
+    s->db_entry->configure(&s->proc_entry, dsp, DSP_PROC_CLOSE, 0);
+}
+
+/* Maintain the list structure for the active list where each enabled entry
+ * has a link to the next active item, even if not active which facilitates
+ * switching out of format change mode by a stage during a format change.
+ * When that happens, the iterator must jump over inactive but enabled
+ * stages after its current position. Returns the first active from 's'. */
+static struct dsp_proc_slot *
+dsp_proc_activate_link(struct dsp_config *dsp, uint32_t mask,
+                       struct dsp_proc_slot *s)
+{
+    uint32_t m = BIT_N(s->db_entry->id);
+    uint32_t mor = m | mask;
+
+    if (mor == m) /* Only if same single bit in common */
+    {
+        dsp->proc_masks[0] |= mask; /* Activating: set the stage's bit */
+        return s;
+    }
+    else if (~mor == 0) /* Only if bits complement */
+    {
+        dsp->proc_masks[0] &= mask; /* Deactivating: clear the stage's bit */
+        return s->next[0]; /* s itself drops out of the active chain */
+    }
+
+    struct dsp_proc_slot *next = s->next[1];
+    next = dsp_proc_activate_link(dsp, mask, next); /* Recurse to target */
+
+    s->next[0] = next; /* Stages before the target get their link redone */
+
+    return (m & dsp->proc_masks[0]) ? s : next;
+}
+
+/* Switch a stage in or out of the active processing list */
+void dsp_proc_activate(struct dsp_config *dsp, enum dsp_proc_ids id,
+                       bool activate)
+{
+    const uint32_t id_bit = BIT_N(id);
+
+    if ((dsp->proc_masks[1] & id_bit) == 0)
+        return; /* Only enabled stages can change state */
+
+    if (activate == ((dsp->proc_masks[0] & id_bit) != 0))
+        return; /* Already in the requested state */
+
+    /* Relinking takes the stage's bit to activate, its complement to not */
+    dsp->proc_slots[0] = dsp_proc_activate_link(
+            dsp, activate ? id_bit : ~id_bit, dsp->proc_slots[1]);
+}
+
+/* Tell whether the stage with the given id is presently active */
+bool dsp_proc_active(struct dsp_config *dsp, enum dsp_proc_ids id)
+{
+    return !!(BIT_N(id) & dsp->proc_masks[0]);
+}
+
+/* Decide by the rules whether this stage's process function gets called */
+static FORCE_INLINE bool dsp_proc_should_call(struct dsp_proc_entry *this,
+                                              struct dsp_buffer *buf,
+                                              unsigned int fmt)
+{
+    const uint32_t stage_mask = this->ip_mask;
+    if (UNLIKELY(fmt != 0) || stage_mask == 0)
+        return true; /* Format change/override value, or not in-place */
+    /* In-place: once per buffer only, and only if it still holds samples */
+    return (buf->proc_mask & stage_mask) == 0 &&
+           (buf->proc_mask |= stage_mask, buf->remcount > 0);
+}
+
+/* External entry: run this->process[fmt] if the call rules permit it */
+bool dsp_proc_call(struct dsp_proc_entry *this, struct dsp_buffer **buf_p,
+                   unsigned int fmt)
+{
+    if (!dsp_proc_should_call(this, *buf_p, fmt))
+        return false;
+
+    /* An fmt of (unsigned)-1 passes the rules check but uses process[0] */
+    const unsigned int index = (fmt == (0u-1u)) ? 0 : fmt;
+    this->process[index](this, buf_p);
+    return true;
+}
+
+static inline void dsp_process_start(struct dsp_config *dsp)
+{
+    (void)dsp; /* May otherwise go unused depending on the configuration */
+#ifdef CPU_COLDFIRE
+    /* Running in codec thread context here, so macsr can't be clobbered:
+       save the old value, then set up the emac unit for dsp processing */
+    dsp->old_macsr = coldfire_get_macsr();
+    coldfire_set_macsr(EMAC_FRACTIONAL | EMAC_SATURATE);
+#endif
+#if 0 /* Not needed now but enable if something must know this */
+    dsp->processing = true;
+#endif
+}
+
+static inline void dsp_process_end(struct dsp_config *dsp)
+{
+    (void)dsp; /* May otherwise go unused depending on the configuration */
+#if 0 /* Not needed now but enable if something must know this */
+    dsp->processing = false;
+#endif
+#ifdef CPU_COLDFIRE
+    /* Put back the macsr value saved when processing started */
+    coldfire_set_macsr(dsp->old_macsr);
+#endif
+}
+
+/**
+ * dsp_process:
+ *
+ * Process and convert src audio to dst based on the DSP configuration.
+ * dsp:            the DSP instance in use
+ *
+ * src:
+ *     remcount  = number of input samples remaining; set to desired
+ *                 number of samples to be processed
+ *     pin[0]    = left channel if non-interleaved, audio data if
+ *                 interleaved or mono
+ *     pin[1]    = right channel if non-interleaved, ignored if
+ *                 interleaved or mono
+ *     proc_mask = set to zero on first call, updated by this function
+ *                 to keep track of which in-place stages have been
+ *                 run on the buffers to avoid multiple applications of
+ *                 them
+ *     format    = for internal buffers, gives the relevant format
+ *                 details
+ *
+ * dst:
+ *     remcount  = number of samples placed in buffer so far; set to
+ *                 zero on first call
+ *     p16out    = current fill pointer in destination buffer; set to
+ *                 buffer start on first call
+ *     bufcount  = remaining buffer space in samples; set to maximum
+ *                 desired output count on first call
+ *     format    = ignored
+ *
+ * Processing stops when src is exhausted or dst is filled, whichever
+ * happens first. Samples can still be output when src buffer is empty
+ * if samples are held internally. Generally speaking, continue calling
+ * until no data is consumed and no data is produced to purge the DSP
+ * to the maximum extent feasible. Some internal processing stages may
+ * require more input before more output can be generated, thus there
+ * is no guarantee the DSP is free of data awaiting processing at that
+ * point.
+ *
+ * Additionally, samples consumed and samples produced do not necessarily
+ * have a direct correlation. Samples may be consumed without producing
+ * any output and samples may be produced without consuming any input.
+ * It depends on which stages are actively processing data at the time
+ * of the call and how they function internally.
+ */
+void dsp_process(struct dsp_config *dsp, struct dsp_buffer *src,
+                 struct dsp_buffer *dst)
+{
+    if (dst->bufcount <= 0)
+    {
+        /* No place to put anything thus nothing may be safely consumed */
+        return;
+    }
+
+    /* At least perform one yield before starting */
+    long last_yield = current_tick;
+    yield();
+
+    dsp_process_start(dsp); /* On Coldfire: saves macsr, sets up emac */
+
+    /* Tag input with codec-specified sample format */
+    src->format = dsp->io_data.format;
+
+    while (1)
+    {
+        /* Out-of-place-processing stages take the current buf as input
+         * and switch the buffer to their own output buffer */
+        struct dsp_buffer *buf = src;
+        unsigned int fmt = buf->format.changed;
+
+        /* Convert input samples to internal format */
+        dsp->io_data.input_samples[fmt](&dsp->io_data, &buf);
+        fmt = buf->format.changed;
+
+        struct dsp_proc_slot *s = dsp->proc_slots[fmt];
+
+        /* Walk the active list (fmt == 0) or, when the format of the last
+           output buffer has changed, the enabled list (fmt == 1) */
+        while (s != NULL)
+        {
+            if (dsp_proc_should_call(&s->proc_entry, buf, fmt))
+            {
+                s->proc_entry.process[fmt](&s->proc_entry, &buf);
+                fmt = buf->format.changed;
+            }
+
+            /* The buffer may have changed along with the format flag */
+            s = s->next[fmt];
+        }
+
+        /* Don't overread/write src/destination */
+        int outcount = MIN(dst->bufcount, buf->remcount);
+
+        if (fmt == 0 && outcount <= 0)
+            break; /* Output full or purged internal buffers */
+
+        dsp->io_data.outcount = outcount; /* Count for output_samples */
+        dsp->io_data.output_samples[fmt](&dsp->io_data, buf, dst);
+
+        /* Advance buffers by what output consumed and produced */
+        dsp_advance_buffer32(buf, outcount);
+        dsp_advance_buffer_output(dst, outcount);
+
+        /* Yield at least once each tick */
+        long tick = current_tick;
+        if (TIME_AFTER(tick, last_yield))
+        {
+            last_yield = tick;
+            yield();
+        }
+    } /* while */
+
+    dsp_process_end(dsp); /* On Coldfire: restores the saved macsr */
+}
+
+intptr_t dsp_configure(struct dsp_config *dsp, unsigned int setting,
+                       intptr_t value)
+{
+    dsp_sample_io_configure(&dsp->io_data, setting, value); /* I/O first */
+    return proc_broadcast(dsp, setting, value); /* Then the stages */
+}
+
+struct dsp_config * dsp_get_config(enum dsp_ids id)
+{
+    /* Ids at or beyond DSP_COUNT don't name a DSP instance; callers that
+     * iterate over the ids rely on getting NULL back to stop */
+    struct dsp_config *dsp = id < DSP_COUNT ? &dsp_conf[id] : NULL;
+    return dsp;
+}
+
+/* Map a dsp pointer (or even one to something within the struct itself)
+   back to its id; DSP_COUNT is returned for pointers outside dsp_conf */
+enum dsp_ids dsp_get_id(const struct dsp_config *dsp)
+{
+    const ptrdiff_t index = dsp - dsp_conf;
+
+    if (index >= 0 && index < DSP_COUNT)
+        return (enum dsp_ids)index;
+
+    return DSP_COUNT; /* not an element of dsp_conf */
+}
+
+#if 0 /* Not needed now but enable if something must know this */
+bool dsp_is_busy(const struct dsp_config *dsp)
+{
+    return dsp->processing;
+}
+#endif /* 0 */
+
+/* Do what needs initializing before enable/disable calls can be made.
+ * Must be done before changing settings for the first time. */
+void INIT_ATTR dsp_init(void)
+{
+    static const uint8_t slot_count[DSP_COUNT] /* INITDATA_ATTR */ =
+    {
+        [CODEC_IDX_AUDIO] = DSP_NUM_PROC_STAGES,
+        [CODEC_IDX_VOICE] = DSP_VOICE_NUM_PROC_STAGES
+    };
+
+    for (unsigned int i = 0, count, shift = 0;
+         i < DSP_COUNT;
+         i++, shift += count) /* Next DSP's slots follow this one's */
+    {
+        struct dsp_config *dsp = &dsp_conf[i];
+
+        count = slot_count[i]; /* Number of pool slots given to DSP i */
+        dsp->slot_free_mask = MASK_N(uint32_t, count, shift);
+
+        dsp_sample_io_configure(&dsp->io_data, DSP_INIT, i);
+
+        /* Notify each db entry of global init for each DSP */
+        for (unsigned int j = 0; j < DSP_NUM_PROC_STAGES; j++)
+            dsp_proc_database[j]->configure(NULL, dsp, DSP_INIT, i);
+
+        dsp_configure(dsp, DSP_RESET, 0); /* Reset sample I/O and stages */
+    }
+}
diff --git a/lib/rbcodec/dsp/dsp_filter.c b/lib/rbcodec/dsp/dsp_filter.c
new file mode 100644
index 0000000000..ee0ce1b18f
--- /dev/null
+++ b/lib/rbcodec/dsp/dsp_filter.c
@@ -0,0 +1,306 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2006-2007 Thom Johansen
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+#include <stdbool.h>
+#include <string.h>
+#include "config.h"
+#include "fixedpoint.h"
+#include "fracmul.h"
+#include "dsp_filter.h"
+#include "replaygain.h"
+
+enum filter_shift
+{
+    FILTER_BISHELF_SHIFT = 5, /* Set by filter_bishelf_coefs (bass/treble) */
+    FILTER_PEAK_SHIFT = 4,    /* Set by filter_pk_coefs (peaking filter) */
+    FILTER_SHELF_SHIFT = 6,   /* Set by filter_ls_coefs/filter_hs_coefs */
+};
+
+/**
+ * Calculate first order shelving filter. Filter is not directly usable by the
+ * filter_process() function.
+ * @param cutoff shelf midpoint frequency. See filter_pk_coefs for format.
+ * @param A decibel value multiplied by ten, describing gain/attenuation of
+ * shelf. Max value is 24 dB.
+ * @param low true for low-shelf filter, false for high-shelf filter.
+ * @param c pointer to storage for three coefficients in s4.27 format.
+ */
+void filter_shelf_coefs(unsigned long cutoff, long A, bool low, int32_t *c)
+{
+    long sin, cos;
+    int32_t b0, b1, a0, a1; /* s3.28 */
+    const long g = get_replaygain_int(A*5) << 4; /* 10^(db/40), s3.28 */
+
+    sin = fp_sincos(cutoff/2, &cos);
+    if (low) {
+        const int32_t sin_div_g = fp_div(sin, g, 25);
+        const int32_t sin_g = FRACMUL(sin, g);
+        cos >>= 3;
+        b0 = sin_g + cos;             /* 0.25 .. 4.10 */
+        b1 = sin_g - cos;             /* -1 .. 3.98 */
+        a0 = sin_div_g + cos;         /* 0.25 .. 4.10 */
+        a1 = sin_div_g - cos;         /* -1 .. 3.98 */
+    } else {
+        const int32_t cos_div_g = fp_div(cos, g, 25);
+        const int32_t cos_g = FRACMUL(cos, g);
+        sin >>= 3;
+        b0 = sin + cos_g;             /* 0.25 .. 4.10 */
+        b1 = sin - cos_g;             /* -3.98 .. 1 */
+        a0 = sin + cos_div_g;         /* 0.25 .. 4.10 */
+        a1 = sin - cos_div_g;         /* -3.98 .. 1 */
+    }
+
+    const int32_t rcp_a0 = fp_div(1, a0, 57); /* 0.24 .. 3.98, s2.29 */
+    *c++ = FRACMUL_SHL(b0, rcp_a0, 1);       /* b0/a0: 0.063 .. 15.85 */
+    *c++ = FRACMUL_SHL(b1, rcp_a0, 1);       /* b1/a0: -15.85 .. 15.85 */
+    *c++ = -FRACMUL_SHL(a1, rcp_a0, 1);      /* -a1/a0: -1 .. 1 */
+}
+
+#ifdef HAVE_SW_TONE_CONTROLS
+/**
+ * Calculate second order section filter consisting of one low-shelf and one
+ * high-shelf section.
+ * @param cutoff_low low-shelf midpoint frequency. See filter_pk_coefs for format.
+ * @param cutoff_high high-shelf midpoint frequency.
+ * @param A_low decibel value multiplied by ten, describing gain/attenuation of
+ * low-shelf part. Max value is 24 dB.
+ * @param A_high decibel value multiplied by ten, describing gain/attenuation of
+ * high-shelf part. Max value is 24 dB.
+ * @param A decibel value multiplied by ten, describing additional overall gain.
+ * @param f filter receiving the coefficients (s4.27 format) and the shift.
+ */
+void filter_bishelf_coefs(unsigned long cutoff_low, unsigned long cutoff_high,
+                          long A_low, long A_high, long A,
+                          struct dsp_filter *f)
+{
+    const long g = get_replaygain_int(A*10) << 7; /* 10^(db/20), s0.31 */
+    int32_t c_ls[3], c_hs[3];
+
+    filter_shelf_coefs(cutoff_low, A_low, true, c_ls);
+    filter_shelf_coefs(cutoff_high, A_high, false, c_hs);
+    c_ls[0] = FRACMUL(g, c_ls[0]); /* Overall gain goes into the low-shelf */
+    c_ls[1] = FRACMUL(g, c_ls[1]); /* numerator terms only */
+
+    /* now we cascade the two first order filters to one second order filter
+     * which can be used by filter_process(). these resulting coefficients have a
+     * really wide numerical range, so we use a fixed point format which will
+     * work for the selected cutoff frequencies (in tone_controls.c) only.
+     */
+    const int32_t b0 = c_ls[0], b1 = c_ls[1], b2 = c_hs[0], b3 = c_hs[1];
+    const int32_t a0 = c_ls[2], a1 = c_hs[2];
+
+    int32_t *c = f->coefs;
+    *c++ = FRACMUL_SHL(b0, b2, 4);
+    *c++ = FRACMUL_SHL(b0, b3, 4) + FRACMUL_SHL(b1, b2, 4);
+    *c++ = FRACMUL_SHL(b1, b3, 4);
+    *c++ = a0 + a1;
+    *c   = -FRACMUL_SHL(a0, a1, 4);
+
+    f->shift = FILTER_BISHELF_SHIFT;
+}
+#endif /* HAVE_SW_TONE_CONTROLS */
+
+/* Coef calculation taken from Audio-EQ-Cookbook.txt by Robert Bristow-Johnson.
+ * Slightly faster calculation can be done by deriving forms which use tan()
+ * instead of cos() and sin(), but the latter are far easier to use when doing
+ * fixed point math, and performance is not a big point in the calculation part.
+ * All the 'a' filter coefficients are negated so we can use only additions
+ * in the filtering equation.
+ */
+
+/**
+ * Calculate second order section peaking filter coefficients.
+ * @param cutoff a value from 0 to 0x80000000, where 0 represents 0 Hz and
+ * 0x80000000 represents the Nyquist frequency (samplerate/2).
+ * @param Q Q factor value multiplied by ten. Lower bound is artificially set
+ * at 0.5.
+ * @param db decibel value multiplied by ten, describing gain/attenuation at
+ * peak freq. Max value is 24 dB.
+ * @param f filter receiving the coefficients (s3.28 format) and the shift.
+ */
+void filter_pk_coefs(unsigned long cutoff, unsigned long Q, long db,
+                     struct dsp_filter *f)
+{
+    long cs;
+    const long one = 1 << 28; /* s3.28 */
+    const long A = get_replaygain_int(db*5) << 5; /* 10^(db/40), s2.29 */
+    const long alpha = fp_sincos(cutoff, &cs)/(2*Q)*10 >> 1; /* s1.30 */
+    int32_t a0, a1, a2; /* these are all s3.28 format */
+    int32_t b0, b1, b2;
+    const long alphadivA = fp_div(alpha, A, 27);
+    const long alphaA = FRACMUL(alpha, A);
+
+    /* possible numerical ranges are in comments by each coef */
+    b0 = one + alphaA;                /* [1 .. 5] */
+    b1 = a1 = -2*(cs >> 3);           /* [-2 .. 2] */
+    b2 = one - alphaA;                /* [-3 .. 1] */
+    a0 = one + alphadivA;             /* [1 .. 5] */
+    a2 = one - alphadivA;             /* [-3 .. 1] */
+
+    /* range of rcp_a0 is roughly [0.2 .. 1], but we'll never hit 1 completely */
+    int32_t *c = f->coefs;
+    const long rcp_a0 = fp_div(1, a0, 59); /* s0.31 */
+    *c++ = FRACMUL(b0, rcp_a0);         /* [0.25 .. 4] */
+    *c++ = FRACMUL(b1, rcp_a0);         /* [-2 .. 2] */
+    *c++ = FRACMUL(b2, rcp_a0);         /* [-2.4 .. 1] */
+    *c++ = FRACMUL(-a1, rcp_a0);        /* [-2 .. 2] */
+    *c   = FRACMUL(-a2, rcp_a0);        /* [-0.6 .. 1] */
+
+    f->shift = FILTER_PEAK_SHIFT;
+}
+
+/**
+ * Calculate coefficients for lowshelf filter. Parameters are as for
+ * filter_pk_coefs, but the coefficient format is s5.26 fixed point.
+ */
+void filter_ls_coefs(unsigned long cutoff, unsigned long Q, long db,
+                     struct dsp_filter *f)
+{
+    long cs;
+    const long one = 1 << 25; /* s6.25 */
+    const long sqrtA = get_replaygain_int(db*5/2) << 2; /* 10^(db/80), s5.26 */
+    const long A = FRACMUL_SHL(sqrtA, sqrtA, 8); /* s2.29 */
+    const long alpha = fp_sincos(cutoff, &cs)/(2*Q)*10 >> 1; /* s1.30 */
+    const long ap1 = (A >> 4) + one; /* A + 1 */
+    const long am1 = (A >> 4) - one; /* A - 1 */
+    const long ap1_cs = FRACMUL(ap1, cs);
+    const long am1_cs = FRACMUL(am1, cs);
+    const long twosqrtalpha = 2*FRACMUL(sqrtA, alpha);
+    int32_t a0, a1, a2; /* these are all s6.25 format */
+    int32_t b0, b1, b2;
+    
+    /* [0.1 .. 40] */
+    b0 = FRACMUL_SHL(A, ap1 - am1_cs + twosqrtalpha, 2);
+    /* [-16 .. 63.4] */
+    b1 = FRACMUL_SHL(A, am1 - ap1_cs, 3);
+    /* [0 .. 31.7] */
+    b2 = FRACMUL_SHL(A, ap1 - am1_cs - twosqrtalpha, 2);
+    /* [0.5 .. 10] */
+    a0 = ap1 + am1_cs + twosqrtalpha;
+    /* [-16 .. 4] */
+    a1 = -2*(am1 + ap1_cs);
+    /* [0 .. 8] */
+    a2 = ap1 + am1_cs - twosqrtalpha;
+
+    /* range of rcp_a0: [0.1 .. 1.99] */
+    int32_t *c = f->coefs;
+    const long rcp_a0 = fp_div(1, a0, 55);    /* s1.30 */
+    *c++ = FRACMUL_SHL(b0, rcp_a0, 2);       /* [0.06 .. 15.9] */
+    *c++ = FRACMUL_SHL(b1, rcp_a0, 2);       /* [-2 .. 31.7] */
+    *c++ = FRACMUL_SHL(b2, rcp_a0, 2);       /* [0 .. 15.9] */
+    *c++ = FRACMUL_SHL(-a1, rcp_a0, 2);      /* [-2 .. 2] */
+    *c++ = FRACMUL_SHL(-a2, rcp_a0, 2);      /* [0 .. 1] */
+
+    f->shift = FILTER_SHELF_SHIFT;
+}
+
+/**
+ * Calculate coefficients for highshelf filter. Parameters are as for
+ * filter_pk_coefs, but the coefficient format is s5.26 fixed point.
+ */
+void filter_hs_coefs(unsigned long cutoff, unsigned long Q, long db,
+                     struct dsp_filter *f)
+{
+    long cs;
+    const long one = 1 << 25; /* s6.25 */
+    const long sqrtA = get_replaygain_int(db*5/2) << 2; /* 10^(db/80), s5.26 */
+    const long A = FRACMUL_SHL(sqrtA, sqrtA, 8); /* s2.29 */
+    const long alpha = fp_sincos(cutoff, &cs)/(2*Q)*10 >> 1; /* s1.30 */
+    const long ap1 = (A >> 4) + one; /* A + 1 */
+    const long am1 = (A >> 4) - one; /* A - 1 */
+    const long ap1_cs = FRACMUL(ap1, cs);
+    const long am1_cs = FRACMUL(am1, cs);
+    const long twosqrtalpha = 2*FRACMUL(sqrtA, alpha);
+    int32_t a0, a1, a2; /* these are all s6.25 format */
+    int32_t b0, b1, b2;
+
+    /* [0.1 .. 40] */
+    b0 = FRACMUL_SHL(A, ap1 + am1_cs + twosqrtalpha, 2);
+    /* [-63.5 .. 16] */
+    b1 = -FRACMUL_SHL(A, am1 + ap1_cs, 3);
+    /* [0 .. 32] */
+    b2 = FRACMUL_SHL(A, ap1 + am1_cs - twosqrtalpha, 2);
+    /* [0.5 .. 10] */
+    a0 = ap1 - am1_cs + twosqrtalpha;
+    /* [-4 .. 16] */
+    a1 = 2*(am1 - ap1_cs);
+    /* [0 .. 8] */
+    a2 = ap1 - am1_cs - twosqrtalpha;
+
+    /* range of rcp_a0: [0.1 .. 1.99] */
+    int32_t *c = f->coefs;
+    const long rcp_a0 = fp_div(1, a0, 55);    /* s1.30 */
+    *c++ = FRACMUL_SHL(b0, rcp_a0, 2);       /* [0 .. 16] */
+    *c++ = FRACMUL_SHL(b1, rcp_a0, 2);       /* [-31.7 .. 2] */
+    *c++ = FRACMUL_SHL(b2, rcp_a0, 2);       /* [0 .. 16] */
+    *c++ = FRACMUL_SHL(-a1, rcp_a0, 2);      /* [-2 .. 2] */
+    *c   = FRACMUL_SHL(-a2, rcp_a0, 2);      /* [0 .. 1] */
+
+    f->shift = FILTER_SHELF_SHIFT;
+}
+
+/**
+ * Copy coefficients and shift from src; dst's sample history is left alone
+ */
+void filter_copy(struct dsp_filter *dst, const struct dsp_filter *src)
+{
+    dst->shift = src->shift;
+    memcpy(dst->coefs, src->coefs, sizeof (dst->coefs));
+}
+
+/**
+ * Zero the stored input/output sample history; coefficients are kept
+ */
+void filter_flush(struct dsp_filter *f)
+{
+    memset(&f->history[0][0], 0, sizeof f->history);
+}
+
+/**
+ * We realise the filters as a second order direct form 1 structure. Direct
+ * form 1 was chosen because of better numerical properties for fixed point
+ * implementations.
+ */
+#if (!defined(CPU_COLDFIRE) && !defined(CPU_ARM))
+void filter_process(struct dsp_filter *f, int32_t * const buf[], int count,
+                    unsigned int channels)
+{
+    /* Direct form 1 filtering code, applied in place to each channel:
+       y[i] = b0*x[i] + b1*x[i - 1] + b2*x[i - 2] + a1*y[i - 1] + a2*y[i - 2],
+       where y[] is output and x[] is input. */
+    unsigned int shift = f->shift;
+
+    for (unsigned int c = 0; c < channels; c++) {
+        for (int i = 0; i < count; i++) {
+            long long acc = (long long) buf[c][i] * f->coefs[0];
+            acc += (long long) f->history[c][0] * f->coefs[1];
+            acc += (long long) f->history[c][1] * f->coefs[2];
+            acc += (long long) f->history[c][2] * f->coefs[3];
+            acc += (long long) f->history[c][3] * f->coefs[4];
+            f->history[c][1] = f->history[c][0];
+            f->history[c][0] = buf[c][i];
+            f->history[c][3] = f->history[c][2];
+            /* Shift as unsigned: << on a negative signed value is undefined */
+            buf[c][i] = (long long)((unsigned long long)acc << shift) >> 32;
+            f->history[c][2] = buf[c][i];
+        }
+    }
+}
+#endif /* CPU */
diff --git a/lib/rbcodec/dsp/dsp_filter.h b/lib/rbcodec/dsp/dsp_filter.h
new file mode 100644
index 0000000000..af6e20ce86
--- /dev/null
+++ b/lib/rbcodec/dsp/dsp_filter.h
@@ -0,0 +1,57 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2006-2007 Thom Johansen
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+#ifndef DSP_FILTER_H
+#define DSP_FILTER_H
+
+/** Basic filter implementations which may be used independently **/
+
+/* Used by: EQ, tone controls and crossfeed */
+
+/* One second-order filter section; hex tags are byte offsets (38h = size).
+   The contents depend on the fixed point formats used by the different
+   filter types and need to be changed when they change. */
+struct dsp_filter
+{
+    int32_t coefs[5];      /* 00h: Order is b0, b1, b2, a1, a2 */
+    int32_t history[2][4]; /* 14h: Order is x-1, x-2, y-1, y-2, per channel */
+    uint8_t shift;         /* 34h: Final shift after computation */
+                           /* 38h */
+};
+
+void filter_shelf_coefs(unsigned long cutoff, long A, bool low, int32_t *c);
+#ifdef HAVE_SW_TONE_CONTROLS
+void filter_bishelf_coefs(unsigned long cutoff_low,
+                          unsigned long cutoff_high,
+                          long A_low, long A_high, long A,
+                          struct dsp_filter *f);
+#endif /* HAVE_SW_TONE_CONTROLS */
+void filter_pk_coefs(unsigned long cutoff, unsigned long Q, long db,
+                     struct dsp_filter *f);
+void filter_ls_coefs(unsigned long cutoff, unsigned long Q, long db,
+                     struct dsp_filter *f);
+void filter_hs_coefs(unsigned long cutoff, unsigned long Q, long db,
+                     struct dsp_filter *f);
+void filter_copy(struct dsp_filter *dst, const struct dsp_filter *src);
+void filter_flush(struct dsp_filter *f);
+void filter_process(struct dsp_filter *f, int32_t * const buf[], int count,
+                    unsigned int channels);
+
+#endif /* DSP_FILTER_H */
diff --git a/lib/rbcodec/dsp/dsp_misc.c b/lib/rbcodec/dsp/dsp_misc.c
new file mode 100644
index 0000000000..7b4589151c
--- /dev/null
+++ b/lib/rbcodec/dsp/dsp_misc.c
@@ -0,0 +1,238 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2005 Miika Pekkarinen
+ * Copyright (C) 2005 Magnus Holmgren
+ * Copyright (C) 2007 Thom Johansen
+ * Copyright (C) 2012 Michael Sevakis
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+#include "config.h"
+#include "system.h"
+#include "dsp.h"
+#include "dsp_sample_io.h"
+#include "replaygain.h"
+#include "sound.h"
+#include "settings.h"
+#include "fixedpoint.h"
+#include <string.h>
+#include "dsp_proc_entry.h"
+
+/** Firmware callback interface **/
+
+/* Hook back from firmware/ part of audio, which can't/shouldn't call apps/
+ * code directly. Forwards msg/param to the matching setter; returns 0. */
+int dsp_callback(int msg, intptr_t param)
+{
+    switch (msg)
+    {
+#ifdef HAVE_SW_TONE_CONTROLS
+    case DSP_CALLBACK_SET_PRESCALE:
+        tone_set_prescale(param);
+        break;
+    case DSP_CALLBACK_SET_BASS:
+        tone_set_bass(param);
+        break;
+    case DSP_CALLBACK_SET_TREBLE:
+        tone_set_treble(param);
+        break;
+    /* FIXME: This must be done by bottom-level PCM driver so it works with
+              all PCM, not here and not in mixer. I won't fully support it
+              here with all streams. -- jethead71 */
+#ifdef HAVE_SW_VOLUME_CONTROL /* NOTE(review): nested in the tone #ifdef */
+    case DSP_CALLBACK_SET_SW_VOLUME: /* param unused; reads global_settings */
+        if (global_settings.volume < SW_VOLUME_MAX || /* NOTE(review): if   */
+            global_settings.volume > SW_VOLUME_MIN)   /* MAX > MIN: always  */
+        {                                             /* true - confirm     */
+            int vol_gain = get_replaygain_int(global_settings.volume * 100);
+            pga_set_gain(PGA_VOLUME, vol_gain);
+        }
+        break;
+#endif /* HAVE_SW_VOLUME_CONTROL */
+#endif /* HAVE_SW_TONE_CONTROLS */
+    case DSP_CALLBACK_SET_CHANNEL_CONFIG:
+        channel_mode_set_config(param);
+        break;
+    case DSP_CALLBACK_SET_STEREO_WIDTH:
+        channel_mode_custom_set_width(param);
+        break;
+    default: /* unknown messages are ignored */
+        break;
+    }
+
+    return 0;
+}
+
+/** Replaygain settings **/
+static struct dsp_replay_gains current_rpgains;
+
+/* Derive the replaygain PGA gain from 'gains' and the global settings.
+ * NULL selects zeroed defaults; the gains used are stashed for reuse. */
+static void dsp_replaygain_update(const struct dsp_replay_gains *gains)
+{
+    const bool use_gain = global_settings.replaygain_type != REPLAYGAIN_OFF;
+    int32_t gain = PGA_UNITY;
+
+    if (gains != NULL)
+    {
+        current_rpgains = *gains; /* Stash settings */
+    }
+    else
+    {
+        /* Use defaults */
+        memset(&current_rpgains, 0, sizeof (current_rpgains));
+        gains = &current_rpgains;
+    }
+
+    if (use_gain || global_settings.replaygain_noclip)
+    {
+        const bool track_mode = REPLAYGAIN_TRACK ==
+            get_replaygain_mode(gains->track_gain != 0,
+                                gains->album_gain != 0);
+        const bool from_track = track_mode || gains->album_peak == 0;
+        int32_t peak = from_track ? gains->track_peak : gains->album_peak;
+
+        if (use_gain)
+        {
+            if (track_mode || gains->album_gain == 0)
+                gain = gains->track_gain;
+            else
+                gain = gains->album_gain;
+
+            if (global_settings.replaygain_preamp)
+            {
+                int32_t preamp = get_replaygain_int(
+                    global_settings.replaygain_preamp * 10);
+
+                gain = fp_mul(gain, preamp, 24);
+            }
+        }
+
+        /* So that noclip can work even with no gain information. */
+        if (gain == 0)
+            gain = PGA_UNITY;
+
+        /* noclip: cap the gain at 1/peak once gain * peak reaches unity */
+        if (global_settings.replaygain_noclip && peak != 0 &&
+            fp_mul(gain, peak, 24) >= PGA_UNITY)
+            gain = fp_div(PGA_UNITY, peak, 24);
+    }
+
+    pga_set_gain(PGA_REPLAYGAIN, gain);
+    pga_enable_gain(PGA_REPLAYGAIN, gain != PGA_UNITY);
+}
+
+/* Choose between album and track gain from the replaygain_type setting and
+ * what is available; returns REPLAYGAIN_ALBUM, REPLAYGAIN_TRACK or -1. */
+int get_replaygain_mode(bool have_track_gain, bool have_album_gain)
+{
+    bool prefer_track = false;
+
+    /* Track gain is preferred in track mode, or in shuffle mode if shuffling */
+    if (global_settings.replaygain_type == REPLAYGAIN_TRACK)
+        prefer_track = true;
+    else if (global_settings.replaygain_type == REPLAYGAIN_SHUFFLE)
+        prefer_track = global_settings.playlist_shuffle;
+
+    /* Album gain wins unless track is preferred; else track gain if present */
+    if (have_album_gain && !prefer_track)
+        return REPLAYGAIN_ALBUM;
+
+    return have_track_gain ? REPLAYGAIN_TRACK : -1;
+}
+
+void dsp_set_replaygain(void)
+{
+    dsp_replaygain_update(&current_rpgains); /* Reapply the stashed gains */
+}
+
+
+/** Pitch Settings **/
+
+#ifdef HAVE_PITCHSCREEN
+static int32_t pitch_ratio = PITCH_SPEED_100;
+
+static void dsp_pitch_update(struct dsp_config *dsp)
+{
+    /* Apply the playback speed adjustment: the effective input frequency is
+       the codec frequency scaled by pitch_ratio / PITCH_SPEED_100. Only the
+       audio DSP enables this stage; voice does not support the feature. */
+    struct sample_io_data *io = (void *)dsp;
+    io->format.frequency =
+        (int64_t)pitch_ratio * io->format.codec_frequency / PITCH_SPEED_100;
+}
+
+int32_t sound_get_pitch(void)
+{
+    return pitch_ratio; /* Last ratio set; PITCH_SPEED_100 until changed */
+}
+
+void sound_set_pitch(int32_t percent)
+{
+    pitch_ratio = (percent <= 0) ? PITCH_SPEED_100 : percent; /* <= 0: default */
+    struct dsp_config *audio = dsp_get_config(CODEC_IDX_AUDIO);
+    struct sample_io_data *io = (void *)audio;
+    dsp_configure(audio, DSP_SWITCH_FREQUENCY, io->format.codec_frequency);
+}
+#endif /* HAVE_PITCHSCREEN */
+
+/* This is a null-processing stage that monitors as an enabled stage but never
+ * becomes active in processing samples. It only hooks messages. */
+
+/* DSP message hook - acts on the settings below and always returns 1 */
+static intptr_t misc_handler_configure(struct dsp_proc_entry *this,
+                                       struct dsp_config *dsp,
+                                       unsigned setting,
+                                       intptr_t value)
+{
+    switch (setting)
+    {
+    case DSP_INIT:
+        /* Enable us for the audio DSP at startup */
+        if (value == CODEC_IDX_AUDIO)
+            dsp_proc_enable(dsp, DSP_PROC_MISC_HANDLER, true);
+        break;
+
+    case DSP_PROC_CLOSE:
+        /* This stage should be enabled at all times */
+        DEBUGF("DSP_PROC_MISC_HANDLER - Error: Closing!\n");
+        break;
+
+    case DSP_RESET:
+#ifdef HAVE_PITCHSCREEN
+        dsp_pitch_update(dsp);
+#endif
+        value = (intptr_t)NULL; /* Falls through - with default gains */
+    case REPLAYGAIN_SET_GAINS:
+        dsp_replaygain_update((void *)value);
+        break;
+
+#ifdef HAVE_PITCHSCREEN
+    case DSP_SET_FREQUENCY:
+        dsp_pitch_update(dsp);
+        break;
+#endif
+    }
+
+    (void)this; /* unused */
+    return 1;
+}
+
+/* Database entry: id DSP_PROC_MISC_HANDLER, hook misc_handler_configure */
+DSP_PROC_DB_ENTRY(
+    MISC_HANDLER,
+    misc_handler_configure);
diff --git a/lib/rbcodec/dsp/dsp_misc.h b/lib/rbcodec/dsp/dsp_misc.h
new file mode 100644
index 0000000000..74587cbb0e
--- /dev/null
+++ b/lib/rbcodec/dsp/dsp_misc.h
@@ -0,0 +1,50 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2005 Miika Pekkarinen
+ * Copyright (C) 2005 Magnus Holmgren
+ * Copyright (C) 2007 Thom Johansen
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+#ifndef DSP_MISC_H
+#define DSP_MISC_H
+
+/* Set the tri-pdf dithered output */
+void dsp_dither_enable(bool enable); /* in dsp_sample_output.c */
+
+/* Structure used with REPLAYGAIN_SET_GAINS message (pointer sent as value) */
+#define REPLAYGAIN_SET_GAINS (DSP_PROC_SETTING+DSP_PROC_MISC_HANDLER)
+struct dsp_replay_gains
+{
+    long track_gain; /* 0 is treated as "no track gain" */
+    long album_gain; /* 0 is treated as "no album gain" */
+    long track_peak; /* used in track mode or when album_peak is 0 */
+    long album_peak; /* 0 is treated as "not available" */
+};
+
+int get_replaygain_mode(bool have_track_gain, bool have_album_gain);
+void dsp_set_replaygain(void);
+
+#ifdef HAVE_PITCHSCREEN
+void sound_set_pitch(int32_t ratio);
+int32_t sound_get_pitch(void);
+#endif /* HAVE_PITCHSCREEN */
+
+/* Callback for firmware layers to interface */
+int dsp_callback(int msg, intptr_t param);
+
+#endif /* DSP_MISC_H */
diff --git a/lib/rbcodec/dsp/dsp_proc_database.h b/lib/rbcodec/dsp/dsp_proc_database.h
new file mode 100644
index 0000000000..55f10e684b
--- /dev/null
+++ b/lib/rbcodec/dsp/dsp_proc_database.h
@@ -0,0 +1,57 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2012 Michael Sevakis
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+/****************************************************************************
+ * -_-~-_-~-_-~-_-~-_-~-_- Main database of effects _-~-_-~-_-~-_-~-_-~-_-~- 
+ *
+ * Order is not particularly relevant and has no intended correlation with
+ * IDs.
+ * 
+ * Notable exceptions in ordering:
+ *  * Sample input: which is first in line and has special responsibilities
+ *    (not an effect per se).
+ *  * Anything that depends on the native sample rate must go after the
+ *    resampling stage.
+ *  * Some bizarre dependency I didn't think of but you decided to implement.
+ *  * Sample output: Naturally, this takes the final result and converts it
+ *    to the target PCM format (not an effect per se).
+ */
+DSP_PROC_DB_START
+    DSP_PROC_DB_ITEM(MISC_HANDLER)  /* misc stuff (null stage) */
+    DSP_PROC_DB_ITEM(PGA)           /* pre-gain amp */
+#ifdef HAVE_PITCHSCREEN
+    DSP_PROC_DB_ITEM(TIMESTRETCH)   /* time-stretching */
+#endif
+    DSP_PROC_DB_ITEM(RESAMPLE)      /* resampler providing NATIVE_FREQUENCY */
+    DSP_PROC_DB_ITEM(CROSSFEED)     /* stereo crossfeed */
+    DSP_PROC_DB_ITEM(EQUALIZER)     /* n-band equalizer */
+#ifdef HAVE_SW_TONE_CONTROLS
+    DSP_PROC_DB_ITEM(TONE_CONTROLS) /* bass and treble */
+#endif
+    DSP_PROC_DB_ITEM(CHANNEL_MODE)  /* channel modes */
+    DSP_PROC_DB_ITEM(COMPRESSOR)    /* dynamic-range compressor */
+DSP_PROC_DB_STOP
+
+/* This file is included multiple times with different macro definitions so
+   clean up the current ones */
+#undef DSP_PROC_DB_START
+#undef DSP_PROC_DB_ITEM
+#undef DSP_PROC_DB_STOP
diff --git a/lib/rbcodec/dsp/dsp_proc_entry.h b/lib/rbcodec/dsp/dsp_proc_entry.h
new file mode 100644
index 0000000000..8bdfe5e0c9
--- /dev/null
+++ b/lib/rbcodec/dsp/dsp_proc_entry.h
@@ -0,0 +1,153 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2012 Michael Sevakis
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+#ifndef DSP_PROC_ENTRY_H
+#define DSP_PROC_ENTRY_H
+
+#if 0 /* Set to '1' to enable local debug messages */
+#include <debug.h>
+#else
+#undef DEBUGF
+#define DEBUGF(...)
+#endif
+
+/* Macros to generate the right stuff */
+#ifdef DSP_PROC_DB_CREATE
+struct dsp_proc_db_entry;
+
+#define DSP_PROC_DB_START
+#define DSP_PROC_DB_ITEM(name) \
+    extern const struct dsp_proc_db_entry name##_proc_db_entry;
+#define DSP_PROC_DB_STOP
+
+/* Create database as externs to be able to build array */
+#include "dsp_proc_database.h"
+
+#define DSP_PROC_DB_START \
+    static struct dsp_proc_db_entry const * const dsp_proc_database[] = {
+
+#define DSP_PROC_DB_ITEM(name) \
+    &name##_proc_db_entry,
+
+#define DSP_PROC_DB_STOP };
+
+/* Create database as array */
+#include "dsp_proc_database.h"
+
+/* Number of effects in database - all available in audio DSP */
+#define DSP_NUM_PROC_STAGES ARRAYLEN(dsp_proc_database)
+
+/* Number of possible effects for voice DSP */
+#ifdef HAVE_SW_TONE_CONTROLS
+#define DSP_VOICE_NUM_PROC_STAGES 2 /* resample, tone */
+#else
+#define DSP_VOICE_NUM_PROC_STAGES 1 /* resample */
+#endif
+
+#else /* !DSP_PROC_DB_CREATE */
+
+#ifdef DEBUG
+#define DSP_PROC_DB_ENTRY(_name, _configure) \
+    const struct dsp_proc_db_entry _name##_proc_db_entry = \
+    { .id = DSP_PROC_##_name, .configure = _configure,    \
+      .name = #_name };
+#else /* !DEBUG */
+#define DSP_PROC_DB_ENTRY(_name, _configure) \
+    const struct dsp_proc_db_entry _name##_proc_db_entry = \
+    { .id = DSP_PROC_##_name, .configure = _configure };
+#endif /* DEBUG */
+
+#endif /* DSP_PROC_DB_CREATE */
+
+#define DSP_PROC_DB_START \
+    enum dsp_proc_ids             \
+    {                             \
+        ___DSP_PROC_ID_FIRST = -1,
+
+#define DSP_PROC_DB_ITEM(name) \
+    DSP_PROC_##name,
+
+#define DSP_PROC_DB_STOP };
+
+/* Create database as enums for use as ids */
+#include "dsp_proc_database.h"
+
+struct dsp_proc_entry;
+enum dsp_proc_ids;
+
+/* DSP sample transform function prototype */
+typedef void (*dsp_proc_fn_type)(struct dsp_proc_entry *this,
+                                 struct dsp_buffer **buf);
+
+/**
+ * dsp_proc_entry
+ * The structure allocated to every stage when enabled.
+ *
+ * default settings:
+ *  .data       = 0
+ *  .ip_mask    = BIT_N(dsp_proc_db_entry.id)
+ *  .process[0] = dsp_process_null
+ *  .process[1] = dsp_format_change_process
+ *
+ * DSP_PROC_INIT handler just has to change what it needs to change. The
+ * entry may also be modified at any time to implement the stage's demands.
+ */
+struct dsp_proc_entry
+{
+    intptr_t data;    /* 00h: any value, at beginning for easy asm use */
+    uint32_t ip_mask; /* In-place id bit (0 or id bit flag if in-place) */
+    dsp_proc_fn_type process[2]; /* [0]: normal, [1]: format changes */
+};
+
+/* DSP transform configure function prototype */
+typedef intptr_t (*dsp_proc_config_fn_type)(struct dsp_proc_entry *this,
+                                            struct dsp_config *dsp,
+                                            unsigned int setting,
+                                            intptr_t value);
+
+/* Enable/disable a processing stage - not to be called during processing
+ * by processing code! */
+void dsp_proc_enable(struct dsp_config *dsp, enum dsp_proc_ids id,
+                     bool enable);
+/* Activate/deactivate processing stage, doesn't affect enabled status
+ * thus will not enable anything -
+ * may be called during processing to activate/deactivate for format
+ * changes */
+void dsp_proc_activate(struct dsp_config *dsp, enum dsp_proc_ids id,
+                       bool activate);
+
+/* Is the specified stage active on the DSP? */
+bool dsp_proc_active(struct dsp_config *dsp, enum dsp_proc_ids id);
+
+/* Call this->process[fmt] according to the rules
+ * pass (unsigned)-1 to call function 0 with no restriction */
+bool dsp_proc_call(struct dsp_proc_entry *this, struct dsp_buffer **buf_p,
+                   unsigned int fmt);
+
+struct dsp_proc_db_entry /* One stage's record, built by DSP_PROC_DB_ENTRY */
+{
+    enum dsp_proc_ids id;              /* id of this stage */
+    dsp_proc_config_fn_type configure; /* dsp_configure hook */
+#ifdef DEBUG
+    const char *name;                  /* stage name string, DEBUG only */
+#endif
+};
+
+#endif /* DSP_PROC_ENTRY_H */
diff --git a/lib/rbcodec/dsp/dsp_proc_settings.h b/lib/rbcodec/dsp/dsp_proc_settings.h
new file mode 100644
index 0000000000..769532085e
--- /dev/null
+++ b/lib/rbcodec/dsp/dsp_proc_settings.h
@@ -0,0 +1,40 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2012 Michael Sevakis
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+#ifndef DSP_PROC_SETTINGS_H
+#define DSP_PROC_SETTINGS_H
+
+struct dsp_config;
+
+/* Collect all headers together */
+#include "channel_mode.h"
+#include "compressor.h"
+#include "crossfeed.h"
+#include "dsp_misc.h"
+#include "eq.h"
+#include "pga.h"
+#ifdef HAVE_PITCHSCREEN
+#include "tdspeed.h"
+#endif
+#ifdef HAVE_SW_TONE_CONTROLS
+#include "tone_controls.h"
+#endif
+
+#endif /* DSP_PROC_SETTINGS_H */
\ No newline at end of file
diff --git a/lib/rbcodec/dsp/dsp_sample_input.c b/lib/rbcodec/dsp/dsp_sample_input.c
new file mode 100644
index 0000000000..84127e1f96
--- /dev/null
+++ b/lib/rbcodec/dsp/dsp_sample_input.c
@@ -0,0 +1,334 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2005 Miika Pekkarinen
+ * Copyright (C) 2012 Michael Sevakis
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+#include "config.h"
+#include "system.h"
+#include "dsp.h"
+#include "dsp_sample_io.h"
+
+#if 1
+#include <debug.h>
+#else
+#undef DEBUGF
+#define DEBUGF(...)
+#endif
+
+/* The internal format is 32-bit samples, non-interleaved, stereo. This
+ * format is similar to the raw output from several codecs, so no copying is
+ * needed for that case.
+ *
+ * Note that for mono, dst[0] equals dst[1], as there is no point in
+ * processing the same data twice nor should it be done when modifying
+ * samples in-place.
+ *
+ * When conversion is required:
+ * Updates source buffer to point past the samples "consumed" also consuming
+ * that portion of the input buffer and the destination is set to the buffer
+ * of samples for later stages to consume.
+ *
+ * Input operates similarly to how an out-of-place processing stage should
+ * behave.
+ */
+
+extern void dsp_sample_output_init(struct sample_io_data *this);
+extern void dsp_sample_output_flush(struct sample_io_data *this);
+
+/* convert up to SAMPLE_BUF_COUNT 16-bit mono samples to 32-bit mono */
+static void sample_input_mono16(struct sample_io_data *this,
+                                struct dsp_buffer **buf_p)
+{
+    struct dsp_buffer *src = *buf_p;
+    struct dsp_buffer *dst = &this->sample_buf;
+
+    *buf_p = dst; /* later stages consume the converted buffer */
+
+    if (dst->remcount > 0)
+        return; /* data still remains */
+
+    int count = MIN(src->remcount, SAMPLE_BUF_COUNT);
+
+    dst->remcount  = count;
+    dst->p32[0]    = this->sample_buf_arr[0];
+    dst->p32[1]    = this->sample_buf_arr[0]; /* mono: both use channel 0 */
+    dst->proc_mask = src->proc_mask;
+
+    if (count <= 0)
+        return; /* purged sample_buf */
+
+    const int16_t *s = src->pin[0];
+    int32_t *d = dst->p32[0];
+    const int32_t scale = (int32_t)1 << WORD_SHIFT; /* factor, not shift */
+
+    dsp_advance_buffer_input(src, count, sizeof (int16_t));
+
+    do
+    {
+        *d++ = *s++ * scale; /* '<<' on a negative sample would be UB */
+    }
+    while (--count > 0);
+}
+
+/* convert up to SAMPLE_BUF_COUNT 16-bit interleaved stereo frames to 32-bit */
+static void sample_input_i_stereo16(struct sample_io_data *this,
+                                    struct dsp_buffer **buf_p)
+{
+    struct dsp_buffer *src = *buf_p;
+    struct dsp_buffer *dst = &this->sample_buf;
+
+    *buf_p = dst; /* later stages consume the converted buffer */
+
+    if (dst->remcount > 0)
+        return; /* data still remains */
+
+    int count = MIN(src->remcount, SAMPLE_BUF_COUNT);
+
+    dst->remcount  = count;
+    dst->p32[0]    = this->sample_buf_arr[0];
+    dst->p32[1]    = this->sample_buf_arr[1];
+    dst->proc_mask = src->proc_mask;
+
+    if (count <= 0)
+        return; /* purged sample_buf */
+
+    const int16_t *s = src->pin[0]; /* L/R samples alternate in pin[0] */
+    int32_t *dl = dst->p32[0];
+    int32_t *dr = dst->p32[1];
+    const int32_t scale = (int32_t)1 << WORD_SHIFT; /* factor, not shift */
+
+    dsp_advance_buffer_input(src, count, 2*sizeof (int16_t));
+
+    do
+    {
+        *dl++ = *s++ * scale; /* '<<' on a negative sample would be UB */
+        *dr++ = *s++ * scale;
+    }
+    while (--count > 0);
+}
+
+/* convert up to SAMPLE_BUF_COUNT 16-bit noninterleaved stereo to 32-bit */
+static void sample_input_ni_stereo16(struct sample_io_data *this,
+                                     struct dsp_buffer **buf_p)
+{
+    struct dsp_buffer *src = *buf_p;
+    struct dsp_buffer *dst = &this->sample_buf;
+
+    *buf_p = dst; /* later stages consume the converted buffer */
+
+    if (dst->remcount > 0)
+        return; /* data still remains */
+
+    int count = MIN(src->remcount, SAMPLE_BUF_COUNT);
+
+    dst->remcount  = count;
+    dst->p32[0]    = this->sample_buf_arr[0];
+    dst->p32[1]    = this->sample_buf_arr[1];
+    dst->proc_mask = src->proc_mask;
+
+    if (count <= 0)
+        return; /* purged sample_buf */
+
+    const int16_t *sl = src->pin[0];
+    const int16_t *sr = src->pin[1];
+    int32_t *dl = dst->p32[0];
+    int32_t *dr = dst->p32[1];
+    const int32_t scale = (int32_t)1 << WORD_SHIFT; /* factor, not shift */
+
+    dsp_advance_buffer_input(src, count, sizeof (int16_t));
+
+    do
+    {
+        *dl++ = *sl++ * scale; /* '<<' on a negative sample would be UB */
+        *dr++ = *sr++ * scale;
+    }
+    while (--count > 0);
+}
+
+/* 32-bit mono: no conversion, channel 1 just aliases channel 0 in place */
+static void sample_input_mono32(struct sample_io_data *this,
+                                struct dsp_buffer **buf_p)
+{
+    struct dsp_buffer *pending = &this->sample_buf;
+
+    if (pending->remcount <= 0)
+    {
+        /* no buffer switch */
+        struct dsp_buffer *in = *buf_p;
+        in->p32[1] = in->p32[0];
+        return;
+    }
+
+    *buf_p = pending; /* data still remains */
+}
+
+
+/* deinterleave up to SAMPLE_BUF_COUNT 32-bit stereo frames into sample_buf */
+static void sample_input_i_stereo32(struct sample_io_data *this,
+                                    struct dsp_buffer **buf_p)
+{
+    struct dsp_buffer *in = *buf_p;
+    struct dsp_buffer *out = &this->sample_buf;
+
+    *buf_p = out;
+
+    if (out->remcount > 0)
+        return; /* data still remains */
+
+    const int frames = MIN(in->remcount, SAMPLE_BUF_COUNT);
+
+    out->proc_mask = in->proc_mask;
+    out->p32[1]    = this->sample_buf_arr[1];
+    out->p32[0]    = this->sample_buf_arr[0];
+    out->remcount  = frames;
+
+    if (frames <= 0)
+        return; /* purged sample_buf */
+
+    const int32_t *interleaved = in->pin[0];
+    int32_t *left = out->p32[0];
+    int32_t *right = out->p32[1];
+
+    /* consume the input first; 'interleaved' still holds the old start */
+    dsp_advance_buffer_input(in, frames, 2*sizeof (int32_t));
+
+    for (int i = 0; i < frames; i++)
+    {
+        left[i]  = interleaved[2*i];
+        right[i] = interleaved[2*i + 1];
+    }
+}
+
+/* 32-bit noninterleaved stereo is the internal format: nothing to convert */
+static void sample_input_ni_stereo32(struct sample_io_data *this,
+                                     struct dsp_buffer **buf_p)
+{
+    struct dsp_buffer *pending = &this->sample_buf;
+
+    if (pending->remcount <= 0)
+        return; /* no buffer switch */
+    *buf_p = pending; /* data still remains */
+}
+
+/* format-change handler: set the to-native sample conversion function based
+ * on dsp sample parameters, then run it once on the current buffer */
+static void dsp_sample_input_format_change(struct sample_io_data *this,
+                                           struct dsp_buffer **buf_p)
+{
+    static const sample_input_fn_type fns[STEREO_NUM_MODES][2] =
+    { /* indexed [stereo mode][sample_depth > NATIVE_DEPTH] */
+        [STEREO_INTERLEAVED] =
+            { sample_input_i_stereo16,
+              sample_input_i_stereo32 },
+        [STEREO_NONINTERLEAVED] =
+            { sample_input_ni_stereo16,
+              sample_input_ni_stereo32 },
+        [STEREO_MONO] =
+            { sample_input_mono16,
+              sample_input_mono32 },
+    };
+
+    struct dsp_buffer *src = *buf_p;
+    struct dsp_buffer *dst = &this->sample_buf;
+
+    /* Ack configured format change */
+    format_change_ack(&this->format);
+
+    if (dst->remcount > 0)
+    {
+        *buf_p = dst; /* keep handing out the already converted samples */
+        return; /* data still remains */
+    }
+
+    DSP_PRINT_FORMAT(DSP Input, -1, src->format);
+
+    /* new format - remember it and pass it along (stereo_mode not checked) */
+    dst->format = src->format;
+    this->input_samples[0] = fns[this->stereo_mode]
+                                [this->sample_depth > NATIVE_DEPTH ? 1 : 0];
+
+    this->input_samples[0](this, buf_p); /* convert with the new function */
+
+    if (*buf_p == dst) /* buffer switch? */
+        format_change_ack(&src->format);
+}
+
+static void dsp_sample_input_init(struct sample_io_data *this)
+{
+    this->input_samples[1] = dsp_sample_input_format_change; /* on change */
+    this->input_samples[0] = sample_input_ni_stereo32;       /* default */
+}
+
+/* discard any converted samples still pending in the sample buffer */
+static void dsp_sample_input_flush(struct sample_io_data *this)
+{
+    this->sample_buf.remcount = 0; /* marks it empty; data is not cleared */
+}
+
+void dsp_sample_io_configure(struct sample_io_data *this,
+                             unsigned int setting,
+                             intptr_t value)
+{
+    switch (setting) /* settings not listed here are ignored */
+    {
+    case DSP_INIT: /* set up both the input and the output side */
+        dsp_sample_input_init(this);
+        dsp_sample_output_init(this);
+        break;
+
+    case DSP_RESET:
+        /* Reset all sample descriptions to default */
+        format_change_set(&this->format);
+        this->format.num_channels = 2;
+        this->format.frac_bits = WORD_FRACBITS;
+        this->format.output_scale = WORD_FRACBITS + 1 - NATIVE_DEPTH;
+        this->format.frequency = NATIVE_FREQUENCY;
+        this->format.codec_frequency = NATIVE_FREQUENCY;
+        this->sample_depth = NATIVE_DEPTH;
+        this->stereo_mode = STEREO_NONINTERLEAVED;
+        break;
+
+    case DSP_SET_FREQUENCY:
+        value = value > 0 ? value : NATIVE_FREQUENCY; /* <= 0: native rate */
+        format_change_set(&this->format);
+        this->format.frequency = value;
+        this->format.codec_frequency = value;
+        break;
+
+    case DSP_SET_SAMPLE_DEPTH: /* depths up to NATIVE_DEPTH use WORD_FRACBITS */
+        format_change_set(&this->format);
+        this->format.frac_bits =
+            value <= NATIVE_DEPTH ? WORD_FRACBITS : value;
+        this->format.output_scale =
+            this->format.frac_bits + 1 - NATIVE_DEPTH;
+        this->sample_depth = value;
+        break;
+
+    case DSP_SET_STEREO_MODE:
+        format_change_set(&this->format);
+        this->format.num_channels = value == STEREO_MONO ? 1 : 2;
+        this->stereo_mode = value; /* NOTE(review): stored unchecked */
+        break;
+
+    case DSP_FLUSH: /* drop pending samples on both sides */
+        dsp_sample_input_flush(this);
+        dsp_sample_output_flush(this);
+        break;
+    }
+}
diff --git a/lib/rbcodec/dsp/dsp_sample_io.h b/lib/rbcodec/dsp/dsp_sample_io.h
new file mode 100644
index 0000000000..443038919d
--- /dev/null
+++ b/lib/rbcodec/dsp/dsp_sample_io.h
@@ -0,0 +1,62 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2012 Michael Sevakis
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+#ifndef DSP_SAMPLE_IO_H
+#define DSP_SAMPLE_IO_H
+
+/* 16-bit samples are scaled based on these constants. The shift should be
+ * no more than 15.
+ */
+#define WORD_SHIFT      12
+#define WORD_FRACBITS   27
+#define NATIVE_DEPTH    16
+
+#define SAMPLE_BUF_COUNT 128 /* Per channel, per DSP */
+
+struct sample_io_data;
+
+/* DSP initial buffer input function call prototype */
+typedef void (*sample_input_fn_type)(struct sample_io_data *this,
+                                     struct dsp_buffer **buf_p);
+
+/* DSP final buffer output function call prototype */
+typedef void (*sample_output_fn_type)(struct sample_io_data *this,
+                                      struct dsp_buffer *src,
+                                      struct dsp_buffer *dst);
+
+/* This becomes part of the DSP aggregate */
+struct sample_io_data
+{
+    int outcount;                /* 00h: Output count (samples per channel) */
+    struct sample_format format; /* General format info */
+    int sample_depth; /* Codec-specified sample depth */
+    int stereo_mode;  /* Codec-specified input format (STEREO_* value) */
+    sample_input_fn_type input_samples[2]; /* input functions */
+    struct dsp_buffer sample_buf; /* Buffer descriptor for converted samples */
+    int32_t sample_buf_arr[2][SAMPLE_BUF_COUNT]; /* Internal format, per ch */
+    sample_output_fn_type output_samples[2]; /* [0] active, [1] fmt change */
+};
+
+/* Sample IO watches the format setting from the codec */
+void dsp_sample_io_configure(struct sample_io_data *this,
+                             unsigned int setting,
+                             intptr_t value);
+
+#endif /* DSP_SAMPLE_IO_H */
diff --git a/lib/rbcodec/dsp/dsp_sample_output.c b/lib/rbcodec/dsp/dsp_sample_output.c
new file mode 100644
index 0000000000..47fde0440c
--- /dev/null
+++ b/lib/rbcodec/dsp/dsp_sample_output.c
@@ -0,0 +1,214 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2005 Miika Pekkarinen
+ * Copyright (C) 2012 Michael Sevakis
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+#include "config.h"
+#include "system.h"
+#include "dsp.h"
+#include "dsp_sample_io.h"
+#include "dsp-util.h"
+#include <string.h>
+
+#if 0
+#include <debug.h>
+#else
+#undef DEBUGF
+#define DEBUGF(...)
+#endif
+
+/* May be implemented in here or externally.*/
+void sample_output_mono(struct sample_io_data *this,
+                        struct dsp_buffer *src, struct dsp_buffer *dst);
+void sample_output_stereo(struct sample_io_data *this,
+                          struct dsp_buffer *src, struct dsp_buffer *dst);
+void sample_output_dithered(struct sample_io_data *this,
+                            struct dsp_buffer *src, struct dsp_buffer *dst);
+
+/** Sample output **/
+
+#if !defined(CPU_COLDFIRE) && !defined(CPU_ARM)
+/* Quantize mono internal format, duplicated into both output slots */
+void sample_output_mono(struct sample_io_data *this,
+                        struct dsp_buffer *src, struct dsp_buffer *dst)
+{
+    const int32_t *in = src->p32[0];
+    int16_t *out = dst->p16out;
+    const int shift = src->format.output_scale;
+    const int32_t rounding = 1L << (shift - 1); /* 1/2 LSB of the output */
+    int frames = this->outcount, i = 0;
+
+    do /* Must have data: the first frame is written unconditionally */
+    {
+        int32_t lr = clip_sample_16((in[i] + rounding) >> shift);
+        out[2*i] = lr;
+        out[2*i + 1] = lr;
+    }
+    while (++i < frames);
+}
+
+/* Quantize stereo internal format to interleaved 16-bit output */
+void sample_output_stereo(struct sample_io_data *this,
+                          struct dsp_buffer *src, struct dsp_buffer *dst)
+{
+    const int32_t *left = src->p32[0];
+    const int32_t *right = src->p32[1];
+    int16_t *out = dst->p16out;
+    const int shift = src->format.output_scale;
+    const int32_t rounding = 1L << (shift - 1); /* 1/2 LSB of the output */
+    int frames = this->outcount, i = 0;
+
+    do /* Must have data: the first frame is written unconditionally */
+    {
+        out[2*i] = clip_sample_16((left[i] + rounding) >> shift);
+        out[2*i + 1] = clip_sample_16((right[i] + rounding) >> shift);
+    }
+    while (++i < frames);
+}
+#endif /* CPU */
+
+/**
+ * The "dither" code to convert the 24-bit samples produced by libmad was
+ * taken from the coolplayer project - coolplayer.sourceforge.net
+ *
+ * This function handles mono and stereo outputs.
+ */
+static struct dither_data
+{
+    struct dither_state
+    {
+        long error[3];  /* 00h: error term history */
+        long random;    /* 0ch: last random value */
+    } state[2];         /* 0=left, 1=right */
+    bool enabled;       /* 20h: dithered output enabled */
+                        /* 24h: end (offsets assume a 4-byte long) */
+} dither_data IBSS_ATTR;
+
+void sample_output_dithered(struct sample_io_data *this,
+                            struct dsp_buffer *src, struct dsp_buffer *dst)
+{
+    int count = this->outcount; /* Samples per channel to write */
+    int channels = src->format.num_channels;
+    int scale = src->format.output_scale;
+    int32_t dc_bias = 1L << (scale - 1); /* 1/2 bit of significance */
+    int32_t mask = (1L << scale) - 1; /* Mask of bits quantized away */
+
+    for (int ch = 0; ch < channels; ch++)
+    {
+        struct dither_state *dither = &dither_data.state[ch];
+
+        const int32_t *s = src->p32[ch];
+        int16_t *d = &dst->p16out[ch]; /* Interleaved: slot ch of each pair */
+
+        for (int i = 0; i < count; i++, s++, d += 2)
+        {
+            /* Noise shape and bias (for correct rounding later) */
+            int32_t sample = *s;
+
+            sample += dither->error[0] - dither->error[1] + dither->error[2];
+            dither->error[2] = dither->error[1];
+            dither->error[1] = dither->error[0] / 2;
+
+            int32_t output = sample + dc_bias;
+
+            /* Dither, highpass triangle PDF (new minus previous LCG value) */
+            int32_t random = dither->random*0x0019660dL + 0x3c6ef35fL;
+            output += (random & mask) - (dither->random & mask);
+            dither->random = random;
+
+            /* Quantize sample to output range */
+            output >>= scale;
+
+            /* Error feedback of quantization */
+            dither->error[0] = sample - (output << scale);
+
+            /* Clip and store */
+            *d = clip_sample_16(output);
+        }
+    }
+
+    if (channels > 1)
+        return; /* Stereo: both slots were written above */
+
+    /* Have to duplicate left samples into the right channel since
+       output is interleaved stereo */
+    int16_t *d = dst->p16out;
+
+    do /* Body runs before the test, so count must be >= 1 here */
+    {
+        int16_t s = *d++;
+        *d++ = s;
+    }
+    while (--count > 0);
+}
+
+/* Install the output function matching the dither setting and channel
+ * count, acknowledge the format change, then output any pending data */
+static void dsp_sample_output_format_change(struct sample_io_data *this,
+                                            struct dsp_buffer *src,
+                                            struct dsp_buffer *dst)
+{
+    static const sample_output_fn_type fns[2][2] =
+    {
+        { sample_output_mono, sample_output_stereo }, /* DC-biased */
+        { sample_output_dithered, sample_output_dithered }, /* Dithered */
+    };
+
+    struct sample_format *format = &src->format;
+    int use_dither = 0;
+
+    if (dsp_get_id((void *)this) == CODEC_IDX_AUDIO && dither_data.enabled)
+        use_dither = 1;
+
+    DSP_PRINT_FORMAT(DSP Output, -1, *format);
+
+    this->output_samples[0] = fns[use_dither][format->num_channels - 1];
+    format_change_ack(format); /* always ack, we're last */
+
+    /* The real function mustn't be called with no data */
+    if (this->outcount > 0)
+        this->output_samples[0](this, src, dst);
+}
+
+void dsp_sample_output_init(struct sample_io_data *this)
+{
+    this->output_samples[0] = sample_output_stereo; /* initial output fn */
+    this->output_samples[1] = dsp_sample_output_format_change;
+}
+
+/* Flush the dither history; only done when called for the audio codec DSP */
+void dsp_sample_output_flush(struct sample_io_data *this)
+{
+    if (dsp_get_id((void *)this) == CODEC_IDX_AUDIO)
+        memset(dither_data.state, 0, sizeof (dither_data.state));
+}
+
+/** Output settings **/
+
+/* Switch tri-pdf dithered output on or off for the audio codec DSP */
+void dsp_dither_enable(bool enable)
+{
+    if (dither_data.enabled != enable)
+    {
+        struct sample_io_data *io = (void *)dsp_get_config(CODEC_IDX_AUDIO);
+        dsp_sample_output_flush(io);
+        dither_data.enabled = enable;
+        io->output_samples[0] = dsp_sample_output_format_change; /* repick */
+    }
+}
diff --git a/lib/rbcodec/dsp/eq.c b/lib/rbcodec/dsp/eq.c
index 122a46a4c5..4e7df9bf5a 100644
--- a/lib/rbcodec/dsp/eq.c
+++ b/lib/rbcodec/dsp/eq.c
@@ -7,7 +7,8 @@
  *                     \/            \/     \/    \/            \/
  * $Id$
  *
- * Copyright (C) 2006-2007 Thom Johansen 
+ * Copyright (C) 2006-2007 Thom Johansen
+ * Copyright (C) 2012 Michael Sevakis
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -18,251 +19,156 @@
  * KIND, either express or implied.
  *
  ****************************************************************************/
-
-#include <inttypes.h>
 #include "config.h"
+#include "system.h"
 #include "fixedpoint.h"
 #include "fracmul.h"
-#include "eq.h"
+#include "dsp.h"
+#include "dsp_filter.h"
 #include "replaygain.h"
-
-/** 
- * Calculate first order shelving filter. Filter is not directly usable by the
- * eq_filter() function.
- * @param cutoff shelf midpoint frequency. See eq_pk_coefs for format.
- * @param A decibel value multiplied by ten, describing gain/attenuation of
- * shelf. Max value is 24 dB.
- * @param low true for low-shelf filter, false for high-shelf filter.
- * @param c pointer to coefficient storage. Coefficients are s4.27 format.
- */
-void filter_shelf_coefs(unsigned long cutoff, long A, bool low, int32_t *c)
-{
-    long sin, cos;
-    int32_t b0, b1, a0, a1; /* s3.28 */
-    const long g = get_replaygain_int(A*5) << 4; /* 10^(db/40), s3.28 */
-
-    sin = fp_sincos(cutoff/2, &cos);
-    if (low) {
-        const int32_t sin_div_g = fp_div(sin, g, 25);
-        const int32_t sin_g = FRACMUL(sin, g);
-        cos >>= 3;
-        b0 = sin_g + cos;             /* 0.25 .. 4.10 */
-        b1 = sin_g - cos;             /* -1 .. 3.98 */
-        a0 = sin_div_g + cos;         /* 0.25 .. 4.10 */
-        a1 = sin_div_g - cos;         /* -1 .. 3.98 */
-    } else {
-        const int32_t cos_div_g = fp_div(cos, g, 25);
-        const int32_t cos_g = FRACMUL(cos, g);
-        sin >>= 3;
-        b0 = sin + cos_g;             /* 0.25 .. 4.10 */
-        b1 = sin - cos_g;             /* -3.98 .. 1 */
-        a0 = sin + cos_div_g;         /* 0.25 .. 4.10 */
-        a1 = sin - cos_div_g;         /* -3.98 .. 1 */
-    }
-
-    const int32_t rcp_a0 = fp_div(1, a0, 57); /* 0.24 .. 3.98, s2.29 */
-    *c++ = FRACMUL_SHL(b0, rcp_a0, 1);       /* 0.063 .. 15.85 */
-    *c++ = FRACMUL_SHL(b1, rcp_a0, 1);       /* -15.85 .. 15.85 */
-    *c++ = -FRACMUL_SHL(a1, rcp_a0, 1);      /* -1 .. 1 */
-}
-
-#ifdef HAVE_SW_TONE_CONTROLS
-/** 
- * Calculate second order section filter consisting of one low-shelf and one
- * high-shelf section.
- * @param cutoff_low low-shelf midpoint frequency. See eq_pk_coefs for format.
- * @param cutoff_high high-shelf midpoint frequency.
- * @param A_low decibel value multiplied by ten, describing gain/attenuation of
- * low-shelf part. Max value is 24 dB.
- * @param A_high decibel value multiplied by ten, describing gain/attenuation of
- * high-shelf part. Max value is 24 dB.
- * @param A decibel value multiplied by ten, describing additional overall gain.
- * @param c pointer to coefficient storage. Coefficients are s4.27 format.
- */
-void filter_bishelf_coefs(unsigned long cutoff_low, unsigned long cutoff_high,
-                          long A_low, long A_high, long A, int32_t *c)
-{
-    const long g = get_replaygain_int(A*10) << 7; /* 10^(db/20), s0.31 */
-    int32_t c_ls[3], c_hs[3];
-
-    filter_shelf_coefs(cutoff_low, A_low, true, c_ls);
-    filter_shelf_coefs(cutoff_high, A_high, false, c_hs);
-    c_ls[0] = FRACMUL(g, c_ls[0]);
-    c_ls[1] = FRACMUL(g, c_ls[1]);
-
-    /* now we cascade the two first order filters to one second order filter
-     * which can be used by eq_filter(). these resulting coefficients have a
-     * really wide numerical range, so we use a fixed point format which will
-     * work for the selected cutoff frequencies (in dsp.c) only.
-     */
-    const int32_t b0 = c_ls[0], b1 = c_ls[1], b2 = c_hs[0], b3 = c_hs[1];
-    const int32_t a0 = c_ls[2], a1 = c_hs[2];
-    *c++ = FRACMUL_SHL(b0, b2, 4);
-    *c++ = FRACMUL_SHL(b0, b3, 4) + FRACMUL_SHL(b1, b2, 4);
-    *c++ = FRACMUL_SHL(b1, b3, 4);
-    *c++ = a0 + a1;
-    *c++ = -FRACMUL_SHL(a0, a1, 4);
-}
-#endif
-
-/* Coef calculation taken from Audio-EQ-Cookbook.txt by Robert Bristow-Johnson.
- * Slightly faster calculation can be done by deriving forms which use tan()
- * instead of cos() and sin(), but the latter are far easier to use when doing
- * fixed point math, and performance is not a big point in the calculation part.
- * All the 'a' filter coefficients are negated so we can use only additions
- * in the filtering equation.
- */
-
-/** 
- * Calculate second order section peaking filter coefficients.
- * @param cutoff a value from 0 to 0x80000000, where 0 represents 0 Hz and
- * 0x80000000 represents the Nyquist frequency (samplerate/2).
- * @param Q Q factor value multiplied by ten. Lower bound is artificially set
- * at 0.5.
- * @param db decibel value multiplied by ten, describing gain/attenuation at
- * peak freq. Max value is 24 dB.
- * @param c pointer to coefficient storage. Coefficients are s3.28 format.
- */
-void eq_pk_coefs(unsigned long cutoff, unsigned long Q, long db, int32_t *c)
-{
-    long cs;
-    const long one = 1 << 28; /* s3.28 */
-    const long A = get_replaygain_int(db*5) << 5; /* 10^(db/40), s2.29 */
-    const long alpha = fp_sincos(cutoff, &cs)/(2*Q)*10 >> 1; /* s1.30 */
-    int32_t a0, a1, a2; /* these are all s3.28 format */
-    int32_t b0, b1, b2;
-    const long alphadivA = fp_div(alpha, A, 27);
-    const long alphaA = FRACMUL(alpha, A);
-
-    /* possible numerical ranges are in comments by each coef */
-    b0 = one + alphaA;                /* [1 .. 5] */
-    b1 = a1 = -2*(cs >> 3);           /* [-2 .. 2] */
-    b2 = one - alphaA;                /* [-3 .. 1] */
-    a0 = one + alphadivA;             /* [1 .. 5] */
-    a2 = one - alphadivA;             /* [-3 .. 1] */
-
-    /* range of this is roughly [0.2 .. 1], but we'll never hit 1 completely */
-    const long rcp_a0 = fp_div(1, a0, 59); /* s0.31 */
-    *c++ = FRACMUL(b0, rcp_a0);         /* [0.25 .. 4] */
-    *c++ = FRACMUL(b1, rcp_a0);         /* [-2 .. 2] */
-    *c++ = FRACMUL(b2, rcp_a0);         /* [-2.4 .. 1] */
-    *c++ = FRACMUL(-a1, rcp_a0);        /* [-2 .. 2] */
-    *c++ = FRACMUL(-a2, rcp_a0);        /* [-0.6 .. 1] */
-}
+#include <string.h>
+#include "dsp_proc_entry.h"
 
 /**
- * Calculate coefficients for lowshelf filter. Parameters are as for
- * eq_pk_coefs, but the coefficient format is s5.26 fixed point.
- */
-void eq_ls_coefs(unsigned long cutoff, unsigned long Q, long db, int32_t *c)
-{
-    long cs;
-    const long one = 1 << 25; /* s6.25 */
-    const long sqrtA = get_replaygain_int(db*5/2) << 2; /* 10^(db/80), s5.26 */
-    const long A = FRACMUL_SHL(sqrtA, sqrtA, 8); /* s2.29 */
-    const long alpha = fp_sincos(cutoff, &cs)/(2*Q)*10 >> 1; /* s1.30 */
-    const long ap1 = (A >> 4) + one;
-    const long am1 = (A >> 4) - one;
-    const long ap1_cs = FRACMUL(ap1, cs);
-    const long am1_cs = FRACMUL(am1, cs);
-    const long twosqrtalpha = 2*FRACMUL(sqrtA, alpha);
-    int32_t a0, a1, a2; /* these are all s6.25 format */
-    int32_t b0, b1, b2;
-    
-    /* [0.1 .. 40] */
-    b0 = FRACMUL_SHL(A, ap1 - am1_cs + twosqrtalpha, 2);
-    /* [-16 .. 63.4] */
-    b1 = FRACMUL_SHL(A, am1 - ap1_cs, 3);
-    /* [0 .. 31.7] */
-    b2 = FRACMUL_SHL(A, ap1 - am1_cs - twosqrtalpha, 2);
-    /* [0.5 .. 10] */
-    a0 = ap1 + am1_cs + twosqrtalpha;
-    /* [-16 .. 4] */
-    a1 = -2*(am1 + ap1_cs);
-    /* [0 .. 8] */
-    a2 = ap1 + am1_cs - twosqrtalpha;
-
-    /* [0.1 .. 1.99] */
-    const long rcp_a0 = fp_div(1, a0, 55);    /* s1.30 */
-    *c++ = FRACMUL_SHL(b0, rcp_a0, 2);       /* [0.06 .. 15.9] */
-    *c++ = FRACMUL_SHL(b1, rcp_a0, 2);       /* [-2 .. 31.7] */
-    *c++ = FRACMUL_SHL(b2, rcp_a0, 2);       /* [0 .. 15.9] */
-    *c++ = FRACMUL_SHL(-a1, rcp_a0, 2);      /* [-2 .. 2] */
-    *c++ = FRACMUL_SHL(-a2, rcp_a0, 2);      /* [0 .. 1] */
-}
-
-/**
- * Calculate coefficients for highshelf filter. Parameters are as for
- * eq_pk_coefs, but the coefficient format is s5.26 fixed point.
- */
-void eq_hs_coefs(unsigned long cutoff, unsigned long Q, long db, int32_t *c)
-{
-    long cs;
-    const long one = 1 << 25; /* s6.25 */
-    const long sqrtA = get_replaygain_int(db*5/2) << 2; /* 10^(db/80), s5.26 */
-    const long A = FRACMUL_SHL(sqrtA, sqrtA, 8); /* s2.29 */
-    const long alpha = fp_sincos(cutoff, &cs)/(2*Q)*10 >> 1; /* s1.30 */
-    const long ap1 = (A >> 4) + one;
-    const long am1 = (A >> 4) - one;
-    const long ap1_cs = FRACMUL(ap1, cs);
-    const long am1_cs = FRACMUL(am1, cs);
-    const long twosqrtalpha = 2*FRACMUL(sqrtA, alpha);
-    int32_t a0, a1, a2; /* these are all s6.25 format */
-    int32_t b0, b1, b2;
-
-    /* [0.1 .. 40] */
-    b0 = FRACMUL_SHL(A, ap1 + am1_cs + twosqrtalpha, 2);
-    /* [-63.5 .. 16] */
-    b1 = -FRACMUL_SHL(A, am1 + ap1_cs, 3);
-    /* [0 .. 32] */
-    b2 = FRACMUL_SHL(A, ap1 + am1_cs - twosqrtalpha, 2);
-    /* [0.5 .. 10] */
-    a0 = ap1 - am1_cs + twosqrtalpha;
-    /* [-4 .. 16] */
-    a1 = 2*(am1 - ap1_cs);
-    /* [0 .. 8] */
-    a2 = ap1 - am1_cs - twosqrtalpha;
-
-    /* [0.1 .. 1.99] */
-    const long rcp_a0 = fp_div(1, a0, 55);    /* s1.30 */
-    *c++ = FRACMUL_SHL(b0, rcp_a0, 2);       /* [0 .. 16] */
-    *c++ = FRACMUL_SHL(b1, rcp_a0, 2);       /* [-31.7 .. 2] */
-    *c++ = FRACMUL_SHL(b2, rcp_a0, 2);       /* [0 .. 16] */
-    *c++ = FRACMUL_SHL(-a1, rcp_a0, 2);      /* [-2 .. 2] */
-    *c++ = FRACMUL_SHL(-a2, rcp_a0, 2);      /* [0 .. 1] */
-}
-
-/* We realise the filters as a second order direct form 1 structure. Direct
- * form 1 was chosen because of better numerical properties for fixed point
- * implementations.
+ * Current setup is one lowshelf filter, three peaking filters and one
+ *  highshelf filter. Varying the number of shelving filters makes no sense,
+ *  but adding peaking filters is possible. Check EQ_NUM_BANDS to have
+ *  2 shelving filters and EQ_NUM_BANDS-2 peaking filters.
  */
 
-#if (!defined(CPU_COLDFIRE) && !defined(CPU_ARM))
-void eq_filter(int32_t **x, struct eqfilter *f, unsigned num,
-               unsigned channels, unsigned shift)
-{
-    unsigned c, i;
-    long long acc;
-
-    /* Direct form 1 filtering code.
-       y[n] = b0*x[i] + b1*x[i - 1] + b2*x[i - 2] + a1*y[i - 1] + a2*y[i - 2],
-       where y[] is output and x[] is input.
-     */
-
-    for (c = 0; c < channels; c++) {
-        for (i = 0; i < num; i++) {
-            acc  = (long long) x[c][i] * f->coefs[0];
-            acc += (long long) f->history[c][0] * f->coefs[1];
-            acc += (long long) f->history[c][1] * f->coefs[2];
-            acc += (long long) f->history[c][2] * f->coefs[3];
-            acc += (long long) f->history[c][3] * f->coefs[4];
-            f->history[c][1] = f->history[c][0];
-            f->history[c][0] = x[c][i];
-            f->history[c][3] = f->history[c][2];
-            x[c][i] = (acc << shift) >> 32;
-            f->history[c][2] = x[c][i];
-        }
-    }
-}
+#if EQ_NUM_BANDS < 3
+/* No good. Expect at least 1 peaking and low/high shelving filters */
+#error Band count must be greater than or equal to 3
 #endif
 
+static struct eq_state
+{
+    uint32_t enabled;                        /* Mask of enabled bands */
+    uint8_t bands[EQ_NUM_BANDS+1];           /* Enabled indexes + terminator */
+    struct dsp_filter filters[EQ_NUM_BANDS]; /* Data for each filter */
+} eq_data IBSS_ATTR;
+
+/* Clear the filter history of every band in the enabled list */
+static void eq_flush(void)
+{
+    if (eq_data.enabled != 0) /* else not initialized yet/no bands on */
+    {
+        for (int i = 0; eq_data.bands[i] < EQ_NUM_BANDS; i++)
+            filter_flush(&eq_data.filters[eq_data.bands[i]]);
+    }
+}
+
+/** DSP interface **/
+
+/* Set the precut gain value; the setting is negated before conversion */
+void dsp_set_eq_precut(int precut)
+{
+    pga_set_gain(PGA_EQ_PRECUT, get_replaygain_int(precut * -10));
+}
+
+/* Update one band's filter from its setting; zero gain disables the band */
+void dsp_set_eq_coefs(int band, const struct eq_band_setting *setting)
+{
+    static void (* const coef_gen[EQ_NUM_BANDS])(unsigned long cutoff,
+                                                 unsigned long Q, long db,
+                                                 struct dsp_filter *f) =
+    {
+        [0]                    = filter_ls_coefs,
+        [1 ... EQ_NUM_BANDS-2] = filter_pk_coefs,
+        [EQ_NUM_BANDS-1]       = filter_hs_coefs,
+    };
+
+    if (band < 0 || band >= EQ_NUM_BANDS)
+        return;
+
+    /* NOTE: The coef functions assume the EMAC unit is in fractional mode,
+       which it should be, since we're executed from the main thread. */
+
+    const uint32_t band_bit = BIT_N(band);
+    struct dsp_filter *filter = &eq_data.filters[band];
+
+    /* Start from "band off"; a zero gain leaves it that way */
+    uint32_t mask = eq_data.enabled & ~band_bit;
+
+    if (setting->gain != 0)
+    {
+        mask |= band_bit;
+
+        /* Translate the user settings into the arguments the coefficient
+           generators take */
+        coef_gen[band](0xffffffff / NATIVE_FREQUENCY * setting->cutoff,
+                       setting->q ? setting->q : 1, setting->gain, filter);
+    }
+
+    if (mask == eq_data.enabled)
+        return; /* Band-enable state did not change */
+
+    if ((mask & band_bit) != 0)
+        filter_flush(filter); /* Band is coming online */
+
+    eq_data.enabled = mask;
+
+    /* Only be active if there are bands to process - if EQ is off, then
+       this call has no effect */
+    struct dsp_config *dsp = dsp_get_config(CODEC_IDX_AUDIO);
+    dsp_proc_activate(dsp, DSP_PROC_EQUALIZER, mask != 0);
+
+    /* Rebuild the EQ_NUM_BANDS-terminated list of enabled band indexes */
+    uint8_t *list = eq_data.bands;
+    for (; mask != 0; mask &= mask - 1)
+        *list++ = (uint8_t)find_first_set_bit(mask);
+    *list = EQ_NUM_BANDS;
+}
+
+/* Enable or disable the equalizer stage of the audio codec DSP */
+void dsp_eq_enable(bool enable)
+{
+    struct dsp_config *dsp = dsp_get_config(CODEC_IDX_AUDIO);
+    dsp_proc_enable(dsp, DSP_PROC_EQUALIZER, enable);
+
+    if (enable && eq_data.enabled != 0) /* Activate only with bands on */
+        dsp_proc_activate(dsp, DSP_PROC_EQUALIZER, true);
+}
+
+/* Run the filter of each band in the enabled list over the buffer */
+static void eq_process(struct dsp_proc_entry *this,
+                       struct dsp_buffer **buf_p)
+{
+    (void)this; /* unused */
+
+    struct dsp_buffer *buf = *buf_p;
+    int frames = buf->remcount;
+    unsigned int nch = buf->format.num_channels;
+
+    for (uint8_t *band = eq_data.bands; *band < EQ_NUM_BANDS; band++)
+        filter_process(&eq_data.filters[*band], buf->p32, frames, nch);
+}
+
+/* DSP message hook: INIT/CLOSE switch the precut gain, FLUSH clears EQ */
+static intptr_t eq_configure(struct dsp_proc_entry *this,
+                             struct dsp_config *dsp,
+                             unsigned int setting,
+                             intptr_t value)
+{
+    switch (setting)
+    {
+    case DSP_PROC_INIT:
+        if (value != 0)
+            break;
+        this->process[0] = eq_process; /* fall through */
+    case DSP_PROC_CLOSE:
+        pga_enable_gain(PGA_EQ_PRECUT, setting == DSP_PROC_INIT);
+        break;
+
+    case DSP_FLUSH:
+        eq_flush();
+        break;
+    }
+
+    (void)dsp; /* unused */
+    return 1;
+}
+
+/* Database entry */
+DSP_PROC_DB_ENTRY(EQUALIZER,
+                  eq_configure);
diff --git a/lib/rbcodec/dsp/eq.h b/lib/rbcodec/dsp/eq.h
index a44e9153ac..53097beb12 100644
--- a/lib/rbcodec/dsp/eq.h
+++ b/lib/rbcodec/dsp/eq.h
@@ -18,33 +18,25 @@
  * KIND, either express or implied.
  *
  ****************************************************************************/
-
 #ifndef _EQ_H
 #define _EQ_H
 
-#include <inttypes.h>
-#include <stdbool.h>
+/* => support from 3 to 32 bands, inclusive
+ * Menus and screens must be updated to support changing this from 5
+ * without modifying other stuff (remove comment when this is no longer
+ * true :-) */
+#define EQ_NUM_BANDS 5
 
-/* These depend on the fixed point formats used by the different filter types
-   and need to be changed when they change.
- */
-#define FILTER_BISHELF_SHIFT 5
-#define EQ_PEAK_SHIFT 4
-#define EQ_SHELF_SHIFT 6
-
-struct eqfilter {
-    int32_t coefs[5];        /* Order is b0, b1, b2, a1, a2 */
-    int32_t history[2][4];
+struct eq_band_setting
+{
+    int cutoff; /* Hz */
+    int q;      /* NOTE(review): presumably Q x 10 as before - confirm */
+    int gain;   /* +/- dB */
 };
 
-void filter_shelf_coefs(unsigned long cutoff, long A, bool low, int32_t *c);
-void filter_bishelf_coefs(unsigned long cutoff_low, unsigned long cutoff_high,
-                          long A_low, long A_high, long A, int32_t *c);
-void eq_pk_coefs(unsigned long cutoff, unsigned long Q, long db, int32_t *c);
-void eq_ls_coefs(unsigned long cutoff, unsigned long Q, long db, int32_t *c);
-void eq_hs_coefs(unsigned long cutoff, unsigned long Q, long db, int32_t *c);
-void eq_filter(int32_t **x, struct eqfilter *f, unsigned num,
-               unsigned channels, unsigned shift);
-
-#endif
+/** DSP interface **/
+void dsp_set_eq_precut(int precut);
+void dsp_set_eq_coefs(int band, const struct eq_band_setting *setting);
+void dsp_eq_enable(bool enable);
 
+#endif /* _EQ_H */
diff --git a/lib/rbcodec/dsp/eq_arm.S b/lib/rbcodec/dsp/eq_arm.S
deleted file mode 100644
index b0e1771e89..0000000000
--- a/lib/rbcodec/dsp/eq_arm.S
+++ /dev/null
@@ -1,89 +0,0 @@
-/***************************************************************************
- *             __________               __   ___.
- *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
- *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
- *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
- *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
- *                     \/            \/     \/    \/            \/
- * $Id$
- *
- * Copyright (C) 2006-2007 Thom Johansen
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
- * KIND, either express or implied.
- *
- ****************************************************************************/
-
-#include "config.h"
-
-/* uncomment this to make filtering calculate lower bits after shifting.
- * without this, "shift" of the lower bits will be lost here.
- */
-/* #define HIGH_PRECISION */
-
-/*
- * void eq_filter(int32_t **x, struct eqfilter *f, unsigned num,
- *                unsigned channels, unsigned shift)
- */
-#if CONFIG_CPU == PP5002
-    .section    .icode,"ax",%progbits
-#else
-    .text
-#endif
-    .global eq_filter
-eq_filter:
-    ldr r12, [sp]             @ get shift parameter
-    stmdb sp!, { r0-r11, lr } @ save all params and clobbered regs 
-    ldmia r1!, { r4-r8 }      @ load coefs
-    mov r10, r1               @ loop prelude expects filter struct addr in r10
-
-.filterloop:
-    ldr r9, [sp]            @ get pointer to this channels data
-    add r0, r9, #4
-    str r0, [sp]            @ save back pointer to next channels data
-    ldr r9, [r9]            @ r9 = x[]
-    ldr r14, [sp, #8]       @ r14 = numsamples
-    ldmia r10, { r0-r3 }    @ load history, r10 should be filter struct addr
-    str r10, [sp, #4]       @ save it for loop end
-
-    /* r0-r3 = history, r4-r8 = coefs, r9 = x[], r10..r11 = accumulator,
-     * r12 = shift amount, r14 = number of samples.
-     */
-.loop:
-    /* Direct form 1 filtering code.
-     * y[n] = b0*x[i] + b1*x[i - 1] + b2*x[i - 2] + a1*y[i - 1] + a2*y[i - 2],
-     * where y[] is output and x[] is input. This is performed out of order to
-     * reuse registers, we're pretty short on regs.
-     */
-    smull r10, r11, r6, r1     @ acc = b2*x[i - 2]
-    mov r1, r0                 @ fix input history
-    smlal r10, r11, r5, r0     @ acc += b1*x[i - 1]
-    ldr r0, [r9]               @ load input and fix history in same operation
-    smlal r10, r11, r7, r2     @ acc += a1*y[i - 1]
-    smlal r10, r11, r8, r3     @ acc += a2*y[i - 2]
-    smlal r10, r11, r4, r0     @ acc += b0*x[i] /* avoid stall on arm9*/
-    mov r3, r2                 @ fix output history
-    mov r2, r11, asl r12       @ get upper part of result and shift left
-#ifdef HIGH_PRECISION
-    rsb r11, r12, #32          @ get shift amount for lower part
-    orr r2, r2, r10, lsr r11   @ then mix in correctly shifted lower part
-#endif
-    str r2, [r9], #4           @ save result
-    subs r14, r14, #1          @ are we done with this channel?
-    bne .loop
-
-    ldr r10, [sp, #4]          @ load filter struct pointer
-    stmia r10!, { r0-r3 }      @ save back history
-    ldr r11, [sp, #12]         @ load number of channels
-    subs r11, r11, #1          @ all channels processed?
-    strne r11, [sp, #12]
-    bne .filterloop
-
-    add sp, sp, #16            @ compensate for temp storage
-    ldmpc regs=r4-r11
-
diff --git a/lib/rbcodec/dsp/eq_cf.S b/lib/rbcodec/dsp/eq_cf.S
deleted file mode 100644
index 30a28b9d99..0000000000
--- a/lib/rbcodec/dsp/eq_cf.S
+++ /dev/null
@@ -1,91 +0,0 @@
-/***************************************************************************
- *             __________               __   ___.
- *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
- *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
- *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
- *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
- *                     \/            \/     \/    \/            \/
- * $Id$
- *
- * Copyright (C) 2006-2007 Thom Johansen
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
- * KIND, either express or implied.
- *
- ****************************************************************************/
-
-/* uncomment this to make filtering calculate lower bits after shifting.
- * without this, "shift" - 1 of the lower bits will be lost here.
- */
-/* #define HIGH_PRECISION */
-
-/*
- * void eq_filter(int32_t **x, struct eqfilter *f, unsigned num,
- *                unsigned channels, unsigned shift)
- */
-    .text
-    .global eq_filter
-eq_filter:
-    lea.l (-11*4, %sp), %sp 
-    movem.l %d2-%d7/%a2-%a6, (%sp)    | save clobbered regs
-    move.l (11*4+8, %sp), %a5         | fetch filter structure address
-    move.l (11*4+20, %sp), %d7        | load shift count
-    subq.l #1, %d7                    | EMAC gives us one free shift
-#ifdef HIGH_PRECISION
-    moveq.l #8, %d6
-    sub.l %d7, %d6                    | shift for lower part of accumulator
-#endif
-    movem.l (%a5), %a0-%a4            | load coefs
-    lea.l (5*4, %a5), %a5             | point to filter history
-
-.filterloop:
-    move.l (11*4+4, %sp), %a6         | load input channel pointer
-    addq.l #4, (11*4+4, %sp)          | point x to next channel
-    move.l (%a6), %a6
-    move.l (11*4+12, %sp), %d5        | number of samples
-    movem.l (%a5), %d0-%d3            | load filter history
-
-    /* d0-d3 = history, d4 = temp, d5 = sample count, d6 = lower shift amount,
-     * d7 = upper shift amount, a0-a4 = coefs, a5 = history pointer, a6 = x[]
-     */
-.loop:
-    /* Direct form 1 filtering code. We assume DSP has put EMAC in frac mode.
-     * y[n] = b0*x[i] + b1*x[i - 1] + b2*x[i - 2] + a1*y[i - 1] + a2*y[i - 2],
-     * where y[] is output and x[] is input. This is performed out of order
-     * to do parallel load of input value.
-     */
-    mac.l %a2, %d1, %acc0               | acc = b2*x[i - 2]
-    move.l %d0, %d1                     | fix input history
-    mac.l %a1, %d0, (%a6), %d0, %acc0   | acc += b1*x[i - 1], x[i] -> d0
-    mac.l %a0, %d0, %acc0               | acc += b0*x[i]
-    mac.l %a3, %d2, %acc0               | acc += a1*y[i - 1]
-    mac.l %a4, %d3, %acc0               | acc += a2*y[i - 2]
-    move.l %d2, %d3                     | fix output history
-#ifdef HIGH_PRECISION
-    move.l %accext01, %d2               | fetch lower part of accumulator
-    move.b %d2, %d4                     | clear upper three bytes
-    lsr.l %d6, %d4                      | shift lower bits
-#endif
-    movclr.l %acc0, %d2                 | fetch upper part of result
-    asl.l %d7, %d2                      | restore fixed point format
-#ifdef HIGH_PRECISION
-    or.l %d2, %d4                       | combine lower and upper parts
-#endif
-    move.l %d2, (%a6)+                  | save result
-    subq.l #1, %d5                      | are we done with this channel?
-    jne .loop
-    
-    movem.l %d0-%d3, (%a5)              | save history back to struct
-    lea.l (4*4, %a5), %a5               | point to next channel's history
-    subq.l #1, (11*4+16, %sp)           | have we processed both channels?
-    jne .filterloop
-
-    movem.l (%sp), %d2-%d7/%a2-%a6
-    lea.l (11*4, %sp), %sp
-    rts
-
diff --git a/lib/rbcodec/dsp/lin_resample.c b/lib/rbcodec/dsp/lin_resample.c
new file mode 100644
index 0000000000..c8be3cb1ad
--- /dev/null
+++ b/lib/rbcodec/dsp/lin_resample.c
@@ -0,0 +1,281 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2005 Miika Pekkarinen
+ * Copyright (C) 2012 Michael Sevakis
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+#include "config.h"
+#include "system.h"
+#include "dsp.h"
+#include "fracmul.h"
+#include "fixedpoint.h"
+#include "dsp_sample_io.h"
+#include <string.h>
+#include "dsp_proc_entry.h"
+
+/**
+ * Linear interpolation resampling that introduces a one sample delay because
+ * of our inability to look into the future at the end of a frame.
+ */
+
+#if 0 /* Set to '1' to enable debug messages */
+#include <debug.h>
+#else
+#undef DEBUGF
+#define DEBUGF(...)
+#endif
+
+#define RESAMPLE_BUF_COUNT 192 /* Per channel, per DSP */
+
+/* Data for each resampler on each DSP */
+static struct resample_data
+{
+    uint32_t delta;          /* 00h: Phase delta for each step */
+    uint32_t phase;          /* 04h: Current phase [pos16|frac16] */
+    int32_t  last_sample[2]; /* 08h: Last samples for interpolation (L+R) */
+    int32_t  frequency;      /* 10h: Virtual samplerate */
+                             /* 14h */
+    struct dsp_config *dsp;  /* The DSP for this resampler */
+    struct dsp_buffer resample_buf; /* Buffer descriptor for resampled data */
+    int32_t resample_buf_arr[2][RESAMPLE_BUF_COUNT]; /* Actual output data */
+} resample_data[DSP_COUNT] IBSS_ATTR;
+
+/* Actual worker function. Implemented here or in target assembly code. */
+int lin_resample_resample(struct resample_data *data, struct dsp_buffer *src,
+                          struct dsp_buffer *dst);
+
+static void lin_resample_flush_data(struct resample_data *data)
+{
+    data->phase = 0;
+    data->last_sample[0] = 0;
+    data->last_sample[1] = 0;
+}
+
+static void lin_resample_flush(struct dsp_proc_entry *this)
+{
+    struct resample_data *data = (void *)this->data;
+    data->resample_buf.remcount = 0;
+    lin_resample_flush_data(data);
+}
+
+static bool lin_resample_new_delta(struct resample_data *data,
+                                   struct dsp_buffer *buf)
+{
+    int32_t frequency = buf->format.frequency; /* virtual samplerate */
+
+    data->frequency = frequency;
+    data->delta = fp_div(frequency, NATIVE_FREQUENCY, 16);
+
+    if (frequency == NATIVE_FREQUENCY)
+    {
+        /* NOTE: If fully glitch-free transitions from no resampling to
+           resampling are desired, last_sample history should be maintained
+           even when not resampling. */
+        lin_resample_flush_data(data);
+        return false;
+    }
+
+    return true;
+}
+
+#if !defined(CPU_COLDFIRE) && !defined(CPU_ARM)
+/* Where the real work is done */
+int lin_resample_resample(struct resample_data *data, struct dsp_buffer *src,
+                          struct dsp_buffer *dst)
+{
+    int ch = src->format.num_channels - 1;
+    uint32_t count = MIN(src->remcount, 0x8000);
+    uint32_t delta = data->delta;
+    uint32_t phase, pos;
+    int32_t *d;
+
+    do
+    {
+        const int32_t *s = src->p32[ch];
+
+        d = dst->p32[ch];
+        int32_t *dmax = d + dst->bufcount;
+
+        phase = data->phase;
+        pos = phase >> 16;
+        pos = MIN(pos, count);
+
+        int32_t last = pos > 0 ? s[pos - 1] : data->last_sample[ch];
+
+        if (pos < count)
+        {
+            while (1)
+            {
+                *d++ = last + FRACMUL((phase & 0xffff) << 15, s[pos] - last);
+                phase += delta;
+                pos = phase >> 16;
+
+                if (pos >= count || d >= dmax)
+                    break;
+
+                if (pos > 0)
+                    last = s[pos - 1];
+            }
+
+            if (pos > 0)
+            {
+                pos = MIN(pos, count);
+                last = s[pos - 1];
+            }
+        }
+
+        data->last_sample[ch] = last;
+    }
+    while (--ch >= 0);
+
+    /* Wrap phase accumulator back to start of next frame. */
+    data->phase = phase - (pos << 16);
+
+    dst->remcount = d - dst->p32[0];
+
+    return pos;
+}
+#endif /* CPU */
+
+/* Hand out any resampled data still pending; otherwise resample remaining src
+ * samples until src is used up or our buffer is full, advancing src by the
+ * amount consumed. *buf_p is always switched to our own output buffer. */
+static void lin_resample_process(struct dsp_proc_entry *this,
+                                 struct dsp_buffer **buf_p)
+{
+    struct resample_data *data = (void *)this->data;
+    struct dsp_buffer *src = *buf_p;
+    struct dsp_buffer *dst = &data->resample_buf;
+
+    *buf_p = dst;
+
+    if (dst->remcount > 0)
+        return; /* data still remains */
+
+    int channels = src->format.num_channels;
+
+    dst->remcount = 0;
+    dst->p32[0] = data->resample_buf_arr[0];
+    dst->p32[1] = data->resample_buf_arr[channels - 1];
+
+    if (src->remcount > 0)
+    {
+        dst->bufcount = RESAMPLE_BUF_COUNT;
+
+        int consumed = lin_resample_resample(data, src, dst);
+
+        /* Advance src by consumed amount */
+        if (consumed > 0)
+            dsp_advance_buffer32(src, consumed);
+    }
+    /* else purged resample_buf */
+
+    /* Inherit in-place processed mask from source buffer */
+    dst->proc_mask = src->proc_mask;
+}
+
+/* Finish draining old samples then switch format or shut off */
+static void lin_resample_new_format(struct dsp_proc_entry *this,
+                                    struct dsp_buffer **buf_p)
+{
+    struct resample_data *data = (void *)this->data;
+    struct dsp_buffer *src = *buf_p;
+    struct dsp_buffer *dst = &data->resample_buf;
+
+    if (dst->remcount > 0)
+    {
+        *buf_p = dst;
+        return; /* data still remains */
+    }
+
+    DSP_PRINT_FORMAT(DSP_PROC_RESAMPLE, DSP_PROC_RESAMPLE, src->format);
+
+    struct dsp_config *dsp = data->dsp;
+    int32_t frequency = data->frequency;
+    bool active = dsp_proc_active(dsp, DSP_PROC_RESAMPLE);
+
+    if (src->format.frequency != frequency)
+    {
+        DEBUGF("  DSP_PROC_RESAMPLE- new delta\n");
+        active = lin_resample_new_delta(data, src);
+        dsp_proc_activate(dsp, DSP_PROC_RESAMPLE, active);
+    }
+
+    /* Everything after us is NATIVE_FREQUENCY */
+    struct sample_format f = src->format;
+    f.frequency = NATIVE_FREQUENCY;
+    f.codec_frequency = NATIVE_FREQUENCY;
+
+    if (!active)
+    {
+        DEBUGF("  DSP_PROC_RESAMPLE- not active\n");
+        dst->format = f; /* Keep track */
+        return; /* No resampling required */
+    }
+
+    format_change_ack(&src->format);
+
+    if (EQU_SAMPLE_FORMAT(f, dst->format))
+    {
+        DEBUGF("  DSP_PROC_RESAMPLE- same dst format\n");
+        format_change_ack(&f); /* Nothing changed that matters downstream */
+    }
+
+    dst->format = f;
+    dsp_proc_call(this, buf_p, 0);
+}
+
+/* DSP message hook */
+static intptr_t lin_resample_configure(struct dsp_proc_entry *this,
+                                       struct dsp_config *dsp,
+                                       unsigned int setting,
+                                       intptr_t value)
+{
+    switch (setting)
+    {
+    case DSP_INIT:
+        /* Always enable resampler so that format changes may be monitored and
+         * it can self-activate when required */
+        dsp_proc_enable(dsp, DSP_PROC_RESAMPLE, true);
+        break;
+
+    case DSP_FLUSH:
+        lin_resample_flush(this);
+        break;
+
+    case DSP_PROC_INIT:
+        this->data = (intptr_t)&resample_data[dsp_get_id(dsp)];
+        this->ip_mask = 0; /* Not in-place */
+        this->process[0] = lin_resample_process;
+        this->process[1] = lin_resample_new_format;
+        ((struct resample_data *)this->data)->dsp = dsp;
+        break;
+
+    case DSP_PROC_CLOSE:
+        /* This stage should be enabled at all times */
+        DEBUGF("DSP_PROC_RESAMPLE- Error: Closing!\n");
+        break;
+    }
+
+    return 1;
+    (void)value;
+}
+
+/* Database entry */
+DSP_PROC_DB_ENTRY(RESAMPLE,
+                  lin_resample_configure);
diff --git a/lib/rbcodec/dsp/pga.c b/lib/rbcodec/dsp/pga.c
new file mode 100644
index 0000000000..c2c29ccfc0
--- /dev/null
+++ b/lib/rbcodec/dsp/pga.c
@@ -0,0 +1,144 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2005 Magnus Holmgren
+ * Copyright (C) 2012 Michael Sevakis
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+#include "config.h"
+#include "system.h"
+#include "dsp.h"
+#include "dsp-util.h"
+#include "fixedpoint.h"
+#include "fracmul.h"
+#include "dsp_proc_entry.h"
+
+/* Implemented here or in target assembly code */
+void pga_process(struct dsp_proc_entry *this, struct dsp_buffer **buf_p);
+
+#define DEFAULT_PGA_GAIN (PGA_UNITY >> 1) /* s8.23 format */
+
+static struct pga_data
+{
+    int32_t gain;                 /* 00h: Final gain in s8.23 format */
+    uint32_t enabled;             /* Mask of enabled gains */
+    int32_t gains[PGA_NUM_GAINS]; /* Individual gains in s7.24 format */
+} pga_data =
+{
+    .gain = DEFAULT_PGA_GAIN,
+    .enabled = 0,
+    .gains[0 ... PGA_NUM_GAINS-1] = PGA_UNITY,
+};
+
+/* Combine all enabled gains into one overall gain and enable the amplifier
+   only if that overall gain is not unity */
+static void pga_update(void)
+{
+    int32_t gain = PGA_UNITY;
+
+    /* Multiply all gains with one another to get overall amp gain */
+    for (int i = 0; i < PGA_NUM_GAINS; i++)
+    {
+        if (pga_data.enabled & BIT_N(i)) /* Only enabled gains factor in */
+            gain = fp_mul(gain, pga_data.gains[i], 24);
+    }
+
+    gain >>= 1; /* s7.24 -> s8.23 format */
+
+    if (gain == pga_data.gain)
+        return;
+
+    struct dsp_config *dsp = dsp_get_config(CODEC_IDX_AUDIO);
+    pga_data.gain = gain;
+    dsp_proc_enable(dsp, DSP_PROC_PGA, gain != DEFAULT_PGA_GAIN);
+    dsp_proc_activate(dsp, DSP_PROC_PGA, true);
+}
+
+
+/** Amp controls **/
+
+/* Set a particular gain value - doesn't have to be enabled */
+void pga_set_gain(enum pga_gain_ids id, int32_t value)
+{
+    if (value == pga_data.gains[id])
+        return;
+
+    pga_data.gains[id] = value;
+
+    if (BIT_N(id) & pga_data.enabled)
+        pga_update();
+}
+
+/* Enable or disable the specified gain stage */
+void pga_enable_gain(enum pga_gain_ids id, bool enable)
+{
+    uint32_t bit = BIT_N(id);
+
+    if (enable != !(pga_data.enabled & bit))
+        return;
+
+    pga_data.enabled ^= bit;
+    pga_update();
+}
+
+
+/** DSP interface **/
+
+#if !defined(CPU_COLDFIRE) && !defined(CPU_ARM)
+/* Apply a constant gain to the samples (e.g., for ReplayGain). */
+void pga_process(struct dsp_proc_entry *this, struct dsp_buffer **buf_p)
+{
+    int32_t gain = ((struct pga_data *)this->data)->gain;
+    struct dsp_buffer *buf = *buf_p;
+    unsigned int channels = buf->format.num_channels;
+
+    for (unsigned int ch = 0; ch < channels; ch++)
+    {
+        int32_t *d = buf->p32[ch];
+        int count = buf->remcount;
+
+        for (int i = 0; i < count; i++)
+            d[i] = FRACMUL_SHL(d[i], gain, 8);
+    }
+
+    (void)this;
+}
+#endif /* CPU */
+
+/* DSP message hook */
+static intptr_t pga_configure(struct dsp_proc_entry *this,
+                              struct dsp_config *dsp,
+                              unsigned int setting,
+                              intptr_t value)
+{
+    switch (setting)
+    {
+    case DSP_PROC_INIT:
+        if (value != 0)
+            break; /* Already initialized */
+        this->data = (intptr_t)&pga_data;
+        this->process[0] = pga_process;
+        break;
+    }
+
+    return 1;
+    (void)dsp;
+}
+
+/* Database entry */
+DSP_PROC_DB_ENTRY(PGA,
+                  pga_configure);
diff --git a/lib/rbcodec/dsp/pga.h b/lib/rbcodec/dsp/pga.h
new file mode 100644
index 0000000000..f0c4c4717f
--- /dev/null
+++ b/lib/rbcodec/dsp/pga.h
@@ -0,0 +1,40 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2012 Michael Sevakis
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+#ifndef PGA_H
+#define PGA_H
+
+#define PGA_UNITY ((int32_t)0x01000000) /* s7.24 */
+
+/* Various gains supported by pre-gain amp */
+enum pga_gain_ids
+{
+    PGA_EQ_PRECUT = 0,
+    PGA_REPLAYGAIN,
+#ifdef HAVE_SW_VOLUME_CONTROL
+    PGA_VOLUME,
+#endif
+    PGA_NUM_GAINS,
+};
+
+void pga_set_gain(enum pga_gain_ids id, int32_t value);
+void pga_enable_gain(enum pga_gain_ids id, bool enable);
+
+#endif /* PGA_H */
diff --git a/lib/rbcodec/dsp/tdspeed.c b/lib/rbcodec/dsp/tdspeed.c
index c2f4a3f704..3aa8acc458 100644
--- a/lib/rbcodec/dsp/tdspeed.c
+++ b/lib/rbcodec/dsp/tdspeed.c
@@ -9,6 +9,7 @@
  *
  * Copyright (C) 2006 by Nicolas Pitre <nico@cam.org>
  * Copyright (C) 2006-2007 by Stéphane Doyon <s.doyon@videotron.ca>
+ * Copyright (C) 2012 Michael Sevakis
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -19,69 +20,42 @@
  * KIND, either express or implied.
  *
  ****************************************************************************/
-
-#include <inttypes.h>
-#include <stddef.h>
-#include <stdio.h>
-#include <string.h>
+#include "config.h"
+#include "system.h"
 #include "sound.h"
 #include "core_alloc.h"
 #include "system.h"
 #include "tdspeed.h"
 #include "settings.h"
 #include "dsp-util.h"
+#include "dsp_proc_entry.h"
 
 #define assert(cond)
 
+#define TIMESTRETCH_SET_FACTOR (DSP_PROC_SETTING+DSP_PROC_TIMESTRETCH)
+
 #define MIN_RATE 8000
 #define MAX_RATE 48000 /* double buffer for double rate */
 #define MINFREQ 100
 
-#define FIXED_BUFSIZE 3072 /* 48KHz factor 3.0 */
+#define MAX_INPUTCOUNT       512 /* Max input count so dst doesn't overflow */
+#define FIXED_BUFCOUNT      3072 /* 48KHz factor 3.0 */
+#define FIXED_OUTBUFCOUNT   4096
 
-static int32_t** dsp_src;
-static int handles[4];
-static int32_t *overlap_buffer[2] = { NULL, NULL };
-static int32_t *outbuf[2] = { NULL, NULL };
-
-static int move_callback(int handle, void* current, void* new)
+enum tdspeed_ops
 {
-    /* TODO */
-    (void)handle;
-    if (dsp_src)
-    {
-        int ch = (current == outbuf[0]) ? 0 : 1;
-        dsp_src[ch] = outbuf[ch] = new;
-    }
-    return BUFLIB_CB_OK;
-}
-
-static struct buflib_callbacks ops = {
-    .move_callback = move_callback,
-    .shrink_callback = NULL,
+    TDSOP_PROCESS,
+    TDSOP_LAST,
+    TDSOP_PURGE,
 };
 
-static int ovl_move_callback(int handle, void* current, void* new)
-{
-    /* TODO */
-    (void)handle;
-    if (dsp_src)
-    {
-        int ch = (current == overlap_buffer[0]) ? 0 : 1;
-        overlap_buffer[ch] = new;
-    }
-    return BUFLIB_CB_OK;
-}
-
-static struct buflib_callbacks ovl_ops = {
-    .move_callback = ovl_move_callback,
-    .shrink_callback = NULL,
-};
-
-
 static struct tdspeed_state_s
 {
-    bool stereo;
+    struct dsp_proc_entry *this; /* this stage */
+    struct dsp_config *dsp; /* the DSP we use */
+    unsigned int channels;  /* channel count of input (stereo if > 1) */
+    int32_t samplerate;     /* current samplerate of input data */
+    int32_t factor;         /* stretch factor (perdecimille) */
     int32_t shift_max;      /* maximum displacement on a frame */
     int32_t src_step;       /* source window pace */
     int32_t dst_step;       /* destination window pace */
@@ -89,62 +63,132 @@ static struct tdspeed_state_s
     int32_t ovl_shift;      /* overlap buffer frame shift */
     int32_t ovl_size;       /* overlap buffer used size */
     int32_t ovl_space;      /* overlap buffer size */
-    int32_t *ovl_buff[2];   /* overlap buffer */
+    int32_t *ovl_buff[2];   /* overlap buffer (L+R) */
 } tdspeed_state;
 
-void tdspeed_init(void)
-{
-    if (!global_settings.timestretch_enabled)
-        return;
+static int handles[4] = { 0, 0, 0, 0 };
+static int32_t *buffers[4] = { NULL, NULL, NULL, NULL };
 
-    /* Allocate buffers */
-    if (overlap_buffer[0] == NULL)
+#define overlap_buffer  (&buffers[0])
+#define outbuf          (&buffers[2])
+#define out_size        FIXED_OUTBUFCOUNT
+
+/* Processed buffer passed out to later stages */
+static struct dsp_buffer dsp_outbuf;
+
+static int move_callback(int handle, void *current, void *new)
+{
+#if 0
+    /* Should not currently need to block this since DSP loop completes an
+       iteration before yielding and begins again at its input buffer */
+    if (dsp_is_busy(tdspeed_state.dsp))
+        return BUFLIB_CB_CANNOT_MOVE; /* DSP processing in progress */
+#endif
+
+    ptrdiff_t shift = (int32_t *)new - (int32_t *)current;
+    int32_t **p32 = dsp_outbuf.p32;
+
+    for (unsigned int i = 0; i < ARRAYLEN(handles); i++)
     {
-        handles[0] = core_alloc_ex("tdspeed ovl left", FIXED_BUFSIZE * sizeof(int32_t), &ovl_ops);
-        overlap_buffer[0] = core_get_data(handles[0]);
-    }
-    if (overlap_buffer[1] == NULL)
-    {
-        handles[1] = core_alloc_ex("tdspeed ovl right", FIXED_BUFSIZE * sizeof(int32_t), &ovl_ops);
-        overlap_buffer[1] = core_get_data(handles[1]);
-    }
-    if (outbuf[0] == NULL)
-    {
-        handles[2] = core_alloc_ex("tdspeed left", TDSPEED_OUTBUFSIZE * sizeof(int32_t), &ops);
-        outbuf[0] = core_get_data(handles[2]);
-    }
-    if (outbuf[1] == NULL)
-    {
-        handles[3] = core_alloc_ex("tdspeed right", TDSPEED_OUTBUFSIZE * sizeof(int32_t), &ops);
-        outbuf[1] = core_get_data(handles[3]);
+        if (handle != handles[i])
+            continue;
+
+        switch (i)
+        {
+        case 0: case 1:
+            /* moving overlap (input) buffers */
+            tdspeed_state.ovl_buff[i] = new;
+            break;
+
+        case 2:
+            /* moving outbuf left channel and dsp_outbuf.p32[0] */
+            if (p32[0] == p32[1])
+                p32[1] += shift; /* mono mode */
+
+            p32[0] += shift;
+            break;
+
+        case 3:
+            /* moving outbuf right channel and dsp_outbuf.p32[1] */
+            p32[1] += shift;
+            break;
+        }
+
+        buffers[i] = new;
+        break;
     }
+
+    return BUFLIB_CB_OK;
 }
 
-void tdspeed_finish(void)
+static struct buflib_callbacks ops =
 {
-    for(unsigned i = 0; i < ARRAYLEN(handles); i++)
+    .move_callback = move_callback,
+    .shrink_callback = NULL,
+};
+
+/* Allocate timestretch buffers */
+static bool tdspeed_alloc_buffers(void)
+{
+    static const struct
     {
-        if (handles[i] > 0)
+        const char *name;
+        size_t size;
+    } bufdefs[4] =
+    {
+        { "tdspeed ovl L", FIXED_BUFCOUNT * sizeof(int32_t) },
+        { "tdspeed ovl R", FIXED_BUFCOUNT * sizeof(int32_t) },
+        { "tdspeed out L", FIXED_OUTBUFCOUNT * sizeof(int32_t) },
+        { "tdspeed out R", FIXED_OUTBUFCOUNT * sizeof(int32_t) },
+    };
+
+    for (unsigned int i = 0; i < ARRAYLEN(bufdefs); i++)
+    {
+        if (handles[i] <= 0)
         {
-            core_free(handles[i]);
-            handles[i] = 0;
+            handles[i] = core_alloc_ex(bufdefs[i].name, bufdefs[i].size, &ops);
+
+            if (handles[i] <= 0)
+                return false;
+        }
+
+        if (buffers[i] == NULL)
+        {
+            buffers[i] = core_get_data(handles[i]);
+
+            if (buffers[i] == NULL)
+                return false;
         }
     }
-    overlap_buffer[0] = overlap_buffer[1] = NULL;
-    outbuf[0]         = outbuf[1]         = NULL;
+
+    return true;
 }
 
-bool tdspeed_config(int samplerate, bool stereo, int32_t factor)
+/* Free timestretch buffers */
+static void tdspeed_free_buffers(void)
+{
+    for (unsigned int i = 0; i < ARRAYLEN(handles); i++)
+    {
+        if (handles[i] > 0)
+            core_free(handles[i]);
+
+        handles[i] = 0;
+        buffers[i] = NULL;
+    }
+}
+
+/* Discard all data */
+static void tdspeed_flush(void)
 {
     struct tdspeed_state_s *st = &tdspeed_state;
-    int src_frame_sz;
+    st->ovl_size = 0;
+    st->ovl_shift = 0;
+    dsp_outbuf.remcount = 0; /* Dump remaining output */
+}
 
-    /* Check buffers were allocated ok */
-    if (overlap_buffer[0] == NULL || overlap_buffer[1] == NULL)
-        return false;
-
-    if (outbuf[0] == NULL || outbuf[1] == NULL)
-        return false;
+static bool tdspeed_update(int32_t samplerate, int32_t factor)
+{
+    struct tdspeed_state_s *st = &tdspeed_state;
 
     /* Check parameters */
     if (factor == PITCH_SPEED_100)
@@ -156,7 +200,10 @@ bool tdspeed_config(int samplerate, bool stereo, int32_t factor)
     if (factor < STRETCH_MIN || factor > STRETCH_MAX)
         return false;
 
-    st->stereo = stereo;
+    /* Save parameters we'll need later if format changes */
+    st->samplerate = samplerate;
+    st->factor     = factor;
+
     st->dst_step = samplerate / MINFREQ;
 
     if (factor > PITCH_SPEED_100)
@@ -171,7 +218,7 @@ bool tdspeed_config(int samplerate, bool stereo, int32_t factor)
     st->src_step = st->dst_step * factor / PITCH_SPEED_100;
     st->shift_max = (st->dst_step > st->src_step) ? st->dst_step : st->src_step;
 
-    src_frame_sz = st->shift_max + st->dst_step;
+    int src_frame_sz = st->shift_max + st->dst_step;
 
     if (st->dst_step > st->src_step)
         src_frame_sz += st->dst_step - st->src_step;
@@ -182,32 +229,27 @@ bool tdspeed_config(int samplerate, bool stereo, int32_t factor)
     if (st->src_step > st->dst_step)
         st->ovl_space += 2*st->src_step - st->dst_step;
 
-    if (st->ovl_space > FIXED_BUFSIZE)
-        st->ovl_space = FIXED_BUFSIZE;
+    if (st->ovl_space > FIXED_BUFCOUNT)
+        st->ovl_space = FIXED_BUFCOUNT;
 
+    /* just discard remaining input data */
     st->ovl_size = 0;
     st->ovl_shift = 0;
 
     st->ovl_buff[0] = overlap_buffer[0];
-
-    if (stereo)
-        st->ovl_buff[1] = overlap_buffer[1];
-    else
-        st->ovl_buff[1] = st->ovl_buff[0];
+    st->ovl_buff[1] = overlap_buffer[1]; /* ignored if mono */
 
     return true;
 }
 
 static int tdspeed_apply(int32_t *buf_out[2], int32_t *buf_in[2],
-                         int data_len, int last, int out_size)
+                         int data_len, enum tdspeed_ops op, int *consumed)
 /* data_len in samples */
 {
     struct tdspeed_state_s *st = &tdspeed_state;
     int32_t *dest[2];
     int32_t next_frame, prev_frame, src_frame_sz;
-    bool stereo = buf_in[0] != buf_in[1];
-
-    assert(stereo == st->stereo);
+    bool stereo = st->channels > 1;
 
     src_frame_sz = st->shift_max + st->dst_step;
 
@@ -233,7 +275,7 @@ static int tdspeed_apply(int32_t *buf_out[2], int32_t *buf_in[2],
         if (copy > data_len)
             copy = data_len;
 
-        assert(st->ovl_size + copy <= FIXED_BUFSIZE);
+        assert(st->ovl_size + copy <= FIXED_BUFCOUNT);
         memcpy(st->ovl_buff[0] + st->ovl_size, buf_in[0],
                copy * sizeof(int32_t));
 
@@ -241,7 +283,9 @@ static int tdspeed_apply(int32_t *buf_out[2], int32_t *buf_in[2],
             memcpy(st->ovl_buff[1] + st->ovl_size, buf_in[1],
                    copy * sizeof(int32_t));
 
-        if (!last && have + copy < src_frame_sz)
+        *consumed += copy;
+
+        if (op == TDSOP_PROCESS && have + copy < src_frame_sz)
         {
             /* still not enough to process at least one frame */
             st->ovl_size += copy;
@@ -254,13 +298,14 @@ static int tdspeed_apply(int32_t *buf_out[2], int32_t *buf_in[2],
 
         if (copy == data_len)
         {
-            assert(have + copy <= FIXED_BUFSIZE);
-            return tdspeed_apply(buf_out, st->ovl_buff, have+copy, last,
-                               out_size);
+            assert(have + copy <= FIXED_BUFCOUNT);
+            return tdspeed_apply(buf_out, st->ovl_buff, have+copy, op,
+                                 consumed);
         }
 
-        assert(have + copy <= FIXED_BUFSIZE);
-        int i = tdspeed_apply(buf_out, st->ovl_buff, have+copy, -1, out_size);
+        assert(have + copy <= FIXED_BUFCOUNT);
+        int i = tdspeed_apply(buf_out, st->ovl_buff, have+copy,
+                              TDSOP_LAST, consumed);
 
         dest[0] = buf_out[0] + i;
         dest[1] = buf_out[1] + i;
@@ -379,12 +424,12 @@ skip:;
     }
 
     /* now deal with remaining partial frames */
-    if (last == -1)
+    if (op == TDSOP_LAST)
     {
         /* special overlap buffer processing: remember frame shift only */
         st->ovl_shift = next_frame - prev_frame;
     }
-    else if (last != 0)
+    else if (op == TDSOP_PURGE)
     {
         /* last call: purge all remaining data to output buffer */
         int i = data_len - prev_frame;
@@ -400,6 +445,8 @@ skip:;
             memcpy(dest[1], buf_in[1] + prev_frame, i * sizeof(int32_t));
             dest[1] += i;
         }
+
+        *consumed += i;
     }
     else
     {
@@ -408,7 +455,7 @@ skip:;
         int i = (st->ovl_shift < 0) ? next_frame : prev_frame;
         st->ovl_size = data_len - i;
 
-        assert(st->ovl_size <= FIXED_BUFSIZE);
+        assert(st->ovl_size <= FIXED_BUFCOUNT);
         memcpy(st->ovl_buff[0], buf_in[0] + i, st->ovl_size * sizeof(int32_t));
 
         if (stereo)
@@ -418,32 +465,223 @@ skip:;
     return dest[0] - buf_out[0];
 }
 
-long tdspeed_est_output_size()
+
+/** DSP interface **/
+
+static void tdspeed_process_new_format(struct dsp_proc_entry *this,
+                                       struct dsp_buffer **buf_p);
+
+/* Enable or disable the timestretch stage of the CODEC_IDX_AUDIO DSP */
+void dsp_timestretch_enable(bool enabled)
 {
-    return TDSPEED_OUTBUFSIZE;
+    if (enabled != !tdspeed_state.this)
+        return; /* No change ('this' is set while the stage exists) */
+
+    dsp_proc_enable(dsp_get_config(CODEC_IDX_AUDIO), DSP_PROC_TIMESTRETCH,
+                    enabled);
 }
 
-long tdspeed_est_input_size(long size)
+/* Set the timestretch ratio; percent <= 0 selects PITCH_SPEED_100 */
+void dsp_set_timestretch(int32_t percent)
 {
     struct tdspeed_state_s *st = &tdspeed_state;
 
-    size = (size - st->ovl_size) * st->src_step / st->dst_step;
+    if (!st->this)
+        return; /* stage not enabled: the request is dropped */
 
-    if (size < 0)
-        size = 0;
+    if (percent <= 0)
+        percent = PITCH_SPEED_100;
 
-    return size;
+    if (percent == st->factor)
+        return; /* no change */
+
+    dsp_configure(st->dsp, TIMESTRETCH_SET_FACTOR, percent);
 }
 
-int tdspeed_doit(int32_t *src[], int count)
+/* Return the timestretch ratio currently stored in tdspeed_state.factor */
+int32_t dsp_get_timestretch(void)
 {
-    dsp_src = src;
-    count = tdspeed_apply( (int32_t *[2]) { outbuf[0], outbuf[1] },
-                           src, count, 0, TDSPEED_OUTBUFSIZE);
-
-    src[0] = outbuf[0];
-    src[1] = outbuf[1];
-
-    return count;
+    return tdspeed_state.factor;
 }
 
+/* Report whether the timestretch stage is currently set up and usable */
+bool dsp_timestretch_available(void)
+{
+    return tdspeed_state.this != NULL;
+}
+
+/* Stage callback: timestretch *buf_p into dsp_outbuf and pass that on */
+static void tdspeed_process(struct dsp_proc_entry *this,
+                            struct dsp_buffer **buf_p)
+{
+    struct dsp_buffer *src = *buf_p;
+    struct dsp_buffer *dst = &dsp_outbuf;
+
+    *buf_p = dst; /* switch to our buffer */
+
+    int count = dst->remcount;
+
+    if (count > 0)
+        return; /* output remains from an earlier call */
+
+    dst->p32[0] = outbuf[0];
+    dst->p32[1] = outbuf[src->format.num_channels - 1]; /* mono => outbuf[0] */
+
+    if (src->remcount > 0)
+    {
+        dst->bufcount = 0; /* use this to get consumed src */
+        count = tdspeed_apply(dst->p32, src->p32,
+                              MIN(src->remcount, MAX_INPUTCOUNT),
+                              TDSOP_PROCESS, &dst->bufcount);
+
+        /* advance src by samples consumed */
+        if (dst->bufcount > 0)
+            dsp_advance_buffer32(src, dst->bufcount);
+    }
+    /* else no input: count is still <= 0 and is stored back unchanged */
+
+    dst->remcount = count;
+
+    /* inherit in-place processed mask from source buffer */
+    dst->proc_mask = src->proc_mask;
+
+    (void)this; /* unused */
+}
+
+/* Handle input format or stretch factor changes, then resume processing */
+static void tdspeed_process_new_format(struct dsp_proc_entry *this,
+                                       struct dsp_buffer **buf_p)
+{
+    struct dsp_buffer *src = *buf_p;
+    struct dsp_buffer *dst = &dsp_outbuf;
+
+    if (dst->remcount > 0)
+    {
+        *buf_p = dst;
+        return; /* output remains from an earlier call */
+    }
+
+    DSP_PRINT_FORMAT(DSP_PROC_TIMESTRETCH, DSP_PROC_TIMESTRETCH, src->format);
+
+    struct tdspeed_state_s *st = &tdspeed_state;
+    struct dsp_config *dsp = st->dsp;
+    struct sample_format *format = &src->format;
+    unsigned int channels = format->num_channels;
+
+    if (format->codec_frequency != st->samplerate)
+    {
+        /* relevant parameters are changing - all overlap will be discarded */
+        st->channels = channels;
+
+        DEBUGF("  DSP_PROC_TIMESTRETCH- new settings: "
+               "ch:%u chz: %u, %d.%02d%%\n",
+               channels,
+               format->codec_frequency,
+               st->factor / 100, st->factor % 100);
+        bool active = tdspeed_update(format->codec_frequency, st->factor);
+        dsp_proc_activate(dsp, DSP_PROC_TIMESTRETCH, active);
+
+        if (!active)
+        {
+            DEBUGF("  DSP_PROC_TIMESTRETCH- not active\n");
+            dst->format = src->format; /* Keep track */
+            return; /* no more for now; *buf_p still points at src */
+        }
+    }
+    else if (channels != st->channels)
+    {
+        /* channel count transition - have to make old data in overlap
+           buffer compatible with new format */
+        DEBUGF("  DSP_PROC_TIMESTRETCH- new ch count: %u=>%u\n",
+               st->channels, channels);
+
+        st->channels = channels;
+
+        if (channels > 1)
+        {
+            /* mono->stereo: Process the old mono as stereo now */
+            memcpy(st->ovl_buff[1], st->ovl_buff[0],
+                   st->ovl_size * sizeof (int32_t));
+        }
+        else
+        {
+            /* stereo->mono: average the two old channels into channel 0 */
+            for (int i = 0; i < st->ovl_size; i++)
+            {
+                st->ovl_buff[0][i] = st->ovl_buff[0][i] / 2 +
+                                     st->ovl_buff[1][i] / 2;
+            }
+        }
+    }
+
+    struct sample_format f = *format; /* copy taken before the source ack */
+    format_change_ack(format);
+
+    if (EQU_SAMPLE_FORMAT(f, dst->format))
+    {
+        DEBUGF("  DSP_PROC_TIMESTRETCH- same dst format\n");
+        format_change_ack(&f); /* nothing changed that matters downstream */
+    }
+
+    dst->format = f;
+
+    /* return to normal processing */
+    this->process[0] = tdspeed_process;
+    dsp_proc_call(this, buf_p, 0);
+}
+
+/* DSP message hook: handles init, flush, stage open/close and stretch factor
+ * changes for timestretch; returns -1 only when buffer allocation fails */
+static intptr_t tdspeed_configure(struct dsp_proc_entry *this,
+                                  struct dsp_config *dsp,
+                                  unsigned int setting,
+                                  intptr_t value)
+{
+    struct tdspeed_state_s *st = &tdspeed_state;
+
+    switch (setting)
+    {
+    case DSP_INIT:
+        /* everything is at 100% until dsp_set_timestretch is called with
+           some other value and timestretch is enabled at the time */
+        if (value == CODEC_IDX_AUDIO)
+            st->factor = PITCH_SPEED_100;
+        break;
+
+    case DSP_FLUSH:
+        tdspeed_flush();
+        break;
+
+    case DSP_PROC_INIT:
+        if (!tdspeed_alloc_buffers())
+            return -1; /* fail the init */
+
+        st->this = this;
+        st->dsp = dsp;
+        this->ip_mask = 0; /* not in-place */
+        this->process[0] = tdspeed_process;
+        this->process[1] = tdspeed_process_new_format;
+        break;
+
+    case DSP_PROC_CLOSE:
+        st->this = NULL; /* marks timestretch as unavailable */
+        st->factor = PITCH_SPEED_100;
+        dsp_outbuf.remcount = 0; /* drop any output not yet passed on */
+        tdspeed_free_buffers();
+        break;
+
+    case TIMESTRETCH_SET_FACTOR:
+        /* force update as a format change */
+        st->samplerate = 0;
+        st->factor = (int32_t)value;
+        st->this->process[0] = tdspeed_process_new_format;
+        dsp_proc_activate(st->dsp, DSP_PROC_TIMESTRETCH, true);
+        break;
+    }
+
+    return 1;
+}
+
+/* Database entry: ties the TIMESTRETCH stage to tdspeed_configure */
+DSP_PROC_DB_ENTRY(TIMESTRETCH,
+                  tdspeed_configure);
diff --git a/lib/rbcodec/dsp/tdspeed.h b/lib/rbcodec/dsp/tdspeed.h
index e91eeb1701..ca8a7846a4 100644
--- a/lib/rbcodec/dsp/tdspeed.h
+++ b/lib/rbcodec/dsp/tdspeed.h
@@ -23,12 +23,8 @@
 #ifndef _TDSPEED_H
 #define _TDSPEED_H
 
-#include "dsp.h"
-
-#define TDSPEED_OUTBUFSIZE 4096
-
-/* some #define functions to get the pitch, stretch and speed values based on */
-/* two known values.  Remember that params are alphabetical.                  */
+/* some #define functions to get the pitch, stretch and speed values based
+ * upon two known values.  Remember that params are alphabetical. */
 #define GET_SPEED(pitch, stretch) \
     ((pitch * stretch + PITCH_SPEED_100 / 2L) / PITCH_SPEED_100)
 #define GET_PITCH(speed, stretch) \
@@ -36,14 +32,12 @@
 #define GET_STRETCH(pitch, speed) \
     ((speed * PITCH_SPEED_100 + pitch   / 2L) / pitch)
 
-void tdspeed_init(void);
-void tdspeed_finish(void);
-bool tdspeed_config(int samplerate, bool stereo, int32_t factor);
-long tdspeed_est_output_size(void);
-long tdspeed_est_input_size(long size);
-int tdspeed_doit(int32_t *src[], int count);
-
 #define STRETCH_MAX (250L * PITCH_SPEED_PRECISION) /* 250% */
 #define STRETCH_MIN (35L  * PITCH_SPEED_PRECISION) /* 35%  */
 
-#endif
+void dsp_timestretch_enable(bool enable);  /* enable/disable the stage */
+void dsp_set_timestretch(int32_t percent); /* <= 0 selects PITCH_SPEED_100 */
+int32_t dsp_get_timestretch(void);         /* current stretch ratio */
+bool dsp_timestretch_available(void);      /* true while the stage is set up */
+
+#endif /* _TDSPEED_H */
diff --git a/lib/rbcodec/dsp/tone_controls.c b/lib/rbcodec/dsp/tone_controls.c
new file mode 100644
index 0000000000..0bd4a447d7
--- /dev/null
+++ b/lib/rbcodec/dsp/tone_controls.c
@@ -0,0 +1,118 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2007 Thom Johansen
+ * Copyright (C) 2012 Michael Sevakis
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+#include "config.h"
+#include "system.h"
+#include "dsp.h"
+#include <string.h>
+#include "dsp_proc_entry.h"
+#include "dsp_filter.h"
+
+/* These apply to all DSP streams to remain as consistent as possible with
+ * the behavior of hardware tone controls */
+
+/* Cutoffs in Hz - not adjustable for now */
+static const unsigned int tone_bass_cutoff = 200;
+static const unsigned int tone_treble_cutoff = 3500;
+
+/* Current bass and treble gain values (both 0 leaves the stage disabled) */
+static int tone_bass = 0;
+static int tone_treble = 0;
+
+/* One filter per DSP, indexed by DSP id */
+static struct dsp_filter tone_filters[DSP_COUNT] IBSS_ATTR;
+
+/* Recompute the filter coefficients from the current bass/treble settings
+ * and the given prescale, then update and enable/disable every DSP's stage */
+void tone_set_prescale(int prescale)
+{
+    int bass = tone_bass;
+    int treble = tone_treble;
+    bool enable = bass != 0 || treble != 0; /* flat settings need no stage */
+
+    struct dsp_filter tone_filter; /* Temp to hold new version */
+    filter_bishelf_coefs(0xffffffff / NATIVE_FREQUENCY * tone_bass_cutoff,
+                         0xffffffff / NATIVE_FREQUENCY * tone_treble_cutoff,
+                         bass, treble, -prescale, &tone_filter);
+
+    struct dsp_config *dsp;
+    for (int i = 0; (dsp = dsp_get_config(i)); i++)
+    {
+        struct dsp_filter *filter = &tone_filters[i];
+        filter_copy(filter, &tone_filter);
+        dsp_proc_enable(dsp, DSP_PROC_TONE_CONTROLS, enable);
+        /* A stage that was just disabled must not be flushed or activated */
+        if (enable && !dsp_proc_active(dsp, DSP_PROC_TONE_CONTROLS))
+        {
+            filter_flush(filter); /* Going online */
+            dsp_proc_activate(dsp, DSP_PROC_TONE_CONTROLS, true);
+        }
+    }
+}
+
+/* Prescaler is always set after setting bass/treble, so we wait with
+ * calculating coefs until such time. */
+
+/* Change the bass setting (applied by the next tone_set_prescale call) */
+void tone_set_bass(int bass)
+{
+    tone_bass = bass;
+}
+
+/* Change the treble setting (applied by the next tone_set_prescale call) */
+void tone_set_treble(int treble)
+{
+    tone_treble = treble;
+}
+
+/* Apply the tone control filter in-place to the remaining samples in *buf_p */
+static void tone_process(struct dsp_proc_entry *this,
+                         struct dsp_buffer **buf_p)
+{
+    struct dsp_buffer *buf = *buf_p;
+    filter_process((void *)this->data, buf->p32, buf->remcount,
+                   buf->format.num_channels);
+}
+
+/* DSP message hook: binds this stage to its filter and flushes the filter */
+static intptr_t tone_configure(struct dsp_proc_entry *this,
+                               struct dsp_config *dsp,
+                               unsigned int setting,
+                               intptr_t value)
+{
+    switch (setting)
+    {
+    case DSP_PROC_INIT:
+        if (value != 0)
+            break; /* NOTE(review): nonzero presumably means already set up */
+        this->data = (intptr_t)&tone_filters[dsp_get_id(dsp)];
+        this->process[0] = tone_process; /* then fall through to flush */
+    case DSP_FLUSH:
+        filter_flush((struct dsp_filter *)this->data);
+        break;
+    }
+
+    return 1;
+}
+
+/* Database entry: ties the TONE_CONTROLS stage to tone_configure */
+DSP_PROC_DB_ENTRY(TONE_CONTROLS,
+                  tone_configure);
diff --git a/lib/rbcodec/dsp/tone_controls.h b/lib/rbcodec/dsp/tone_controls.h
new file mode 100644
index 0000000000..1e27ecf800
--- /dev/null
+++ b/lib/rbcodec/dsp/tone_controls.h
@@ -0,0 +1,28 @@
+/***************************************************************************
+ *             __________               __   ___.
+ *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
+ *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
+ *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
+ *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
+ *                     \/            \/     \/    \/            \/
+ * $Id$
+ *
+ * Copyright (C) 2007 Thom Johansen
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+#ifndef TONE_CONTROLS_H
+#define TONE_CONTROLS_H
+
+void tone_set_prescale(int prescale); /* applies the bass/treble settings */
+void tone_set_bass(int bass);         /* stored until tone_set_prescale */
+void tone_set_treble(int treble);     /* stored until tone_set_prescale */
+
+#endif /* TONE_CONTROLS_H */