diff --git a/apps/eq.c b/apps/eq.c
index 8ad886fc0c..3d2f8d133d 100644
--- a/apps/eq.c
+++ b/apps/eq.c
@@ -24,6 +24,8 @@
    Slightly faster calculation can be done by deriving forms which use tan()
    instead of cos() and sin(), but the latter are far easier to use when doing
    fixed point math, and performance is not a big point in the calculation part.
+   All the 'a' filter coefficients are negated so we can use only additions
+   in the filtering equation.
    We realise the filters as a second order direct form 1 structure. Direct
    form 1 was chosen because of better numerical properties for fixed point
    implementations.
@@ -153,8 +155,8 @@ void eq_pk_coefs(unsigned long cutoff, unsigned long Q, long db, long *c)
     c[0] = DIV64(b0, a0, 28);
     c[1] = DIV64(b1, a0, 28);
     c[2] = DIV64(b2, a0, 28);
-    c[3] = DIV64(a1, a0, 28);
-    c[4] = DIV64(a2, a0, 28);
+    c[3] = DIV64(-a1, a0, 28);
+    c[4] = DIV64(-a2, a0, 28);
 }
 
 /* Calculate coefficients for lowshelf filter */
@@ -180,8 +182,8 @@ void eq_ls_coefs(unsigned long cutoff, unsigned long Q, long db, long *c)
     c[0] = DIV64(b0, a0, 24);
     c[1] = DIV64(b1, a0, 24);
     c[2] = DIV64(b2, a0, 24);
-    c[3] = DIV64(a1, a0, 24);
-    c[4] = DIV64(a2, a0, 24);
+    c[3] = DIV64(-a1, a0, 24);
+    c[4] = DIV64(-a2, a0, 24);
 }
 
 /* Calculate coefficients for highshelf filter */
@@ -207,8 +209,8 @@ void eq_hs_coefs(unsigned long cutoff, unsigned long Q, long db, long *c)
     c[0] = DIV64(b0, a0, 24);
     c[1] = DIV64(b1, a0, 24);
     c[2] = DIV64(b2, a0, 24);
-    c[3] = DIV64(a1, a0, 24);
-    c[4] = DIV64(a2, a0, 24);
+    c[3] = DIV64(-a1, a0, 24);
+    c[4] = DIV64(-a2, a0, 24);
 }
 
 #if !defined(CPU_COLDFIRE) || defined(SIMULATOR)
diff --git a/apps/eq_cf.S b/apps/eq_cf.S
index 3876ca72d6..0a34d7707e 100644
--- a/apps/eq_cf.S
+++ b/apps/eq_cf.S
@@ -26,21 +26,24 @@ eq_filter:
     movem.l (11*4+16, %sp), %d6-%d7   | load num. channels and shift count
     movem.l (%a5), %a0-%a4            | load coefs
     lea.l (5*4, %a5), %a5             | point to filter history
-    moveq.l #2, %d6                   | number of channels (hardcode to stereo)
 
 .filterloop:
     move.l (11*4+4, %sp), %a6         | load input channel pointer
+    addq.l #4, (11*4+4, %sp)          | point x to next channel
     move.l (%a6), %a6
     move.l (11*4+12, %sp), %d5        | number of samples
-    addq.l #4, (11*4+4, %sp)          | point x to next channel
     movem.l (%a5), %d0-%d3            | load filter history
 .loop:
-    move.l (%a6), %d4
-    mac.l %a0, %d4, %acc0               | acc = b0*x[i]
-    mac.l %a1, %d0, %acc0               | acc += b1*x[i - 1]
+    /* Direct form 1 filtering code. We assume DSP has put EMAC in frac mode.
+       y[i] = b0*x[i] + b1*x[i - 1] + b2*x[i - 2] + a1*y[i - 1] + a2*y[i - 2],
+       where y[] is output and x[] is input. This is performed out of order
+       to do parallel load of input value.
+     */
+    mac.l %a1, %d0, (%a6), %d4, %acc0   | acc = b1*x[i - 1], x[i] -> d4
     mac.l %a2, %d1, %acc0               | acc += b2*x[i - 2]
-    msac.l %a3, %d2, %acc0              | acc -= a1*y[i - 1]
-    msac.l %a4, %d3, %acc0              | acc -= a2*y[i - 2]
+    mac.l %a0, %d4, %acc0               | acc += b0*x[i]
+    mac.l %a3, %d2, %acc0               | acc += a1*y[i - 1]
+    mac.l %a4, %d3, %acc0               | acc += a2*y[i - 2]
     move.l %d0, %d1                     | fix history
     move.l %d4, %d0
     move.l %d2, %d3