
codeclib/libtremor: Clean up duplication of inline funcs, constify the ones in codeclib and copy over the slightly faster MULT31_SHIFT15 from tremor for cf.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@30578 a1c6a512-1295-4272-9138-f99709370657
Nils Wallménius 2011-09-21 15:38:54 +00:00
parent ccaf55a8ae
commit 151424a6fd
9 changed files with 85 additions and 275 deletions
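
For orientation, the three fixed-point primitives that keep reappearing in the hunks below (MULT32, MULT31, MULT31_SHIFT15) can be sketched in portable C. This is only a reference sketch mirroring the generic 64-bit fallback visible in one of the header hunks further down; the ref_ names are illustrative and are not identifiers from the tree:

    #include <stdint.h>

    /* high 32 bits of the 64-bit product */
    static inline int32_t ref_MULT32(int32_t x, int32_t y)
    {
        return (int32_t)(((int64_t)x * (int64_t)y) >> 32);
    }

    /* MULT31 is MULT32 shifted left once (both operands treated as Q31 fractions) */
    static inline int32_t ref_MULT31(int32_t x, int32_t y)
    {
        return ref_MULT32(x, y) << 1;
    }

    /* MULT31_SHIFT15: low 32 bits of the 64-bit product shifted right by 15;
       the same value the ((uint32_t)lo >> 15) | (hi << 17) fallback produces */
    static inline int32_t ref_MULT31_SHIFT15(int32_t x, int32_t y)
    {
        return (int32_t)(((int64_t)x * (int64_t)y) >> 15);
    }

The per-architecture asm hunks below implement these operations, plus the vect_add/vect_copy/vect_mult_fw/vect_mult_bw helpers that this commit deduplicates between codeclib and libtremor.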

View file

@@ -166,7 +166,7 @@ static inline void XNPROD31(int32_t a, int32_t b,
/* asm versions of vector operations for block.c, window.c */
static inline
void vect_add(int32_t *x, int32_t *y, int n)
void vect_add(int32_t *x, const int32_t *y, int n)
{
while (n>=4) {
asm volatile ("ldmia %[x], {r0, r1, r2, r3};"
@@ -190,7 +190,7 @@ void vect_add(int32_t *x, int32_t *y, int n)
}
static inline
void vect_copy(int32_t *x, int32_t *y, int n)
void vect_copy(int32_t *x, const int32_t *y, int n)
{
while (n>=4) {
asm volatile ("ldmia %[y]!, {r0, r1, r2, r3};"
@@ -208,7 +208,7 @@ void vect_copy(int32_t *x, int32_t *y, int n)
}
static inline
void vect_mult_fw(int32_t *data, int32_t *window, int n)
void vect_mult_fw(int32_t *data, const int32_t *window, int n)
{
while (n>=4) {
asm volatile ("ldmia %[d], {r0, r1, r2, r3};"
@@ -237,7 +237,7 @@ void vect_mult_fw(int32_t *data, int32_t *window, int n)
}
static inline
void vect_mult_bw(int32_t *data, int32_t *window, int n)
void vect_mult_bw(int32_t *data, const int32_t *window, int n)
{
while (n>=4) {
asm volatile ("ldmia %[d], {r0, r1, r2, r3};"

View file

@@ -44,17 +44,17 @@ static inline int32_t MULT31(int32_t x, int32_t y) {
}
#define INCL_OPTIMIZED_MULT31_SHIFT15
/* NOTE: this requires that the emac is *NOT* rounding */
static inline int32_t MULT31_SHIFT15(int32_t x, int32_t y) {
int32_t r;
asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply */
"mulu.l %[y], %[x];" /* get lower half, avoid emac stall */
"movclr.l %%acc0, %[r];" /* get higher half */
"asl.l #8, %[r];" /* hi<<16, plus one free */
"asl.l #8, %[r];"
"swap %[r];" /* hi<<16, plus one free */
"lsr.l #8, %[x];" /* (unsigned)lo >> 15 */
"lsr.l #7, %[x];"
"or.l %[x], %[r];" /* logical-or results */
"move.w %[x], %[r];" /* logical-or results */
: [r] "=&d" (r), [x] "+d" (x)
: [y] "d" (y)
: "cc");
@@ -202,7 +202,7 @@ void vect_add(int32_t *x, const int32_t *y, int n)
}
static inline
void vect_copy(int32_t *x, int32_t *y, int n)
void vect_copy(int32_t *x, const int32_t *y, int n)
{
/* align to 16 bytes */
while(n>0 && (int)x&15) {
@@ -228,7 +228,7 @@ void vect_copy(int32_t *x, int32_t *y, int n)
}
static inline
void vect_mult_fw(int32_t *data, int32_t *window, int n)
void vect_mult_fw(int32_t *data, const int32_t *window, int n)
{
/* ensure data is aligned to 16-bytes */
while(n>0 && (int)data&15) {
@@ -282,7 +282,7 @@ void vect_mult_fw(int32_t *data, int32_t *window, int n)
}
static inline
void vect_mult_bw(int32_t *data, int32_t *window, int n)
void vect_mult_bw(int32_t *data, const int32_t *window, int n)
{
/* ensure at least data is aligned to 16-bytes */
while(n>0 && (int)data&15) {

View file

@@ -187,7 +187,7 @@ static inline void XNPROD31(int32_t a, int32_t b,
#define _V_VECT_OPS
static inline
void vect_add(int32_t *x, int32_t *y, int n)
void vect_add(int32_t *x, const int32_t *y, int n)
{
while (n>0) {
*x++ += *y++;
@@ -196,7 +196,7 @@ void vect_add(int32_t *x, int32_t *y, int n)
}
static inline
void vect_copy(int32_t *x, int32_t *y, int n)
void vect_copy(int32_t *x, const int32_t *y, int n)
{
while (n>0) {
*x++ = *y++;
@@ -205,7 +205,7 @@ void vect_copy(int32_t *x, int32_t *y, int n)
}
static inline
void vect_mult_fw(int32_t *data, int32_t *window, int n)
void vect_mult_fw(int32_t *data, const int32_t *window, int n)
{
while(n>0) {
*data = MULT31(*data, *window);
@@ -216,7 +216,7 @@ void vect_mult_fw(int32_t *data, int32_t *window, int n)
}
static inline
void vect_mult_bw(int32_t *data, int32_t *window, int n)
void vect_mult_bw(int32_t *data, const int32_t *window, int n)
{
while(n>0) {
*data = MULT31(*data, *window);

View file

@@ -19,33 +19,8 @@
#if !defined(_V_WIDE_MATH) && !defined(_LOW_ACCURACY_)
#define _V_WIDE_MATH
static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) {
int lo,hi;
asm volatile("smull\t%0, %1, %2, %3"
: "=&r"(lo),"=&r"(hi)
: "%r"(x),"r"(y) );
return(hi);
}
static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) {
return MULT32(x,y)<<1;
}
static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
int lo,hi;
asm volatile("smull %0, %1, %2, %3\n\t"
"movs %0, %0, lsr #15\n\t"
"adc %1, %0, %1, lsl #17\n\t"
: "=&r"(lo),"=&r"(hi)
: "%r"(x),"r"(y)
: "cc");
return(hi);
}
#ifndef _V_VECT_OPS
#define _V_VECT_OPS
#ifndef _TREMOR_VECT_OPS
#define _TREMOR_VECT_OPS
/* asm versions of vector operations for block.c, window.c */
/* SOME IMPORTANT NOTES: this implementation of vect_mult_bw does
NOT do a final shift, meaning that the result of vect_mult_bw is
@@ -114,7 +89,7 @@ void vect_add_left_right(ogg_int32_t *x, const ogg_int32_t *y, int n)
#if ARM_ARCH >= 6
static inline
void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
void ogg_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
{
/* Note, mult_fw uses MULT31 */
do{
@@ -139,7 +114,7 @@ void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
}
#else
static inline
void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
void ogg_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
{
/* Note, mult_fw uses MULT31 */
do{
@@ -166,7 +141,7 @@ void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
#if ARM_ARCH >= 6
static inline
void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
void ogg_vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
{
/* NOTE mult_bw uses MULT_32 i.e. doesn't shift result left at end */
/* On ARM, we can do the shift at the same time as the overlap-add */
@@ -187,7 +162,7 @@ void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
}
#else
static inline
void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
void ogg_vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
{
/* NOTE mult_bw uses MULT_32 i.e. doesn't shift result left at end */
/* On ARM, we can do the shift at the same time as the overlap-add */
@@ -207,14 +182,7 @@ void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
} while (n);
}
#endif
static inline void vect_copy(ogg_int32_t *x, const ogg_int32_t *y, int n)
{
memcpy(x,y,n*sizeof(ogg_int32_t));
}
#endif
#endif
#ifndef _V_LSP_MATH_ASM

View file

@@ -28,226 +28,37 @@
#define MB()
static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) {
asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply & shift */
"movclr.l %%acc0, %[x];" /* move & clear acc */
"asr.l #1, %[x];" /* no overflow test */
: [x] "+&d" (x)
: [y] "r" (y)
: "cc");
return x;
}
static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) {
asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply */
"movclr.l %%acc0, %[x];" /* move and clear */
: [x] "+&r" (x)
: [y] "r" (y)
: "cc");
return x;
}
static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
ogg_int32_t r;
asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply */
"mulu.l %[y], %[x];" /* get lower half, avoid emac stall */
"movclr.l %%acc0, %[r];" /* get higher half */
"swap %[r];" /* hi<<16, plus one free */
"lsr.l #8, %[x];" /* (unsigned)lo >> 15 */
"lsr.l #7, %[x];"
"move.w %[x], %[r];" /* logical-or results */
: [r] "=&d" (r), [x] "+d" (x)
: [y] "d" (y)
: "cc");
return r;
}
#ifndef _V_VECT_OPS
#define _V_VECT_OPS
/* asm versions of vector operations for block.c, window.c */
/* assumes MAC is initialized & accumulators cleared */
static inline
void vect_add_right_left(ogg_int32_t *x, const ogg_int32_t *y, int n)
{
/* align to 16 bytes */
while(n>0 && (int)x&15) {
*x++ += *y++;
n--;
}
asm volatile ("bra 1f;"
"0:" /* loop start */
"movem.l (%[x]), %%d0-%%d3;" /* fetch values */
"movem.l (%[y]), %%a0-%%a3;"
/* add */
"add.l %%a0, %%d0;"
"add.l %%a1, %%d1;"
"add.l %%a2, %%d2;"
"add.l %%a3, %%d3;"
/* store and advance */
"movem.l %%d0-%%d3, (%[x]);"
"lea.l (4*4, %[x]), %[x];"
"lea.l (4*4, %[y]), %[y];"
"subq.l #4, %[n];" /* done 4 elements */
"1: cmpi.l #4, %[n];"
"bge 0b;"
: [n] "+d" (n), [x] "+a" (x), [y] "+a" (y)
: : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
"cc", "memory");
/* add final elements */
while (n>0) {
*x++ += *y++;
n--;
}
}
#ifndef _TREMOR_VECT_OPS
#define _TREMOR_VECT_OPS
static inline
void vect_add_left_right(ogg_int32_t *x, const ogg_int32_t *y, int n)
{
/* coldfire asm has symmetrical versions of vect_add_right_left
and vect_add_left_right (since symmetrical versions of
vect_mult_fw and vect_mult_bw i.e. both use MULT31) */
vect_add_right_left(x, y, n );
vect_add(x, y, n );
}
static inline
void vect_copy(ogg_int32_t *x, const ogg_int32_t *y, int n)
void vect_add_right_left(ogg_int32_t *x, const ogg_int32_t *y, int n)
{
/* align to 16 bytes */
while(n>0 && (int)x&15) {
*x++ = *y++;
n--;
}
asm volatile ("bra 1f;"
"0:" /* loop start */
"movem.l (%[y]), %%d0-%%d3;" /* fetch values */
"movem.l %%d0-%%d3, (%[x]);" /* store */
"lea.l (4*4, %[x]), %[x];" /* advance */
"lea.l (4*4, %[y]), %[y];"
"subq.l #4, %[n];" /* done 4 elements */
"1: cmpi.l #4, %[n];"
"bge 0b;"
: [n] "+d" (n), [x] "+a" (x), [y] "+a" (y)
: : "%d0", "%d1", "%d2", "%d3", "cc", "memory");
/* copy final elements */
while (n>0) {
*x++ = *y++;
n--;
}
/* coldfire asm has symmetrical versions of vect_add_right_left
and vect_add_left_right (since symmetrical versions of
vect_mult_fw and vect_mult_bw i.e. both use MULT31) */
vect_add(x, y, n );
}
static inline
void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
void ogg_vect_mult_fw(int32_t *data, const int32_t *window, int n)
{
/* ensure data is aligned to 16-bytes */
while(n>0 && (int)data&15) {
*data = MULT31(*data, *window);
data++;
window++;
n--;
}
asm volatile ("movem.l (%[d]), %%d0-%%d3;" /* loop start */
"movem.l (%[w]), %%a0-%%a3;" /* pre-fetch registers */
"lea.l (4*4, %[w]), %[w];"
"bra 1f;" /* jump to loop condition */
"0:" /* loop body */
/* multiply and load next window values */
"mac.l %%d0, %%a0, (%[w])+, %%a0, %%acc0;"
"mac.l %%d1, %%a1, (%[w])+, %%a1, %%acc1;"
"mac.l %%d2, %%a2, (%[w])+, %%a2, %%acc2;"
"mac.l %%d3, %%a3, (%[w])+, %%a3, %%acc3;"
"movclr.l %%acc0, %%d0;" /* get the products */
"movclr.l %%acc1, %%d1;"
"movclr.l %%acc2, %%d2;"
"movclr.l %%acc3, %%d3;"
/* store and advance */
"movem.l %%d0-%%d3, (%[d]);"
"lea.l (4*4, %[d]), %[d];"
"movem.l (%[d]), %%d0-%%d3;"
"subq.l #4, %[n];" /* done 4 elements */
"1: cmpi.l #4, %[n];"
"bge 0b;"
/* multiply final elements */
"tst.l %[n];"
"beq 1f;" /* n=0 */
"mac.l %%d0, %%a0, %%acc0;"
"movclr.l %%acc0, %%d0;"
"move.l %%d0, (%[d])+;"
"subq.l #1, %[n];"
"beq 1f;" /* n=1 */
"mac.l %%d1, %%a1, %%acc0;"
"movclr.l %%acc0, %%d1;"
"move.l %%d1, (%[d])+;"
"subq.l #1, %[n];"
"beq 1f;" /* n=2 */
/* otherwise n = 3 */
"mac.l %%d2, %%a2, %%acc0;"
"movclr.l %%acc0, %%d2;"
"move.l %%d2, (%[d])+;"
"1:"
: [n] "+d" (n), [d] "+a" (data), [w] "+a" (window)
: : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
"cc", "memory");
vect_mult_fw(data, window, n);
}
static inline
void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
void ogg_vect_mult_bw(int32_t *data, const int32_t *window, int n)
{
/* ensure at least data is aligned to 16-bytes */
while(n>0 && (int)data&15) {
*data = MULT31(*data, *window);
data++;
window--;
n--;
vect_mult_bw(data, window, n);
}
asm volatile ("lea.l (-3*4, %[w]), %[w];" /* loop start */
"movem.l (%[d]), %%d0-%%d3;" /* pre-fetch registers */
"movem.l (%[w]), %%a0-%%a3;"
"bra 1f;" /* jump to loop condition */
"0:" /* loop body */
/* multiply and load next window value */
"mac.l %%d0, %%a3, -(%[w]), %%a3, %%acc0;"
"mac.l %%d1, %%a2, -(%[w]), %%a2, %%acc1;"
"mac.l %%d2, %%a1, -(%[w]), %%a1, %%acc2;"
"mac.l %%d3, %%a0, -(%[w]), %%a0, %%acc3;"
"movclr.l %%acc0, %%d0;" /* get the products */
"movclr.l %%acc1, %%d1;"
"movclr.l %%acc2, %%d2;"
"movclr.l %%acc3, %%d3;"
/* store and advance */
"movem.l %%d0-%%d3, (%[d]);"
"lea.l (4*4, %[d]), %[d];"
"movem.l (%[d]), %%d0-%%d3;"
"subq.l #4, %[n];" /* done 4 elements */
"1: cmpi.l #4, %[n];"
"bge 0b;"
/* multiply final elements */
"tst.l %[n];"
"beq 1f;" /* n=0 */
"mac.l %%d0, %%a3, %%acc0;"
"movclr.l %%acc0, %%d0;"
"move.l %%d0, (%[d])+;"
"subq.l #1, %[n];"
"beq 1f;" /* n=1 */
"mac.l %%d1, %%a2, %%acc0;"
"movclr.l %%acc0, %%d1;"
"move.l %%d1, (%[d])+;"
"subq.l #1, %[n];"
"beq 1f;" /* n=2 */
/* otherwise n = 3 */
"mac.l %%d2, %%a1, %%acc0;"
"movclr.l %%acc0, %%d2;"
"move.l %%d2, (%[d])+;"
"1:"
: [n] "+d" (n), [d] "+a" (data), [w] "+a" (window)
: : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
"cc", "memory");
}
#endif
#endif
#endif
#endif

View file

@@ -22,6 +22,8 @@
#include "ivorbiscodec.h"
#include "os_types.h"
#include "codeclib_misc.h"
#include "asm_arm.h"
#include "asm_mcf5249.h"
@@ -37,7 +39,7 @@ extern int _ilog(unsigned int v);
#ifndef _LOW_ACCURACY_
/* 64 bit multiply */
/* #include <sys/types.h> */
#if 0
#if BYTE_ORDER==LITTLE_ENDIAN
union magic {
struct {
@@ -70,7 +72,7 @@ static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
magic.whole = (ogg_int64_t)x * y;
return ((ogg_uint32_t)(magic.halves.lo)>>15) | ((magic.halves.hi)<<17);
}
#endif
#else
/* 32 bit multiply, more portable but less accurate */
@@ -119,10 +121,11 @@ static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
/* replaced XPROD32 with a macro to avoid memory reference
_x, _y are the results (must be l-values) */
/*
#define XPROD32(_a, _b, _t, _v, _x, _y) \
{ (_x)=MULT32(_a,_t)+MULT32(_b,_v); \
(_y)=MULT32(_b,_t)-MULT32(_a,_v); }
*/
#ifdef __i386__
@@ -134,7 +137,7 @@ static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
*(_y)=MULT31(_b,_t)+MULT31(_a,_v); }
#else
/*
static inline void XPROD31(ogg_int32_t a, ogg_int32_t b,
ogg_int32_t t, ogg_int32_t v,
ogg_int32_t *x, ogg_int32_t *y)
@@ -150,8 +153,36 @@ static inline void XNPROD31(ogg_int32_t a, ogg_int32_t b,
*x = MULT31(a, t) - MULT31(b, v);
*y = MULT31(b, t) + MULT31(a, v);
}
*/
#endif
#ifndef _TREMOR_VECT_OPS
#define _TREMOR_VECT_OPS
static inline
void vect_add_left_right(ogg_int32_t *x, const ogg_int32_t *y, int n)
{
vect_add(x, y, n );
}
static inline
void vect_add_right_left(ogg_int32_t *x, const ogg_int32_t *y, int n)
{
vect_add(x, y, n );
}
static inline
void ogg_vect_mult_fw(int32_t *data, const int32_t *window, int n)
{
vect_mult_fw(data, window, n);
}
static inline
void ogg_vect_mult_bw(int32_t *data, const int32_t *window, int n)
{
vect_mult_bw(data, window, n);
}
#endif
#if 0
#ifndef _V_VECT_OPS
#define _V_VECT_OPS
@@ -174,7 +205,7 @@ void vect_add_left_right(ogg_int32_t *x, const ogg_int32_t *y, int n)
}
static inline
void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
void ogg_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
{
while(n>0) {
*data = MULT31(*data, *window);
@@ -185,7 +216,7 @@ void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
}
static inline
void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
void ogg_vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
{
while(n>0) {
*data = MULT31(*data, *window);
@@ -202,8 +233,6 @@ static inline void vect_copy(ogg_int32_t *x, const ogg_int32_t *y, int n)
}
#endif
#endif
static inline ogg_int32_t VFLOAT_MULT(ogg_int32_t a,ogg_int32_t ap,
ogg_int32_t b,ogg_int32_t bp,
ogg_int32_t *p){
@@ -218,7 +247,8 @@ static inline ogg_int32_t VFLOAT_MULT(ogg_int32_t a,ogg_int32_t ap,
}else
return 0;
}
#endif
#endif
static inline ogg_int32_t VFLOAT_MULTI(ogg_int32_t a,ogg_int32_t ap,
ogg_int32_t i,
ogg_int32_t *p){
@@ -226,7 +256,7 @@ static inline ogg_int32_t VFLOAT_MULTI(ogg_int32_t a,ogg_int32_t ap,
int ip=_ilog(abs(i))-31;
return VFLOAT_MULT(a,ap,i<<-ip,ip,p);
}
#if 0
static inline ogg_int32_t VFLOAT_ADD(ogg_int32_t a,ogg_int32_t ap,
ogg_int32_t b,ogg_int32_t bp,
ogg_int32_t *p){
@@ -268,6 +298,6 @@ static inline ogg_int32_t VFLOAT_ADD(ogg_int32_t a,ogg_int32_t ap,
}
return(a);
}
#endif
#endif

View file

@@ -19,6 +19,7 @@
#ifndef _OS_TYPES_H
#define _OS_TYPES_H
#include <stdint.h>
#include <stdlib.h>
#include <codecs.h>
@@ -49,9 +50,9 @@ void ogg_free(void *ptr);
void iram_malloc_init(void);
void *iram_malloc(size_t size);
typedef short ogg_int16_t;
typedef int ogg_int32_t;
typedef unsigned int ogg_uint32_t;
typedef long long ogg_int64_t;
typedef int16_t ogg_int16_t;
typedef int32_t ogg_int32_t;
typedef uint32_t ogg_uint32_t;
typedef int64_t ogg_int64_t;
#endif /* _OS_TYPES_H */

View file

@@ -224,7 +224,7 @@ static ogg_int32_t *_book_unquantize(const static_codebook *b,int n,
int indexdiv=1;
for(k=0;k<b->dim;k++){
int index= (j/indexdiv)%quantvals;
int point=0;
ogg_int32_t point=0;
int val=VFLOAT_MULTI(delta,delpoint,
abs(b->quantlist[index]),&point);
@@ -258,7 +258,7 @@ static ogg_int32_t *_book_unquantize(const static_codebook *b,int n,
int lastpoint=0;
for(k=0;k<b->dim;k++){
int point=0;
ogg_int32_t point=0;
int val=VFLOAT_MULTI(delta,delpoint,
abs(b->quantlist[j*b->dim+k]),&point);

View file

@@ -73,8 +73,8 @@ void _vorbis_apply_window(ogg_int32_t *d,const void *window_p[2],
outside that region are not added (so don't need to be zerod). see block.c
memset((void *)&d[0], 0, sizeof(ogg_int32_t)*leftbegin); */
vect_mult_fw(&d[leftbegin], &window[lW][0], leftend-leftbegin);
vect_mult_bw(&d[rightbegin], &window[nW][rn/2-1], rightend-rightbegin);
ogg_vect_mult_fw(&d[leftbegin], &window[lW][0], leftend-leftbegin);
ogg_vect_mult_bw(&d[rightbegin], &window[nW][rn/2-1], rightend-rightbegin);
/* Again - memset not needed
memset((void *)&d[rightend], 0, sizeof(ogg_int32_t)*(n-rightend)); */