
codeclib/libtremor: Clean up duplication of inline funcs, constify the ones in codeclib and copy over the slightly faster MULT31_SHIFT15 from tremor for cf.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@30578 a1c6a512-1295-4272-9138-f99709370657
Nils Wallménius 2011-09-21 15:38:54 +00:00
parent ccaf55a8ae
commit 151424a6fd
9 changed files with 85 additions and 275 deletions
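
For orientation, the three fixed-point primitives that keep reappearing in the hunks below (MULT32, MULT31, MULT31_SHIFT15) can be sketched in portable C. This is only a reference sketch mirroring the generic 64-bit fallback visible in one of the header hunks further down; the ref_ names are illustrative and are not identifiers from the tree:

    #include <stdint.h>

    /* high 32 bits of the 64-bit product */
    static inline int32_t ref_MULT32(int32_t x, int32_t y)
    {
        return (int32_t)(((int64_t)x * (int64_t)y) >> 32);
    }

    /* MULT31 is MULT32 shifted left once (both operands treated as Q31 fractions) */
    static inline int32_t ref_MULT31(int32_t x, int32_t y)
    {
        return ref_MULT32(x, y) << 1;
    }

    /* MULT31_SHIFT15: low 32 bits of the 64-bit product shifted right by 15;
       the same value the ((uint32_t)lo >> 15) | (hi << 17) fallback produces */
    static inline int32_t ref_MULT31_SHIFT15(int32_t x, int32_t y)
    {
        return (int32_t)(((int64_t)x * (int64_t)y) >> 15);
    }

The per-architecture asm hunks below implement these operations, plus the vect_add/vect_copy/vect_mult_fw/vect_mult_bw helpers that this commit deduplicates between codeclib and libtremor.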

View file

@@ -166,7 +166,7 @@ static inline void XNPROD31(int32_t a, int32_t b,
/* asm versions of vector operations for block.c, window.c */
static inline
void vect_add(int32_t *x, int32_t *y, int n)
void vect_add(int32_t *x, const int32_t *y, int n)
{
while (n>=4) {
asm volatile ("ldmia %[x], {r0, r1, r2, r3};"
@@ -190,7 +190,7 @@ void vect_add(int32_t *x, int32_t *y, int n)
}
static inline
void vect_copy(int32_t *x, int32_t *y, int n)
void vect_copy(int32_t *x, const int32_t *y, int n)
{
while (n>=4) {
asm volatile ("ldmia %[y]!, {r0, r1, r2, r3};"
@@ -208,7 +208,7 @@ void vect_copy(int32_t *x, int32_t *y, int n)
}
static inline
void vect_mult_fw(int32_t *data, int32_t *window, int n)
void vect_mult_fw(int32_t *data, const int32_t *window, int n)
{
while (n>=4) {
asm volatile ("ldmia %[d], {r0, r1, r2, r3};"
@@ -237,7 +237,7 @@ void vect_mult_fw(int32_t *data, int32_t *window, int n)
}
static inline
void vect_mult_bw(int32_t *data, int32_t *window, int n)
void vect_mult_bw(int32_t *data, const int32_t *window, int n)
{
while (n>=4) {
asm volatile ("ldmia %[d], {r0, r1, r2, r3};"

View file

@@ -44,17 +44,17 @@ static inline int32_t MULT31(int32_t x, int32_t y) {
}
#define INCL_OPTIMIZED_MULT31_SHIFT15
/* NOTE: this requires that the emac is *NOT* rounding */
static inline int32_t MULT31_SHIFT15(int32_t x, int32_t y) {
int32_t r;
asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply */
"mulu.l %[y], %[x];" /* get lower half, avoid emac stall */
"movclr.l %%acc0, %[r];" /* get higher half */
"asl.l #8, %[r];" /* hi<<16, plus one free */
"asl.l #8, %[r];"
"swap %[r];" /* hi<<16, plus one free */
"lsr.l #8, %[x];" /* (unsigned)lo >> 15 */
"lsr.l #7, %[x];"
"or.l %[x], %[r];" /* logical-or results */
"move.w %[x], %[r];" /* logical-or results */
: [r] "=&d" (r), [x] "+d" (x)
: [y] "d" (y)
: "cc");
@@ -202,7 +202,7 @@ void vect_add(int32_t *x, const int32_t *y, int n)
}
static inline
void vect_copy(int32_t *x, int32_t *y, int n)
void vect_copy(int32_t *x, const int32_t *y, int n)
{
/* align to 16 bytes */
while(n>0 && (int)x&15) {
@@ -228,7 +228,7 @@ void vect_copy(int32_t *x, int32_t *y, int n)
}
static inline
void vect_mult_fw(int32_t *data, int32_t *window, int n)
void vect_mult_fw(int32_t *data, const int32_t *window, int n)
{
/* ensure data is aligned to 16-bytes */
while(n>0 && (int)data&15) {
@@ -282,7 +282,7 @@ void vect_mult_fw(int32_t *data, int32_t *window, int n)
}
static inline
void vect_mult_bw(int32_t *data, int32_t *window, int n)
void vect_mult_bw(int32_t *data, const int32_t *window, int n)
{
/* ensure at least data is aligned to 16-bytes */
while(n>0 && (int)data&15) {

View file

@@ -187,7 +187,7 @@ static inline void XNPROD31(int32_t a, int32_t b,
#define _V_VECT_OPS
static inline
void vect_add(int32_t *x, int32_t *y, int n)
void vect_add(int32_t *x, const int32_t *y, int n)
{
while (n>0) {
*x++ += *y++;
@@ -196,7 +196,7 @@ void vect_add(int32_t *x, int32_t *y, int n)
}
static inline
void vect_copy(int32_t *x, int32_t *y, int n)
void vect_copy(int32_t *x, const int32_t *y, int n)
{
while (n>0) {
*x++ = *y++;
@@ -205,7 +205,7 @@ void vect_copy(int32_t *x, int32_t *y, int n)
}
static inline
void vect_mult_fw(int32_t *data, int32_t *window, int n)
void vect_mult_fw(int32_t *data, const int32_t *window, int n)
{
while(n>0) {
*data = MULT31(*data, *window);
@@ -216,7 +216,7 @@ void vect_mult_fw(int32_t *data, int32_t *window, int n)
}
static inline
void vect_mult_bw(int32_t *data, int32_t *window, int n)
void vect_mult_bw(int32_t *data, const int32_t *window, int n)
{
while(n>0) {
*data = MULT31(*data, *window);

View file

@@ -19,33 +19,8 @@
#if !defined(_V_WIDE_MATH) && !defined(_LOW_ACCURACY_)
#define _V_WIDE_MATH
static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) {
int lo,hi;
asm volatile("smull\t%0, %1, %2, %3"
: "=&r"(lo),"=&r"(hi)
: "%r"(x),"r"(y) );
return(hi);
}
static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) {
return MULT32(x,y)<<1;
}
static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
int lo,hi;
asm volatile("smull %0, %1, %2, %3\n\t"
"movs %0, %0, lsr #15\n\t"
"adc %1, %0, %1, lsl #17\n\t"
: "=&r"(lo),"=&r"(hi)
: "%r"(x),"r"(y)
: "cc");
return(hi);
}
#ifndef _V_VECT_OPS
#define _V_VECT_OPS
#ifndef _TREMOR_VECT_OPS
#define _TREMOR_VECT_OPS
/* asm versions of vector operations for block.c, window.c */
/* SOME IMPORTANT NOTES: this implementation of vect_mult_bw does
NOT do a final shift, meaning that the result of vect_mult_bw is
@@ -114,7 +89,7 @@ void vect_add_left_right(ogg_int32_t *x, const ogg_int32_t *y, int n)
#if ARM_ARCH >= 6
static inline
void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
void ogg_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
{
/* Note, mult_fw uses MULT31 */
do{
@@ -139,7 +114,7 @@ void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
}
#else
static inline
void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
void ogg_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
{
/* Note, mult_fw uses MULT31 */
do{
@@ -166,7 +141,7 @@ void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
#if ARM_ARCH >= 6
static inline
void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
void ogg_vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
{
/* NOTE mult_bw uses MULT_32 i.e. doesn't shift result left at end */
/* On ARM, we can do the shift at the same time as the overlap-add */
@@ -187,7 +162,7 @@ void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
}
#else
static inline
void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
void ogg_vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
{
/* NOTE mult_bw uses MULT_32 i.e. doesn't shift result left at end */
/* On ARM, we can do the shift at the same time as the overlap-add */
@@ -207,14 +182,7 @@ void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
} while (n);
}
#endif
static inline void vect_copy(ogg_int32_t *x, const ogg_int32_t *y, int n)
{
memcpy(x,y,n*sizeof(ogg_int32_t));
}
#endif
#endif
#ifndef _V_LSP_MATH_ASM

View file

@@ -28,226 +28,37 @@
#define MB()
static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) {
asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply & shift */
"movclr.l %%acc0, %[x];" /* move & clear acc */
"asr.l #1, %[x];" /* no overflow test */
: [x] "+&d" (x)
: [y] "r" (y)
: "cc");
return x;
}
static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) {
asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply */
"movclr.l %%acc0, %[x];" /* move and clear */
: [x] "+&r" (x)
: [y] "r" (y)
: "cc");
return x;
}
static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
ogg_int32_t r;
asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply */
"mulu.l %[y], %[x];" /* get lower half, avoid emac stall */
"movclr.l %%acc0, %[r];" /* get higher half */
"swap %[r];" /* hi<<16, plus one free */
"lsr.l #8, %[x];" /* (unsigned)lo >> 15 */
"lsr.l #7, %[x];"
"move.w %[x], %[r];" /* logical-or results */
: [r] "=&d" (r), [x] "+d" (x)
: [y] "d" (y)
: "cc");
return r;
}
#ifndef _V_VECT_OPS
#define _V_VECT_OPS
/* asm versions of vector operations for block.c, window.c */
/* assumes MAC is initialized & accumulators cleared */
static inline
void vect_add_right_left(ogg_int32_t *x, const ogg_int32_t *y, int n)
{
/* align to 16 bytes */
while(n>0 && (int)x&15) {
*x++ += *y++;
n--;
}
asm volatile ("bra 1f;"
"0:" /* loop start */
"movem.l (%[x]), %%d0-%%d3;" /* fetch values */
"movem.l (%[y]), %%a0-%%a3;"
/* add */
"add.l %%a0, %%d0;"
"add.l %%a1, %%d1;"
"add.l %%a2, %%d2;"
"add.l %%a3, %%d3;"
/* store and advance */
"movem.l %%d0-%%d3, (%[x]);"
"lea.l (4*4, %[x]), %[x];"
"lea.l (4*4, %[y]), %[y];"
"subq.l #4, %[n];" /* done 4 elements */
"1: cmpi.l #4, %[n];"
"bge 0b;"
: [n] "+d" (n), [x] "+a" (x), [y] "+a" (y)
: : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
"cc", "memory");
/* add final elements */
while (n>0) {
*x++ += *y++;
n--;
}
}
#ifndef _TREMOR_VECT_OPS
#define _TREMOR_VECT_OPS
static inline
void vect_add_left_right(ogg_int32_t *x, const ogg_int32_t *y, int n)
{
/* coldfire asm has symmetrical versions of vect_add_right_left
and vect_add_left_right (since symmetrical versions of
vect_mult_fw and vect_mult_bw i.e. both use MULT31) */
vect_add_right_left(x, y, n );
vect_add(x, y, n );
}
static inline
void vect_copy(ogg_int32_t *x, const ogg_int32_t *y, int n)
void vect_add_right_left(ogg_int32_t *x, const ogg_int32_t *y, int n)
{
/* align to 16 bytes */
while(n>0 && (int)x&15) {
*x++ = *y++;
n--;
}
asm volatile ("bra 1f;"
"0:" /* loop start */
"movem.l (%[y]), %%d0-%%d3;" /* fetch values */
"movem.l %%d0-%%d3, (%[x]);" /* store */
"lea.l (4*4, %[x]), %[x];" /* advance */
"lea.l (4*4, %[y]), %[y];"
"subq.l #4, %[n];" /* done 4 elements */
"1: cmpi.l #4, %[n];"
"bge 0b;"
: [n] "+d" (n), [x] "+a" (x), [y] "+a" (y)
: : "%d0", "%d1", "%d2", "%d3", "cc", "memory");
/* copy final elements */
while (n>0) {
*x++ = *y++;
n--;
}
/* coldfire asm has symmetrical versions of vect_add_right_left
and vect_add_left_right (since symmetrical versions of
vect_mult_fw and vect_mult_bw i.e. both use MULT31) */
vect_add(x, y, n );
}
static inline
void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
void ogg_vect_mult_fw(int32_t *data, const int32_t *window, int n)
{
/* ensure data is aligned to 16-bytes */
while(n>0 && (int)data&15) {
*data = MULT31(*data, *window);
data++;
window++;
n--;
}
asm volatile ("movem.l (%[d]), %%d0-%%d3;" /* loop start */
"movem.l (%[w]), %%a0-%%a3;" /* pre-fetch registers */
"lea.l (4*4, %[w]), %[w];"
"bra 1f;" /* jump to loop condition */
"0:" /* loop body */
/* multiply and load next window values */
"mac.l %%d0, %%a0, (%[w])+, %%a0, %%acc0;"
"mac.l %%d1, %%a1, (%[w])+, %%a1, %%acc1;"
"mac.l %%d2, %%a2, (%[w])+, %%a2, %%acc2;"
"mac.l %%d3, %%a3, (%[w])+, %%a3, %%acc3;"
"movclr.l %%acc0, %%d0;" /* get the products */
"movclr.l %%acc1, %%d1;"
"movclr.l %%acc2, %%d2;"
"movclr.l %%acc3, %%d3;"
/* store and advance */
"movem.l %%d0-%%d3, (%[d]);"
"lea.l (4*4, %[d]), %[d];"
"movem.l (%[d]), %%d0-%%d3;"
"subq.l #4, %[n];" /* done 4 elements */
"1: cmpi.l #4, %[n];"
"bge 0b;"
/* multiply final elements */
"tst.l %[n];"
"beq 1f;" /* n=0 */
"mac.l %%d0, %%a0, %%acc0;"
"movclr.l %%acc0, %%d0;"
"move.l %%d0, (%[d])+;"
"subq.l #1, %[n];"
"beq 1f;" /* n=1 */
"mac.l %%d1, %%a1, %%acc0;"
"movclr.l %%acc0, %%d1;"
"move.l %%d1, (%[d])+;"
"subq.l #1, %[n];"
"beq 1f;" /* n=2 */
/* otherwise n = 3 */
"mac.l %%d2, %%a2, %%acc0;"
"movclr.l %%acc0, %%d2;"
"move.l %%d2, (%[d])+;"
"1:"
: [n] "+d" (n), [d] "+a" (data), [w] "+a" (window)
: : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
"cc", "memory");
vect_mult_fw(data, window, n);
}
static inline
void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
void ogg_vect_mult_bw(int32_t *data, const int32_t *window, int n)
{
/* ensure at least data is aligned to 16-bytes */
while(n>0 && (int)data&15) {
*data = MULT31(*data, *window);
data++;
window--;
n--;
vect_mult_bw(data, window, n);
}
asm volatile ("lea.l (-3*4, %[w]), %[w];" /* loop start */
"movem.l (%[d]), %%d0-%%d3;" /* pre-fetch registers */
"movem.l (%[w]), %%a0-%%a3;"
"bra 1f;" /* jump to loop condition */
"0:" /* loop body */
/* multiply and load next window value */
"mac.l %%d0, %%a3, -(%[w]), %%a3, %%acc0;"
"mac.l %%d1, %%a2, -(%[w]), %%a2, %%acc1;"
"mac.l %%d2, %%a1, -(%[w]), %%a1, %%acc2;"
"mac.l %%d3, %%a0, -(%[w]), %%a0, %%acc3;"
"movclr.l %%acc0, %%d0;" /* get the products */
"movclr.l %%acc1, %%d1;"
"movclr.l %%acc2, %%d2;"
"movclr.l %%acc3, %%d3;"
/* store and advance */
"movem.l %%d0-%%d3, (%[d]);"
"lea.l (4*4, %[d]), %[d];"
"movem.l (%[d]), %%d0-%%d3;"
"subq.l #4, %[n];" /* done 4 elements */
"1: cmpi.l #4, %[n];"
"bge 0b;"
/* multiply final elements */
"tst.l %[n];"
"beq 1f;" /* n=0 */
"mac.l %%d0, %%a3, %%acc0;"
"movclr.l %%acc0, %%d0;"
"move.l %%d0, (%[d])+;"
"subq.l #1, %[n];"
"beq 1f;" /* n=1 */
"mac.l %%d1, %%a2, %%acc0;"
"movclr.l %%acc0, %%d1;"
"move.l %%d1, (%[d])+;"
"subq.l #1, %[n];"
"beq 1f;" /* n=2 */
/* otherwise n = 3 */
"mac.l %%d2, %%a1, %%acc0;"
"movclr.l %%acc0, %%d2;"
"move.l %%d2, (%[d])+;"
"1:"
: [n] "+d" (n), [d] "+a" (data), [w] "+a" (window)
: : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
"cc", "memory");
}
#endif
#endif
#endif
#endif

View file

@@ -22,6 +22,8 @@
#include "ivorbiscodec.h"
#include "os_types.h"
#include "codeclib_misc.h"
#include "asm_arm.h"
#include "asm_mcf5249.h"
@@ -37,7 +39,7 @@ extern int _ilog(unsigned int v);
#ifndef _LOW_ACCURACY_
/* 64 bit multiply */
/* #include <sys/types.h> */
#if 0
#if BYTE_ORDER==LITTLE_ENDIAN
union magic {
struct {
@@ -70,7 +72,7 @@ static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
magic.whole = (ogg_int64_t)x * y;
return ((ogg_uint32_t)(magic.halves.lo)>>15) | ((magic.halves.hi)<<17);
}
#endif
#else
/* 32 bit multiply, more portable but less accurate */
@@ -119,10 +121,11 @@ static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
/* replaced XPROD32 with a macro to avoid memory reference
_x, _y are the results (must be l-values) */
/*
#define XPROD32(_a, _b, _t, _v, _x, _y) \
{ (_x)=MULT32(_a,_t)+MULT32(_b,_v); \
(_y)=MULT32(_b,_t)-MULT32(_a,_v); }
*/
#ifdef __i386__
@@ -134,7 +137,7 @@ static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
*(_y)=MULT31(_b,_t)+MULT31(_a,_v); }
#else
/*
static inline void XPROD31(ogg_int32_t a, ogg_int32_t b,
ogg_int32_t t, ogg_int32_t v,
ogg_int32_t *x, ogg_int32_t *y)
@@ -150,8 +153,36 @@ static inline void XNPROD31(ogg_int32_t a, ogg_int32_t b,
*x = MULT31(a, t) - MULT31(b, v);
*y = MULT31(b, t) + MULT31(a, v);
}
*/
#endif
#ifndef _TREMOR_VECT_OPS
#define _TREMOR_VECT_OPS
static inline
void vect_add_left_right(ogg_int32_t *x, const ogg_int32_t *y, int n)
{
vect_add(x, y, n );
}
static inline
void vect_add_right_left(ogg_int32_t *x, const ogg_int32_t *y, int n)
{
vect_add(x, y, n );
}
static inline
void ogg_vect_mult_fw(int32_t *data, const int32_t *window, int n)
{
vect_mult_fw(data, window, n);
}
static inline
void ogg_vect_mult_bw(int32_t *data, const int32_t *window, int n)
{
vect_mult_bw(data, window, n);
}
#endif
#if 0
#ifndef _V_VECT_OPS
#define _V_VECT_OPS
@@ -174,7 +205,7 @@ void vect_add_left_right(ogg_int32_t *x, const ogg_int32_t *y, int n)
}
static inline
void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
void ogg_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
{
while(n>0) {
*data = MULT31(*data, *window);
@@ -185,7 +216,7 @@ void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
}
static inline
void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
void ogg_vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
{
while(n>0) {
*data = MULT31(*data, *window);
@@ -202,8 +233,6 @@ static inline void vect_copy(ogg_int32_t *x, const ogg_int32_t *y, int n)
}
#endif
#endif
static inline ogg_int32_t VFLOAT_MULT(ogg_int32_t a,ogg_int32_t ap,
ogg_int32_t b,ogg_int32_t bp,
ogg_int32_t *p){
@@ -218,7 +247,8 @@ static inline ogg_int32_t VFLOAT_MULT(ogg_int32_t a,ogg_int32_t ap,
}else
return 0;
}
#endif
#endif
static inline ogg_int32_t VFLOAT_MULTI(ogg_int32_t a,ogg_int32_t ap,
ogg_int32_t i,
ogg_int32_t *p){
@@ -226,7 +256,7 @@ static inline ogg_int32_t VFLOAT_MULTI(ogg_int32_t a,ogg_int32_t ap,
int ip=_ilog(abs(i))-31;
return VFLOAT_MULT(a,ap,i<<-ip,ip,p);
}
#if 0
static inline ogg_int32_t VFLOAT_ADD(ogg_int32_t a,ogg_int32_t ap,
ogg_int32_t b,ogg_int32_t bp,
ogg_int32_t *p){
@@ -268,6 +298,6 @@ static inline ogg_int32_t VFLOAT_ADD(ogg_int32_t a,ogg_int32_t ap,
}
return(a);
}
#endif
#endif

View file

@@ -19,6 +19,7 @@
#ifndef _OS_TYPES_H
#define _OS_TYPES_H
#include <stdint.h>
#include <stdlib.h>
#include <codecs.h>
@@ -49,9 +50,9 @@ void ogg_free(void *ptr);
void iram_malloc_init(void);
void *iram_malloc(size_t size);
typedef short ogg_int16_t;
typedef int ogg_int32_t;
typedef unsigned int ogg_uint32_t;
typedef long long ogg_int64_t;
typedef int16_t ogg_int16_t;
typedef int32_t ogg_int32_t;
typedef uint32_t ogg_uint32_t;
typedef int64_t ogg_int64_t;
#endif /* _OS_TYPES_H */

View file

@@ -224,7 +224,7 @@ static ogg_int32_t *_book_unquantize(const static_codebook *b,int n,
int indexdiv=1;
for(k=0;k<b->dim;k++){
int index= (j/indexdiv)%quantvals;
int point=0;
ogg_int32_t point=0;
int val=VFLOAT_MULTI(delta,delpoint,
abs(b->quantlist[index]),&point);
@@ -258,7 +258,7 @@ static ogg_int32_t *_book_unquantize(const static_codebook *b,int n,
int lastpoint=0;
for(k=0;k<b->dim;k++){
int point=0;
ogg_int32_t point=0;
int val=VFLOAT_MULTI(delta,delpoint,
abs(b->quantlist[j*b->dim+k]),&point);

View file

@@ -73,8 +73,8 @@ void _vorbis_apply_window(ogg_int32_t *d,const void *window_p[2],
outside that region are not added (so don't need to be zerod). see block.c
memset((void *)&d[0], 0, sizeof(ogg_int32_t)*leftbegin); */
vect_mult_fw(&d[leftbegin], &window[lW][0], leftend-leftbegin);
vect_mult_bw(&d[rightbegin], &window[nW][rn/2-1], rightend-rightbegin);
ogg_vect_mult_fw(&d[leftbegin], &window[lW][0], leftend-leftbegin);
ogg_vect_mult_bw(&d[rightbegin], &window[nW][rn/2-1], rightend-rightbegin);
/* Again - memset not needed
memset((void *)&d[rightend], 0, sizeof(ogg_int32_t)*(n-rightend)); */