diff --git a/apps/codecs/Makefile b/apps/codecs/Makefile index 61c9476b07..e555d94f50 100644 --- a/apps/codecs/Makefile +++ b/apps/codecs/Makefile @@ -17,7 +17,7 @@ ifdef APPEXTRA endif ifdef SOFTWARECODECS - CODECLIBS = -lmad -la52 -lffmpegFLAC -lTremor -lwavpack -lmusepack -lalac -lfaad -lm4a -lspeex + CODECLIBS = -lmad -la52 -lffmpegFLAC -lTremor -lwavpack -lmusepack -lalac -lfaad -lm4a -lspeex endif # we "borrow" the plugin LDS file @@ -39,7 +39,7 @@ DIRS = . CODECDEPS = $(LINKCODEC) $(BUILDDIR)/libcodec.a -.PHONY: libmad liba52 libffmpegFLAC libTremor libspeex libwavpack libmusepack libalac libfaad libm4a +.PHONY: libmad liba52 libffmpegFLAC libTremor libspeex libwavpack libmusepack libalac libfaad libm4a OUTPUT = $(SOFTWARECODECS) @@ -164,9 +164,7 @@ $(BUILDDIR)/libspeex.a: libspeex libspeex: $(SILENT)mkdir -p $(OBJDIR)/libspeex - $(call PRINTS,MAKE in Tremor)$(MAKE) -C libspeex OBJDIR=$(OBJDIR)/libspeex OUTPUT=$(BUILDDIR)/libspeex.a - - + $(call PRINTS,MAKE in libspeex)$(MAKE) -C libspeex OBJDIR=$(OBJDIR)/libspeex OUTPUT=$(BUILDDIR)/libspeex.a $(BUILDDIR)/libwavpack.a: libwavpack diff --git a/apps/codecs/libspeex/Makefile b/apps/codecs/libspeex/Makefile index 8500399071..ef119f9c15 100644 --- a/apps/codecs/libspeex/Makefile +++ b/apps/codecs/libspeex/Makefile @@ -14,7 +14,7 @@ ifdef APPEXTRA INCLUDES += $(patsubst %,-I$(APPSDIR)/%,$(subst :, ,$(APPEXTRA))) endif -SPEEXOPTS = -O +SPEEXOPTS = -O -DHAVE_CONFIG_H CFLAGS = $(INCLUDES) $(GCCOPTS) $(TARGET_INC) $(SPEEXOPTS) $(TARGET) \ $(EXTRA_DEFINES) -DMEM=${MEMORYSIZE} ${PROFILE_OPTS} -Wno-unused-parameter diff --git a/apps/codecs/libspeex/SOURCES b/apps/codecs/libspeex/SOURCES index 6bc2ab7a2c..16168b0277 100644 --- a/apps/codecs/libspeex/SOURCES +++ b/apps/codecs/libspeex/SOURCES @@ -1,38 +1,45 @@ -nb_celp.c -sb_celp.c -lpc.c -ltp.c -lsp.c -quant_lsp.c -lsp_tables_nb.c -gain_table.c -gain_table_lbr.c -cb_search.c -filters.c bits.c -modes.c -speex.c -vq.c -high_lsp_tables.c -vbr.c -hexc_table.c +cb_search.c +exc_10_16_table.c +exc_10_32_table.c +exc_20_32_table.c exc_5_256_table.c exc_5_64_table.c exc_8_128_table.c -exc_10_32_table.c -exc_10_16_table.c -exc_20_32_table.c -hexc_10_32_table.c -misc.c -speex_header.c -speex_callbacks.c -math_approx.c -stereo.c -preprocess.c -jitter.c -mdf.c fftwrap.c +filterbank.c +filters.c +gain_table.c +gain_table_lbr.c +hexc_10_32_table.c +hexc_table.c +high_lsp_tables.c +jitter.c kiss_fft.c kiss_fftr.c -window.c +lbr_48k_tables.c +lpc.c +lsp.c +lsp_tables_nb.c +ltp.c +math_approx.c +mdf.c +medfilter.c +misc.c +modes.c +nb_celp.c oggframing.c +preprocess.c +quant_lsp.c +resample.c +rockbox.c +sb_celp.c +smallft.c +speex.c +speex_callbacks.c +speex_header.c +stereo.c +vbr.c +vorbis_psy.c +vq.c +window.c diff --git a/apps/codecs/libspeex/_kiss_fft_guts.h b/apps/codecs/libspeex/_kiss_fft_guts.h index afe399fc19..43a3ba57f9 100644 --- a/apps/codecs/libspeex/_kiss_fft_guts.h +++ b/apps/codecs/libspeex/_kiss_fft_guts.h @@ -11,18 +11,16 @@ Redistribution and use in source and binary forms, with or without modification, THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef MIN + #define MIN(a,b) ((a)<(b) ? (a):(b)) -#endif -#ifndef MAX #define MAX(a,b) ((a)>(b) ? (a):(b)) -#endif /* kiss_fft.h defines kiss_fft_scalar as either short or a float type and defines typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */ #include "kiss_fft.h" +#include "math_approx.h" #define MAXFACTORS 32 /* e.g. an fft of length 128 has 4 factors @@ -143,6 +141,11 @@ struct kiss_fft_state{ (x)->r = KISS_FFT_COS(phase);\ (x)->i = KISS_FFT_SIN(phase);\ }while(0) +#define kf_cexp2(x,phase) \ + do{ \ + (x)->r = spx_cos_norm((phase));\ + (x)->i = spx_cos_norm((phase)-32768);\ +}while(0) /* a debugging function */ diff --git a/apps/codecs/libspeex/arch.h b/apps/codecs/libspeex/arch.h index 99cff41b55..2bc5061b29 100644 --- a/apps/codecs/libspeex/arch.h +++ b/apps/codecs/libspeex/arch.h @@ -35,16 +35,15 @@ #ifndef ARCH_H #define ARCH_H -//#warning "----------------Arch.h-------------" #include "speex/speex_types.h" - #define ABS(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute integer value. */ #define ABS16(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute 16-bit value. */ +#define MIN16(a,b) ((a) < (b) ? (a) : (b)) /**< Maximum 16-bit value. */ #define MAX16(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 16-bit value. */ #define ABS32(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute 32-bit value. */ +#define MIN32(a,b) ((a) < (b) ? (a) : (b)) /**< Maximum 32-bit value. */ #define MAX32(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 32-bit value. */ -#define FIXED_POINT #ifdef FIXED_POINT @@ -71,6 +70,7 @@ typedef spx_word32_t spx_sig_t; #define VERY_SMALL 0 #define VERY_LARGE32 ((spx_word32_t)2147483647) #define VERY_LARGE16 ((spx_word16_t)32767) +#define Q15_ONE ((spx_word16_t)32767) #ifdef FIXED_DEBUG @@ -79,7 +79,7 @@ typedef spx_word32_t spx_sig_t; #include "fixed_generic.h" -#if defined (ARM5E_ASM) +#ifdef ARM5E_ASM #include "fixed_arm5e.h" #elif defined (ARM4_ASM) #include "fixed_arm4.h" @@ -116,6 +116,7 @@ typedef float spx_word32_t; #define VERY_SMALL 1e-15f #define VERY_LARGE32 1e15f #define VERY_LARGE16 1e15f +#define Q15_ONE ((spx_word16_t)1.f) #define QCONST16(x,bits) (x) #define QCONST32(x,bits) (x) @@ -130,6 +131,7 @@ typedef float spx_word32_t; #define SHL32(a,shift) (a) #define PSHR16(a,shift) (a) #define PSHR32(a,shift) (a) +#define VSHR32(a,shift) (a) #define SATURATE16(x,a) (x) #define SATURATE32(x,a) (x) @@ -150,6 +152,7 @@ typedef float spx_word32_t; #define MULT16_32_Q13(a,b) ((a)*(b)) #define MULT16_32_Q14(a,b) ((a)*(b)) #define MULT16_32_Q15(a,b) ((a)*(b)) +#define MULT16_32_P15(a,b) ((a)*(b)) #define MAC16_32_Q11(c,a,b) ((c)+(a)*(b)) #define MAC16_32_Q15(c,a,b) ((c)+(a)*(b)) diff --git a/apps/codecs/libspeex/bits.c b/apps/codecs/libspeex/bits.c index 376e804f92..6ea495f8f4 100644 --- a/apps/codecs/libspeex/bits.c +++ b/apps/codecs/libspeex/bits.c @@ -76,6 +76,7 @@ void speex_bits_destroy(SpeexBits *bits) void speex_bits_reset(SpeexBits *bits) { + /* We only need to clear the first byte now */ bits->chars[0]=0; bits->nbBits=0; bits->charPtr=0; @@ -225,11 +226,10 @@ void speex_bits_pack(SpeexBits *bits, int data, int nbBits) speex_warning("Buffer too small to pack bits"); if (bits->owner) { - int new_nchars = ((bits->buf_size+5)*3)>>1; + int new_nchars = ((bits->buf_size+5)*3)>>1; char *tmp = (char*)speex_realloc(bits->chars, new_nchars); if (tmp) { - speex_memset_bytes(tmp, 0, new_nchars); bits->buf_size=new_nchars; bits->chars=tmp; } else { diff --git a/apps/codecs/libspeex/cb_search.c b/apps/codecs/libspeex/cb_search.c index 5c68826095..6c4f0680a5 100644 --- a/apps/codecs/libspeex/cb_search.c +++ b/apps/codecs/libspeex/cb_search.c @@ -226,11 +226,13 @@ int update_target /* Update target: only update target if necessary */ if (update_target) { - VARDECL(spx_sig_t *r2); - ALLOC(r2, nsf, spx_sig_t); - syn_percep_zero(e, ak, awk1, awk2, r2, nsf,p, stack); + VARDECL(spx_word16_t *r2); + ALLOC(r2, nsf, spx_word16_t); for (j=0;j header file. */ +/* #undef HAVE_DLFCN_H */ + +/* Define to 1 if you have the `getopt_long' function. */ +#define HAVE_GETOPT_LONG 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the `m' library (-lm). */ +/* #undef HAVE_LIBM */ + +/* Define to 1 if you have the `winmm' library (-lwinmm). */ +/* #undef HAVE_LIBWINMM */ + +/* Define to 1 if you have the header file. */ +#define HAVE_MEMORY_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRINGS_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_AUDIOIO_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_SOUNDCARD_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_UNISTD_H 1 + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "" + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "" + +/* Reduce precision to 16 bits (EXPERIMENTAL) */ +/* #undef PRECISION16 */ + +/* The size of `int', as computed by sizeof. */ +#define SIZEOF_INT 4 + +/* The size of `long', as computed by sizeof. */ +#define SIZEOF_LONG 4 + +/* The size of `short', as computed by sizeof. */ +#define SIZEOF_SHORT 2 + +/* Version extra */ +#define SPEEX_EXTRA_VERSION "-svn" + +/* Version major */ +#define SPEEX_MAJOR_VERSION 1 + +/* Version micro */ +#define SPEEX_MICRO_VERSION 14 + +/* Version minor */ +#define SPEEX_MINOR_VERSION 1 + +/* Complete version string */ +#define SPEEX_VERSION "1.1.14-svn" + +/* Define to 1 if you have the ANSI C header files. */ +#define STDC_HEADERS 1 + +/* Enable support for TI C55X DSP */ +/* #undef TI_C55X */ + +/* Make use of alloca */ +/* #undef USE_ALLOCA */ + +/* Use C99 variable-size arrays */ +#define VAR_ARRAYS + +/* Enable Vorbis-style psychoacoustics (EXPERIMENTAL) */ +/* #undef VORBIS_PSYCHO */ + +/* Define to 1 if your processor stores words with the most significant byte + first (like Motorola and SPARC, unlike Intel and VAX). */ +#ifdef ROCKBOX_BIG_ENDIAN +#define WORDS_BIGENDIAN 1 +#endif + +/* Enable SSE support */ +/* #undef _USE_SSE */ + +/* Define to empty if `const' does not conform to ANSI C. */ +/* #undef const */ + +/* Define to `__inline__' or `__inline' if that's what the C compiler + calls it, or to nothing if 'inline' is not supported under any name. */ +#ifndef __cplusplus +/* #undef inline */ +#endif + +/* Define to equivalent of C99 restrict keyword, or to nothing if this is not + supported. Do not define if restrict is supported directly. */ +#define restrict __restrict + +#define RELEASE 1 +#define OVERRIDE_SPEEX_ALLOC 1 +#define OVERRIDE_SPEEX_ALLOC_SCRATCH 1 +#define OVERRIDE_SPEEX_REALLOC 1 +#define OVERRIDE_SPEEX_FREE 1 +#define OVERRIDE_SPEEX_FREE_SCRATCH 1 +#define OVERRIDE_SPEEX_MOVE 1 +#define OVERRIDE_SPEEX_ERROR 1 +#define OVERRIDE_SPEEX_WARNING 1 +#define OVERRIDE_SPEEX_WARNING_INT 1 +#define OVERRIDE_SPEEX_PUTC 1 + diff --git a/apps/codecs/libspeex/fftwrap.c b/apps/codecs/libspeex/fftwrap.c index df56580f93..a1086408ed 100644 --- a/apps/codecs/libspeex/fftwrap.c +++ b/apps/codecs/libspeex/fftwrap.c @@ -41,7 +41,6 @@ #include "misc.h" -#include "math_approx.h" #define MAX_FFT_SIZE 2048 @@ -65,7 +64,7 @@ static int maximize_range(spx_word16_t *in, spx_word16_t *out, spx_word16_t boun } for (i=0;i> shift; + out[i] = PSHR16(in[i], shift); } } #endif @@ -104,8 +103,8 @@ void spx_fft(void *table, float *in, float *out) if (in==out) { int i; - speex_warning("FFT should not be done in-place"); float scale = 1./((struct drft_lookup *)table)->n; + speex_warning("FFT should not be done in-place"); for (i=0;i<((struct drft_lookup *)table)->n;i++) out[i] = scale*in[i]; } else { @@ -121,7 +120,6 @@ void spx_ifft(void *table, float *in, float *out) { if (in==out) { - int i; speex_warning("FFT should not be done in-place"); } else { int i; diff --git a/apps/codecs/libspeex/filterbank.c b/apps/codecs/libspeex/filterbank.c new file mode 100644 index 0000000000..187d5eea90 --- /dev/null +++ b/apps/codecs/libspeex/filterbank.c @@ -0,0 +1,226 @@ +/* Copyright (C) 2006 Jean-Marc Valin */ +/** + @file filterbank.c + @brief Converting between psd and filterbank + */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "filterbank.h" +#include "misc.h" +#include +#include "math_approx.h" + +#ifdef FIXED_POINT + +#define toBARK(n) (MULT16_16(26829,spx_atan(SHR32(MULT16_16(97,n),2))) + MULT16_16(4588,spx_atan(MULT16_32_Q15(20,MULT16_16(n,n)))) + MULT16_16(3355,n)) + +#else +#define toBARK(n) (13.1f*atan(.00074f*(n))+2.24f*atan((n)*(n)*1.85e-8f)+1e-4f*(n)) +#endif + +#define toMEL(n) (2595.f*log10(1.f+(n)/700.f)) + +FilterBank *filterbank_new(int banks, spx_word32_t sampling, int len, int type) +{ + FilterBank *bank; + spx_word32_t df; + spx_word32_t max_mel, mel_interval; + int i; + int id1; + int id2; + df = DIV32(SHL32(sampling,15),MULT16_16(2,len)); + max_mel = toBARK(EXTRACT16(MULT16_16_Q15(QCONST16(.5f,15),sampling))); + mel_interval = PDIV32(max_mel,banks-1); + + bank = (FilterBank*)speex_alloc(sizeof(FilterBank)); + bank->nb_banks = banks; + bank->len = len; + bank->bank_left = (int*)speex_alloc(len*sizeof(int)); + bank->bank_right = (int*)speex_alloc(len*sizeof(int)); + bank->filter_left = (spx_word16_t*)speex_alloc(len*sizeof(spx_word16_t)); + bank->filter_right = (spx_word16_t*)speex_alloc(len*sizeof(spx_word16_t)); + /* Think I can safely disable normalisation that for fixed-point (and probably float as well) */ +#ifndef FIXED_POINT + bank->scaling = (float*)speex_alloc(banks*sizeof(float)); +#endif + for (i=0;i max_mel) + break; +#ifdef FIXED_POINT + id1 = DIV32(mel,mel_interval); +#else + id1 = (int)(floor(mel/mel_interval)); +#endif + if (id1>banks-2) + { + id1 = banks-2; + val = Q15_ONE; + } else { + val = DIV32_16(mel - id1*mel_interval,EXTRACT16(PSHR32(mel_interval,15))); + } + id2 = id1+1; + bank->bank_left[i] = id1; + bank->filter_left[i] = SUB16(Q15_ONE,val); + bank->bank_right[i] = id2; + bank->filter_right[i] = val; + } + + /* Think I can safely disable normalisation for fixed-point (and probably float as well) */ +#ifndef FIXED_POINT + for (i=0;inb_banks;i++) + bank->scaling[i] = 0; + for (i=0;ilen;i++) + { + int id = bank->bank_left[i]; + bank->scaling[id] += bank->filter_left[i]; + id = bank->bank_right[i]; + bank->scaling[id] += bank->filter_right[i]; + } + for (i=0;inb_banks;i++) + bank->scaling[i] = Q15_ONE/(bank->scaling[i]); +#endif + return bank; +} + +void filterbank_destroy(FilterBank *bank) +{ + speex_free(bank->bank_left); + speex_free(bank->bank_right); + speex_free(bank->filter_left); + speex_free(bank->filter_right); +#ifndef FIXED_POINT + speex_free(bank->scaling); +#endif + speex_free(bank); +} + +void filterbank_compute_bank32(FilterBank *bank, spx_word32_t *ps, spx_word32_t *mel) +{ + int i; + for (i=0;inb_banks;i++) + mel[i] = 0; + + for (i=0;ilen;i++) + { + int id; + id = bank->bank_left[i]; + mel[id] += MULT16_32_P15(bank->filter_left[i],ps[i]); + id = bank->bank_right[i]; + mel[id] += MULT16_32_P15(bank->filter_right[i],ps[i]); + } + /* Think I can safely disable normalisation that for fixed-point (and probably float as well) */ +#ifndef FIXED_POINT + /*for (i=0;inb_banks;i++) + mel[i] = MULT16_32_P15(Q15(bank->scaling[i]),mel[i]); + */ +#endif +} + +void filterbank_compute_psd16(FilterBank *bank, spx_word16_t *mel, spx_word16_t *ps) +{ + int i; + for (i=0;ilen;i++) + { + spx_word32_t tmp; + int id1, id2; + id1 = bank->bank_left[i]; + id2 = bank->bank_right[i]; + tmp = MULT16_16(mel[id1],bank->filter_left[i]); + tmp += MULT16_16(mel[id2],bank->filter_right[i]); + ps[i] = EXTRACT16(PSHR32(tmp,15)); + } +} + + +#ifndef FIXED_POINT +void filterbank_compute_bank(FilterBank *bank, float *ps, float *mel) +{ + int i; + for (i=0;inb_banks;i++) + mel[i] = 0; + + for (i=0;ilen;i++) + { + int id = bank->bank_left[i]; + mel[id] += bank->filter_left[i]*ps[i]; + id = bank->bank_right[i]; + mel[id] += bank->filter_right[i]*ps[i]; + } + for (i=0;inb_banks;i++) + mel[i] *= bank->scaling[i]; +} + +void filterbank_compute_psd(FilterBank *bank, float *mel, float *ps) +{ + int i; + for (i=0;ilen;i++) + { + int id = bank->bank_left[i]; + ps[i] = mel[id]*bank->filter_left[i]; + id = bank->bank_right[i]; + ps[i] += mel[id]*bank->filter_right[i]; + } +} + +void filterbank_psy_smooth(FilterBank *bank, float *ps, float *mask) +{ + /* Low freq slope: 14 dB/Bark*/ + /* High freq slope: 9 dB/Bark*/ + /* Noise vs tone: 5 dB difference */ + /* FIXME: Temporary kludge */ + float bark[100]; + int i; + /* Assumes 1/3 Bark resolution */ + float decay_low = 0.34145f; + float decay_high = 0.50119f; + filterbank_compute_bank(bank, ps, bark); + for (i=1;inb_banks;i++) + { + /*float decay_high = 13-1.6*log10(bark[i-1]); + decay_high = pow(10,(-decay_high/30.f));*/ + bark[i] = bark[i] + decay_high*bark[i-1]; + } + for (i=bank->nb_banks-2;i>=0;i--) + { + bark[i] = bark[i] + decay_low*bark[i+1]; + } + filterbank_compute_psd(bank, bark, mask); +} + +#endif diff --git a/apps/codecs/libspeex/filterbank.h b/apps/codecs/libspeex/filterbank.h new file mode 100644 index 0000000000..5ded6b93ee --- /dev/null +++ b/apps/codecs/libspeex/filterbank.h @@ -0,0 +1,66 @@ +/* Copyright (C) 2006 Jean-Marc Valin */ +/** + @file filterbank.h + @brief Converting between psd and filterbank + */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef FILTERBANK_H +#define FILTERBANK_H + +#include "misc.h" + +typedef struct { + int *bank_left; + int *bank_right; + spx_word16_t *filter_left; + spx_word16_t *filter_right; +#ifndef FIXED_POINT + float *scaling; +#endif + int nb_banks; + int len; +} FilterBank; + + +FilterBank *filterbank_new(int banks, spx_word32_t sampling, int len, int type); + +void filterbank_destroy(FilterBank *bank); + +void filterbank_compute_bank32(FilterBank *bank, spx_word32_t *ps, spx_word32_t *mel); + +void filterbank_compute_psd16(FilterBank *bank, spx_word16_t *mel, spx_word16_t *psd); + +#ifndef FIXED_POINT +void filterbank_compute_bank(FilterBank *bank, float *psd, float *mel); +void filterbank_compute_psd(FilterBank *bank, float *mel, float *psd); +#endif + + +#endif diff --git a/apps/codecs/libspeex/filters.c b/apps/codecs/libspeex/filters.c index 7e4f3cea59..d49d4d80b3 100644 --- a/apps/codecs/libspeex/filters.c +++ b/apps/codecs/libspeex/filters.c @@ -83,8 +83,8 @@ void highpass(const spx_word16_t *x, spx_word16_t *y, int len, int filtID, spx_m spx_word16_t yi; spx_word32_t vout = ADD32(MULT16_16(num[0], x[i]),mem[0]); yi = EXTRACT16(SATURATE(PSHR32(vout,14),32767)); - mem[0] = ADD32(MAC16_16(mem[1], num[1],x[i]), MULT16_32_Q14(-den[1],vout)); - mem[1] = ADD32(MULT16_16(num[2],x[i]), MULT16_32_Q14(-den[2],vout)); + mem[0] = ADD32(MAC16_16(mem[1], num[1],x[i]), SHL32(MULT16_32_Q15(-den[1],vout),1)); + mem[1] = ADD32(MULT16_16(num[2],x[i]), SHL32(MULT16_32_Q15(-den[2],vout),1)); y[i] = yi; } } @@ -218,10 +218,10 @@ spx_word16_t compute_rms16(const spx_word16_t *x, int len) for (i=0;i>1; for (i=0;i>1; + N2 = N>>1; + ALLOC(xx1, M2+N2, spx_word16_t); + ALLOC(xx2, M2+N2, spx_word16_t); - for (i = 0; i < N/2; i++) - xx[2*i] = PSHR32(x[N/2-1-i],SIG_SHIFT); - for (i = 0; i < M - 1; i += 2) - xx[N+i] = mem[i+1]; + for (i = 0; i < N2; i++) + xx1[i] = x1[N2-1-i]; + for (i = 0; i < M2; i++) + xx1[N2+i] = mem1[2*i+1]; + for (i = 0; i < N2; i++) + xx2[i] = x2[N2-1-i]; + for (i = 0; i < M2; i++) + xx2[N2+i] = mem2[2*i+1]; - for (i = 0; i < N; i += 4) { + for (i = 0; i < N2; i += 2) { spx_sig_t y0, y1, y2, y3; - spx_word16_t x0; + spx_word16_t x10, x20; y0 = y1 = y2 = y3 = 0; - x0 = xx[N-4-i]; + x10 = xx1[N2-2-i]; + x20 = xx2[N2-2-i]; - for (j = 0; j < M; j += 4) { - spx_word16_t x1; + for (j = 0; j < M2; j += 2) { + spx_word16_t x11, x21; spx_word16_t a0, a1; - a0 = a[j]; - a1 = a[j+1]; - x1 = xx[N-2+j-i]; + a0 = a[2*j]; + a1 = a[2*j+1]; + x11 = xx1[N2-1+j-i]; + x21 = xx2[N2-1+j-i]; - y0 = ADD32(y0,SHR(MULT16_16(a0, x1),2)); - y1 = ADD32(y1,SHR(MULT16_16(a1, x1),2)); - y2 = ADD32(y2,SHR(MULT16_16(a0, x0),2)); - y3 = ADD32(y3,SHR(MULT16_16(a1, x0),2)); +#ifdef FIXED_POINT + /* We multiply twice by the same coef to avoid overflows */ + y0 = MAC16_16(MAC16_16(y0, a0, x11), NEG16(a0), x21); + y1 = MAC16_16(MAC16_16(y1, a1, x11), a1, x21); + y2 = MAC16_16(MAC16_16(y2, a0, x10), NEG16(a0), x20); + y3 = MAC16_16(MAC16_16(y3, a1, x10), a1, x20); +#else + y0 = ADD32(y0,MULT16_16(a0, x11-x21)); + y1 = ADD32(y1,MULT16_16(a1, x11+x21)); + y2 = ADD32(y2,MULT16_16(a0, x10-x20)); + y3 = ADD32(y3,MULT16_16(a1, x10+x20)); +#endif + a0 = a[2*j+2]; + a1 = a[2*j+3]; + x10 = xx1[N2+j-i]; + x20 = xx2[N2+j-i]; - a0 = a[j+2]; - a1 = a[j+3]; - x0 = xx[N+j-i]; - - y0 = ADD32(y0,SHR(MULT16_16(a0, x0),2)); - y1 = ADD32(y1,SHR(MULT16_16(a1, x0),2)); - y2 = ADD32(y2,SHR(MULT16_16(a0, x1),2)); - y3 = ADD32(y3,SHR(MULT16_16(a1, x1),2)); +#ifdef FIXED_POINT + /* We multiply twice by the same coef to avoid overflows */ + y0 = MAC16_16(MAC16_16(y0, a0, x10), NEG16(a0), x20); + y1 = MAC16_16(MAC16_16(y1, a1, x10), a1, x20); + y2 = MAC16_16(MAC16_16(y2, a0, x11), NEG16(a0), x21); + y3 = MAC16_16(MAC16_16(y3, a1, x11), a1, x21); +#else + y0 = ADD32(y0,MULT16_16(a0, x10-x20)); + y1 = ADD32(y1,MULT16_16(a1, x10+x20)); + y2 = ADD32(y2,MULT16_16(a0, x11-x21)); + y3 = ADD32(y3,MULT16_16(a1, x11+x21)); +#endif } - y[i] = y0; - y[i+1] = y1; - y[i+2] = y2; - y[i+3] = y3; +#ifdef FIXED_POINT + y[2*i] = EXTRACT16(SATURATE32(PSHR32(y0,15),32767)); + y[2*i+1] = EXTRACT16(SATURATE32(PSHR32(y1,15),32767)); + y[2*i+2] = EXTRACT16(SATURATE32(PSHR32(y2,15),32767)); + y[2*i+3] = EXTRACT16(SATURATE32(PSHR32(y3,15),32767)); +#else + /* Normalize up explicitly if we're in float */ + y[2*i] = 2.f*y0; + y[2*i+1] = 2.f*y1; + y[2*i+2] = 2.f*y2; + y[2*i+3] = 2.f*y3; +#endif } - for (i = 0; i < M - 1; i += 2) - mem[i+1] = xx[i]; + for (i = 0; i < M2; i++) + mem1[2*i+1] = xx1[i]; + for (i = 0; i < M2; i++) + mem2[2*i+1] = xx2[i]; } #ifdef FIXED_POINT #if 0 -spx_word16_t shift_filt[3][7] = {{-33, 1043, -4551, 19959, 19959, -4551, 1043}, +const spx_word16_t shift_filt[3][7] = {{-33, 1043, -4551, 19959, 19959, -4551, 1043}, {-98, 1133, -4425, 29179, 8895, -2328, 444}, {444, -2328, 8895, 29179, -4425, 1133, -98}}; #else -spx_word16_t shift_filt[3][7] = {{-390, 1540, -4993, 20123, 20123, -4993, 1540}, +const spx_word16_t shift_filt[3][7] = {{-390, 1540, -4993, 20123, 20123, -4993, 1540}, {-1064, 2817, -6694, 31589, 6837, -990, -209}, {-209, -990, 6837, 31589, -6694, 2817, -1064}}; #endif #else #if 0 -float shift_filt[3][7] = {{-9.9369e-04, 3.1831e-02, -1.3889e-01, 6.0910e-01, 6.0910e-01, -1.3889e-01, 3.1831e-02}, +const float shift_filt[3][7] = {{-9.9369e-04, 3.1831e-02, -1.3889e-01, 6.0910e-01, 6.0910e-01, -1.3889e-01, 3.1831e-02}, {-0.0029937, 0.0345613, -0.1350474, 0.8904793, 0.2714479, -0.0710304, 0.0135403}, {0.0135403, -0.0710304, 0.2714479, 0.8904793, -0.1350474, 0.0345613, -0.0029937}}; #else -float shift_filt[3][7] = {{-0.011915f, 0.046995f, -0.152373f, 0.614108f, 0.614108f, -0.152373f, 0.046995f}, +const float shift_filt[3][7] = {{-0.011915f, 0.046995f, -0.152373f, 0.614108f, 0.614108f, -0.152373f, 0.046995f}, {-0.0324855f, 0.0859768f, -0.2042986f, 0.9640297f, 0.2086420f, -0.0302054f, -0.0063646f}, {-0.0063646f, -0.0302054f, 0.2086420f, 0.9640297f, -0.2042986f, 0.0859768f, -0.0324855f}}; #endif @@ -784,7 +657,9 @@ char *stack spx_word16_t g1, g2; spx_word16_t ngain; spx_word16_t gg1, gg2; - +#ifdef FIXED_POINT + int scaledown=0; +#endif #if 0 /* Set to 1 to enable full pitch search */ int nol_pitch[6]; spx_word16_t nol_pitch_coef[6]; @@ -819,6 +694,23 @@ char *stack else interp_pitch(exc, iexc+nsf, -corr_pitch, 80); +#ifdef FIXED_POINT + for (i=0;i16383) + { + scaledown = 1; + break; + } + } + if (scaledown) + { + for (i=0;i -void filter_mem2_10(const float *x, const float *_num, const float *_den, float *y, int N, int ord, float *_mem) +void filter_mem16_10(const float *x, const float *_num, const float *_den, float *y, int N, int ord, float *_mem) { __m128 num[3], den[3], mem[3]; @@ -87,7 +87,7 @@ void filter_mem2_10(const float *x, const float *_num, const float *_den, float _mm_store_ss(_mem+9, mem[2]); } -void filter_mem2_8(const float *x, const float *_num, const float *_den, float *y, int N, int ord, float *_mem) +void filter_mem16_8(const float *x, const float *_num, const float *_den, float *y, int N, int ord, float *_mem) { __m128 num[2], den[2], mem[2]; @@ -130,18 +130,18 @@ void filter_mem2_8(const float *x, const float *_num, const float *_den, float * } -#define OVERRIDE_FILTER_MEM2 -void filter_mem2(const float *x, const float *_num, const float *_den, float *y, int N, int ord, float *_mem) +#define OVERRIDE_FILTER_MEM16 +void filter_mem16(const float *x, const float *_num, const float *_den, float *y, int N, int ord, float *_mem, char *stack) { if(ord==10) - filter_mem2_10(x, _num, _den, y, N, ord, _mem); + filter_mem16_10(x, _num, _den, y, N, ord, _mem); else if (ord==8) - filter_mem2_8(x, _num, _den, y, N, ord, _mem); + filter_mem16_8(x, _num, _den, y, N, ord, _mem); } -void iir_mem2_10(const float *x, const float *_den, float *y, int N, int ord, float *_mem) +void iir_mem16_10(const float *x, const float *_den, float *y, int N, int ord, float *_mem) { __m128 den[3], mem[3]; @@ -190,7 +190,7 @@ void iir_mem2_10(const float *x, const float *_den, float *y, int N, int ord, fl } -void iir_mem2_8(const float *x, const float *_den, float *y, int N, int ord, float *_mem) +void iir_mem16_8(const float *x, const float *_den, float *y, int N, int ord, float *_mem) { __m128 den[2], mem[2]; @@ -229,17 +229,17 @@ void iir_mem2_8(const float *x, const float *_den, float *y, int N, int ord, flo _mm_storeu_ps(_mem+4, mem[1]); } -#define OVERRIDE_IIR_MEM2 -void iir_mem2(const float *x, const float *_den, float *y, int N, int ord, float *_mem) +#define OVERRIDE_IIR_MEM16 +void iir_mem16(const float *x, const float *_den, float *y, int N, int ord, float *_mem, char *stack) { if(ord==10) - iir_mem2_10(x, _den, y, N, ord, _mem); + iir_mem16_10(x, _den, y, N, ord, _mem); else if (ord==8) - iir_mem2_8(x, _den, y, N, ord, _mem); + iir_mem16_8(x, _den, y, N, ord, _mem); } -void fir_mem2_10(const float *x, const float *_num, float *y, int N, int ord, float *_mem) +void fir_mem16_10(const float *x, const float *_num, float *y, int N, int ord, float *_mem) { __m128 num[3], mem[3]; @@ -287,7 +287,7 @@ void fir_mem2_10(const float *x, const float *_num, float *y, int N, int ord, fl _mm_store_ss(_mem+9, mem[2]); } -void fir_mem2_8(const float *x, const float *_num, float *y, int N, int ord, float *_mem) +void fir_mem16_8(const float *x, const float *_num, float *y, int N, int ord, float *_mem) { __m128 num[2], mem[2]; @@ -326,11 +326,11 @@ void fir_mem2_8(const float *x, const float *_num, float *y, int N, int ord, flo _mm_storeu_ps(_mem+4, mem[1]); } -#define OVERRIDE_FIR_MEM2 -void fir_mem2(const float *x, const float *_num, float *y, int N, int ord, float *_mem) +#define OVERRIDE_FIR_MEM16 +void fir_mem16(const float *x, const float *_num, float *y, int N, int ord, float *_mem, char *stack) { if(ord==10) - fir_mem2_10(x, _num, y, N, ord, _mem); + fir_mem16_10(x, _num, y, N, ord, _mem); else if (ord==8) - fir_mem2_8(x, _num, y, N, ord, _mem); + fir_mem16_8(x, _num, y, N, ord, _mem); } diff --git a/apps/codecs/libspeex/fixed_debug.h b/apps/codecs/libspeex/fixed_debug.h index 65c5712d32..d5c449f4d9 100644 --- a/apps/codecs/libspeex/fixed_debug.h +++ b/apps/codecs/libspeex/fixed_debug.h @@ -74,53 +74,57 @@ static inline int NEG32(long long x) return res; } -static inline short EXTRACT16(int x) +#define EXTRACT16(x) _EXTRACT16(x, __FILE__, __LINE__) +static inline short _EXTRACT16(int x, char *file, int line) { int res; if (!VERIFY_SHORT(x)) { - fprintf (stderr, "EXTRACT16: input is not short: %d\n", x); + fprintf (stderr, "EXTRACT16: input is not short: %d in %s: line %d\n", x, file, line); } res = x; spx_mips++; return res; } -static inline int EXTEND32(int x) +#define EXTEND32(x) _EXTEND32(x, __FILE__, __LINE__) +static inline int _EXTEND32(int x, char *file, int line) { int res; if (!VERIFY_SHORT(x)) { - fprintf (stderr, "EXTRACT16: input is not short: %d\n", x); + fprintf (stderr, "EXTEND32: input is not short: %d in %s: line %d\n", x, file, line); } res = x; spx_mips++; return res; } -static inline short SHR16(int a, int shift) +#define SHR16(a, shift) _SHR16(a, shift, __FILE__, __LINE__) +static inline short _SHR16(int a, int shift, char *file, int line) { int res; if (!VERIFY_SHORT(a) || !VERIFY_SHORT(shift)) { - fprintf (stderr, "SHR16: inputs are not short: %d %d\n", a, shift); + fprintf (stderr, "SHR16: inputs are not short: %d >> %d in %s: line %d\n", a, shift, file, line); } res = a>>shift; if (!VERIFY_SHORT(res)) - fprintf (stderr, "SHR16: output is not short: %d\n", res); + fprintf (stderr, "SHR16: output is not short: %d in %s: line %d\n", res, file, line); spx_mips++; return res; } -static inline short SHL16(int a, int shift) +#define SHL16(a, shift) _SHL16(a, shift, __FILE__, __LINE__) +static inline short _SHL16(int a, int shift, char *file, int line) { int res; if (!VERIFY_SHORT(a) || !VERIFY_SHORT(shift)) { - fprintf (stderr, "SHR16: inputs are not short: %d %d\n", a, shift); + fprintf (stderr, "SHL16: inputs are not short: %d %d in %s: line %d\n", a, shift, file, line); } res = a<>shift; if (!VERIFY_INT(res)) + { fprintf (stderr, "SHR32: output is not int: %d\n", (int)res); + } spx_mips++; return res; } @@ -143,62 +149,71 @@ static inline int SHL32(long long a, int shift) long long res; if (!VERIFY_INT(a) || !VERIFY_SHORT(shift)) { - fprintf (stderr, "SHR32: inputs are not int: %d %d\n", (int)a, shift); + fprintf (stderr, "SHL32: inputs are not int: %d %d\n", (int)a, shift); } res = a<>1))),shift)) +#define PSHR32(a,shift) (SHR32(ADD32((a),((1<<((shift))>>1))),shift)) +#define VSHR32(a, shift) (((shift)>0) ? SHR32(a, shift) : SHL32(a, -(shift))) -#define PSHR16(a,shift) (SHR16(ADD16(a,(1<<((shift)-1))),shift)) -#define PSHR32(a,shift) (SHR32(ADD32(a,(1<<((shift)-1))),shift)) #define SATURATE16(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x))) #define SATURATE32(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x))) -#define SHR(a,shift) ((a) >> (shift)) -#define SHL(a,shift) ((a) << (shift)) +//#define SHR(a,shift) ((a) >> (shift)) +//#define SHL(a,shift) ((a) << (shift)) -static inline short ADD16(int a, int b) +#define ADD16(a, b) _ADD16(a, b, __FILE__, __LINE__) +static inline short _ADD16(int a, int b, char *file, int line) { int res; if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) { - fprintf (stderr, "ADD16: inputs are not short: %d %d\n", a, b); + fprintf (stderr, "ADD16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line); } res = a+b; if (!VERIFY_SHORT(res)) - fprintf (stderr, "ADD16: output is not short: %d+%d=%d\n", a,b,res); - spx_mips++; - return res; -} -static inline short SUB16(int a, int b) -{ - int res; - if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) { - fprintf (stderr, "SUB16: inputs are not short: %d %d\n", a, b); + fprintf (stderr, "ADD16: output is not short: %d+%d=%d in %s: line %d\n", a,b,res, file, line); } - res = a-b; - if (!VERIFY_SHORT(res)) - fprintf (stderr, "SUB16: output is not short: %d\n", res); spx_mips++; return res; } -static inline int ADD32(long long a, long long b) +#define SUB16(a, b) _SUB16(a, b, __FILE__, __LINE__) +static inline short _SUB16(int a, int b, char *file, int line) +{ + int res; + if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) + { + fprintf (stderr, "SUB16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line); + } + res = a-b; + if (!VERIFY_SHORT(res)) + fprintf (stderr, "SUB16: output is not short: %d in %s: line %d\n", res, file, line); + spx_mips++; + return res; +} + +#define ADD32(a, b) _ADD32(a, b, __FILE__, __LINE__) +static inline int _ADD32(long long a, long long b, char *file, int line) { long long res; if (!VERIFY_INT(a) || !VERIFY_INT(b)) { - fprintf (stderr, "ADD32: inputs are not int: %d %d\n", (int)a, (int)b); + fprintf (stderr, "ADD32: inputs are not int: %d %d in %s: line %d\n", (int)a, (int)b, file, line); } res = a+b; if (!VERIFY_INT(res)) { - fprintf (stderr, "ADD32: output is not int: %d\n", (int)res); + fprintf (stderr, "ADD32: output is not int: %d in %s: line %d\n", (int)res, file, line); } spx_mips++; return res; @@ -220,8 +235,6 @@ static inline int SUB32(long long a, long long b) #define ADD64(a,b) (MIPS_INC(a)+(b)) -#define PSHR(a,shift) (SHR((a)+(1<<((shift)-1)),shift)) - /* result fits in 16 bits */ static inline short MULT16_16_16(int a, int b) { @@ -237,36 +250,56 @@ static inline short MULT16_16_16(int a, int b) return res; } -static inline int MULT16_16(int a, int b) +#define MULT16_16(a, b) _MULT16_16(a, b, __FILE__, __LINE__) +static inline int _MULT16_16(int a, int b, char *file, int line) { long long res; if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) { - fprintf (stderr, "MULT16_16: inputs are not short: %d %d\n", a, b); + fprintf (stderr, "MULT16_16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line); } res = ((long long)a)*b; if (!VERIFY_INT(res)) - fprintf (stderr, "MULT16_16: output is not int: %d\n", (int)res); + fprintf (stderr, "MULT16_16: output is not int: %d in %s: line %d\n", (int)res, file, line); spx_mips++; return res; } #define MAC16_16(c,a,b) (spx_mips--,ADD32((c),MULT16_16((a),(b)))) -#define MAC16_16_Q11(c,a,b) (ADD16((c),EXTRACT16(SHR32(MULT16_16((a),(b)),11)))) -#define MAC16_16_Q13(c,a,b) (ADD16((c),EXTRACT16(SHR32(MULT16_16((a),(b)),13)))) -#define MAC16_16_P13(c,a,b) (ADD32((c),SHR(ADD32(4096,MULT16_16((a),(b))),13))) +#define MAC16_16_Q11(c,a,b) (EXTRACT16(ADD16((c),EXTRACT16(SHR32(MULT16_16((a),(b)),11))))) +#define MAC16_16_Q13(c,a,b) (EXTRACT16(ADD16((c),EXTRACT16(SHR32(MULT16_16((a),(b)),13))))) +#define MAC16_16_P13(c,a,b) (EXTRACT16(ADD32((c),SHR32(ADD32(4096,MULT16_16((a),(b))),13)))) -static inline int MULT16_32_QX(int a, long long b, int Q) +#define MULT16_32_QX(a, b, Q) _MULT16_32_QX(a, b, Q, __FILE__, __LINE__) +static inline int _MULT16_32_QX(int a, long long b, int Q, char *file, int line) { long long res; if (!VERIFY_SHORT(a) || !VERIFY_INT(b)) { - fprintf (stderr, "MULT16_32_Q%d: inputs are not short+int: %d %d\n", Q, (int)a, (int)b); + fprintf (stderr, "MULT16_32_Q%d: inputs are not short+int: %d %d in %s: line %d\n", Q, (int)a, (int)b, file, line); } + if (ABS32(b)>=(1<<(15+Q))) + fprintf (stderr, "MULT16_32_Q%d: second operand too large: %d %d in %s: line %d\n", Q, (int)a, (int)b, file, line); res = (((long long)a)*(long long)b) >> Q; if (!VERIFY_INT(res)) - fprintf (stderr, "MULT16_32_Q%d: output is not int: %d*%d=%d\n", Q, (int)a, (int)b,(int)res); + fprintf (stderr, "MULT16_32_Q%d: output is not int: %d*%d=%d in %s: line %d\n", Q, (int)a, (int)b,(int)res, file, line); + spx_mips+=5; + return res; +} + +static inline int MULT16_32_PX(int a, long long b, int Q) +{ + long long res; + if (!VERIFY_SHORT(a) || !VERIFY_INT(b)) + { + fprintf (stderr, "MULT16_32_P%d: inputs are not short+int: %d %d\n", Q, (int)a, (int)b); + } + if (ABS32(b)>=(1<<(15+Q))) + fprintf (stderr, "MULT16_32_Q%d: second operand too large: %d %d\n", Q, (int)a, (int)b); + res = ((((long long)a)*(long long)b) + ((1<>1))>> Q; + if (!VERIFY_INT(res)) + fprintf (stderr, "MULT16_32_P%d: output is not int: %d*%d=%d\n", Q, (int)a, (int)b,(int)res); spx_mips+=5; return res; } @@ -278,6 +311,7 @@ static inline int MULT16_32_QX(int a, long long b, int Q) #define MULT16_32_Q13(a,b) MULT16_32_QX(a,b,13) #define MULT16_32_Q14(a,b) MULT16_32_QX(a,b,14) #define MULT16_32_Q15(a,b) MULT16_32_QX(a,b,15) +#define MULT16_32_P15(a,b) MULT16_32_PX(a,b,15) #define MAC16_32_Q15(c,a,b) ADD32((c),MULT16_32_Q15((a),(b))) static inline int SATURATE(int a, int b) @@ -341,7 +375,9 @@ static inline short MULT16_16_Q15(int a, int b) res = ((long long)a)*b; res >>= 15; if (!VERIFY_SHORT(res)) + { fprintf (stderr, "MULT16_16_Q15: output is not short: %d\n", (int)res); + } spx_mips+=3; return res; } @@ -398,23 +434,24 @@ static inline short MULT16_16_P15(int a, int b) return res; } +#define DIV32_16(a, b) _DIV32_16(a, b, __FILE__, __LINE__) -static inline int DIV32_16(long long a, long long b) +static inline int _DIV32_16(long long a, long long b, char *file, int line) { long long res; if (b==0) { - fprintf(stderr, "DIV32_16: divide by zero: %d/%d\n", (int)a, (int)b); + fprintf(stderr, "DIV32_16: divide by zero: %d/%d in %s: line %d\n", (int)a, (int)b, file, line); return 0; } if (!VERIFY_INT(a) || !VERIFY_SHORT(b)) { - fprintf (stderr, "DIV32_16: inputs are not int/short: %d %d\n", (int)a, (int)b); + fprintf (stderr, "DIV32_16: inputs are not int/short: %d %d in %s: line %d\n", (int)a, (int)b, file, line); } res = a/b; if (!VERIFY_SHORT(res)) { - fprintf (stderr, "DIV32_16: output is not short: %d / %d = %d\n", (int)a,(int)b,(int)res); + fprintf (stderr, "DIV32_16: output is not short: %d / %d = %d in %s: line %d\n", (int)a,(int)b,(int)res, file, line); if (res>32767) res = 32767; if (res<-32768) @@ -423,22 +460,24 @@ static inline int DIV32_16(long long a, long long b) spx_mips+=20; return res; } -static inline int DIV32(long long a, long long b) + +#define DIV32(a, b) _DIV32(a, b, __FILE__, __LINE__) +static inline int _DIV32(long long a, long long b, char *file, int line) { long long res; if (b==0) { - fprintf(stderr, "DIV32: divide by zero: %d/%d\n", (int)a, (int)b); + fprintf(stderr, "DIV32: divide by zero: %d/%d in %s: line %d\n", (int)a, (int)b, file, line); return 0; } if (!VERIFY_INT(a) || !VERIFY_INT(b)) { - fprintf (stderr, "DIV32: inputs are not int/short: %d %d\n", (int)a, (int)b); + fprintf (stderr, "DIV32: inputs are not int/short: %d %d in %s: line %d\n", (int)a, (int)b, file, line); } res = a/b; if (!VERIFY_INT(res)) - fprintf (stderr, "DIV32: output is not int: %d\n", (int)res); + fprintf (stderr, "DIV32: output is not int: %d in %s: line %d\n", (int)res, file, line); spx_mips+=36; return res; } diff --git a/apps/codecs/libspeex/fixed_generic.h b/apps/codecs/libspeex/fixed_generic.h index 375050c353..2948177c0b 100644 --- a/apps/codecs/libspeex/fixed_generic.h +++ b/apps/codecs/libspeex/fixed_generic.h @@ -46,14 +46,15 @@ #define SHL16(a,shift) ((a) << (shift)) #define SHR32(a,shift) ((a) >> (shift)) #define SHL32(a,shift) ((a) << (shift)) -#define PSHR16(a,shift) (SHR16((a)+(1<<((shift)-1)),shift)) -#define PSHR32(a,shift) (SHR32((a)+(1<<((shift)-1)),shift)) +#define PSHR16(a,shift) (SHR16((a)+((1<<((shift))>>1)),shift)) +#define PSHR32(a,shift) (SHR32((a)+((1<<((shift))>>1)),shift)) +#define VSHR32(a, shift) (((shift)>0) ? SHR32(a, shift) : SHL32(a, -(shift))) #define SATURATE16(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x))) #define SATURATE32(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x))) #define SHR(a,shift) ((a) >> (shift)) #define SHL(a,shift) ((spx_word32_t)(a) << (shift)) -#define PSHR(a,shift) (SHR((a)+(1<<((shift)-1)),shift)) +#define PSHR(a,shift) (SHR((a)+((1<<((shift))>>1)),shift)) #define SATURATE(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x))) @@ -77,6 +78,7 @@ #define MULT16_32_Q11(a,b) ADD32(MULT16_16((a),SHR((b),11)), SHR(MULT16_16((a),((b)&0x000007ff)),11)) #define MAC16_32_Q11(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),11)), SHR(MULT16_16((a),((b)&0x000007ff)),11))) +#define MULT16_32_P15(a,b) ADD32(MULT16_16((a),SHR((b),15)), PSHR(MULT16_16((a),((b)&0x00007fff)),15)) #define MULT16_32_Q15(a,b) ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15)) #define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))) diff --git a/apps/codecs/libspeex/jitter.c b/apps/codecs/libspeex/jitter.c index 79d1f9ddce..dc31c06407 100644 --- a/apps/codecs/libspeex/jitter.c +++ b/apps/codecs/libspeex/jitter.c @@ -41,7 +41,10 @@ #include #include #include -#include + +#ifndef NULL +#define NULL 0 +#endif #define LATE_BINS 10 #define MAX_MARGIN 30 /**< Number of bins in margin histogram */ @@ -181,7 +184,7 @@ void jitter_buffer_put(JitterBuffer *jitter, const JitterBufferPacket *packet) /* Copy packet in buffer */ jitter->buf[i]=(char*)speex_alloc(packet->len); - for (j=0;j<(signed int)packet->len;j++) + for (j=0;((unsigned)j)len;j++) jitter->buf[i][j]=packet->data[j]; jitter->timestamp[i]=packet->timestamp; jitter->span[i]=packet->span; @@ -378,7 +381,7 @@ int jitter_buffer_get(JitterBuffer *jitter, JitterBufferPacket *packet, spx_uint /* Check for potential overflow */ packet->len = jitter->len[i]; /* Copy packet */ - for (j=0;j<(signed int)packet->len;j++) + for (j=0;((unsigned)j)len;j++) packet->data[j] = jitter->buf[i][j]; /* Remove packet */ speex_free(jitter->buf[i]); @@ -424,7 +427,23 @@ void jitter_buffer_tick(JitterBuffer *jitter) jitter->current_timestamp += jitter->tick_size; } - +/* Used like the ioctl function to control the jitter buffer parameters */ +int jitter_buffer_ctl(JitterBuffer *jitter, int request, void *ptr) +{ + switch(request) + { + case JITTER_BUFFER_SET_MARGIN: + jitter->buffer_margin = *(spx_int32_t*)ptr; + break; + case JITTER_BUFFER_GET_MARGIN: + *(spx_int32_t*)ptr = jitter->buffer_margin; + break; + default: + speex_warning_int("Unknown jitter_buffer_ctl request: ", request); + return -1; + } + return 0; +} diff --git a/apps/codecs/libspeex/kiss_fft.c b/apps/codecs/libspeex/kiss_fft.c index f913a15a21..4ab31f26fb 100644 --- a/apps/codecs/libspeex/kiss_fft.c +++ b/apps/codecs/libspeex/kiss_fft.c @@ -19,7 +19,6 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND #include "_kiss_fft_guts.h" #include "misc.h" -#include "math_approx.h" /* The guts header contains all the multiplication and addition macros that are defined for fixed or floating point complex numbers. It also delares the kf_ internal functions. @@ -55,8 +54,8 @@ static void kf_bfly2( kiss_fft_cpx *x=Fout; for (i=0;i<2*m;i++) { - x[i].r = SHR(x[i].r,1); - x[i].i = SHR(x[i].i,1); + x[i].r = SHR16(x[i].r,1); + x[i].i = SHR16(x[i].i,1); } } @@ -86,9 +85,9 @@ static void kf_bfly4( tw3 = tw2 = tw1 = st->twiddles; if (!st->inverse) { - int i; + unsigned int i; kiss_fft_cpx *x=Fout; - for (i=0;i<4*(signed int)m;i++) + for (i=0;i<4*m;i++) { x[i].r = PSHR16(x[i].r,2); x[i].i = PSHR16(x[i].i,2); @@ -339,8 +338,6 @@ static void kf_factor(int n,int * facbuf) { int p=4; - double floor_sqrt; - floor_sqrt = floor( sqrt((double)n) ); /*factor out powers of 4, powers of 2, then any remaining primes */ do { @@ -350,7 +347,7 @@ void kf_factor(int n,int * facbuf) case 2: p = 3; break; default: p += 2; break; } - if (p > floor_sqrt) + if (p>32000 || (spx_int32_t)p*(spx_int32_t)p > n) p = n; /* no more factors, skip to end */ } n /= p; @@ -358,7 +355,6 @@ void kf_factor(int n,int * facbuf) *facbuf++ = n; } while (n > 1); } - /* * * User-callable function to allocate all necessary storage space for the fft. @@ -383,15 +379,22 @@ kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem int i; st->nfft=nfft; st->inverse = inverse_fft; - +#ifdef FIXED_POINT for (i=0;iinverse) - phase *= -1; - kf_cexp(st->twiddles+i, phase ); + spx_word32_t phase = i; + if (!st->inverse) + phase = -phase; + kf_cexp2(st->twiddles+i, DIV32(SHL32(phase,17),nfft)); } - +#else + for (i=0;iinverse) + phase *= -1; + kf_cexp(st->twiddles+i, phase ); + } +#endif kf_factor(nfft,st->factors); } return st; diff --git a/apps/codecs/libspeex/kiss_fftr.c b/apps/codecs/libspeex/kiss_fftr.c index 763ae12a00..95c4573326 100644 --- a/apps/codecs/libspeex/kiss_fftr.c +++ b/apps/codecs/libspeex/kiss_fftr.c @@ -17,7 +17,6 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND #endif #include "kiss_fftr.h" -#include "math_approx.h" #include "_kiss_fft_guts.h" struct kiss_fftr_state{ @@ -59,13 +58,22 @@ kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem,size_t * lenme st->super_twiddles = st->tmpbuf + nfft; kiss_fft_alloc(nfft, inverse_fft, st->substate, &subsize); - for (i = 0; i < nfft; ++i) { - double phase = - -3.14159265358979323846264338327 * ((double) i / nfft + .5); - if (inverse_fft) - phase *= -1; - kf_cexp (st->super_twiddles+i,phase); +#ifdef FIXED_POINT + for (i=0;i>1); + if (!inverse_fft) + phase = -phase; + kf_cexp2(st->super_twiddles+i, DIV32(SHL32(phase,16),nfft)); } +#else + for (i=0;isuper_twiddles+i, phase ); + } +#endif return st; } @@ -76,8 +84,7 @@ void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *fr kiss_fft_cpx fpnk,fpk,f1k,f2k,tw,tdc; if ( st->substate->inverse) { - speex_warning("kiss fft usage error: improper alloc\n"); - //exit(1); + speex_error("kiss fft usage error: improper alloc\n"); } ncfft = st->substate->nfft; @@ -131,8 +138,7 @@ void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_cpx *freqdata,kiss_fft_scalar *t int k, ncfft; if (st->substate->inverse == 0) { - speex_warning ("kiss fft usage error: improper alloc\n"); - //exit (1); + speex_error ("kiss fft usage error: improper alloc\n"); } ncfft = st->substate->nfft; diff --git a/apps/codecs/libspeex/lbr_48k_tables.c b/apps/codecs/libspeex/lbr_48k_tables.c index 2e6db3fd4a..d4d80dc399 100644 --- a/apps/codecs/libspeex/lbr_48k_tables.c +++ b/apps/codecs/libspeex/lbr_48k_tables.c @@ -34,74 +34,74 @@ #endif -int dummy_epic_48k_variable=0; +const int dummy_epic_48k_variable=0; #ifdef EPIC_48K -const signed char gain_cdbk_ulbr[192] = { --31, -48, -30, --19, -10, -18, --33, -22, -45, --5, -56, -43, --30, -56, -3, --59, -17, -52, --41, -60, -58, --64, -47, -22, --30, -31, -31, --29, -14, -31, --22, -37, -58, --31, -44, 13, --37, 0, 1, --46, -55, -35, --56, -14, -53, --8, 1, -36, --29, -15, -27, --29, -39, -28, --43, -5, 3, --51, -27, -54, -10, -46, -36, -3, -3, -42, --27, 16, -22, --34, -52, 13, --31, -21, -28, --34, -45, -40, --20, -48, 4, --40, -27, 16, --6, 11, -44, --35, 12, -5, -19, -33, -37, --29, 18, -32, --29, -23, -19, -16, -47, -28, --34, -30, 17, --20, 2, -26, --38, -40, -36, -15, -14, -40, --39, 14, -9, --15, 25, -39, --26, 19, -32, --39, 17, -14, -10, -36, -26, -14, -13, -40, --29, -21, -12, --8, 19, -39, --36, -18, 15, --32, -38, -38, --19, 4, -23, --38, -7, 11, -9, -10, -39, --37, 24, -19, --34, -5, -8, --20, 23, -41, --4, 17, -31, --17, -26, -26, --24, 28, -36, --7, 15, -39, --42, 16, -11, --29, 14, -6, --36, 28, -27, --21, 5, -26, -11, -9, -39, --38, -7, 13, +const signed char gain_cdbk_ulbr[256] = { +-31, -48, -30, 10, +-19, -10, -18, 25, +-33, -22, -45, 12, +-5, -56, -43, 31, +-30, -56, -3, 28, +-59, -17, -52, 31, +-41, -60, -58, 32, +-64, -47, -22, 29, +-30, -31, -31, 2, +-29, -14, -31, 11, +-22, -37, -58, 21, +-31, -44, 13, 29, +-37, 0, 1, 35, +-46, -55, -35, 20, +-56, -14, -53, 32, +-8, 1, -36, 31, +-29, -15, -27, 13, +-29, -39, -28, 7, +-43, -5, 3, 37, +-51, -27, -54, 23, +10, -46, -36, 30, +3, -3, -42, 37, +-27, 16, -22, 32, +-34, -52, 13, 34, +-31, -21, -28, 8, +-34, -45, -40, 12, +-20, -48, 4, 32, +-40, -27, 16, 31, +-6, 11, -44, 41, +-35, 12, -5, 37, +19, -33, -37, 29, +-29, 18, -32, 27, +-29, -23, -19, 13, +16, -47, -28, 34, +-34, -30, 17, 27, +-20, 2, -26, 26, +-38, -40, -36, 9, +15, -14, -40, 37, +-39, 14, -9, 38, +-15, 25, -39, 41, +-26, 19, -32, 29, +-39, 17, -14, 37, +10, -36, -26, 26, +14, -13, -40, 37, +-29, -21, -12, 17, +-8, 19, -39, 41, +-36, -18, 15, 33, +-32, -38, -38, 6, +-19, 4, -23, 29, +-38, -7, 11, 37, +9, -10, -39, 35, +-37, 24, -19, 37, +-34, -5, -8, 27, +-20, 23, -41, 38, +-4, 17, -31, 39, +-17, -26, -26, 14, +-24, 28, -36, 36, +-7, 15, -39, 40, +-42, 16, -11, 40, +-29, 14, -6, 38, +-36, 28, -27, 35, +-21, 5, -26, 27, +11, -9, -39, 37, +-38, -7, 13, 38 }; diff --git a/apps/codecs/libspeex/lsp.c b/apps/codecs/libspeex/lsp.c index 3fdc08aab3..a73d8835f0 100644 --- a/apps/codecs/libspeex/lsp.c +++ b/apps/codecs/libspeex/lsp.c @@ -509,7 +509,7 @@ void lsp_to_lpc(spx_lsp_t *freq,spx_coef_t *ak,int lpcrdr, char *stack) /* hard limit ak's to +/- 32767 */ - if (a < -32767) a = 32767; + if (a < -32767) a = -32767; if (a > 32767) a = 32767; ak[j-1] = (short)a; diff --git a/apps/codecs/libspeex/lsp.h b/apps/codecs/libspeex/lsp.h index 6266f42fb5..9d0345f426 100644 --- a/apps/codecs/libspeex/lsp.h +++ b/apps/codecs/libspeex/lsp.h @@ -1,64 +1,64 @@ -/*---------------------------------------------------------------------------*\ -Original Copyright - FILE........: AK2LSPD.H - TYPE........: Turbo C header file - COMPANY.....: Voicetronix - AUTHOR......: James Whitehall - DATE CREATED: 21/11/95 - -Modified by Jean-Marc Valin - - This file contains functions for converting Linear Prediction - Coefficients (LPC) to Line Spectral Pair (LSP) and back. Note that the - LSP coefficients are not in radians format but in the x domain of the - unit circle. - -\*---------------------------------------------------------------------------*/ -/** - @file lsp.h - @brief Line Spectral Pair (LSP) functions. -*/ -/* Speex License: - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - - Neither the name of the Xiph.org Foundation nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef __AK2LSPD__ -#define __AK2LSPD__ - -#include "misc.h" - -int lpc_to_lsp (spx_coef_t *a, int lpcrdr, spx_lsp_t *freq, int nb, spx_word16_t delta, char *stack); -void lsp_to_lpc(spx_lsp_t *freq, spx_coef_t *ak, int lpcrdr, char *stack); - -/*Added by JMV*/ -void lsp_enforce_margin(spx_lsp_t *lsp, int len, spx_word16_t margin); - -void lsp_interpolate(spx_lsp_t *old_lsp, spx_lsp_t *new_lsp, spx_lsp_t *interp_lsp, int len, int subframe, int nb_subframes); - -#endif /* __AK2LSPD__ */ +/*---------------------------------------------------------------------------*\ +Original Copyright + FILE........: AK2LSPD.H + TYPE........: Turbo C header file + COMPANY.....: Voicetronix + AUTHOR......: James Whitehall + DATE CREATED: 21/11/95 + +Modified by Jean-Marc Valin + + This file contains functions for converting Linear Prediction + Coefficients (LPC) to Line Spectral Pair (LSP) and back. Note that the + LSP coefficients are not in radians format but in the x domain of the + unit circle. + +\*---------------------------------------------------------------------------*/ +/** + @file lsp.h + @brief Line Spectral Pair (LSP) functions. +*/ +/* Speex License: + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + - Neither the name of the Xiph.org Foundation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef __AK2LSPD__ +#define __AK2LSPD__ + +#include "misc.h" + +int lpc_to_lsp (spx_coef_t *a, int lpcrdr, spx_lsp_t *freq, int nb, spx_word16_t delta, char *stack); +void lsp_to_lpc(spx_lsp_t *freq, spx_coef_t *ak, int lpcrdr, char *stack); + +/*Added by JMV*/ +void lsp_enforce_margin(spx_lsp_t *lsp, int len, spx_word16_t margin); + +void lsp_interpolate(spx_lsp_t *old_lsp, spx_lsp_t *new_lsp, spx_lsp_t *interp_lsp, int len, int subframe, int nb_subframes); + +#endif /* __AK2LSPD__ */ diff --git a/apps/codecs/libspeex/lsp_arm4.h b/apps/codecs/libspeex/lsp_arm4.h deleted file mode 100644 index 20e505287e..0000000000 --- a/apps/codecs/libspeex/lsp_arm4.h +++ /dev/null @@ -1,89 +0,0 @@ -/* Copyright (C) 2006 David Rowe */ -/** - @file lsp_bfin.h - @author David Rowe - @brief LSP routines optimised for the Blackfin -*/ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - - Neither the name of the Xiph.org Foundation nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#define OVERRIDE_CHEB_POLY_EVA -#ifdef OVERRIDE_CHEB_POLY_EVA -static inline spx_word32_t cheb_poly_eva( - spx_word16_t *coef, /* P or Q coefs in Q13 format */ - spx_word16_t x, /* cos of freq (-1.0 to 1.0) in Q14 format */ - int m, /* LPC order/2 */ - char *stack -) -{ - spx_word32_t sum; - - __asm__ __volatile__ - ( - "P0 = %2;\n\t" /* P0: coef[m], coef[m-1],..., coef[0] */ - "R4 = 8192;\n\t" /* R4: rounding constant */ - "R2 = %1;\n\t" /* R2: x */ - - "R5 = -16383;\n\t" - "R2 = MAX(R2,R5);\n\t" - "R5 = 16383;\n\t" - "R2 = MIN(R2,R5);\n\t" - - "R3 = W[P0--] (X);\n\t" /* R3: sum */ - "R5 = W[P0--] (X);\n\t" - "R5 = R5.L * R2.L (IS);\n\t" - "R5 = R5 + R4;\n\t" - "R5 >>>= 14;\n\t" - "R3 = R3 + R5;\n\t" - - "R0 = R2;\n\t" /* R0: b0 */ - "R1 = 16384;\n\t" /* R1: b1 */ - "LOOP cpe%= LC0 = %3;\n\t" - "LOOP_BEGIN cpe%=;\n\t" - "P1 = R0;\n\t" - "R0 = R2.L * R0.L (IS) || R5 = W[P0--] (X);\n\t" - "R0 >>>= 13;\n\t" - "R0 = R0 - R1;\n\t" - "R1 = P1;\n\t" - "R5 = R5.L * R0.L (IS);\n\t" - "R5 = R5 + R4;\n\t" - "R5 >>>= 14;\n\t" - "R3 = R3 + R5;\n\t" - "LOOP_END cpe%=;\n\t" - "%0 = R3;\n\t" - : "=&d" (sum) - : "a" (x), "a" (&coef[m]), "a" (m-1) - : "R0", "R1", "R3", "R2", "R4", "R5", "P0", "P1" - ); - return sum; -} -#endif - - - diff --git a/apps/codecs/libspeex/ltp.c b/apps/codecs/libspeex/ltp.c index 27e4f4de2d..fa77da2b9d 100644 --- a/apps/codecs/libspeex/ltp.c +++ b/apps/codecs/libspeex/ltp.c @@ -176,20 +176,56 @@ void open_loop_nbest_pitch(spx_word16_t *sw, int start, int end, int len, int *p VARDECL(spx_word32_t *best_ener); spx_word32_t e0; VARDECL(spx_word32_t *corr); +#ifdef FIXED_POINT + /* In fixed-point, we need only one (temporary) array of 32-bit values and two (corr16, ener16) + arrays for (normalized) 16-bit values */ + VARDECL(spx_word16_t *corr16); + VARDECL(spx_word16_t *ener16); + spx_word32_t *energy; + int cshift=0, eshift=0; + int scaledown = 0; + ALLOC(corr16, end-start+1, spx_word16_t); + ALLOC(ener16, end-start+1, spx_word16_t); + ALLOC(corr, end-start+1, spx_word32_t); + energy = corr; +#else + /* In floating-point, we need to float arrays and no normalized copies */ VARDECL(spx_word32_t *energy); - + spx_word16_t *corr16; + spx_word16_t *ener16; + ALLOC(energy, end-start+2, spx_word32_t); + ALLOC(corr, end-start+1, spx_word32_t); + corr16 = corr; + ener16 = energy; +#endif + ALLOC(best_score, N, spx_word32_t); ALLOC(best_ener, N, spx_word32_t); - ALLOC(corr, end-start+1, spx_word32_t); - ALLOC(energy, end-start+2, spx_word32_t); - for (i=0;i16383) + { + scaledown=1; + break; + } + } + /* If the weighted input is close to saturation, then we scale it down */ + if (scaledown) + { + for (i=-end;iMULT16_16(best_score[N-1],ADD16(1,ener16[i-start]))) - { - /* We can safely put it last and then check */ - best_score[N-1]=tmp; - best_ener[N-1]=ener16[i-start]+1; - pitch[N-1]=i; - /* Check if it comes in front of others */ - for (j=0;jMULT16_16(best_score[j],ADD16(1,ener16[i-start]))) - { - for (k=N-1;k>j;k--) - { - best_score[k]=best_score[k-1]; - best_ener[k]=best_ener[k-1]; - pitch[k]=pitch[k-1]; - } - best_score[j]=tmp; - best_ener[j]=ener16[i-start]+1; - pitch[j]=i; - break; - } - } - } + sw[i]=SHL16(sw[i],1); } - } -#else + } +#endif + + /* Search for the best pitch prediction gain */ for (i=start;i<=end;i++) { - float tmp = corr[i-start]*corr[i-start]; - if (tmp*best_ener[N-1]>best_score[N-1]*(1+energy[i-start])) + spx_word16_t tmp = MULT16_16_16(corr16[i-start],corr16[i-start]); + /* Instead of dividing the tmp by the energy, we multiply on the other side */ + if (MULT16_16(tmp,best_ener[N-1])>MULT16_16(best_score[N-1],ADD16(1,ener16[i-start]))) { - for (j=0;jbest_score[j]*(1+energy[i-start])) + if (MULT16_16(tmp,best_ener[j])>MULT16_16(best_score[j],ADD16(1,ener16[i-start]))) { for (k=N-1;k>j;k--) { @@ -260,29 +279,30 @@ void open_loop_nbest_pitch(spx_word16_t *sw, int start, int end, int len, int *p pitch[k]=pitch[k-1]; } best_score[j]=tmp; - best_ener[j]=energy[i-start]+1; + best_ener[j]=ener16[i-start]+1; pitch[j]=i; break; } } } } -#endif - - /* Compute open-loop gain */ + + /* Compute open-loop gain if necessary */ if (gain) { - for (j=0;j=0;i--) { spx_word16_t e0=exc2[-pitch-1+i]; +#ifdef FIXED_POINT + /* Scale excitation down if needed (avoiding overflow) */ + if (scaledown) + e0 = SHR16(e0,1); +#endif x[i][0]=MULT16_16_Q14(r[0], e0); for (j=0;j16383) + { + scaledown=1; + break; + } + } + for (i=-end;i16383) + { + scaledown=1; + break; + } + } +#endif if (N>end-start+1) N=end-start+1; if (end != start) @@ -562,7 +621,7 @@ spx_word32_t *cumul_gain for (j=0;j63) pitch_coef=63; @@ -734,9 +800,11 @@ spx_word32_t *cumul_gain { exc[i]=MULT16_32_Q15(SHL16(pitch_coef, 9),exc[i-start]); } - syn_percep_zero(exc, ak, awk1, awk2, res, nsf, p, stack); for (i=0;i ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * + * Copyright (C) 2007 Dan Everton + * + * All files in this archive are subject to the GNU General Public License. + * See the file COPYING in the source tree root for full license agreement. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ***************************************************************************/ + +#ifndef MATH_H +#define MATH_H + +float floor(float); +float exp(float); +float sqrt(float); +float fabs(float); +float log(float); +float pow(float, float); +float sin(float); +float cos(float); + +#endif + diff --git a/apps/codecs/libspeex/math_approx.c b/apps/codecs/libspeex/math_approx.c index 54a34120bf..21af7667aa 100644 --- a/apps/codecs/libspeex/math_approx.c +++ b/apps/codecs/libspeex/math_approx.c @@ -34,181 +34,89 @@ #include "config.h" #endif - - #include "math_approx.h" #include "misc.h" +spx_int16_t spx_ilog2(spx_uint32_t x) +{ + int r=0; + if (x>=(spx_int32_t)65536) + { + x >>= 16; + r += 16; + } + if (x>=256) + { + x >>= 8; + r += 8; + } + if (x>=16) + { + x >>= 4; + r += 4; + } + if (x>=4) + { + x >>= 2; + r += 2; + } + if (x>=2) + { + r += 1; + } + return r; +} + +spx_int16_t spx_ilog4(spx_uint32_t x) +{ + int r=0; + if (x>=(spx_int32_t)65536) + { + x >>= 16; + r += 8; + } + if (x>=256) + { + x >>= 8; + r += 4; + } + if (x>=16) + { + x >>= 4; + r += 2; + } + if (x>=4) + { + r += 1; + } + return r; +} + #ifdef FIXED_POINT /* sqrt(x) ~= 0.22178 + 1.29227*x - 0.77070*x^2 + 0.25723*x^3 (for .25 < x < 1) */ +/*#define C0 3634 +#define C1 21173 +#define C2 -12627 +#define C3 4215*/ + +/* sqrt(x) ~= 0.22178 + 1.29227*x - 0.77070*x^2 + 0.25659*x^3 (for .25 < x < 1) */ #define C0 3634 #define C1 21173 #define C2 -12627 -#define C3 4215 +#define C3 4204 spx_word16_t spx_sqrt(spx_word32_t x) { - int k=0; + int k; spx_word32_t rt; - - if (x<=0) - return 0; -#if 1 - if (x>=16777216) - { - x>>=10; - k+=5; - } - if (x>=1048576) - { - x>>=6; - k+=3; - } - if (x>=262144) - { - x>>=4; - k+=2; - } - if (x>=32768) - { - x>>=2; - k+=1; - } - if (x>=16384) - { - x>>=2; - k+=1; - } -#else - while (x>=16384) - { - x>>=2; - k++; - } -#endif - while (x<4096) - { - x<<=2; - k--; - } + k = spx_ilog4(x)-6; + x = VSHR32(x, (k<<1)); rt = ADD16(C0, MULT16_16_Q14(x, ADD16(C1, MULT16_16_Q14(x, ADD16(C2, MULT16_16_Q14(x, (C3))))))); - if (rt > 16383) - rt = 16383; - if (k>0) - rt <<= k; - else - rt >>= -k; - rt >>=7; + rt = VSHR32(rt,7-k); return rt; } - static int intSqrt(int x) { - int xn; - static int sqrt_table[256] = { - 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, - 59, 61, 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, - 84, 86, 87, 89, 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, - 103, 104, 106, 107, 108, 109, 110, 112, 113, 114, 115, 116, 117, 118, - 119, 120, 121, 122, 123, 124, 125, 126, 128, 128, 129, 130, 131, 132, - 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 144, 145, - 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155, 156, 157, - 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168, - 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, - 179, 180, 181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, - 189, 189, 190, 191, 192, 192, 193, 193, 194, 195, 195, 196, 197, 197, - 198, 199, 199, 200, 201, 201, 202, 203, 203, 204, 204, 205, 206, 206, - 207, 208, 208, 209, 209, 210, 211, 211, 212, 212, 213, 214, 214, 215, - 215, 216, 217, 217, 218, 218, 219, 219, 220, 221, 221, 222, 222, 223, - 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230, 230, 231, - 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238, - 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, - 246, 247, 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, - 253, 254, 254, 255 - }; - if (x >= 0x10000) { - if (x >= 0x1000000) { - if (x >= 0x10000000) { - if (x >= 0x40000000) { - xn = sqrt_table[x >> 24] << 8; - } else { - xn = sqrt_table[x >> 22] << 7; - } - } else { - if (x >= 0x4000000) { - xn = sqrt_table[x >> 20] << 6; - } else { - xn = sqrt_table[x >> 18] << 5; - } - } - - xn = (xn + 1 + (x / xn)) >> 1; - xn = (xn + 1 + (x / xn)) >> 1; - return ((xn * xn) > x) ? --xn : xn; - } else { - if (x >= 0x100000) { - if (x >= 0x400000) { - xn = sqrt_table[x >> 16] << 4; - } else { - xn = sqrt_table[x >> 14] << 3; - } - } else { - if (x >= 0x40000) { - xn = sqrt_table[x >> 12] << 2; - } else { - xn = sqrt_table[x >> 10] << 1; - } - } - - xn = (xn + 1 + (x / xn)) >> 1; - - return ((xn * xn) > x) ? --xn : xn; - } - } else { - if (x >= 0x100) { - if (x >= 0x1000) { - if (x >= 0x4000) { - xn = (sqrt_table[x >> 8]) + 1; - } else { - xn = (sqrt_table[x >> 6] >> 1) + 1; - } - } else { - if (x >= 0x400) { - xn = (sqrt_table[x >> 4] >> 2) + 1; - } else { - xn = (sqrt_table[x >> 2] >> 3) + 1; - } - } - - return ((xn * xn) > x) ? --xn : xn; - } else { - if (x >= 0) { - return sqrt_table[x] >> 4; - } - } - } - - return -1; - } - -float spx_sqrtf(float arg) -{ - if(arg==0.0) - return 0.0; - else if(arg==1.0) - return 1.0; - else if(arg==2.0) - return 1.414; - else if(arg==3.27) - return 1.8083; - //printf("Sqrt:%f:%f:%f\n",arg,(((float)intSqrt((int)(arg*10000)))/100)+0.0055,(float)spx_sqrt((spx_word32_t)arg)); - //return ((float)fastSqrt((int)(arg*2500)))/50; - //LOGF("Sqrt:%d:%d\n",arg,(intSqrt((int)(arg*2500)))/50); - return (((float)intSqrt((int)(arg*10000)))/100)+0.0055;//(float)spx_sqrt((spx_word32_t)arg); - //return 1; -} - - /* log(x) ~= -2.18151 + 4.20592*x - 2.88938*x^2 + 0.86535*x^3 (for .5 < x < 1) */ @@ -259,6 +167,101 @@ spx_word16_t spx_cos(spx_word16_t x) } } +#define L1 32767 +#define L2 -7651 +#define L3 8277 +#define L4 -626 + +static inline spx_word16_t _spx_cos_pi_2(spx_word16_t x) +{ + spx_word16_t x2; + + x2 = MULT16_16_P15(x,x); + return ADD16(1,MIN16(32766,ADD32(SUB16(L1,x2), MULT16_16_P15(x2, ADD32(L2, MULT16_16_P15(x2, ADD32(L3, MULT16_16_P15(L4, x2)))))))); +} + +spx_word16_t spx_cos_norm(spx_word32_t x) +{ + x = x&0x0001ffff; + if (x>SHL32(EXTEND32(1), 16)) + x = SUB32(SHL32(EXTEND32(1), 17),x); + if (x&0x00007fff) + { + if (x14) + return 0x7fffffff; + else if (integer < -15) + return 0; + frac = SHL16(x-SHL16(integer,11),3); + frac = ADD16(D0, MULT16_16_Q14(frac, ADD16(D1, MULT16_16_Q14(frac, ADD16(D2 , MULT16_16_Q14(D3,frac)))))); + return VSHR32(EXTEND32(frac), -integer-2); +} + +/* Input in Q11 format, output in Q16 */ +spx_word32_t spx_exp(spx_word16_t x) +{ + if (x>21290) + return 0x7fffffff; + else if (x<-21290) + return 0; + else + return spx_exp2(MULT16_16_P14(23637,x)); +} +#define M1 32767 +#define M2 -21 +#define M3 -11943 +#define M4 4936 + +static inline spx_word16_t spx_atan01(spx_word16_t x) +{ + return MULT16_16_P15(x, ADD32(M1, MULT16_16_P15(x, ADD32(M2, MULT16_16_P15(x, ADD32(M3, MULT16_16_P15(M4, x))))))); +} + +/* Input in Q15, output in Q14 */ +spx_word16_t spx_atan(spx_word32_t x) +{ + if (x <= 32767) + { + return SHR16(spx_atan01(x),1); + } else { + int e = spx_ilog2(x); + if (e>=29) + return 25736; + x = DIV32_16(SHL32(EXTEND32(32767),29-e), EXTRACT16(SHR32(x, e-14))); + return SUB16(25736, SHR16(spx_atan01(x),1)); + } +} #else #ifndef M_PI @@ -284,161 +287,5 @@ spx_word16_t spx_cos(spx_word16_t x) return NEG16(C1 + x*(C2+x*(C3+C4*x))); } } + #endif - -inline float spx_floor(float x){ - return ((float)(((int)x))); -} - -#define FP_BITS (14) -#define FP_MASK ((1 << FP_BITS) - 1) -#define FP_ONE (1 << FP_BITS) -#define FP_TWO (2 << FP_BITS) -#define FP_HALF (1 << (FP_BITS - 1)) -#define FP_LN2 ( 45426 >> (16 - FP_BITS)) -#define FP_LN2_INV ( 94548 >> (16 - FP_BITS)) -#define FP_EXP_ZERO ( 10922 >> (16 - FP_BITS)) -#define FP_EXP_ONE ( -182 >> (16 - FP_BITS)) -#define FP_EXP_TWO ( 4 >> (16 - FP_BITS)) -// #define FP_INF (0x7fffffff) -// #define FP_LN10 (150902 >> (16 - FP_BITS)) - -#define FP_MAX_DIGITS (4) -#define FP_MAX_DIGITS_INT (10000) - - -// #define FP_FAST_MUL_DIV - -// #ifdef FP_FAST_MUL_DIV - -/* These macros can easily overflow, but they are good enough for our uses, - * and saves some code. - */ -#define fp_mul(x, y) (((x) * (y)) >> FP_BITS) -#define fp_div(x, y) (((x) << FP_BITS) / (y)) - -#ifndef abs - #define abs(x) (((x)<0)?((x)*-1):(x)) -#endif - -float spx_sqrt2(float xf) { - long x=(xf*(2.0*FP_BITS)); - int i=0, s = (x + FP_ONE) >> 1; - for (; i < 8; i++) { - s = (s + fp_div(x, s)) >> 1; - } - return s/((float)(2*FP_BITS)); -} - -static int exp_s16p16(int x) -{ - int t; - int y = 0x00010000; - - if (x < 0) x += 0xb1721, y >>= 16; - t = x - 0x58b91; if (t >= 0) x = t, y <<= 8; - t = x - 0x2c5c8; if (t >= 0) x = t, y <<= 4; - t = x - 0x162e4; if (t >= 0) x = t, y <<= 2; - t = x - 0x0b172; if (t >= 0) x = t, y <<= 1; - t = x - 0x067cd; if (t >= 0) x = t, y += y >> 1; - t = x - 0x03920; if (t >= 0) x = t, y += y >> 2; - t = x - 0x01e27; if (t >= 0) x = t, y += y >> 3; - t = x - 0x00f85; if (t >= 0) x = t, y += y >> 4; - t = x - 0x007e1; if (t >= 0) x = t, y += y >> 5; - t = x - 0x003f8; if (t >= 0) x = t, y += y >> 6; - t = x - 0x001fe; if (t >= 0) x = t, y += y >> 7; - y += ((y >> 8) * x) >> 8; - - return y; -} -float spx_expB(float xf) { - return exp_s16p16(xf*32)/32; -} -float spx_expC(float xf){ - long x=xf*(2*FP_BITS); -/* -static long fp_exp(long x) -{*/ - long k; - long z; - long R; - long xp; - - if (x == 0) - { - return FP_ONE; - } - - k = (fp_mul(abs(x), FP_LN2_INV) + FP_HALF) & ~FP_MASK; - - if (x < 0) - { - k = -k; - } - - x -= fp_mul(k, FP_LN2); - z = fp_mul(x, x); - R = FP_TWO + fp_mul(z, FP_EXP_ZERO + fp_mul(z, FP_EXP_ONE - + fp_mul(z, FP_EXP_TWO))); - xp = FP_ONE + fp_div(fp_mul(FP_TWO, x), R - x); - - if (k < 0) - { - k = FP_ONE >> (-k >> FP_BITS); - } - else - { - k = FP_ONE << (k >> FP_BITS); - } - - return fp_mul(k, xp)/(2*FP_BITS); -} -/*To generate (ruby code): (0...33).each { |idx| puts Math.exp((idx-10) / 8.0).to_s + "," } */ -const float exp_lookup_int[33]={0.28650479686019,0.32465246735835,0.367879441171442,0.416862019678508,0.472366552741015,0.53526142851899,0.606530659712633,0.687289278790972,0.778800783071405,0.882496902584595,1.0,1.13314845306683,1.28402541668774,1.4549914146182,1.64872127070013,1.86824595743222,2.11700001661267,2.3988752939671,2.71828182845905,3.08021684891803,3.49034295746184,3.95507672292058,4.48168907033806,5.07841903718008,5.75460267600573,6.52081912033011,7.38905609893065,8.37289748812726,9.48773583635853,10.7510131860764,12.1824939607035,13.8045741860671,15.6426318841882}; -/*To generate (ruby code): (0...32).each { |idx| puts Math.exp((idx-16.0) / 4.0).to_s+","} */ -static const float exp_table[32]={0.0183156388887342,0.0235177458560091,0.0301973834223185,0.038774207831722,0.0497870683678639,0.0639278612067076,0.0820849986238988,0.105399224561864,0.135335283236613,0.173773943450445,0.22313016014843,0.28650479686019,0.367879441171442,0.472366552741015,0.606530659712633,0.778800783071405,1.0,1.28402541668774,1.64872127070013,2.11700001661267,2.71828182845905,3.49034295746184,4.48168907033806,5.75460267600573,7.38905609893065,9.48773583635853,12.1824939607035,15.6426318841882,20.0855369231877,25.7903399171931,33.1154519586923,42.5210820000628}; -/**Returns exp(x) Range x=-4-+4 {x.0,x.25,x.5,x.75} */ -float spx_exp(float xf){ - float flt=spx_floor(xf); - if(-4xf&&(abs(xf-flt)==0.0||abs(xf-flt)==0.25||abs(xf-flt)==0.5||abs(xf-flt)==0.75||abs(xf-flt)==1.0)){ -#ifdef SIMULATOR -/* printf("NtbSexp:%f,%d,%f:%f,%f,%f:%d,%d:%d\n", - exp_sqrt_table[(int)((xf+4.0)*4.0)], - (int)((xf-4.0)*4.0), - (xf-4.0)*4.0, - xf, - flt, - xf-flt, - -4xf, - abs(xf-flt) - );*/ -#endif - return exp_table[(int)((xf+4.0)*4.0)]; - } else if (-4xf){ -#ifdef SIMULATOR -/* printf("NtbLexp:%f,%f,%f:%d,%d:%d\n",xf,flt,xf-flt,-4xf,abs(xf-flt)); */ -#endif - - return exp_table[(int)((xf+4.0)*4.0)]; - } - -#ifdef SIMULATOR -/* printf("NTBLexp:%f,%f,%f:%d,%d:%d\n",xf,flt,xf-flt,-4xf,abs(xf-flt)); */ -#endif - return spx_expB(xf); - //return exp(xf); -} -//Placeholders (not fixed point, only used when encoding): -float pow(float a,float b){ - return 0; -} -float log(float l){ - return 0; -} -float fabs(float a){ - return 0; -} -float sin(float a){ - return 0; -} diff --git a/apps/codecs/libspeex/math_approx.h b/apps/codecs/libspeex/math_approx.h index 265cdec822..49cfda6ebe 100644 --- a/apps/codecs/libspeex/math_approx.h +++ b/apps/codecs/libspeex/math_approx.h @@ -35,37 +35,28 @@ #ifndef MATH_APPROX_H #define MATH_APPROX_H - #include "misc.h" -#ifdef FIXED_POINT spx_word16_t spx_cos(spx_word16_t x); +spx_int16_t spx_ilog2(spx_uint32_t x); +spx_int16_t spx_ilog4(spx_uint32_t x); +#ifdef FIXED_POINT spx_word16_t spx_sqrt(spx_word32_t x); -float spx_sqrtf(float arg); spx_word16_t spx_acos(spx_word16_t x); -float spx_floor(float x); -float spx_exp(float x); -extern const float exp_lookup_int[]; -/** Returns: Math.exp((idx-10) / 8.0) Range:0-32*/ -static inline float spx_exp_lookup(int xf){ - return exp_lookup_int[xf]; -} -//Placeholders: -float pow(float a,float b); -float log(float l); -float fabs(float l); -float sin(float l); -//float floor(float l); +spx_word32_t spx_exp(spx_word16_t x); +spx_word16_t spx_cos_norm(spx_word32_t x); -#define floor spx_floor -#define exp spx_exp -#define sqrt spx_sqrt -#define acos spx_acos -#define cos spx_cos +/* Input in Q15, output in Q14 */ +spx_word16_t spx_atan(spx_word32_t x); #else + #define spx_sqrt sqrt #define spx_acos acos +#define spx_exp exp +#define spx_cos_norm(x) (cos((.5f*M_PI)*(x))) +#define spx_atan atan + #endif #endif diff --git a/apps/codecs/libspeex/mdf.c b/apps/codecs/libspeex/mdf.c index 719135e2fd..eac9f83588 100644 --- a/apps/codecs/libspeex/mdf.c +++ b/apps/codecs/libspeex/mdf.c @@ -79,9 +79,6 @@ #define M_PI 3.14159265358979323846 #endif -#define min(a,b) ((a)<(b) ? (a) : (b)) -#define max(a,b) ((a)>(b) ? (a) : (b)) - #ifdef FIXED_POINT #define WEIGHT_SHIFT 11 #define NORMALIZE_SCALEDOWN 5 @@ -90,19 +87,40 @@ #define WEIGHT_SHIFT 0 #endif -/* If enabled, the transition between blocks is smooth, so there isn't any blocking -aftifact when adapting. The cost is an extra FFT and a matrix-vector multiply */ -#define SMOOTH_BLOCKS +/* If enabled, the AEC will use a foreground filter and a background filter to be more robust to double-talk + and difficult signals in general. The cost is an extra FFT and a matrix-vector multiply */ +#define TWO_PATH #ifdef FIXED_POINT -static const spx_float_t MIN_LEAK = {16777, -19}; +static const spx_float_t MIN_LEAK = {20972, -22}; + +/* Constants for the two-path filter */ +static const spx_float_t VAR1_SMOOTH = {23593, -16}; +static const spx_float_t VAR2_SMOOTH = {23675, -15}; +static const spx_float_t VAR1_UPDATE = {16384, -15}; +static const spx_float_t VAR2_UPDATE = {16384, -16}; +static const spx_float_t VAR_BACKTRACK = {16384, -12}; #define TOP16(x) ((x)>>16) + #else -static const spx_float_t MIN_LEAK = .032f; + +static const spx_float_t MIN_LEAK = .005f; + +/* Constants for the two-path filter */ +static const spx_float_t VAR1_SMOOTH = .36f; +static const spx_float_t VAR2_SMOOTH = .7225f; +static const spx_float_t VAR1_UPDATE = .5f; +static const spx_float_t VAR2_UPDATE = .25f; +static const spx_float_t VAR_BACKTRACK = 4.f; #define TOP16(x) (x) #endif +#define PLAYBACK_DELAY 2 + +void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *Yout, int len); + + /** Speex echo cancellation state. */ struct SpeexEchoState_ { int frame_size; /**< Number of samples processed each time */ @@ -111,35 +129,44 @@ struct SpeexEchoState_ { int cancel_count; int adapted; int saturated; + int screwed_up; spx_int32_t sampling_rate; spx_word16_t spec_average; spx_word16_t beta0; spx_word16_t beta_max; spx_word32_t sum_adapt; - spx_word16_t *e; - spx_word16_t *x; - spx_word16_t *X; - spx_word16_t *d; - spx_word16_t *y; + spx_word16_t leak_estimate; + + spx_word16_t *e; /* scratch */ + spx_word16_t *x; /* Far-end input buffer (2N) */ + spx_word16_t *X; /* Far-end buffer (M+1 frames) in frequency domain */ + spx_word16_t *input; /* scratch */ + spx_word16_t *y; /* scratch */ spx_word16_t *last_y; - spx_word32_t *Yps; - spx_word16_t *Y; + spx_word16_t *Y; /* scratch */ spx_word16_t *E; - spx_word32_t *PHI; - spx_word32_t *W; - spx_word32_t *power; - spx_float_t *power_1; - spx_word16_t *wtmp; -#ifdef FIXED_POINT - spx_word16_t *wtmp2; + spx_word32_t *PHI; /* scratch */ + spx_word32_t *W; /* (Background) filter weights */ +#ifdef TWO_PATH + spx_word32_t *foreground; /* Foreground filter weights */ + spx_word32_t Davg1; /* 1st recursive average of the residual power difference */ + spx_word32_t Davg2; /* 2nd recursive average of the residual power difference */ + spx_float_t Dvar1; /* Estimated variance of 1st estimator */ + spx_float_t Dvar2; /* Estimated variance of 2nd estimator */ #endif - spx_word32_t *Rf; - spx_word32_t *Yf; - spx_word32_t *Xf; + spx_word32_t *power; /* Power of the far-end signal */ + spx_float_t *power_1;/* Inverse power of far-end */ + spx_word16_t *wtmp; /* scratch */ +#ifdef FIXED_POINT + spx_word16_t *wtmp2; /* scratch */ +#endif + spx_word32_t *Rf; /* scratch */ + spx_word32_t *Yf; /* scratch */ + spx_word32_t *Xf; /* scratch */ spx_word32_t *Eh; spx_word32_t *Yh; - spx_float_t Pey; - spx_float_t Pyy; + spx_float_t Pey; + spx_float_t Pyy; spx_word16_t *window; spx_word16_t *prop; void *fft_table; @@ -151,6 +178,7 @@ struct SpeexEchoState_ { /* NOTE: If you only use speex_echo_cancel() and want to save some memory, remove this */ spx_int16_t *play_buf; int play_buf_pos; + int play_buf_started; }; static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem) @@ -177,6 +205,7 @@ static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, } } +/* This inner product is slightly different from the codec version because of fixed-point */ static inline spx_word32_t mdf_inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len) { spx_word32_t sum=0; @@ -255,18 +284,52 @@ static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t #endif /** Compute weighted cross-power spectrum of a half-complex (packed) vector with conjugate */ -static inline void weighted_spectral_mul_conj(const spx_float_t *w, const spx_word16_t *X, const spx_word16_t *Y, spx_word32_t *prod, int N) +static inline void weighted_spectral_mul_conj(const spx_float_t *w, const spx_float_t p, const spx_word16_t *X, const spx_word16_t *Y, spx_word32_t *prod, int N) { int i, j; - prod[0] = FLOAT_MUL32(w[0],MULT16_16(X[0],Y[0])); + spx_float_t W; + W = FLOAT_AMULT(p, w[0]); + prod[0] = FLOAT_MUL32(W,MULT16_16(X[0],Y[0])); for (i=1,j=1;i max_sum) + max_sum = prop[i]; + } + for (i=0;icancel_count=0; st->sum_adapt = 0; st->saturated = 0; - /* FIXME: Make that an init option (new API call?) */ + st->screwed_up = 0; + /* This is the default sampling rate */ st->sampling_rate = 8000; st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate); #ifdef FIXED_POINT @@ -291,14 +355,14 @@ SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length) st->beta0 = (2.0f*st->frame_size)/st->sampling_rate; st->beta_max = (.5f*st->frame_size)/st->sampling_rate; #endif + st->leak_estimate = 0; st->fft_table = spx_fft_init(N); st->e = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); st->x = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); - st->d = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); + st->input = (spx_word16_t*)speex_alloc(st->frame_size*sizeof(spx_word16_t)); st->y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); - st->Yps = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); st->last_y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); st->Yf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); st->Rf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); @@ -310,6 +374,9 @@ SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length) st->Y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); st->E = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); st->W = (spx_word32_t*)speex_alloc(M*N*sizeof(spx_word32_t)); +#ifdef TWO_PATH + st->foreground = (spx_word32_t*)speex_alloc(M*N*sizeof(spx_word32_t)); +#endif st->PHI = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); st->power = (spx_word32_t*)speex_alloc((frame_size+1)*sizeof(spx_word32_t)); st->power_1 = (spx_float_t*)speex_alloc((frame_size+1)*sizeof(spx_float_t)); @@ -331,12 +398,10 @@ SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length) st->power_1[i] = FLOAT_ONE; for (i=0;iW[i] = 0; - for (i=0;iPHI[i] = 0; { spx_word32_t sum = 0; /* Ratio of ~10 between adaptation rate of first and last block */ - spx_word16_t decay = QCONST16(exp(-2.4/M),15); + spx_word16_t decay = SHR32(spx_exp(NEG16(DIV32_16(QCONST16(2.4,11),M))),1); st->prop[0] = QCONST16(.7, 15); sum = EXTEND32(st->prop[0]); for (i=1;i=0;i--) { - st->prop[i] = DIV32(SHL32(EXTEND32(st->prop[i]),15),sum); + st->prop[i] = DIV32(MULT16_16(QCONST16(.8,15), st->prop[i]),sum); } } @@ -363,9 +428,15 @@ SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length) st->adapted = 0; st->Pey = st->Pyy = FLOAT_ONE; - st->play_buf = (spx_int16_t*)speex_alloc(2*st->frame_size*sizeof(spx_int16_t)); - st->play_buf_pos = 0; - +#ifdef TWO_PATH + st->Davg1 = st->Davg2 = 0; + st->Dvar1 = st->Dvar2 = FLOAT_ZERO; +#endif + + st->play_buf = (spx_int16_t*)speex_alloc((PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t)); + st->play_buf_pos = PLAYBACK_DELAY*st->frame_size; + st->play_buf_started = 0; + return st; } @@ -374,23 +445,48 @@ void speex_echo_state_reset(SpeexEchoState *st) { int i, M, N; st->cancel_count=0; + st->screwed_up = 0; N = st->window_size; M = st->M; for (i=0;iW[i] = 0; +#ifdef TWO_PATH + for (i=0;iforeground[i] = 0; +#endif for (i=0;iX[i] = 0; for (i=0;i<=st->frame_size;i++) + { st->power[i] = 0; + st->power_1[i] = FLOAT_ONE; + st->Eh[i] = 0; + st->Yh[i] = 0; + } + for (i=0;iframe_size;i++) + { + st->last_y[i] = 0; + } for (i=0;iE[i] = 0; + st->x[i] = 0; + } st->notch_mem[0] = st->notch_mem[1] = 0; - + st->memX=st->memD=st->memE=0; + st->saturated = 0; st->adapted = 0; st->sum_adapt = 0; st->Pey = st->Pyy = FLOAT_ONE; - st->play_buf_pos = 0; +#ifdef TWO_PATH + st->Davg1 = st->Davg2 = 0; + st->Dvar1 = st->Dvar2 = FLOAT_ZERO; +#endif + for (i=0;i<3*st->frame_size;i++) + st->play_buf[i] = 0; + st->play_buf_pos = PLAYBACK_DELAY*st->frame_size; + st->play_buf_started = 0; } @@ -401,10 +497,9 @@ void speex_echo_state_destroy(SpeexEchoState *st) speex_free(st->e); speex_free(st->x); - speex_free(st->d); + speex_free(st->input); speex_free(st->y); speex_free(st->last_y); - speex_free(st->Yps); speex_free(st->Yf); speex_free(st->Rf); speex_free(st->Xf); @@ -415,6 +510,9 @@ void speex_echo_state_destroy(SpeexEchoState *st) speex_free(st->Y); speex_free(st->E); speex_free(st->W); +#ifdef TWO_PATH + speex_free(st->foreground); +#endif speex_free(st->PHI); speex_free(st->power); speex_free(st->power_1); @@ -428,17 +526,19 @@ void speex_echo_state_destroy(SpeexEchoState *st) speex_free(st); } -void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out, spx_int32_t *Yout) +void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out) { int i; + /*speex_warning_int("capture with fill level ", st->play_buf_pos/st->frame_size);*/ + st->play_buf_started = 1; if (st->play_buf_pos>=st->frame_size) { - speex_echo_cancel(st, rec, st->play_buf, out, Yout); + speex_echo_cancellation(st, rec, st->play_buf, out); st->play_buf_pos -= st->frame_size; - for (i=0;iframe_size;i++) + for (i=0;iplay_buf_pos;i++) st->play_buf[i] = st->play_buf[i+st->frame_size]; } else { - speex_warning("no playback frame available"); + speex_warning("No playback frame available (your application is buggy and/or got xruns)"); if (st->play_buf_pos!=0) { speex_warning("internal playback buffer corruption?"); @@ -451,24 +551,46 @@ void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play) { - if (st->play_buf_pos<=st->frame_size) + /*speex_warning_int("playback with fill level ", st->play_buf_pos/st->frame_size);*/ + if (!st->play_buf_started) + { + speex_warning("discarded first playback frame"); + return; + } + if (st->play_buf_pos<=PLAYBACK_DELAY*st->frame_size) { int i; for (i=0;iframe_size;i++) st->play_buf[st->play_buf_pos+i] = play[i]; st->play_buf_pos += st->frame_size; + if (st->play_buf_pos <= (PLAYBACK_DELAY-1)*st->frame_size) + { + speex_warning("Auto-filling the buffer (your application is buggy and/or got xruns)"); + for (i=0;iframe_size;i++) + st->play_buf[st->play_buf_pos+i] = play[i]; + st->play_buf_pos += st->frame_size; + } } else { - speex_warning("had to discard a playback frame"); + speex_warning("Had to discard a playback frame (your application is buggy and/or got xruns)"); } } /** Performs echo cancellation on a frame */ -void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int16_t *echo, spx_int16_t *out, spx_int32_t *Yout) +void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out, spx_int32_t *Yout) +{ + speex_echo_cancellation(st, in, far_end, out); +} + +/** Performs echo cancellation on a frame (deprecated, last arg now ignored) */ +void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out) { int i,j; int N,M; - spx_word32_t Syy,See,Sxx; - spx_word16_t leak_estimate; + spx_word32_t Syy,See,Sxx,Sdd, Sff; +#ifdef TWO_PATH + spx_word32_t Dbf; + int update_foreground; +#endif spx_word32_t Sey; spx_word16_t ss, ss_1; spx_float_t Pey = FLOAT_ONE, Pyy=FLOAT_ONE; @@ -487,47 +609,46 @@ void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int ss_1 = 1-ss; #endif - filter_dc_notch16(ref, st->notch_radius, st->d, st->frame_size, st->notch_mem); - /* Copy input data to buffer */ + /* Apply a notch filter to make sure DC doesn't end up causing problems */ + filter_dc_notch16(in, st->notch_radius, st->input, st->frame_size, st->notch_mem); + /* Copy input data to buffer and apply pre-emphasis */ for (i=0;iframe_size;i++) { - spx_word16_t tmp; spx_word32_t tmp32; - st->x[i] = st->x[i+st->frame_size]; - tmp32 = SUB32(EXTEND32(echo[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memX))); + tmp32 = SUB32(EXTEND32(far_end[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memX))); #ifdef FIXED_POINT - /*FIXME: If saturation occurs here, we need to freeze adaptation for M frames (not just one) */ + /* If saturation occurs here, we need to freeze adaptation for M+1 frames (not just one) */ if (tmp32 > 32767) { tmp32 = 32767; - st->saturated = 1; - } + st->saturated = M+1; + } if (tmp32 < -32767) { tmp32 = -32767; - st->saturated = 1; + st->saturated = M+1; } #endif st->x[i+st->frame_size] = EXTRACT16(tmp32); - st->memX = echo[i]; + st->memX = far_end[i]; - tmp = st->d[i]; - st->d[i] = st->d[i+st->frame_size]; - tmp32 = SUB32(EXTEND32(tmp), EXTEND32(MULT16_16_P15(st->preemph, st->memD))); + tmp32 = SUB32(EXTEND32(st->input[i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD))); #ifdef FIXED_POINT if (tmp32 > 32767) { tmp32 = 32767; - st->saturated = 1; + if (st->saturated == 0) + st->saturated = 1; } if (tmp32 < -32767) { tmp32 = -32767; - st->saturated = 1; + if (st->saturated == 0) + st->saturated = 1; } #endif - st->d[i+st->frame_size] = tmp32; - st->memD = tmp; + st->memD = st->input[i]; + st->input[i] = tmp32; } /* Shift memory: this could be optimized eventually*/ @@ -537,28 +658,40 @@ void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int st->X[(j+1)*N+i] = st->X[j*N+i]; } - /* Convert x (echo input) to frequency domain */ + /* Convert x (far end) to frequency domain */ spx_fft(st->fft_table, st->x, &st->X[0]); + for (i=0;ilast_y[i] = st->x[i]; + Sxx = mdf_inner_prod(st->x+st->frame_size, st->x+st->frame_size, st->frame_size); + for (i=0;iframe_size;i++) + st->x[i] = st->x[i+st->frame_size]; + /* From here on, the top part of x is used as scratch space */ -#ifdef SMOOTH_BLOCKS - spectral_mul_accum(st->X, st->W, st->Y, N, M); +#ifdef TWO_PATH + /* Compute foreground filter */ + spectral_mul_accum(st->X, st->foreground, st->Y, N, M); spx_ifft(st->fft_table, st->Y, st->e); + for (i=0;iframe_size;i++) + st->x[i+st->frame_size] = SUB16(st->input[i], st->e[i+st->frame_size]); + Sff = mdf_inner_prod(st->x+st->frame_size, st->x+st->frame_size, st->frame_size); #endif - + + /* Adjust proportional adaption rate */ + mdf_adjust_prop (st->W, N, M, st->prop); /* Compute weight gradient */ - if (!st->saturated) + if (st->saturated == 0) { for (j=M-1;j>=0;j--) { - weighted_spectral_mul_conj(st->power_1, &st->X[(j+1)*N], st->E, st->PHI, N); + weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N], st->E, st->PHI, N); for (i=0;iW[j*N+i] += MULT16_32_Q15(st->prop[j], st->PHI[i]); + st->W[j*N+i] = ADD32(st->W[j*N+i], st->PHI[i]); - } + } + } else { + st->saturated--; } - st->saturated = 0; - /* Update weight to prevent circular convolution (MDF / AUMDF) */ for (j=0;jX, st->W, st->Y, N, M); spx_ifft(st->fft_table, st->Y, st->y); +#ifdef TWO_PATH + /* Difference in response, this is used to estimate the variance of our residual power estimate */ + for (i=0;iframe_size;i++) + st->x[i+st->frame_size] = SUB16(st->e[i+st->frame_size], st->y[i+st->frame_size]); + Dbf = 10+mdf_inner_prod(st->x+st->frame_size, st->x+st->frame_size, st->frame_size); +#endif + + for (i=0;iframe_size;i++) + st->x[i+st->frame_size] = SUB16(st->input[i], st->y[i+st->frame_size]); + See = mdf_inner_prod(st->x+st->frame_size, st->x+st->frame_size, st->frame_size); +#ifndef TWO_PATH + Sff = See; +#endif + +#ifdef TWO_PATH + /* Logic for updating the foreground filter */ + /* For two time windows, compute the mean of the energy difference, as well as the variance */ + st->Davg1 = ADD32(MULT16_32_Q15(QCONST16(.6f,15),st->Davg1), MULT16_32_Q15(QCONST16(.4f,15),SUB32(Sff,See))); + st->Davg2 = ADD32(MULT16_32_Q15(QCONST16(.85f,15),st->Davg2), MULT16_32_Q15(QCONST16(.15f,15),SUB32(Sff,See))); + st->Dvar1 = FLOAT_ADD(FLOAT_MULT(VAR1_SMOOTH, st->Dvar1), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.4f,15),Sff), MULT16_32_Q15(QCONST16(.4f,15),Dbf))); + st->Dvar2 = FLOAT_ADD(FLOAT_MULT(VAR2_SMOOTH, st->Dvar2), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.15f,15),Sff), MULT16_32_Q15(QCONST16(.15f,15),Dbf))); + + /* Equivalent float code: + st->Davg1 = .6*st->Davg1 + .4*(Sff-See); + st->Davg2 = .85*st->Davg2 + .15*(Sff-See); + st->Dvar1 = .36*st->Dvar1 + .16*Sff*Dbf; + st->Dvar2 = .7225*st->Dvar2 + .0225*Sff*Dbf; + */ + + update_foreground = 0; + /* Check if we have a statistically significant reduction in the residual echo */ + /* Note that this is *not* Gaussian, so we need to be careful about the longer tail */ + if (FLOAT_GT(FLOAT_MUL32U(SUB32(Sff,See),ABS32(SUB32(Sff,See))), FLOAT_MUL32U(Sff,Dbf))) + update_foreground = 1; + else if (FLOAT_GT(FLOAT_MUL32U(st->Davg1, ABS32(st->Davg1)), FLOAT_MULT(VAR1_UPDATE,(st->Dvar1)))) + update_foreground = 1; + else if (FLOAT_GT(FLOAT_MUL32U(st->Davg2, ABS32(st->Davg2)), FLOAT_MULT(VAR2_UPDATE,(st->Dvar2)))) + update_foreground = 1; + + /* Do we update? */ + if (update_foreground) + { + st->Davg1 = st->Davg2 = 0; + st->Dvar1 = st->Dvar2 = FLOAT_ZERO; + /* Copy background filter to foreground filter */ + for (i=0;iforeground[i] = st->W[i]; + /* Apply a smooth transition so as to not introduce blocking artifacts */ + for (i=0;iframe_size;i++) + st->e[i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[i+st->frame_size]); + } else { + int reset_background=0; + /* Otherwise, check if the background filter is significantly worse */ + if (FLOAT_GT(FLOAT_MUL32U(NEG32(SUB32(Sff,See)),ABS32(SUB32(Sff,See))), FLOAT_MULT(VAR_BACKTRACK,FLOAT_MUL32U(Sff,Dbf)))) + reset_background = 1; + if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg1), ABS32(st->Davg1)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar1))) + reset_background = 1; + if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg2), ABS32(st->Davg2)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar2))) + reset_background = 1; + if (reset_background) + { + /* Copy foreground filter to background filter */ + for (i=0;iW[i] = st->foreground[i]; + /* We also need to copy the output so as to get correct adaptation */ + for (i=0;iframe_size;i++) + st->y[i+st->frame_size] = st->e[i+st->frame_size]; + for (i=0;iframe_size;i++) + st->x[i+st->frame_size] = SUB16(st->input[i], st->y[i+st->frame_size]); + See = Sff; + st->Davg1 = st->Davg2 = 0; + st->Dvar1 = st->Dvar2 = FLOAT_ZERO; + } + } +#endif + /* Compute error signal (for the output with de-emphasis) */ for (i=0;iframe_size;i++) { spx_word32_t tmp_out; -#ifdef SMOOTH_BLOCKS - spx_word16_t y = MULT16_16_Q15(st->window[i+st->frame_size],st->e[i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[i+st->frame_size]); - tmp_out = SUB32(EXTEND32(st->d[i+st->frame_size]), EXTEND32(y)); +#ifdef TWO_PATH + tmp_out = SUB32(EXTEND32(st->input[i]), EXTEND32(st->e[i+st->frame_size])); #else - tmp_out = SUB32(EXTEND32(st->d[i+st->frame_size]), EXTEND32(st->y[i+st->frame_size])); + tmp_out = SUB32(EXTEND32(st->input[i]), EXTEND32(st->y[i+st->frame_size])); #endif - /* Saturation */ if (tmp_out>32767) tmp_out = 32767; else if (tmp_out<-32768) tmp_out = -32768; tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE))); - /* This is an arbitrary test for saturation */ - if (ref[i] <= -32000 || ref[i] >= 32000) + /* This is an arbitrary test for saturation in the microphone signal */ + if (in[i] <= -32000 || in[i] >= 32000) { tmp_out = 0; - st->saturated = 1; + if (st->saturated == 0) + st->saturated = 1; } out[i] = (spx_int16_t)tmp_out; st->memE = tmp_out; @@ -629,15 +837,44 @@ void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int for (i=0;iframe_size;i++) { st->e[i] = 0; - st->e[i+st->frame_size] = st->d[i+st->frame_size] - st->y[i+st->frame_size]; + st->e[i+st->frame_size] = st->x[i+st->frame_size]; } /* Compute a bunch of correlations */ Sey = mdf_inner_prod(st->e+st->frame_size, st->y+st->frame_size, st->frame_size); - See = mdf_inner_prod(st->e+st->frame_size, st->e+st->frame_size, st->frame_size); - See = ADD32(See, SHR32(MULT16_16(N, 100),6)); Syy = mdf_inner_prod(st->y+st->frame_size, st->y+st->frame_size, st->frame_size); - Sxx = mdf_inner_prod(st->x+st->frame_size, st->x+st->frame_size, st->frame_size); + Sdd = mdf_inner_prod(st->input, st->input, st->frame_size); + + /*printf ("%f %f %f %f\n", Sff, See, Syy, Sdd, st->update_cond);*/ + + /* Do some sanity check */ + if (!(Syy>=0 && Sxx>=0 && See >= 0) +#ifndef FIXED_POINT + || !(Sff < N*1e9 && Syy < N*1e9 && Sxx < N*1e9) +#endif + ) + { + /* Things have gone really bad */ + st->screwed_up += 50; + for (i=0;iframe_size;i++) + out[i] = 0; + } else if (SHR32(Sff, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 10000),6))) + { + /* AEC seems to add lots of echo instead of removing it, let's see if it will improve */ + st->screwed_up++; + } else { + /* Everything's fine */ + st->screwed_up=0; + } + if (st->screwed_up>=50) + { + speex_warning("The echo canceller started acting funny and got slapped (reset). It swears it will behave now."); + speex_echo_state_reset(st); + return; + } + + /* Add a small noise floor to make sure not to have problems when dividing */ + See = MAX32(See, SHR32(MULT16_16(N, 100),6)); /* Convert error to frequency domain */ spx_fft(st->fft_table, st->e, st->E); @@ -645,12 +882,12 @@ void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int st->y[i] = 0; spx_fft(st->fft_table, st->y, st->Y); - /* Compute power spectrum of echo (X), error (E) and filter response (Y) */ + /* Compute power spectrum of far end (X), error (E) and filter response (Y) */ power_spectrum(st->E, st->Rf, N); power_spectrum(st->Y, st->Yf, N); power_spectrum(st->X, st->Xf, N); - /* Smooth echo energy estimate over time */ + /* Smooth far end energy estimate over time */ for (j=0;j<=st->frame_size;j++) st->power[j] = MULT16_32_Q15(ss_1,st->power[j]) + 1 + MULT16_32_Q15(ss,st->Xf[j]); @@ -660,7 +897,7 @@ void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int { float scale2 = .5f/M; for (j=0;j<=st->frame_size;j++) - st->power[j] = 0; + st->power[j] = 100; for (i=0;iX[i*N], st->Xf, N); @@ -706,17 +943,17 @@ void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int if (FLOAT_GT(st->Pey, st->Pyy)) st->Pey = st->Pyy; /* leak_estimate is the linear regression result */ - leak_estimate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIVU(st->Pey, st->Pyy),14)); + st->leak_estimate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIVU(st->Pey, st->Pyy),14)); /* This looks like a stupid bug, but it's right (because we convert from Q14 to Q15) */ - if (leak_estimate > 16383) - leak_estimate = 32767; + if (st->leak_estimate > 16383) + st->leak_estimate = 32767; else - leak_estimate = SHL16(leak_estimate,1); - /*printf ("%f\n", leak_estimate);*/ + st->leak_estimate = SHL16(st->leak_estimate,1); + /*printf ("%f\n", st->leak_estimate);*/ /* Compute Residual to Error Ratio */ #ifdef FIXED_POINT - tmp32 = MULT16_32_Q15(leak_estimate,Syy); + tmp32 = MULT16_32_Q15(st->leak_estimate,Syy); tmp32 = ADD32(SHR32(Sxx,13), ADD32(tmp32, SHL32(tmp32,1))); /* Check for y in e (lower bound on RER) */ { @@ -730,8 +967,8 @@ void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int if (tmp32 > SHR32(See,1)) tmp32 = SHR32(See,1); RER = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32,See),15)); -#else - RER = (.0001*Sxx + 3.*MULT16_32_Q15(leak_estimate,Syy)) / See; +#else + RER = (.0001*Sxx + 3.*MULT16_32_Q15(st->leak_estimate,Syy)) / See; /* Check for y in e (lower bound on RER) */ if (RER < Sey*Sey/(1+See*Syy)) RER = Sey*Sey/(1+See*Syy); @@ -740,18 +977,19 @@ void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int #endif /* We consider that the filter has had minimal adaptation if the following is true*/ - if (!st->adapted && st->sum_adapt > QCONST32(1,15)) + if (!st->adapted && st->sum_adapt > QCONST32(M,15) && MULT16_32_Q15(st->leak_estimate,Syy) > MULT16_32_Q15(QCONST16(.03f,15),Syy)) { st->adapted = 1; } if (st->adapted) { + /* Normal learning rate calculation once we're past the minimal adaptation phase */ for (i=0;i<=st->frame_size;i++) { spx_word32_t r, e; /* Compute frequency-domain adaptation mask */ - r = MULT16_32_Q15(leak_estimate,SHL32(st->Yf[i],3)); + r = MULT16_32_Q15(st->leak_estimate,SHL32(st->Yf[i],3)); e = SHL32(st->Rf[i],3)+1; #ifdef FIXED_POINT if (r>SHR32(e,1)) @@ -764,20 +1002,22 @@ void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int /*st->power_1[i] = adapt_rate*r/(e*(1+st->power[i]));*/ st->power_1[i] = FLOAT_SHL(FLOAT_DIV32_FLOAT(r,FLOAT_MUL32U(e,st->power[i]+10)),WEIGHT_SHIFT+16); } - } else if (Sxx > SHR32(MULT16_16(N, 1000),6)) { + } else { /* Temporary adaption rate if filter is not yet adapted enough */ spx_word16_t adapt_rate=0; - tmp32 = MULT16_32_Q15(QCONST16(.25f, 15), Sxx); + if (Sxx > SHR32(MULT16_16(N, 1000),6)) + { + tmp32 = MULT16_32_Q15(QCONST16(.25f, 15), Sxx); #ifdef FIXED_POINT - if (tmp32 > SHR32(See,2)) - tmp32 = SHR32(See,2); + if (tmp32 > SHR32(See,2)) + tmp32 = SHR32(See,2); #else - if (tmp32 > .25*See) - tmp32 = .25*See; + if (tmp32 > .25*See) + tmp32 = .25*See; #endif - adapt_rate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32, See),15)); - + adapt_rate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32, See),15)); + } for (i=0;i<=st->frame_size;i++) st->power_1[i] = FLOAT_SHL(FLOAT_DIV32(EXTEND32(adapt_rate),ADD32(st->power[i],10)),WEIGHT_SHIFT+1); @@ -786,48 +1026,55 @@ void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *ref, const spx_int st->sum_adapt = ADD32(st->sum_adapt,adapt_rate); } - /* Compute spectrum of estimated echo for use in an echo post-filter (if necessary)*/ - if (Yout) + /* Save residual echo so it can be used by the nonlinear processor */ + if (st->adapted) { - spx_word16_t leak2; - if (st->adapted) - { - /* If the filter is adapted, take the filtered echo */ - for (i=0;iframe_size;i++) - st->last_y[i] = st->last_y[st->frame_size+i]; - for (i=0;iframe_size;i++) - st->last_y[st->frame_size+i] = ref[i]-out[i]; - } else { - /* If filter isn't adapted yet, all we can do is take the echo signal directly */ - for (i=0;ilast_y[i] = st->x[i]; - } - - /* Apply hanning window (should pre-compute it)*/ - for (i=0;iy[i] = MULT16_16_Q15(st->window[i],st->last_y[i]); - - /* Compute power spectrum of the echo */ - spx_fft(st->fft_table, st->y, st->Y); - power_spectrum(st->Y, st->Yps, N); - -#ifdef FIXED_POINT - if (leak_estimate > 16383) - leak2 = 32767; - else - leak2 = SHL16(leak_estimate, 1); -#else - if (leak_estimate>.5) - leak2 = 1; - else - leak2 = 2*leak_estimate; -#endif - /* Estimate residual echo */ - for (i=0;i<=st->frame_size;i++) - Yout[i] = (spx_int32_t)MULT16_32_Q15(leak2,st->Yps[i]); + /* If the filter is adapted, take the filtered echo */ + for (i=0;iframe_size;i++) + st->last_y[i] = st->last_y[st->frame_size+i]; + for (i=0;iframe_size;i++) + st->last_y[st->frame_size+i] = in[i]-out[i]; + } else { + /* If filter isn't adapted yet, all we can do is take the far end signal directly */ + /* moved earlier: for (i=0;ilast_y[i] = st->x[i];*/ } + } +/* Compute spectrum of estimated echo for use in an echo post-filter */ +void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *residual_echo, int len) +{ + int i; + spx_word16_t leak2; + int N; + + N = st->window_size; + + /* Apply hanning window (should pre-compute it)*/ + for (i=0;iy[i] = MULT16_16_Q15(st->window[i],st->last_y[i]); + + /* Compute power spectrum of the echo */ + spx_fft(st->fft_table, st->y, st->Y); + power_spectrum(st->Y, residual_echo, N); + +#ifdef FIXED_POINT + if (st->leak_estimate > 16383) + leak2 = 32767; + else + leak2 = SHL16(st->leak_estimate, 1); +#else + if (st->leak_estimate>.5) + leak2 = 1; + else + leak2 = 2*st->leak_estimate; +#endif + /* Estimate residual echo */ + for (i=0;i<=st->frame_size;i++) + residual_echo[i] = (spx_int32_t)MULT16_32_Q15(leak2,residual_echo[i]); + +} int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr) { diff --git a/apps/codecs/libspeex/misc.c b/apps/codecs/libspeex/misc.c index 4d2b485a66..97b35b3085 100644 --- a/apps/codecs/libspeex/misc.c +++ b/apps/codecs/libspeex/misc.c @@ -34,15 +34,11 @@ #include "config.h" #endif -#include "../codecs.h" - #include #include #include #include "misc.h" - - #ifdef USER_MISC #include "user_misc.h" #endif @@ -51,16 +47,14 @@ #include "misc_bfin.h" #endif -//static struct codec_api *rb;//defined in decoder api speex.c - #ifndef RELEASE void print_vec(float *vec, int len, char *name) { -/* int i; + int i; printf ("%s ", name); for (i=0;i>24; ret += (i>>8)&0x0000ff00; ret += (i<<8)&0x00ff0000; @@ -84,7 +78,7 @@ spx_uint32_t be_int(spx_uint32_t i) spx_uint32_t le_int(spx_uint32_t i) { spx_uint32_t ret=i; -#ifdef ROCKBOX_BIG_ENDIAN +#ifdef WORDS_BIGENDIAN ret = i>>24; ret += (i>>8)&0x0000ff00; ret += (i<<8)&0x00ff0000; @@ -93,86 +87,38 @@ spx_uint32_t le_int(spx_uint32_t i) return ret; } -#if BYTES_PER_CHAR == 2 -void speex_memcpy_bytes(char *dst, char *src, int nbytes) -{ - int i; - int nchars = nbytes/BYTES_PER_CHAR; - for (i=0;idebugf -#else -#define DEBUGF(...) -#endif - -#ifdef ROCKBOX_HAS_LOGF -#undef LOGF -#define LOGF rb->logf -#else -#define LOGF(...) -#endif - -#ifdef CPU_ARM -#define ARM4_ASM -#endif - - #include "arch.h" #ifndef RELEASE @@ -125,12 +98,6 @@ void speex_free_scratch (void *ptr); /** Speex wrapper for mem_move */ void *speex_move (void *dest, void *src, int n); -/** Speex wrapper for memcpy */ -void speex_memcpy_bytes(char *dst, char *src, int nbytes); - -/** Speex wrapper for memset */ -void speex_memset_bytes(char *dst, char src, int nbytes); - /** Print error message to stderr */ void speex_error(const char *str); diff --git a/apps/codecs/libspeex/modes.c b/apps/codecs/libspeex/modes.c index 97e7d1e3fa..4b21ad75c3 100644 --- a/apps/codecs/libspeex/modes.c +++ b/apps/codecs/libspeex/modes.c @@ -495,7 +495,7 @@ static const SpeexSBMode sb_wb_mode = { #endif .012, /*lag_factor*/ QCONST16(.0002,15), /*lpc_floor*/ - 0.9, + QCONST16(0.9f,15), {NULL, &wb_submode1, &wb_submode2, &wb_submode3, &wb_submode4, NULL, NULL, NULL}, 3, {1, 8, 2, 3, 4, 5, 5, 6, 6, 7, 7}, @@ -541,7 +541,7 @@ static const SpeexSBMode sb_uwb_mode = { #endif .012, /*lag_factor*/ QCONST16(.0002,15), /*lpc_floor*/ - 0.7, + QCONST16(0.7f,15), {NULL, &wb_submode1, NULL, NULL, NULL, NULL, NULL, NULL}, 1, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, @@ -608,11 +608,7 @@ static const SpeexSubmode nb_48k_submode = { split_cb_search_shape_sign, split_cb_shape_sign_unquant, &split_cb_nb_48k, -#ifdef FIXED_POINT - 22938, 16384, 11796, 18022, -#else - 0.7, 0.5, .36, .55, -#endif + QCONST16(.7,15), 144 }; @@ -622,7 +618,6 @@ static const SpeexNBMode nb_48k_mode = { 240, /*frameSize*/ 48, /*subframeSize*/ 10, /*lpcSize*/ - 640, /*bufSize*/ 17, /*pitchStart*/ 144, /*pitchEnd*/ 0.9, /*gamma1*/ diff --git a/apps/codecs/libspeex/modes.h b/apps/codecs/libspeex/modes.h index 6a632402f7..819be70e23 100644 --- a/apps/codecs/libspeex/modes.h +++ b/apps/codecs/libspeex/modes.h @@ -130,7 +130,7 @@ typedef struct SpeexSBMode { spx_word16_t gamma2; /**< Perceptual filter parameter #1 */ float lag_factor; /**< Lag-windowing parameter */ spx_word16_t lpc_floor; /**< Noise floor for LPC analysis */ - float folding_gain; + spx_word16_t folding_gain; const SpeexSubmode *submodes[SB_SUBMODES]; /**< Sub-mode data for the mode */ int defaultSubmode; /**< Default sub-mode to use when encoding */ diff --git a/apps/codecs/libspeex/nb_celp.c b/apps/codecs/libspeex/nb_celp.c index fba6d612d1..5befb3bb37 100644 --- a/apps/codecs/libspeex/nb_celp.c +++ b/apps/codecs/libspeex/nb_celp.c @@ -187,7 +187,7 @@ void *nb_encoder_init(const SpeexMode *m) st->mem_exc2 = (spx_mem_t*)speex_alloc((st->lpcSize)*sizeof(spx_mem_t)); st->pi_gain = (spx_word32_t*)speex_alloc((st->nbSubframes)*sizeof(spx_word32_t)); - st->innov_save = NULL; + st->innov_rms_save = NULL; st->pitch = (int*)speex_alloc((st->nbSubframes)*sizeof(int)); @@ -280,6 +280,8 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) int pitch_half[2]; int ol_pitch_id=0; #endif + spx_word32_t ener=0; + spx_word16_t fine_gain; spx_word16_t *in = (spx_word16_t*)vin; st=(EncState *)state; @@ -432,7 +434,7 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) ol_gain2=ol2; ol_gain2 = sqrt(2*ol_gain2*(ol1+ol2))*1.3*(1-.5*GAIN_SCALING_1*GAIN_SCALING_1*ol_pitch_coef*ol_pitch_coef); - ol_gain=SHR(sqrt(1+ol_gain2/st->frameSize),SIG_SHIFT); + ol_gain=SHR32(sqrt(1+ol_gain2/st->frameSize),SIG_SHIFT); } else #endif @@ -490,7 +492,7 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) /* delta_qual*=.1*(3+st->vbr_quality);*/ if (st->vbr_enabled) { - int mode; + spx_int32_t mode; int choice=0; float min_diff=100; mode = 8; @@ -540,7 +542,7 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) if (st->abr_enabled) { - int bitrate; + spx_int32_t bitrate; speex_encoder_ctl(state, SPEEX_GET_BITRATE, &bitrate); st->abr_drift+=(bitrate-st->abr_enabled); st->abr_drift2 = .95*st->abr_drift2 + .05*(bitrate-st->abr_enabled); @@ -720,7 +722,6 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) int offset; spx_word16_t *sw; spx_word16_t *exc; - spx_sig_t *innov_save = NULL; int pitch; int response_bound = st->subframeSize; #ifdef EPIC_48K @@ -739,9 +740,6 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) exc=st->exc+offset; /* Weighted signal */ sw=st->sw+offset; - /* Pointer for saving innovation */ - if (st->innov_save) - innov_save = st->innov_save+offset; /* LSP interpolation (quantized and unquantized) */ lsp_interpolate(st->old_lsp, lsp, interp_lsp, st->lpcSize, sub, st->nbSubframes); @@ -838,9 +836,9 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) for (i=0;ilpcSize;i++) st->mem_sw[i]=mem[i]; - /* Compute target signal */ + /* Compute target signal (saturation prevents overflows on clipped input speech) */ for (i=0;isubframeSize;i++) - target[i]=SUB16(sw[i],PSHR32(ringing[i],1)); + target[i]=EXTRACT16(SATURATE(SUB32(sw[i],PSHR32(ringing[i],1)),32767)); /* Reset excitation */ for (i=0;isubframeSize;i++) @@ -901,75 +899,64 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) } /* Quantization of innovation */ - { - spx_word32_t ener=0; - spx_word16_t fine_gain; - - for (i=0;isubframeSize;i++) - innov[i]=0; - - for (i=0;isubframeSize;i++) - real_exc[i] = SUB16(real_exc[i], PSHR32(exc32[i],SIG_SHIFT-1)); - - ener = SHL32(EXTEND32(compute_rms16(real_exc, st->subframeSize)),SIG_SHIFT); - - /*FIXME: Should use DIV32_16 and make sure result fits in 16 bits */ + for (i=0;isubframeSize;i++) + innov[i]=0; + + /* FIXME: Make sure this is save from overflows (so far so good) */ + for (i=0;isubframeSize;i++) + real_exc[i] = EXTRACT16(SUB32(EXTEND32(real_exc[i]), PSHR32(exc32[i],SIG_SHIFT-1))); + + ener = SHL32(EXTEND32(compute_rms16(real_exc, st->subframeSize)),SIG_SHIFT); + + /*FIXME: Should use DIV32_16 and make sure result fits in 16 bits */ #ifdef FIXED_POINT - { - spx_word32_t f = PDIV32(ener,PSHR32(ol_gain,SIG_SHIFT)); - if (f<=32767) - fine_gain = f; - else - fine_gain = 32767; - } + { + spx_word32_t f = PDIV32(ener,PSHR32(ol_gain,SIG_SHIFT)); + if (f<=32767) + fine_gain = f; + else + fine_gain = 32767; + } #else - fine_gain = PDIV32_16(ener,PSHR32(ol_gain,SIG_SHIFT)); + fine_gain = PDIV32_16(ener,PSHR32(ol_gain,SIG_SHIFT)); #endif - /* Calculate gain correction for the sub-frame (if any) */ - if (SUBMODE(have_subframe_gain)) + /* Calculate gain correction for the sub-frame (if any) */ + if (SUBMODE(have_subframe_gain)) + { + int qe; + if (SUBMODE(have_subframe_gain)==3) { - int qe; - if (SUBMODE(have_subframe_gain)==3) - { - qe = scal_quant(fine_gain, exc_gain_quant_scal3_bound, 8); - speex_bits_pack(bits, qe, 3); - ener=MULT16_32_Q14(exc_gain_quant_scal3[qe],ol_gain); - } else { - qe = scal_quant(fine_gain, exc_gain_quant_scal1_bound, 2); - speex_bits_pack(bits, qe, 1); - ener=MULT16_32_Q14(exc_gain_quant_scal1[qe],ol_gain); - } + qe = scal_quant(fine_gain, exc_gain_quant_scal3_bound, 8); + speex_bits_pack(bits, qe, 3); + ener=MULT16_32_Q14(exc_gain_quant_scal3[qe],ol_gain); } else { - ener=ol_gain; + qe = scal_quant(fine_gain, exc_gain_quant_scal1_bound, 2); + speex_bits_pack(bits, qe, 1); + ener=MULT16_32_Q14(exc_gain_quant_scal1[qe],ol_gain); } + } else { + ener=ol_gain; + } + + /*printf ("%f %f\n", ener, ol_gain);*/ + + /* Normalize innovation */ + signal_div(target, target, ener, st->subframeSize); + + /* Quantize innovation */ + if (SUBMODE(innovation_quant)) + { + /* Codebook search */ + SUBMODE(innovation_quant)(target, interp_qlpc, bw_lpc1, bw_lpc2, + SUBMODE(innovation_params), st->lpcSize, st->subframeSize, + innov, syn_resp, bits, stack, st->complexity, SUBMODE(double_codebook)); + + /* De-normalize innovation and update excitation */ + signal_mul(innov, innov, ener, st->subframeSize); + + for (i=0;isubframeSize;i++) + exc[i] = EXTRACT16(SATURATE32(PSHR32(ADD32(SHL32(exc32[i],1),innov[i]),SIG_SHIFT),32767)); - /*printf ("%f %f\n", ener, ol_gain);*/ - - /* Normalize innovation */ - signal_div(target, target, ener, st->subframeSize); - - /* Quantize innovation */ - if (SUBMODE(innovation_quant)) - { - /* Codebook search */ - SUBMODE(innovation_quant)(target, interp_qlpc, bw_lpc1, bw_lpc2, - SUBMODE(innovation_params), st->lpcSize, st->subframeSize, - innov, syn_resp, bits, stack, st->complexity, SUBMODE(double_codebook)); - - /* De-normalize innovation and update excitation */ - signal_mul(innov, innov, ener, st->subframeSize); - - for (i=0;isubframeSize;i++) - exc[i] = EXTRACT16(PSHR32(ADD32(SHL32(exc32[i],1),innov[i]),SIG_SHIFT)); - } else { - speex_error("No fixed codebook"); - } - - if (innov_save) - { - for (i=0;isubframeSize;i++) - innov_save[i] = innov[i]; - } /* In some (rare) modes, we do a second search (more bits) to reduce noise even more */ if (SUBMODE(double_codebook)) { char *tmp_stack=stack; @@ -978,23 +965,26 @@ int nb_encode(void *state, void *vin, SpeexBits *bits) for (i=0;isubframeSize;i++) innov2[i]=0; for (i=0;isubframeSize;i++) - target[i]=MULT16_16_P13(QCONST16(2.2,13), target[i]); + target[i]=MULT16_16_P13(QCONST16(2.2f,13), target[i]); SUBMODE(innovation_quant)(target, interp_qlpc, bw_lpc1, bw_lpc2, SUBMODE(innovation_params), st->lpcSize, st->subframeSize, innov2, syn_resp, bits, stack, st->complexity, 0); - signal_mul(innov2, innov2, MULT16_32_Q15(QCONST16(0.454545,15),ener), st->subframeSize); + signal_mul(innov2, innov2, MULT16_32_Q15(QCONST16(0.454545f,15),ener), st->subframeSize); for (i=0;isubframeSize;i++) - exc[i] = ADD32(exc[i],PSHR32(innov2[i],SIG_SHIFT)); - if (innov_save) - { - for (i=0;isubframeSize;i++) - innov_save[i] = ADD32(innov_save[i],innov2[i]); - } + innov[i] = ADD32(innov[i],innov2[i]); stack = tmp_stack; } - + for (i=0;isubframeSize;i++) + exc[i] = EXTRACT16(SATURATE32(PSHR32(ADD32(SHL32(exc32[i],1),innov[i]),SIG_SHIFT),32767)); + if (st->innov_rms_save) + { + st->innov_rms_save[sub] = compute_rms(innov, st->subframeSize); + } + } else { + speex_error("No fixed codebook"); } + for (i=0;isubframeSize;i++) sw[i] = exc[i]; /* Final signal synthesis from excitation */ @@ -1166,7 +1156,7 @@ static void nb_decode_lost(DecState *st, spx_word16_t *out, char *stack) pitch_gain = st->last_pitch_gain; if (pitch_gain>54) pitch_gain = 54; - pitch_gain = SHL(pitch_gain, 9); + pitch_gain = SHL16(pitch_gain, 9); #else pitch_gain = GAIN_SCALING_1*st->last_pitch_gain; if (pitch_gain>.85) @@ -1200,7 +1190,7 @@ static void nb_decode_lost(DecState *st, spx_word16_t *out, char *stack) st->first = 0; st->count_lost++; - st->pitch_gain_buf[st->pitch_gain_buf_idx++] = PSHR(pitch_gain,9); + st->pitch_gain_buf[st->pitch_gain_buf_idx++] = PSHR16(pitch_gain,9); if (st->pitch_gain_buf_idx > 2) /* rollover */ st->pitch_gain_buf_idx = 0; } @@ -1226,7 +1216,7 @@ int nb_decode(void *state, SpeexBits *bits, void *vout) VARDECL(spx_lsp_t *qlsp); spx_word16_t pitch_average=0; #ifdef EPIC_48K - int pitch_half[2]; + int pitch_half[2] = {0, 0}; int ol_pitch_id=0; #endif spx_word16_t *out = (spx_word16_t*)vout; @@ -1338,7 +1328,7 @@ int nb_decode(void *state, SpeexBits *bits, void *vout) { VARDECL(spx_coef_t *lpc); ALLOC(lpc, st->lpcSize, spx_coef_t); - bw_lpc(GAMMA_SCALING*.93, st->interp_qlpc, lpc, st->lpcSize); + bw_lpc(QCONST16(0.93f,15), st->interp_qlpc, lpc, st->lpcSize); { float innov_gain=0; float pgain=GAIN_SCALING_1*st->last_pitch_gain; @@ -1426,6 +1416,7 @@ int nb_decode(void *state, SpeexBits *bits, void *vout) int qe; qe = speex_bits_unpack_unsigned(bits, 5); #ifdef FIXED_POINT + /* FIXME: Perhaps we could slightly lower the gain here when the output is going to saturate? */ ol_gain = MULT16_32_Q15(28406,ol_gain_table[qe]); #else ol_gain = SIG_SCALING*exp(qe/3.5); @@ -1458,7 +1449,7 @@ int nb_decode(void *state, SpeexBits *bits, void *vout) int offset; spx_word16_t *exc; spx_word16_t *sp; - spx_sig_t *innov_save = NULL; + spx_word16_t *innov_save = NULL; spx_word16_t tmp; #ifdef EPIC_48K @@ -1576,16 +1567,38 @@ int nb_decode(void *state, SpeexBits *bits, void *vout) { /*Fixed codebook contribution*/ SUBMODE(innovation_unquant)(innov, SUBMODE(innovation_params), st->subframeSize, bits, stack, &st->seed); + /* De-normalize innovation and update excitation */ +#ifdef FIXED_POINT + signal_mul(innov, innov, ener, st->subframeSize); +#else + signal_mul(innov, innov, ener, st->subframeSize); +#endif + /* Decode second codebook (only for some modes) */ + if (SUBMODE(double_codebook)) + { + char *tmp_stack=stack; + VARDECL(spx_sig_t *innov2); + ALLOC(innov2, st->subframeSize, spx_sig_t); + for (i=0;isubframeSize;i++) + innov2[i]=0; + SUBMODE(innovation_unquant)(innov2, SUBMODE(innovation_params), st->subframeSize, bits, stack, &st->seed); + signal_mul(innov2, innov2, MULT16_32_Q15(QCONST16(0.454545f,15),ener), st->subframeSize); + for (i=0;isubframeSize;i++) + innov[i] = ADD32(innov[i], innov2[i]); + stack = tmp_stack; + } + for (i=0;isubframeSize;i++) + exc[i]=EXTRACT16(SATURATE32(PSHR32(ADD32(SHL32(exc32[i],1),innov[i]),SIG_SHIFT),32767)); + /*print_vec(exc, 40, "innov");*/ + if (innov_save) + { + for (i=0;isubframeSize;i++) + innov_save[i] = EXTRACT16(PSHR32(innov[i], SIG_SHIFT)); + } } else { speex_error("No fixed codebook"); } - /* De-normalize innovation and update excitation */ -#ifdef FIXED_POINT - signal_mul(innov, innov, ener, st->subframeSize); -#else - signal_mul(innov, innov, ener, st->subframeSize); -#endif /*Vocoder mode*/ if (st->submodeID==1) { @@ -1617,35 +1630,8 @@ int nb_decode(void *state, SpeexBits *bits, void *vout) st->voc_mean = .95*st->voc_mean + .05*exc[i]; exc[i]-=st->voc_mean; } - } else { - for (i=0;isubframeSize;i++) - exc[i]=PSHR32(ADD32(SHL32(exc32[i],1),innov[i]),SIG_SHIFT); - /*print_vec(exc, 40, "innov");*/ - } - if (innov_save) - { - for (i=0;isubframeSize;i++) - innov_save[i] = innov[i]; - } - /* Decode second codebook (only for some modes) */ - if (SUBMODE(double_codebook)) - { - char *tmp_stack=stack; - VARDECL(spx_sig_t *innov2); - ALLOC(innov2, st->subframeSize, spx_sig_t); - for (i=0;isubframeSize;i++) - innov2[i]=0; - SUBMODE(innovation_unquant)(innov2, SUBMODE(innovation_params), st->subframeSize, bits, stack, &st->seed); - signal_mul(innov2, innov2, MULT16_32_Q15(QCONST16(0.454545,15),ener), st->subframeSize); - for (i=0;isubframeSize;i++) - exc[i] = ADD16(exc[i],PSHR32(innov2[i],SIG_SHIFT)); - if (innov_save) - { - for (i=0;isubframeSize;i++) - innov_save[i] = ADD32(innov_save[i],innov2[i]); - } - stack = tmp_stack; } + } } @@ -1759,40 +1745,40 @@ int nb_encoder_ctl(void *state, int request, void *ptr) switch(request) { case SPEEX_GET_FRAME_SIZE: - (*(int*)ptr) = st->frameSize; + (*(spx_int32_t*)ptr) = st->frameSize; break; case SPEEX_SET_LOW_MODE: case SPEEX_SET_MODE: - st->submodeSelect = st->submodeID = (*(int*)ptr); + st->submodeSelect = st->submodeID = (*(spx_int32_t*)ptr); break; case SPEEX_GET_LOW_MODE: case SPEEX_GET_MODE: - (*(int*)ptr) = st->submodeID; + (*(spx_int32_t*)ptr) = st->submodeID; break; case SPEEX_SET_VBR: - st->vbr_enabled = (*(int*)ptr); + st->vbr_enabled = (*(spx_int32_t*)ptr); break; case SPEEX_GET_VBR: - (*(int*)ptr) = st->vbr_enabled; + (*(spx_int32_t*)ptr) = st->vbr_enabled; break; case SPEEX_SET_VAD: - st->vad_enabled = (*(int*)ptr); + st->vad_enabled = (*(spx_int32_t*)ptr); break; case SPEEX_GET_VAD: - (*(int*)ptr) = st->vad_enabled; + (*(spx_int32_t*)ptr) = st->vad_enabled; break; case SPEEX_SET_DTX: - st->dtx_enabled = (*(int*)ptr); + st->dtx_enabled = (*(spx_int32_t*)ptr); break; case SPEEX_GET_DTX: - (*(int*)ptr) = st->dtx_enabled; + (*(spx_int32_t*)ptr) = st->dtx_enabled; break; case SPEEX_SET_ABR: st->abr_enabled = (*(spx_int32_t*)ptr); st->vbr_enabled = st->abr_enabled!=0; if (st->vbr_enabled) { - int i=10; + spx_int32_t i=10; spx_int32_t rate, target; float vbr_qual; target = (*(spx_int32_t*)ptr); @@ -1825,7 +1811,7 @@ int nb_encoder_ctl(void *state, int request, void *ptr) break; case SPEEX_SET_QUALITY: { - int quality = (*(int*)ptr); + int quality = (*(spx_int32_t*)ptr); if (quality < 0) quality = 0; if (quality > 10) @@ -1834,7 +1820,7 @@ int nb_encoder_ctl(void *state, int request, void *ptr) } break; case SPEEX_SET_COMPLEXITY: - st->complexity = (*(int*)ptr); + st->complexity = (*(spx_int32_t*)ptr); if (st->complexity<0) st->complexity=0; break; @@ -1843,7 +1829,7 @@ int nb_encoder_ctl(void *state, int request, void *ptr) break; case SPEEX_SET_BITRATE: { - int i=10; + spx_int32_t i=10; spx_int32_t rate, target; target = (*(spx_int32_t*)ptr); while (i>=0) @@ -1884,21 +1870,21 @@ int nb_encoder_ctl(void *state, int request, void *ptr) } break; case SPEEX_SET_SUBMODE_ENCODING: - st->encode_submode = (*(int*)ptr); + st->encode_submode = (*(spx_int32_t*)ptr); break; case SPEEX_GET_SUBMODE_ENCODING: - (*(int*)ptr) = st->encode_submode; + (*(spx_int32_t*)ptr) = st->encode_submode; break; case SPEEX_GET_LOOKAHEAD: - (*(int*)ptr)=(st->windowSize-st->frameSize); + (*(spx_int32_t*)ptr)=(st->windowSize-st->frameSize); break; case SPEEX_SET_PLC_TUNING: - st->plc_tuning = (*(int*)ptr); + st->plc_tuning = (*(spx_int32_t*)ptr); if (st->plc_tuning>100) st->plc_tuning=100; break; case SPEEX_GET_PLC_TUNING: - (*(int*)ptr)=(st->plc_tuning); + (*(spx_int32_t*)ptr)=(st->plc_tuning); break; case SPEEX_SET_VBR_MAX_BITRATE: st->vbr_max = (*(spx_int32_t*)ptr); @@ -1925,19 +1911,18 @@ int nb_encoder_ctl(void *state, int request, void *ptr) case SPEEX_GET_EXC: { int i; - spx_word16_t *e = (spx_word16_t*)ptr; - for (i=0;iframeSize;i++) - e[i]=st->exc[i]; + for (i=0;inbSubframes;i++) + ((spx_word16_t*)ptr)[i] = compute_rms16(st->exc+i*st->subframeSize, st->subframeSize); } break; case SPEEX_GET_RELATIVE_QUALITY: (*(float*)ptr)=st->relative_quality; break; case SPEEX_SET_INNOVATION_SAVE: - st->innov_save = (spx_sig_t*)ptr; + st->innov_rms_save = (spx_word16_t*)ptr; break; case SPEEX_SET_WIDEBAND: - st->isWideband = *((int*)ptr); + st->isWideband = *((spx_int32_t*)ptr); break; default: speex_warning_int("Unknown nb_ctl request: ", request); @@ -1954,20 +1939,20 @@ int nb_decoder_ctl(void *state, int request, void *ptr) { case SPEEX_SET_LOW_MODE: case SPEEX_SET_MODE: - st->submodeID = (*(int*)ptr); + st->submodeID = (*(spx_int32_t*)ptr); break; case SPEEX_GET_LOW_MODE: case SPEEX_GET_MODE: - (*(int*)ptr) = st->submodeID; + (*(spx_int32_t*)ptr) = st->submodeID; break; case SPEEX_SET_ENH: - st->lpc_enh_enabled = *((int*)ptr); + st->lpc_enh_enabled = *((spx_int32_t*)ptr); break; case SPEEX_GET_ENH: - *((int*)ptr) = st->lpc_enh_enabled; + *((spx_int32_t*)ptr) = st->lpc_enh_enabled; break; case SPEEX_GET_FRAME_SIZE: - (*(int*)ptr) = st->frameSize; + (*(spx_int32_t*)ptr) = st->frameSize; break; case SPEEX_GET_BITRATE: if (st->submodes[st->submodeID]) @@ -2007,13 +1992,13 @@ int nb_decoder_ctl(void *state, int request, void *ptr) } break; case SPEEX_SET_SUBMODE_ENCODING: - st->encode_submode = (*(int*)ptr); + st->encode_submode = (*(spx_int32_t*)ptr); break; case SPEEX_GET_SUBMODE_ENCODING: - (*(int*)ptr) = st->encode_submode; + (*(spx_int32_t*)ptr) = st->encode_submode; break; case SPEEX_GET_LOOKAHEAD: - (*(int*)ptr)=st->subframeSize; + (*(spx_int32_t*)ptr)=st->subframeSize; break; case SPEEX_SET_HIGHPASS: st->highpass_enabled = (*(spx_int32_t*)ptr); @@ -2033,19 +2018,18 @@ int nb_decoder_ctl(void *state, int request, void *ptr) case SPEEX_GET_EXC: { int i; - spx_word16_t *e = (spx_word16_t*)ptr; - for (i=0;iframeSize;i++) - e[i]=st->exc[i]; + for (i=0;inbSubframes;i++) + ((spx_word16_t*)ptr)[i] = compute_rms16(st->exc+i*st->subframeSize, st->subframeSize); } break; case SPEEX_GET_DTX_STATUS: - *((int*)ptr) = st->dtx_enabled; + *((spx_int32_t*)ptr) = st->dtx_enabled; break; case SPEEX_SET_INNOVATION_SAVE: - st->innov_save = (spx_sig_t*)ptr; + st->innov_save = (spx_word16_t*)ptr; break; case SPEEX_SET_WIDEBAND: - st->isWideband = *((int*)ptr); + st->isWideband = *((spx_int32_t*)ptr); break; default: speex_warning_int("Unknown nb_ctl request: ", request); diff --git a/apps/codecs/libspeex/nb_celp.h b/apps/codecs/libspeex/nb_celp.h index a72c2b1168..1ebf71754c 100644 --- a/apps/codecs/libspeex/nb_celp.h +++ b/apps/codecs/libspeex/nb_celp.h @@ -96,12 +96,12 @@ typedef struct EncState { spx_mem_t *mem_exc2; /**< Filter memory for excitation (whole frame) */ spx_mem_t mem_hp[2]; /**< High-pass filter memory */ spx_word32_t *pi_gain; /**< Gain of LPC filter at theta=pi (fe/2) */ - spx_sig_t *innov_save; /**< If non-NULL, innovation is copied here */ + spx_word16_t *innov_rms_save; /**< If non-NULL, innovation RMS is copied here */ VBRState *vbr; /**< State of the VBR data */ float vbr_quality; /**< Quality setting for VBR encoding */ float relative_quality; /**< Relative quality that will be needed by VBR */ - int vbr_enabled; /**< 1 for enabling VBR, 0 otherwise */ + spx_int32_t vbr_enabled; /**< 1 for enabling VBR, 0 otherwise */ spx_int32_t vbr_max; /**< Max bit-rate allowed in VBR mode */ int vad_enabled; /**< 1 for enabling VAD, 0 otherwise */ int dtx_enabled; /**< 1 for enabling DTX, 0 otherwise */ @@ -148,7 +148,7 @@ typedef struct DecState { spx_mem_t *mem_sp; /**< Filter memory for synthesis signal */ spx_mem_t mem_hp[2]; /**< High-pass filter memory */ spx_word32_t *pi_gain; /**< Gain of LPC filter at theta=pi (fe/2) */ - spx_sig_t *innov_save; /** If non-NULL, innovation is copied here */ + spx_word16_t *innov_save; /** If non-NULL, innovation is copied here */ /* This is used in packet loss concealment */ int last_pitch; /**< Pitch of last correctly decoded frame */ diff --git a/apps/codecs/libspeex/preprocess.c b/apps/codecs/libspeex/preprocess.c index 64d75e57ad..bd4f776fd2 100644 --- a/apps/codecs/libspeex/preprocess.c +++ b/apps/codecs/libspeex/preprocess.c @@ -1,6 +1,6 @@ -/* Copyright (C) 2003 Epic Games - Written by Jean-Marc Valin - +/* Copyright (C) 2003 Epic Games (written by Jean-Marc Valin) + Copyright (C) 2004-2006 Epic Games + File: preprocess.c Preprocessor with denoising based on the algorithm by Ephraim and Malah @@ -31,96 +31,364 @@ POSSIBILITY OF SUCH DAMAGE. */ + +/* + Recommended papers: + + Y. Ephraim and D. Malah, "Speech enhancement using minimum mean-square error + short-time spectral amplitude estimator". IEEE Transactions on Acoustics, + Speech and Signal Processing, vol. ASSP-32, no. 6, pp. 1109-1121, 1984. + + Y. Ephraim and D. Malah, "Speech enhancement using minimum mean-square error + log-spectral amplitude estimator". IEEE Transactions on Acoustics, Speech and + Signal Processing, vol. ASSP-33, no. 2, pp. 443-445, 1985. + + I. Cohen and B. Berdugo, "Speech enhancement for non-stationary noise environments". + Signal Processing, vol. 81, no. 2, pp. 2403-2418, 2001. + + Stefan Gustafsson, Rainer Martin, Peter Jax, and Peter Vary. "A psychoacoustic + approach to combined acoustic echo cancellation and noise reduction". IEEE + Transactions on Speech and Audio Processing, 2002. + + J.-M. Valin, J. Rouat, and F. Michaud, "Microphone array post-filter for separation + of simultaneous non-stationary sources". In Proceedings IEEE International + Conference on Acoustics, Speech, and Signal Processing, 2004. +*/ + #ifdef HAVE_CONFIG_H #include "config.h" #endif -#include "math_approx.h" +#include #include "speex/speex_preprocess.h" +#include "speex/speex_echo.h" #include "misc.h" -#include "smallft.h" - -#define max(a,b) ((a) > (b) ? (a) : (b)) -#define min(a,b) ((a) < (b) ? (a) : (b)) +#include "fftwrap.h" +#include "filterbank.h" +#include "math_approx.h" #ifndef M_PI #define M_PI 3.14159263 #endif -#define SQRT_M_PI_2 0.88623 -#define LOUDNESS_EXP 2.5 +#define LOUDNESS_EXP 5.f +#define AMP_SCALE .001f +#define AMP_SCALE_1 1000.f + +#define NB_BANDS 24 -#define NB_BANDS 8 +#define SPEECH_PROB_START_DEFAULT QCONST16(0.35f,15) +#define SPEECH_PROB_CONTINUE_DEFAULT QCONST16(0.20f,15) +#define NOISE_SUPPRESS_DEFAULT -15 +#define ECHO_SUPPRESS_DEFAULT -40 +#define ECHO_SUPPRESS_ACTIVE_DEFAULT -15 -#define SPEEX_PROB_START_DEFAULT 0.35f -#define SPEEX_PROB_CONTINUE_DEFAULT 0.20f +#ifndef NULL +#define NULL 0 +#endif -#define ZMIN .1 -#define ZMAX .316 -#define ZMIN_1 10 -#define LOG_MIN_MAX_1 0.86859 +#define SQR(x) ((x)*(x)) +#define SQR16(x) (MULT16_16((x),(x))) +#define SQR16_Q15(x) (MULT16_16_Q15((x),(x))) -static void conj_window(float *w, int len) +#ifdef FIXED_POINT +static inline spx_word16_t DIV32_16_Q8(spx_word32_t a, spx_word32_t b) +{ + if (SHR32(a,7) >= b) + { + return 32767; + } else { + if (b>=QCONST32(1,23)) + { + a = SHR32(a,8); + b = SHR32(b,8); + } + if (b>=QCONST32(1,19)) + { + a = SHR32(a,4); + b = SHR32(b,4); + } + if (b>=QCONST32(1,15)) + { + a = SHR32(a,4); + b = SHR32(b,4); + } + a = SHL32(a,8); + return PDIV32_16(a,b); + } + +} +static inline spx_word16_t DIV32_16_Q15(spx_word32_t a, spx_word32_t b) +{ + if (SHR32(a,15) >= b) + { + return 32767; + } else { + if (b>=QCONST32(1,23)) + { + a = SHR32(a,8); + b = SHR32(b,8); + } + if (b>=QCONST32(1,19)) + { + a = SHR32(a,4); + b = SHR32(b,4); + } + if (b>=QCONST32(1,15)) + { + a = SHR32(a,4); + b = SHR32(b,4); + } + a = SHL32(a,15)-a; + return DIV32_16(a,b); + } +} +#define SNR_SCALING 256.f +#define SNR_SCALING_1 0.0039062f +#define SNR_SHIFT 8 + +#define FRAC_SCALING 32767.f +#define FRAC_SCALING_1 3.0518e-05 +#define FRAC_SHIFT 1 + +#define EXPIN_SCALING 2048.f +#define EXPIN_SCALING_1 0.00048828f +#define EXPIN_SHIFT 11 +#define EXPOUT_SCALING_1 1.5259e-05 + +#define NOISE_SHIFT 7 + +#else + +#define DIV32_16_Q8(a,b) ((a)/(b)) +#define DIV32_16_Q15(a,b) ((a)/(b)) +#define SNR_SCALING 1.f +#define SNR_SCALING_1 1.f +#define SNR_SHIFT 0 +#define FRAC_SCALING 1.f +#define FRAC_SCALING_1 1.f +#define FRAC_SHIFT 0 +#define NOISE_SHIFT 0 + +#define EXPIN_SCALING 1.f +#define EXPIN_SCALING_1 1.f +#define EXPOUT_SCALING_1 1.f + +#endif + +/** Speex pre-processor state. */ +struct SpeexPreprocessState_ { + /* Basic info */ + int frame_size; /**< Number of samples processed each time */ + int ps_size; /**< Number of points in the power spectrum */ + int sampling_rate; /**< Sampling rate of the input/output */ + int nbands; + FilterBank *bank; + + /* Parameters */ + int denoise_enabled; + int vad_enabled; + int dereverb_enabled; + spx_word16_t reverb_decay; + spx_word16_t reverb_level; + spx_word16_t speech_prob_start; + spx_word16_t speech_prob_continue; + int noise_suppress; + int echo_suppress; + int echo_suppress_active; + SpeexEchoState *echo_state; + + /* DSP-related arrays */ + spx_word16_t *frame; /**< Processing frame (2*ps_size) */ + spx_word16_t *ft; /**< Processing frame in freq domain (2*ps_size) */ + spx_word32_t *ps; /**< Current power spectrum */ + spx_word16_t *gain2; /**< Adjusted gains */ + spx_word16_t *gain_floor; /**< Minimum gain allowed */ + spx_word16_t *window; /**< Analysis/Synthesis window */ + spx_word32_t *noise; /**< Noise estimate */ + spx_word32_t *reverb_estimate; /**< Estimate of reverb energy */ + spx_word32_t *old_ps; /**< Power spectrum for last frame */ + spx_word16_t *gain; /**< Ephraim Malah gain */ + spx_word16_t *prior; /**< A-priori SNR */ + spx_word16_t *post; /**< A-posteriori SNR */ + + spx_word32_t *S; /**< Smoothed power spectrum */ + spx_word32_t *Smin; /**< See Cohen paper */ + spx_word32_t *Stmp; /**< See Cohen paper */ + int *update_prob; /**< Propability of speech presence for noise update */ + + spx_word16_t *zeta; /**< Smoothed a priori SNR */ + spx_word32_t *echo_noise; + spx_word32_t *residual_echo; + + /* Misc */ + spx_word16_t *inbuf; /**< Input buffer (overlapped analysis) */ + spx_word16_t *outbuf; /**< Output buffer (for overlap and add) */ + + /* AGC stuff, only for floating point for now */ +#ifndef FIXED_POINT + int agc_enabled; + float agc_level; + float *loudness_weight; /**< Perceptual loudness curve */ + float loudness; /**< Loudness estimate */ + float agc_gain; /**< Current AGC gain */ + int nb_loudness_adapt; /**< Number of frames used for loudness adaptation so far */ + float max_gain; /**< Maximum gain allowed */ + float max_increase_step; /**< Maximum increase in gain from one frame to another */ + float max_decrease_step; /**< Maximum decrease in gain from one frame to another */ + float prev_loudness; /**< Loudness of previous frame */ + float init_max; /**< Current gain limit during initialisation */ +#endif + int nb_adapt; /**< Number of frames used for adaptation so far */ + int was_speech; + int min_count; /**< Number of frames processed so far */ + void *fft_lookup; /**< Lookup table for the FFT */ +#ifdef FIXED_POINT + int frame_shift; +#endif +}; + + +static void conj_window(spx_word16_t *w, int len) { int i; for (i=0;i19) + return ADD32(EXTEND32(Q15_ONE),EXTEND32(DIV32_16(QCONST32(.1296,23), SHR32(xx,EXPIN_SHIFT-SNR_SHIFT)))); + frac = SHL32(xx-SHL32(ind,10),5); + return SHL32(DIV32_16(PSHR32(MULT16_16(Q15_ONE-frac,table[ind]) + MULT16_16(frac,table[ind+1]),7),(spx_sqrt(SHL32(xx,15)+6711))),7); +} + +static inline spx_word16_t qcurve(spx_word16_t x) +{ + x = MAX16(x, 1); + return DIV32_16(SHL32(EXTEND32(32767),9),ADD16(512,MULT16_16_Q15(QCONST16(.60f,15),DIV32_16(32767,x)))); +} + +/* Compute the gain floor based on different floors for the background noise and residual echo */ +static void compute_gain_floor(int noise_suppress, int effective_echo_suppress, spx_word32_t *noise, spx_word32_t *echo, spx_word16_t *gain_floor, int len) +{ + int i; + + if (noise_suppress > effective_echo_suppress) + { + spx_word16_t noise_gain, gain_ratio; + noise_gain = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(0.11513,11),noise_suppress)),1))); + gain_ratio = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(.2302585f,11),effective_echo_suppress-noise_suppress)),1))); + + /* gain_floor = sqrt [ (noise*noise_floor + echo*echo_floor) / (noise+echo) ] */ + for (i=0;i19) - return 1+.1296/x; - frac = 2*x-integer; - return ((1-frac)*table[ind] + frac*table[ind+1])/sqrt(x+.0001f); + x = EXPIN_SCALING_1*xx; + integer = floor(2*x); + ind = (int)integer; + if (ind<0) + return FRAC_SCALING; + if (ind>19) + return FRAC_SCALING*(1+.1296/x); + frac = 2*x-integer; + return FRAC_SCALING*((1-frac)*table[ind] + frac*table[ind+1])/sqrt(x+.0001f); } -static inline float qcurve(float x) +static inline spx_word16_t qcurve(spx_word16_t x) { - return 1.f/(1.f+.1f/(x*x)); + return 1.f/(1.f+.15f/(SNR_SCALING_1*x)); } +static void compute_gain_floor(int noise_suppress, int effective_echo_suppress, spx_word32_t *noise, spx_word32_t *echo, spx_word16_t *gain_floor, int len) +{ + int i; + float echo_floor; + float noise_floor; + + noise_floor = exp(.2302585f*noise_suppress); + echo_floor = exp(.2302585f*effective_echo_suppress); + + /* Compute the gain floor based on different floors for the background noise and residual echo */ + for (i=0;iframe_size = frame_size; @@ -153,49 +421,51 @@ SpeexPreprocessState *speex_preprocess_state_init(int frame_size, int sampling_r st->sampling_rate = sampling_rate; st->denoise_enabled = 1; - st->agc_enabled = 0; - st->agc_level = 8000; st->vad_enabled = 0; st->dereverb_enabled = 0; - st->reverb_decay = .5; - st->reverb_level = .2; + st->reverb_decay = 0; + st->reverb_level = 0; + st->noise_suppress = NOISE_SUPPRESS_DEFAULT; + st->echo_suppress = ECHO_SUPPRESS_DEFAULT; + st->echo_suppress_active = ECHO_SUPPRESS_ACTIVE_DEFAULT; - st->speech_prob_start = SPEEX_PROB_START_DEFAULT; - st->speech_prob_continue = SPEEX_PROB_CONTINUE_DEFAULT; + st->speech_prob_start = SPEECH_PROB_START_DEFAULT; + st->speech_prob_continue = SPEECH_PROB_CONTINUE_DEFAULT; - st->frame = (float*)speex_alloc(2*N*sizeof(float)); - st->ps = (float*)speex_alloc(N*sizeof(float)); - st->gain2 = (float*)speex_alloc(N*sizeof(float)); - st->window = (float*)speex_alloc(2*N*sizeof(float)); - st->noise = (float*)speex_alloc(N*sizeof(float)); - st->reverb_estimate = (float*)speex_alloc(N*sizeof(float)); - st->old_ps = (float*)speex_alloc(N*sizeof(float)); - st->gain = (float*)speex_alloc(N*sizeof(float)); - st->prior = (float*)speex_alloc(N*sizeof(float)); - st->post = (float*)speex_alloc(N*sizeof(float)); - st->loudness_weight = (float*)speex_alloc(N*sizeof(float)); - st->inbuf = (float*)speex_alloc(N3*sizeof(float)); - st->outbuf = (float*)speex_alloc(N3*sizeof(float)); - st->echo_noise = (float*)speex_alloc(N*sizeof(float)); - - st->S = (float*)speex_alloc(N*sizeof(float)); - st->Smin = (float*)speex_alloc(N*sizeof(float)); - st->Stmp = (float*)speex_alloc(N*sizeof(float)); - st->update_prob = (float*)speex_alloc(N*sizeof(float)); - - st->zeta = (float*)speex_alloc(N*sizeof(float)); - st->Zpeak = 0; - st->Zlast = 0; - - st->noise_bands = (float*)speex_alloc(NB_BANDS*sizeof(float)); - st->noise_bands2 = (float*)speex_alloc(NB_BANDS*sizeof(float)); - st->speech_bands = (float*)speex_alloc(NB_BANDS*sizeof(float)); - st->speech_bands2 = (float*)speex_alloc(NB_BANDS*sizeof(float)); - st->noise_bandsN = st->speech_bandsN = 1; + st->echo_state = NULL; + + st->nbands = NB_BANDS; + M = st->nbands; + st->bank = filterbank_new(M, sampling_rate, N, 1); + + st->frame = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t)); + st->window = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t)); + st->ft = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t)); + + st->ps = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t)); + st->noise = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t)); + st->echo_noise = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t)); + st->residual_echo = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t)); + st->reverb_estimate = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t)); + st->old_ps = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t)); + st->prior = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t)); + st->post = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t)); + st->gain = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t)); + st->gain2 = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t)); + st->gain_floor = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t)); + st->zeta = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t)); + + st->S = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); + st->Smin = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); + st->Stmp = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); + st->update_prob = (int*)speex_alloc(N*sizeof(int)); + + st->inbuf = (spx_word16_t*)speex_alloc(N3*sizeof(spx_word16_t)); + st->outbuf = (spx_word16_t*)speex_alloc(N3*sizeof(spx_word16_t)); conj_window(st->window, 2*N3); for (i=2*N3;i<2*st->ps_size;i++) - st->window[i]=1; + st->window[i]=Q15_ONE; if (N4>0) { @@ -205,51 +475,61 @@ SpeexPreprocessState *speex_preprocess_state_init(int frame_size, int sampling_r st->window[i+N3]=1; } } - for (i=0;inoise[i]=1e4; - st->reverb_estimate[i]=0.; - st->old_ps[i]=1e4; - st->gain[i]=1; - st->post[i]=1; - st->prior[i]=1; + st->noise[i]=QCONST32(1.f,NOISE_SHIFT); + st->reverb_estimate[i]=0; + st->old_ps[i]=1; + st->gain[i]=Q15_ONE; + st->post[i]=SHL16(1, SNR_SHIFT); + st->prior[i]=SHL16(1, SNR_SHIFT); } + for (i=0;iupdate_prob[i] = 1; for (i=0;iinbuf[i]=0; st->outbuf[i]=0; } - +#ifndef FIXED_POINT + st->agc_enabled = 0; + st->agc_level = 8000; + st->loudness_weight = (float*)speex_alloc(N*sizeof(float)); for (i=0;iloudness_weight[i] = .5f*(1.f/(1.f+ff/8000.f))+1.f*exp(-.5f*(ff-3800.f)*(ff-3800.f)/9e5f);*/ st->loudness_weight[i] = .35f-.35f*ff/16000.f+.73f*exp(-.5f*(ff-3800)*(ff-3800)/9e5f); if (st->loudness_weight[i]<.01f) st->loudness_weight[i]=.01f; st->loudness_weight[i] *= st->loudness_weight[i]; } - - st->speech_prob = 0; - st->last_speech = 1000; - st->loudness = pow(6000,LOUDNESS_EXP); - st->loudness2 = 6000; + st->loudness = pow(AMP_SCALE*st->agc_level,LOUDNESS_EXP); + st->agc_gain = 1; st->nb_loudness_adapt = 0; + st->max_gain = 10; + st->max_increase_step = exp(0.11513f * 6.*st->frame_size / st->sampling_rate); + st->max_decrease_step = exp(-0.11513f * 30.*st->frame_size / st->sampling_rate); + st->prev_loudness = 1; + st->init_max = 1; +#endif + st->was_speech = 0; - st->fft_lookup = (struct drft_lookup*)speex_alloc(sizeof(struct drft_lookup)); - spx_drft_init(st->fft_lookup,2*N); + st->fft_lookup = spx_fft_init(2*N); st->nb_adapt=0; - st->consec_noise=0; - st->nb_preprocess=0; + st->min_count=0; return st; } void speex_preprocess_state_destroy(SpeexPreprocessState *st) { speex_free(st->frame); + speex_free(st->ft); speex_free(st->ps); speex_free(st->gain2); + speex_free(st->gain_floor); speex_free(st->window); speex_free(st->noise); speex_free(st->reverb_estimate); @@ -257,8 +537,11 @@ void speex_preprocess_state_destroy(SpeexPreprocessState *st) speex_free(st->gain); speex_free(st->prior); speex_free(st->post); +#ifndef FIXED_POINT speex_free(st->loudness_weight); +#endif speex_free(st->echo_noise); + speex_free(st->residual_echo); speex_free(st->S); speex_free(st->Smin); @@ -266,298 +549,63 @@ void speex_preprocess_state_destroy(SpeexPreprocessState *st) speex_free(st->update_prob); speex_free(st->zeta); - speex_free(st->noise_bands); - speex_free(st->noise_bands2); - speex_free(st->speech_bands); - speex_free(st->speech_bands2); - speex_free(st->inbuf); speex_free(st->outbuf); - spx_drft_clear(st->fft_lookup); - speex_free(st->fft_lookup); - + spx_fft_destroy(st->fft_lookup); + filterbank_destroy(st->bank); speex_free(st); } -static void update_noise(SpeexPreprocessState *st, float *ps, spx_int32_t *echo) +/* FIXME: The AGC doesn't work yet with fixed-point*/ +#ifndef FIXED_POINT +static void speex_compute_agc(SpeexPreprocessState *st, spx_word16_t Pframe, spx_word16_t *ft) { int i; - float beta; - st->nb_adapt++; - beta=1.0f/st->nb_adapt; - if (beta < .05f) - beta=.05f; + int N = st->ps_size; + float target_gain; + float loudness=1.f; + float rate; - if (!echo) + for (i=2;ips_size;i++) - st->noise[i] = (1.f-beta)*st->noise[i] + beta*ps[i]; - } else { - for (i=0;ips_size;i++) - st->noise[i] = (1.f-beta)*st->noise[i] + beta*max(1.f,ps[i]-st->frame_size*st->frame_size*1.0*echo[i]); -#if 0 - for (i=0;ips_size;i++) - st->noise[i] = 0; -#endif + loudness += 2.f*N*st->ps[i]* st->loudness_weight[i]; } -} - -static int speex_compute_vad(SpeexPreprocessState *st, float *ps, float mean_prior, float mean_post) -{ - int i, is_speech=0; - int N = st->ps_size; - float scale=.5f/N; - - /* FIXME: Clean this up a bit */ - { - float bands[NB_BANDS]; - int j; - float p0, p1; - float tot_loudness=0; - float x = sqrt(mean_post); - - for (i=5;ips[i] * st->loudness_weight[i]; - } - - for (i=0;ispeech_prob + .01*(1-st->speech_prob); - p1 *= .01*st->speech_prob + .99*(1-st->speech_prob); - - st->speech_prob = p0/(p1+p0); - */ - - if (st->noise_bandsN < 50 || st->speech_bandsN < 50) - { - if (mean_post > 5.f) - { - float adapt = 1./st->speech_bandsN++; - if (adapt<.005f) - adapt = .005f; - for (i=0;ispeech_bands[i] = (1.f-adapt)*st->speech_bands[i] + adapt*bands[i]; - /*st->speech_bands2[i] = (1-adapt)*st->speech_bands2[i] + adapt*bands[i]*bands[i];*/ - st->speech_bands2[i] = (1.f-adapt)*st->speech_bands2[i] + adapt*(bands[i]-st->speech_bands[i])*(bands[i]-st->speech_bands[i]); - } - } else { - float adapt = 1./st->noise_bandsN++; - if (adapt<.005f) - adapt = .005f; - for (i=0;inoise_bands[i] = (1.f-adapt)*st->noise_bands[i] + adapt*bands[i]; - /*st->noise_bands2[i] = (1-adapt)*st->noise_bands2[i] + adapt*bands[i]*bands[i];*/ - st->noise_bands2[i] = (1.f-adapt)*st->noise_bands2[i] + adapt*(bands[i]-st->noise_bands[i])*(bands[i]-st->noise_bands[i]); - } - } - } - p0=p1=1; - for (i=0;inoise_bands2[i] - st->noise_bands[i]*st->noise_bands[i]; - speech_var = 1.01*st->speech_bands2[i] - st->speech_bands[i]*st->speech_bands[i];*/ - noise_var = st->noise_bands2[i]; - speech_var = st->speech_bands2[i]; - if (noise_var < .1f) - noise_var = .1f; - if (speech_var < .1f) - speech_var = .1f; - - /*speech_var = sqrt(speech_var*noise_var); - noise_var = speech_var;*/ - if (noise_var < .05f*speech_var) - noise_var = .05f*speech_var; - if (speech_var < .05f*noise_var) - speech_var = .05f*noise_var; - - if (bands[i] < st->noise_bands[i]) - speech_var = noise_var; - if (bands[i] > st->speech_bands[i]) - noise_var = speech_var; - - speech_mean = st->speech_bands[i]; - noise_mean = st->noise_bands[i]; - if (noise_mean < speech_mean - 5.f) - noise_mean = speech_mean - 5.f; - - tmp1 = exp(-.5f*(bands[i]-speech_mean)*(bands[i]-speech_mean)/speech_var)/sqrt(2.f*M_PI*speech_var); - tmp2 = exp(-.5f*(bands[i]-noise_mean)*(bands[i]-noise_mean)/noise_var)/sqrt(2.f*M_PI*noise_var); - /*fprintf (stderr, "%f ", (float)(p0/(.01+p0+p1)));*/ - /*fprintf (stderr, "%f ", (float)(bands[i]));*/ - pr = tmp1/(1e-25+tmp1+tmp2); - /*if (bands[i] < st->noise_bands[i]) - pr=.01; - if (bands[i] > st->speech_bands[i] && pr < .995) - pr=.995;*/ - if (pr>.999f) - pr=.999f; - if (pr<.001f) - pr=.001f; - /*fprintf (stderr, "%f ", pr);*/ - p0 *= pr; - p1 *= (1-pr); - } - - p0 = pow(p0,.2); - p1 = pow(p1,.2); - -#if 1 - p0 *= 2.f; - p0=p0/(p1+p0); - if (st->last_speech>20) - { - float tmp = sqrt(tot_loudness)/st->loudness2; - tmp = 1.f-exp(-10.f*tmp); - if (p0>tmp) - p0=tmp; - } - p1=1-p0; -#else - if (sqrt(tot_loudness) < .6f*st->loudness2 && p0>15.f*p1) - p0=15.f*p1; - if (sqrt(tot_loudness) < .45f*st->loudness2 && p0>7.f*p1) - p0=7.f*p1; - if (sqrt(tot_loudness) < .3f*st->loudness2 && p0>3.f*p1) - p0=3.f*p1; - if (sqrt(tot_loudness) < .15f*st->loudness2 && p0>p1) - p0=p1; - /*fprintf (stderr, "%f %f ", (float)(sqrt(tot_loudness) /( .25*st->loudness2)), p0/(p1+p0));*/ -#endif - - p0 *= .99f*st->speech_prob + .01f*(1-st->speech_prob); - p1 *= .01f*st->speech_prob + .99f*(1-st->speech_prob); - - st->speech_prob = p0/(1e-25f+p1+p0); - /*fprintf (stderr, "%f %f %f ", tot_loudness, st->loudness2, st->speech_prob);*/ - - if (st->speech_prob > st->speech_prob_start - || (st->last_speech < 20 && st->speech_prob > st->speech_prob_continue)) - { - is_speech = 1; - st->last_speech = 0; - } else { - st->last_speech++; - if (st->last_speech<20) - is_speech = 1; - } - - if (st->noise_bandsN > 50 && st->speech_bandsN > 50) - { - if (mean_post > 5) - { - float adapt = 1./st->speech_bandsN++; - if (adapt<.005f) - adapt = .005f; - for (i=0;ispeech_bands[i] = (1-adapt)*st->speech_bands[i] + adapt*bands[i]; - /*st->speech_bands2[i] = (1-adapt)*st->speech_bands2[i] + adapt*bands[i]*bands[i];*/ - st->speech_bands2[i] = (1-adapt)*st->speech_bands2[i] + adapt*(bands[i]-st->speech_bands[i])*(bands[i]-st->speech_bands[i]); - } - } else { - float adapt = 1./st->noise_bandsN++; - if (adapt<.005f) - adapt = .005f; - for (i=0;inoise_bands[i] = (1-adapt)*st->noise_bands[i] + adapt*bands[i]; - /*st->noise_bands2[i] = (1-adapt)*st->noise_bands2[i] + adapt*bands[i]*bands[i];*/ - st->noise_bands2[i] = (1-adapt)*st->noise_bands2[i] + adapt*(bands[i]-st->noise_bands[i])*(bands[i]-st->noise_bands[i]); - } - } - } - - - } - - return is_speech; -} - -static void speex_compute_agc(SpeexPreprocessState *st, float mean_prior) -{ - int i; - int N = st->ps_size; - float scale=.5f/N; - float agc_gain; - int freq_start, freq_end; - float active_bands = 0; - - freq_start = (int)(300.0f*2*N/st->sampling_rate); - freq_end = (int)(2000.0f*2*N/st->sampling_rate); - for (i=freq_start;iS[i] > 20.f*st->Smin[i]+1000.f) - active_bands+=1; - } - active_bands /= (freq_end-freq_start+1); - - if (active_bands > .2f) - { - float loudness=0.f; - float rate, rate2=.2f; - st->nb_loudness_adapt++; - rate=2.0f/(1+st->nb_loudness_adapt); - if (rate < .05f) - rate = .05f; - if (rate < .1f && pow(loudness, LOUDNESS_EXP) > st->loudness) - rate = .1f; - if (rate < .2f && pow(loudness, LOUDNESS_EXP) > 3.f*st->loudness) - rate = .2f; - if (rate < .4f && pow(loudness, LOUDNESS_EXP) > 10.f*st->loudness) - rate = .4f; - - for (i=2;ips[i] * st->gain2[i] * st->gain2[i] * st->loudness_weight[i]; - } - loudness=sqrt(loudness); + loudness=sqrt(loudness); /*if (loudness < 2*pow(st->loudness, 1.0/LOUDNESS_EXP) && - loudness*2 > pow(st->loudness, 1.0/LOUDNESS_EXP))*/ - st->loudness = (1-rate)*st->loudness + (rate)*pow(loudness, LOUDNESS_EXP); - - st->loudness2 = (1-rate2)*st->loudness2 + rate2*pow(st->loudness, 1.0f/LOUDNESS_EXP); - - loudness = pow(st->loudness, 1.0f/LOUDNESS_EXP); - - /*fprintf (stderr, "%f %f %f\n", loudness, st->loudness2, rate);*/ + loudness*2 > pow(st->loudness, 1.0/LOUDNESS_EXP))*/ + if (Pframe>.5f) + { + st->nb_loudness_adapt++; + rate=2.0f*Pframe*Pframe/(1+st->nb_loudness_adapt); + st->loudness = (1-rate)*st->loudness + (rate)*pow(AMP_SCALE*loudness, LOUDNESS_EXP); + if (st->init_max < st->max_gain && st->nb_adapt > 20) + st->init_max *= 1.f + .05f*Pframe*Pframe; } + /*printf ("%f %f %f %f\n", Pframe, loudness, pow(st->loudness, 1.0f/LOUDNESS_EXP), st->loudness2);*/ - agc_gain = st->agc_level/st->loudness2; - /*fprintf (stderr, "%f %f %f %f\n", active_bands, st->loudness, st->loudness2, agc_gain);*/ - if (agc_gain>200) - agc_gain = 200; + target_gain = AMP_SCALE*st->agc_level*pow(st->loudness, -1.0f/LOUDNESS_EXP); - for (i=0;igain2[i] *= agc_gain; + if ((Pframe>.5 && st->nb_adapt > 20) || target_gain < st->agc_gain) + { + if (target_gain > st->max_increase_step*st->agc_gain) + target_gain = st->max_increase_step*st->agc_gain; + if (target_gain < st->max_decrease_step*st->agc_gain && loudness < 10*st->prev_loudness) + target_gain = st->max_decrease_step*st->agc_gain; + if (target_gain > st->max_gain) + target_gain = st->max_gain; + if (target_gain > st->init_max) + target_gain = st->init_max; + st->agc_gain = target_gain; + } + /*printf ("%f %f %f\n", loudness, (float)AMP_SCALE_1*pow(st->loudness, 1.0f/LOUDNESS_EXP), st->agc_gain);*/ + + for (i=0;i<2*N;i++) + ft[i] *= st->agc_gain; + st->prev_loudness = loudness; } +#endif static void preprocess_analysis(SpeexPreprocessState *st, spx_int16_t *x) { @@ -565,7 +613,7 @@ static void preprocess_analysis(SpeexPreprocessState *st, spx_int16_t *x) int N = st->ps_size; int N3 = 2*N - st->frame_size; int N4 = st->frame_size - N3; - float *ps=st->ps; + spx_word32_t *ps=st->ps; /* 'Build' input frame */ for (i=0;iframe[i] *= st->window[i]; + st->frame[i] = MULT16_16_Q15(st->frame[i], st->window[i]); +#ifdef FIXED_POINT + { + spx_word16_t max_val=0; + for (i=0;i<2*N;i++) + max_val = MAX16(max_val, ABS16(st->frame[i])); + st->frame_shift = 14-spx_ilog2(EXTEND32(max_val)); + for (i=0;i<2*N;i++) + st->frame[i] = SHL16(st->frame[i], st->frame_shift); + } +#endif + /* Perform FFT */ - spx_drft_forward(st->fft_lookup, st->frame); - + spx_fft(st->fft_lookup, st->frame, st->ft); + /* Power spectrum */ - ps[0]=1; + ps[0]=MULT16_16(st->ft[0],st->ft[0]); for (i=1;iframe[2*i-1]*st->frame[2*i-1] + st->frame[2*i]*st->frame[2*i]; + ps[i]=MULT16_16(st->ft[2*i-1],st->ft[2*i-1]) + MULT16_16(st->ft[2*i],st->ft[2*i]); + for (i=0;ips[i] = PSHR32(st->ps[i], 2*st->frame_shift); + filterbank_compute_bank32(st->bank, ps, ps+N); } static void update_noise_prob(SpeexPreprocessState *st) { int i; + int min_range; int N = st->ps_size; for (i=1;iS[i] = 100.f+ .8f*st->S[i] + .05f*st->ps[i-1]+.1f*st->ps[i]+.05f*st->ps[i+1]; + st->S[i] = MULT16_32_Q15(QCONST16(.8f,15),st->S[i]) + MULT16_32_Q15(QCONST16(.05f,15),st->ps[i-1]) + + MULT16_32_Q15(QCONST16(.1f,15),st->ps[i]) + MULT16_32_Q15(QCONST16(.05f,15),st->ps[i+1]); + st->S[0] = MULT16_32_Q15(QCONST16(.8f,15),st->S[0]) + MULT16_32_Q15(QCONST16(.2f,15),st->ps[0]); + st->S[N-1] = MULT16_32_Q15(QCONST16(.8f,15),st->S[N-1]) + MULT16_32_Q15(QCONST16(.2f,15),st->ps[N-1]); - if (st->nb_preprocess<1) + if (st->nb_adapt==1) { - for (i=1;iSmin[i] = st->Stmp[i] = st->S[i]+100.f; + for (i=0;iSmin[i] = st->Stmp[i] = 0; } - if (st->nb_preprocess%200==0) + if (st->nb_adapt < 100) + min_range = 15; + else if (st->nb_adapt < 1000) + min_range = 50; + else if (st->nb_adapt < 10000) + min_range = 150; + else + min_range = 300; + if (st->min_count > min_range) { - for (i=1;imin_count = 0; + for (i=0;iSmin[i] = min(st->Stmp[i], st->S[i]); + st->Smin[i] = MIN32(st->Stmp[i], st->S[i]); st->Stmp[i] = st->S[i]; } } else { - for (i=1;iSmin[i] = min(st->Smin[i], st->S[i]); - st->Stmp[i] = min(st->Stmp[i], st->S[i]); + st->Smin[i] = MIN32(st->Smin[i], st->S[i]); + st->Stmp[i] = MIN32(st->Stmp[i], st->S[i]); } } - for (i=1;iupdate_prob[i] *= .2f; - if (st->S[i] > 2.5*st->Smin[i]) - st->update_prob[i] += .8f; + if (MULT16_32_Q15(QCONST16(.4f,15),st->S[i]) > ADD32(st->Smin[i],EXTEND32(20))) + st->update_prob[i] = 1; + else + st->update_prob[i] = 0; /*fprintf (stderr, "%f ", st->S[i]/st->Smin[i]);*/ /*fprintf (stderr, "%f ", st->update_prob[i]);*/ } } -#define NOISE_OVERCOMPENS 1.4 +#define NOISE_OVERCOMPENS 1. + +void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *Yout, int len); int speex_preprocess(SpeexPreprocessState *st, spx_int16_t *x, spx_int32_t *echo) +{ + return speex_preprocess_run(st, x); +} + +int speex_preprocess_run(SpeexPreprocessState *st, spx_int16_t *x) { int i; - int is_speech=1; - float mean_post=0; - float mean_prior=0; + int M; int N = st->ps_size; int N3 = 2*N - st->frame_size; int N4 = st->frame_size - N3; - float scale=.5f/N; - float *ps=st->ps; - float Zframe=0, Pframe; - + spx_word32_t *ps=st->ps; + spx_word32_t Zframe; + spx_word16_t Pframe; + spx_word16_t beta, beta_1; + spx_word16_t effective_echo_suppress; + + st->nb_adapt++; + st->min_count++; + + beta = MAX16(QCONST16(.03,15),DIV32_16(Q15_ONE,st->nb_adapt)); + beta_1 = Q15_ONE-beta; + M = st->nbands; + /* Deal with residual echo if provided */ + if (st->echo_state) + { + speex_echo_get_residual(st->echo_state, st->residual_echo, N); +#ifndef FIXED_POINT + /* If there are NaNs or ridiculous values, it'll show up in the DC and we just reset everything to zero */ + if (!(st->residual_echo[0] >=0 && st->residual_echo[0]residual_echo[i] = 0; + } +#endif + for (i=0;iecho_noise[i] = MAX32(MULT16_32_Q15(QCONST16(.6f,15),st->echo_noise[i]), st->residual_echo[i]); + filterbank_compute_bank32(st->bank, st->echo_noise, st->echo_noise+N); + } else { + for (i=0;iecho_noise[i] = 0; + } preprocess_analysis(st, x); update_noise_prob(st); - st->nb_preprocess++; - - /* Noise estimation always updated for the 20 first times */ - if (st->nb_adapt<10) + /* Noise estimation always updated for the 10 first frames */ + /*if (st->nb_adapt<10) { - update_noise(st, ps, echo); + for (i=1;iupdate_prob[i] = 0; } - - /* Deal with residual echo if provided */ - if (echo) - for (i=1;iecho_noise[i] = (.3f*st->echo_noise[i] + st->frame_size*st->frame_size*1.0*echo[i]); - - /* Compute a posteriori SNR */ - for (i=1;inoise[i] + st->echo_noise[i] + st->reverb_estimate[i]; - st->post[i] = ps[i]/tot_noise - 1.f; - if (st->post[i]>100.f) - st->post[i]=100.f; - /*if (st->post[i]<0) - st->post[i]=0;*/ - mean_post+=st->post[i]; + if (!st->update_prob[i] || st->ps[i] < PSHR32(st->noise[i], NOISE_SHIFT)) + st->noise[i] = MAX32(EXTEND32(0),MULT16_32_Q15(beta_1,st->noise[i]) + MULT16_32_Q15(beta,SHL32(st->ps[i],NOISE_SHIFT))); } - mean_post /= N; - if (mean_post<0.f) - mean_post=0.f; + filterbank_compute_bank32(st->bank, st->noise, st->noise+N); /* Special case for first frame */ if (st->nb_adapt==1) - for (i=1;iold_ps[i] = ps[i]; - /* Compute a priori SNR */ + /* Compute a posteriori SNR */ + for (i=0;iprior[i]*st->prior[i]/((1+st->prior[i])*(1+st->prior[i])); - float tot_noise = 1.f+ NOISE_OVERCOMPENS*st->noise[i] + st->echo_noise[i] + st->reverb_estimate[i]; - /* A priori SNR update */ - st->prior[i] = gamma*max(0.0f,st->post[i]) + - (1.f-gamma)* (.8*st->gain[i]*st->gain[i]*st->old_ps[i]/tot_noise + .2*st->prior[i]); - - if (st->prior[i]>100.f) - st->prior[i]=100.f; - - mean_prior+=st->prior[i]; - } + spx_word16_t gamma; + + /* Total noise estimate including residual echo and reverberation */ + spx_word32_t tot_noise = ADD32(ADD32(ADD32(EXTEND32(1), PSHR32(st->noise[i],NOISE_SHIFT)) , st->echo_noise[i]) , st->reverb_estimate[i]); + + /* A posteriori SNR = ps/noise - 1*/ + st->post[i] = SUB16(DIV32_16_Q8(ps[i],tot_noise), QCONST16(1.f,SNR_SHIFT)); + st->post[i]=MIN16(st->post[i], QCONST16(100.f,SNR_SHIFT)); + + /* Computing update gamma = .1 + .9*(old/(old+noise))^2 */ + gamma = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.89f,15),SQR16_Q15(DIV32_16_Q15(st->old_ps[i],ADD32(st->old_ps[i],tot_noise)))); + + /* A priori SNR update = gamma*max(0,post) + (1-gamma)*old/noise */ + st->prior[i] = EXTRACT16(PSHR32(ADD32(MULT16_16(gamma,MAX16(0,st->post[i])), MULT16_16(Q15_ONE-gamma,DIV32_16_Q8(st->old_ps[i],tot_noise))), 15)); + st->prior[i]=MIN16(st->prior[i], QCONST16(100.f,SNR_SHIFT)); } - mean_prior /= N; -#if 0 - for (i=0;ipost, N+M, "");*/ + + /* Recursive average of the a priori SNR. A bit smoothed for the psd components */ + st->zeta[0] = PSHR32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[0]), MULT16_16(QCONST16(.3f,15),st->prior[0])),15); + for (i=1;izeta[i] = PSHR32(ADD32(ADD32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[i]), MULT16_16(QCONST16(.15f,15),st->prior[i])), + MULT16_16(QCONST16(.075f,15),st->prior[i-1])), MULT16_16(QCONST16(.075f,15),st->prior[i+1])),15); + for (i=N-1;izeta[i] = PSHR32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[i]), MULT16_16(QCONST16(.3f,15),st->prior[i])),15); + + /* Speech probability of presence for the entire frame is based on the average filterbank a priori SNR */ + Zframe = 0; + for (i=N;izeta[i])); + Pframe = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.899f,15),qcurve(DIV32_16(Zframe,st->nbands))); + + effective_echo_suppress = EXTRACT16(PSHR32(ADD32(MULT16_16(SUB16(Q15_ONE,Pframe), st->echo_suppress), MULT16_16(Pframe, st->echo_suppress_active)),15)); + + compute_gain_floor(st->noise_suppress, effective_echo_suppress, st->noise+N, st->echo_noise+N, st->gain_floor+N, M); + + /* Compute Ephraim & Malah gain speech probability of presence for each critical band (Bark scale) + Technically this is actually wrong because the EM gaim assumes a slightly different probability + distribution */ + for (i=N;iprior[i]); - } - fprintf (stderr, "\n"); + /* See EM and Cohen papers*/ + spx_word32_t theta; + /* Gain from hypergeometric function */ + spx_word32_t MM; + /* Weiner filter gain */ + spx_word16_t prior_ratio; + /* a priority probability of speech presence based on Bark sub-band alone */ + spx_word16_t P1; + /* Speech absence a priori probability (considering sub-band and frame) */ + spx_word16_t q; +#ifdef FIXED_POINT + spx_word16_t tmp; #endif - /*fprintf (stderr, "%f %f\n", mean_prior,mean_post);*/ - - if (st->nb_preprocess>=20) - { - int do_update = 0; - float noise_ener=0, sig_ener=0; - /* If SNR is low (both a priori and a posteriori), update the noise estimate*/ - /*if (mean_prior<.23 && mean_post < .5)*/ - if (mean_prior<.23f && mean_post < .5f) - do_update = 1; - for (i=1;inoise[i]; - sig_ener += ps[i]; - } - if (noise_ener > 3.f*sig_ener) - do_update = 1; - /*do_update = 0;*/ - if (do_update) - { - st->consec_noise++; - } else { - st->consec_noise=0; - } - } - - if (st->vad_enabled) - is_speech = speex_compute_vad(st, ps, mean_prior, mean_post); - - - if (st->consec_noise>=3) - { - update_noise(st, st->old_ps, echo); - } else { - for (i=1;iupdate_prob[i]<.5f/* || st->ps[i] < st->noise[i]*/) - { - if (echo) - st->noise[i] = .95f*st->noise[i] + .05f*max(1.0f,st->ps[i]-st->frame_size*st->frame_size*1.0*echo[i]); - else - st->noise[i] = .95f*st->noise[i] + .05f*st->ps[i]; - } - } - } - - for (i=1;izeta[i] = .7f*st->zeta[i] + .3f*st->prior[i]; - } - - { - int freq_start = (int)(300.0f*2.f*N/st->sampling_rate); - int freq_end = (int)(2000.0f*2.f*N/st->sampling_rate); - for (i=freq_start;izeta[i]; - } - Zframe /= (freq_end-freq_start); - } - st->Zlast = Zframe; - - Pframe = qcurve(Zframe); - - /*fprintf (stderr, "%f\n", Pframe);*/ - /* Compute gain according to the Ephraim-Malah algorithm */ - for (i=1;iprior[i]/(1.0001f+st->prior[i]); - theta = (1.f+st->post[i])*prior_ratio; - - if (i==1 || i==N-1) - zeta1 = st->zeta[i]; - else - zeta1 = .25f*st->zeta[i-1] + .5f*st->zeta[i] + .25f*st->zeta[i+1]; - P1 = qcurve (zeta1); - /* FIXME: add global prob (P2) */ - q = 1-Pframe*P1; - q = 1-P1; - if (q>.95f) - q=.95f; - p=1.f/(1.f + (q/(1.f-q))*(1.f+st->prior[i])*exp(-theta)); - /*p=1;*/ + prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(st->prior[i], SHL32(1,SNR_SHIFT))); + theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(st->post[i]),EXPIN_SHIFT-SNR_SHIFT)); - /* Optimal estimator for loudness domain */ MM = hypergeom_gain(theta); + /* Gain with bound */ + st->gain[i] = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM))); + /* Save old Bark power spectrum */ + st->old_ps[i] = MULT16_32_P15(QCONST16(.2f,15),st->old_ps[i]) + MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(st->gain[i])),ps[i]); - st->gain[i] = prior_ratio * MM; - /*Put some (very arbitraty) limit on the gain*/ - if (st->gain[i]>2.f) + P1 = QCONST16(.199f,15)+MULT16_16_Q15(QCONST16(.8f,15),qcurve (st->zeta[i])); + q = Q15_ONE-MULT16_16_Q15(Pframe,P1); +#ifdef FIXED_POINT + theta = MIN32(theta, EXTEND32(32767)); +/*Q8*/tmp = MULT16_16_Q15((SHL32(1,SNR_SHIFT)+st->prior[i]),EXTRACT16(MIN32(Q15ONE,SHR32(spx_exp(-EXTRACT16(theta)),1)))); + tmp = MIN16(QCONST16(3.,SNR_SHIFT), tmp); /* Prevent overflows in the next line*/ +/*Q8*/tmp = EXTRACT16(PSHR32(MULT16_16(PDIV32_16(SHL32(EXTEND32(q),8),(Q15_ONE-q)),tmp),8)); + st->gain2[i]=DIV32_16(SHL32(EXTEND32(32767),SNR_SHIFT), ADD16(256,tmp)); +#else + st->gain2[i]=1/(1.f + (q/(1.f-q))*(1+st->prior[i])*exp(-theta)); +#endif + } + /* Convert the EM gains and speech prob to linear frequency */ + filterbank_compute_psd16(st->bank,st->gain2+N, st->gain2); + filterbank_compute_psd16(st->bank,st->gain+N, st->gain); + + /* Use 1 for linear gain resolution (best) or 0 for Bark gain resolution (faster) */ + if (1) + { + filterbank_compute_psd16(st->bank,st->gain_floor+N, st->gain_floor); + + /* Compute gain according to the Ephraim-Malah algorithm -- linear frequency */ + for (i=0;igain[i]=2.f; + spx_word32_t MM; + spx_word32_t theta; + spx_word16_t prior_ratio; + spx_word16_t tmp; + spx_word16_t p; + spx_word16_t g; + + /* Wiener filter gain */ + prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(st->prior[i], SHL32(1,SNR_SHIFT))); + theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(st->post[i]),EXPIN_SHIFT-SNR_SHIFT)); + + /* Optimal estimator for loudness domain */ + MM = hypergeom_gain(theta); + /* EM gain with bound */ + g = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM))); + /* Interpolated speech probability of presence */ + p = st->gain2[i]; + + /* Constrain the gain to be close to the Bark scale gain */ + if (MULT16_16_Q15(QCONST16(.333f,15),g) > st->gain[i]) + g = MULT16_16(3,st->gain[i]); + st->gain[i] = g; + + /* Save old power spectrum */ + st->old_ps[i] = MULT16_32_P15(QCONST16(.2f,15),st->old_ps[i]) + MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(st->gain[i])),ps[i]); + + /* Apply gain floor */ + if (st->gain[i] < st->gain_floor[i]) + st->gain[i] = st->gain_floor[i]; + + /* Exponential decay model for reverberation (unused) */ + /*st->reverb_estimate[i] = st->reverb_decay*st->reverb_estimate[i] + st->reverb_decay*st->reverb_level*st->gain[i]*st->gain[i]*st->ps[i];*/ + + /* Take into account speech probability of presence (loudness domain MMSE estimator) */ + /* gain2 = [p*sqrt(gain)+(1-p)*sqrt(gain _floor) ]^2 */ + tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(st->gain[i]),15))) + MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(st->gain_floor[i]),15))); + st->gain2[i]=SQR16_Q15(tmp); + + /* Use this if you want a log-domain MMSE estimator instead */ + /*st->gain2[i] = pow(st->gain[i], p) * pow(st->gain_floor[i],1.f-p);*/ } - - st->reverb_estimate[i] = st->reverb_decay*st->reverb_estimate[i] + st->reverb_decay*st->reverb_level*st->gain[i]*st->gain[i]*st->ps[i]; - if (st->denoise_enabled) + } else { + for (i=N;igain2[i] = p*p*st->gain[i];*/ - st->gain2[i]=(p*sqrt(st->gain[i])+.2*(1-p)) * (p*sqrt(st->gain[i])+.2*(1-p)); - /*st->gain2[i] = pow(st->gain[i], p) * pow(.1f,1.f-p);*/ - } else { - st->gain2[i]=1.f; + spx_word16_t tmp; + spx_word16_t p = st->gain2[i]; + st->gain[i] = MAX16(st->gain[i], st->gain_floor[i]); + tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(st->gain[i]),15))) + MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(st->gain_floor[i]),15))); + st->gain2[i]=SQR16_Q15(tmp); } + filterbank_compute_psd16(st->bank,st->gain2+N, st->gain2); } - st->gain2[0]=st->gain[0]=0.f; - st->gain2[N-1]=st->gain[N-1]=0.f; - /* - for (i=30;idenoise_enabled) { - st->gain[i] = st->gain2[i]*st->gain2[i] + (1-st->gain2[i])*.333*(.6*st->gain2[i-1]+st->gain2[i]+.6*st->gain2[i+1]+.4*st->gain2[i-2]+.4*st->gain2[i+2]); + for (i=0;igain2[i]=Q15_ONE; } - for (i=30;igain2[i] = st->gain[i]; - */ - if (st->agc_enabled) - speex_compute_agc(st, mean_prior); - -#if 0 - if (!is_speech) - { - for (i=0;igain2[i] = 0; - } -#if 0 - else { - for (i=0;igain2[i] = 1; - } -#endif -#endif - + /* Apply computed gain */ for (i=1;iframe[2*i-1] *= st->gain2[i]; - st->frame[2*i] *= st->gain2[i]; + st->ft[2*i-1] = MULT16_16_P15(st->gain2[i],st->ft[2*i-1]); + st->ft[2*i] = MULT16_16_P15(st->gain2[i],st->ft[2*i]); } - - /* Get rid of the DC and very low frequencies */ - st->frame[0]=0; - st->frame[1]=0; - st->frame[2]=0; - /* Nyquist frequency is mostly useless too */ - st->frame[2*N-1]=0; + st->ft[0] = MULT16_16_P15(st->gain2[0],st->ft[0]); + st->ft[2*N-1] = MULT16_16_P15(st->gain2[N-1],st->ft[2*N-1]); + + /*FIXME: This *will* not work for fixed-point */ +#ifndef FIXED_POINT + if (st->agc_enabled) + speex_compute_agc(st, Pframe, st->ft); +#endif /* Inverse FFT with 1/N scaling */ - spx_drft_backward(st->fft_lookup, st->frame); - + spx_ifft(st->fft_lookup, st->ft, st->frame); + /* Scale back to original (lower) amplitude */ for (i=0;i<2*N;i++) - st->frame[i] *= scale; + st->frame[i] = PSHR16(st->frame[i], st->frame_shift); + /*FIXME: This *will* not work for fixed-point */ +#ifndef FIXED_POINT + if (st->agc_enabled) { float max_sample=0; for (i=0;i<2*N;i++) @@ -880,9 +966,11 @@ int speex_preprocess(SpeexPreprocessState *st, spx_int16_t *x, spx_int32_t *echo st->frame[i] *= damp; } } - +#endif + + /* Synthesis window (for WOLA) */ for (i=0;i<2*N;i++) - st->frame[i] *= st->window[i]; + st->frame[i] = MULT16_16_Q15(st->frame[i], st->window[i]); /* Perform overlap and add */ for (i=0;ioutbuf[i] = st->frame[st->frame_size+i]; - /* Save old power spectrum */ - for (i=1;iold_ps[i] = ps[i]; - - return is_speech; + /* FIXME: This VAD is a kludge */ + if (st->vad_enabled) + { + if (Pframe > st->speech_prob_start || (st->was_speech && Pframe > st->speech_prob_continue)) + { + st->was_speech=1; + return 1; + } else + { + st->was_speech=0; + return 0; + } + } else { + return 1; + } } -void speex_preprocess_estimate_update(SpeexPreprocessState *st, spx_int16_t *x, spx_int32_t *echo) +void speex_preprocess_estimate_update(SpeexPreprocessState *st, spx_int16_t *x) { int i; int N = st->ps_size; int N3 = 2*N - st->frame_size; + int M; + spx_word32_t *ps=st->ps; - float *ps=st->ps; - + M = st->nbands; + st->min_count++; + preprocess_analysis(st, x); update_noise_prob(st); - - st->nb_preprocess++; for (i=1;iupdate_prob[i]<.5f || st->ps[i] < st->noise[i]) + if (!st->update_prob[i] || st->ps[i] < PSHR32(st->noise[i],NOISE_SHIFT)) { - if (echo) - st->noise[i] = .95f*st->noise[i] + .1f*max(1.0f,st->ps[i]-st->frame_size*st->frame_size*1.0*echo[i]); - else - st->noise[i] = .95f*st->noise[i] + .1f*st->ps[i]; + st->noise[i] = MULT16_32_Q15(QCONST16(.95f,15),st->noise[i]) + MULT16_32_Q15(QCONST16(.05f,15),SHL32(st->ps[i],NOISE_SHIFT)); } } for (i=0;ioutbuf[i] = x[st->frame_size-N3+i]*st->window[st->frame_size+i]; + st->outbuf[i] = MULT16_16_Q15(x[st->frame_size-N3+i],st->window[st->frame_size+i]); /* Save old power spectrum */ - for (i=1;iold_ps[i] = ps[i]; - for (i=1;ireverb_estimate[i] *= st->reverb_decay; + for (i=0;ireverb_estimate[i] = MULT16_32_Q15(st->reverb_decay, st->reverb_estimate[i]); } @@ -946,17 +1042,17 @@ int speex_preprocess_ctl(SpeexPreprocessState *state, int request, void *ptr) switch(request) { case SPEEX_PREPROCESS_SET_DENOISE: - st->denoise_enabled = (*(int*)ptr); + st->denoise_enabled = (*(spx_int32_t*)ptr); break; case SPEEX_PREPROCESS_GET_DENOISE: - (*(int*)ptr) = st->denoise_enabled; + (*(spx_int32_t*)ptr) = st->denoise_enabled; break; - +#ifndef FIXED_POINT case SPEEX_PREPROCESS_SET_AGC: - st->agc_enabled = (*(int*)ptr); + st->agc_enabled = (*(spx_int32_t*)ptr); break; case SPEEX_PREPROCESS_GET_AGC: - (*(int*)ptr) = st->agc_enabled; + (*(spx_int32_t*)ptr) = st->agc_enabled; break; case SPEEX_PREPROCESS_SET_AGC_LEVEL: @@ -969,21 +1065,40 @@ int speex_preprocess_ctl(SpeexPreprocessState *state, int request, void *ptr) case SPEEX_PREPROCESS_GET_AGC_LEVEL: (*(float*)ptr) = st->agc_level; break; - + case SPEEX_PREPROCESS_SET_AGC_INCREMENT: + st->max_increase_step = exp(0.11513f * (*(spx_int32_t*)ptr)*st->frame_size / st->sampling_rate); + break; + case SPEEX_PREPROCESS_GET_AGC_INCREMENT: + (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_increase_step)*st->sampling_rate/st->frame_size); + break; + case SPEEX_PREPROCESS_SET_AGC_DECREMENT: + st->max_decrease_step = exp(0.11513f * (*(spx_int32_t*)ptr)*st->frame_size / st->sampling_rate); + break; + case SPEEX_PREPROCESS_GET_AGC_DECREMENT: + (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_decrease_step)*st->sampling_rate/st->frame_size); + break; + case SPEEX_PREPROCESS_SET_AGC_MAX_GAIN: + st->max_gain = exp(0.11513f * (*(spx_int32_t*)ptr)); + break; + case SPEEX_PREPROCESS_GET_AGC_MAX_GAIN: + (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_gain)); + break; +#endif case SPEEX_PREPROCESS_SET_VAD: - st->vad_enabled = (*(int*)ptr); + speex_warning("The VAD has been replaced by a hack pending a complete rewrite"); + st->vad_enabled = (*(spx_int32_t*)ptr); break; case SPEEX_PREPROCESS_GET_VAD: - (*(int*)ptr) = st->vad_enabled; + (*(spx_int32_t*)ptr) = st->vad_enabled; break; case SPEEX_PREPROCESS_SET_DEREVERB: - st->dereverb_enabled = (*(int*)ptr); + st->dereverb_enabled = (*(spx_int32_t*)ptr); for (i=0;ips_size;i++) st->reverb_estimate[i]=0; break; case SPEEX_PREPROCESS_GET_DEREVERB: - (*(int*)ptr) = st->dereverb_enabled; + (*(spx_int32_t*)ptr) = st->dereverb_enabled; break; case SPEEX_PREPROCESS_SET_DEREVERB_LEVEL: @@ -1001,24 +1116,47 @@ int speex_preprocess_ctl(SpeexPreprocessState *state, int request, void *ptr) break; case SPEEX_PREPROCESS_SET_PROB_START: - st->speech_prob_start = (*(int*)ptr) / 100.0; - if ( st->speech_prob_start > 1 || st->speech_prob_start < 0 ) - st->speech_prob_start = SPEEX_PROB_START_DEFAULT; + *(spx_int32_t*)ptr = MIN32(Q15_ONE,MAX32(0, *(spx_int32_t*)ptr)); + st->speech_prob_start = DIV32_16(MULT16_16(32767,*(spx_int32_t*)ptr), 100); break; case SPEEX_PREPROCESS_GET_PROB_START: - (*(int*)ptr) = st->speech_prob_start * 100; + (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob_start, 100); break; case SPEEX_PREPROCESS_SET_PROB_CONTINUE: - st->speech_prob_continue = (*(int*)ptr) / 100.0; - if ( st->speech_prob_continue > 1 || st->speech_prob_continue < 0 ) - st->speech_prob_continue = SPEEX_PROB_CONTINUE_DEFAULT; + *(spx_int32_t*)ptr = MIN32(Q15_ONE,MAX32(0, *(spx_int32_t*)ptr)); + st->speech_prob_continue = DIV32_16(MULT16_16(32767,*(spx_int32_t*)ptr), 100); break; case SPEEX_PREPROCESS_GET_PROB_CONTINUE: - (*(int*)ptr) = st->speech_prob_continue * 100; + (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob_continue, 100); break; - default: + case SPEEX_PREPROCESS_SET_NOISE_SUPPRESS: + st->noise_suppress = -ABS(*(spx_int32_t*)ptr); + break; + case SPEEX_PREPROCESS_GET_NOISE_SUPPRESS: + (*(spx_int32_t*)ptr) = st->noise_suppress; + break; + case SPEEX_PREPROCESS_SET_ECHO_SUPPRESS: + st->echo_suppress = -ABS(*(spx_int32_t*)ptr); + break; + case SPEEX_PREPROCESS_GET_ECHO_SUPPRESS: + (*(spx_int32_t*)ptr) = st->echo_suppress; + break; + case SPEEX_PREPROCESS_SET_ECHO_SUPPRESS_ACTIVE: + st->echo_suppress_active = -ABS(*(spx_int32_t*)ptr); + break; + case SPEEX_PREPROCESS_GET_ECHO_SUPPRESS_ACTIVE: + (*(spx_int32_t*)ptr) = st->echo_suppress_active; + break; + case SPEEX_PREPROCESS_SET_ECHO_STATE: + st->echo_state = (SpeexEchoState*)ptr; + break; + case SPEEX_PREPROCESS_GET_ECHO_STATE: + ptr = (void*)st->echo_state; + break; + + default: speex_warning_int("Unknown speex_preprocess_ctl request: ", request); return -1; } diff --git a/apps/codecs/libspeex/pseudofloat.h b/apps/codecs/libspeex/pseudofloat.h index 1608df79b2..67f01b33c3 100644 --- a/apps/codecs/libspeex/pseudofloat.h +++ b/apps/codecs/libspeex/pseudofloat.h @@ -37,7 +37,7 @@ #include "misc.h" #include "math_approx.h" -//#include +#include #ifdef FIXED_POINT @@ -50,9 +50,7 @@ static const spx_float_t FLOAT_ZERO = {0,0}; static const spx_float_t FLOAT_ONE = {16384,-14}; static const spx_float_t FLOAT_HALF = {16384,-15}; -#ifndef MIN #define MIN(a,b) ((a)<(b)?(a):(b)) -#endif static inline spx_float_t PSEUDOFLOAT(spx_int32_t x) { int e=0; @@ -67,18 +65,8 @@ static inline spx_float_t PSEUDOFLOAT(spx_int32_t x) spx_float_t r = {0,0}; return r; } - while (x>32767) - { - x >>= 1; - /*x *= .5;*/ - e++; - } - while (x<16383) - { - x <<= 1; - /*x *= 2;*/ - e--; - } + e = spx_ilog2(ABS32(x))-14; + x = VSHR32(x, e); if (sign) { spx_float_t r; @@ -207,6 +195,14 @@ static inline spx_float_t FLOAT_MULT(spx_float_t a, spx_float_t b) return r; } +static inline spx_float_t FLOAT_AMULT(spx_float_t a, spx_float_t b) +{ + spx_float_t r; + r.m = (spx_int16_t)((spx_int32_t)(a).m*(b).m>>15); + r.e = (a).e+(b).e+15; + return r; +} + static inline spx_float_t FLOAT_SHL(spx_float_t a, int b) { @@ -232,64 +228,40 @@ static inline spx_int32_t FLOAT_EXTRACT32(spx_float_t a) return EXTEND32(a.m)<32767) - { - a >>= 1; - e++; - } - while (a<16384) - { - a <<= 1; - e--; - } - while (b>32767) - { - b >>= 1; - e++; - } - while (b<16384) - { - b <<= 1; - e--; - } + e1 = spx_ilog2(ABS32(a)); + a = VSHR32(a, e1-14); + e2 = spx_ilog2(ABS32(b)); + b = VSHR32(b, e2-14); r.m = MULT16_16_Q15(a,b); - r.e = e+15; + r.e = e1+e2-13; return r; } +/* Do NOT attempt to divide by a negative number */ static inline spx_float_t FLOAT_DIV32_FLOAT(spx_word32_t a, spx_float_t b) { int e=0; spx_float_t r; - /* FIXME: Handle the sign */ if (a==0) { return FLOAT_ZERO; } - while (a=SHL32(EXTEND32(b.m-1),15)) + e = spx_ilog2(ABS32(a))-spx_ilog2(b.m-1)-15; + a = VSHR32(a, e); + if (ABS32(a)>=SHL32(EXTEND32(b.m-1),15)) { a >>= 1; e++; @@ -300,41 +272,47 @@ static inline spx_float_t FLOAT_DIV32_FLOAT(spx_word32_t a, spx_float_t b) } +/* Do NOT attempt to divide by a negative number */ static inline spx_float_t FLOAT_DIV32(spx_word32_t a, spx_word32_t b) { - int e=0; + int e0=0,e=0; spx_float_t r; - /* FIXME: Handle the sign */ if (a==0) { return FLOAT_ZERO; } - while (b>32767) + if (b>32767) { - b >>= 1; - e--; + e0 = spx_ilog2(b)-14; + b = VSHR32(b, e0); + e0 = -e0; } - while (a=SHL32(b-1,15)) + e = spx_ilog2(ABS32(a))-spx_ilog2(b-1)-15; + a = VSHR32(a, e); + if (ABS32(a)>=SHL32(EXTEND32(b-1),15)) { a >>= 1; e++; } + e += e0; r.m = DIV32_16(a,b); r.e = e; return r; } +/* Do NOT attempt to divide by a negative number */ static inline spx_float_t FLOAT_DIVU(spx_float_t a, spx_float_t b) { int e=0; spx_int32_t num; spx_float_t r; + if (b.m<=0) + { + speex_warning_int("Attempted to divide by", b.m); + return FLOAT_ONE; + } num = a.m; + a.m = ABS16(a.m); while (a.m >= b.m) { e++; @@ -350,7 +328,7 @@ static inline spx_float_t FLOAT_SQRT(spx_float_t a) { spx_float_t r; spx_int32_t m; - m = a.m << 14; + m = SHL32(EXTEND32(a.m), 14); r.e = a.e - 14; if (r.e & 1) { @@ -370,6 +348,7 @@ static inline spx_float_t FLOAT_SQRT(spx_float_t a) #define FLOAT_HALF 0.5f #define PSEUDOFLOAT(x) (x) #define FLOAT_MULT(a,b) ((a)*(b)) +#define FLOAT_AMULT(a,b) ((a)*(b)) #define FLOAT_MUL32(a,b) ((a)*(b)) #define FLOAT_DIV32(a,b) ((a)/(b)) #define FLOAT_EXTRACT16(a) (a) diff --git a/apps/codecs/libspeex/quant_lsp.c b/apps/codecs/libspeex/quant_lsp.c index bfca5870ce..d907b98e4d 100644 --- a/apps/codecs/libspeex/quant_lsp.c +++ b/apps/codecs/libspeex/quant_lsp.c @@ -417,7 +417,7 @@ void lsp_quant_48k(spx_lsp_t *lsp, spx_lsp_t *qlsp, int order, SpeexBits *bits) #ifdef FIXED_POINT for (i=0;i +void *speex_alloc (int size) {return calloc(size,1);} +void *speex_realloc (void *ptr, int size) {return realloc(ptr, size);} +void speex_free (void *ptr) {free(ptr);} +#else +#include "misc.h" +#endif + +#include +#include "speex/speex_resampler.h" + +#ifndef M_PI +#define M_PI 3.14159263 +#endif + +#ifdef FIXED_POINT +#define WORD2INT(x) ((x) < -32767 ? -32768 : ((x) > 32766 ? 32767 : (x))) +#else +#define WORD2INT(x) ((x) < -32767.5f ? -32768 : ((x) > 32766.5f ? 32767 : floor(.5+(x)))) +#endif + +/*#define float double*/ +#define FILTER_SIZE 64 +#define OVERSAMPLE 8 + +#define IMAX(a,b) ((a) > (b) ? (a) : (b)) + +struct QualityMapping { + int base_length; + int oversample; + float downsample_bandwidth; + float upsample_bandwidth; +}; + +/* This table maps conversion quality to internal parameters. There are two + reasons that explain why the up-sampling bandwidth is larger than the + down-sampling bandwidth: + 1) When up-sampling, we can assume that the spectrum is already attenuated + close to the Nyquist rate (from an A/D or a previous resampling filter) + 2) Any aliasing that occurs very close to the Nyquist rate will be masked + by the sinusoids/noise just below the Nyquist rate (guaranteed only for + up-sampling). +*/ +const struct QualityMapping quality_map[11] = { + { 8, 4, 0.70f, 0.80f}, /* 0 */ + { 16, 4, 0.74f, 0.83f}, /* 1 */ + { 32, 4, 0.77f, 0.87f}, /* 2 */ + { 48, 8, 0.84f, 0.90f}, /* 3 */ + { 64, 8, 0.88f, 0.92f}, /* 4 */ + { 80, 8, 0.90f, 0.94f}, /* 5 */ + { 96, 8, 0.91f, 0.94f}, /* 6 */ + {128, 16, 0.93f, 0.95f}, /* 7 */ + {160, 16, 0.94f, 0.96f}, /* 8 */ + {192, 16, 0.95f, 0.96f}, /* 9 */ + {256, 16, 0.96f, 0.97f}, /* 10 */ +}; + +typedef enum {SPEEX_RESAMPLER_DIRECT_SINGLE=0, SPEEX_RESAMPLER_INTERPOLATE_SINGLE=1} SpeexSincType; + +typedef int (*resampler_basic_func)(SpeexResamplerState *, int , const spx_word16_t *, int *, spx_word16_t *, int *); + +struct SpeexResamplerState_ { + int in_rate; + int out_rate; + int num_rate; + int den_rate; + + int quality; + int nb_channels; + int filt_len; + int mem_alloc_size; + int int_advance; + int frac_advance; + float cutoff; + int oversample; + int initialised; + int started; + + /* These are per-channel */ + int *last_sample; + int *samp_frac_num; + int *magic_samples; + + spx_word16_t *mem; + spx_word16_t *sinc_table; + int sinc_table_length; + resampler_basic_func resampler_ptr; + + int in_stride; + int out_stride; + SpeexSincType type; +} ; + +#ifdef FIXED_POINT +/* The slow way of computing a sinc for the table. Should improve that some day */ +static spx_word16_t sinc(float cutoff, float x, int N) +{ + /*fprintf (stderr, "%f ", x);*/ + x *= cutoff; + if (fabs(x)<1e-6f) + return WORD2INT(32768.*cutoff); + else if (fabs(x) > .5f*N) + return 0; + /*FIXME: Can it really be any slower than this? */ + return WORD2INT(32768.*cutoff*sin(M_PI*x)/(M_PI*x) * (.42+.5*cos(2*x*M_PI/N)+.08*cos(4*x*M_PI/N))); +} +#else +/* The slow way of computing a sinc for the table. Should improve that some day */ +static spx_word16_t sinc(float cutoff, float x, int N) +{ + /*fprintf (stderr, "%f ", x);*/ + x *= cutoff; + if (fabs(x)<1e-6) + return cutoff; + else if (fabs(x) > .5*N) + return 0; + /*FIXME: Can it really be any slower than this? */ + return cutoff*sin(M_PI*x)/(M_PI*x) * (.42+.5*cos(2*x*M_PI/N)+.08*cos(4*x*M_PI/N)); +} +#endif + +static int resampler_basic_direct_single(SpeexResamplerState *st, int channel_index, const spx_word16_t *in, int *in_len, spx_word16_t *out, int *out_len) +{ + int N = st->filt_len; + int out_sample = 0; + spx_word16_t *mem; + int last_sample = st->last_sample[channel_index]; + int samp_frac_num = st->samp_frac_num[channel_index]; + mem = st->mem + channel_index * st->mem_alloc_size; + while (!(last_sample >= *in_len || out_sample >= *out_len)) + { + int j; + spx_word32_t sum=0; + + /* We already have all the filter coefficients pre-computed in the table */ + const spx_word16_t *ptr; + /* Do the memory part */ + for (j=0;last_sample-N+1+j < 0;j++) + { + sum += MULT16_16(mem[last_sample+j],st->sinc_table[samp_frac_num*st->filt_len+j]); + } + + /* Do the new part */ + ptr = in+st->in_stride*(last_sample-N+1+j); + for (;jsinc_table[samp_frac_num*st->filt_len+j]); + ptr += st->in_stride; + } + + *out = PSHR32(sum,15); + out += st->out_stride; + out_sample++; + last_sample += st->int_advance; + samp_frac_num += st->frac_advance; + if (samp_frac_num >= st->den_rate) + { + samp_frac_num -= st->den_rate; + last_sample++; + } + } + st->last_sample[channel_index] = last_sample; + st->samp_frac_num[channel_index] = samp_frac_num; + return out_sample; +} + +static int resampler_basic_interpolate_single(SpeexResamplerState *st, int channel_index, const spx_word16_t *in, int *in_len, spx_word16_t *out, int *out_len) +{ + int N = st->filt_len; + int out_sample = 0; + spx_word16_t *mem; + int last_sample = st->last_sample[channel_index]; + int samp_frac_num = st->samp_frac_num[channel_index]; + mem = st->mem + channel_index * st->mem_alloc_size; + while (!(last_sample >= *in_len || out_sample >= *out_len)) + { + int j; + spx_word32_t sum=0; + + /* We need to interpolate the sinc filter */ + spx_word32_t accum[4] = {0.f,0.f, 0.f, 0.f}; + float interp[4]; + const spx_word16_t *ptr; + float alpha = ((float)samp_frac_num)/st->den_rate; + int offset = samp_frac_num*st->oversample/st->den_rate; + float frac = alpha*st->oversample - offset; + /* This code is written like this to make it easy to optimise with SIMD. + For most DSPs, it would be best to split the loops in two because most DSPs + have only two accumulators */ + for (j=0;last_sample-N+1+j < 0;j++) + { + spx_word16_t curr_mem = mem[last_sample+j]; + accum[0] += MULT16_16(curr_mem,st->sinc_table[4+(j+1)*st->oversample-offset-2]); + accum[1] += MULT16_16(curr_mem,st->sinc_table[4+(j+1)*st->oversample-offset-1]); + accum[2] += MULT16_16(curr_mem,st->sinc_table[4+(j+1)*st->oversample-offset]); + accum[3] += MULT16_16(curr_mem,st->sinc_table[4+(j+1)*st->oversample-offset+1]); + } + ptr = in+st->in_stride*(last_sample-N+1+j); + /* Do the new part */ + for (;jin_stride; + accum[0] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-2]); + accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]); + accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]); + accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]); + } + /* Compute interpolation coefficients. I'm not sure whether this corresponds to cubic interpolation + but I know it's MMSE-optimal on a sinc */ + interp[0] = -0.16667f*frac + 0.16667f*frac*frac*frac; + interp[1] = frac + 0.5f*frac*frac - 0.5f*frac*frac*frac; + /*interp[2] = 1.f - 0.5f*frac - frac*frac + 0.5f*frac*frac*frac;*/ + interp[3] = -0.33333f*frac + 0.5f*frac*frac - 0.16667f*frac*frac*frac; + /* Just to make sure we don't have rounding problems */ + interp[2] = 1.f-interp[0]-interp[1]-interp[3]; + /*sum = frac*accum[1] + (1-frac)*accum[2];*/ + sum = interp[0]*accum[0] + interp[1]*accum[1] + interp[2]*accum[2] + interp[3]*accum[3]; + + *out = PSHR32(sum,15); + out += st->out_stride; + out_sample++; + last_sample += st->int_advance; + samp_frac_num += st->frac_advance; + if (samp_frac_num >= st->den_rate) + { + samp_frac_num -= st->den_rate; + last_sample++; + } + } + st->last_sample[channel_index] = last_sample; + st->samp_frac_num[channel_index] = samp_frac_num; + return out_sample; +} + + +static void update_filter(SpeexResamplerState *st) +{ + int i; + int old_length; + + old_length = st->filt_len; + st->oversample = quality_map[st->quality].oversample; + st->filt_len = quality_map[st->quality].base_length; + + if (st->num_rate > st->den_rate) + { + /* down-sampling */ + st->cutoff = quality_map[st->quality].downsample_bandwidth * st->den_rate / st->num_rate; + /* FIXME: divide the numerator and denominator by a certain amount if they're too large */ + st->filt_len = st->filt_len*st->num_rate / st->den_rate; + /* Round down to make sure we have a multiple of 4 */ + st->filt_len &= (~0x3); + } else { + /* up-sampling */ + st->cutoff = quality_map[st->quality].upsample_bandwidth; + } + + /* Choose the resampling type that requires the least amount of memory */ + if (st->den_rate <= st->oversample) + { + if (!st->sinc_table) + st->sinc_table = (spx_word16_t *)speex_alloc(st->filt_len*st->den_rate*sizeof(spx_word16_t)); + else if (st->sinc_table_length < st->filt_len*st->den_rate) + { + st->sinc_table = (spx_word16_t *)speex_realloc(st->sinc_table,st->filt_len*st->den_rate*sizeof(spx_word16_t)); + st->sinc_table_length = st->filt_len*st->den_rate; + } + for (i=0;iden_rate;i++) + { + int j; + for (j=0;jfilt_len;j++) + { + st->sinc_table[i*st->filt_len+j] = sinc(st->cutoff,((j-st->filt_len/2+1)-((float)i)/st->den_rate), st->filt_len); + } + } + st->type = SPEEX_RESAMPLER_DIRECT_SINGLE; + st->resampler_ptr = resampler_basic_direct_single; + /*fprintf (stderr, "resampler uses direct sinc table and normalised cutoff %f\n", cutoff);*/ + } else { + if (!st->sinc_table) + st->sinc_table = (spx_word16_t *)speex_alloc((st->filt_len*st->oversample+8)*sizeof(spx_word16_t)); + else if (st->sinc_table_length < st->filt_len*st->oversample+8) + { + st->sinc_table = (spx_word16_t *)speex_realloc(st->sinc_table,(st->filt_len*st->oversample+8)*sizeof(spx_word16_t)); + st->sinc_table_length = st->filt_len*st->oversample+8; + } + for (i=-4;ioversample*st->filt_len+4;i++) + st->sinc_table[i+4] = sinc(st->cutoff,(i/(float)st->oversample - st->filt_len/2), st->filt_len); + st->type = SPEEX_RESAMPLER_INTERPOLATE_SINGLE; + st->resampler_ptr = resampler_basic_interpolate_single; + /*fprintf (stderr, "resampler uses interpolated sinc table and normalised cutoff %f\n", cutoff);*/ + } + st->int_advance = st->num_rate/st->den_rate; + st->frac_advance = st->num_rate%st->den_rate; + + if (!st->mem) + { + st->mem = (spx_word16_t*)speex_alloc(st->nb_channels*(st->filt_len-1) * sizeof(spx_word16_t)); + for (i=0;inb_channels*(st->filt_len-1);i++) + st->mem[i] = 0; + st->mem_alloc_size = st->filt_len-1; + /*speex_warning("init filter");*/ + } else if (!st->started) + { + st->mem = (spx_word16_t*)speex_realloc(st->mem, st->nb_channels*(st->filt_len-1) * sizeof(spx_word16_t)); + for (i=0;inb_channels*(st->filt_len-1);i++) + st->mem[i] = 0; + st->mem_alloc_size = st->filt_len-1; + /*speex_warning("reinit filter");*/ + } else if (st->filt_len > old_length) + { + /* Increase the filter length */ + /*speex_warning("increase filter size");*/ + int old_alloc_size = st->mem_alloc_size; + if (st->filt_len-1 > st->mem_alloc_size) + { + st->mem = (spx_word16_t*)speex_realloc(st->mem, st->nb_channels*(st->filt_len-1) * sizeof(spx_word16_t)); + st->mem_alloc_size = st->filt_len-1; + } + for (i=0;inb_channels;i++) + { + int j; + /* Copy data going backward */ + for (j=0;jmem[i*st->mem_alloc_size+(st->filt_len-2-j)] = st->mem[i*old_alloc_size+(old_length-2-j)]; + /* Then put zeros for lack of anything better */ + for (;jfilt_len-1;j++) + st->mem[i*st->mem_alloc_size+(st->filt_len-2-j)] = 0; + /* Adjust last_sample */ + st->last_sample[i] += (st->filt_len - old_length)/2; + } + } else if (st->filt_len < old_length) + { + /* Reduce filter length, this a bit tricky */ + /*speex_warning("decrease filter size (unimplemented)");*/ + /* Adjust last_sample (which will likely end up negative) */ + /*st->last_sample += (st->filt_len - old_length)/2;*/ + for (i=0;inb_channels;i++) + { + int j; + st->magic_samples[i] = (old_length - st->filt_len)/2; + /* Copy data going backward */ + for (j=0;jfilt_len-1+st->magic_samples[i];j++) + st->mem[i*st->mem_alloc_size+j] = st->mem[i*st->mem_alloc_size+j+st->magic_samples[i]]; + } + } + +} + + +SpeexResamplerState *speex_resampler_init(int nb_channels, int ratio_num, int ratio_den, int in_rate, int out_rate, int quality) +{ + int i; + SpeexResamplerState *st = (SpeexResamplerState *)speex_alloc(sizeof(SpeexResamplerState)); + st->initialised = 0; + st->started = 0; + st->in_rate = 0; + st->out_rate = 0; + st->num_rate = 0; + st->den_rate = 0; + st->quality = -1; + st->sinc_table_length = 0; + st->mem_alloc_size = 0; + st->filt_len = 0; + st->mem = 0; + st->resampler_ptr = 0; + + st->cutoff = 1.f; + st->nb_channels = nb_channels; + st->in_stride = 1; + st->out_stride = 1; + + /* Per channel data */ + st->last_sample = (int*)speex_alloc(nb_channels*sizeof(int)); + st->magic_samples = (int*)speex_alloc(nb_channels*sizeof(int)); + st->samp_frac_num = (int*)speex_alloc(nb_channels*sizeof(int)); + for (i=0;ilast_sample[i] = 0; + st->magic_samples[i] = 0; + st->samp_frac_num[i] = 0; + } + + speex_resampler_set_quality(st, quality); + speex_resampler_set_rate(st, ratio_num, ratio_den, in_rate, out_rate); + + + update_filter(st); + + st->initialised = 1; + return st; +} + +void speex_resampler_destroy(SpeexResamplerState *st) +{ + speex_free(st->mem); + speex_free(st->sinc_table); + speex_free(st->last_sample); + speex_free(st->magic_samples); + speex_free(st->samp_frac_num); + speex_free(st); +} + + + +static void speex_resampler_process_native(SpeexResamplerState *st, int channel_index, const spx_word16_t *in, int *in_len, spx_word16_t *out, int *out_len) +{ + int j=0; + int N = st->filt_len; + int out_sample = 0; + spx_word16_t *mem; + int tmp_out_len = 0; + mem = st->mem + channel_index * st->mem_alloc_size; + st->started = 1; + + /* Handle the case where we have samples left from a reduction in filter length */ + if (st->magic_samples) + { + int tmp_in_len; + tmp_in_len = st->magic_samples[channel_index]; + tmp_out_len = *out_len; + /* FIXME: Need to handle the case where the out array is too small */ + /* magic_samples needs to be set to zero to avoid infinite recursion */ + st->magic_samples = 0; + speex_resampler_process_native(st, channel_index, mem+N-1, &tmp_in_len, out, &tmp_out_len); + /*speex_warning_int("extra samples:", tmp_out_len);*/ + out += tmp_out_len; + } + + /* Call the right resampler through the function ptr */ + out_sample = st->resampler_ptr(st, channel_index, in, in_len, out, out_len); + + if (st->last_sample[channel_index] < *in_len) + *in_len = st->last_sample[channel_index]; + *out_len = out_sample+tmp_out_len; + st->last_sample[channel_index] -= *in_len; + + for (j=0;jin_stride*(j+*in_len-N+1)]; + +} + +#ifdef FIXED_POINT +void speex_resampler_process_float(SpeexResamplerState *st, int channel_index, const float *in, int *in_len, float *out, int *out_len) +{ + int i; + int istride_save, ostride_save; + spx_word16_t x[*in_len]; + spx_word16_t y[*out_len]; + istride_save = st->in_stride; + ostride_save = st->out_stride; + for (i=0;i<*in_len;i++) + x[i] = WORD2INT(in[i*st->in_stride]); + st->in_stride = st->out_stride = 1; + speex_resampler_process_native(st, channel_index, x, in_len, y, out_len); + st->in_stride = istride_save; + st->out_stride = ostride_save; + for (i=0;i<*out_len;i++) + out[i*st->out_stride] = y[i]; +} +void speex_resampler_process_int(SpeexResamplerState *st, int channel_index, const spx_int16_t *in, int *in_len, spx_int16_t *out, int *out_len) +{ + speex_resampler_process_native(st, channel_index, in, in_len, out, out_len); +} +#else +void speex_resampler_process_float(SpeexResamplerState *st, int channel_index, const float *in, int *in_len, float *out, int *out_len) +{ + speex_resampler_process_native(st, channel_index, in, in_len, out, out_len); +} +void speex_resampler_process_int(SpeexResamplerState *st, int channel_index, const spx_int16_t *in, int *in_len, spx_int16_t *out, int *out_len) +{ + int i; + int istride_save, ostride_save; + spx_word16_t x[*in_len]; + spx_word16_t y[*out_len]; + istride_save = st->in_stride; + ostride_save = st->out_stride; + for (i=0;i<*in_len;i++) + x[i] = in[i+st->in_stride]; + st->in_stride = st->out_stride = 1; + speex_resampler_process_native(st, channel_index, x, in_len, y, out_len); + st->in_stride = istride_save; + st->out_stride = ostride_save; + for (i=0;i<*out_len;i++) + out[i+st->out_stride] = WORD2INT(y[i]); +} +#endif + +void speex_resampler_process_interleaved_float(SpeexResamplerState *st, const float *in, int *in_len, float *out, int *out_len) +{ + int i; + int istride_save, ostride_save; + istride_save = st->in_stride; + ostride_save = st->out_stride; + st->in_stride = st->out_stride = st->nb_channels; + for (i=0;inb_channels;i++) + { + speex_resampler_process_float(st, i, in+i, in_len, out+i, out_len); + } + st->in_stride = istride_save; + st->out_stride = ostride_save; +} + + +void speex_resampler_set_rate(SpeexResamplerState *st, int ratio_num, int ratio_den, int in_rate, int out_rate) +{ + int fact; + if (st->in_rate == in_rate && st->out_rate == out_rate && st->num_rate == ratio_num && st->den_rate == ratio_den) + return; + + st->in_rate = in_rate; + st->out_rate = out_rate; + st->num_rate = ratio_num; + st->den_rate = ratio_den; + /* FIXME: This is terribly inefficient, but who cares (at least for now)? */ + for (fact=2;fact<=sqrt(IMAX(in_rate, out_rate));fact++) + { + while ((st->num_rate % fact == 0) && (st->den_rate % fact == 0)) + { + st->num_rate /= fact; + st->den_rate /= fact; + } + } + + if (st->initialised) + update_filter(st); +} + +void speex_resampler_set_quality(SpeexResamplerState *st, int quality) +{ + if (quality < 0) + quality = 0; + if (quality > 10) + quality = 10; + if (st->quality == quality) + return; + st->quality = quality; + if (st->initialised) + update_filter(st); +} + +void speex_resampler_set_input_stride(SpeexResamplerState *st, int stride) +{ + st->in_stride = stride; +} + +void speex_resampler_set_output_stride(SpeexResamplerState *st, int stride) +{ + st->out_stride = stride; +} + +void speex_resampler_skip_zeros(SpeexResamplerState *st) +{ + int i; + for (i=0;inb_channels;i++) + st->last_sample[i] = st->filt_len/2; +} + +void speex_resampler_reset_mem(SpeexResamplerState *st) +{ + int i; + for (i=0;inb_channels*(st->filt_len-1);i++) + st->mem[i] = 0; +} + diff --git a/apps/codecs/libspeex/rockbox.c b/apps/codecs/libspeex/rockbox.c new file mode 100644 index 0000000000..231a6b8f18 --- /dev/null +++ b/apps/codecs/libspeex/rockbox.c @@ -0,0 +1,121 @@ +/************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * + * Copyright (C) 2007 Dan Everton + * + * All files in this archive are subject to the GNU General Public License. + * See the file COPYING in the source tree root for full license agreement. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ***************************************************************************/ + +#include "../codec.h" +#include "../lib/codeclib.h" + +extern struct codec_api* ci; + +#if defined(DEBUG) || defined(SIMULATOR) +#undef DEBUGF +#define DEBUGF ci->debugf +#else +#define DEBUGF(...) +#endif + +#ifdef ROCKBOX_HAS_LOGF +#undef LOGF +#define LOGF ci->logf +#else +#define LOGF(...) +#endif + +void *speex_alloc (int size) +{ + return codec_calloc(size, 1); +} + +void *speex_alloc_scratch (int size) +{ + return codec_calloc(size,1); +} + +void *speex_realloc (void *ptr, int size) +{ + return codec_realloc(ptr, size); +} + +void speex_free (void *ptr) +{ + codec_free(ptr); +} + +void speex_free_scratch (void *ptr) +{ + codec_free(ptr); +} + +void *speex_move (void *dest, void *src, int n) +{ + return memmove(dest,src,n); +} + +void speex_error(const char *str) +{ + DEBUGF("Fatal error: %s\n", str); + //exit(1); +} + +void speex_warning(const char *str) +{ + DEBUGF("warning: %s\n", str); +} + +void speex_warning_int(const char *str, int val) +{ + DEBUGF("warning: %s %d\n", str, val); +} + +void _speex_putc(int ch, void *file) +{ + //FILE *f = (FILE *)file; + //printf("%c", ch); +} + +float floor(float x) { + return ((float)(((int)x))); +} + +//Placeholders (not fixed point, only used when encoding): +float pow(float a, float b) { + return 0; +} + +float log(float l) { + return 0; +} + +float fabs(float a) { + return 0; +} + +float sin(float a) { + return 0; +} + +float cos(float a) { + return 0; +} + +float sqrt(float a) { + return 0; +} + +float exp(float a) { + return 0; +} diff --git a/apps/codecs/libspeex/sb_celp.c b/apps/codecs/libspeex/sb_celp.c index 0e17e525c0..61ca34efca 100644 --- a/apps/codecs/libspeex/sb_celp.c +++ b/apps/codecs/libspeex/sb_celp.c @@ -33,7 +33,7 @@ #include "config.h" #endif -#include +#include #include "sb_celp.h" #include "stdlib.h" #include "filters.h" @@ -45,6 +45,7 @@ #include "vq.h" #include "ltp.h" #include "misc.h" +#include "math_approx.h" /* Default size for the encoder and decoder stack (can be changed at compile time). This does not apply when using variable-size arrays or alloca. */ @@ -109,12 +110,26 @@ int sb_decoder_ctl(void *state, int request, void *ptr) #ifdef FIXED_POINT static const spx_word16_t gc_quant_bound[16] = {125, 164, 215, 282, 370, 484, 635, 832, 1090, 1428, 1871, 2452, 3213, 4210, 5516, 7228}; +static const spx_word16_t fold_quant_bound[32] = { + 39, 44, 50, 57, 64, 73, 83, 94, + 106, 120, 136, 154, 175, 198, 225, 255, + 288, 327, 370, 420, 476, 539, 611, 692, + 784, 889, 1007, 1141, 1293, 1465, 1660, 1881}; #define LSP_MARGIN 410 #define LSP_DELTA1 6553 #define LSP_DELTA2 1638 #else +static const spx_word16_t gc_quant_bound[16] = { + 0.97979, 1.28384, 1.68223, 2.20426, 2.88829, 3.78458, 4.95900, 6.49787, + 8.51428, 11.15642, 14.61846, 19.15484, 25.09895, 32.88761, 43.09325, 56.46588}; +static const spx_word16_t fold_quant_bound[32] = { + 0.30498, 0.34559, 0.39161, 0.44375, 0.50283, 0.56979, 0.64565, 0.73162, + 0.82903, 0.93942, 1.06450, 1.20624, 1.36685, 1.54884, 1.75506, 1.98875, + 2.25355, 2.55360, 2.89361, 3.27889, 3.71547, 4.21018, 4.77076, 5.40598, + 6.12577, 6.94141, 7.86565, 8.91295, 10.09969, 11.44445, 12.96826, 14.69497}; + #define LSP_MARGIN .05 #define LSP_DELTA1 .2 #define LSP_DELTA2 .05 @@ -126,12 +141,6 @@ static const spx_word16_t gc_quant_bound[16] = {125, 164, 215, 282, 370, 484, 63 #ifdef FIXED_POINT static const spx_word16_t h0[64] = {2, -7, -7, 18, 15, -39, -25, 75, 35, -130, -41, 212, 38, -327, -17, 483, -32, -689, 124, 956, -283, -1307, 543, 1780, -973, -2467, 1733, 3633, -3339, -6409, 9059, 30153, 30153, 9059, -6409, -3339, 3633, 1733, -2467, -973, 1780, 543, -1307, -283, 956, 124, -689, -32, 483, -17, -327, 38, 212, -41, -130, 35, 75, -25, -39, 15, 18, -7, -7, 2}; -static const spx_word16_t h1[64] = {2, 7, -7, -18, 15, 39, -25, -75, 35, 130, -41, -212, 38, 327, -17, -483, -32, 689, 124, -956, -283, 1307, 543, -1780, -973, 2467, 1733, -3633, -3339, 6409, 9059, -30153, 30153, -9059, -6409, 3339, 3633, -1733, -2467, 973, 1780, -543, -1307, 283, 956, -124, -689, 32, 483, 17, -327, -38, 212, 41, -130, -35, 75, 25, -39, -15, 18, 7, -7, -2}; - -/*To generate (ruby code): (0...33).each { |idx| puts Math.exp((idx-10) / 8.0).to_s + "," } */ -static const float spx_exp_lookup_int[33]={0.28650479686019,0.32465246735835,0.367879441171442,0.416862019678508,0.472366552741015,0.53526142851899,0.606530659712633,0.687289278790972,0.778800783071405,0.882496902584595,1.0,1.13314845306683,1.28402541668774,1.4549914146182,1.64872127070013,1.86824595743222,2.11700001661267,2.3988752939671,2.71828182845905,3.08021684891803,3.49034295746184,3.95507672292058,4.48168907033806,5.07841903718008,5.75460267600573,6.52081912033011,7.38905609893065,8.37289748812726,9.48773583635853,10.7510131860764,12.1824939607035,13.8045741860671,15.6426318841882}; - - #else static const float h0[64] = { 3.596189e-05f, -0.0001123515f, @@ -168,67 +177,15 @@ static const float h0[64] = { -0.0001123515f, 3.596189e-05f }; -static const float h1[64] = { - 3.596189e-05f, 0.0001123515f, - -0.0001104587f, -0.0002790277f, - 0.0002298438f, 0.0005953563f, - -0.0003823631f, -0.00113826f, - 0.0005308539f, 0.001986177f, - -0.0006243724f, -0.003235877f, - 0.0005743159f, 0.004989147f, - -0.0002584767f, -0.007367171f, - -0.0004857935f, 0.01050689f, - 0.001894714f, -0.01459396f, - -0.004313674f, 0.01994365f, - 0.00828756f, -0.02716055f, - -0.01485397f, 0.03764973f, - 0.026447f, -0.05543245f, - -0.05095487f, 0.09779096f, - 0.1382363f, -0.4600981f, - 0.4600981f, -0.1382363f, - -0.09779096f, 0.05095487f, - 0.05543245f, -0.026447f, - -0.03764973f, 0.01485397f, - 0.02716055f, -0.00828756f, - -0.01994365f, 0.004313674f, - 0.01459396f, -0.001894714f, - -0.01050689f, 0.0004857935f, - 0.007367171f, 0.0002584767f, - -0.004989147f, -0.0005743159f, - 0.003235877f, 0.0006243724f, - -0.001986177f, -0.0005308539f, - 0.00113826f, 0.0003823631f, - -0.0005953563f, -0.0002298438f, - 0.0002790277f, 0.0001104587f, - -0.0001123515f, -3.596189e-05f -}; #endif extern const spx_word16_t lpc_window[]; -static void mix_and_saturate(spx_word32_t *x0, spx_word32_t *x1, spx_word16_t *out, int len) -{ - int i; - for (i=0;i32767) - out[i] = 32767; - else if (tmp<-32767) - out[i] = -32767; - else - out[i] = tmp; - } -} void *sb_encoder_init(const SpeexMode *m) { int i; + spx_int32_t tmp; SBEncState *st; const SpeexSBMode *mode; @@ -257,10 +214,10 @@ void *sb_encoder_init(const SpeexMode *m) st->submodes=mode->submodes; st->submodeSelect = st->submodeID=mode->defaultSubmode; - i=9; - speex_encoder_ctl(st->st_low, SPEEX_SET_QUALITY, &i); - i=1; - speex_encoder_ctl(st->st_low, SPEEX_SET_WIDEBAND, &i); + tmp=9; + speex_encoder_ctl(st->st_low, SPEEX_SET_QUALITY, &tmp); + tmp=1; + speex_encoder_ctl(st->st_low, SPEEX_SET_WIDEBAND, &tmp); st->lag_factor = mode->lag_factor; st->lpc_floor = mode->lpc_floor; @@ -268,22 +225,11 @@ void *sb_encoder_init(const SpeexMode *m) st->gamma2=mode->gamma2; st->first=1; - st->x0d=(spx_sig_t*)speex_alloc((st->frame_size)*sizeof(spx_sig_t)); - st->x1d=(spx_sig_t*)speex_alloc((st->frame_size)*sizeof(spx_sig_t)); - st->high=(spx_sig_t*)speex_alloc((st->full_frame_size)*sizeof(spx_sig_t)); - st->y0=(spx_sig_t*)speex_alloc((st->full_frame_size)*sizeof(spx_sig_t)); - st->y1=(spx_sig_t*)speex_alloc((st->full_frame_size)*sizeof(spx_sig_t)); + st->high=(spx_word16_t*)speex_alloc((st->windowSize-st->frame_size)*sizeof(spx_word16_t)); st->h0_mem=(spx_word16_t*)speex_alloc((QMF_ORDER)*sizeof(spx_word16_t)); st->h1_mem=(spx_word16_t*)speex_alloc((QMF_ORDER)*sizeof(spx_word16_t)); - st->g0_mem=(spx_word32_t*)speex_alloc((QMF_ORDER)*sizeof(spx_word32_t)); - st->g1_mem=(spx_word32_t*)speex_alloc((QMF_ORDER)*sizeof(spx_word32_t)); - st->excBuf=(spx_sig_t*)speex_alloc((st->bufSize)*sizeof(spx_sig_t)); - st->exc = st->excBuf + st->bufSize - st->windowSize; - - st->res=(spx_sig_t*)speex_alloc((st->frame_size)*sizeof(spx_sig_t)); - st->sw=(spx_sig_t*)speex_alloc((st->frame_size)*sizeof(spx_sig_t)); st->window= lpc_window; st->lagWindow = (spx_word16_t*)speex_alloc((st->lpcSize+1)*sizeof(spx_word16_t)); @@ -303,14 +249,18 @@ void *sb_encoder_init(const SpeexMode *m) st->interp_lpc = (spx_coef_t*)speex_alloc(st->lpcSize*sizeof(spx_coef_t)); st->interp_qlpc = (spx_coef_t*)speex_alloc(st->lpcSize*sizeof(spx_coef_t)); st->pi_gain = (spx_word32_t*)speex_alloc((st->nbSubframes)*sizeof(spx_word32_t)); - st->low_innov = (spx_word32_t*)speex_alloc((st->frame_size)*sizeof(spx_word32_t)); - speex_encoder_ctl(st->st_low, SPEEX_SET_INNOVATION_SAVE, st->low_innov); - st->innov_save = NULL; + st->exc_rms = (spx_word16_t*)speex_alloc((st->nbSubframes)*sizeof(spx_word16_t)); + st->innov_rms_save = NULL; st->mem_sp = (spx_mem_t*)speex_alloc((st->lpcSize)*sizeof(spx_mem_t)); st->mem_sp2 = (spx_mem_t*)speex_alloc((st->lpcSize)*sizeof(spx_mem_t)); st->mem_sw = (spx_mem_t*)speex_alloc((st->lpcSize)*sizeof(spx_mem_t)); + for (i=0;ilpcSize;i++) + { + st->old_lsp[i]=LSP_SCALING*(M_PI*((float)(i+1)))/(st->lpcSize+1); + } + st->vbr_quality = 8; st->vbr_enabled = 0; st->vbr_max = 0; @@ -337,20 +287,11 @@ void sb_encoder_destroy(void *state) speex_free_scratch(st->stack); #endif - speex_free(st->x0d); - speex_free(st->x1d); speex_free(st->high); - speex_free(st->y0); - speex_free(st->y1); speex_free(st->h0_mem); speex_free(st->h1_mem); - speex_free(st->g0_mem); - speex_free(st->g1_mem); - speex_free(st->excBuf); - speex_free(st->res); - speex_free(st->sw); speex_free(st->lagWindow); speex_free(st->autocorr); @@ -366,7 +307,7 @@ void sb_encoder_destroy(void *state) speex_free(st->interp_lpc); speex_free(st->interp_qlpc); speex_free(st->pi_gain); - speex_free(st->low_innov); + speex_free(st->exc_rms); speex_free(st->mem_sp); speex_free(st->mem_sp2); @@ -387,44 +328,48 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) VARDECL(spx_word16_t *target); VARDECL(spx_word16_t *syn_resp); VARDECL(spx_word32_t *low_pi_gain); - VARDECL(spx_word16_t *low_exc); + spx_word16_t *low; + spx_word16_t *high; + VARDECL(spx_word16_t *low_exc_rms); + VARDECL(spx_word16_t *low_innov_rms); const SpeexSBMode *mode; - int dtx; + spx_int32_t dtx; spx_word16_t *in = (spx_word16_t*)vin; + spx_word16_t e_low=0, e_high=0; st = (SBEncState*)state; stack=st->stack; mode = (const SpeexSBMode*)(st->mode->mode); - - { - VARDECL(spx_word16_t *low); - ALLOC(low, st->frame_size, spx_word16_t); - - /* Compute the two sub-bands by filtering with h0 and h1*/ - qmf_decomp(in, h0, st->x0d, st->x1d, st->full_frame_size, QMF_ORDER, st->h0_mem, stack); - - for (i=0;iframe_size;i++) - low[i] = SATURATE(PSHR(st->x0d[i],SIG_SHIFT),32767); - - /* Encode the narrowband part*/ - speex_encode_native(st->st_low, low, bits); - - for (i=0;iframe_size;i++) - st->x0d[i] = SHL(low[i],SIG_SHIFT); - } + low = in; + high = in+st->frame_size; + /* High-band buffering / sync with low band */ + /* Compute the two sub-bands by filtering with QMF h0*/ + qmf_decomp(in, h0, low, high, st->full_frame_size, QMF_ORDER, st->h0_mem, stack); + + if (st->vbr_enabled || st->vad_enabled) + { + /* Need to compute things here before the signal is trashed by the encoder */ + /*FIXME: Are the two signals (low, high) in sync? */ + e_low = compute_rms16(low, st->frame_size); + e_high = compute_rms16(high, st->frame_size); + } + ALLOC(low_innov_rms, st->nbSubframes, spx_word16_t); + speex_encoder_ctl(st->st_low, SPEEX_SET_INNOVATION_SAVE, low_innov_rms); + /* Encode the narrowband part*/ + speex_encode_native(st->st_low, low, bits); + + high = high - (st->windowSize-st->frame_size); for (i=0;iwindowSize-st->frame_size;i++) - st->high[i] = st->high[st->frame_size+i]; - for (i=0;iframe_size;i++) - st->high[st->windowSize-st->frame_size+i]=SATURATE(st->x1d[i],536854528); - - speex_move(st->excBuf, st->excBuf+st->frame_size, (st->bufSize-st->frame_size)*sizeof(spx_sig_t)); - + high[i] = st->high[i]; + for (i=0;iwindowSize-st->frame_size;i++) + st->high[i] = high[i+st->frame_size]; + ALLOC(low_pi_gain, st->nbSubframes, spx_word32_t); - ALLOC(low_exc, st->frame_size, spx_word16_t); + ALLOC(low_exc_rms, st->nbSubframes, spx_word16_t); speex_encoder_ctl(st->st_low, SPEEX_GET_PI_GAIN, low_pi_gain); - speex_encoder_ctl(st->st_low, SPEEX_GET_EXC, low_exc); + speex_encoder_ctl(st->st_low, SPEEX_GET_EXC, low_exc_rms); speex_encoder_ctl(st->st_low, SPEEX_GET_LOW_MODE, &dtx); @@ -437,9 +382,15 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) VARDECL(spx_word16_t *w_sig); ALLOC(w_sig, st->windowSize, spx_word16_t); /* Window for analysis */ - for (i=0;iwindowSize;i++) - w_sig[i] = SHR(MULT16_16(SHR((spx_word32_t)(st->high[i]),SIG_SHIFT),st->window[i]),SIG_SHIFT); - + /* FIXME: This is a kludge */ + if (st->subframeSize==80) + { + for (i=0;iwindowSize;i++) + w_sig[i] = EXTRACT16(SHR32(MULT16_16(high[i],st->window[i>>1]),SIG_SHIFT)); + } else { + for (i=0;iwindowSize;i++) + w_sig[i] = EXTRACT16(SHR32(MULT16_16(high[i],st->window[i]),SIG_SHIFT)); + } /* Compute auto-correlation */ _spx_autocorr(w_sig, st->autocorr, st->lpcSize+1, st->windowSize); } @@ -461,7 +412,7 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) /*If we can't find all LSP's, do some damage control and use a flat filter*/ for (i=0;ilpcSize;i++) { - st->lsp[i]=LSP_SCALING*M_PI*((float)(i+1))/(st->lpcSize+1); + st->lsp[i]=st->old_lsp[i]; } } } @@ -469,7 +420,6 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) /* VBR code */ if ((st->vbr_enabled || st->vad_enabled) && !dtx) { - float e_low=0, e_high=0; float ratio; if (st->abr_enabled) { @@ -491,10 +441,7 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) } - /*FIXME: Are the two signals (low, high) in sync? */ - e_low = compute_rms(st->x0d, st->frame_size); - e_high = compute_rms(st->high, st->frame_size); - ratio = 2*log((1+e_high)/(1+e_low)); + ratio = 2*log((1.f+e_high)/(1.f+e_low)); speex_encoder_ctl(st->st_low, SPEEX_GET_RELATIVE_QUALITY, &st->relative_quality); if (ratio<-4) @@ -504,7 +451,7 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) /*if (ratio>-2)*/ if (st->vbr_enabled) { - int modeid; + spx_int32_t modeid; modeid = mode->nb_modes-1; st->relative_quality+=1.0*(ratio+2); if (st->relative_quality<-1) @@ -526,7 +473,7 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) speex_encoder_ctl(state, SPEEX_SET_HIGH_MODE, &modeid); if (st->abr_enabled) { - int bitrate; + spx_int32_t bitrate; speex_encoder_ctl(state, SPEEX_GET_BITRATE, &bitrate); st->abr_drift+=(bitrate-st->abr_enabled); st->abr_drift2 = .95*st->abr_drift2 + .05*(bitrate-st->abr_enabled); @@ -560,23 +507,14 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) if (dtx || st->submodes[st->submodeID] == NULL) { for (i=0;iframe_size;i++) - st->exc[i]=st->sw[i]=VERY_SMALL; + high[i]=VERY_SMALL; for (i=0;ilpcSize;i++) st->mem_sw[i]=0; st->first=1; /* Final signal synthesis from excitation */ - iir_mem2(st->exc, st->interp_qlpc, st->high, st->frame_size, st->lpcSize, st->mem_sp); - -#ifdef RESYNTH - /* Reconstruct the original */ - fir_mem_up(st->x0d, h0, st->y0, st->full_frame_size, QMF_ORDER, st->g0_mem, stack); - fir_mem_up(st->high, h1, st->y1, st->full_frame_size, QMF_ORDER, st->g1_mem, stack); - - for (i=0;ifull_frame_size;i++) - in[i]=SHR(st->y0[i]-st->y1[i], SIG_SHIFT-1); -#endif + iir_mem16(high, st->interp_qlpc, high, st->frame_size, st->lpcSize, st->mem_sp, stack); if (dtx) return 0; @@ -603,24 +541,20 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) for (sub=0;subnbSubframes;sub++) { - spx_sig_t *exc, *sp, *res, *sw, *innov_save=NULL; - spx_word16_t filter_ratio; + VARDECL(spx_word16_t *exc); + VARDECL(spx_word16_t *res); + VARDECL(spx_word16_t *sw); + spx_word16_t *sp; + spx_word16_t filter_ratio; /*Q7*/ int offset; - spx_word32_t rl, rh; + spx_word32_t rl, rh; /*Q13*/ spx_word16_t eh=0; offset = st->subframeSize*sub; - sp=st->high+offset; - exc=st->exc+offset; - res=st->res+offset; - sw=st->sw+offset; - /* Pointer for saving innovation */ - if (st->innov_save) - { - innov_save = st->innov_save+2*offset; - for (i=0;i<2*st->subframeSize;i++) - innov_save[i]=0; - } + sp=high+offset; + ALLOC(exc, st->subframeSize, spx_word16_t); + ALLOC(res, st->subframeSize, spx_word16_t); + ALLOC(sw, st->subframeSize, spx_word16_t); /* LSP interpolation (quantized and unquantized) */ lsp_interpolate(st->old_lsp, st->lsp, st->interp_lsp, st->lpcSize, sub, st->nbSubframes); @@ -647,24 +581,24 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) rl = low_pi_gain[sub]; #ifdef FIXED_POINT - filter_ratio=PDIV32_16(SHL(rl+82,2),SHR(82+rh,5)); + filter_ratio=EXTRACT16(SATURATE(PDIV32(SHL32(ADD32(rl,82),7),ADD32(82,rh)),32767)); #else filter_ratio=(rl+.01)/(rh+.01); #endif /* Compute "real excitation" */ - fir_mem2(sp, st->interp_qlpc, exc, st->subframeSize, st->lpcSize, st->mem_sp2); + fir_mem16(sp, st->interp_qlpc, exc, st->subframeSize, st->lpcSize, st->mem_sp2, stack); /* Compute energy of low-band and high-band excitation */ - eh = compute_rms(exc, st->subframeSize); + eh = compute_rms16(exc, st->subframeSize); if (!SUBMODE(innovation_quant)) {/* 1 for spectral folding excitation, 0 for stochastic */ - float g; - spx_word16_t el; - el = compute_rms(st->low_innov+offset, st->subframeSize); + spx_word32_t g; /*Q7*/ + spx_word16_t el; /*Q0*/ + el = low_innov_rms[sub]; /* Gain to use if we want to use the low-band excitation for high-band */ - g=eh/(1.+el); + g=PDIV32(MULT16_16(filter_ratio,eh),EXTEND32(ADD16(1,el))); #if 0 { @@ -682,15 +616,10 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) } #endif -#ifdef FIXED_POINT - g *= filter_ratio/128.; -#else - g *= filter_ratio; -#endif /*print_vec(&g, 1, "gain factor");*/ /* Gain quantization */ { - int quant = (int) floor(.5 + 10 + 8.0 * log((g+.0001))); + int quant = scal_quant(g, fold_quant_bound, 32); /*speex_warning_int("tata", quant);*/ if (quant<0) quant=0; @@ -700,36 +629,24 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) } } else { - spx_word16_t gc; - spx_word32_t scale; - spx_word16_t el; - el = compute_rms16(low_exc+offset, st->subframeSize); + spx_word16_t gc; /*Q7*/ + spx_word32_t scale; /*Q14*/ + spx_word16_t el; /*Q0*/ + el = low_exc_rms[sub]; /*Q0*/ gc = PDIV32_16(MULT16_16(filter_ratio,1+eh),1+el); /* This is a kludge that cleans up a historical bug */ if (st->subframeSize==80) - gc *= 0.70711; + gc = MULT16_16_P15(QCONST16(0.70711f,15),gc); /*printf ("%f %f %f %f\n", el, eh, filter_ratio, gc);*/ -#ifdef FIXED_POINT { int qgc = scal_quant(gc, gc_quant_bound, 16); speex_bits_pack(bits, qgc, 4); - gc = MULT16_32_Q15(28626,gc_quant_bound[qgc]); + gc = MULT16_16_Q15(QCONST16(0.87360,15),gc_quant_bound[qgc]); } -#else - { - int qgc = (int)floor(.5+3.7*(log(gc)+0.15556)); - if (qgc<0) - qgc=0; - if (qgc>15) - qgc=15; - speex_bits_pack(bits, qgc, 4); - gc = exp((1/3.7)*qgc-0.15556); - } -#endif if (st->subframeSize==80) - gc *= 1.4142; + gc = MULT16_16_P14(QCONST16(1.4142f,14), gc); scale = SHL32(MULT16_16(PDIV32_16(SHL32(EXTEND32(gc),SIG_SHIFT-6),filter_ratio),(1+el)),6); @@ -738,28 +655,25 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) /* Reset excitation */ for (i=0;isubframeSize;i++) - exc[i]=VERY_SMALL; + res[i]=VERY_SMALL; /* Compute zero response (ringing) of A(z/g1) / ( A(z/g2) * Aq(z) ) */ for (i=0;ilpcSize;i++) mem[i]=st->mem_sp[i]; - iir_mem2(exc, st->interp_qlpc, exc, st->subframeSize, st->lpcSize, mem); + iir_mem16(res, st->interp_qlpc, res, st->subframeSize, st->lpcSize, mem, stack); for (i=0;ilpcSize;i++) mem[i]=st->mem_sw[i]; - filter_mem2(exc, st->bw_lpc1, st->bw_lpc2, res, st->subframeSize, st->lpcSize, mem); + filter_mem16(res, st->bw_lpc1, st->bw_lpc2, res, st->subframeSize, st->lpcSize, mem, stack); /* Compute weighted signal */ for (i=0;ilpcSize;i++) mem[i]=st->mem_sw[i]; - filter_mem2(sp, st->bw_lpc1, st->bw_lpc2, sw, st->subframeSize, st->lpcSize, mem); + filter_mem16(sp, st->bw_lpc1, st->bw_lpc2, sw, st->subframeSize, st->lpcSize, mem, stack); /* Compute target signal */ for (i=0;isubframeSize;i++) - target[i]=PSHR32(sw[i]-res[i],SIG_SHIFT); - - for (i=0;isubframeSize;i++) - exc[i]=0; + target[i]=SUB16(sw[i],res[i]); signal_div(target, target, scale, st->subframeSize); @@ -775,15 +689,6 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) signal_mul(innov, innov, scale, st->subframeSize); - for (i=0;isubframeSize;i++) - exc[i] = ADD32(exc[i], innov[i]); - - if (st->innov_save) - { - for (i=0;isubframeSize;i++) - innov_save[2*i]=innov[i]; - } - if (SUBMODE(double_codebook)) { char *tmp_stack=stack; VARDECL(spx_sig_t *innov2); @@ -791,38 +696,40 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) for (i=0;isubframeSize;i++) innov2[i]=0; for (i=0;isubframeSize;i++) - target[i]*=2.5; + target[i]=MULT16_16_P13(QCONST16(2.5f,13), target[i]); + SUBMODE(innovation_quant)(target, st->interp_qlpc, st->bw_lpc1, st->bw_lpc2, SUBMODE(innovation_params), st->lpcSize, st->subframeSize, innov2, syn_resp, bits, stack, st->complexity, 0); + signal_mul(innov2, innov2, MULT16_32_P15(QCONST16(0.4f,15),scale), st->subframeSize); + for (i=0;isubframeSize;i++) - innov2[i]*=scale*(1/2.5)/SIG_SCALING; - for (i=0;isubframeSize;i++) - exc[i] = ADD32(exc[i],innov2[i]); + innov[i] = ADD32(innov[i],innov2[i]); stack = tmp_stack; } + for (i=0;isubframeSize;i++) + exc[i] = PSHR32(innov[i],SIG_SHIFT); + + if (st->innov_rms_save) + { + st->innov_rms_save[sub] = MULT16_16_Q15(QCONST16(.70711f, 15), compute_rms(innov, st->subframeSize)); + } + } + st->exc_rms[sub] = compute_rms16(exc, st->subframeSize); + /*Keep the previous memory*/ for (i=0;ilpcSize;i++) mem[i]=st->mem_sp[i]; /* Final signal synthesis from excitation */ - iir_mem2(exc, st->interp_qlpc, sp, st->subframeSize, st->lpcSize, st->mem_sp); + iir_mem16(exc, st->interp_qlpc, sp, st->subframeSize, st->lpcSize, st->mem_sp, stack); /* Compute weighted signal again, from synthesized speech (not sure it's the right thing) */ - filter_mem2(sp, st->bw_lpc1, st->bw_lpc2, sw, st->subframeSize, st->lpcSize, st->mem_sw); + filter_mem16(sp, st->bw_lpc1, st->bw_lpc2, sw, st->subframeSize, st->lpcSize, st->mem_sw, stack); } - -#ifdef RESYNTH - /* Reconstruct the original */ - fir_mem_up(st->x0d, h0, st->y0, st->full_frame_size, QMF_ORDER, st->g0_mem, stack); - fir_mem_up(st->high, h1, st->y1, st->full_frame_size, QMF_ORDER, st->g1_mem, stack); - - for (i=0;ifull_frame_size;i++) - in[i]=SHR(st->y0[i]-st->y1[i], SIG_SHIFT-1); -#endif for (i=0;ilpcSize;i++) st->old_lsp[i] = st->lsp[i]; for (i=0;ilpcSize;i++) @@ -839,7 +746,7 @@ int sb_encode(void *state, void *vin, SpeexBits *bits) void *sb_decoder_init(const SpeexMode *m) { - int tmp; + spx_int32_t tmp; SBDecState *st; const SpeexSBMode *mode; st = (SBDecState*)speex_alloc(sizeof(SBDecState)); @@ -874,18 +781,10 @@ void *sb_decoder_init(const SpeexMode *m) st->first=1; - - st->x0d = (spx_sig_t*)speex_alloc((st->frame_size)*sizeof(spx_sig_t)); - st->x1d = (spx_sig_t*)speex_alloc((st->frame_size)*sizeof(spx_sig_t)); - st->high = (spx_sig_t*)speex_alloc((st->full_frame_size)*sizeof(spx_sig_t)); - st->y0 = (spx_sig_t*)speex_alloc((st->full_frame_size)*sizeof(spx_sig_t)); - st->y1 = (spx_sig_t*)speex_alloc((st->full_frame_size)*sizeof(spx_sig_t)); - st->g0_mem = (spx_word32_t*)speex_alloc((QMF_ORDER)*sizeof(spx_word32_t)); st->g1_mem = (spx_word32_t*)speex_alloc((QMF_ORDER)*sizeof(spx_word32_t)); - st->exc = (spx_sig_t*)speex_alloc((st->frame_size)*sizeof(spx_sig_t)); - st->excBuf = (spx_sig_t*)speex_alloc((st->subframeSize)*sizeof(spx_sig_t)); + st->excBuf = (spx_word16_t*)speex_alloc((st->subframeSize)*sizeof(spx_word16_t)); st->qlsp = (spx_lsp_t*)speex_alloc((st->lpcSize)*sizeof(spx_lsp_t)); st->old_qlsp = (spx_lsp_t*)speex_alloc((st->lpcSize)*sizeof(spx_lsp_t)); @@ -893,10 +792,9 @@ void *sb_decoder_init(const SpeexMode *m) st->interp_qlpc = (spx_coef_t*)speex_alloc(st->lpcSize*sizeof(spx_coef_t)); st->pi_gain = (spx_word32_t*)speex_alloc((st->nbSubframes)*sizeof(spx_word32_t)); + st->exc_rms = (spx_word16_t*)speex_alloc((st->nbSubframes)*sizeof(spx_word16_t)); st->mem_sp = (spx_mem_t*)speex_alloc((2*st->lpcSize)*sizeof(spx_mem_t)); - st->low_innov = (spx_word32_t*)speex_alloc((st->frame_size)*sizeof(spx_word32_t)); - speex_decoder_ctl(st->st_low, SPEEX_SET_INNOVATION_SAVE, st->low_innov); st->innov_save = NULL; @@ -918,21 +816,15 @@ void sb_decoder_destroy(void *state) speex_free_scratch(st->stack); #endif - speex_free(st->x0d); - speex_free(st->x1d); - speex_free(st->high); - speex_free(st->y0); - speex_free(st->y1); speex_free(st->g0_mem); speex_free(st->g1_mem); - speex_free(st->exc); speex_free(st->excBuf); speex_free(st->qlsp); speex_free(st->old_qlsp); speex_free(st->interp_qlsp); speex_free(st->interp_qlpc); speex_free(st->pi_gain); - speex_free(st->low_innov); + speex_free(st->exc_rms); speex_free(st->mem_sp); speex_free(state); @@ -948,7 +840,7 @@ static void sb_decode_lost(SBDecState *st, spx_word16_t *out, int dtx, char *sta saved_modeid=st->submodeID; st->submodeID=1; } else { - bw_lpc(GAMMA_SCALING*0.99, st->interp_qlpc, st->interp_qlpc, st->lpcSize); + bw_lpc(QCONST16(0.99f,15), st->interp_qlpc, st->interp_qlpc, st->lpcSize); } st->first=1; @@ -957,25 +849,17 @@ static void sb_decode_lost(SBDecState *st, spx_word16_t *out, int dtx, char *sta /* Final signal synthesis from excitation */ if (!dtx) { - spx_word16_t low_ener; - low_ener = .9*compute_rms(st->exc, st->frame_size); - for (i=0;iframe_size;i++) - st->exc[i] = speex_rand(low_ener, &st->seed); + st->last_ener = MULT16_16_Q15(QCONST16(.9f,15),st->last_ener); } - for (i=0;iframe_size;i++) - st->high[i]=st->exc[i]; + out[i+st->frame_size] = speex_rand(st->last_ener, &st->seed); - iir_mem2(st->high, st->interp_qlpc, st->high, st->frame_size, st->lpcSize, - st->mem_sp); + iir_mem16(out+st->frame_size, st->interp_qlpc, out+st->frame_size, st->frame_size, st->lpcSize, + st->mem_sp, stack); /* Reconstruct the original */ - fir_mem_up(st->x0d, h0, st->y0, st->full_frame_size, QMF_ORDER, st->g0_mem, stack); - fir_mem_up(st->high, h1, st->y1, st->full_frame_size, QMF_ORDER, st->g1_mem, stack); - - mix_and_saturate(st->y0, st->y1, out, st->full_frame_size); - + qmf_synth(out, out+st->frame_size, h0, out, st->full_frame_size, QMF_ORDER, st->g0_mem, st->g1_mem, stack); if (dtx) { st->submodeID=saved_modeid; @@ -992,26 +876,20 @@ int sb_decode(void *state, SpeexBits *bits, void *vout) int ret; char *stack; VARDECL(spx_word32_t *low_pi_gain); - VARDECL(spx_word16_t *low_exc); + VARDECL(spx_word16_t *low_exc_rms); VARDECL(spx_coef_t *ak); - int dtx; + spx_int32_t dtx; const SpeexSBMode *mode; spx_word16_t *out = (spx_word16_t*)vout; - + spx_word16_t *low_innov_alias; st = (SBDecState*)state; stack=st->stack; mode = (const SpeexSBMode*)(st->mode->mode); - { - VARDECL(spx_word16_t *low); - ALLOC(low, st->frame_size, spx_word16_t); - - /* Decode the low-band */ - ret = speex_decode_native(st->st_low, bits, low); - - for (i=0;iframe_size;i++) - st->x0d[i] = SHL((spx_sig_t)low[i], SIG_SHIFT); - } + low_innov_alias = out+st->frame_size; + speex_decoder_ctl(st->st_low, SPEEX_SET_INNOVATION_SAVE, low_innov_alias); + /* Decode the low-band */ + ret = speex_decode_native(st->st_low, bits, out); speex_decoder_ctl(st->st_low, SPEEX_GET_DTX_STATUS, &dtx); @@ -1062,29 +940,23 @@ int sb_decode(void *state, SpeexBits *bits, void *vout) } for (i=0;iframe_size;i++) - st->exc[i]=VERY_SMALL; + out[st->frame_size+i]=VERY_SMALL; st->first=1; /* Final signal synthesis from excitation */ - iir_mem2(st->exc, st->interp_qlpc, st->high, st->frame_size, st->lpcSize, st->mem_sp); + iir_mem16(out+st->frame_size, st->interp_qlpc, out+st->frame_size, st->frame_size, st->lpcSize, st->mem_sp, stack); - fir_mem_up(st->x0d, h0, st->y0, st->full_frame_size, QMF_ORDER, st->g0_mem, stack); - fir_mem_up(st->high, h1, st->y1, st->full_frame_size, QMF_ORDER, st->g1_mem, stack); - - mix_and_saturate(st->y0, st->y1, out, st->full_frame_size); + qmf_synth(out, out+st->frame_size, h0, out, st->full_frame_size, QMF_ORDER, st->g0_mem, st->g1_mem, stack); return 0; } - for (i=0;iframe_size;i++) - st->exc[i]=0; - ALLOC(low_pi_gain, st->nbSubframes, spx_word32_t); - ALLOC(low_exc, st->frame_size, spx_word16_t); + ALLOC(low_exc_rms, st->nbSubframes, spx_word16_t); speex_decoder_ctl(st->st_low, SPEEX_GET_PI_GAIN, low_pi_gain); - speex_decoder_ctl(st->st_low, SPEEX_GET_EXC, low_exc); + speex_decoder_ctl(st->st_low, SPEEX_GET_EXC, low_exc_rms); SUBMODE(lsp_unquant)(st->qlsp, st->lpcSize, bits); @@ -1098,15 +970,17 @@ int sb_decode(void *state, SpeexBits *bits, void *vout) for (sub=0;subnbSubframes;sub++) { - spx_sig_t *exc, *sp, *innov_save=NULL; + VARDECL(spx_word32_t *exc); + spx_word16_t *innov_save=NULL; + spx_word16_t *sp; spx_word16_t filter_ratio; spx_word16_t el=0; int offset; spx_word32_t rl=0,rh=0; offset = st->subframeSize*sub; - sp=st->high+offset; - exc=st->exc+offset; + sp=out+st->frame_size+offset; + ALLOC(exc, st->subframeSize, spx_word32_t); /* Pointer for saving innovation */ if (st->innov_save) { @@ -1136,7 +1010,7 @@ int sb_decode(void *state, SpeexBits *bits, void *vout) rl = low_pi_gain[sub]; #ifdef FIXED_POINT - filter_ratio=PDIV32_16(SHL(rl+82,2),SHR(82+rh,5)); + filter_ratio=EXTRACT16(SATURATE(PDIV32(SHL32(ADD32(rl,82),7),ADD32(82,rh)),32767)); #else filter_ratio=(rl+.01)/(rh+.01); #endif @@ -1145,64 +1019,32 @@ int sb_decode(void *state, SpeexBits *bits, void *vout) exc[i]=0; if (!SUBMODE(innovation_unquant)) { - float g; + spx_word32_t g; int quant; quant = speex_bits_unpack_unsigned(bits, 5); -#ifdef FIXED_POINT - g= spx_exp_lookup_int[quant]; -#else - g= exp(((float)quant-10)/8.0); -#endif + g= spx_exp(MULT16_16(QCONST16(.125f,11),(quant-10))); -#ifdef FIXED_POINT - g /= filter_ratio/128.; -#else - g /= filter_ratio; -#endif - /* High-band excitation using the low-band excitation and a gain */ + g = PDIV32(g, filter_ratio); -#if 0 - for (i=0;isubframeSize;i++) - exc[i]=mode->folding_gain*g*st->low_innov[offset+i]; -#else + for (i=0;isubframeSize;i+=2) { - float tmp=1; - /*static tmp1=0,tmp2=0; - static int seed=1; - el = compute_rms(low_innov+offset, st->subframeSize);*/ - for (i=0;isubframeSize;i++) - { - float e=tmp*g*mode->folding_gain*st->low_innov[offset+i]; - tmp *= -1; - exc[i] = e; - /*float r = speex_rand(g*el,&seed); - exc[i] = .5*(r+tmp2 + e-tmp1); - tmp1 = e; - tmp2 = r;*/ - } - + exc[i]=SHL32(MULT16_32_P15(MULT16_16_Q15(mode->folding_gain,low_innov_alias[offset+i]),SHL32(g,6)),SIG_SHIFT); + exc[i+1]=NEG32(SHL32(MULT16_32_P15(MULT16_16_Q15(mode->folding_gain,low_innov_alias[offset+i+1]),SHL32(g,6)),SIG_SHIFT)); } -#endif } else { spx_word16_t gc; spx_word32_t scale; int qgc = speex_bits_unpack_unsigned(bits, 4); - - el = compute_rms16(low_exc+offset, st->subframeSize); - -#ifdef FIXED_POINT - gc = MULT16_32_Q15(28626,gc_quant_bound[qgc]); -#else - gc = exp((1/3.7)*qgc-0.15556); -#endif + + el = low_exc_rms[sub]; + gc = MULT16_16_Q15(QCONST16(0.87360,15),gc_quant_bound[qgc]); if (st->subframeSize==80) - gc *= 1.4142; - - scale = SHL(MULT16_16(PDIV32_16(SHL(gc,SIG_SHIFT-6),filter_ratio),(1+el)),6); + gc = MULT16_16_P14(QCONST16(1.4142f,14),gc); + scale = SHL32(PDIV32(SHL32(MULT16_16(gc, el),3), filter_ratio),SIG_SHIFT-3); SUBMODE(innovation_unquant)(exc, SUBMODE(innovation_params), st->subframeSize, bits, stack, &st->seed); @@ -1216,8 +1058,7 @@ int sb_decode(void *state, SpeexBits *bits, void *vout) innov2[i]=0; SUBMODE(innovation_unquant)(innov2, SUBMODE(innovation_params), st->subframeSize, bits, stack, &st->seed); - for (i=0;isubframeSize;i++) - innov2[i]*=scale/(float)SIG_SCALING*(1/2.5); + signal_mul(innov2, innov2, MULT16_32_P15(QCONST16(0.4f,15),scale), st->subframeSize); for (i=0;isubframeSize;i++) exc[i] = ADD32(exc[i],innov2[i]); stack = tmp_stack; @@ -1228,25 +1069,23 @@ int sb_decode(void *state, SpeexBits *bits, void *vout) if (st->innov_save) { for (i=0;isubframeSize;i++) - innov_save[2*i]=exc[i]; + innov_save[2*i]=EXTRACT16(PSHR32(exc[i],SIG_SHIFT)); } for (i=0;isubframeSize;i++) sp[i]=st->excBuf[i]; - iir_mem2(sp, st->interp_qlpc, sp, st->subframeSize, st->lpcSize, - st->mem_sp); + iir_mem16(sp, st->interp_qlpc, sp, st->subframeSize, st->lpcSize, + st->mem_sp, stack); for (i=0;isubframeSize;i++) - st->excBuf[i]=exc[i]; + st->excBuf[i]=EXTRACT16(PSHR32(exc[i],SIG_SHIFT)); for (i=0;ilpcSize;i++) st->interp_qlpc[i] = ak[i]; + st->exc_rms[sub] = compute_rms16(st->excBuf, st->subframeSize); } - - fir_mem_up(st->x0d, h0, st->y0, st->full_frame_size, QMF_ORDER, st->g0_mem, stack); - fir_mem_up(st->high, h1, st->y1, st->full_frame_size, QMF_ORDER, st->g1_mem, stack); - - mix_and_saturate(st->y0, st->y1, out, st->full_frame_size); - + st->last_ener = compute_rms16(out+st->frame_size, st->frame_size); + + qmf_synth(out, out+st->frame_size, h0, out, st->full_frame_size, QMF_ORDER, st->g0_mem, st->g1_mem, stack); for (i=0;ilpcSize;i++) st->old_qlsp[i] = st->qlsp[i]; @@ -1263,10 +1102,10 @@ int sb_encoder_ctl(void *state, int request, void *ptr) switch(request) { case SPEEX_GET_FRAME_SIZE: - (*(int*)ptr) = st->full_frame_size; + (*(spx_int32_t*)ptr) = st->full_frame_size; break; case SPEEX_SET_HIGH_MODE: - st->submodeSelect = st->submodeID = (*(int*)ptr); + st->submodeSelect = st->submodeID = (*(spx_int32_t*)ptr); break; case SPEEX_SET_LOW_MODE: speex_encoder_ctl(st->st_low, SPEEX_SET_LOW_MODE, ptr); @@ -1284,22 +1123,22 @@ int sb_encoder_ctl(void *state, int request, void *ptr) speex_encoder_ctl(st, SPEEX_SET_QUALITY, ptr); break; case SPEEX_SET_VBR: - st->vbr_enabled = (*(int*)ptr); + st->vbr_enabled = (*(spx_int32_t*)ptr); speex_encoder_ctl(st->st_low, SPEEX_SET_VBR, ptr); break; case SPEEX_GET_VBR: - (*(int*)ptr) = st->vbr_enabled; + (*(spx_int32_t*)ptr) = st->vbr_enabled; break; case SPEEX_SET_VAD: - st->vad_enabled = (*(int*)ptr); + st->vad_enabled = (*(spx_int32_t*)ptr); speex_encoder_ctl(st->st_low, SPEEX_SET_VAD, ptr); break; case SPEEX_GET_VAD: - (*(int*)ptr) = st->vad_enabled; + (*(spx_int32_t*)ptr) = st->vad_enabled; break; case SPEEX_SET_VBR_QUALITY: { - int q; + spx_int32_t q; float qual = (*(float*)ptr)+.6; st->vbr_quality = (*(float*)ptr); if (qual>10) @@ -1320,7 +1159,7 @@ int sb_encoder_ctl(void *state, int request, void *ptr) speex_encoder_ctl(st->st_low, SPEEX_SET_VBR, &st->vbr_enabled); if (st->vbr_enabled) { - int i=10, rate, target; + spx_int32_t i=10, rate, target; float vbr_qual; target = (*(spx_int32_t*)ptr); while (i>=0) @@ -1346,8 +1185,8 @@ int sb_encoder_ctl(void *state, int request, void *ptr) break; case SPEEX_SET_QUALITY: { - int nb_qual; - int quality = (*(int*)ptr); + spx_int32_t nb_qual; + int quality = (*(spx_int32_t*)ptr); if (quality < 0) quality = 0; if (quality > 10) @@ -1359,16 +1198,16 @@ int sb_encoder_ctl(void *state, int request, void *ptr) break; case SPEEX_SET_COMPLEXITY: speex_encoder_ctl(st->st_low, SPEEX_SET_COMPLEXITY, ptr); - st->complexity = (*(int*)ptr); + st->complexity = (*(spx_int32_t*)ptr); if (st->complexity<1) st->complexity=1; break; case SPEEX_GET_COMPLEXITY: - (*(int*)ptr) = st->complexity; + (*(spx_int32_t*)ptr) = st->complexity; break; case SPEEX_SET_BITRATE: { - int i=10; + spx_int32_t i=10; spx_int32_t rate, target; target = (*(spx_int32_t*)ptr); while (i>=0) @@ -1409,22 +1248,20 @@ int sb_encoder_ctl(void *state, int request, void *ptr) st->lsp[i]=(M_PI*((float)(i+1)))/(st->lpcSize+1); for (i=0;ilpcSize;i++) st->mem_sw[i]=st->mem_sp[i]=st->mem_sp2[i]=0; - for (i=0;ibufSize;i++) - st->excBuf[i]=0; for (i=0;ih0_mem[i]=st->h1_mem[i]=st->g0_mem[i]=st->g1_mem[i]=0; + st->h0_mem[i]=st->h1_mem[i]=0; } break; case SPEEX_SET_SUBMODE_ENCODING: - st->encode_submode = (*(int*)ptr); - speex_encoder_ctl(st->st_low, SPEEX_SET_SUBMODE_ENCODING, &ptr); + st->encode_submode = (*(spx_int32_t*)ptr); + speex_encoder_ctl(st->st_low, SPEEX_SET_SUBMODE_ENCODING, ptr); break; case SPEEX_GET_SUBMODE_ENCODING: - (*(int*)ptr) = st->encode_submode; + (*(spx_int32_t*)ptr) = st->encode_submode; break; case SPEEX_GET_LOOKAHEAD: speex_encoder_ctl(st->st_low, SPEEX_GET_LOOKAHEAD, ptr); - (*(int*)ptr) = 2*(*(int*)ptr) + QMF_ORDER - 1; + (*(spx_int32_t*)ptr) = 2*(*(spx_int32_t*)ptr) + QMF_ORDER - 1; break; case SPEEX_SET_PLC_TUNING: speex_encoder_ctl(st->st_low, SPEEX_SET_PLC_TUNING, ptr); @@ -1483,28 +1320,15 @@ int sb_encoder_ctl(void *state, int request, void *ptr) case SPEEX_GET_EXC: { int i; - spx_sig_t *e = (spx_sig_t*)ptr; - for (i=0;ifull_frame_size;i++) - e[i]=0; - for (i=0;iframe_size;i++) - e[2*i]=2*st->exc[i]; - } - break; - case SPEEX_GET_INNOV: - { - int i; - spx_sig_t *e = (spx_sig_t*)ptr; - for (i=0;ifull_frame_size;i++) - e[i]=0; - for (i=0;iframe_size;i++) - e[2*i]=2*st->exc[i]; + for (i=0;inbSubframes;i++) + ((spx_word16_t*)ptr)[i] = st->exc_rms[i]; } break; case SPEEX_GET_RELATIVE_QUALITY: (*(float*)ptr)=st->relative_quality; break; case SPEEX_SET_INNOVATION_SAVE: - st->innov_save = (spx_sig_t*)ptr; + st->innov_rms_save = (spx_word16_t*)ptr; break; case SPEEX_SET_WIDEBAND: speex_encoder_ctl(st->st_low, SPEEX_SET_WIDEBAND, ptr); @@ -1524,7 +1348,7 @@ int sb_decoder_ctl(void *state, int request, void *ptr) switch(request) { case SPEEX_SET_HIGH_MODE: - st->submodeID = (*(int*)ptr); + st->submodeID = (*(spx_int32_t*)ptr); break; case SPEEX_SET_LOW_MODE: speex_decoder_ctl(st->st_low, SPEEX_SET_LOW_MODE, ptr); @@ -1533,20 +1357,20 @@ int sb_decoder_ctl(void *state, int request, void *ptr) speex_decoder_ctl(st->st_low, SPEEX_GET_LOW_MODE, ptr); break; case SPEEX_GET_FRAME_SIZE: - (*(int*)ptr) = st->full_frame_size; + (*(spx_int32_t*)ptr) = st->full_frame_size; break; case SPEEX_SET_ENH: speex_decoder_ctl(st->st_low, request, ptr); - st->lpc_enh_enabled = *((int*)ptr); + st->lpc_enh_enabled = *((spx_int32_t*)ptr); break; case SPEEX_GET_ENH: - *((int*)ptr) = st->lpc_enh_enabled; + *((spx_int32_t*)ptr) = st->lpc_enh_enabled; break; case SPEEX_SET_MODE: case SPEEX_SET_QUALITY: { - int nb_qual; - int quality = (*(int*)ptr); + spx_int32_t nb_qual; + int quality = (*(spx_int32_t*)ptr); if (quality < 0) quality = 0; if (quality > 10) @@ -1587,18 +1411,19 @@ int sb_decoder_ctl(void *state, int request, void *ptr) st->mem_sp[i]=0; for (i=0;ig0_mem[i]=st->g1_mem[i]=0; + st->last_ener=0; } break; case SPEEX_SET_SUBMODE_ENCODING: - st->encode_submode = (*(int*)ptr); - speex_decoder_ctl(st->st_low, SPEEX_SET_SUBMODE_ENCODING, &ptr); + st->encode_submode = (*(spx_int32_t*)ptr); + speex_decoder_ctl(st->st_low, SPEEX_SET_SUBMODE_ENCODING, ptr); break; case SPEEX_GET_SUBMODE_ENCODING: - (*(int*)ptr) = st->encode_submode; + (*(spx_int32_t*)ptr) = st->encode_submode; break; case SPEEX_GET_LOOKAHEAD: speex_decoder_ctl(st->st_low, SPEEX_GET_LOOKAHEAD, ptr); - (*(int*)ptr) = 2*(*(int*)ptr); + (*(spx_int32_t*)ptr) = 2*(*(spx_int32_t*)ptr); break; case SPEEX_SET_HIGHPASS: speex_decoder_ctl(st->st_low, SPEEX_SET_HIGHPASS, ptr); @@ -1618,28 +1443,15 @@ int sb_decoder_ctl(void *state, int request, void *ptr) case SPEEX_GET_EXC: { int i; - spx_sig_t *e = (spx_sig_t*)ptr; - for (i=0;ifull_frame_size;i++) - e[i]=0; - for (i=0;iframe_size;i++) - e[2*i]=2*st->exc[i]; - } - break; - case SPEEX_GET_INNOV: - { - int i; - spx_sig_t *e = (spx_sig_t*)ptr; - for (i=0;ifull_frame_size;i++) - e[i]=0; - for (i=0;iframe_size;i++) - e[2*i]=2*st->exc[i]; + for (i=0;inbSubframes;i++) + ((spx_word16_t*)ptr)[i] = st->exc_rms[i]; } break; case SPEEX_GET_DTX_STATUS: speex_decoder_ctl(st->st_low, SPEEX_GET_DTX_STATUS, ptr); break; case SPEEX_SET_INNOVATION_SAVE: - st->innov_save = (spx_sig_t*)ptr; + st->innov_save = (spx_word16_t*)ptr; break; case SPEEX_SET_WIDEBAND: speex_decoder_ctl(st->st_low, SPEEX_SET_WIDEBAND, ptr); diff --git a/apps/codecs/libspeex/sb_celp.h b/apps/codecs/libspeex/sb_celp.h index 4da03e4394..35997cb9dc 100644 --- a/apps/codecs/libspeex/sb_celp.h +++ b/apps/codecs/libspeex/sb_celp.h @@ -58,16 +58,9 @@ typedef struct SBEncState { spx_word16_t gamma2; /**< Perceptual weighting coef 2 */ char *stack; /**< Temporary allocation stack */ - spx_sig_t *x0d, *x1d; /**< QMF filter signals*/ - spx_sig_t *high; /**< High-band signal (buffer) */ - spx_sig_t *y0, *y1; /**< QMF synthesis signals */ + spx_word16_t *high; /**< High-band signal (buffer) */ spx_word16_t *h0_mem, *h1_mem; - spx_word32_t *g0_mem, *g1_mem; /**< QMF memories */ - spx_sig_t *excBuf; /**< High-band excitation */ - spx_sig_t *exc; /**< High-band excitation (for QMF only)*/ - spx_sig_t *res; /**< Zero-input response (ringing) */ - spx_sig_t *sw; /**< Perceptually weighted signal */ const spx_word16_t *window; /**< LPC analysis window */ spx_word16_t *lagWindow; /**< Auto-correlation window */ spx_word16_t *autocorr; /**< Auto-correlation (for LPC analysis) */ @@ -87,8 +80,8 @@ typedef struct SBEncState { spx_mem_t *mem_sp2; spx_mem_t *mem_sw; /**< Perceptual signal memory */ spx_word32_t *pi_gain; - spx_sig_t *innov_save; /**< If non-NULL, innovation is copied here */ - spx_sig_t *low_innov; /**< Lower-band innovation is copied here magically */ + spx_word16_t *exc_rms; + spx_word16_t *innov_rms_save; /**< If non-NULL, innovation is copied here */ float vbr_quality; /**< Quality setting for VBR encoding */ int vbr_enabled; /**< 1 for enabling VBR, 0 otherwise */ @@ -125,13 +118,9 @@ typedef struct SBDecState { int lpc_enh_enabled; char *stack; - spx_sig_t *x0d, *x1d; - spx_sig_t *high; - spx_sig_t *y0, *y1; spx_word32_t *g0_mem, *g1_mem; - spx_sig_t *exc; - spx_sig_t *excBuf; + spx_word16_t *excBuf; spx_lsp_t *qlsp; spx_lsp_t *old_qlsp; spx_lsp_t *interp_qlsp; @@ -139,9 +128,10 @@ typedef struct SBDecState { spx_mem_t *mem_sp; spx_word32_t *pi_gain; - spx_sig_t *innov_save; /** If non-NULL, innovation is copied here */ - spx_sig_t *low_innov; /** Lower-band innovation is copied here magically */ + spx_word16_t *exc_rms; + spx_word16_t *innov_save; /** If non-NULL, innovation is copied here */ + spx_word16_t last_ener; spx_int32_t seed; int encode_submode; diff --git a/apps/codecs/libspeex/speex.c b/apps/codecs/libspeex/speex.c index 0ddc01bea6..846e02186c 100644 --- a/apps/codecs/libspeex/speex.c +++ b/apps/codecs/libspeex/speex.c @@ -37,7 +37,7 @@ #endif #include "modes.h" -#include "math_approx.h" +#include #ifndef NULL #define NULL 0 @@ -86,7 +86,7 @@ int speex_decode_native(void *state, SpeexBits *bits, spx_word16_t *out) int speex_encode(void *state, float *in, SpeexBits *bits) { int i; - int N; + spx_int32_t N; spx_int16_t short_in[MAX_IN_SAMPLES]; speex_encoder_ctl(state, SPEEX_GET_FRAME_SIZE, &N); for (i=0;idec(state, bits, short_out); @@ -136,7 +136,7 @@ int speex_encode(void *state, float *in, SpeexBits *bits) int speex_encode_int(void *state, spx_int16_t *in, SpeexBits *bits) { int i; - int N; + spx_int32_t N; float float_in[MAX_IN_SAMPLES]; speex_encoder_ctl(state, SPEEX_GET_FRAME_SIZE, &N); for (i=0;i + typedef _G_int32_t spx_int32_t; + typedef _G_uint32_t spx_uint32_t; + typedef _G_int16_t spx_int16_t; + typedef _G_uint16_t spx_uint16_t; +# elif defined(__MINGW32__) + typedef short spx_int16_t; + typedef unsigned short spx_uint16_t; + typedef int spx_int32_t; + typedef unsigned int spx_uint32_t; +# elif defined(__MWERKS__) + typedef int spx_int32_t; + typedef unsigned int spx_uint32_t; + typedef short spx_int16_t; + typedef unsigned short spx_uint16_t; +# else + /* MSVC/Borland */ + typedef __int32 spx_int32_t; + typedef unsigned __int32 spx_uint32_t; + typedef __int16 spx_int16_t; + typedef unsigned __int16 spx_uint16_t; +# endif + +#elif defined(__MACOS__) + +# include + typedef SInt16 spx_int16_t; + typedef UInt16 spx_uint16_t; + typedef SInt32 spx_int32_t; + typedef UInt32 spx_uint32_t; + +#elif defined(__MACOSX__) /* MacOS X Framework build */ + +# include + typedef int16_t spx_int16_t; + typedef u_int16_t spx_uint16_t; + typedef int32_t spx_int32_t; + typedef u_int32_t spx_uint32_t; + +#elif defined(__BEOS__) + + /* Be */ +# include + typedef int16_t spx_int16_t; + typedef u_int16_t spx_uint16_t; + typedef int32_t spx_int32_t; + typedef u_int32_t spx_uint32_t; + +#elif defined (__EMX__) + + /* OS/2 GCC */ + typedef short spx_int16_t; + typedef unsigned short spx_uint16_t; + typedef int spx_int32_t; + typedef unsigned int spx_uint32_t; + +#elif defined (DJGPP) + + /* DJGPP */ + typedef short spx_int16_t; + typedef int spx_int32_t; + typedef unsigned int spx_uint32_t; + +#elif defined(R5900) + + /* PS2 EE */ + typedef int spx_int32_t; + typedef unsigned spx_uint32_t; + typedef short spx_int16_t; + +#elif defined(__SYMBIAN32__) + + /* Symbian GCC */ + typedef signed short spx_int16_t; + typedef unsigned short spx_uint16_t; + typedef signed int spx_int32_t; + typedef unsigned int spx_uint32_t; + +#elif defined(CONFIG_TI_C54X) || defined (CONFIG_TI_C55X) + + typedef short spx_int16_t; + typedef unsigned short spx_uint16_t; + typedef long spx_int32_t; + typedef unsigned long spx_uint32_t; + +#elif defined(CONFIG_TI_C6X) + + typedef short spx_int16_t; + typedef unsigned short spx_uint16_t; + typedef int spx_int32_t; + typedef unsigned int spx_uint32_t; + +#else + +# include "speex_config_types.h" + +#endif + #endif /* _SPEEX_TYPES_H */ diff --git a/apps/codecs/libspeex/speex_callbacks.c b/apps/codecs/libspeex/speex_callbacks.c index 0b99188797..682322e695 100644 --- a/apps/codecs/libspeex/speex_callbacks.c +++ b/apps/codecs/libspeex/speex_callbacks.c @@ -73,7 +73,7 @@ int speex_inband_handler(SpeexBits *bits, SpeexCallback *callback_list, void *st int speex_std_mode_request_handler(SpeexBits *bits, void *state, void *data) { - int m; + spx_int32_t m; m = speex_bits_unpack_unsigned(bits, 4); speex_encoder_ctl(data, SPEEX_SET_MODE, &m); return 0; @@ -81,7 +81,7 @@ int speex_std_mode_request_handler(SpeexBits *bits, void *state, void *data) int speex_std_low_mode_request_handler(SpeexBits *bits, void *state, void *data) { - int m; + spx_int32_t m; m = speex_bits_unpack_unsigned(bits, 4); speex_encoder_ctl(data, SPEEX_SET_LOW_MODE, &m); return 0; @@ -89,7 +89,7 @@ int speex_std_low_mode_request_handler(SpeexBits *bits, void *state, void *data) int speex_std_high_mode_request_handler(SpeexBits *bits, void *state, void *data) { - int m; + spx_int32_t m; m = speex_bits_unpack_unsigned(bits, 4); speex_encoder_ctl(data, SPEEX_SET_HIGH_MODE, &m); return 0; @@ -97,7 +97,7 @@ int speex_std_high_mode_request_handler(SpeexBits *bits, void *state, void *data int speex_std_vbr_request_handler(SpeexBits *bits, void *state, void *data) { - int vbr; + spx_int32_t vbr; vbr = speex_bits_unpack_unsigned(bits, 1); speex_encoder_ctl(data, SPEEX_SET_VBR, &vbr); return 0; @@ -105,7 +105,7 @@ int speex_std_vbr_request_handler(SpeexBits *bits, void *state, void *data) int speex_std_enh_request_handler(SpeexBits *bits, void *state, void *data) { - int enh; + spx_int32_t enh; enh = speex_bits_unpack_unsigned(bits, 1); speex_decoder_ctl(data, SPEEX_SET_ENH, &enh); return 0; @@ -113,7 +113,7 @@ int speex_std_enh_request_handler(SpeexBits *bits, void *state, void *data) int speex_std_vbr_quality_request_handler(SpeexBits *bits, void *state, void *data) { - int qual; + float qual; qual = speex_bits_unpack_unsigned(bits, 4); speex_encoder_ctl(data, SPEEX_SET_VBR_QUALITY, &qual); return 0; diff --git a/apps/codecs/libspeex/stereo.c b/apps/codecs/libspeex/stereo.c index 72adb45f73..f18387e572 100644 --- a/apps/codecs/libspeex/stereo.c +++ b/apps/codecs/libspeex/stereo.c @@ -36,7 +36,7 @@ #include #include #include "vq.h" -#include +#include /*float e_ratio_quant[4] = {1, 1.26, 1.587, 2};*/ static const float e_ratio_quant[4] = {.25f, .315f, .397f, .5f}; @@ -148,7 +148,7 @@ void speex_decode_stereo_int(spx_int16_t *data, int frame_size, SpeexStereoState { float balance, e_ratio; int i; - float e_tot=0.0, e_left, e_right, e_sum; + float e_tot=0, e_left, e_right, e_sum; balance=stereo->balance; e_ratio=stereo->e_ratio; @@ -157,11 +157,11 @@ void speex_decode_stereo_int(spx_int16_t *data, int frame_size, SpeexStereoState e_tot += ((float)data[i])*data[i]; } e_sum=e_tot/e_ratio; - e_left = e_sum*balance / (1.0+balance); + e_left = e_sum*balance / (1+balance); e_right = e_sum-e_left; - e_left = spx_sqrtf(e_left/(e_tot+.01)); - e_right = spx_sqrtf(e_right/(e_tot+.01)); + e_left = sqrt(e_left/(e_tot+.01)); + e_right = sqrt(e_right/(e_tot+.01)); for (i=frame_size-1;i>=0;i--) { @@ -183,7 +183,7 @@ int speex_std_stereo_request_handler(SpeexBits *bits, void *state, void *data) if (speex_bits_unpack_unsigned(bits, 1)) sign=-1; tmp = speex_bits_unpack_unsigned(bits, 5); - stereo->balance = spx_exp(sign*.25*tmp); + stereo->balance = exp(sign*.25*tmp); tmp = speex_bits_unpack_unsigned(bits, 2); stereo->e_ratio = e_ratio_quant[tmp]; diff --git a/apps/codecs/libspeex/testdenoise.c b/apps/codecs/libspeex/testdenoise.c new file mode 100644 index 0000000000..42644cb011 --- /dev/null +++ b/apps/codecs/libspeex/testdenoise.c @@ -0,0 +1,44 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include + +#define NN 160 + +int main() +{ + short in[NN]; + int i; + SpeexPreprocessState *st; + int count=0; + float f; + + st = speex_preprocess_state_init(NN, 8000); + i=1; + speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_DENOISE, &i); + i=0; + speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_AGC, &i); + f=8000; + speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_AGC_LEVEL, &f); + i=0; + speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_DEREVERB, &i); + f=.0; + speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_DEREVERB_DECAY, &f); + f=.0; + speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_DEREVERB_LEVEL, &f); + while (1) + { + int vad; + fread(in, sizeof(short), NN, stdin); + if (feof(stdin)) + break; + vad = speex_preprocess_run(st, in); + /*fprintf (stderr, "%d\n", vad);*/ + fwrite(in, sizeof(short), NN, stdout); + count++; + } + speex_preprocess_state_destroy(st); + return 0; +} diff --git a/apps/codecs/libspeex/testecho.c b/apps/codecs/libspeex/testecho.c new file mode 100644 index 0000000000..7c32c8f60e --- /dev/null +++ b/apps/codecs/libspeex/testecho.c @@ -0,0 +1,53 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include +#include +#include +#include "speex/speex_echo.h" +#include "speex/speex_preprocess.h" + + +#define NN 128 +#define TAIL 1024 + +int main(int argc, char **argv) +{ + int echo_fd, ref_fd, e_fd; + short echo_buf[NN], ref_buf[NN], e_buf[NN]; + SpeexEchoState *st; + SpeexPreprocessState *den; + + if (argc != 4) + { + fprintf (stderr, "testecho mic_signal.sw speaker_signal.sw output.sw\n"); + exit(1); + } + echo_fd = open (argv[2], O_RDONLY); + ref_fd = open (argv[1], O_RDONLY); + e_fd = open (argv[3], O_WRONLY | O_CREAT | O_TRUNC, 0644); + + st = speex_echo_state_init(NN, TAIL); + den = speex_preprocess_state_init(NN, 8000); + int tmp = 8000; + speex_echo_ctl(st, SPEEX_ECHO_SET_SAMPLING_RATE, &tmp); + speex_preprocess_ctl(den, SPEEX_PREPROCESS_SET_ECHO_STATE, st); + + while (read(ref_fd, ref_buf, NN*2)) + { + read(echo_fd, echo_buf, NN*2); + speex_echo_cancellation(st, ref_buf, echo_buf, e_buf); + speex_preprocess_run(den, e_buf); + write(e_fd, e_buf, NN*2); + } + speex_echo_state_destroy(st); + speex_preprocess_state_destroy(den); + close(e_fd); + close(echo_fd); + close(ref_fd); + return 0; +} diff --git a/apps/codecs/libspeex/testenc.c b/apps/codecs/libspeex/testenc.c new file mode 100644 index 0000000000..eabd02cc76 --- /dev/null +++ b/apps/codecs/libspeex/testenc.c @@ -0,0 +1,142 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include + +#ifdef FIXED_DEBUG +extern long long spx_mips; +#endif + +#define FRAME_SIZE 160 +#include +int main(int argc, char **argv) +{ + char *inFile, *outFile, *bitsFile; + FILE *fin, *fout, *fbits=NULL; + short in_short[FRAME_SIZE]; + short out_short[FRAME_SIZE]; + float sigpow,errpow,snr, seg_snr=0; + int snr_frames = 0; + char cbits[200]; + int nbBits; + int i; + void *st; + void *dec; + SpeexBits bits; + spx_int32_t tmp; + int bitCount=0; + spx_int32_t skip_group_delay; + SpeexCallback callback; + + sigpow = 0; + errpow = 0; + + st = speex_encoder_init(&speex_nb_mode); + dec = speex_decoder_init(&speex_nb_mode); + + /* BEGIN: You probably don't need the following in a real application */ + callback.callback_id = SPEEX_INBAND_CHAR; + callback.func = speex_std_char_handler; + callback.data = stderr; + speex_decoder_ctl(dec, SPEEX_SET_HANDLER, &callback); + + callback.callback_id = SPEEX_INBAND_MODE_REQUEST; + callback.func = speex_std_mode_request_handler; + callback.data = st; + speex_decoder_ctl(dec, SPEEX_SET_HANDLER, &callback); + /* END of unnecessary stuff */ + + tmp=1; + speex_decoder_ctl(dec, SPEEX_SET_ENH, &tmp); + tmp=0; + speex_encoder_ctl(st, SPEEX_SET_VBR, &tmp); + tmp=8; + speex_encoder_ctl(st, SPEEX_SET_QUALITY, &tmp); + tmp=1; + speex_encoder_ctl(st, SPEEX_SET_COMPLEXITY, &tmp); + + /* Turn this off if you want to measure SNR (on by default) */ + tmp=1; + speex_encoder_ctl(st, SPEEX_SET_HIGHPASS, &tmp); + speex_decoder_ctl(dec, SPEEX_SET_HIGHPASS, &tmp); + + speex_encoder_ctl(st, SPEEX_GET_LOOKAHEAD, &skip_group_delay); + speex_decoder_ctl(dec, SPEEX_GET_LOOKAHEAD, &tmp); + skip_group_delay += tmp; + + if (argc != 4 && argc != 3) + { + fprintf (stderr, "Usage: encode [in file] [out file] [bits file]\nargc = %d", argc); + exit(1); + } + inFile = argv[1]; + fin = fopen(inFile, "r"); + outFile = argv[2]; + fout = fopen(outFile, "w+"); + if (argc==4) + { + bitsFile = argv[3]; + fbits = fopen(bitsFile, "w"); + } + speex_bits_init(&bits); + while (!feof(fin)) + { + fread(in_short, sizeof(short), FRAME_SIZE, fin); + if (feof(fin)) + break; + speex_bits_reset(&bits); + + speex_encode_int(st, in_short, &bits); + nbBits = speex_bits_write(&bits, cbits, 200); + bitCount+=bits.nbBits; + + if (argc==4) + fwrite(cbits, 1, nbBits, fbits); + speex_bits_rewind(&bits); + + speex_decode_int(dec, &bits, out_short); + speex_bits_reset(&bits); + + fwrite(&out_short[skip_group_delay], sizeof(short), FRAME_SIZE-skip_group_delay, fout); + skip_group_delay = 0; + } + fprintf (stderr, "Total encoded size: %d bits\n", bitCount); + speex_encoder_destroy(st); + speex_decoder_destroy(dec); + speex_bits_destroy(&bits); + + /* This code just computes SNR, so you don't need it either */ + rewind(fin); + rewind(fout); + + while ( FRAME_SIZE == fread(in_short, sizeof(short), FRAME_SIZE, fin) + && + FRAME_SIZE == fread(out_short, sizeof(short), FRAME_SIZE,fout) ) + { + float s=0, e=0; + for (i=0;i +#include +#include +#include + +#ifdef FIXED_DEBUG +extern long long spx_mips; +#endif + +#define FRAME_SIZE 640 +#include +int main(int argc, char **argv) +{ + char *inFile, *outFile, *bitsFile; + FILE *fin, *fout, *fbits=NULL; + short in_short[FRAME_SIZE]; + short out_short[FRAME_SIZE]; + float in_float[FRAME_SIZE]; + float sigpow,errpow,snr, seg_snr=0; + int snr_frames = 0; + char cbits[200]; + int nbBits; + int i; + void *st; + void *dec; + SpeexBits bits; + spx_int32_t tmp; + int bitCount=0; + spx_int32_t skip_group_delay; + SpeexCallback callback; + + sigpow = 0; + errpow = 0; + + st = speex_encoder_init(&speex_uwb_mode); + dec = speex_decoder_init(&speex_uwb_mode); + + callback.callback_id = SPEEX_INBAND_CHAR; + callback.func = speex_std_char_handler; + callback.data = stderr; + speex_decoder_ctl(dec, SPEEX_SET_HANDLER, &callback); + + callback.callback_id = SPEEX_INBAND_MODE_REQUEST; + callback.func = speex_std_mode_request_handler; + callback.data = st; + speex_decoder_ctl(dec, SPEEX_SET_HANDLER, &callback); + + tmp=0; + speex_decoder_ctl(dec, SPEEX_SET_ENH, &tmp); + tmp=0; + speex_encoder_ctl(st, SPEEX_SET_VBR, &tmp); + tmp=7; + speex_encoder_ctl(st, SPEEX_SET_QUALITY, &tmp); + tmp=1; + speex_encoder_ctl(st, SPEEX_SET_COMPLEXITY, &tmp); + + speex_encoder_ctl(st, SPEEX_GET_LOOKAHEAD, &skip_group_delay); + speex_decoder_ctl(dec, SPEEX_GET_LOOKAHEAD, &tmp); + skip_group_delay += tmp; + + + if (argc != 4 && argc != 3) + { + fprintf (stderr, "Usage: encode [in file] [out file] [bits file]\nargc = %d", argc); + exit(1); + } + inFile = argv[1]; + fin = fopen(inFile, "r"); + outFile = argv[2]; + fout = fopen(outFile, "w+"); + if (argc==4) + { + bitsFile = argv[3]; + fbits = fopen(bitsFile, "w"); + } + speex_bits_init(&bits); + while (!feof(fin)) + { + fread(in_short, sizeof(short), FRAME_SIZE, fin); + if (feof(fin)) + break; + for (i=0;i +#include +#include +#include + +#ifdef FIXED_DEBUG +extern long long spx_mips; +#endif + +#define FRAME_SIZE 320 +#include +int main(int argc, char **argv) +{ + char *inFile, *outFile, *bitsFile; + FILE *fin, *fout, *fbits=NULL; + short in_short[FRAME_SIZE]; + short out_short[FRAME_SIZE]; + float in_float[FRAME_SIZE]; + float sigpow,errpow,snr, seg_snr=0; + int snr_frames = 0; + char cbits[200]; + int nbBits; + int i; + void *st; + void *dec; + SpeexBits bits; + spx_int32_t tmp; + int bitCount=0; + spx_int32_t skip_group_delay; + SpeexCallback callback; + + sigpow = 0; + errpow = 0; + + st = speex_encoder_init(&speex_wb_mode); + dec = speex_decoder_init(&speex_wb_mode); + + callback.callback_id = SPEEX_INBAND_CHAR; + callback.func = speex_std_char_handler; + callback.data = stderr; + speex_decoder_ctl(dec, SPEEX_SET_HANDLER, &callback); + + callback.callback_id = SPEEX_INBAND_MODE_REQUEST; + callback.func = speex_std_mode_request_handler; + callback.data = st; + speex_decoder_ctl(dec, SPEEX_SET_HANDLER, &callback); + + tmp=1; + speex_decoder_ctl(dec, SPEEX_SET_ENH, &tmp); + tmp=0; + speex_encoder_ctl(st, SPEEX_SET_VBR, &tmp); + tmp=8; + speex_encoder_ctl(st, SPEEX_SET_QUALITY, &tmp); + tmp=3; + speex_encoder_ctl(st, SPEEX_SET_COMPLEXITY, &tmp); + /*tmp=3; + speex_encoder_ctl(st, SPEEX_SET_HIGH_MODE, &tmp); + tmp=6; + speex_encoder_ctl(st, SPEEX_SET_LOW_MODE, &tmp); +*/ + + speex_encoder_ctl(st, SPEEX_GET_LOOKAHEAD, &skip_group_delay); + speex_decoder_ctl(dec, SPEEX_GET_LOOKAHEAD, &tmp); + skip_group_delay += tmp; + + + if (argc != 4 && argc != 3) + { + fprintf (stderr, "Usage: encode [in file] [out file] [bits file]\nargc = %d", argc); + exit(1); + } + inFile = argv[1]; + fin = fopen(inFile, "r"); + outFile = argv[2]; + fout = fopen(outFile, "w+"); + if (argc==4) + { + bitsFile = argv[3]; + fbits = fopen(bitsFile, "w"); + } + speex_bits_init(&bits); + while (!feof(fin)) + { + fread(in_short, sizeof(short), FRAME_SIZE, fin); + if (feof(fin)) + break; + for (i=0;i +#include "speex/speex_resampler.h" +#include +#include + +#define NN 256 + +int main(int argc, char **argv) +{ + int i; + short *in; + short *out; + float *fin, *fout; + int count = 0; + SpeexResamplerState *st = speex_resampler_init(1, 8000, 12000, 8000, 12000, 5); + speex_resampler_set_rate(st, 16000, 8001, 8000, 15999); + speex_resampler_skip_zeros(st); + + in = malloc(NN*sizeof(short)); + out = malloc(2*NN*sizeof(short)); + fin = malloc(NN*sizeof(float)); + fout = malloc(2*NN*sizeof(float)); + while (1) + { + int in_len; + int out_len; + fread(in, sizeof(short), NN, stdin); + if (feof(stdin)) + break; + for (i=0;i #define sqr(x) ((x)*(x)) diff --git a/apps/codecs/libspeex/vq.c b/apps/codecs/libspeex/vq.c index 8b659f00e4..d40133f3e8 100644 --- a/apps/codecs/libspeex/vq.c +++ b/apps/codecs/libspeex/vq.c @@ -41,7 +41,7 @@ #ifdef _USE_SSE #include #include "vq_sse.h" -#elif (defined(ARM4_ASM) || defined(ARM5E_ASM)) +#elif defined(SHORTCUTS) && (defined(ARM4_ASM) || defined(ARM5E_ASM)) #include "vq_arm4.h" #elif defined(BFIN_ASM) #include "vq_bfin.h" diff --git a/apps/codecs/libspeex/vq_arm4.h b/apps/codecs/libspeex/vq_arm4.h index b8c6457691..585b8613c5 100644 --- a/apps/codecs/libspeex/vq_arm4.h +++ b/apps/codecs/libspeex/vq_arm4.h @@ -35,12 +35,12 @@ #define OVERRIDE_VQ_NBEST void vq_nbest(spx_word16_t *in, const spx_word16_t *codebook, int len, int entries, spx_word32_t *E, int N, int *nbest, spx_word32_t *best_dist, char *stack) { - int i/*,j*/; + int i,j; for (i=0;i