mirror of
https://github.com/Rockbox/rockbox.git
synced 2025-11-21 11:02:45 -05:00
Add codecs to librbcodec.
Change-Id: Id7f4717d51ed02d67cb9f9cb3c0ada4a81843f97 Reviewed-on: http://gerrit.rockbox.org/137 Reviewed-by: Nils Wallménius <nils@rockbox.org> Tested-by: Nils Wallménius <nils@rockbox.org>
This commit is contained in:
parent
a0009907de
commit
f40bfc9267
757 changed files with 122 additions and 122 deletions
3
lib/rbcodec/codecs/libwma/SOURCES
Normal file
3
lib/rbcodec/codecs/libwma/SOURCES
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
wmadeci.c
|
||||
wmafixed.c
|
||||
../lib/ffmpeg_bitstream.c
|
||||
18
lib/rbcodec/codecs/libwma/libwma.make
Normal file
18
lib/rbcodec/codecs/libwma/libwma.make
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
# __________ __ ___.
|
||||
# Open \______ \ ____ ____ | | _\_ |__ _______ ___
|
||||
# Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
|
||||
# Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
|
||||
# Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
|
||||
# \/ \/ \/ \/ \/
|
||||
# $Id$
|
||||
#
|
||||
|
||||
# libwma: static archive built from the files listed in this directory's
# SOURCES. The preprocess and c2obj helpers are defined elsewhere in the
# build system.
WMALIB     := $(CODECDIR)/libwma.a
WMALIB_SRC := $(call preprocess, $(RBCODECLIB_DIR)/codecs/libwma/SOURCES)
WMALIB_OBJ := $(call c2obj, $(WMALIB_SRC))
OTHER_SRC  += $(WMALIB_SRC)

# Delete any existing archive first, then create it from all objects.
$(WMALIB): $(WMALIB_OBJ)
	$(SILENT)$(shell rm -f $@)
	$(call PRINTS,AR $(@F))$(AR) rcs $@ $^ >/dev/null
|
||||
5
lib/rbcodec/codecs/libwma/types.h
Normal file
5
lib/rbcodec/codecs/libwma/types.h
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
/* Codec library environment. NOTE(review): presumably this is what brings
 * int32_t/int64_t into scope -- confirm. */
#include <codecs/lib/codeclib.h>

/* Storage types for fixed-point values used by libwma. These are macros
 * (not typedefs), so they expand textually wherever they appear. */
#define fixed32 int32_t
#define fixed64 int64_t
|
||||
|
||||
2609
lib/rbcodec/codecs/libwma/wmadata.h
Normal file
2609
lib/rbcodec/codecs/libwma/wmadata.h
Normal file
File diff suppressed because it is too large
Load diff
181
lib/rbcodec/codecs/libwma/wmadec.h
Normal file
181
lib/rbcodec/codecs/libwma/wmadec.h
Normal file
|
|
@ -0,0 +1,181 @@
|
|||
/*
|
||||
* WMA compatible decoder
|
||||
* Copyright (c) 2002 The FFmpeg Project.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
|
||||
#ifndef _WMADEC_H
|
||||
#define _WMADEC_H
|
||||
|
||||
#include <codecs/libasf/asf.h>
|
||||
#include "ffmpeg_get_bits.h"
|
||||
#include "types.h"
|
||||
|
||||
//#define TRACE

/* Block sizes are powers of two: log2 bounds, the largest block length,
 * and the number of distinct sizes between the bounds (inclusive). */
#define BLOCK_MIN_BITS 7
#define BLOCK_MAX_BITS 11
#define BLOCK_MAX_SIZE (1 << BLOCK_MAX_BITS)

#define BLOCK_NB_SIZES (BLOCK_MAX_BITS - BLOCK_MIN_BITS + 1)

/* XXX: find exact max size */
#define HIGH_BAND_MAX_SIZE 16

#define NB_LSP_COEFS 10

/* XXX: is it a suitable value ? */
#define MAX_CODED_SUPERFRAME_SIZE 16384

#define M_PI 3.14159265358979323846

#define M_PI_F 0x3243f      /* pi in fixed32 (16.16) format */
#define TWO_M_PI_F 0x6487f  /* 2*pi in fixed32 (16.16) format */

#define MAX_CHANNELS 2

#define NOISE_TAB_SIZE 8192

#define LSP_POW_BITS 7

/*
 * Per-target IRAM placement attributes. Each macro expands either to the
 * corresponding IRAM attribute or to nothing, depending on the CPU.
 */
#if (CONFIG_CPU == PP5022) || (CONFIG_CPU == PP5024) || (CONFIG_CPU == MCF5250)
/* PP5022/24 and MCF5250 have 128KB of IRAM. 80KB are allocated for codecs */
#define IBSS_ATTR_WMA_LARGE_IRAM IBSS_ATTR
#define IBSS_ATTR_WMA_XL_IRAM
#define ICONST_ATTR_WMA_XL_IRAM

#elif defined(CPU_S5L870X)
/* S5L870x has even more IRAM. Use it. */
#define IBSS_ATTR_WMA_LARGE_IRAM IBSS_ATTR
#define IBSS_ATTR_WMA_XL_IRAM IBSS_ATTR
#define ICONST_ATTR_WMA_XL_IRAM ICONST_ATTR

#else
/* other PP's and MCF5249 have 96KB of IRAM */
#define IBSS_ATTR_WMA_LARGE_IRAM
#define IBSS_ATTR_WMA_XL_IRAM
#define ICONST_ATTR_WMA_XL_IRAM

#endif

/*
 * VLC lookup widths. Each *MAX value is a ceiling division of a fixed bit
 * count (22, 19, 13) by the matching table width.
 */
#define VLCBITS 7 /* 7 is the lowest without glitching */
#define VLCMAX ((22+VLCBITS-1)/VLCBITS)

#define EXPVLCBITS 7
#define EXPMAX ((19+EXPVLCBITS-1)/EXPVLCBITS)

#define HGAINVLCBITS 9
#define HGAINMAX ((13+HGAINVLCBITS-1)/HGAINVLCBITS)
||||
|
||||
|
||||
/* Description of one coefficient VLC code set: the code count plus three
 * parallel read-only tables. */
typedef struct CoefVLCTable {
    int n;                      /* total number of codes */
    const uint32_t *huffcodes;  /* VLC bit values */
    const uint8_t *huffbits;    /* VLC bit size */
    const uint16_t *levels;     /* table to build run/level tables */
} CoefVLCTable;
|
||||
|
||||
typedef struct WMADecodeContext
|
||||
{
|
||||
GetBitContext gb;
|
||||
|
||||
int nb_block_sizes; /* number of block sizes */
|
||||
|
||||
int sample_rate;
|
||||
int nb_channels;
|
||||
int bit_rate;
|
||||
int version; /* 1 = 0x160 (WMAV1), 2 = 0x161 (WMAV2) */
|
||||
int block_align;
|
||||
int use_bit_reservoir;
|
||||
int use_variable_block_len;
|
||||
int use_exp_vlc; /* exponent coding: 0 = lsp, 1 = vlc + delta */
|
||||
int use_noise_coding; /* true if perceptual noise is added */
|
||||
int byte_offset_bits;
|
||||
VLC exp_vlc;
|
||||
int exponent_sizes[BLOCK_NB_SIZES];
|
||||
uint16_t exponent_bands[BLOCK_NB_SIZES][25];
|
||||
int high_band_start[BLOCK_NB_SIZES]; /* index of first coef in high band */
|
||||
int coefs_start; /* first coded coef */
|
||||
int coefs_end[BLOCK_NB_SIZES]; /* max number of coded coefficients */
|
||||
int exponent_high_sizes[BLOCK_NB_SIZES];
|
||||
int exponent_high_bands[BLOCK_NB_SIZES][HIGH_BAND_MAX_SIZE];
|
||||
VLC hgain_vlc;
|
||||
|
||||
/* coded values in high bands */
|
||||
int high_band_coded[MAX_CHANNELS][HIGH_BAND_MAX_SIZE];
|
||||
int high_band_values[MAX_CHANNELS][HIGH_BAND_MAX_SIZE];
|
||||
|
||||
/* there are two possible tables for spectral coefficients */
|
||||
VLC coef_vlc[2];
|
||||
uint16_t *run_table[2];
|
||||
uint16_t *level_table[2];
|
||||
/* frame info */
|
||||
int frame_len; /* frame length in samples */
|
||||
int frame_len_bits; /* frame_len = 1 << frame_len_bits */
|
||||
|
||||
/* block info */
|
||||
int reset_block_lengths;
|
||||
int block_len_bits; /* log2 of current block length */
|
||||
int next_block_len_bits; /* log2 of next block length */
|
||||
int prev_block_len_bits; /* log2 of prev block length */
|
||||
int block_len; /* block length in samples */
|
||||
int block_num; /* block number in current frame */
|
||||
int block_pos; /* current position in frame */
|
||||
uint8_t ms_stereo; /* true if mid/side stereo mode */
|
||||
uint8_t channel_coded[MAX_CHANNELS]; /* true if channel is coded */
|
||||
int exponents_bsize[MAX_CHANNELS]; // log2 ratio frame/exp. length
|
||||
fixed32 exponents[MAX_CHANNELS][BLOCK_MAX_SIZE] MEM_ALIGN_ATTR;
|
||||
fixed32 max_exponent[MAX_CHANNELS];
|
||||
int16_t coefs1[MAX_CHANNELS][BLOCK_MAX_SIZE];
|
||||
fixed32 (*coefs)[MAX_CHANNELS][BLOCK_MAX_SIZE];
|
||||
fixed32 *windows[BLOCK_NB_SIZES];
|
||||
/* output buffer for one frame and the last for IMDCT windowing */
|
||||
fixed32 (*frame_out)[MAX_CHANNELS][BLOCK_MAX_SIZE*2];
|
||||
|
||||
/* last frame info */
|
||||
uint8_t last_superframe[MAX_CODED_SUPERFRAME_SIZE + 4] MEM_ALIGN_ATTR; /* padding added */
|
||||
int last_bitoffset;
|
||||
int last_superframe_len;
|
||||
fixed32 *noise_table;
|
||||
int noise_index;
|
||||
fixed32 noise_mult; /* XXX: suppress that and integrate it in the noise array */
|
||||
/* lsp_to_curve tables */
|
||||
fixed32 lsp_cos_table[BLOCK_MAX_SIZE] MEM_ALIGN_ATTR;
|
||||
void *lsp_pow_m_table1;
|
||||
void *lsp_pow_m_table2;
|
||||
|
||||
/* State of current superframe decoding */
|
||||
int bit_offset;
|
||||
int nb_frames;
|
||||
int current_frame;
|
||||
|
||||
#ifdef TRACE
|
||||
|
||||
int frame_count;
|
||||
#endif
|
||||
}
|
||||
WMADecodeContext;
|
||||
|
||||
/*
 * Decoder entry points. Their definitions are not visible in this header.
 * NOTE(review): presumably implemented in wmadeci.c, with the usual
 * init / per-superframe / per-frame call order -- confirm against the source.
 */

/* Set up the decoder context `s` from the ASF wave format description. */
int wma_decode_init(WMADecodeContext *s, asf_waveformatex_t *wfx);

/* Start decoding the superframe held in buf[0..buf_size). */
int wma_decode_superframe_init(WMADecodeContext *s,
                               const uint8_t *buf, int buf_size);

/* Decode one frame of the superframe held in buf[0..buf_size). */
int wma_decode_superframe_frame(WMADecodeContext *s,
                                const uint8_t *buf, int buf_size);
|
||||
#endif
|
||||
1445
lib/rbcodec/codecs/libwma/wmadeci.c
Normal file
1445
lib/rbcodec/codecs/libwma/wmadeci.c
Normal file
File diff suppressed because it is too large
Load diff
223
lib/rbcodec/codecs/libwma/wmafixed.c
Normal file
223
lib/rbcodec/codecs/libwma/wmafixed.c
Normal file
|
|
@ -0,0 +1,223 @@
|
|||
/****************************************************************************
|
||||
* __________ __ ___.
|
||||
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
|
||||
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
|
||||
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
|
||||
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
|
||||
* \/ \/ \/ \/ \/
|
||||
*
|
||||
* Copyright (C) 2007 Michael Giacomelli
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
|
||||
* KIND, either express or implied.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
#include "wmadec.h"
|
||||
#include "wmafixed.h"
|
||||
#include <codecs.h>
|
||||
|
||||
fixed64 IntTo64(int x){
|
||||
fixed64 res = 0;
|
||||
unsigned char *p = (unsigned char *)&res;
|
||||
|
||||
#ifdef ROCKBOX_BIG_ENDIAN
|
||||
p[5] = x & 0xff;
|
||||
p[4] = (x & 0xff00)>>8;
|
||||
p[3] = (x & 0xff0000)>>16;
|
||||
p[2] = (x & 0xff000000)>>24;
|
||||
#else
|
||||
p[2] = x & 0xff;
|
||||
p[3] = (x & 0xff00)>>8;
|
||||
p[4] = (x & 0xff0000)>>16;
|
||||
p[5] = (x & 0xff000000)>>24;
|
||||
#endif
|
||||
return res;
|
||||
}
|
||||
|
||||
/*
 * Extract the integer part of a fixed64 value: the 32 bits that sit directly
 * above the PRECISION64 fractional bits, reinterpreted as a signed integer.
 *
 * This yields the same bits as the former byte-wise version (bytes 2..5 of
 * x) but needs no endianness #ifdef. It also avoids evaluating
 * `p[5] << 24` on a promoted int, which overflows (undefined behaviour)
 * whenever that byte is 0x80 or larger.
 */
int IntFrom64(fixed64 x)
{
    /* Shift as unsigned so the operation is fully defined, then truncate. */
    const uint32_t int_bits = (uint32_t)((uint64_t)x >> PRECISION64);

    return (int)(int32_t)int_bits;
}
|
||||
|
||||
/* Narrow a fixed64 to fixed32 by keeping only its low 32 bits. */
fixed32 Fixed32From64(fixed64 x)
{
    const fixed64 low_word = x & 0xFFFFFFFF;

    return low_word;
}
|
||||
|
||||
/* Widen a fixed32 to fixed64; the numeric value is carried over unchanged
 * (no rescaling is applied). */
fixed64 Fixed32To64(fixed32 x)
{
    fixed64 wide = x;

    return wide;
}
|
||||
|
||||
/*
|
||||
Not performance sensitive code here
|
||||
|
||||
*/
|
||||
|
||||
/*
 * Fixed-point division x / y for values with PRECISION fractional bits.
 *
 * Returns 0 when x is 0 (checked first, so 0/0 is 0) and saturates to
 * 0x7fffffff when y is 0. Otherwise the dividend is pre-scaled in 64 bits so
 * the quotient keeps PRECISION fractional bits.
 */
fixed32 fixdiv32(fixed32 x, fixed32 y)
{
    fixed64 scaled;

    if (x == 0) {
        return 0;
    }
    if (y == 0) {
        return 0x7fffffff;
    }

    scaled = x;
    scaled <<= PRECISION;
    return (fixed32)(scaled / y);
}
|
||||
|
||||
/*
 * Fixed-point division x / y for values with PRECISION64 fractional bits.
 *
 * Returns 0 when x is 0 (checked first, so 0/0 is 0). When y is 0 it returns
 * the constant 0x07ffffffffffffff.
 * NOTE(review): that constant is not INT64_MAX (it has an extra leading
 * zero digit); kept as-is because callers may depend on it -- confirm.
 */
fixed64 fixdiv64(fixed64 x, fixed64 y)
{
    fixed64 scaled;

    if (x == 0) {
        return 0;
    }
    if (y == 0) {
        return 0x07ffffffffffffffLL;
    }

    scaled = x;
    scaled <<= PRECISION64;
    return (fixed64)(scaled / y);
}
|
||||
|
||||
/*
 * Square root of a fixed32 value.
 *
 * Runs 16 rounds of the bit-by-bit integer square root on the raw bits of x
 * (bit pairs 30 down to 0), then shifts the integer root left by
 * PRECISION / 2 to put it back into fixed-point scale.
 *
 * x is reinterpreted as unsigned, so a negative input does not produce a
 * meaningful result.
 */
fixed32 fixsqrt32(fixed32 x)
{
    unsigned long root = 0;
    unsigned long remainder = (unsigned long)x;
    int k;

    for (k = 15; k >= 0; k--) {
        const unsigned long bit = 1UL << (k * 2);
        const unsigned long trial = root + bit;

        root >>= 1;
        if (trial <= remainder) {
            remainder -= trial;
            root |= bit;
        }
    }

    return (fixed32)(root << (PRECISION / 2));
}
|
||||
|
||||
|
||||
/*
 * CORDIC gain constant, magnitude 0.607252929. The bit pattern has its top
 * bit set, so once it is stored in a 32-bit signed variable (as fsincos
 * does) it represents the negated gain.
 */
static const long cordic_circular_gain = 0xb2458939; /* 0.607252929 */

/* Table of values of atan(2^-i) in 0.32 format fractions of pi where pi = 0xffffffff / 2 */
static const unsigned long atan_table[] = {
    /* i =  0 */ 0x1fffffff, /* +0.785398163 (or pi/4) */
    /* i =  1 */ 0x12e4051d, /* +0.463647609 */
    /* i =  2 */ 0x09fb385b, /* +0.244978663 */
    /* i =  3 */ 0x051111d4, /* +0.124354995 */
    /* i =  4 */ 0x028b0d43, /* +0.062418810 */
    /* i =  5 */ 0x0145d7e1, /* +0.031239833 */
    /* i =  6 */ 0x00a2f61e, /* +0.015623729 */
    /* i =  7 */ 0x00517c55, /* +0.007812341 */
    /* i =  8 */ 0x0028be53, /* +0.003906230 */
    /* i =  9 */ 0x00145f2e, /* +0.001953123 */
    /* i = 10 */ 0x000a2f98, /* +0.000976562 */
    /* i = 11 */ 0x000517cc, /* +0.000488281 */
    /* i = 12 */ 0x00028be6, /* +0.000244141 */
    /* i = 13 */ 0x000145f3, /* +0.000122070 */
    /* i = 14 */ 0x0000a2f9, /* +0.000061035 */
    /* i = 15 */ 0x0000517c, /* +0.000030518 */
    /* i = 16 */ 0x000028be, /* +0.000015259 */
    /* i = 17 */ 0x0000145f, /* +0.000007629 */
    /* i = 18 */ 0x00000a2f, /* +0.000003815 */
    /* i = 19 */ 0x00000517, /* +0.000001907 */
    /* i = 20 */ 0x0000028b, /* +0.000000954 */
    /* i = 21 */ 0x00000145, /* +0.000000477 */
    /* i = 22 */ 0x000000a2, /* +0.000000238 */
    /* i = 23 */ 0x00000051, /* +0.000000119 */
    /* i = 24 */ 0x00000028, /* +0.000000060 */
    /* i = 25 */ 0x00000014, /* +0.000000030 */
    /* i = 26 */ 0x0000000a, /* +0.000000015 */
    /* i = 27 */ 0x00000005, /* +0.000000007 */
    /* i = 28 */ 0x00000002, /* +0.000000004 */
    /* i = 29 */ 0x00000001, /* +0.000000002 */
    /* i = 30 */ 0x00000000, /* +0.000000001 */
    /* i = 31 */ 0x00000000, /* +0.000000000 */
};
|
||||
|
||||
/**
|
||||
* Implements sin and cos using CORDIC rotation.
|
||||
*
|
||||
* @param phase has range from 0 to 0xffffffff, representing 0 and
|
||||
* 2*pi respectively.
|
||||
* @param cos return address for cos
|
||||
* @return sin of phase, value is a signed value from LONG_MIN to LONG_MAX,
|
||||
* representing -1 and 1 respectively.
|
||||
*
|
||||
* Gives at least 24 bits precision (last 2-8 bits or so are probably off)
|
||||
*/
|
||||
|
||||
long fsincos(unsigned long phase, fixed32 *cos)
|
||||
{
|
||||
int32_t x, x1, y, y1;
|
||||
unsigned long z, z1;
|
||||
int i;
|
||||
|
||||
/* Setup initial vector */
|
||||
x = cordic_circular_gain;
|
||||
y = 0;
|
||||
z = phase;
|
||||
|
||||
/* The phase has to be somewhere between 0..pi for this to work right */
|
||||
if (z < 0xffffffff / 4) {
|
||||
/* z in first quadrant, z += pi/2 to correct */
|
||||
x = -x;
|
||||
z += 0xffffffff / 4;
|
||||
} else if (z < 3 * (0xffffffff / 4)) {
|
||||
/* z in third quadrant, z -= pi/2 to correct */
|
||||
z -= 0xffffffff / 4;
|
||||
} else {
|
||||
/* z in fourth quadrant, z -= 3pi/2 to correct */
|
||||
x = -x;
|
||||
z -= 3 * (0xffffffff / 4);
|
||||
}
|
||||
|
||||
/* Each iteration adds roughly 1-bit of extra precision */
|
||||
for (i = 0; i < 31; i++) {
|
||||
x1 = x >> i;
|
||||
y1 = y >> i;
|
||||
z1 = atan_table[i];
|
||||
|
||||
/* Decided which direction to rotate vector. Pivot point is pi/2 */
|
||||
if (z >= 0xffffffff / 4) {
|
||||
x -= y1;
|
||||
y += x1;
|
||||
z -= z1;
|
||||
} else {
|
||||
x += y1;
|
||||
y -= x1;
|
||||
z += z1;
|
||||
}
|
||||
}
|
||||
|
||||
if (cos)
|
||||
*cos = x;
|
||||
|
||||
return y;
|
||||
}
|
||||
237
lib/rbcodec/codecs/libwma/wmafixed.h
Normal file
237
lib/rbcodec/codecs/libwma/wmafixed.h
Normal file
|
|
@ -0,0 +1,237 @@
|
|||
/****************************************************************************
|
||||
* __________ __ ___.
|
||||
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
|
||||
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
|
||||
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
|
||||
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
|
||||
* \/ \/ \/ \/ \/
|
||||
*
|
||||
* Copyright (C) 2007 Michael Giacomelli
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
|
||||
* KIND, either express or implied.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
/* fixed precision code. We use a combination of Sign 15.16 and Sign.31
|
||||
precision here.
|
||||
|
||||
The WMA decoder does not always follow this convention, and occasionally
|
||||
renormalizes values to other formats in order to maximize precision.
|
||||
However, only the two precisions above are provided in this file.
|
||||
|
||||
*/
|
||||
|
||||
/*
 * This header defines static inline functions further down, so including it
 * twice in one translation unit would fail with redefinition errors.
 * Let the compiler include it only once.
 */
#pragma once

#include "types.h"

/* Number of fractional bits in fixed32 and fixed64 values respectively. */
#define PRECISION 16
#define PRECISION64 16


/* Conversions between float/int and the fixed-point formats. */
#define fixtof64(x) (float)((float)(x) / (float)(1 << PRECISION64)) //does not work on int64_t!
#define ftofix32(x) ((fixed32)((x) * (float)(1 << PRECISION) + ((x) < 0 ? -0.5 : 0.5)))
#define itofix64(x) (IntTo64(x))
#define itofix32(x) ((x) << PRECISION)
#define fixtoi32(x) ((x) >> PRECISION)
#define fixtoi64(x) (IntFrom64(x))


/*fixed functions*/

/* Integer <-> fixed64 conversions (used by itofix64 / fixtoi64). */
fixed64 IntTo64(int x);
int IntFrom64(fixed64 x);

/* Width conversions between fixed32 and fixed64. */
fixed32 Fixed32From64(fixed64 x);
fixed64 Fixed32To64(fixed32 x);

/* Fixed-point division and square root. */
fixed32 fixdiv32(fixed32 x, fixed32 y);
fixed64 fixdiv64(fixed64 x, fixed64 y);
fixed32 fixsqrt32(fixed32 x);

/* CORDIC sine/cosine: returns sin(phase) and, when `cos` is non-NULL, stores
 * cos(phase) through it. phase 0..0xffffffff represents 0..2*pi. */
long fsincos(unsigned long phase, fixed32 *cos);
|
||||
|
||||
|
||||
#ifdef CPU_ARM
|
||||
|
||||
/*
 * Sign-15.16 multiply for ARM.
 *
 * smull forms the 64-bit product in __lo/__hi. movs shifts the low word
 * right by PRECISION and leaves the last bit shifted out in the carry flag;
 * adc then combines it with the high word shifted left by 32 - PRECISION
 * and adds that carry, which rounds the result.
 */
#define fixmul32(x, y)                                          \
    ({ int32_t __hi;                                            \
       uint32_t __lo;                                           \
       int32_t __result;                                        \
       asm ("smull %0, %1, %3, %4\n\t"                          \
            "movs %0, %0, lsr %5\n\t"                           \
            "adc %2, %0, %1, lsl %6"                            \
            : "=&r" (__lo), "=&r" (__hi), "=r" (__result)       \
            : "%r" (x), "r" (y),                                \
              "M" (PRECISION), "M" (32 - PRECISION)             \
            : "cc");                                            \
       __result;                                                \
    })
|
||||
|
||||
#elif defined(CPU_COLDFIRE)
|
||||
|
||||
static inline int32_t fixmul32(int32_t x, int32_t y)
|
||||
{
|
||||
#if PRECISION != 16
|
||||
#warning Coldfire fixmul32() only works for PRECISION == 16
|
||||
#endif
|
||||
int32_t t1;
|
||||
asm (
|
||||
"mac.l %[x], %[y], %%acc0 \n" // multiply
|
||||
"mulu.l %[y], %[x] \n" // get lower half, avoid emac stall
|
||||
"movclr.l %%acc0, %[t1] \n" // get higher half
|
||||
"lsr.l #1, %[t1] \n"
|
||||
"move.w %[t1], %[x] \n"
|
||||
"swap %[x] \n"
|
||||
: [t1] "=&d" (t1), [x] "+d" (x)
|
||||
: [y] "d" (y)
|
||||
);
|
||||
return x;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/*
 * Portable fixed32 multiply: form the full product in 64 bits, then drop
 * PRECISION fractional bits (truncating shift, no rounding).
 */
static inline fixed32 fixmul32(fixed32 x, fixed32 y)
{
    fixed64 product = (fixed64)x * y;

    return (fixed32)(product >> PRECISION);
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* Helper functions for wma_window.
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
#ifdef CPU_ARM
|
||||
/*
 * ARM: dst[i] += (high word of data[i] * window[i]) << 1, for n elements.
 *
 * Two elements are handled per loop pass and the loop only ends when the
 * counter reaches exactly zero, so n must be a positive even number.
 */
static inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *data,
                                       const fixed32 *window, int n)
{
    /* Block sizes are always power of two */
    asm volatile (
        "0:"
        "ldmia %[d]!, {r0, r1};"
        "ldmia %[w]!, {r4, r5};"
        /* consume the first data and window value so we can use those
         * registers again */
        "smull r8, r9, r0, r4;"
        "ldmia %[dst], {r0, r4};"
        "add r0, r0, r9, lsl #1;" /* *dst=*dst+(r9<<1)*/
        "smull r8, r9, r1, r5;"
        "add r1, r4, r9, lsl #1;"
        "stmia %[dst]!, {r0, r1};"
        "subs %[n], %[n], #2;"
        "bne 0b;"
        : [d] "+r" (data), [w] "+r" (window), [dst] "+r" (dst), [n] "+r" (n)
        :
        : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc");
}
|
||||
|
||||
/*
 * ARM: dst[i] = (high word of src0[i] * src1[len - 1 - i]) << 1.
 *
 * src1 is first advanced to its end and then read backwards. Two elements
 * are handled per loop pass and the loop only ends when the counter reaches
 * exactly zero, so len must be a positive even number.
 */
static inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0,
                                       const fixed32 *src1, int len)
{
    /* Block sizes are always power of two */
    asm volatile (
        "add %[s1], %[s1], %[n], lsl #2;"
        "0:"
        "ldmia %[s0]!, {r0, r1};"
        "ldmdb %[s1]!, {r4, r5};"
        "smull r8, r9, r0, r5;"
        "mov r0, r9, lsl #1;"
        "smull r8, r9, r1, r4;"
        "mov r1, r9, lsl #1;"
        "stmia %[dst]!, {r0, r1};"
        "subs %[n], %[n], #2;"
        "bne 0b;"
        : [s0] "+r" (src0), [s1] "+r" (src1), [dst] "+r" (dst), [n] "+r" (len)
        :
        : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc");
}
|
||||
|
||||
#elif defined(CPU_COLDFIRE)
|
||||
|
||||
/*
 * Coldfire: dst[i] += data[i] * window[i], with the products formed in the
 * four MAC accumulators.
 *
 * Four elements are handled per loop pass and the loop only ends when the
 * counter reaches exactly zero, so n must be a positive multiple of four.
 * NOTE(review): the scaling of each product depends on the MAC unit mode,
 * which is configured elsewhere -- confirm.
 */
static inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *data,
                                       const fixed32 *window, int n)
{
    /* Block sizes are always power of two. Smallest block is always way bigger
     * than four too.*/
    asm volatile (
        "0:"
        "movem.l (%[d]), %%d0-%%d3;"
        "movem.l (%[w]), %%d4-%%d5/%%a0-%%a1;"
        "mac.l %%d0, %%d4, %%acc0;"
        "mac.l %%d1, %%d5, %%acc1;"
        "mac.l %%d2, %%a0, %%acc2;"
        "mac.l %%d3, %%a1, %%acc3;"
        "lea.l (16, %[d]), %[d];"
        "lea.l (16, %[w]), %[w];"
        "movclr.l %%acc0, %%d0;"
        "movclr.l %%acc1, %%d1;"
        "movclr.l %%acc2, %%d2;"
        "movclr.l %%acc3, %%d3;"
        "movem.l (%[dst]), %%d4-%%d5/%%a0-%%a1;"
        "add.l %%d4, %%d0;"
        "add.l %%d5, %%d1;"
        "add.l %%a0, %%d2;"
        "add.l %%a1, %%d3;"
        "movem.l %%d0-%%d3, (%[dst]);"
        "lea.l (16, %[dst]), %[dst];"
        "subq.l #4, %[n];"
        "jne 0b;"
        : [d] "+a" (data), [w] "+a" (window), [dst] "+a" (dst), [n] "+d" (n)
        :
        : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc");
}
|
||||
|
||||
/*
 * Coldfire: dst[i] = src0[i] * src1[len - 1 - i], with the products formed
 * in the four MAC accumulators.
 *
 * src1 is first moved to its last group of four and then walked backwards.
 * Four elements are handled per loop pass and the loop only ends when the
 * counter reaches exactly zero, so len must be a positive multiple of four.
 * NOTE(review): the scaling of each product depends on the MAC unit mode,
 * which is configured elsewhere -- confirm.
 */
static inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0,
                                       const fixed32 *src1, int len)
{
    /* Block sizes are always power of two. Smallest block is always way bigger
     * than four too.*/
    asm volatile (
        "lea.l (-16, %[s1], %[n]*4), %[s1];"
        "0:"
        "movem.l (%[s0]), %%d0-%%d3;"
        "movem.l (%[s1]), %%d4-%%d5/%%a0-%%a1;"
        "mac.l %%d0, %%a1, %%acc0;"
        "mac.l %%d1, %%a0, %%acc1;"
        "mac.l %%d2, %%d5, %%acc2;"
        "mac.l %%d3, %%d4, %%acc3;"
        "lea.l (16, %[s0]), %[s0];"
        "lea.l (-16, %[s1]), %[s1];"
        "movclr.l %%acc0, %%d0;"
        "movclr.l %%acc1, %%d1;"
        "movclr.l %%acc2, %%d2;"
        "movclr.l %%acc3, %%d3;"
        "movem.l %%d0-%%d3, (%[dst]);"
        "lea.l (16, %[dst]), %[dst];"
        "subq.l #4, %[n];"
        "jne 0b;"
        : [s0] "+a" (src0), [s1] "+a" (src1), [dst] "+a" (dst), [n] "+d" (len)
        :
        : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc");
}
|
||||
|
||||
#else
|
||||
|
||||
/*
 * Portable fallback: dst[i] += fixmul32b(src0[i], src1[i]) for len elements.
 * fixmul32b is not defined in this header; it must be supplied by the
 * including code.
 */
static inline void vector_fmul_add_add(fixed32 *dst, const fixed32 *src0,
                                       const fixed32 *src1, int len)
{
    int idx = 0;

    while (idx < len) {
        dst[idx] = fixmul32b(src0[idx], src1[idx]) + dst[idx];
        idx++;
    }
}
|
||||
|
||||
/*
 * Portable fallback: dst[i] = fixmul32b(src0[i], src1[len - 1 - i]), i.e.
 * src0 is multiplied element-wise with src1 read back to front.
 * fixmul32b is not defined in this header; it must be supplied by the
 * including code.
 */
static inline void vector_fmul_reverse(fixed32 *dst, const fixed32 *src0,
                                       const fixed32 *src1, int len)
{
    int idx;

    for (idx = 0; idx < len; idx++) {
        const fixed32 mirrored = src1[len - 1 - idx];

        dst[idx] = fixmul32b(src0[idx], mirrored);
    }
}
|
||||
|
||||
#endif
|
||||
Loading…
Add table
Add a link
Reference in a new issue