1
0
Fork 0
forked from len0rd/rockbox

Partial conversion of the wmapro decoder to fixed point arithmetic. Currently inverse quantization & rescaling,

imdct and windowing are all in fixed point.


git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27302 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Mohamed Tarek 2010-07-05 22:33:37 +00:00
parent 6a04479d63
commit d884af2b99
14 changed files with 3644 additions and 43 deletions

View file

@ -14,8 +14,18 @@ IMPORT DETAILS
Based on ffmpeg svn r22886 dated 15 April 2010.
Currently, the files contain minimal changes from their original state in order
to be able to compile cleanly.
The code is slowly being modified to convert it from floating point maths to
fixed point.
As of 6 July 2010, the following steps are all working properly in fixed point:
- Inverse quantization and rescaling
- IMDCT
- Windowing
Comparing the output wav files of the partially fixed point decoder with those
produced by the ffmpeg decoder gives:
- average relative error = 0.016%
- maximum relative error = 0.3%
COMPILING

View file

@ -2,8 +2,10 @@ wmaprodec.c
wma.c
dsputil.c
mdct.c
mdct_tables.c
fft.c
bitstream.c
wmapro_mdct.c
libavutil/log.c
libavutil/mem.c
libavutil/mathematics.c

View file

@ -101,10 +101,10 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
s2 = inverse ? 1.0 : -1.0;
s->fft_permute = ff_fft_permute_c;
s->fft_calc = ff_fft_calc_c;
s->fft_calc = fff_fft_calc_c;
//#if CONFIG_MDCT
s->imdct_calc = ff_imdct_calc_c;
s->imdct_half = ff_imdct_half_c;
s->imdct_half = fff_imdct_half_c;
s->mdct_calc = ff_mdct_calc_c;
//#endif
s->exptab1 = NULL;
@ -361,7 +361,7 @@ static void (* const fft_dispatch[])(FFTComplex*) = {
fft2048, fft4096, fft8192, fft16384, fft32768, fft65536,
};
void ff_fft_calc_c(FFTContext *s, FFTComplex *z)
void fff_fft_calc_c(FFTContext *s, FFTComplex *z)
{
fft_dispatch[s->nbits-2](z);
}

View file

@ -110,7 +110,7 @@ extern SINTABLE(65536);
*/
int ff_fft_init(FFTContext *s, int nbits, int inverse);
void ff_fft_permute_c(FFTContext *s, FFTComplex *z);
void ff_fft_calc_c(FFTContext *s, FFTComplex *z);
void fff_fft_calc_c(FFTContext *s, FFTComplex *z);
void ff_fft_init_altivec(FFTContext *s);
void ff_fft_init_mmx(FFTContext *s);
@ -127,7 +127,7 @@ static inline void ff_fft_permute(FFTContext *s, FFTComplex *z)
* Do a complex FFT with the parameters defined in ff_fft_init(). The
* input data must be permuted before. No 1.0/sqrt(n) normalization is done.
*/
static inline void ff_fft_calc(FFTContext *s, FFTComplex *z)
static inline void fff_fft_calc(FFTContext *s, FFTComplex *z)
{
s->fft_calc(s, z);
}
@ -135,11 +135,11 @@ void ff_fft_end(FFTContext *s);
/* MDCT computation */
static inline void ff_imdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input)
static inline void fff_imdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input)
{
s->imdct_calc(s, output, input);
}
static inline void ff_imdct_half(FFTContext *s, FFTSample *output, const FFTSample *input)
static inline void fff_imdct_half(FFTContext *s, FFTSample *output, const FFTSample *input)
{
s->imdct_half(s, output, input);
}
@ -181,7 +181,7 @@ extern SINETABLE_CONST float * const ff_sine_windows[13];
int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale);
void ff_imdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_half_c(FFTContext *s, FFTSample *output, const FFTSample *input);
void fff_imdct_half_c(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_mdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input);
void ff_mdct_end(FFTContext *s);

View file

@ -121,7 +121,7 @@ av_cold int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale)
* @param output N/2 samples
* @param input N/2 samples
*/
void ff_imdct_half_c(FFTContext *s, FFTSample *output, const FFTSample *input)
void fff_imdct_half_c(FFTContext *s, FFTSample *output, const FFTSample *input)
{
int k, n8, n4, n2, n, j;
const uint16_t *revtab = s->revtab;
@ -144,8 +144,8 @@ void ff_imdct_half_c(FFTContext *s, FFTSample *output, const FFTSample *input)
in1 += 2;
in2 -= 2;
}
ff_fft_calc(s, z);
fff_fft_calc(s, z);
/* post rotation + reordering */
for(k = 0; k < n8; k++) {
FFTSample r0, i0, r1, i1;
@ -170,7 +170,7 @@ void ff_imdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input)
int n2 = n >> 1;
int n4 = n >> 2;
ff_imdct_half_c(s, output+n4, input);
fff_imdct_half_c(s, output+n4, input);
for(k = 0; k < n4; k++) {
output[k] = -output[n2-k-1];
@ -211,7 +211,7 @@ void ff_mdct_calc_c(FFTContext *s, FFTSample *out, const FFTSample *input)
CMUL(x[j].re, x[j].im, re, im, -tcos[n8 + i], tsin[n8 + i]);
}
ff_fft_calc(s, x);
fff_fft_calc(s, x);
/* post rotation */
for(i=0;i<n8;i++) {

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,9 @@
#ifndef _MDCT_TABLES_H_
#define _MDCT_TABLES_H_
#include <inttypes.h>
/* Table of pointers to the fixed point window coefficients. wmapro_window()
 * selects the entry at index av_log2(winlen) - BLOCK_MIN_BITS. */
extern const int32_t *sine_windows[6];
/* Lookup table read by imdct_half() for its pre- and post-rotation. For a
 * transform of size n, imdct_half() starts reading at offset (n / 2) - 128. */
extern const int32_t sincos_lookup_wmap[8064];
#endif /* _MDCT_TABLES_H_ */

View file

@ -0,0 +1,45 @@
#ifndef _QUANT_H_
#define _QUANT_H_
#include <inttypes.h>
/* Smallest and largest value of 'exp' covered by quant_tab (both inclusive). */
#define EXP_MIN 50
#define EXP_MAX 139
/* This table contains the floating point inverse quantization factors used by
 * wma pro, rounded to plain integers (no fixed point scaling is applied).
 * The formula for calculating the floating point value is :
 * quant = pow(10.0, exp / 20.0)
 * Entry i holds the value for exp = EXP_MIN + i. 'exp' is an integer value
 * which I have experimentally found to fall in the range [50, 139].
 *
 * The table is 'static' because this header defines it: with external linkage
 * every translation unit including the header would emit its own definition
 * and the link would fail with duplicate symbols. */
static const int32_t quant_tab[EXP_MAX - EXP_MIN + 1] = {
    0x0000013C, 0x00000163, 0x0000018E, 0x000001BF,
    0x000001F5, 0x00000232, 0x00000277, 0x000002C4,
    0x0000031A, 0x0000037B, 0x000003E8, 0x00000462,
    0x000004EB, 0x00000585, 0x00000631, 0x000006F2,
    0x000007CB, 0x000008BF, 0x000009D0, 0x00000B02,
    0x00000C5A, 0x00000DDC, 0x00000F8D, 0x00001173,
    0x00001394, 0x000015F7, 0x000018A6, 0x00001BA7,
    0x00001F07, 0x000022D1, 0x00002710, 0x00002BD4,
    0x0000312D, 0x0000372D, 0x00003DE9, 0x00004577,
    0x00004DF1, 0x00005773, 0x0000621F, 0x00006E18,
    0x00007B87, 0x00008A99, 0x00009B83, 0x0000AE7C,
    0x0000C3C7, 0x0000DBAA, 0x0000F678, 0x0001148B,
    0x00013649, 0x00015C25, 0x000186A0, 0x0001B64A,
    0x0001EBC5, 0x000227C6, 0x00026B19, 0x0002B6A4,
    0x00030B66, 0x00036A80, 0x0003D535, 0x00044CEE,
    0x0004D344, 0x000569FD, 0x0006131B, 0x0006D0DC,
    0x0007A5C3, 0x000894A5, 0x0009A0AD, 0x000ACD6A,
    0x000C1ED8, 0x000D9973, 0x000F4240, 0x00111EE2,
    0x001335AD, 0x00158DBA, 0x00182EFD, 0x001B2267,
    0x001E71FE, 0x00222901, 0x0026540E, 0x002B014F,
    0x003040A6, 0x003623E6, 0x003CBF10, 0x00442894,
    0x004C79A0, 0x0055CE75, 0x006046C5, 0x006C0622,
    0x00793472, 0x0087FE7D,
};
/* Return the correct value of quant based on exp. The caller must make sure
 * that EXP_MIN <= exp <= EXP_MAX; no range check is done here. The argument
 * is parenthesized so that expressions such as QUANT(a & b) index correctly. */
#define QUANT(exp) (quant_tab[(exp) - EXP_MIN])
#endif /* _QUANT_H_ */

View file

@ -0,0 +1,8 @@
#ifndef _TYPES_H_
#define _TYPES_H_
#include <inttypes.h>
/* 32-bit signed integer type used for the decoder's fixed point values.
 * Declared as a typedef rather than a macro so that it behaves like a real
 * type name in declarations and shows up under its own name in diagnostics. */
typedef int32_t FIXED;
#endif

View file

@ -0,0 +1,46 @@
#include <inttypes.h>
#include "types.h"
/* Convert a value with 16 fractional bits to float. The whole expansion is
 * parenthesized so the macro can be used as an operand of any operator
 * (including sizeof) without surprises. */
#define fixtof16(x) ((float)((float)(x) / (float)(1 << 16)))
/* Convert a float to a value with 16 fractional bits, rounding half away from
 * zero. NOTE: 'x' is evaluated twice, so it must not have side effects, and
 * the scaled value must fit in an int32_t. */
#define ftofix16(x) ((int32_t)((x) * (float)(1 << 16) + ((x) < 0 ? -0.5:0.5)))
/* Multiply x by y using a 64-bit intermediate, shift the product right by
 * 'shamt' bits and narrow the result back to 32 bits. */
static inline FIXED fixmulshift(FIXED x, FIXED y, int shamt)
{
    const int64_t product = (int64_t)x * y;

    return (int32_t)(product >> shamt);
}
/* Helper for vector_fixmul_window(): returns (-(x * w)) >> 31, computed with a
 * 64-bit intermediate. Negating the 64-bit product instead of the 32-bit
 * window value gives the same result for every window value whose negation is
 * representable and avoids the signed overflow of -w when w is INT32_MIN. */
static inline FIXED fixmulneg31(FIXED x, FIXED w)
{
    return (FIXED)((-((int64_t)x * w)) >> 31);
}

/* Windowed overlap of two blocks of 'len' values each, in fixed point.
 *
 * dst      receives 2 * len values
 * src0     first input block, len values
 * src1     second input block, len values
 * win      window, 2 * len values; every product uses the negated window value
 * add_bias bias added to every output after being shifted left by 16 bits
 * len      half of the window length
 *
 * Each iteration reads its two inputs before writing its two outputs. */
static inline void vector_fixmul_window(FIXED *dst, const FIXED *src0,
                                        const FIXED *src1, const FIXED *win,
                                        FIXED add_bias, int len)
{
    /* Shift through an unsigned type: left-shifting a negative signed value
     * is undefined behaviour in C. */
    const FIXED bias = (FIXED)((uint32_t)add_bias << 16);
    int i, j;

    /* Point at the middle so that i (negative) walks the first half upwards
     * while j walks the second half downwards. */
    dst  += len;
    win  += len;
    src0 += len;
    for (i = -len, j = len - 1; i < 0; i++, j--) {
        const FIXED s0 = src0[i];
        const FIXED s1 = src1[j];
        const FIXED wi = win[i];
        const FIXED wj = win[j];

        dst[i] = fixmulneg31(s0, wj) - fixmulneg31(s1, wi) + bias;
        dst[j] = fixmulneg31(s0, wi) + fixmulneg31(s1, wj) + bias;
    }
}
/* Scale 'len' values: every element of src is multiplied by 'mul' with
 * fixmulshift() using a shift of 32 bits, and the result is stored in dst. */
static inline void vector_fixmul_scalar(FIXED *dst, const FIXED *src, FIXED mul,
                                        int len)
{
    int remaining;

    for (remaining = len; remaining > 0; remaining--) {
        *dst++ = fixmulshift(*src++, mul, 32);
    }
}

View file

@ -0,0 +1,51 @@
#include <inttypes.h>
#include "wmapro_mdct.h"
#include "mdct_tables.h" /* for sincos_lookup_wmap */
#include "../lib/mdct_lookup.h" /* for revtab */
#include "../lib/fft.h" /* for FFT data structures */
#include "codeclib.h"
#include "../lib/codeclib_misc.h" /* for XNPROD31 */
/**
 * Integer version of the half inverse MDCT, working on 32-bit samples.
 * The steps are: pre-rotation of the input into the slots given by revtab,
 * an in-place complex FFT on the output buffer, then post-rotation and
 * reordering.
 *
 * @param nbits  log2 of the transform size n (n = 1 << nbits)
 * @param output receives n/2 values; the buffer is used as n/4 FFTComplex
 *               entries while the transform runs
 * @param input  n/2 values, consumed from both ends towards the middle
 */
void imdct_half(unsigned int nbits, int32_t *output, const int32_t *input){
    int k, n8, n4, n2, n, j;
    const int32_t *in1, *in2;
    FFTComplex *z = (FFTComplex *)output;
    n = 1 << nbits;
    n2 = n >> 1;
    n4 = n >> 2;
    n8 = n >> 3;
    /* The table entries for this transform size start at offset n/2 - 128. */
    const int32_t *T = sincos_lookup_wmap + ((n2) - (1<<7));
    /* pre rotation */
    /* NOTE(review): presumably revtab is built for a 14-bit transform and is
     * scaled down to the current size by this shift - confirm against
     * mdct_lookup.h. */
    const int revtab_shift = (14- nbits);
    in1 = input;
    in2 = input + n2 - 1;
    for(k = 0; k < n4; k++) {
        j=revtab[k]>>revtab_shift;
        /* table values are shifted up by 16 bits before the multiply */
        XNPROD31(*in2, *in1, T[1]<<16, T[0]<<16, &z[j].re, &z[j].im );
        in1 += 2;
        in2 -= 2;
        T += 2;
    }
    ff_fft_calc_c(nbits-2, z);
    /* post rotation + reordering */
    /* T walks down and V walks up through the table, both starting n/4
     * entries past the start used for the pre-rotation. */
    T = sincos_lookup_wmap + ((n2) - (1<<7)) + n4;
    const int32_t *V = T;
    for(k = 0; k < n8; k++) {
        int32_t r0, i0, r1, i1;
        /* All four results are computed before any is stored: each output
         * pair mixes values coming from both mirrored entries. */
        XNPROD31(z[n8-k-1].im, z[n8-k-1].re, T[0]<<16, T[1]<<16, &r0, &i1 );
        XNPROD31(z[n8+k ].im, z[n8+k ].re, V[0]<<16, V[1]<<16, &r1, &i0 );
        z[n8-k-1].re = r0;
        z[n8-k-1].im = i0;
        z[n8+k ].re = r1;
        z[n8+k ].im = i1;
        T-=2;
        V+=2;
    }
}

View file

@ -0,0 +1,8 @@
#ifndef _WMAPRO_MDCT_H_
#define _WMAPRO_MDCT_H_
#include <inttypes.h>
/**
 * Integer half inverse MDCT on 32-bit samples.
 *
 * @param nbits  log2 of the transform size n
 * @param output buffer that receives n/2 values
 * @param input  n/2 input values
 */
void imdct_half(unsigned int nbits, int32_t *output, const int32_t *input);
#endif

View file

@ -94,6 +94,12 @@
#include "dsputil.h"
#include "wma.h"
#include "wmaprodec.h"
#include "wmapro_mdct.h"
#include "mdct_tables.h"
#include "quant.h"
#include "types.h"
#include "wmapro_math.h"
#include "codecs.h"
/* Some defines to make it compile */
#define AVERROR_INVALIDDATA -1
@ -148,7 +154,9 @@ typedef struct {
int* scale_factors; ///< pointer to the scale factor values used for decoding
uint8_t table_idx; ///< index in sf_offsets for the scale factor reference block
float* coeffs; ///< pointer to the subframe decode buffer
FIXED* fixcoeffs;
DECLARE_ALIGNED(16, float, out)[WMAPRO_BLOCK_MAX_SIZE + WMAPRO_BLOCK_MAX_SIZE / 2]; ///< output buffer
DECLARE_ALIGNED(16, FIXED, fixout)[WMAPRO_BLOCK_MAX_SIZE + WMAPRO_BLOCK_MAX_SIZE / 2]; ///< output buffer
} WMAProChannelCtx;
/**
@ -174,6 +182,7 @@ typedef struct WMAProDecodeCtx {
PutBitContext pb; ///< context for filling the frame_data buffer
FFTContext mdct_ctx[WMAPRO_BLOCK_SIZES]; ///< MDCT context per block size
DECLARE_ALIGNED(16, float, tmp)[WMAPRO_BLOCK_MAX_SIZE]; ///< IMDCT output buffer
DECLARE_ALIGNED(16, FIXED, fixtmp)[WMAPRO_BLOCK_MAX_SIZE]; ///< IMDCT output buffer
float* windows[WMAPRO_BLOCK_SIZES]; ///< windows for the different block sizes
/* frame size dependent frame information (set during initialization) */
@ -208,8 +217,9 @@ typedef struct WMAProDecodeCtx {
uint32_t frame_num; ///< current frame number (not used for decoding)
GetBitContext gb; ///< bitstream reader context
int buf_bit_size; ///< buffer size in bits
float* samples; ///< current samplebuffer pointer
float* samples_end; ///< maximum samplebuffer pointer
float* samplesf; ///< current samplebuffer pointer
FIXED* samples;
FIXED* samples_end; ///< maximum samplebuffer pointer
uint8_t drc_gain; ///< gain for the DRC tool
int8_t skip_frame; ///< skip output step
int8_t parsed_all_subframes; ///< all subframes decoded?
@ -1023,22 +1033,30 @@ static void inverse_channel_transform(WMAProDecodeCtx *s)
static void wmapro_window(WMAProDecodeCtx *s)
{
int i;
for (i = 0; i < s->channels_for_cur_subframe; i++) {
int c = s->channel_indexes_for_cur_subframe[i];
float* window;
FIXED* window;
float* win2;
int winlen = s->channel[c].prev_block_len;
float* start = s->channel[c].coeffs - (winlen >> 1);
FIXED *xstart= s->channel[c].fixcoeffs - (winlen >> 1);
int j;
if (s->subframe_len < winlen) {
start += (winlen - s->subframe_len) >> 1;
xstart += (winlen - s->subframe_len) >> 1;
winlen = s->subframe_len;
}
window = s->windows[av_log2(winlen) - BLOCK_MIN_BITS];
window = sine_windows[av_log2(winlen) - BLOCK_MIN_BITS];
win2 = s->windows[av_log2(winlen) - BLOCK_MIN_BITS];
winlen >>= 1;
s->dsp.vector_fmul_window(start, start, start + winlen,
win2, 0, winlen);
vector_fixmul_window(xstart, xstart, xstart + winlen,
window, 0, winlen);
s->channel[c].prev_block_len = s->subframe_len;
@ -1116,6 +1134,8 @@ static int decode_subframe(WMAProDecodeCtx *s)
s->channel[c].coeffs = &s->channel[c].out[(s->samples_per_frame >> 1)
+ offset];
s->channel[c].fixcoeffs = &s->channel[c].fixout[(s->samples_per_frame >> 1)
+ offset];
}
s->subframe_len = subframe_len;
@ -1228,10 +1248,12 @@ static int decode_subframe(WMAProDecodeCtx *s)
const int* sf = s->channel[c].scale_factors;
int b;
if (c == s->lfe_channel)
if (c == s->lfe_channel){
memset(&s->tmp[cur_subwoofer_cutoff], 0, sizeof(*s->tmp) *
(subframe_len - cur_subwoofer_cutoff));
memset(&s->fixtmp[cur_subwoofer_cutoff], 0, sizeof(*s->fixtmp) *
(subframe_len - cur_subwoofer_cutoff));
}
/** inverse quantization and rescaling */
for (b = 0; b < s->num_bands; b++) {
const int end = FFMIN(s->cur_sfb_offsets[b+1], s->subframe_len);
@ -1239,21 +1261,42 @@ static int decode_subframe(WMAProDecodeCtx *s)
(s->channel[c].max_scale_factor - *sf++) *
s->channel[c].scale_factor_step;
const float quant = pow(10.0, exp / 20.0);
if(exp < EXP_MIN || exp > EXP_MAX) {
LOGF("in wmaprodec.c : unhandled value for exp, please report sample.\n");
return -1;
}
const FIXED fixquant = QUANT(exp);
int start = s->cur_sfb_offsets[b];
int j;
for(j = 0; j < WMAPRO_BLOCK_MAX_SIZE + WMAPRO_BLOCK_MAX_SIZE/2; j++)
s->channel[c].fixout[j] = ftofix16(s->channel[c].out[j]);
s->dsp.vector_fmul_scalar(s->tmp + start,
s->channel[c].coeffs + start,
quant, end - start);
quant, end - start);
vector_fixmul_scalar(s->fixtmp+start,
s->channel[c].fixcoeffs + start,
fixquant, end-start);
}
int j;
/** apply imdct (ff_imdct_half == DCTIV with reverse) */
ff_imdct_half(&s->mdct_ctx[av_log2(subframe_len) - BLOCK_MIN_BITS],
fff_imdct_half(&s->mdct_ctx[av_log2(subframe_len) - BLOCK_MIN_BITS],
s->channel[c].coeffs, s->tmp);
imdct_half((s->mdct_ctx[av_log2(subframe_len) - BLOCK_MIN_BITS]).mdct_bits,
s->channel[c].fixcoeffs, s->fixtmp);
}
}
/** window and overlapp-add */
wmapro_window(s);
/** handled one subframe */
for (i = 0; i < s->channels_for_cur_subframe; i++) {
int c = s->channel_indexes_for_cur_subframe[i];
@ -1354,13 +1397,17 @@ static int decode_frame(WMAProDecodeCtx *s)
/** interleave samples and write them to the output buffer */
for (i = 0; i < s->num_channels; i++) {
float* ptr = s->samples + i;
FIXED* ptr = s->samples + i;
float* fptr = s->samplesf + i;
int incr = s->num_channels;
float* iptr = s->channel[i].out;
float* iend = iptr + s->samples_per_frame;
FIXED* iptr = s->channel[i].fixout;
float* fiptr = s->channel[i].out;
FIXED* iend = iptr + s->samples_per_frame;
float* fiend = fiptr + s->samples_per_frame;
int j;
while (iptr < iend) {
*ptr = av_clipf(*iptr++, -1.0, 32767.0 / 32768.0);
*ptr = *iptr++ << 1;
ptr += incr;
}
@ -1548,20 +1595,6 @@ int decode_packet(AVCodecContext *avctx,
*data_size = (int8_t *)s->samples - (int8_t *)data;
s->packet_offset = get_bits_count(gb) & 7;
/* Convert the pcm samples to signed 16-bit integers. This is the format that
* the rockbox simulator works with. */
#ifdef ROCKBOX
float* fptr = data;
int32_t* ptr = data;
int x;
for(x = 0; x < *data_size; x++)
{
fptr[x] *= ((float)(INT32_MAX));
ptr[x] = (int32_t)fptr[x];
}
#endif
return (s->packet_loss) ? AVERROR_INVALIDDATA : get_bits_count(gb) >> 3;
}

View file

@ -66,7 +66,7 @@ enum codec_status codec_main(void)
int pktcnt = 0; /* Count of the packets played */
/* Generic codec initialisation */
ci->configure(DSP_SET_SAMPLE_DEPTH, 32);
ci->configure(DSP_SET_SAMPLE_DEPTH, 17);
next_track: