1
0
Fork 0
forked from len0rd/rockbox

Modify libatrac to use fixed-point arithmetic.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@22298 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Mohamed Tarek 2009-08-13 20:38:59 +00:00
parent c956059ec5
commit 432e2ecc13
10 changed files with 324 additions and 5756 deletions

View file

@ -1,8 +1,8 @@
CFLAGS = -Wall -O3 -DTEST -D"DEBUGF=printf" CFLAGS = -Wall -O3 -DTEST -D"DEBUGF=printf" -D"ROCKBOX_LITTLE_ENDIAN=1" -D"ICONST_ATTR=" -D"ICODE_ATTR="
OBJS = atrac3.o dsputil.o bitstream.o fft.o mdct.o libavutil/log.o libavutil/mem.o ../librm/rm.o OBJS = atrac3.o bitstream.o libavutil/log.o libavutil/mem.o ../librm/rm.o fixp_math.o ../lib/mdct2.o ../lib/mdct_lookup.o
atractest: $(OBJS) atractest: $(OBJS)
gcc -o atractest $(OBJS) -lm gcc -o atractest $(OBJS)
.c.o : .c.o :
$(CC) $(CFLAGS) -c -o $@ $< $(CC) $(CFLAGS) -c -o $@ $<

View file

@ -8,13 +8,19 @@ ffmpeg is licensed under the Lesser GNU General Public License.
IMPORT DETAILS IMPORT DETAILS
The decoder is based on ffmpeg-svn r18079. It still uses floating The decoder is based on ffmpeg-svn r18079.
point math and not suitable to be used in rockbox. The decoder had been modified to use fixed-point arithmetic.
TESTING TESTING
The test program should compile in any Unix-like environment using the The test program should compile in any Unix-like environment using the
command "make -f Makefile.test". command "make -f Makefile.test".
For ARM targets add -DCPU_ARM to CFLAGS in Makefile.test to make use of
the asm ARM optimisations in rockbox's mdct library.
For Big-endian targets, change -D"ROCKBOX_LITTLE_ENDIAN=1"
to -D"ROCKBOX_BIG_ENDIAN=1" in Makefile.test.
Running "./atractest file.rm" will decode the audio data to a WAV file Running "./atractest file.rm" will decode the audio data to a WAV file
called "output.wav" in the current directory. called "output.wav" in the current directory.

View file

@ -38,7 +38,6 @@
#include "avcodec.h" #include "avcodec.h"
#include "bitstream.h" #include "bitstream.h"
#include "dsputil.h"
#include "bytestream.h" #include "bytestream.h"
#include <stdint.h> #include <stdint.h>
@ -50,6 +49,10 @@
#include "../librm/rm.h" #include "../librm/rm.h"
#include "atrac3data.h" #include "atrac3data.h"
#include "atrac3data_fixed.h"
#include "fixp_math.h"
//#include "fixp_mdct.h"
#include "../lib/mdct2.h"
#define JOINT_STEREO 0x12 #define JOINT_STEREO 0x12
#define STEREO 0x2 #define STEREO 0x2
@ -70,23 +73,23 @@ typedef struct {
typedef struct { typedef struct {
int pos; int pos;
int numCoefs; int numCoefs;
float coef[8]; int32_t coef[8];
} tonal_component; } tonal_component;
typedef struct { typedef struct {
int bandsCoded; int bandsCoded;
int numComponents; int numComponents;
tonal_component components[64]; tonal_component components[64];
float prevFrame[1024]; int32_t prevFrame[1024];
int gcBlkSwitch; int gcBlkSwitch;
gain_block gainBlock[2]; gain_block gainBlock[2];
DECLARE_ALIGNED_16(float, spectrum[1024]); int32_t spectrum[1024] __attribute__((aligned(16)));
DECLARE_ALIGNED_16(float, IMDCT_buf[1024]); int32_t IMDCT_buf[1024] __attribute__((aligned(16)));
float delayBuf1[46]; ///<qmf delay buffers int32_t delayBuf1[46]; ///<qmf delay buffers
float delayBuf2[46]; int32_t delayBuf2[46];
float delayBuf3[46]; int32_t delayBuf3[46];
} channel_unit; } channel_unit;
typedef struct { typedef struct {
@ -114,9 +117,9 @@ typedef struct {
//@} //@}
//@{ //@{
/** data buffers */ /** data buffers */
float outSamples[2048]; int32_t outSamples[2048];
uint8_t* decoded_bytes_buffer; uint8_t* decoded_bytes_buffer;
float tempBuf[1070]; int32_t tempBuf[1070];
//@} //@}
//@{ //@{
/** extradata */ /** extradata */
@ -127,17 +130,8 @@ typedef struct {
//@} //@}
} ATRAC3Context; } ATRAC3Context;
static DECLARE_ALIGNED_16(float,mdct_window[512]); static int32_t qmf_window[48];
static float qmf_window[48];
static VLC spectral_coeff_tab[7]; static VLC spectral_coeff_tab[7];
static float SFTable[64];
static float gain_tab1[16];
static float gain_tab2[31];
static MDCTContext mdct_ctx;
static DSPContext dsp;
/* quadrature mirror synthesis filter */
/** /**
* Quadrature mirror synthesis filter. * Quadrature mirror synthesis filter.
@ -149,14 +143,12 @@ static DSPContext dsp;
* @param delayBuf delayBuf buffer * @param delayBuf delayBuf buffer
* @param temp temp buffer * @param temp temp buffer
*/ */
static void iqmf (int32_t *inlo, int32_t *inhi, unsigned int nIn, int32_t *pOut, int32_t *delayBuf, int32_t *temp)
static void iqmf (float *inlo, float *inhi, unsigned int nIn, float *pOut, float *delayBuf, float *temp)
{ {
int i, j; int i, j;
float *p1, *p3; int32_t *p1, *p3;
memcpy(temp, delayBuf, 46*sizeof(float)); memcpy(temp, delayBuf, 46*sizeof(int32_t));
p3 = temp + 46; p3 = temp + 46;
@ -171,12 +163,12 @@ static void iqmf (float *inlo, float *inhi, unsigned int nIn, float *pOut, float
/* loop2 */ /* loop2 */
p1 = temp; p1 = temp;
for (j = nIn; j != 0; j--) { for (j = nIn; j != 0; j--) {
float s1 = 0.0; int32_t s1 = 0;
float s2 = 0.0; int32_t s2 = 0;
for (i = 0; i < 48; i += 2) { for (i = 0; i < 48; i += 2) {
s1 += p1[i] * qmf_window[i]; s1 += fixmul31(p1[i], qmf_window[i]);
s2 += p1[i+1] * qmf_window[i+1]; s2 += fixmul31(p1[i+1], qmf_window[i+1]);
} }
pOut[0] = s2; pOut[0] = s2;
@ -187,7 +179,7 @@ static void iqmf (float *inlo, float *inhi, unsigned int nIn, float *pOut, float
} }
/* Update the delay buffer. */ /* Update the delay buffer. */
memcpy(delayBuf, temp + nIn*2, 46*sizeof(float)); memcpy(delayBuf, temp + (nIn << 1), 46*sizeof(int32_t));
} }
/** /**
@ -199,10 +191,9 @@ static void iqmf (float *inlo, float *inhi, unsigned int nIn, float *pOut, float
* @param odd_band 1 if the band is an odd band * @param odd_band 1 if the band is an odd band
*/ */
static void IMLT(float *pInput, float *pOutput, int odd_band) static void IMLT(int32_t *pInput, int32_t *pOutput, int odd_band)
{ {
int i; int i;
if (odd_band) { if (odd_band) {
/** /**
* Reverse the odd bands before IMDCT, this is an effect of the QMF transform * Reverse the odd bands before IMDCT, this is an effect of the QMF transform
@ -214,13 +205,15 @@ static void IMLT(float *pInput, float *pOutput, int odd_band)
*/ */
for (i=0; i<128; i++) for (i=0; i<128; i++)
FFSWAP(float, pInput[i], pInput[255-i]); FFSWAP(int32_t, pInput[i], pInput[255-i]);
} }
/* Apply the imdct. */
mdct_backward(512, pInput, pOutput);
ff_imdct_calc(&mdct_ctx,pOutput,pInput); /* Windowing. */
for(i = 0; i<512; i++)
/* Perform windowing on the output. */ pOutput[i] = fixmul31(pOutput[i], window_lookup[i]);
dsp.vector_fmul(pOutput,mdct_window,512);
} }
@ -259,30 +252,20 @@ static int decode_bytes(const uint8_t* inbuffer, uint8_t* out, int bytes){
static av_cold void init_atrac3_transforms(ATRAC3Context *q) { static av_cold void init_atrac3_transforms(ATRAC3Context *q) {
float enc_window[256]; int32_t s;
float s;
int i; int i;
/* Generate the mdct window, for details see /* Generate the mdct window, for details see
* http://wiki.multimedia.cx/index.php?title=RealAudio_atrc#Windows */ * http://wiki.multimedia.cx/index.php?title=RealAudio_atrc#Windows */
for (i=0 ; i<256; i++)
enc_window[i] = (sin(((i + 0.5) / 256.0 - 0.5) * M_PI) + 1.0) * 0.5;
if (!mdct_window[0]) /* mdct window had been generated and saved as a lookup table in atrac3data_fixed.h */
for (i=0 ; i<256; i++) {
mdct_window[i] = enc_window[i]/(enc_window[i]*enc_window[i] + enc_window[255-i]*enc_window[255-i]);
mdct_window[511-i] = mdct_window[i];
}
/* Generate the QMF window. */ /* Generate the QMF window. */
for (i=0 ; i<24; i++) { for (i=0 ; i<24; i++) {
s = qmf_48tap_half[i] * 2.0; s = qmf_48tap_half_fix[i] << 1;
qmf_window[i] = s; qmf_window[i] = s;
qmf_window[47 - i] = s; qmf_window[47 - i] = s;
} }
/* Initialize the MDCT transform. */
ff_mdct_init(&mdct_ctx, 9, 1);
} }
/** /**
@ -367,12 +350,12 @@ static void readQuantSpectralCoeffs (GetBitContext *gb, int selector, int coding
* @return outSubbands subband counter, fix for broken specification/files * @return outSubbands subband counter, fix for broken specification/files
*/ */
static int decodeSpectrum (GetBitContext *gb, float *pOut) static int decodeSpectrum (GetBitContext *gb, int32_t *pOut)
{ {
int numSubbands, codingMode, cnt, first, last, subbWidth, *pIn; int numSubbands, codingMode, cnt, first, last, subbWidth, *pIn;
int subband_vlc_index[32], SF_idxs[32]; int subband_vlc_index[32], SF_idxs[32];
int mantissas[128]; int mantissas[128];
float SF; int32_t SF;
numSubbands = get_bits(gb, 5); // number of coded subbands numSubbands = get_bits(gb, 5); // number of coded subbands
codingMode = get_bits1(gb); // coding Mode: 0 - VLC/ 1-CLC codingMode = get_bits1(gb); // coding Mode: 0 - VLC/ 1-CLC
@ -400,20 +383,20 @@ static int decodeSpectrum (GetBitContext *gb, float *pOut)
readQuantSpectralCoeffs (gb, subband_vlc_index[cnt], codingMode, mantissas, subbWidth); readQuantSpectralCoeffs (gb, subband_vlc_index[cnt], codingMode, mantissas, subbWidth);
/* Decode the scale factor for this subband. */ /* Decode the scale factor for this subband. */
SF = SFTable[SF_idxs[cnt]] * iMaxQuant[subband_vlc_index[cnt]]; SF = fixmul31(SFTable_fixed[SF_idxs[cnt]], iMaxQuant_fix[subband_vlc_index[cnt]]);
/* Inverse quantize the coefficients. */ /* Inverse quantize the coefficients. */
for (pIn=mantissas ; first<last; first++, pIn++) for (pIn=mantissas ; first<last; first++, pIn++)
pOut[first] = *pIn * SF; pOut[first] = fixmul16(*pIn, SF);
} else { } else {
/* This subband was not coded, so zero the entire subband. */ /* This subband was not coded, so zero the entire subband. */
memset(pOut+first, 0, subbWidth*sizeof(float)); memset(pOut+first, 0, subbWidth*sizeof(int32_t));
} }
} }
/* Clear the subbands that were not coded. */ /* Clear the subbands that were not coded. */
first = subbandTab[cnt]; first = subbandTab[cnt];
memset(pOut+first, 0, (1024 - first) * sizeof(float)); memset(pOut+first, 0, (1024 - first) * sizeof(int32_t));
return numSubbands; return numSubbands;
} }
@ -431,8 +414,8 @@ static int decodeTonalComponents (GetBitContext *gb, tonal_component *pComponent
int components, coding_mode_selector, coding_mode, coded_values_per_component; int components, coding_mode_selector, coding_mode, coded_values_per_component;
int sfIndx, coded_values, max_coded_values, quant_step_index, coded_components; int sfIndx, coded_values, max_coded_values, quant_step_index, coded_components;
int band_flags[4], mantissa[8]; int band_flags[4], mantissa[8];
float *pCoef; int32_t *pCoef;
float scalefactor; int32_t scalefactor;
int component_count = 0; int component_count = 0;
components = get_bits(gb,5); components = get_bits(gb,5);
@ -473,7 +456,7 @@ static int decodeTonalComponents (GetBitContext *gb, tonal_component *pComponent
coded_values = coded_values_per_component + 1; coded_values = coded_values_per_component + 1;
coded_values = FFMIN(max_coded_values,coded_values); coded_values = FFMIN(max_coded_values,coded_values);
scalefactor = SFTable[sfIndx] * iMaxQuant[quant_step_index]; scalefactor = fixmul31(SFTable_fixed[sfIndx], iMaxQuant_fix[quant_step_index]);
readQuantSpectralCoeffs(gb, quant_step_index, coding_mode, mantissa, coded_values); readQuantSpectralCoeffs(gb, quant_step_index, coding_mode, mantissa, coded_values);
@ -482,7 +465,7 @@ static int decodeTonalComponents (GetBitContext *gb, tonal_component *pComponent
/* inverse quant */ /* inverse quant */
pCoef = pComponent[component_count].coef; pCoef = pComponent[component_count].coef;
for (cnt = 0; cnt < coded_values; cnt++) for (cnt = 0; cnt < coded_values; cnt++)
pCoef[cnt] = mantissa[cnt] * scalefactor; pCoef[cnt] = fixmul16(mantissa[cnt], scalefactor);
component_count++; component_count++;
} }
@ -539,21 +522,21 @@ static int decodeGainControl (GetBitContext *gb, gain_block *pGb, int numBands)
* @param pGain2 next band gain info * @param pGain2 next band gain info
*/ */
static void gainCompensateAndOverlap (float *pIn, float *pPrev, float *pOut, gain_info *pGain1, gain_info *pGain2) static void gainCompensateAndOverlap (int32_t *pIn, int32_t *pPrev, int32_t *pOut, gain_info *pGain1, gain_info *pGain2)
{ {
/* gain compensation function */ /* gain compensation function */
float gain1, gain2, gain_inc; int32_t gain1, gain2, gain_inc;
int cnt, numdata, nsample, startLoc, endLoc; int cnt, numdata, nsample, startLoc, endLoc;
if (pGain2->num_gain_data == 0) if (pGain2->num_gain_data == 0)
gain1 = 1.0; gain1 = ONE_16;
else else
gain1 = gain_tab1[pGain2->levcode[0]]; gain1 = gain_tab1[pGain2->levcode[0]];
if (pGain1->num_gain_data == 0) { if (pGain1->num_gain_data == 0) {
for (cnt = 0; cnt < 256; cnt++) for (cnt = 0; cnt < 256; cnt++)
pOut[cnt] = pIn[cnt] * gain1 + pPrev[cnt]; pOut[cnt] = fixmul16(pIn[cnt], gain1) + pPrev[cnt];
} else { } else {
numdata = pGain1->num_gain_data; numdata = pGain1->num_gain_data;
pGain1->loccode[numdata] = 32; pGain1->loccode[numdata] = 32;
@ -570,36 +553,38 @@ static void gainCompensateAndOverlap (float *pIn, float *pPrev, float *pOut, gai
/* interpolate */ /* interpolate */
for (; nsample < startLoc; nsample++) for (; nsample < startLoc; nsample++)
pOut[nsample] = (pIn[nsample] * gain1 + pPrev[nsample]) * gain2; pOut[nsample] = fixmul16((fixmul16(pIn[nsample], gain1) + pPrev[nsample]), gain2);
/* interpolation is done over eight samples */ /* interpolation is done over eight samples */
for (; nsample < endLoc; nsample++) { for (; nsample < endLoc; nsample++) {
pOut[nsample] = (pIn[nsample] * gain1 + pPrev[nsample]) * gain2; pOut[nsample] = fixmul16((fixmul16(pIn[nsample], gain1) + pPrev[nsample]),gain2);
gain2 *= gain_inc; gain2 = fixmul16(gain2, gain_inc);
} }
} }
for (; nsample < 256; nsample++) for (; nsample < 256; nsample++)
pOut[nsample] = (pIn[nsample] * gain1) + pPrev[nsample]; pOut[nsample] = fixmul16(pIn[nsample], gain1) + pPrev[nsample];
} }
/* Delay for the overlapping part. */ /* Delay for the overlapping part. */
memcpy(pPrev, &pIn[256], 256*sizeof(float)); memcpy(pPrev, &pIn[256], 256*sizeof(int32_t));
} }
/** /**
* Combine the tonal band spectrum and regular band spectrum * Combine the tonal band spectrum and regular band spectrum
* Return position of the last tonal coefficient * Return position of the last tonal coefficient
* *
* @param pSpectrum output spectrum buffer * @param pSpectrum output spectrum buffer
* @param numComponents amount of tonal components * @param numComponents amount of tonal components
* @param pComponent tonal components for this band * @param pComponent tonal components for this band
*/ */
static int addTonalComponents (float *pSpectrum, int numComponents, tonal_component *pComponent) static int addTonalComponents (int32_t *pSpectrum, int numComponents, tonal_component *pComponent)
{ {
int cnt, i, lastPos = -1; int cnt, i, lastPos = -1;
float *pIn, *pOut; int32_t *pOut;
int32_t *pIn;
for (cnt = 0; cnt < numComponents; cnt++){ for (cnt = 0; cnt < numComponents; cnt++){
lastPos = FFMAX(pComponent[cnt].pos + pComponent[cnt].numCoefs, lastPos); lastPos = FFMAX(pComponent[cnt].pos + pComponent[cnt].numCoefs, lastPos);
@ -614,13 +599,13 @@ static int addTonalComponents (float *pSpectrum, int numComponents, tonal_compon
} }
#define INTERPOLATE(old,new,nsample) ((old) + (nsample)*0.125*((new)-(old))) #define INTERPOLATE(old,new,nsample) ((old*ONE_16) + fixmul16(((nsample*ONE_16)>>3), (((new) - (old))*ONE_16)))
static void reverseMatrixing(float *su1, float *su2, int *pPrevCode, int *pCurrCode) static void reverseMatrixing(int32_t *su1, int32_t *su2, int *pPrevCode, int *pCurrCode)
{ {
int i, band, nsample, s1, s2; int i, band, nsample, s1, s2;
float c1, c2; int32_t c1, c2;
float mc1_l, mc1_r, mc2_l, mc2_r; int32_t mc1_l, mc1_r, mc2_l, mc2_r;
for (i=0,band = 0; band < 4*256; band+=256,i++) { for (i=0,band = 0; band < 4*256; band+=256,i++) {
s1 = pPrevCode[i]; s1 = pPrevCode[i];
@ -629,18 +614,18 @@ static void reverseMatrixing(float *su1, float *su2, int *pPrevCode, int *pCurrC
if (s1 != s2) { if (s1 != s2) {
/* Selector value changed, interpolation needed. */ /* Selector value changed, interpolation needed. */
mc1_l = matrixCoeffs[s1*2]; mc1_l = matrixCoeffs_fix[s1<<1];
mc1_r = matrixCoeffs[s1*2+1]; mc1_r = matrixCoeffs_fix[(s1<<1)+1];
mc2_l = matrixCoeffs[s2*2]; mc2_l = matrixCoeffs_fix[s2<<1];
mc2_r = matrixCoeffs[s2*2+1]; mc2_r = matrixCoeffs_fix[(s2<<1)+1];
/* Interpolation is done over the first eight samples. */ /* Interpolation is done over the first eight samples. */
for(; nsample < 8; nsample++) { for(; nsample < 8; nsample++) {
c1 = su1[band+nsample]; c1 = su1[band+nsample];
c2 = su2[band+nsample]; c2 = su2[band+nsample];
c2 = c1 * INTERPOLATE(mc1_l,mc2_l,nsample) + c2 * INTERPOLATE(mc1_r,mc2_r,nsample); c2 = fixmul16(c1, INTERPOLATE(mc1_l, mc2_l, nsample)) + fixmul16(c2, INTERPOLATE(mc1_r, mc2_r, nsample));
su1[band+nsample] = c2; su1[band+nsample] = c2;
su2[band+nsample] = c1 * 2.0 - c2; su2[band+nsample] = (c1 << 1) - c2;
} }
} }
@ -650,8 +635,8 @@ static void reverseMatrixing(float *su1, float *su2, int *pPrevCode, int *pCurrC
for (; nsample < 256; nsample++) { for (; nsample < 256; nsample++) {
c1 = su1[band+nsample]; c1 = su1[band+nsample];
c2 = su2[band+nsample]; c2 = su2[band+nsample];
su1[band+nsample] = c2 * 2.0; su1[band+nsample] = c2 << 1;
su2[band+nsample] = (c1 - c2) * 2.0; su2[band+nsample] = (c1 - c2) << 1;
} }
break; break;
@ -659,8 +644,8 @@ static void reverseMatrixing(float *su1, float *su2, int *pPrevCode, int *pCurrC
for (; nsample < 256; nsample++) { for (; nsample < 256; nsample++) {
c1 = su1[band+nsample]; c1 = su1[band+nsample];
c2 = su2[band+nsample]; c2 = su2[band+nsample];
su1[band+nsample] = (c1 + c2) * 2.0; su1[band+nsample] = (c1 + c2) << 1;
su2[band+nsample] = c2 * -2.0; su2[band+nsample] = -1*(c2 << 1);
} }
break; break;
case 2: case 2:
@ -678,24 +663,23 @@ static void reverseMatrixing(float *su1, float *su2, int *pPrevCode, int *pCurrC
} }
} }
static void getChannelWeights (int indx, int flag, float ch[2]){ static void getChannelWeights (int indx, int flag, int32_t ch[2]){
if (indx == 7) { if (indx == 7) {
ch[0] = 1.0; ch[0] = ONE_16;
ch[1] = 1.0; ch[1] = ONE_16;
} else { } else {
ch[0] = (float)(indx & 7) / 7.0; ch[0] = fixdiv16(((indx & 7)*ONE_16), 7*ONE_16);
ch[1] = sqrt(2 - ch[0]*ch[0]); ch[1] = fastSqrt((ONE_16 << 1) - fixmul16(ch[0], ch[0]));
if(flag) if(flag)
FFSWAP(float, ch[0], ch[1]); FFSWAP(int32_t, ch[0], ch[1]);
} }
} }
static void channelWeighting (float *su1, float *su2, int *p3) static void channelWeighting (int32_t *su1, int32_t *su2, int *p3)
{ {
int band, nsample; int band, nsample;
/* w[x][y] y=0 is left y=1 is right */ /* w[x][y] y=0 is left y=1 is right */
float w[2][2]; int32_t w[2][2];
if (p3[1] != 7 || p3[3] != 7){ if (p3[1] != 7 || p3[3] != 7){
getChannelWeights(p3[1], p3[0], w[0]); getChannelWeights(p3[1], p3[0], w[0]);
@ -704,13 +688,13 @@ static void channelWeighting (float *su1, float *su2, int *p3)
for(band = 1; band < 4; band++) { for(band = 1; band < 4; band++) {
/* scale the channels by the weights */ /* scale the channels by the weights */
for(nsample = 0; nsample < 8; nsample++) { for(nsample = 0; nsample < 8; nsample++) {
su1[band*256+nsample] *= INTERPOLATE(w[0][0], w[0][1], nsample); su1[band*256+nsample] = fixmul16(su1[band*256+nsample], INTERPOLATE(w[0][0], w[0][1], nsample));
su2[band*256+nsample] *= INTERPOLATE(w[1][0], w[1][1], nsample); su2[band*256+nsample] = fixmul16(su2[band*256+nsample], INTERPOLATE(w[1][0], w[1][1], nsample));
} }
for(; nsample < 256; nsample++) { for(; nsample < 256; nsample++) {
su1[band*256+nsample] *= w[1][0]; su1[band*256+nsample] = fixmul16(su1[band*256+nsample], w[1][0]);
su2[band*256+nsample] *= w[1][1]; su2[band*256+nsample] = fixmul16(su2[band*256+nsample], w[1][1]);
} }
} }
} }
@ -728,10 +712,9 @@ static void channelWeighting (float *su1, float *su2, int *p3)
*/ */
static int decodeChannelSoundUnit (ATRAC3Context *q, GetBitContext *gb, channel_unit *pSnd, float *pOut, int channelNum, int codingMode) static int decodeChannelSoundUnit (ATRAC3Context *q, GetBitContext *gb, channel_unit *pSnd, int32_t *pOut, int channelNum, int codingMode)
{ {
int band, result=0, numSubbands, lastTonal, numBands; int band, result=0, numSubbands, lastTonal, numBands;
if (codingMode == JOINT_STEREO && channelNum == 1) { if (codingMode == JOINT_STEREO && channelNum == 1) {
if (get_bits(gb,2) != 3) { if (get_bits(gb,2) != 3) {
av_log(NULL,AV_LOG_ERROR,"JS mono Sound Unit id != 3.\n"); av_log(NULL,AV_LOG_ERROR,"JS mono Sound Unit id != 3.\n");
@ -771,7 +754,7 @@ static int decodeChannelSoundUnit (ATRAC3Context *q, GetBitContext *gb, channel_
if (band <= numBands) { if (band <= numBands) {
IMLT(&(pSnd->spectrum[band*256]), pSnd->IMDCT_buf, band&1); IMLT(&(pSnd->spectrum[band*256]), pSnd->IMDCT_buf, band&1);
} else } else
memset(pSnd->IMDCT_buf, 0, 512 * sizeof(float)); memset(pSnd->IMDCT_buf, 0, 512 * sizeof(int32_t));
/* gain compensation and overlapping */ /* gain compensation and overlapping */
gainCompensateAndOverlap (pSnd->IMDCT_buf, &(pSnd->prevFrame[band*256]), &(pOut[band*256]), gainCompensateAndOverlap (pSnd->IMDCT_buf, &(pSnd->prevFrame[band*256]), &(pOut[band*256]),
@ -795,7 +778,7 @@ static int decodeChannelSoundUnit (ATRAC3Context *q, GetBitContext *gb, channel_
static int decodeFrame(ATRAC3Context *q, const uint8_t* databuf) static int decodeFrame(ATRAC3Context *q, const uint8_t* databuf)
{ {
int result, i; int result, i;
float *p1, *p2, *p3, *p4; int32_t *p1, *p2, *p3, *p4;
uint8_t *ptr1; uint8_t *ptr1;
if (q->codingMode == JOINT_STEREO) { if (q->codingMode == JOINT_STEREO) {
@ -893,7 +876,6 @@ static int decodeFrame(ATRAC3Context *q, const uint8_t* databuf)
static int atrac3_decode_frame(RMContext *rmctx, ATRAC3Context *q, static int atrac3_decode_frame(RMContext *rmctx, ATRAC3Context *q,
void *data, int *data_size, void *data, int *data_size,
const uint8_t *buf, int buf_size) { const uint8_t *buf, int buf_size) {
//ATRAC3Context *q = rmctx->priv_data;
int result = 0, i; int result = 0, i;
const uint8_t* databuf; const uint8_t* databuf;
int16_t* samples = data; int16_t* samples = data;
@ -919,13 +901,13 @@ static int atrac3_decode_frame(RMContext *rmctx, ATRAC3Context *q,
if (q->channels == 1) { if (q->channels == 1) {
/* mono */ /* mono */
for (i = 0; i<1024; i++) for (i = 0; i<1024; i++)
samples[i] = av_clip_int16(round(q->outSamples[i])); samples[i] = av_clip_int16(q->outSamples[i]);
*data_size = 1024 * sizeof(int16_t); *data_size = 1024 * sizeof(int16_t);
} else { } else {
/* stereo */ /* stereo */
for (i = 0; i < 1024; i++) { for (i = 0; i < 1024; i++) {
samples[i*2] = av_clip_int16(round(q->outSamples[i])); samples[i*2] = av_clip_int16(q->outSamples[i]);
samples[i*2+1] = av_clip_int16(round(q->outSamples[1024+i])); samples[i*2+1] = av_clip_int16(q->outSamples[1024+i]);
} }
*data_size = 2048 * sizeof(int16_t); *data_size = 2048 * sizeof(int16_t);
} }
@ -944,7 +926,6 @@ static av_cold int atrac3_decode_init(ATRAC3Context *q, RMContext *rmctx)
{ {
int i; int i;
const uint8_t *edata_ptr = rmctx->codec_extradata; const uint8_t *edata_ptr = rmctx->codec_extradata;
//ATRAC3Context *q = rmctx->priv_data;
static VLC_TYPE atrac3_vlc_table[4096][2]; static VLC_TYPE atrac3_vlc_table[4096][2];
static int vlcs_initialized = 0; static int vlcs_initialized = 0;
@ -1051,17 +1032,6 @@ static av_cold int atrac3_decode_init(ATRAC3Context *q, RMContext *rmctx)
init_atrac3_transforms(q); init_atrac3_transforms(q);
/* Generate the scale factors. */
for (i=0 ; i<64 ; i++)
SFTable[i] = pow(2.0, (i - 15) / 3.0);
/* Generate gain tables. */
for (i=0 ; i<16 ; i++)
gain_tab1[i] = powf (2.0, (4 - i));
for (i=-15 ; i<16 ; i++)
gain_tab2[i+15] = powf (2.0, i * -0.125);
/* init the joint-stereo decoding data */ /* init the joint-stereo decoding data */
q->weighting_delay[0] = 0; q->weighting_delay[0] = 0;
q->weighting_delay[1] = 7; q->weighting_delay[1] = 7;
@ -1076,8 +1046,6 @@ static av_cold int atrac3_decode_init(ATRAC3Context *q, RMContext *rmctx)
q->matrix_coeff_index_next[i] = 3; q->matrix_coeff_index_next[i] = 3;
} }
dsputil_init(&dsp);
q->pUnits = av_mallocz(sizeof(channel_unit)*q->channels); q->pUnits = av_mallocz(sizeof(channel_unit)*q->channels);
if (!q->pUnits) { if (!q->pUnits) {
av_free(q->decoded_bytes_buffer); av_free(q->decoded_bytes_buffer);

View file

@ -0,0 +1,145 @@
/* tables for the scalefactor decoding */
/* scaled by 2^31*/
static const int32_t iMaxQuant_fix[8] = {
0x0, 0x55555580, 0x33333340, 0x24924940, 0x1c71c720, 0x11111120, 0x8421080,
0x4104108
};
/* scaled by 2^16 */
static const int32_t SFTable_fixed[64] = {
0x00000800, 0x00000a14, 0x00000cb3, 0x00001000, 0x00001429, 0x00001966,
0x00002000, 0x00002851, 0x000032cc, 0x00004000, 0x000050a3, 0x00006598,
0x00008000, 0x0000a145, 0x0000cb30, 0x00010000, 0x0001428a, 0x00019660,
0x00020000, 0x00028514, 0x00032cc0, 0x00040000, 0x00050a29, 0x00065980,
0x00080000, 0x000a1452, 0x000cb2ff, 0x00100000, 0x001428a3, 0x001965ff,
0x00200000, 0x00285146, 0x0032cbfd, 0x00400000, 0x0050a28c, 0x006597fb,
0x00800000, 0x00a14518, 0x00cb2ff5, 0x01000000, 0x01428a30, 0x01965fea,
0x02000000, 0x02851460, 0x032cbfd4, 0x04000000, 0x050a28c0, 0x06597fa8,
0x08000000, 0x0a145180, 0x0cb2ff50, 0x10000000, 0x1428a300, 0x1965fea0,
0x20000000, 0x28514600, 0x32cbfd40, 0x40000000, 0x50a28c00, 0x6597fa80,
0x80000000, 0x80000000, 0x80000000, 0x80000000,
};
/* transform data */
/* floating point values scaled by 2^31 */
static const int32_t qmf_48tap_half_fix[24] = {
0xffff855e, 0xfffcfbca, 0xfffe28eb, 0x9de6b, 0x7f028, 0xffe40d08,
0xffeef140, 0x42a692, 0x19ab1f, 0xff75dec7, 0xffe738f5, 0x100e928,
0xfffdfedf, 0xfe478b84, 0x50b279, 0x2c83f88, 0xff005ad7, 0xfba2ee80,
0x2685970, 0x6f42798, 0xfa6b6f10, 0xf3475f80, 0x10e7f7c0, 0x3b6c44c0
};
/* mdct window scaled by 2^31 */
static const int32_t window_lookup[512] = {
0xffffb10c, 0xfffd394b, 0xfff8494f, 0xfff0e025, 0xffe6fc5f, 0xffda9c15,
0xffcbbce6, 0xffba5bf4, 0xffa675e8, 0xff9006f0, 0xff770aba, 0xff5b7c7e,
0xff3d56f2, 0xff1c9452, 0xfef92e59, 0xfed31e45, 0xfeaa5cd5, 0xfe7ee247,
0xfe50a657, 0xfe1fa041, 0xfdebc6c1, 0xfdb5100d, 0xfd7b71d5, 0xfd3ee149,
0xfcff5311, 0xfcbcbb49, 0xfc770d99, 0xfc2e3d15, 0xfbe23c39, 0xfb92fd29,
0xfb407141, 0xfaea8989, 0xfa913661, 0xfa3467b1, 0xf9d40cd9, 0xf9701499,
0xf9086d41, 0xf89d04a9, 0xf82dc7f1, 0xf7baa3e1, 0xf74384b1, 0xf6c85611,
0xf6490321, 0xf5c576b1, 0xf53d9b21, 0xf4b15a01, 0xf4209ce1, 0xf38b4c71,
0xf2f15171, 0xf2529411, 0xf1aefbf1, 0xf10670a1, 0xf058d941, 0xefa61cc1,
0xeeee21c1, 0xee30cec1, 0xed6e0a41, 0xeca5ba61, 0xebd7c5c1, 0xeb041241,
0xea2a8601, 0xe94b0861, 0xe8657f61, 0xe779d241, 0xe687e861, 0xe58fa9e1,
0xe490fec1, 0xe38bd101, 0xe28009c1, 0xe16d93e1, 0xe0545ba1, 0xdf344dc1,
0xde0d5881, 0xdcdf6bc1, 0xdbaa7801, 0xda6e70c1, 0xd92b4ac1, 0xd7e0fc81,
0xd68f7ec1, 0xd536cd41, 0xd3d6e5c1, 0xd26fc901, 0xd10179c1, 0xcf8bff41,
0xce0f6301, 0xcc8bb241, 0xcb00fdc1, 0xc96f5b01, 0xc7d6e141, 0xc637af41,
0xc491e4c1, 0xc2e5a801, 0xc1332401, 0xbf7a8701, 0xbdbc0681, 0xbbf7da01,
0xba2e4181, 0xb85f7f81, 0xb68bde01, 0xb4b3a981, 0xb2d73781, 0xb0f6df01,
0xaf12ff01, 0xad2bfa81, 0xab423981, 0xa9562981, 0xa7683c01, 0xa578e701,
0xa388a681, 0xa197f801, 0x9fa75e81, 0x9db75f01, 0x9bc88201, 0x99db5301,
0x97f06001, 0x96083601, 0x94236601, 0x92427f81, 0x90661481, 0x8e8eb481,
0x8cbced01, 0x8af14d81, 0x892c5f81, 0x876eab01, 0x85b8b681, 0x840b0301,
0x82660c01, 0x80ca4a01, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
0x80ca4a01, 0x82660c01, 0x840b0301, 0x85b8b681, 0x876eab01, 0x892c5f81,
0x8af14d81, 0x8cbced01, 0x8e8eb481, 0x90661481, 0x92427f81, 0x94236601,
0x96083601, 0x97f06001, 0x99db5301, 0x9bc88201, 0x9db75f01, 0x9fa75e81,
0xa197f801, 0xa388a681, 0xa578e701, 0xa7683c01, 0xa9562981, 0xab423981,
0xad2bfa81, 0xaf12ff01, 0xb0f6df01, 0xb2d73781, 0xb4b3a981, 0xb68bde01,
0xb85f7f81, 0xba2e4181, 0xbbf7da01, 0xbdbc0681, 0xbf7a8701, 0xc1332401,
0xc2e5a801, 0xc491e4c1, 0xc637af41, 0xc7d6e141, 0xc96f5b01, 0xcb00fdc1,
0xcc8bb241, 0xce0f6301, 0xcf8bff41, 0xd10179c1, 0xd26fc901, 0xd3d6e5c1,
0xd536cd41, 0xd68f7ec1, 0xd7e0fc81, 0xd92b4ac1, 0xda6e70c1, 0xdbaa7801,
0xdcdf6bc1, 0xde0d5881, 0xdf344dc1, 0xe0545ba1, 0xe16d93e1, 0xe28009c1,
0xe38bd101, 0xe490fec1, 0xe58fa9e1, 0xe687e861, 0xe779d241, 0xe8657f61,
0xe94b0861, 0xea2a8601, 0xeb041241, 0xebd7c5c1, 0xeca5ba61, 0xed6e0a41,
0xee30cec1, 0xeeee21c1, 0xefa61cc1, 0xf058d941, 0xf10670a1, 0xf1aefbf1,
0xf2529411, 0xf2f15171, 0xf38b4c71, 0xf4209ce1, 0xf4b15a01, 0xf53d9b21,
0xf5c576b1, 0xf6490321, 0xf6c85611, 0xf74384b1, 0xf7baa3e1, 0xf82dc7f1,
0xf89d04a9, 0xf9086d41, 0xf9701499, 0xf9d40cd9, 0xfa3467b1, 0xfa913661,
0xfaea8989, 0xfb407141, 0xfb92fd29, 0xfbe23c39, 0xfc2e3d15, 0xfc770d99,
0xfcbcbb49, 0xfcff5311, 0xfd3ee149, 0xfd7b71d5, 0xfdb5100d, 0xfdebc6c1,
0xfe1fa041, 0xfe50a657, 0xfe7ee247, 0xfeaa5cd5, 0xfed31e45, 0xfef92e59,
0xff1c9452, 0xff3d56f2, 0xff5b7c7e, 0xff770aba, 0xff9006f0, 0xffa675e8,
0xffba5bf4, 0xffcbbce6, 0xffda9c15, 0xffe6fc5f, 0xfff0e025, 0xfff8494f,
0xfffd394b, 0xffffb10c,
};
/* Gain tables scaled by 2^16 */
static const int32_t gain_tab1[16] = {
0x00100000, 0x00080000, 0x00040000, 0x00020000, 0x00010000, 0x00008000,
0x00004000, 0x00002000, 0x00001000, 0x00000800, 0x00000400, 0x00000200,
0x00000100, 0x00000080, 0x00000040, 0x00000020,
};
static const int32_t gain_tab2[31] = {
0x0003ab03, 0x00035d14, 0x0003159d, 0x0002d414, 0x000297fb, 0x000260e0,
0x00022e57, 0x00020000, 0x0001d582, 0x0001ae8a, 0x00018ace, 0x00016a0a,
0x00014bfe, 0x00013070, 0x0001172c, 0x00010000, 0x0000eac1, 0x0000d745,
0x0000c567, 0x0000b505, 0x0000a5ff, 0x00009838, 0x00008b96, 0x00008000,
0x00007560, 0x00006ba2, 0x000062b4, 0x00005a82, 0x000052ff, 0x00004c1c,
0x000045cb,
};
/* Joint-Stereo related tables, scaled by 2^16 */
static const int32_t matrixCoeffs_fix[8] = {
0x00000000, 0x00020000, 0x00020000, 0x00020000,
0x00000000, 0x00000000, 0x00010000, 0x00010000,
};

File diff suppressed because it is too large Load diff

View file

@ -1,898 +0,0 @@
/*
* DSP utils
* Copyright (c) 2000, 2001, 2002 Fabrice Bellard
* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file libavcodec/dsputil.h
* DSP utils.
* note, many functions in here may use MMX which trashes the FPU state, it is
* absolutely necessary to call emms_c() between dsp & float/double code
*/
#ifndef AVCODEC_DSPUTIL_H
#define AVCODEC_DSPUTIL_H
#include "libavutil/intreadwrite.h"
#include "avcodec.h"
//#define DEBUG
/* dct code */
typedef short DCTELEM;
typedef int DWTELEM;
typedef short IDWTELEM;
void fdct_ifast (DCTELEM *data);
void fdct_ifast248 (DCTELEM *data);
void ff_jpeg_fdct_islow (DCTELEM *data);
void ff_fdct248_islow (DCTELEM *data);
void j_rev_dct (DCTELEM *data);
void j_rev_dct4 (DCTELEM *data);
void j_rev_dct2 (DCTELEM *data);
void j_rev_dct1 (DCTELEM *data);
void ff_wmv2_idct_c(DCTELEM *data);
void ff_fdct_mmx(DCTELEM *block);
void ff_fdct_mmx2(DCTELEM *block);
void ff_fdct_sse2(DCTELEM *block);
void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride);
void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride);
void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride);
void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride);
void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block);
void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block);
void ff_h264_idct_add16_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct_add16intra_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct8_add4_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct_add8_c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_vector_fmul_add_add_c(float *dst, const float *src0, const float *src1,
const float *src2, int src3, int blocksize, int step);
void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1,
const float *win, float add_bias, int len);
void ff_float_to_int16_c(int16_t *dst, const float *src, long len);
void ff_float_to_int16_interleave_c(int16_t *dst, const float **src, long len, int channels);
/* encoding scans */
extern const uint8_t ff_alternate_horizontal_scan[64];
extern const uint8_t ff_alternate_vertical_scan[64];
extern const uint8_t ff_zigzag_direct[64];
extern const uint8_t ff_zigzag248_direct[64];
/* pixel operations */
#define MAX_NEG_CROP 1024
/* temporary */
extern uint32_t ff_squareTbl[512];
extern uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP];
/* VP3 DSP functions */
void ff_vp3_idct_c(DCTELEM *block/* align 16*/);
void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
void ff_vp3_v_loop_filter_c(uint8_t *src, int stride, int *bounding_values);
void ff_vp3_h_loop_filter_c(uint8_t *src, int stride, int *bounding_values);
/* VP6 DSP functions */
void ff_vp6_filter_diag4_c(uint8_t *dst, uint8_t *src, int stride,
const int16_t *h_weights, const int16_t *v_weights);
/* 1/2^n downscaling functions from imgconvert.c */
void ff_img_copy_plane(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
void ff_shrink22(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
void ff_shrink44(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
void ff_shrink88(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
/* minimum alignment rules ;)
If you notice errors in the align stuff, need more alignment for some ASM code
for some CPU or need to use a function with less aligned data then send a mail
to the ffmpeg-devel mailing list, ...
!warning These alignments might not match reality, (missing attribute((align))
stuff somewhere possible).
I (Michael) did not check them, these are just the alignments which I think
could be reached easily ...
!future video codecs might need functions with less strict alignment
*/
/*
void get_pixels_c(DCTELEM *block, const uint8_t *pixels, int line_size);
void diff_pixels_c(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride);
void put_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size);
void add_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size);
void clear_blocks_c(DCTELEM *blocks);
*/
/* add and put pixel (decoding) */
// blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16
//h for op_pixels_func is limited to {width/2, width} but never larger than 16 and never smaller then 4
typedef void (*op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int h);
typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int w, int h);
typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);
typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y);
typedef void (*h264_weight_func)(uint8_t *block, int stride, int log2_denom, int weight, int offset);
typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset);
#define DEF_OLD_QPEL(name)\
void ff_put_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\
void ff_put_no_rnd_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\
void ff_avg_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);
DEF_OLD_QPEL(qpel16_mc11_old_c)
DEF_OLD_QPEL(qpel16_mc31_old_c)
DEF_OLD_QPEL(qpel16_mc12_old_c)
DEF_OLD_QPEL(qpel16_mc32_old_c)
DEF_OLD_QPEL(qpel16_mc13_old_c)
DEF_OLD_QPEL(qpel16_mc33_old_c)
DEF_OLD_QPEL(qpel8_mc11_old_c)
DEF_OLD_QPEL(qpel8_mc31_old_c)
DEF_OLD_QPEL(qpel8_mc12_old_c)
DEF_OLD_QPEL(qpel8_mc32_old_c)
DEF_OLD_QPEL(qpel8_mc13_old_c)
DEF_OLD_QPEL(qpel8_mc33_old_c)
#define CALL_2X_PIXELS(a, b, n)\
static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
b(block , pixels , line_size, h);\
b(block+n, pixels+n, line_size, h);\
}
/* motion estimation */
// h is limited to {width/2, width, 2*width} but never larger than 16 and never smaller then 2
// although currently h<4 is not used as functions with width <8 are neither used nor implemented
typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size, int h)/* __attribute__ ((const))*/;
// for snow slices
typedef struct slice_buffer_s slice_buffer;
/**
* Scantable.
*/
typedef struct ScanTable{
const uint8_t *scantable;
uint8_t permutated[64];
uint8_t raster_end[64];
#if ARCH_PPC
/** Used by dct_quantize_altivec to find last-non-zero */
DECLARE_ALIGNED(16, uint8_t, inverse[64]);
#endif
} ScanTable;
void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable);
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize,
int block_w, int block_h,
int src_x, int src_y, int w, int h);
/**
* DSPContext.
*/
typedef struct DSPContext {
/* pixel ops : interface with DCT */
void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size);
void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride);
void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
void (*put_signed_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
void (*add_pixels8)(uint8_t *pixels, DCTELEM *block, int line_size);
void (*add_pixels4)(uint8_t *pixels, DCTELEM *block, int line_size);
int (*sum_abs_dctelem)(DCTELEM *block/*align 16*/);
/**
* translational global motion compensation.
*/
void (*gmc1)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder);
/**
* global motion compensation.
*/
void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy,
int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
void (*clear_block)(DCTELEM *block/*align 16*/);
void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
int (*pix_sum)(uint8_t * pix, int line_size);
int (*pix_norm1)(uint8_t * pix, int line_size);
// 16x16 8x8 4x4 2x2 16x8 8x4 4x2 8x16 4x8 2x4
me_cmp_func sad[6]; /* identical to pix_absAxA except additional void * */
me_cmp_func sse[6];
me_cmp_func hadamard8_diff[6];
me_cmp_func dct_sad[6];
me_cmp_func quant_psnr[6];
me_cmp_func bit[6];
me_cmp_func rd[6];
me_cmp_func vsad[6];
me_cmp_func vsse[6];
me_cmp_func nsse[6];
me_cmp_func w53[6];
me_cmp_func w97[6];
me_cmp_func dct_max[6];
me_cmp_func dct264_sad[6];
me_cmp_func me_pre_cmp[6];
me_cmp_func me_cmp[6];
me_cmp_func me_sub_cmp[6];
me_cmp_func mb_cmp[6];
me_cmp_func ildct_cmp[6]; //only width 16 used
me_cmp_func frame_skip_cmp[6]; //only width 8 used
int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2,
int size);
/**
* Halfpel motion compensation with rounding (a+b+1)>>1.
* this is an array[4][4] of motion compensation functions for 4
* horizontal blocksizes (8,16) and the 4 halfpel positions<br>
* *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
* @param block destination where the result is stored
* @param pixels source
* @param line_size number of bytes in a horizontal line of block
* @param h height
*/
op_pixels_func put_pixels_tab[4][4];
/**
* Halfpel motion compensation with rounding (a+b+1)>>1.
* This is an array[4][4] of motion compensation functions for 4
* horizontal blocksizes (8,16) and the 4 halfpel positions<br>
* *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
* @param block destination into which the result is averaged (a+b+1)>>1
* @param pixels source
* @param line_size number of bytes in a horizontal line of block
* @param h height
*/
op_pixels_func avg_pixels_tab[4][4];
/**
* Halfpel motion compensation with no rounding (a+b)>>1.
* this is an array[2][4] of motion compensation functions for 2
* horizontal blocksizes (8,16) and the 4 halfpel positions<br>
* *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
* @param block destination where the result is stored
* @param pixels source
* @param line_size number of bytes in a horizontal line of block
* @param h height
*/
op_pixels_func put_no_rnd_pixels_tab[4][4];
/**
* Halfpel motion compensation with no rounding (a+b)>>1.
* this is an array[2][4] of motion compensation functions for 2
* horizontal blocksizes (8,16) and the 4 halfpel positions<br>
* *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
* @param block destination into which the result is averaged (a+b)>>1
* @param pixels source
* @param line_size number of bytes in a horizontal line of block
* @param h height
*/
op_pixels_func avg_no_rnd_pixels_tab[4][4];
void (*put_no_rnd_pixels_l2[2])(uint8_t *block/*align width (8 or 16)*/, const uint8_t *a/*align 1*/, const uint8_t *b/*align 1*/, int line_size, int h);
/**
* Thirdpel motion compensation with rounding (a+b+1)>>1.
* this is an array[12] of motion compensation functions for the 9 thirdpe
* positions<br>
* *pixels_tab[ xthirdpel + 4*ythirdpel ]
* @param block destination where the result is stored
* @param pixels source
* @param line_size number of bytes in a horizontal line of block
* @param h height
*/
tpel_mc_func put_tpel_pixels_tab[11]; //FIXME individual func ptr per width?
tpel_mc_func avg_tpel_pixels_tab[11]; //FIXME individual func ptr per width?
qpel_mc_func put_qpel_pixels_tab[2][16];
qpel_mc_func avg_qpel_pixels_tab[2][16];
qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16];
qpel_mc_func put_mspel_pixels_tab[8];
/**
* h264 Chroma MC
*/
h264_chroma_mc_func put_h264_chroma_pixels_tab[3];
/* This is really one func used in VC-1 decoding */
h264_chroma_mc_func put_no_rnd_h264_chroma_pixels_tab[3];
h264_chroma_mc_func avg_h264_chroma_pixels_tab[3];
qpel_mc_func put_h264_qpel_pixels_tab[4][16];
qpel_mc_func avg_h264_qpel_pixels_tab[4][16];
qpel_mc_func put_2tap_qpel_pixels_tab[4][16];
qpel_mc_func avg_2tap_qpel_pixels_tab[4][16];
h264_weight_func weight_h264_pixels_tab[10];
h264_biweight_func biweight_h264_pixels_tab[10];
/* AVS specific */
qpel_mc_func put_cavs_qpel_pixels_tab[2][16];
qpel_mc_func avg_cavs_qpel_pixels_tab[2][16];
void (*cavs_filter_lv)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
void (*cavs_filter_lh)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
void (*cavs_filter_cv)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
void (*cavs_filter_ch)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
void (*cavs_idct8_add)(uint8_t *dst, DCTELEM *block, int stride);
me_cmp_func pix_abs[2][4];
/* huffyuv specific */
void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w);
void (*add_bytes_l2)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 16*/, int w);
void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w);
/**
* subtract huffyuv's variant of median prediction
* note, this might read from src1[-1], src2[-1]
*/
void (*sub_hfyu_median_prediction)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top);
void (*add_hfyu_median_prediction)(uint8_t *dst, uint8_t *top, uint8_t *diff, int w, int *left, int *left_top);
/* this might write to dst[w] */
void (*add_png_paeth_prediction)(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp);
void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w);
void (*h264_v_loop_filter_luma)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0);
void (*h264_h_loop_filter_luma)(uint8_t *pix/*align 4 */, int stride, int alpha, int beta, int8_t *tc0);
/* v/h_loop_filter_luma_intra: align 16 */
void (*h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta);
void (*h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta);
void (*h264_v_loop_filter_chroma)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta, int8_t *tc0);
void (*h264_h_loop_filter_chroma)(uint8_t *pix/*align 4*/, int stride, int alpha, int beta, int8_t *tc0);
void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta);
void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta);
// h264_loop_filter_strength: simd only. the C version is inlined in h264.c
void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2],
int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field);
void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale);
void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale);
void (*h261_loop_filter)(uint8_t *src, int stride);
void (*x8_v_loop_filter)(uint8_t *src, int stride, int qscale);
void (*x8_h_loop_filter)(uint8_t *src, int stride, int qscale);
void (*vp3_v_loop_filter)(uint8_t *src, int stride, int *bounding_values);
void (*vp3_h_loop_filter)(uint8_t *src, int stride, int *bounding_values);
void (*vp6_filter_diag4)(uint8_t *dst, uint8_t *src, int stride,
const int16_t *h_weights,const int16_t *v_weights);
/* assume len is a multiple of 4, and arrays are 16-byte aligned */
void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize);
void (*ac3_downmix)(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len);
/* no alignment needed */
void (*flac_compute_autocorr)(const int32_t *data, int len, int lag, double *autoc);
/* assume len is a multiple of 8, and arrays are 16-byte aligned */
void (*vector_fmul)(float *dst, const float *src, int len);
void (*vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len);
/* assume len is a multiple of 8, and src arrays are 16-byte aligned */
void (*vector_fmul_add_add)(float *dst, const float *src0, const float *src1, const float *src2, int src3, int len, int step);
/* assume len is a multiple of 4, and arrays are 16-byte aligned */
void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len);
/* assume len is a multiple of 8, and arrays are 16-byte aligned */
void (*int32_to_float_fmul_scalar)(float *dst, const int *src, float mul, int len);
/* C version: convert floats from the range [384.0,386.0] to ints in [-32768,32767]
* simd versions: convert floats from [-32768.0,32767.0] without rescaling and arrays are 16byte aligned */
void (*float_to_int16)(int16_t *dst, const float *src, long len);
void (*float_to_int16_interleave)(int16_t *dst, const float **src, long len, int channels);
/* (I)DCT */
void (*fdct)(DCTELEM *block/* align 16*/);
void (*fdct248)(DCTELEM *block/* align 16*/);
/* IDCT really*/
void (*idct)(DCTELEM *block/* align 16*/);
/**
* block -> idct -> clip to unsigned 8 bit -> dest.
* (-1392, 0, 0, ...) -> idct -> (-174, -174, ...) -> put -> (0, 0, ...)
* @param line_size size in bytes of a horizontal line of dest
*/
void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
/**
* block -> idct -> add dest -> clip to unsigned 8 bit -> dest.
* @param line_size size in bytes of a horizontal line of dest
*/
void (*idct_add)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
/**
* idct input permutation.
* several optimized IDCTs need a permutated input (relative to the normal order of the reference
* IDCT)
* this permutation must be performed before the idct_put/add, note, normally this can be merged
* with the zigzag/alternate scan<br>
* an example to avoid confusion:
* - (->decode coeffs -> zigzag reorder -> dequant -> reference idct ->...)
* - (x -> referece dct -> reference idct -> x)
* - (x -> referece dct -> simple_mmx_perm = idct_permutation -> simple_idct_mmx -> x)
* - (->decode coeffs -> zigzag reorder -> simple_mmx_perm -> dequant -> simple_idct_mmx ->...)
*/
uint8_t idct_permutation[64];
int idct_permutation_type;
#define FF_NO_IDCT_PERM 1
#define FF_LIBMPEG2_IDCT_PERM 2
#define FF_SIMPLE_IDCT_PERM 3
#define FF_TRANSPOSE_IDCT_PERM 4
#define FF_PARTTRANS_IDCT_PERM 5
#define FF_SSE2_IDCT_PERM 6
int (*try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale);
void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale);
#define BASIS_SHIFT 16
#define RECON_SHIFT 6
void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w);
#define EDGE_WIDTH 16
/* h264 functions */
/* NOTE!!! if you implement any of h264_idct8_add, h264_idct8_add4 then you must implement all of them
NOTE!!! if you implement any of h264_idct_add, h264_idct_add16, h264_idct_add16intra, h264_idct_add8 then you must implement all of them
The reason for above, is that no 2 out of one list may use a different permutation.
*/
void (*h264_idct_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride);
void (*h264_idct8_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride);
void (*h264_idct_dc_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride);
void (*h264_idct8_dc_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride);
void (*h264_dct)(DCTELEM block[4][4]);
void (*h264_idct_add16)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
void (*h264_idct8_add4)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
void (*h264_idct_add16intra)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
/* snow wavelet */
void (*vertical_compose97i)(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
void (*horizontal_compose97i)(IDWTELEM *b, int width);
void (*inner_add_yblock)(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
void (*prefetch)(void *mem, int stride, int h);
void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
/* vc1 functions */
void (*vc1_inv_trans_8x8)(DCTELEM *b);
void (*vc1_inv_trans_8x4)(uint8_t *dest, int line_size, DCTELEM *block);
void (*vc1_inv_trans_4x8)(uint8_t *dest, int line_size, DCTELEM *block);
void (*vc1_inv_trans_4x4)(uint8_t *dest, int line_size, DCTELEM *block);
void (*vc1_v_overlap)(uint8_t* src, int stride);
void (*vc1_h_overlap)(uint8_t* src, int stride);
/* put 8x8 block with bicubic interpolation and quarterpel precision
* last argument is actually round value instead of height
*/
op_pixels_func put_vc1_mspel_pixels_tab[16];
/* intrax8 functions */
void (*x8_spatial_compensation[12])(uint8_t *src , uint8_t *dst, int linesize);
void (*x8_setup_spatial_compensation)(uint8_t *src, uint8_t *dst, int linesize,
int * range, int * sum, int edges);
/* ape functions */
/**
* Add contents of the second vector to the first one.
* @param len length of vectors, should be multiple of 16
*/
void (*add_int16)(int16_t *v1/*align 16*/, int16_t *v2, int len);
/**
* Add contents of the second vector to the first one.
* @param len length of vectors, should be multiple of 16
*/
void (*sub_int16)(int16_t *v1/*align 16*/, int16_t *v2, int len);
/**
* Calculate scalar product of two vectors.
* @param len length of vectors, should be multiple of 16
* @param shift number of bits to discard from product
*/
int32_t (*scalarproduct_int16)(int16_t *v1, int16_t *v2/*align 16*/, int len, int shift);
/* rv30 functions */
qpel_mc_func put_rv30_tpel_pixels_tab[4][16];
qpel_mc_func avg_rv30_tpel_pixels_tab[4][16];
/* rv40 functions */
qpel_mc_func put_rv40_qpel_pixels_tab[4][16];
qpel_mc_func avg_rv40_qpel_pixels_tab[4][16];
h264_chroma_mc_func put_rv40_chroma_pixels_tab[3];
h264_chroma_mc_func avg_rv40_chroma_pixels_tab[3];
} DSPContext;
void dsputil_static_init(void);
void dsputil_init(DSPContext* p);
int ff_check_alignment(void);
/**
* permute block according to permuatation.
* @param last last non zero element in scantable order
*/
void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last);
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type);
#define BYTE_VEC32(c) ((c)*0x01010101UL)
static inline uint32_t rnd_avg32(uint32_t a, uint32_t b)
{
return (a | b) - (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1);
}
static inline uint32_t no_rnd_avg32(uint32_t a, uint32_t b)
{
return (a & b) + (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1);
}
static inline int get_penalty_factor(int lambda, int lambda2, int type){
switch(type&0xFF){
default:
case FF_CMP_SAD:
return lambda>>FF_LAMBDA_SHIFT;
case FF_CMP_DCT:
return (3*lambda)>>(FF_LAMBDA_SHIFT+1);
case FF_CMP_W53:
return (4*lambda)>>(FF_LAMBDA_SHIFT);
case FF_CMP_W97:
return (2*lambda)>>(FF_LAMBDA_SHIFT);
case FF_CMP_SATD:
case FF_CMP_DCT264:
return (2*lambda)>>FF_LAMBDA_SHIFT;
case FF_CMP_RD:
case FF_CMP_PSNR:
case FF_CMP_SSE:
case FF_CMP_NSSE:
return lambda2>>FF_LAMBDA_SHIFT;
case FF_CMP_BIT:
return 1;
}
}
/**
* Empty mmx state.
* this must be called between any dsp function and float/double code.
* for example sin(); dsp->idct_put(); emms_c(); cos()
*/
#define emms_c()
/* should be defined by architectures supporting
one or more MultiMedia extension */
int mm_support(void);
void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx);
void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx);
void dsputil_init_bfin(DSPContext* c, AVCodecContext *avctx);
void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx);
void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx);
void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx);
void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx);
void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx);
void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx);
#define DECLARE_ALIGNED_16(t, v) DECLARE_ALIGNED(16, t, v)
#if HAVE_MMX
#undef emms_c
extern int mm_flags;
void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
static inline void emms(void)
{
__asm__ volatile ("emms;":::"memory");
}
#define emms_c() \
{\
if (mm_flags & FF_MM_MMX)\
emms();\
}
void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx);
#elif ARCH_ARM
extern int mm_flags;
#if HAVE_NEON
# define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(16, t, v)
# define STRIDE_ALIGN 16
#endif
#elif ARCH_PPC
extern int mm_flags;
#define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(16, t, v)
#define STRIDE_ALIGN 16
#elif HAVE_MMI
#define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(16, t, v)
#define STRIDE_ALIGN 16
#else
#define mm_flags 0
#define mm_support() 0
#endif
#ifndef DECLARE_ALIGNED_8
# define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(8, t, v)
#endif
#ifndef STRIDE_ALIGN
# define STRIDE_ALIGN 8
#endif
/* PSNR */
void get_psnr(uint8_t *orig_image[3], uint8_t *coded_image[3],
int orig_linesize[3], int coded_linesize,
AVCodecContext *avctx);
/* FFT computation */
/* NOTE: soon integer code will be added, so you must use the
FFTSample type */
typedef float FFTSample;
struct MDCTContext;
typedef struct FFTComplex {
FFTSample re, im;
} FFTComplex;
typedef struct FFTContext {
int nbits;
int inverse;
uint16_t *revtab;
FFTComplex *exptab;
FFTComplex *exptab1; /* only used by SSE code */
FFTComplex *tmp_buf;
void (*fft_permute)(struct FFTContext *s, FFTComplex *z);
void (*fft_calc)(struct FFTContext *s, FFTComplex *z);
void (*imdct_calc)(struct MDCTContext *s, FFTSample *output, const FFTSample *input);
void (*imdct_half)(struct MDCTContext *s, FFTSample *output, const FFTSample *input);
} FFTContext;
extern FFTSample* ff_cos_tabs[13];
/**
* Sets up a complex FFT.
* @param nbits log2 of the length of the input array
* @param inverse if 0 perform the forward transform, if 1 perform the inverse
*/
int ff_fft_init(FFTContext *s, int nbits, int inverse);
void ff_fft_permute_c(FFTContext *s, FFTComplex *z);
void ff_fft_permute_sse(FFTContext *s, FFTComplex *z);
void ff_fft_calc_c(FFTContext *s, FFTComplex *z);
void ff_fft_calc_sse(FFTContext *s, FFTComplex *z);
void ff_fft_calc_3dn(FFTContext *s, FFTComplex *z);
void ff_fft_calc_3dn2(FFTContext *s, FFTComplex *z);
void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z);
/**
* Do the permutation needed BEFORE calling ff_fft_calc().
*/
static inline void ff_fft_permute(FFTContext *s, FFTComplex *z)
{
s->fft_permute(s, z);
}
/**
* Do a complex FFT with the parameters defined in ff_fft_init(). The
* input data must be permuted before. No 1.0/sqrt(n) normalization is done.
*/
static inline void ff_fft_calc(FFTContext *s, FFTComplex *z)
{
s->fft_calc(s, z);
}
void ff_fft_end(FFTContext *s);
/* MDCT computation */
typedef struct MDCTContext {
int n; /* size of MDCT (i.e. number of input data * 2) */
int nbits; /* n = 2^nbits */
/* pre/post rotation tables */
FFTSample *tcos;
FFTSample *tsin;
FFTContext fft;
} MDCTContext;
static inline void ff_imdct_calc(MDCTContext *s, FFTSample *output, const FFTSample *input)
{
s->fft.imdct_calc(s, output, input);
}
static inline void ff_imdct_half(MDCTContext *s, FFTSample *output, const FFTSample *input)
{
s->fft.imdct_half(s, output, input);
}
/**
* Generate a Kaiser-Bessel Derived Window.
* @param window pointer to half window
* @param alpha determines window shape
* @param n size of half window
*/
void ff_kbd_window_init(float *window, float alpha, int n);
/**
* Generate a sine window.
* @param window pointer to half window
* @param n size of half window
*/
void ff_sine_window_init(float *window, int n);
extern float ff_sine_128 [ 128];
extern float ff_sine_256 [ 256];
extern float ff_sine_512 [ 512];
extern float ff_sine_1024[1024];
extern float ff_sine_2048[2048];
extern float ff_sine_4096[4096];
extern float *ff_sine_windows[6];
int ff_mdct_init(MDCTContext *s, int nbits, int inverse);
void ff_imdct_calc_c(MDCTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_half_c(MDCTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_calc_3dn(MDCTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_half_3dn(MDCTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_half_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_calc_sse(MDCTContext *s, FFTSample *output, const FFTSample *input);
void ff_imdct_half_sse(MDCTContext *s, FFTSample *output, const FFTSample *input);
void ff_mdct_calc(MDCTContext *s, FFTSample *out, const FFTSample *input);
void ff_mdct_end(MDCTContext *s);
/* Real Discrete Fourier Transform */
enum RDFTransformType {
RDFT,
IRDFT,
RIDFT,
IRIDFT,
};
typedef struct {
int nbits;
int inverse;
int sign_convention;
/* pre/post rotation tables */
FFTSample *tcos;
FFTSample *tsin;
FFTContext fft;
} RDFTContext;
/**
* Sets up a real FFT.
* @param nbits log2 of the length of the input array
* @param trans the type of transform
*/
int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans);
void ff_rdft_calc(RDFTContext *s, FFTSample *data);
void ff_rdft_end(RDFTContext *s);
#define WRAPPER8_16(name8, name16)\
static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
return name8(s, dst , src , stride, h)\
+name8(s, dst+8 , src+8 , stride, h);\
}
#define WRAPPER8_16_SQ(name8, name16)\
static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
int score=0;\
score +=name8(s, dst , src , stride, 8);\
score +=name8(s, dst+8 , src+8 , stride, 8);\
if(h==16){\
dst += 8*stride;\
src += 8*stride;\
score +=name8(s, dst , src , stride, 8);\
score +=name8(s, dst+8 , src+8 , stride, 8);\
}\
return score;\
}
static inline void copy_block2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
int i;
for(i=0; i<h; i++)
{
AV_WN16(dst , AV_RN16(src ));
dst+=dstStride;
src+=srcStride;
}
}
static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
int i;
for(i=0; i<h; i++)
{
AV_WN32(dst , AV_RN32(src ));
dst+=dstStride;
src+=srcStride;
}
}
static inline void copy_block8(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
int i;
for(i=0; i<h; i++)
{
AV_WN32(dst , AV_RN32(src ));
AV_WN32(dst+4 , AV_RN32(src+4 ));
dst+=dstStride;
src+=srcStride;
}
}
static inline void copy_block9(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
int i;
for(i=0; i<h; i++)
{
AV_WN32(dst , AV_RN32(src ));
AV_WN32(dst+4 , AV_RN32(src+4 ));
dst[8]= src[8];
dst+=dstStride;
src+=srcStride;
}
}
static inline void copy_block16(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
int i;
for(i=0; i<h; i++)
{
AV_WN32(dst , AV_RN32(src ));
AV_WN32(dst+4 , AV_RN32(src+4 ));
AV_WN32(dst+8 , AV_RN32(src+8 ));
AV_WN32(dst+12, AV_RN32(src+12));
dst+=dstStride;
src+=srcStride;
}
}
static inline void copy_block17(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
int i;
for(i=0; i<h; i++)
{
AV_WN32(dst , AV_RN32(src ));
AV_WN32(dst+4 , AV_RN32(src+4 ));
AV_WN32(dst+8 , AV_RN32(src+8 ));
AV_WN32(dst+12, AV_RN32(src+12));
dst[16]= src[16];
dst+=dstStride;
src+=srcStride;
}
}
#endif /* AVCODEC_DSPUTIL_H */

View file

@ -1,374 +0,0 @@
/*
* FFT/IFFT transforms
* Copyright (c) 2008 Loren Merritt
* Copyright (c) 2002 Fabrice Bellard
* Partly based on libdjbfft by D. J. Bernstein
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file libavcodec/fft.c
* FFT/IFFT transforms.
*/
#include "dsputil.h"
/* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */
DECLARE_ALIGNED_16(FFTSample, ff_cos_16[8]);
DECLARE_ALIGNED_16(FFTSample, ff_cos_32[16]);
DECLARE_ALIGNED_16(FFTSample, ff_cos_64[32]);
DECLARE_ALIGNED_16(FFTSample, ff_cos_128[64]);
DECLARE_ALIGNED_16(FFTSample, ff_cos_256[128]);
DECLARE_ALIGNED_16(FFTSample, ff_cos_512[256]);
DECLARE_ALIGNED_16(FFTSample, ff_cos_1024[512]);
DECLARE_ALIGNED_16(FFTSample, ff_cos_2048[1024]);
DECLARE_ALIGNED_16(FFTSample, ff_cos_4096[2048]);
DECLARE_ALIGNED_16(FFTSample, ff_cos_8192[4096]);
DECLARE_ALIGNED_16(FFTSample, ff_cos_16384[8192]);
DECLARE_ALIGNED_16(FFTSample, ff_cos_32768[16384]);
DECLARE_ALIGNED_16(FFTSample, ff_cos_65536[32768]);
FFTSample *ff_cos_tabs[] = {
ff_cos_16, ff_cos_32, ff_cos_64, ff_cos_128, ff_cos_256, ff_cos_512, ff_cos_1024,
ff_cos_2048, ff_cos_4096, ff_cos_8192, ff_cos_16384, ff_cos_32768, ff_cos_65536,
};
static int split_radix_permutation(int i, int n, int inverse)
{
int m;
if(n <= 2) return i&1;
m = n >> 1;
if(!(i&m)) return split_radix_permutation(i, m, inverse)*2;
m >>= 1;
if(inverse == !(i&m)) return split_radix_permutation(i, m, inverse)*4 + 1;
else return split_radix_permutation(i, m, inverse)*4 - 1;
}
av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
{
int i, j, m, n;
float alpha, c1, s1, s2;
int split_radix = 1;
int av_unused has_vectors;
if (nbits < 2 || nbits > 16)
goto fail;
s->nbits = nbits;
n = 1 << nbits;
s->tmp_buf = NULL;
s->exptab = av_malloc((n / 2) * sizeof(FFTComplex));
if (!s->exptab)
goto fail;
s->revtab = av_malloc(n * sizeof(uint16_t));
if (!s->revtab)
goto fail;
s->inverse = inverse;
s2 = inverse ? 1.0 : -1.0;
s->fft_permute = ff_fft_permute_c;
s->fft_calc = ff_fft_calc_c;
s->imdct_calc = ff_imdct_calc_c;
s->imdct_half = ff_imdct_half_c;
s->exptab1 = NULL;
#if HAVE_MMX && HAVE_YASM
has_vectors = mm_support();
if (has_vectors & FF_MM_SSE && HAVE_SSE) {
/* SSE for P3/P4/K8 */
s->imdct_calc = ff_imdct_calc_sse;
s->imdct_half = ff_imdct_half_sse;
s->fft_permute = ff_fft_permute_sse;
s->fft_calc = ff_fft_calc_sse;
} else if (has_vectors & FF_MM_3DNOWEXT && HAVE_AMD3DNOWEXT) {
/* 3DNowEx for K7 */
s->imdct_calc = ff_imdct_calc_3dn2;
s->imdct_half = ff_imdct_half_3dn2;
s->fft_calc = ff_fft_calc_3dn2;
} else if (has_vectors & FF_MM_3DNOW && HAVE_AMD3DNOW) {
/* 3DNow! for K6-2/3 */
s->imdct_calc = ff_imdct_calc_3dn;
s->imdct_half = ff_imdct_half_3dn;
s->fft_calc = ff_fft_calc_3dn;
}
#elif HAVE_ALTIVEC
has_vectors = mm_support();
if (has_vectors & FF_MM_ALTIVEC) {
s->fft_calc = ff_fft_calc_altivec;
split_radix = 0;
}
#endif
if (split_radix) {
for(j=4; j<=nbits; j++) {
int m = 1<<j;
double freq = 2*M_PI/m;
FFTSample *tab = ff_cos_tabs[j-4];
for(i=0; i<=m/4; i++)
tab[i] = cos(i*freq);
for(i=1; i<m/4; i++)
tab[m/2-i] = tab[i];
}
for(i=0; i<n; i++)
s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = i;
s->tmp_buf = av_malloc(n * sizeof(FFTComplex));
} else {
int np, nblocks, np2, l;
FFTComplex *q;
for(i=0; i<(n/2); i++) {
alpha = 2 * M_PI * (float)i / (float)n;
c1 = cos(alpha);
s1 = sin(alpha) * s2;
s->exptab[i].re = c1;
s->exptab[i].im = s1;
}
np = 1 << nbits;
nblocks = np >> 3;
np2 = np >> 1;
s->exptab1 = av_malloc(np * 2 * sizeof(FFTComplex));
if (!s->exptab1)
goto fail;
q = s->exptab1;
do {
for(l = 0; l < np2; l += 2 * nblocks) {
*q++ = s->exptab[l];
*q++ = s->exptab[l + nblocks];
q->re = -s->exptab[l].im;
q->im = s->exptab[l].re;
q++;
q->re = -s->exptab[l + nblocks].im;
q->im = s->exptab[l + nblocks].re;
q++;
}
nblocks = nblocks >> 1;
} while (nblocks != 0);
av_freep(&s->exptab);
/* compute bit reverse table */
for(i=0;i<n;i++) {
m=0;
for(j=0;j<nbits;j++) {
m |= ((i >> j) & 1) << (nbits-j-1);
}
s->revtab[i]=m;
}
}
return 0;
fail:
av_freep(&s->revtab);
av_freep(&s->exptab);
av_freep(&s->exptab1);
av_freep(&s->tmp_buf);
return -1;
}
void ff_fft_permute_c(FFTContext *s, FFTComplex *z)
{
int j, k, np;
FFTComplex tmp;
const uint16_t *revtab = s->revtab;
np = 1 << s->nbits;
if (s->tmp_buf) {
/* TODO: handle split-radix permute in a more optimal way, probably in-place */
for(j=0;j<np;j++) s->tmp_buf[revtab[j]] = z[j];
memcpy(z, s->tmp_buf, np * sizeof(FFTComplex));
return;
}
/* reverse */
for(j=0;j<np;j++) {
k = revtab[j];
if (k < j) {
tmp = z[k];
z[k] = z[j];
z[j] = tmp;
}
}
}
av_cold void ff_fft_end(FFTContext *s)
{
av_freep(&s->revtab);
av_freep(&s->exptab);
av_freep(&s->exptab1);
av_freep(&s->tmp_buf);
}
#define sqrthalf (float)M_SQRT1_2
#define BF(x,y,a,b) {\
x = a - b;\
y = a + b;\
}
#define BUTTERFLIES(a0,a1,a2,a3) {\
BF(t3, t5, t5, t1);\
BF(a2.re, a0.re, a0.re, t5);\
BF(a3.im, a1.im, a1.im, t3);\
BF(t4, t6, t2, t6);\
BF(a3.re, a1.re, a1.re, t4);\
BF(a2.im, a0.im, a0.im, t6);\
}
// force loading all the inputs before storing any.
// this is slightly slower for small data, but avoids store->load aliasing
// for addresses separated by large powers of 2.
#define BUTTERFLIES_BIG(a0,a1,a2,a3) {\
FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\
BF(t3, t5, t5, t1);\
BF(a2.re, a0.re, r0, t5);\
BF(a3.im, a1.im, i1, t3);\
BF(t4, t6, t2, t6);\
BF(a3.re, a1.re, r1, t4);\
BF(a2.im, a0.im, i0, t6);\
}
#define TRANSFORM(a0,a1,a2,a3,wre,wim) {\
t1 = a2.re * wre + a2.im * wim;\
t2 = a2.im * wre - a2.re * wim;\
t5 = a3.re * wre - a3.im * wim;\
t6 = a3.im * wre + a3.re * wim;\
BUTTERFLIES(a0,a1,a2,a3)\
}
#define TRANSFORM_ZERO(a0,a1,a2,a3) {\
t1 = a2.re;\
t2 = a2.im;\
t5 = a3.re;\
t6 = a3.im;\
BUTTERFLIES(a0,a1,a2,a3)\
}
/* z[0...8n-1], w[1...2n-1] */
#define PASS(name)\
static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\
{\
FFTSample t1, t2, t3, t4, t5, t6;\
int o1 = 2*n;\
int o2 = 4*n;\
int o3 = 6*n;\
const FFTSample *wim = wre+o1;\
n--;\
\
TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\
TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
do {\
z += 2;\
wre += 2;\
wim -= 2;\
TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\
TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
} while(--n);\
}
PASS(pass)
#undef BUTTERFLIES
#define BUTTERFLIES BUTTERFLIES_BIG
PASS(pass_big)
#define DECL_FFT(n,n2,n4)\
static void fft##n(FFTComplex *z)\
{\
fft##n2(z);\
fft##n4(z+n4*2);\
fft##n4(z+n4*3);\
pass(z,ff_cos_##n,n4/2);\
}
static void fft4(FFTComplex *z)
{
FFTSample t1, t2, t3, t4, t5, t6, t7, t8;
BF(t3, t1, z[0].re, z[1].re);
BF(t8, t6, z[3].re, z[2].re);
BF(z[2].re, z[0].re, t1, t6);
BF(t4, t2, z[0].im, z[1].im);
BF(t7, t5, z[2].im, z[3].im);
BF(z[3].im, z[1].im, t4, t8);
BF(z[3].re, z[1].re, t3, t7);
BF(z[2].im, z[0].im, t2, t5);
}
static void fft8(FFTComplex *z)
{
FFTSample t1, t2, t3, t4, t5, t6, t7, t8;
fft4(z);
BF(t1, z[5].re, z[4].re, -z[5].re);
BF(t2, z[5].im, z[4].im, -z[5].im);
BF(t3, z[7].re, z[6].re, -z[7].re);
BF(t4, z[7].im, z[6].im, -z[7].im);
BF(t8, t1, t3, t1);
BF(t7, t2, t2, t4);
BF(z[4].re, z[0].re, z[0].re, t1);
BF(z[4].im, z[0].im, z[0].im, t2);
BF(z[6].re, z[2].re, z[2].re, t7);
BF(z[6].im, z[2].im, z[2].im, t8);
TRANSFORM(z[1],z[3],z[5],z[7],sqrthalf,sqrthalf);
}
#if !CONFIG_SMALL
static void fft16(FFTComplex *z)
{
FFTSample t1, t2, t3, t4, t5, t6;
fft8(z);
fft4(z+8);
fft4(z+12);
TRANSFORM_ZERO(z[0],z[4],z[8],z[12]);
TRANSFORM(z[2],z[6],z[10],z[14],sqrthalf,sqrthalf);
TRANSFORM(z[1],z[5],z[9],z[13],ff_cos_16[1],ff_cos_16[3]);
TRANSFORM(z[3],z[7],z[11],z[15],ff_cos_16[3],ff_cos_16[1]);
}
#else
DECL_FFT(16,8,4)
#endif
DECL_FFT(32,16,8)
DECL_FFT(64,32,16)
DECL_FFT(128,64,32)
DECL_FFT(256,128,64)
DECL_FFT(512,256,128)
#if !CONFIG_SMALL
#define pass pass_big
#endif
DECL_FFT(1024,512,256)
DECL_FFT(2048,1024,512)
DECL_FFT(4096,2048,1024)
DECL_FFT(8192,4096,2048)
DECL_FFT(16384,8192,4096)
DECL_FFT(32768,16384,8192)
DECL_FFT(65536,32768,16384)
static void (*fft_dispatch[])(FFTComplex*) = {
fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512, fft1024,
fft2048, fft4096, fft8192, fft16384, fft32768, fft65536,
};
void ff_fft_calc_c(FFTContext *s, FFTComplex *z)
{
fft_dispatch[s->nbits-2](z);
}

View file

@ -0,0 +1,66 @@
#include "fixp_math.h"
inline int32_t fixmul31(int32_t x, int32_t y)
{
int64_t temp;
temp = x;
temp *= y;
temp >>= 31; //16+31-16 = 31 bits
return (int32_t)temp;
}
/*
* Fast integer square root adapted from algorithm,
* Martin Guy @ UKC, June 1985.
* Originally from a book on programming abaci by Mr C. Woo.
* This is taken from :
* http://wiki.forum.nokia.com/index.php/How_to_use_fixed_point_maths#How_to_get_square_root_for_integers
* with a added shift up of the result by 8 bits to return result in 16.16 fixed-point representation.
*/
inline int32_t fastSqrt(int32_t n)
{
/*
* Logically, these are unsigned.
* We need the sign bit to test
* whether (op - res - one) underflowed.
*/
int32_t op, res, one;
op = n;
res = 0;
/* "one" starts at the highest power of four <= than the argument. */
one = 1 << 30; /* second-to-top bit set */
while (one > op) one >>= 2;
while (one != 0)
{
if (op >= res + one)
{
op = op - (res + one);
res = res + (one<<1);
}
res >>= 1;
one >>= 2;
}
return(res << 8);
}
inline int32_t fixmul16(int32_t x, int32_t y)
{
int64_t temp;
temp = x;
temp *= y;
temp >>= 16;
return (int32_t)temp;
}
inline int32_t fixdiv16(int32_t x, int32_t y)
{
int64_t temp;
temp = x << 16;
temp /= y;
return (int32_t)temp;
}

View file

@ -0,0 +1,14 @@
#include <stdlib.h>
/* Macros for converting between various fixed-point representations and floating point. */
#define ONE_16 (1L << 16)
#define fixtof64(x) (float)((float)(x) / (float)(1 << 16)) //does not work on int64_t!
#define ftofix32(x) ((int32_t)((x) * (float)(1 << 16) + ((x) < 0 ? -0.5 : 0.5)))
#define ftofix31(x) ((int32_t)((x) * (float)(1 << 31) + ((x) < 0 ? -0.5 : 0.5)))
#define fix31tof64(x) (float)((float)(x) / (float)(1 << 31))
/* Fixed point math routines for use in atrac3.c */
inline int32_t fixdiv16(int32_t x, int32_t y);
inline int32_t fixmul16(int32_t x, int32_t y);
inline int32_t fixmul31(int32_t x, int32_t y);
inline int32_t fastSqrt(int32_t n);

View file

@ -1,245 +0,0 @@
/*
* MDCT/IMDCT transforms
* Copyright (c) 2002 Fabrice Bellard
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "dsputil.h"
#ifndef M_E
#define M_E 2.7182818284590452354 /* e */
#endif
#ifndef M_LN2
#define M_LN2 0.69314718055994530942 /* log_e 2 */
#endif
#ifndef M_LN10
#define M_LN10 2.30258509299404568402 /* log_e 10 */
#endif
#ifndef M_PI
#define M_PI 3.14159265358979323846 /* pi */
#endif
#ifndef M_SQRT1_2
#define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */
#endif
/**
* @file libavcodec/mdct.c
* MDCT/IMDCT transforms.
*/
// Generate a Kaiser-Bessel Derived Window.
#define BESSEL_I0_ITER 50 // default: 50 iterations of Bessel I0 approximation
av_cold void ff_kbd_window_init(float *window, float alpha, int n)
{
int i, j;
double sum = 0.0, bessel, tmp;
double local_window[n];
double alpha2 = (alpha * M_PI / n) * (alpha * M_PI / n);
for (i = 0; i < n; i++) {
tmp = i * (n - i) * alpha2;
bessel = 1.0;
for (j = BESSEL_I0_ITER; j > 0; j--)
bessel = bessel * tmp / (j * j) + 1;
sum += bessel;
local_window[i] = sum;
}
sum++;
for (i = 0; i < n; i++)
window[i] = sqrt(local_window[i] / sum);
}
DECLARE_ALIGNED(16, float, ff_sine_128 [ 128]);
DECLARE_ALIGNED(16, float, ff_sine_256 [ 256]);
DECLARE_ALIGNED(16, float, ff_sine_512 [ 512]);
DECLARE_ALIGNED(16, float, ff_sine_1024[1024]);
DECLARE_ALIGNED(16, float, ff_sine_2048[2048]);
DECLARE_ALIGNED(16, float, ff_sine_4096[4096]);
float *ff_sine_windows[6] = {
ff_sine_128, ff_sine_256, ff_sine_512, ff_sine_1024, ff_sine_2048, ff_sine_4096
};
// Generate a sine window.
av_cold void ff_sine_window_init(float *window, int n) {
int i;
for(i = 0; i < n; i++)
window[i] = sinf((i + 0.5) * (M_PI / (2.0 * n)));
}
/**
* init MDCT or IMDCT computation.
*/
av_cold int ff_mdct_init(MDCTContext *s, int nbits, int inverse)
{
int n, n4, i;
double alpha;
memset(s, 0, sizeof(*s));
n = 1 << nbits;
s->nbits = nbits;
s->n = n;
n4 = n >> 2;
s->tcos = av_malloc(n4 * sizeof(FFTSample));
if (!s->tcos)
goto fail;
s->tsin = av_malloc(n4 * sizeof(FFTSample));
if (!s->tsin)
goto fail;
for(i=0;i<n4;i++) {
alpha = 2 * M_PI * (i + 1.0 / 8.0) / n;
s->tcos[i] = -cos(alpha);
s->tsin[i] = -sin(alpha);
}
if (ff_fft_init(&s->fft, s->nbits - 2, inverse) < 0)
goto fail;
return 0;
fail:
av_freep(&s->tcos);
av_freep(&s->tsin);
return -1;
}
/* complex multiplication: p = a * b */
#define CMUL(pre, pim, are, aim, bre, bim) \
{\
FFTSample _are = (are);\
FFTSample _aim = (aim);\
FFTSample _bre = (bre);\
FFTSample _bim = (bim);\
(pre) = _are * _bre - _aim * _bim;\
(pim) = _are * _bim + _aim * _bre;\
}
/**
* Compute the middle half of the inverse MDCT of size N = 2^nbits,
* thus excluding the parts that can be derived by symmetry
* @param output N/2 samples
* @param input N/2 samples
*/
void ff_imdct_half_c(MDCTContext *s, FFTSample *output, const FFTSample *input)
{
int k, n8, n4, n2, n, j;
const uint16_t *revtab = s->fft.revtab;
const FFTSample *tcos = s->tcos;
const FFTSample *tsin = s->tsin;
const FFTSample *in1, *in2;
FFTComplex *z = (FFTComplex *)output;
n = 1 << s->nbits;
n2 = n >> 1;
n4 = n >> 2;
n8 = n >> 3;
/* pre rotation */
in1 = input;
in2 = input + n2 - 1;
for(k = 0; k < n4; k++) {
j=revtab[k];
CMUL(z[j].re, z[j].im, *in2, *in1, tcos[k], tsin[k]);
in1 += 2;
in2 -= 2;
}
ff_fft_calc(&s->fft, z);
/* post rotation + reordering */
output += n4;
for(k = 0; k < n8; k++) {
FFTSample r0, i0, r1, i1;
CMUL(r0, i1, z[n8-k-1].im, z[n8-k-1].re, tsin[n8-k-1], tcos[n8-k-1]);
CMUL(r1, i0, z[n8+k ].im, z[n8+k ].re, tsin[n8+k ], tcos[n8+k ]);
z[n8-k-1].re = r0;
z[n8-k-1].im = i0;
z[n8+k ].re = r1;
z[n8+k ].im = i1;
}
}
/**
* Compute inverse MDCT of size N = 2^nbits
* @param output N samples
* @param input N/2 samples
*/
void ff_imdct_calc_c(MDCTContext *s, FFTSample *output, const FFTSample *input)
{
int k;
int n = 1 << s->nbits;
int n2 = n >> 1;
int n4 = n >> 2;
ff_imdct_half_c(s, output+n4, input);
for(k = 0; k < n4; k++) {
output[k] = -output[n2-k-1];
output[n-k-1] = output[n2+k];
}
}
/**
* Compute MDCT of size N = 2^nbits
* @param input N samples
* @param out N/2 samples
*/
void ff_mdct_calc(MDCTContext *s, FFTSample *out, const FFTSample *input)
{
int i, j, n, n8, n4, n2, n3;
FFTSample re, im;
const uint16_t *revtab = s->fft.revtab;
const FFTSample *tcos = s->tcos;
const FFTSample *tsin = s->tsin;
FFTComplex *x = (FFTComplex *)out;
n = 1 << s->nbits;
n2 = n >> 1;
n4 = n >> 2;
n8 = n >> 3;
n3 = 3 * n4;
/* pre rotation */
for(i=0;i<n8;i++) {
re = -input[2*i+3*n4] - input[n3-1-2*i];
im = -input[n4+2*i] + input[n4-1-2*i];
j = revtab[i];
CMUL(x[j].re, x[j].im, re, im, -tcos[i], tsin[i]);
re = input[2*i] - input[n2-1-2*i];
im = -(input[n2+2*i] + input[n-1-2*i]);
j = revtab[n8 + i];
CMUL(x[j].re, x[j].im, re, im, -tcos[n8 + i], tsin[n8 + i]);
}
ff_fft_calc(&s->fft, x);
/* post rotation */
for(i=0;i<n8;i++) {
FFTSample r0, i0, r1, i1;
CMUL(i1, r0, x[n8-i-1].re, x[n8-i-1].im, -tsin[n8-i-1], -tcos[n8-i-1]);
CMUL(i0, r1, x[n8+i ].re, x[n8+i ].im, -tsin[n8+i ], -tcos[n8+i ]);
x[n8-i-1].re = r0;
x[n8-i-1].im = i0;
x[n8+i ].re = r1;
x[n8+i ].im = i1;
}
}
av_cold void ff_mdct_end(MDCTContext *s)
{
av_freep(&s->tcos);
av_freep(&s->tsin);
ff_fft_end(&s->fft);
}