forked from len0rd/rockbox
Modify libatrac to use fixed-point arithmetic.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@22298 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
c956059ec5
commit
432e2ecc13
10 changed files with 324 additions and 5756 deletions
|
@ -1,8 +1,8 @@
|
||||||
CFLAGS = -Wall -O3 -DTEST -D"DEBUGF=printf"
|
CFLAGS = -Wall -O3 -DTEST -D"DEBUGF=printf" -D"ROCKBOX_LITTLE_ENDIAN=1" -D"ICONST_ATTR=" -D"ICODE_ATTR="
|
||||||
OBJS = atrac3.o dsputil.o bitstream.o fft.o mdct.o libavutil/log.o libavutil/mem.o ../librm/rm.o
|
OBJS = atrac3.o bitstream.o libavutil/log.o libavutil/mem.o ../librm/rm.o fixp_math.o ../lib/mdct2.o ../lib/mdct_lookup.o
|
||||||
|
|
||||||
atractest: $(OBJS)
|
atractest: $(OBJS)
|
||||||
gcc -o atractest $(OBJS) -lm
|
gcc -o atractest $(OBJS)
|
||||||
|
|
||||||
.c.o :
|
.c.o :
|
||||||
$(CC) $(CFLAGS) -c -o $@ $<
|
$(CC) $(CFLAGS) -c -o $@ $<
|
||||||
|
|
|
@ -8,13 +8,19 @@ ffmpeg is licensed under the Lesser GNU General Public License.
|
||||||
|
|
||||||
IMPORT DETAILS
|
IMPORT DETAILS
|
||||||
|
|
||||||
The decoder is based on ffmpeg-svn r18079. It still uses floating
|
The decoder is based on ffmpeg-svn r18079.
|
||||||
point math and not suitable to be used in rockbox.
|
The decoder had been modified to use fixed-point arithmetic.
|
||||||
|
|
||||||
TESTING
|
TESTING
|
||||||
|
|
||||||
The test program should compile in any Unix-like environment using the
|
The test program should compile in any Unix-like environment using the
|
||||||
command "make -f Makefile.test".
|
command "make -f Makefile.test".
|
||||||
|
|
||||||
|
For ARM targets add -DCPU_ARM to CFLAGS in Makefile.test to make use of
|
||||||
|
the asm ARM optimisations in rockbox's mdct library.
|
||||||
|
|
||||||
|
For Big-endian targets, change -D"ROCKBOX_LITTLE_ENDIAN=1"
|
||||||
|
to -D"ROCKBOX_BIG_ENDIAN=1" in Makefile.test.
|
||||||
|
|
||||||
Running "./atractest file.rm" will decode the audio data to a WAV file
|
Running "./atractest file.rm" will decode the audio data to a WAV file
|
||||||
called "output.wav" in the current directory.
|
called "output.wav" in the current directory.
|
||||||
|
|
|
@ -38,7 +38,6 @@
|
||||||
|
|
||||||
#include "avcodec.h"
|
#include "avcodec.h"
|
||||||
#include "bitstream.h"
|
#include "bitstream.h"
|
||||||
#include "dsputil.h"
|
|
||||||
#include "bytestream.h"
|
#include "bytestream.h"
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
@ -50,6 +49,10 @@
|
||||||
|
|
||||||
#include "../librm/rm.h"
|
#include "../librm/rm.h"
|
||||||
#include "atrac3data.h"
|
#include "atrac3data.h"
|
||||||
|
#include "atrac3data_fixed.h"
|
||||||
|
#include "fixp_math.h"
|
||||||
|
//#include "fixp_mdct.h"
|
||||||
|
#include "../lib/mdct2.h"
|
||||||
|
|
||||||
#define JOINT_STEREO 0x12
|
#define JOINT_STEREO 0x12
|
||||||
#define STEREO 0x2
|
#define STEREO 0x2
|
||||||
|
@ -70,23 +73,23 @@ typedef struct {
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int pos;
|
int pos;
|
||||||
int numCoefs;
|
int numCoefs;
|
||||||
float coef[8];
|
int32_t coef[8];
|
||||||
} tonal_component;
|
} tonal_component;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int bandsCoded;
|
int bandsCoded;
|
||||||
int numComponents;
|
int numComponents;
|
||||||
tonal_component components[64];
|
tonal_component components[64];
|
||||||
float prevFrame[1024];
|
int32_t prevFrame[1024];
|
||||||
int gcBlkSwitch;
|
int gcBlkSwitch;
|
||||||
gain_block gainBlock[2];
|
gain_block gainBlock[2];
|
||||||
|
|
||||||
DECLARE_ALIGNED_16(float, spectrum[1024]);
|
int32_t spectrum[1024] __attribute__((aligned(16)));
|
||||||
DECLARE_ALIGNED_16(float, IMDCT_buf[1024]);
|
int32_t IMDCT_buf[1024] __attribute__((aligned(16)));
|
||||||
|
|
||||||
float delayBuf1[46]; ///<qmf delay buffers
|
int32_t delayBuf1[46]; ///<qmf delay buffers
|
||||||
float delayBuf2[46];
|
int32_t delayBuf2[46];
|
||||||
float delayBuf3[46];
|
int32_t delayBuf3[46];
|
||||||
} channel_unit;
|
} channel_unit;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
@ -114,9 +117,9 @@ typedef struct {
|
||||||
//@}
|
//@}
|
||||||
//@{
|
//@{
|
||||||
/** data buffers */
|
/** data buffers */
|
||||||
float outSamples[2048];
|
int32_t outSamples[2048];
|
||||||
uint8_t* decoded_bytes_buffer;
|
uint8_t* decoded_bytes_buffer;
|
||||||
float tempBuf[1070];
|
int32_t tempBuf[1070];
|
||||||
//@}
|
//@}
|
||||||
//@{
|
//@{
|
||||||
/** extradata */
|
/** extradata */
|
||||||
|
@ -127,17 +130,8 @@ typedef struct {
|
||||||
//@}
|
//@}
|
||||||
} ATRAC3Context;
|
} ATRAC3Context;
|
||||||
|
|
||||||
static DECLARE_ALIGNED_16(float,mdct_window[512]);
|
static int32_t qmf_window[48];
|
||||||
static float qmf_window[48];
|
|
||||||
static VLC spectral_coeff_tab[7];
|
static VLC spectral_coeff_tab[7];
|
||||||
static float SFTable[64];
|
|
||||||
static float gain_tab1[16];
|
|
||||||
static float gain_tab2[31];
|
|
||||||
static MDCTContext mdct_ctx;
|
|
||||||
static DSPContext dsp;
|
|
||||||
|
|
||||||
|
|
||||||
/* quadrature mirror synthesis filter */
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Quadrature mirror synthesis filter.
|
* Quadrature mirror synthesis filter.
|
||||||
|
@ -149,14 +143,12 @@ static DSPContext dsp;
|
||||||
* @param delayBuf delayBuf buffer
|
* @param delayBuf delayBuf buffer
|
||||||
* @param temp temp buffer
|
* @param temp temp buffer
|
||||||
*/
|
*/
|
||||||
|
static void iqmf (int32_t *inlo, int32_t *inhi, unsigned int nIn, int32_t *pOut, int32_t *delayBuf, int32_t *temp)
|
||||||
|
|
||||||
static void iqmf (float *inlo, float *inhi, unsigned int nIn, float *pOut, float *delayBuf, float *temp)
|
|
||||||
{
|
{
|
||||||
int i, j;
|
int i, j;
|
||||||
float *p1, *p3;
|
int32_t *p1, *p3;
|
||||||
|
|
||||||
memcpy(temp, delayBuf, 46*sizeof(float));
|
memcpy(temp, delayBuf, 46*sizeof(int32_t));
|
||||||
|
|
||||||
p3 = temp + 46;
|
p3 = temp + 46;
|
||||||
|
|
||||||
|
@ -171,12 +163,12 @@ static void iqmf (float *inlo, float *inhi, unsigned int nIn, float *pOut, float
|
||||||
/* loop2 */
|
/* loop2 */
|
||||||
p1 = temp;
|
p1 = temp;
|
||||||
for (j = nIn; j != 0; j--) {
|
for (j = nIn; j != 0; j--) {
|
||||||
float s1 = 0.0;
|
int32_t s1 = 0;
|
||||||
float s2 = 0.0;
|
int32_t s2 = 0;
|
||||||
|
|
||||||
for (i = 0; i < 48; i += 2) {
|
for (i = 0; i < 48; i += 2) {
|
||||||
s1 += p1[i] * qmf_window[i];
|
s1 += fixmul31(p1[i], qmf_window[i]);
|
||||||
s2 += p1[i+1] * qmf_window[i+1];
|
s2 += fixmul31(p1[i+1], qmf_window[i+1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
pOut[0] = s2;
|
pOut[0] = s2;
|
||||||
|
@ -187,7 +179,7 @@ static void iqmf (float *inlo, float *inhi, unsigned int nIn, float *pOut, float
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Update the delay buffer. */
|
/* Update the delay buffer. */
|
||||||
memcpy(delayBuf, temp + nIn*2, 46*sizeof(float));
|
memcpy(delayBuf, temp + (nIn << 1), 46*sizeof(int32_t));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -199,10 +191,9 @@ static void iqmf (float *inlo, float *inhi, unsigned int nIn, float *pOut, float
|
||||||
* @param odd_band 1 if the band is an odd band
|
* @param odd_band 1 if the band is an odd band
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static void IMLT(float *pInput, float *pOutput, int odd_band)
|
static void IMLT(int32_t *pInput, int32_t *pOutput, int odd_band)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
if (odd_band) {
|
if (odd_band) {
|
||||||
/**
|
/**
|
||||||
* Reverse the odd bands before IMDCT, this is an effect of the QMF transform
|
* Reverse the odd bands before IMDCT, this is an effect of the QMF transform
|
||||||
|
@ -214,13 +205,15 @@ static void IMLT(float *pInput, float *pOutput, int odd_band)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
for (i=0; i<128; i++)
|
for (i=0; i<128; i++)
|
||||||
FFSWAP(float, pInput[i], pInput[255-i]);
|
FFSWAP(int32_t, pInput[i], pInput[255-i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Apply the imdct. */
|
||||||
|
mdct_backward(512, pInput, pOutput);
|
||||||
|
|
||||||
ff_imdct_calc(&mdct_ctx,pOutput,pInput);
|
/* Windowing. */
|
||||||
|
for(i = 0; i<512; i++)
|
||||||
/* Perform windowing on the output. */
|
pOutput[i] = fixmul31(pOutput[i], window_lookup[i]);
|
||||||
dsp.vector_fmul(pOutput,mdct_window,512);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -259,30 +252,20 @@ static int decode_bytes(const uint8_t* inbuffer, uint8_t* out, int bytes){
|
||||||
|
|
||||||
|
|
||||||
static av_cold void init_atrac3_transforms(ATRAC3Context *q) {
|
static av_cold void init_atrac3_transforms(ATRAC3Context *q) {
|
||||||
float enc_window[256];
|
int32_t s;
|
||||||
float s;
|
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
/* Generate the mdct window, for details see
|
/* Generate the mdct window, for details see
|
||||||
* http://wiki.multimedia.cx/index.php?title=RealAudio_atrc#Windows */
|
* http://wiki.multimedia.cx/index.php?title=RealAudio_atrc#Windows */
|
||||||
for (i=0 ; i<256; i++)
|
|
||||||
enc_window[i] = (sin(((i + 0.5) / 256.0 - 0.5) * M_PI) + 1.0) * 0.5;
|
|
||||||
|
|
||||||
if (!mdct_window[0])
|
/* mdct window had been generated and saved as a lookup table in atrac3data_fixed.h */
|
||||||
for (i=0 ; i<256; i++) {
|
|
||||||
mdct_window[i] = enc_window[i]/(enc_window[i]*enc_window[i] + enc_window[255-i]*enc_window[255-i]);
|
|
||||||
mdct_window[511-i] = mdct_window[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Generate the QMF window. */
|
/* Generate the QMF window. */
|
||||||
for (i=0 ; i<24; i++) {
|
for (i=0 ; i<24; i++) {
|
||||||
s = qmf_48tap_half[i] * 2.0;
|
s = qmf_48tap_half_fix[i] << 1;
|
||||||
qmf_window[i] = s;
|
qmf_window[i] = s;
|
||||||
qmf_window[47 - i] = s;
|
qmf_window[47 - i] = s;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Initialize the MDCT transform. */
|
|
||||||
ff_mdct_init(&mdct_ctx, 9, 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -367,12 +350,12 @@ static void readQuantSpectralCoeffs (GetBitContext *gb, int selector, int coding
|
||||||
* @return outSubbands subband counter, fix for broken specification/files
|
* @return outSubbands subband counter, fix for broken specification/files
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static int decodeSpectrum (GetBitContext *gb, float *pOut)
|
static int decodeSpectrum (GetBitContext *gb, int32_t *pOut)
|
||||||
{
|
{
|
||||||
int numSubbands, codingMode, cnt, first, last, subbWidth, *pIn;
|
int numSubbands, codingMode, cnt, first, last, subbWidth, *pIn;
|
||||||
int subband_vlc_index[32], SF_idxs[32];
|
int subband_vlc_index[32], SF_idxs[32];
|
||||||
int mantissas[128];
|
int mantissas[128];
|
||||||
float SF;
|
int32_t SF;
|
||||||
|
|
||||||
numSubbands = get_bits(gb, 5); // number of coded subbands
|
numSubbands = get_bits(gb, 5); // number of coded subbands
|
||||||
codingMode = get_bits1(gb); // coding Mode: 0 - VLC/ 1-CLC
|
codingMode = get_bits1(gb); // coding Mode: 0 - VLC/ 1-CLC
|
||||||
|
@ -400,20 +383,20 @@ static int decodeSpectrum (GetBitContext *gb, float *pOut)
|
||||||
readQuantSpectralCoeffs (gb, subband_vlc_index[cnt], codingMode, mantissas, subbWidth);
|
readQuantSpectralCoeffs (gb, subband_vlc_index[cnt], codingMode, mantissas, subbWidth);
|
||||||
|
|
||||||
/* Decode the scale factor for this subband. */
|
/* Decode the scale factor for this subband. */
|
||||||
SF = SFTable[SF_idxs[cnt]] * iMaxQuant[subband_vlc_index[cnt]];
|
SF = fixmul31(SFTable_fixed[SF_idxs[cnt]], iMaxQuant_fix[subband_vlc_index[cnt]]);
|
||||||
|
|
||||||
/* Inverse quantize the coefficients. */
|
/* Inverse quantize the coefficients. */
|
||||||
for (pIn=mantissas ; first<last; first++, pIn++)
|
for (pIn=mantissas ; first<last; first++, pIn++)
|
||||||
pOut[first] = *pIn * SF;
|
pOut[first] = fixmul16(*pIn, SF);
|
||||||
} else {
|
} else {
|
||||||
/* This subband was not coded, so zero the entire subband. */
|
/* This subband was not coded, so zero the entire subband. */
|
||||||
memset(pOut+first, 0, subbWidth*sizeof(float));
|
memset(pOut+first, 0, subbWidth*sizeof(int32_t));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Clear the subbands that were not coded. */
|
/* Clear the subbands that were not coded. */
|
||||||
first = subbandTab[cnt];
|
first = subbandTab[cnt];
|
||||||
memset(pOut+first, 0, (1024 - first) * sizeof(float));
|
memset(pOut+first, 0, (1024 - first) * sizeof(int32_t));
|
||||||
return numSubbands;
|
return numSubbands;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -431,8 +414,8 @@ static int decodeTonalComponents (GetBitContext *gb, tonal_component *pComponent
|
||||||
int components, coding_mode_selector, coding_mode, coded_values_per_component;
|
int components, coding_mode_selector, coding_mode, coded_values_per_component;
|
||||||
int sfIndx, coded_values, max_coded_values, quant_step_index, coded_components;
|
int sfIndx, coded_values, max_coded_values, quant_step_index, coded_components;
|
||||||
int band_flags[4], mantissa[8];
|
int band_flags[4], mantissa[8];
|
||||||
float *pCoef;
|
int32_t *pCoef;
|
||||||
float scalefactor;
|
int32_t scalefactor;
|
||||||
int component_count = 0;
|
int component_count = 0;
|
||||||
|
|
||||||
components = get_bits(gb,5);
|
components = get_bits(gb,5);
|
||||||
|
@ -473,7 +456,7 @@ static int decodeTonalComponents (GetBitContext *gb, tonal_component *pComponent
|
||||||
coded_values = coded_values_per_component + 1;
|
coded_values = coded_values_per_component + 1;
|
||||||
coded_values = FFMIN(max_coded_values,coded_values);
|
coded_values = FFMIN(max_coded_values,coded_values);
|
||||||
|
|
||||||
scalefactor = SFTable[sfIndx] * iMaxQuant[quant_step_index];
|
scalefactor = fixmul31(SFTable_fixed[sfIndx], iMaxQuant_fix[quant_step_index]);
|
||||||
|
|
||||||
readQuantSpectralCoeffs(gb, quant_step_index, coding_mode, mantissa, coded_values);
|
readQuantSpectralCoeffs(gb, quant_step_index, coding_mode, mantissa, coded_values);
|
||||||
|
|
||||||
|
@ -482,7 +465,7 @@ static int decodeTonalComponents (GetBitContext *gb, tonal_component *pComponent
|
||||||
/* inverse quant */
|
/* inverse quant */
|
||||||
pCoef = pComponent[component_count].coef;
|
pCoef = pComponent[component_count].coef;
|
||||||
for (cnt = 0; cnt < coded_values; cnt++)
|
for (cnt = 0; cnt < coded_values; cnt++)
|
||||||
pCoef[cnt] = mantissa[cnt] * scalefactor;
|
pCoef[cnt] = fixmul16(mantissa[cnt], scalefactor);
|
||||||
|
|
||||||
component_count++;
|
component_count++;
|
||||||
}
|
}
|
||||||
|
@ -539,21 +522,21 @@ static int decodeGainControl (GetBitContext *gb, gain_block *pGb, int numBands)
|
||||||
* @param pGain2 next band gain info
|
* @param pGain2 next band gain info
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static void gainCompensateAndOverlap (float *pIn, float *pPrev, float *pOut, gain_info *pGain1, gain_info *pGain2)
|
static void gainCompensateAndOverlap (int32_t *pIn, int32_t *pPrev, int32_t *pOut, gain_info *pGain1, gain_info *pGain2)
|
||||||
{
|
{
|
||||||
/* gain compensation function */
|
/* gain compensation function */
|
||||||
float gain1, gain2, gain_inc;
|
int32_t gain1, gain2, gain_inc;
|
||||||
int cnt, numdata, nsample, startLoc, endLoc;
|
int cnt, numdata, nsample, startLoc, endLoc;
|
||||||
|
|
||||||
|
|
||||||
if (pGain2->num_gain_data == 0)
|
if (pGain2->num_gain_data == 0)
|
||||||
gain1 = 1.0;
|
gain1 = ONE_16;
|
||||||
else
|
else
|
||||||
gain1 = gain_tab1[pGain2->levcode[0]];
|
gain1 = gain_tab1[pGain2->levcode[0]];
|
||||||
|
|
||||||
if (pGain1->num_gain_data == 0) {
|
if (pGain1->num_gain_data == 0) {
|
||||||
for (cnt = 0; cnt < 256; cnt++)
|
for (cnt = 0; cnt < 256; cnt++)
|
||||||
pOut[cnt] = pIn[cnt] * gain1 + pPrev[cnt];
|
pOut[cnt] = fixmul16(pIn[cnt], gain1) + pPrev[cnt];
|
||||||
} else {
|
} else {
|
||||||
numdata = pGain1->num_gain_data;
|
numdata = pGain1->num_gain_data;
|
||||||
pGain1->loccode[numdata] = 32;
|
pGain1->loccode[numdata] = 32;
|
||||||
|
@ -570,36 +553,38 @@ static void gainCompensateAndOverlap (float *pIn, float *pPrev, float *pOut, gai
|
||||||
|
|
||||||
/* interpolate */
|
/* interpolate */
|
||||||
for (; nsample < startLoc; nsample++)
|
for (; nsample < startLoc; nsample++)
|
||||||
pOut[nsample] = (pIn[nsample] * gain1 + pPrev[nsample]) * gain2;
|
pOut[nsample] = fixmul16((fixmul16(pIn[nsample], gain1) + pPrev[nsample]), gain2);
|
||||||
|
|
||||||
/* interpolation is done over eight samples */
|
/* interpolation is done over eight samples */
|
||||||
for (; nsample < endLoc; nsample++) {
|
for (; nsample < endLoc; nsample++) {
|
||||||
pOut[nsample] = (pIn[nsample] * gain1 + pPrev[nsample]) * gain2;
|
pOut[nsample] = fixmul16((fixmul16(pIn[nsample], gain1) + pPrev[nsample]),gain2);
|
||||||
gain2 *= gain_inc;
|
gain2 = fixmul16(gain2, gain_inc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (; nsample < 256; nsample++)
|
for (; nsample < 256; nsample++)
|
||||||
pOut[nsample] = (pIn[nsample] * gain1) + pPrev[nsample];
|
pOut[nsample] = fixmul16(pIn[nsample], gain1) + pPrev[nsample];
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Delay for the overlapping part. */
|
/* Delay for the overlapping part. */
|
||||||
memcpy(pPrev, &pIn[256], 256*sizeof(float));
|
memcpy(pPrev, &pIn[256], 256*sizeof(int32_t));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Combine the tonal band spectrum and regular band spectrum
|
* Combine the tonal band spectrum and regular band spectrum
|
||||||
* Return position of the last tonal coefficient
|
* Return position of the last tonal coefficient
|
||||||
|
|
||||||
*
|
*
|
||||||
* @param pSpectrum output spectrum buffer
|
* @param pSpectrum output spectrum buffer
|
||||||
* @param numComponents amount of tonal components
|
* @param numComponents amount of tonal components
|
||||||
* @param pComponent tonal components for this band
|
* @param pComponent tonal components for this band
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static int addTonalComponents (float *pSpectrum, int numComponents, tonal_component *pComponent)
|
static int addTonalComponents (int32_t *pSpectrum, int numComponents, tonal_component *pComponent)
|
||||||
{
|
{
|
||||||
int cnt, i, lastPos = -1;
|
int cnt, i, lastPos = -1;
|
||||||
float *pIn, *pOut;
|
int32_t *pOut;
|
||||||
|
int32_t *pIn;
|
||||||
|
|
||||||
for (cnt = 0; cnt < numComponents; cnt++){
|
for (cnt = 0; cnt < numComponents; cnt++){
|
||||||
lastPos = FFMAX(pComponent[cnt].pos + pComponent[cnt].numCoefs, lastPos);
|
lastPos = FFMAX(pComponent[cnt].pos + pComponent[cnt].numCoefs, lastPos);
|
||||||
|
@ -614,13 +599,13 @@ static int addTonalComponents (float *pSpectrum, int numComponents, tonal_compon
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#define INTERPOLATE(old,new,nsample) ((old) + (nsample)*0.125*((new)-(old)))
|
#define INTERPOLATE(old,new,nsample) ((old*ONE_16) + fixmul16(((nsample*ONE_16)>>3), (((new) - (old))*ONE_16)))
|
||||||
|
|
||||||
static void reverseMatrixing(float *su1, float *su2, int *pPrevCode, int *pCurrCode)
|
static void reverseMatrixing(int32_t *su1, int32_t *su2, int *pPrevCode, int *pCurrCode)
|
||||||
{
|
{
|
||||||
int i, band, nsample, s1, s2;
|
int i, band, nsample, s1, s2;
|
||||||
float c1, c2;
|
int32_t c1, c2;
|
||||||
float mc1_l, mc1_r, mc2_l, mc2_r;
|
int32_t mc1_l, mc1_r, mc2_l, mc2_r;
|
||||||
|
|
||||||
for (i=0,band = 0; band < 4*256; band+=256,i++) {
|
for (i=0,band = 0; band < 4*256; band+=256,i++) {
|
||||||
s1 = pPrevCode[i];
|
s1 = pPrevCode[i];
|
||||||
|
@ -629,18 +614,18 @@ static void reverseMatrixing(float *su1, float *su2, int *pPrevCode, int *pCurrC
|
||||||
|
|
||||||
if (s1 != s2) {
|
if (s1 != s2) {
|
||||||
/* Selector value changed, interpolation needed. */
|
/* Selector value changed, interpolation needed. */
|
||||||
mc1_l = matrixCoeffs[s1*2];
|
mc1_l = matrixCoeffs_fix[s1<<1];
|
||||||
mc1_r = matrixCoeffs[s1*2+1];
|
mc1_r = matrixCoeffs_fix[(s1<<1)+1];
|
||||||
mc2_l = matrixCoeffs[s2*2];
|
mc2_l = matrixCoeffs_fix[s2<<1];
|
||||||
mc2_r = matrixCoeffs[s2*2+1];
|
mc2_r = matrixCoeffs_fix[(s2<<1)+1];
|
||||||
|
|
||||||
/* Interpolation is done over the first eight samples. */
|
/* Interpolation is done over the first eight samples. */
|
||||||
for(; nsample < 8; nsample++) {
|
for(; nsample < 8; nsample++) {
|
||||||
c1 = su1[band+nsample];
|
c1 = su1[band+nsample];
|
||||||
c2 = su2[band+nsample];
|
c2 = su2[band+nsample];
|
||||||
c2 = c1 * INTERPOLATE(mc1_l,mc2_l,nsample) + c2 * INTERPOLATE(mc1_r,mc2_r,nsample);
|
c2 = fixmul16(c1, INTERPOLATE(mc1_l, mc2_l, nsample)) + fixmul16(c2, INTERPOLATE(mc1_r, mc2_r, nsample));
|
||||||
su1[band+nsample] = c2;
|
su1[band+nsample] = c2;
|
||||||
su2[band+nsample] = c1 * 2.0 - c2;
|
su2[band+nsample] = (c1 << 1) - c2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -650,8 +635,8 @@ static void reverseMatrixing(float *su1, float *su2, int *pPrevCode, int *pCurrC
|
||||||
for (; nsample < 256; nsample++) {
|
for (; nsample < 256; nsample++) {
|
||||||
c1 = su1[band+nsample];
|
c1 = su1[band+nsample];
|
||||||
c2 = su2[band+nsample];
|
c2 = su2[band+nsample];
|
||||||
su1[band+nsample] = c2 * 2.0;
|
su1[band+nsample] = c2 << 1;
|
||||||
su2[band+nsample] = (c1 - c2) * 2.0;
|
su2[band+nsample] = (c1 - c2) << 1;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -659,8 +644,8 @@ static void reverseMatrixing(float *su1, float *su2, int *pPrevCode, int *pCurrC
|
||||||
for (; nsample < 256; nsample++) {
|
for (; nsample < 256; nsample++) {
|
||||||
c1 = su1[band+nsample];
|
c1 = su1[band+nsample];
|
||||||
c2 = su2[band+nsample];
|
c2 = su2[band+nsample];
|
||||||
su1[band+nsample] = (c1 + c2) * 2.0;
|
su1[band+nsample] = (c1 + c2) << 1;
|
||||||
su2[band+nsample] = c2 * -2.0;
|
su2[band+nsample] = -1*(c2 << 1);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
|
@ -678,24 +663,23 @@ static void reverseMatrixing(float *su1, float *su2, int *pPrevCode, int *pCurrC
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void getChannelWeights (int indx, int flag, float ch[2]){
|
static void getChannelWeights (int indx, int flag, int32_t ch[2]){
|
||||||
|
|
||||||
if (indx == 7) {
|
if (indx == 7) {
|
||||||
ch[0] = 1.0;
|
ch[0] = ONE_16;
|
||||||
ch[1] = 1.0;
|
ch[1] = ONE_16;
|
||||||
} else {
|
} else {
|
||||||
ch[0] = (float)(indx & 7) / 7.0;
|
ch[0] = fixdiv16(((indx & 7)*ONE_16), 7*ONE_16);
|
||||||
ch[1] = sqrt(2 - ch[0]*ch[0]);
|
ch[1] = fastSqrt((ONE_16 << 1) - fixmul16(ch[0], ch[0]));
|
||||||
if(flag)
|
if(flag)
|
||||||
FFSWAP(float, ch[0], ch[1]);
|
FFSWAP(int32_t, ch[0], ch[1]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void channelWeighting (float *su1, float *su2, int *p3)
|
static void channelWeighting (int32_t *su1, int32_t *su2, int *p3)
|
||||||
{
|
{
|
||||||
int band, nsample;
|
int band, nsample;
|
||||||
/* w[x][y] y=0 is left y=1 is right */
|
/* w[x][y] y=0 is left y=1 is right */
|
||||||
float w[2][2];
|
int32_t w[2][2];
|
||||||
|
|
||||||
if (p3[1] != 7 || p3[3] != 7){
|
if (p3[1] != 7 || p3[3] != 7){
|
||||||
getChannelWeights(p3[1], p3[0], w[0]);
|
getChannelWeights(p3[1], p3[0], w[0]);
|
||||||
|
@ -704,13 +688,13 @@ static void channelWeighting (float *su1, float *su2, int *p3)
|
||||||
for(band = 1; band < 4; band++) {
|
for(band = 1; band < 4; band++) {
|
||||||
/* scale the channels by the weights */
|
/* scale the channels by the weights */
|
||||||
for(nsample = 0; nsample < 8; nsample++) {
|
for(nsample = 0; nsample < 8; nsample++) {
|
||||||
su1[band*256+nsample] *= INTERPOLATE(w[0][0], w[0][1], nsample);
|
su1[band*256+nsample] = fixmul16(su1[band*256+nsample], INTERPOLATE(w[0][0], w[0][1], nsample));
|
||||||
su2[band*256+nsample] *= INTERPOLATE(w[1][0], w[1][1], nsample);
|
su2[band*256+nsample] = fixmul16(su2[band*256+nsample], INTERPOLATE(w[1][0], w[1][1], nsample));
|
||||||
}
|
}
|
||||||
|
|
||||||
for(; nsample < 256; nsample++) {
|
for(; nsample < 256; nsample++) {
|
||||||
su1[band*256+nsample] *= w[1][0];
|
su1[band*256+nsample] = fixmul16(su1[band*256+nsample], w[1][0]);
|
||||||
su2[band*256+nsample] *= w[1][1];
|
su2[band*256+nsample] = fixmul16(su2[band*256+nsample], w[1][1]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -728,10 +712,9 @@ static void channelWeighting (float *su1, float *su2, int *p3)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
static int decodeChannelSoundUnit (ATRAC3Context *q, GetBitContext *gb, channel_unit *pSnd, float *pOut, int channelNum, int codingMode)
|
static int decodeChannelSoundUnit (ATRAC3Context *q, GetBitContext *gb, channel_unit *pSnd, int32_t *pOut, int channelNum, int codingMode)
|
||||||
{
|
{
|
||||||
int band, result=0, numSubbands, lastTonal, numBands;
|
int band, result=0, numSubbands, lastTonal, numBands;
|
||||||
|
|
||||||
if (codingMode == JOINT_STEREO && channelNum == 1) {
|
if (codingMode == JOINT_STEREO && channelNum == 1) {
|
||||||
if (get_bits(gb,2) != 3) {
|
if (get_bits(gb,2) != 3) {
|
||||||
av_log(NULL,AV_LOG_ERROR,"JS mono Sound Unit id != 3.\n");
|
av_log(NULL,AV_LOG_ERROR,"JS mono Sound Unit id != 3.\n");
|
||||||
|
@ -771,7 +754,7 @@ static int decodeChannelSoundUnit (ATRAC3Context *q, GetBitContext *gb, channel_
|
||||||
if (band <= numBands) {
|
if (band <= numBands) {
|
||||||
IMLT(&(pSnd->spectrum[band*256]), pSnd->IMDCT_buf, band&1);
|
IMLT(&(pSnd->spectrum[band*256]), pSnd->IMDCT_buf, band&1);
|
||||||
} else
|
} else
|
||||||
memset(pSnd->IMDCT_buf, 0, 512 * sizeof(float));
|
memset(pSnd->IMDCT_buf, 0, 512 * sizeof(int32_t));
|
||||||
|
|
||||||
/* gain compensation and overlapping */
|
/* gain compensation and overlapping */
|
||||||
gainCompensateAndOverlap (pSnd->IMDCT_buf, &(pSnd->prevFrame[band*256]), &(pOut[band*256]),
|
gainCompensateAndOverlap (pSnd->IMDCT_buf, &(pSnd->prevFrame[band*256]), &(pOut[band*256]),
|
||||||
|
@ -795,7 +778,7 @@ static int decodeChannelSoundUnit (ATRAC3Context *q, GetBitContext *gb, channel_
|
||||||
static int decodeFrame(ATRAC3Context *q, const uint8_t* databuf)
|
static int decodeFrame(ATRAC3Context *q, const uint8_t* databuf)
|
||||||
{
|
{
|
||||||
int result, i;
|
int result, i;
|
||||||
float *p1, *p2, *p3, *p4;
|
int32_t *p1, *p2, *p3, *p4;
|
||||||
uint8_t *ptr1;
|
uint8_t *ptr1;
|
||||||
|
|
||||||
if (q->codingMode == JOINT_STEREO) {
|
if (q->codingMode == JOINT_STEREO) {
|
||||||
|
@ -893,7 +876,6 @@ static int decodeFrame(ATRAC3Context *q, const uint8_t* databuf)
|
||||||
static int atrac3_decode_frame(RMContext *rmctx, ATRAC3Context *q,
|
static int atrac3_decode_frame(RMContext *rmctx, ATRAC3Context *q,
|
||||||
void *data, int *data_size,
|
void *data, int *data_size,
|
||||||
const uint8_t *buf, int buf_size) {
|
const uint8_t *buf, int buf_size) {
|
||||||
//ATRAC3Context *q = rmctx->priv_data;
|
|
||||||
int result = 0, i;
|
int result = 0, i;
|
||||||
const uint8_t* databuf;
|
const uint8_t* databuf;
|
||||||
int16_t* samples = data;
|
int16_t* samples = data;
|
||||||
|
@ -919,13 +901,13 @@ static int atrac3_decode_frame(RMContext *rmctx, ATRAC3Context *q,
|
||||||
if (q->channels == 1) {
|
if (q->channels == 1) {
|
||||||
/* mono */
|
/* mono */
|
||||||
for (i = 0; i<1024; i++)
|
for (i = 0; i<1024; i++)
|
||||||
samples[i] = av_clip_int16(round(q->outSamples[i]));
|
samples[i] = av_clip_int16(q->outSamples[i]);
|
||||||
*data_size = 1024 * sizeof(int16_t);
|
*data_size = 1024 * sizeof(int16_t);
|
||||||
} else {
|
} else {
|
||||||
/* stereo */
|
/* stereo */
|
||||||
for (i = 0; i < 1024; i++) {
|
for (i = 0; i < 1024; i++) {
|
||||||
samples[i*2] = av_clip_int16(round(q->outSamples[i]));
|
samples[i*2] = av_clip_int16(q->outSamples[i]);
|
||||||
samples[i*2+1] = av_clip_int16(round(q->outSamples[1024+i]));
|
samples[i*2+1] = av_clip_int16(q->outSamples[1024+i]);
|
||||||
}
|
}
|
||||||
*data_size = 2048 * sizeof(int16_t);
|
*data_size = 2048 * sizeof(int16_t);
|
||||||
}
|
}
|
||||||
|
@ -944,7 +926,6 @@ static av_cold int atrac3_decode_init(ATRAC3Context *q, RMContext *rmctx)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
const uint8_t *edata_ptr = rmctx->codec_extradata;
|
const uint8_t *edata_ptr = rmctx->codec_extradata;
|
||||||
//ATRAC3Context *q = rmctx->priv_data;
|
|
||||||
static VLC_TYPE atrac3_vlc_table[4096][2];
|
static VLC_TYPE atrac3_vlc_table[4096][2];
|
||||||
static int vlcs_initialized = 0;
|
static int vlcs_initialized = 0;
|
||||||
|
|
||||||
|
@ -1051,17 +1032,6 @@ static av_cold int atrac3_decode_init(ATRAC3Context *q, RMContext *rmctx)
|
||||||
|
|
||||||
init_atrac3_transforms(q);
|
init_atrac3_transforms(q);
|
||||||
|
|
||||||
/* Generate the scale factors. */
|
|
||||||
for (i=0 ; i<64 ; i++)
|
|
||||||
SFTable[i] = pow(2.0, (i - 15) / 3.0);
|
|
||||||
|
|
||||||
/* Generate gain tables. */
|
|
||||||
for (i=0 ; i<16 ; i++)
|
|
||||||
gain_tab1[i] = powf (2.0, (4 - i));
|
|
||||||
|
|
||||||
for (i=-15 ; i<16 ; i++)
|
|
||||||
gain_tab2[i+15] = powf (2.0, i * -0.125);
|
|
||||||
|
|
||||||
/* init the joint-stereo decoding data */
|
/* init the joint-stereo decoding data */
|
||||||
q->weighting_delay[0] = 0;
|
q->weighting_delay[0] = 0;
|
||||||
q->weighting_delay[1] = 7;
|
q->weighting_delay[1] = 7;
|
||||||
|
@ -1076,8 +1046,6 @@ static av_cold int atrac3_decode_init(ATRAC3Context *q, RMContext *rmctx)
|
||||||
q->matrix_coeff_index_next[i] = 3;
|
q->matrix_coeff_index_next[i] = 3;
|
||||||
}
|
}
|
||||||
|
|
||||||
dsputil_init(&dsp);
|
|
||||||
|
|
||||||
q->pUnits = av_mallocz(sizeof(channel_unit)*q->channels);
|
q->pUnits = av_mallocz(sizeof(channel_unit)*q->channels);
|
||||||
if (!q->pUnits) {
|
if (!q->pUnits) {
|
||||||
av_free(q->decoded_bytes_buffer);
|
av_free(q->decoded_bytes_buffer);
|
||||||
|
|
145
apps/codecs/libatrac/atrac3data_fixed.h
Normal file
145
apps/codecs/libatrac/atrac3data_fixed.h
Normal file
|
@ -0,0 +1,145 @@
|
||||||
|
|
||||||
|
/* tables for the scalefactor decoding */
|
||||||
|
/* scaled by 2^31*/
|
||||||
|
static const int32_t iMaxQuant_fix[8] = {
|
||||||
|
0x0, 0x55555580, 0x33333340, 0x24924940, 0x1c71c720, 0x11111120, 0x8421080,
|
||||||
|
0x4104108
|
||||||
|
};
|
||||||
|
|
||||||
|
/* scaled by 2^16 */
|
||||||
|
static const int32_t SFTable_fixed[64] = {
|
||||||
|
0x00000800, 0x00000a14, 0x00000cb3, 0x00001000, 0x00001429, 0x00001966,
|
||||||
|
0x00002000, 0x00002851, 0x000032cc, 0x00004000, 0x000050a3, 0x00006598,
|
||||||
|
0x00008000, 0x0000a145, 0x0000cb30, 0x00010000, 0x0001428a, 0x00019660,
|
||||||
|
0x00020000, 0x00028514, 0x00032cc0, 0x00040000, 0x00050a29, 0x00065980,
|
||||||
|
0x00080000, 0x000a1452, 0x000cb2ff, 0x00100000, 0x001428a3, 0x001965ff,
|
||||||
|
0x00200000, 0x00285146, 0x0032cbfd, 0x00400000, 0x0050a28c, 0x006597fb,
|
||||||
|
0x00800000, 0x00a14518, 0x00cb2ff5, 0x01000000, 0x01428a30, 0x01965fea,
|
||||||
|
0x02000000, 0x02851460, 0x032cbfd4, 0x04000000, 0x050a28c0, 0x06597fa8,
|
||||||
|
0x08000000, 0x0a145180, 0x0cb2ff50, 0x10000000, 0x1428a300, 0x1965fea0,
|
||||||
|
0x20000000, 0x28514600, 0x32cbfd40, 0x40000000, 0x50a28c00, 0x6597fa80,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
};
|
||||||
|
|
||||||
|
/* transform data */
|
||||||
|
/* floating point values scaled by 2^31 */
|
||||||
|
static const int32_t qmf_48tap_half_fix[24] = {
|
||||||
|
0xffff855e, 0xfffcfbca, 0xfffe28eb, 0x9de6b, 0x7f028, 0xffe40d08,
|
||||||
|
0xffeef140, 0x42a692, 0x19ab1f, 0xff75dec7, 0xffe738f5, 0x100e928,
|
||||||
|
0xfffdfedf, 0xfe478b84, 0x50b279, 0x2c83f88, 0xff005ad7, 0xfba2ee80,
|
||||||
|
0x2685970, 0x6f42798, 0xfa6b6f10, 0xf3475f80, 0x10e7f7c0, 0x3b6c44c0
|
||||||
|
};
|
||||||
|
|
||||||
|
/* mdct window scaled by 2^31 */
|
||||||
|
static const int32_t window_lookup[512] = {
|
||||||
|
0xffffb10c, 0xfffd394b, 0xfff8494f, 0xfff0e025, 0xffe6fc5f, 0xffda9c15,
|
||||||
|
0xffcbbce6, 0xffba5bf4, 0xffa675e8, 0xff9006f0, 0xff770aba, 0xff5b7c7e,
|
||||||
|
0xff3d56f2, 0xff1c9452, 0xfef92e59, 0xfed31e45, 0xfeaa5cd5, 0xfe7ee247,
|
||||||
|
0xfe50a657, 0xfe1fa041, 0xfdebc6c1, 0xfdb5100d, 0xfd7b71d5, 0xfd3ee149,
|
||||||
|
0xfcff5311, 0xfcbcbb49, 0xfc770d99, 0xfc2e3d15, 0xfbe23c39, 0xfb92fd29,
|
||||||
|
0xfb407141, 0xfaea8989, 0xfa913661, 0xfa3467b1, 0xf9d40cd9, 0xf9701499,
|
||||||
|
0xf9086d41, 0xf89d04a9, 0xf82dc7f1, 0xf7baa3e1, 0xf74384b1, 0xf6c85611,
|
||||||
|
0xf6490321, 0xf5c576b1, 0xf53d9b21, 0xf4b15a01, 0xf4209ce1, 0xf38b4c71,
|
||||||
|
0xf2f15171, 0xf2529411, 0xf1aefbf1, 0xf10670a1, 0xf058d941, 0xefa61cc1,
|
||||||
|
0xeeee21c1, 0xee30cec1, 0xed6e0a41, 0xeca5ba61, 0xebd7c5c1, 0xeb041241,
|
||||||
|
0xea2a8601, 0xe94b0861, 0xe8657f61, 0xe779d241, 0xe687e861, 0xe58fa9e1,
|
||||||
|
0xe490fec1, 0xe38bd101, 0xe28009c1, 0xe16d93e1, 0xe0545ba1, 0xdf344dc1,
|
||||||
|
0xde0d5881, 0xdcdf6bc1, 0xdbaa7801, 0xda6e70c1, 0xd92b4ac1, 0xd7e0fc81,
|
||||||
|
0xd68f7ec1, 0xd536cd41, 0xd3d6e5c1, 0xd26fc901, 0xd10179c1, 0xcf8bff41,
|
||||||
|
0xce0f6301, 0xcc8bb241, 0xcb00fdc1, 0xc96f5b01, 0xc7d6e141, 0xc637af41,
|
||||||
|
0xc491e4c1, 0xc2e5a801, 0xc1332401, 0xbf7a8701, 0xbdbc0681, 0xbbf7da01,
|
||||||
|
0xba2e4181, 0xb85f7f81, 0xb68bde01, 0xb4b3a981, 0xb2d73781, 0xb0f6df01,
|
||||||
|
0xaf12ff01, 0xad2bfa81, 0xab423981, 0xa9562981, 0xa7683c01, 0xa578e701,
|
||||||
|
0xa388a681, 0xa197f801, 0x9fa75e81, 0x9db75f01, 0x9bc88201, 0x99db5301,
|
||||||
|
0x97f06001, 0x96083601, 0x94236601, 0x92427f81, 0x90661481, 0x8e8eb481,
|
||||||
|
0x8cbced01, 0x8af14d81, 0x892c5f81, 0x876eab01, 0x85b8b681, 0x840b0301,
|
||||||
|
0x82660c01, 0x80ca4a01, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||||
|
0x80ca4a01, 0x82660c01, 0x840b0301, 0x85b8b681, 0x876eab01, 0x892c5f81,
|
||||||
|
0x8af14d81, 0x8cbced01, 0x8e8eb481, 0x90661481, 0x92427f81, 0x94236601,
|
||||||
|
0x96083601, 0x97f06001, 0x99db5301, 0x9bc88201, 0x9db75f01, 0x9fa75e81,
|
||||||
|
0xa197f801, 0xa388a681, 0xa578e701, 0xa7683c01, 0xa9562981, 0xab423981,
|
||||||
|
0xad2bfa81, 0xaf12ff01, 0xb0f6df01, 0xb2d73781, 0xb4b3a981, 0xb68bde01,
|
||||||
|
0xb85f7f81, 0xba2e4181, 0xbbf7da01, 0xbdbc0681, 0xbf7a8701, 0xc1332401,
|
||||||
|
0xc2e5a801, 0xc491e4c1, 0xc637af41, 0xc7d6e141, 0xc96f5b01, 0xcb00fdc1,
|
||||||
|
0xcc8bb241, 0xce0f6301, 0xcf8bff41, 0xd10179c1, 0xd26fc901, 0xd3d6e5c1,
|
||||||
|
0xd536cd41, 0xd68f7ec1, 0xd7e0fc81, 0xd92b4ac1, 0xda6e70c1, 0xdbaa7801,
|
||||||
|
0xdcdf6bc1, 0xde0d5881, 0xdf344dc1, 0xe0545ba1, 0xe16d93e1, 0xe28009c1,
|
||||||
|
0xe38bd101, 0xe490fec1, 0xe58fa9e1, 0xe687e861, 0xe779d241, 0xe8657f61,
|
||||||
|
0xe94b0861, 0xea2a8601, 0xeb041241, 0xebd7c5c1, 0xeca5ba61, 0xed6e0a41,
|
||||||
|
0xee30cec1, 0xeeee21c1, 0xefa61cc1, 0xf058d941, 0xf10670a1, 0xf1aefbf1,
|
||||||
|
0xf2529411, 0xf2f15171, 0xf38b4c71, 0xf4209ce1, 0xf4b15a01, 0xf53d9b21,
|
||||||
|
0xf5c576b1, 0xf6490321, 0xf6c85611, 0xf74384b1, 0xf7baa3e1, 0xf82dc7f1,
|
||||||
|
0xf89d04a9, 0xf9086d41, 0xf9701499, 0xf9d40cd9, 0xfa3467b1, 0xfa913661,
|
||||||
|
0xfaea8989, 0xfb407141, 0xfb92fd29, 0xfbe23c39, 0xfc2e3d15, 0xfc770d99,
|
||||||
|
0xfcbcbb49, 0xfcff5311, 0xfd3ee149, 0xfd7b71d5, 0xfdb5100d, 0xfdebc6c1,
|
||||||
|
0xfe1fa041, 0xfe50a657, 0xfe7ee247, 0xfeaa5cd5, 0xfed31e45, 0xfef92e59,
|
||||||
|
0xff1c9452, 0xff3d56f2, 0xff5b7c7e, 0xff770aba, 0xff9006f0, 0xffa675e8,
|
||||||
|
0xffba5bf4, 0xffcbbce6, 0xffda9c15, 0xffe6fc5f, 0xfff0e025, 0xfff8494f,
|
||||||
|
0xfffd394b, 0xffffb10c,
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Gain tables scaled by 2^16 */
|
||||||
|
static const int32_t gain_tab1[16] = {
|
||||||
|
0x00100000, 0x00080000, 0x00040000, 0x00020000, 0x00010000, 0x00008000,
|
||||||
|
0x00004000, 0x00002000, 0x00001000, 0x00000800, 0x00000400, 0x00000200,
|
||||||
|
0x00000100, 0x00000080, 0x00000040, 0x00000020,
|
||||||
|
};
|
||||||
|
|
||||||
|
static const int32_t gain_tab2[31] = {
|
||||||
|
0x0003ab03, 0x00035d14, 0x0003159d, 0x0002d414, 0x000297fb, 0x000260e0,
|
||||||
|
0x00022e57, 0x00020000, 0x0001d582, 0x0001ae8a, 0x00018ace, 0x00016a0a,
|
||||||
|
0x00014bfe, 0x00013070, 0x0001172c, 0x00010000, 0x0000eac1, 0x0000d745,
|
||||||
|
0x0000c567, 0x0000b505, 0x0000a5ff, 0x00009838, 0x00008b96, 0x00008000,
|
||||||
|
0x00007560, 0x00006ba2, 0x000062b4, 0x00005a82, 0x000052ff, 0x00004c1c,
|
||||||
|
0x000045cb,
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Joint-Stereo related tables, scaled by 2^16 */
|
||||||
|
static const int32_t matrixCoeffs_fix[8] = {
|
||||||
|
0x00000000, 0x00020000, 0x00020000, 0x00020000,
|
||||||
|
0x00000000, 0x00000000, 0x00010000, 0x00010000,
|
||||||
|
};
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,898 +0,0 @@
|
||||||
/*
|
|
||||||
* DSP utils
|
|
||||||
* Copyright (c) 2000, 2001, 2002 Fabrice Bellard
|
|
||||||
* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
|
|
||||||
*
|
|
||||||
* This file is part of FFmpeg.
|
|
||||||
*
|
|
||||||
* FFmpeg is free software; you can redistribute it and/or
|
|
||||||
* modify it under the terms of the GNU Lesser General Public
|
|
||||||
* License as published by the Free Software Foundation; either
|
|
||||||
* version 2.1 of the License, or (at your option) any later version.
|
|
||||||
*
|
|
||||||
* FFmpeg is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
||||||
* Lesser General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU Lesser General Public
|
|
||||||
* License along with FFmpeg; if not, write to the Free Software
|
|
||||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @file libavcodec/dsputil.h
|
|
||||||
* DSP utils.
|
|
||||||
* note, many functions in here may use MMX which trashes the FPU state, it is
|
|
||||||
* absolutely necessary to call emms_c() between dsp & float/double code
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef AVCODEC_DSPUTIL_H
|
|
||||||
#define AVCODEC_DSPUTIL_H
|
|
||||||
|
|
||||||
#include "libavutil/intreadwrite.h"
|
|
||||||
#include "avcodec.h"
|
|
||||||
|
|
||||||
|
|
||||||
//#define DEBUG
|
|
||||||
/* dct code */
|
|
||||||
typedef short DCTELEM;
|
|
||||||
typedef int DWTELEM;
|
|
||||||
typedef short IDWTELEM;
|
|
||||||
|
|
||||||
void fdct_ifast (DCTELEM *data);
|
|
||||||
void fdct_ifast248 (DCTELEM *data);
|
|
||||||
void ff_jpeg_fdct_islow (DCTELEM *data);
|
|
||||||
void ff_fdct248_islow (DCTELEM *data);
|
|
||||||
|
|
||||||
void j_rev_dct (DCTELEM *data);
|
|
||||||
void j_rev_dct4 (DCTELEM *data);
|
|
||||||
void j_rev_dct2 (DCTELEM *data);
|
|
||||||
void j_rev_dct1 (DCTELEM *data);
|
|
||||||
void ff_wmv2_idct_c(DCTELEM *data);
|
|
||||||
|
|
||||||
void ff_fdct_mmx(DCTELEM *block);
|
|
||||||
void ff_fdct_mmx2(DCTELEM *block);
|
|
||||||
void ff_fdct_sse2(DCTELEM *block);
|
|
||||||
|
|
||||||
void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride);
|
|
||||||
void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride);
|
|
||||||
void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride);
|
|
||||||
void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride);
|
|
||||||
void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block);
|
|
||||||
void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block);
|
|
||||||
void ff_h264_idct_add16_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
|
|
||||||
void ff_h264_idct_add16intra_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
|
|
||||||
void ff_h264_idct8_add4_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
|
|
||||||
void ff_h264_idct_add8_c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
|
|
||||||
|
|
||||||
void ff_vector_fmul_add_add_c(float *dst, const float *src0, const float *src1,
|
|
||||||
const float *src2, int src3, int blocksize, int step);
|
|
||||||
void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1,
|
|
||||||
const float *win, float add_bias, int len);
|
|
||||||
void ff_float_to_int16_c(int16_t *dst, const float *src, long len);
|
|
||||||
void ff_float_to_int16_interleave_c(int16_t *dst, const float **src, long len, int channels);
|
|
||||||
|
|
||||||
/* encoding scans */
|
|
||||||
extern const uint8_t ff_alternate_horizontal_scan[64];
|
|
||||||
extern const uint8_t ff_alternate_vertical_scan[64];
|
|
||||||
extern const uint8_t ff_zigzag_direct[64];
|
|
||||||
extern const uint8_t ff_zigzag248_direct[64];
|
|
||||||
|
|
||||||
/* pixel operations */
|
|
||||||
#define MAX_NEG_CROP 1024
|
|
||||||
|
|
||||||
/* temporary */
|
|
||||||
extern uint32_t ff_squareTbl[512];
|
|
||||||
extern uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP];
|
|
||||||
|
|
||||||
/* VP3 DSP functions */
|
|
||||||
void ff_vp3_idct_c(DCTELEM *block/* align 16*/);
|
|
||||||
void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
|
|
||||||
void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
|
|
||||||
|
|
||||||
void ff_vp3_v_loop_filter_c(uint8_t *src, int stride, int *bounding_values);
|
|
||||||
void ff_vp3_h_loop_filter_c(uint8_t *src, int stride, int *bounding_values);
|
|
||||||
|
|
||||||
/* VP6 DSP functions */
|
|
||||||
void ff_vp6_filter_diag4_c(uint8_t *dst, uint8_t *src, int stride,
|
|
||||||
const int16_t *h_weights, const int16_t *v_weights);
|
|
||||||
|
|
||||||
/* 1/2^n downscaling functions from imgconvert.c */
|
|
||||||
void ff_img_copy_plane(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
|
|
||||||
void ff_shrink22(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
|
|
||||||
void ff_shrink44(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
|
|
||||||
void ff_shrink88(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
|
|
||||||
|
|
||||||
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
|
|
||||||
int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
|
|
||||||
|
|
||||||
/* minimum alignment rules ;)
|
|
||||||
If you notice errors in the align stuff, need more alignment for some ASM code
|
|
||||||
for some CPU or need to use a function with less aligned data then send a mail
|
|
||||||
to the ffmpeg-devel mailing list, ...
|
|
||||||
|
|
||||||
!warning These alignments might not match reality, (missing attribute((align))
|
|
||||||
stuff somewhere possible).
|
|
||||||
I (Michael) did not check them, these are just the alignments which I think
|
|
||||||
could be reached easily ...
|
|
||||||
|
|
||||||
!future video codecs might need functions with less strict alignment
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
void get_pixels_c(DCTELEM *block, const uint8_t *pixels, int line_size);
|
|
||||||
void diff_pixels_c(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride);
|
|
||||||
void put_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size);
|
|
||||||
void add_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size);
|
|
||||||
void clear_blocks_c(DCTELEM *blocks);
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* add and put pixel (decoding) */
|
|
||||||
// blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16
|
|
||||||
//h for op_pixels_func is limited to {width/2, width} but never larger than 16 and never smaller then 4
|
|
||||||
typedef void (*op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int h);
|
|
||||||
typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int w, int h);
|
|
||||||
typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);
|
|
||||||
typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y);
|
|
||||||
typedef void (*h264_weight_func)(uint8_t *block, int stride, int log2_denom, int weight, int offset);
|
|
||||||
typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset);
|
|
||||||
|
|
||||||
#define DEF_OLD_QPEL(name)\
|
|
||||||
void ff_put_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\
|
|
||||||
void ff_put_no_rnd_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\
|
|
||||||
void ff_avg_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);
|
|
||||||
|
|
||||||
DEF_OLD_QPEL(qpel16_mc11_old_c)
|
|
||||||
DEF_OLD_QPEL(qpel16_mc31_old_c)
|
|
||||||
DEF_OLD_QPEL(qpel16_mc12_old_c)
|
|
||||||
DEF_OLD_QPEL(qpel16_mc32_old_c)
|
|
||||||
DEF_OLD_QPEL(qpel16_mc13_old_c)
|
|
||||||
DEF_OLD_QPEL(qpel16_mc33_old_c)
|
|
||||||
DEF_OLD_QPEL(qpel8_mc11_old_c)
|
|
||||||
DEF_OLD_QPEL(qpel8_mc31_old_c)
|
|
||||||
DEF_OLD_QPEL(qpel8_mc12_old_c)
|
|
||||||
DEF_OLD_QPEL(qpel8_mc32_old_c)
|
|
||||||
DEF_OLD_QPEL(qpel8_mc13_old_c)
|
|
||||||
DEF_OLD_QPEL(qpel8_mc33_old_c)
|
|
||||||
|
|
||||||
#define CALL_2X_PIXELS(a, b, n)\
|
|
||||||
static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
|
|
||||||
b(block , pixels , line_size, h);\
|
|
||||||
b(block+n, pixels+n, line_size, h);\
|
|
||||||
}
|
|
||||||
|
|
||||||
/* motion estimation */
|
|
||||||
// h is limited to {width/2, width, 2*width} but never larger than 16 and never smaller then 2
|
|
||||||
// although currently h<4 is not used as functions with width <8 are neither used nor implemented
|
|
||||||
typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size, int h)/* __attribute__ ((const))*/;
|
|
||||||
|
|
||||||
|
|
||||||
// for snow slices
|
|
||||||
typedef struct slice_buffer_s slice_buffer;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Scantable.
|
|
||||||
*/
|
|
||||||
typedef struct ScanTable{
|
|
||||||
const uint8_t *scantable;
|
|
||||||
uint8_t permutated[64];
|
|
||||||
uint8_t raster_end[64];
|
|
||||||
#if ARCH_PPC
|
|
||||||
/** Used by dct_quantize_altivec to find last-non-zero */
|
|
||||||
DECLARE_ALIGNED(16, uint8_t, inverse[64]);
|
|
||||||
#endif
|
|
||||||
} ScanTable;
|
|
||||||
|
|
||||||
void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable);
|
|
||||||
|
|
||||||
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize,
|
|
||||||
int block_w, int block_h,
|
|
||||||
int src_x, int src_y, int w, int h);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* DSPContext.
|
|
||||||
*/
|
|
||||||
typedef struct DSPContext {
|
|
||||||
/* pixel ops : interface with DCT */
|
|
||||||
void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size);
|
|
||||||
void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride);
|
|
||||||
void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
|
|
||||||
void (*put_signed_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
|
|
||||||
void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
|
|
||||||
void (*add_pixels8)(uint8_t *pixels, DCTELEM *block, int line_size);
|
|
||||||
void (*add_pixels4)(uint8_t *pixels, DCTELEM *block, int line_size);
|
|
||||||
int (*sum_abs_dctelem)(DCTELEM *block/*align 16*/);
|
|
||||||
/**
|
|
||||||
* translational global motion compensation.
|
|
||||||
*/
|
|
||||||
void (*gmc1)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder);
|
|
||||||
/**
|
|
||||||
* global motion compensation.
|
|
||||||
*/
|
|
||||||
void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy,
|
|
||||||
int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
|
|
||||||
void (*clear_block)(DCTELEM *block/*align 16*/);
|
|
||||||
void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
|
|
||||||
int (*pix_sum)(uint8_t * pix, int line_size);
|
|
||||||
int (*pix_norm1)(uint8_t * pix, int line_size);
|
|
||||||
// 16x16 8x8 4x4 2x2 16x8 8x4 4x2 8x16 4x8 2x4
|
|
||||||
|
|
||||||
me_cmp_func sad[6]; /* identical to pix_absAxA except additional void * */
|
|
||||||
me_cmp_func sse[6];
|
|
||||||
me_cmp_func hadamard8_diff[6];
|
|
||||||
me_cmp_func dct_sad[6];
|
|
||||||
me_cmp_func quant_psnr[6];
|
|
||||||
me_cmp_func bit[6];
|
|
||||||
me_cmp_func rd[6];
|
|
||||||
me_cmp_func vsad[6];
|
|
||||||
me_cmp_func vsse[6];
|
|
||||||
me_cmp_func nsse[6];
|
|
||||||
me_cmp_func w53[6];
|
|
||||||
me_cmp_func w97[6];
|
|
||||||
me_cmp_func dct_max[6];
|
|
||||||
me_cmp_func dct264_sad[6];
|
|
||||||
|
|
||||||
me_cmp_func me_pre_cmp[6];
|
|
||||||
me_cmp_func me_cmp[6];
|
|
||||||
me_cmp_func me_sub_cmp[6];
|
|
||||||
me_cmp_func mb_cmp[6];
|
|
||||||
me_cmp_func ildct_cmp[6]; //only width 16 used
|
|
||||||
me_cmp_func frame_skip_cmp[6]; //only width 8 used
|
|
||||||
|
|
||||||
int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2,
|
|
||||||
int size);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Halfpel motion compensation with rounding (a+b+1)>>1.
|
|
||||||
* this is an array[4][4] of motion compensation functions for 4
|
|
||||||
* horizontal blocksizes (8,16) and the 4 halfpel positions<br>
|
|
||||||
* *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
|
|
||||||
* @param block destination where the result is stored
|
|
||||||
* @param pixels source
|
|
||||||
* @param line_size number of bytes in a horizontal line of block
|
|
||||||
* @param h height
|
|
||||||
*/
|
|
||||||
op_pixels_func put_pixels_tab[4][4];
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Halfpel motion compensation with rounding (a+b+1)>>1.
|
|
||||||
* This is an array[4][4] of motion compensation functions for 4
|
|
||||||
* horizontal blocksizes (8,16) and the 4 halfpel positions<br>
|
|
||||||
* *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
|
|
||||||
* @param block destination into which the result is averaged (a+b+1)>>1
|
|
||||||
* @param pixels source
|
|
||||||
* @param line_size number of bytes in a horizontal line of block
|
|
||||||
* @param h height
|
|
||||||
*/
|
|
||||||
op_pixels_func avg_pixels_tab[4][4];
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Halfpel motion compensation with no rounding (a+b)>>1.
|
|
||||||
* this is an array[2][4] of motion compensation functions for 2
|
|
||||||
* horizontal blocksizes (8,16) and the 4 halfpel positions<br>
|
|
||||||
* *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
|
|
||||||
* @param block destination where the result is stored
|
|
||||||
* @param pixels source
|
|
||||||
* @param line_size number of bytes in a horizontal line of block
|
|
||||||
* @param h height
|
|
||||||
*/
|
|
||||||
op_pixels_func put_no_rnd_pixels_tab[4][4];
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Halfpel motion compensation with no rounding (a+b)>>1.
|
|
||||||
* this is an array[2][4] of motion compensation functions for 2
|
|
||||||
* horizontal blocksizes (8,16) and the 4 halfpel positions<br>
|
|
||||||
* *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
|
|
||||||
* @param block destination into which the result is averaged (a+b)>>1
|
|
||||||
* @param pixels source
|
|
||||||
* @param line_size number of bytes in a horizontal line of block
|
|
||||||
* @param h height
|
|
||||||
*/
|
|
||||||
op_pixels_func avg_no_rnd_pixels_tab[4][4];
|
|
||||||
|
|
||||||
void (*put_no_rnd_pixels_l2[2])(uint8_t *block/*align width (8 or 16)*/, const uint8_t *a/*align 1*/, const uint8_t *b/*align 1*/, int line_size, int h);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Thirdpel motion compensation with rounding (a+b+1)>>1.
|
|
||||||
* this is an array[12] of motion compensation functions for the 9 thirdpe
|
|
||||||
* positions<br>
|
|
||||||
* *pixels_tab[ xthirdpel + 4*ythirdpel ]
|
|
||||||
* @param block destination where the result is stored
|
|
||||||
* @param pixels source
|
|
||||||
* @param line_size number of bytes in a horizontal line of block
|
|
||||||
* @param h height
|
|
||||||
*/
|
|
||||||
tpel_mc_func put_tpel_pixels_tab[11]; //FIXME individual func ptr per width?
|
|
||||||
tpel_mc_func avg_tpel_pixels_tab[11]; //FIXME individual func ptr per width?
|
|
||||||
|
|
||||||
qpel_mc_func put_qpel_pixels_tab[2][16];
|
|
||||||
qpel_mc_func avg_qpel_pixels_tab[2][16];
|
|
||||||
qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
|
|
||||||
qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16];
|
|
||||||
qpel_mc_func put_mspel_pixels_tab[8];
|
|
||||||
|
|
||||||
/**
|
|
||||||
* h264 Chroma MC
|
|
||||||
*/
|
|
||||||
h264_chroma_mc_func put_h264_chroma_pixels_tab[3];
|
|
||||||
/* This is really one func used in VC-1 decoding */
|
|
||||||
h264_chroma_mc_func put_no_rnd_h264_chroma_pixels_tab[3];
|
|
||||||
h264_chroma_mc_func avg_h264_chroma_pixels_tab[3];
|
|
||||||
|
|
||||||
qpel_mc_func put_h264_qpel_pixels_tab[4][16];
|
|
||||||
qpel_mc_func avg_h264_qpel_pixels_tab[4][16];
|
|
||||||
|
|
||||||
qpel_mc_func put_2tap_qpel_pixels_tab[4][16];
|
|
||||||
qpel_mc_func avg_2tap_qpel_pixels_tab[4][16];
|
|
||||||
|
|
||||||
h264_weight_func weight_h264_pixels_tab[10];
|
|
||||||
h264_biweight_func biweight_h264_pixels_tab[10];
|
|
||||||
|
|
||||||
/* AVS specific */
|
|
||||||
qpel_mc_func put_cavs_qpel_pixels_tab[2][16];
|
|
||||||
qpel_mc_func avg_cavs_qpel_pixels_tab[2][16];
|
|
||||||
void (*cavs_filter_lv)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
|
|
||||||
void (*cavs_filter_lh)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
|
|
||||||
void (*cavs_filter_cv)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
|
|
||||||
void (*cavs_filter_ch)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
|
|
||||||
void (*cavs_idct8_add)(uint8_t *dst, DCTELEM *block, int stride);
|
|
||||||
|
|
||||||
me_cmp_func pix_abs[2][4];
|
|
||||||
|
|
||||||
/* huffyuv specific */
|
|
||||||
void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w);
|
|
||||||
void (*add_bytes_l2)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 16*/, int w);
|
|
||||||
void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w);
|
|
||||||
/**
|
|
||||||
* subtract huffyuv's variant of median prediction
|
|
||||||
* note, this might read from src1[-1], src2[-1]
|
|
||||||
*/
|
|
||||||
void (*sub_hfyu_median_prediction)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top);
|
|
||||||
void (*add_hfyu_median_prediction)(uint8_t *dst, uint8_t *top, uint8_t *diff, int w, int *left, int *left_top);
|
|
||||||
/* this might write to dst[w] */
|
|
||||||
void (*add_png_paeth_prediction)(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp);
|
|
||||||
void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w);
|
|
||||||
|
|
||||||
void (*h264_v_loop_filter_luma)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0);
|
|
||||||
void (*h264_h_loop_filter_luma)(uint8_t *pix/*align 4 */, int stride, int alpha, int beta, int8_t *tc0);
|
|
||||||
/* v/h_loop_filter_luma_intra: align 16 */
|
|
||||||
void (*h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta);
|
|
||||||
void (*h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta);
|
|
||||||
void (*h264_v_loop_filter_chroma)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta, int8_t *tc0);
|
|
||||||
void (*h264_h_loop_filter_chroma)(uint8_t *pix/*align 4*/, int stride, int alpha, int beta, int8_t *tc0);
|
|
||||||
void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta);
|
|
||||||
void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta);
|
|
||||||
// h264_loop_filter_strength: simd only. the C version is inlined in h264.c
|
|
||||||
void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2],
|
|
||||||
int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field);
|
|
||||||
|
|
||||||
void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale);
|
|
||||||
void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale);
|
|
||||||
|
|
||||||
void (*h261_loop_filter)(uint8_t *src, int stride);
|
|
||||||
|
|
||||||
void (*x8_v_loop_filter)(uint8_t *src, int stride, int qscale);
|
|
||||||
void (*x8_h_loop_filter)(uint8_t *src, int stride, int qscale);
|
|
||||||
|
|
||||||
void (*vp3_v_loop_filter)(uint8_t *src, int stride, int *bounding_values);
|
|
||||||
void (*vp3_h_loop_filter)(uint8_t *src, int stride, int *bounding_values);
|
|
||||||
|
|
||||||
void (*vp6_filter_diag4)(uint8_t *dst, uint8_t *src, int stride,
|
|
||||||
const int16_t *h_weights,const int16_t *v_weights);
|
|
||||||
|
|
||||||
/* assume len is a multiple of 4, and arrays are 16-byte aligned */
|
|
||||||
void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize);
|
|
||||||
void (*ac3_downmix)(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len);
|
|
||||||
/* no alignment needed */
|
|
||||||
void (*flac_compute_autocorr)(const int32_t *data, int len, int lag, double *autoc);
|
|
||||||
/* assume len is a multiple of 8, and arrays are 16-byte aligned */
|
|
||||||
void (*vector_fmul)(float *dst, const float *src, int len);
|
|
||||||
void (*vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len);
|
|
||||||
/* assume len is a multiple of 8, and src arrays are 16-byte aligned */
|
|
||||||
void (*vector_fmul_add_add)(float *dst, const float *src0, const float *src1, const float *src2, int src3, int len, int step);
|
|
||||||
/* assume len is a multiple of 4, and arrays are 16-byte aligned */
|
|
||||||
void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len);
|
|
||||||
/* assume len is a multiple of 8, and arrays are 16-byte aligned */
|
|
||||||
void (*int32_to_float_fmul_scalar)(float *dst, const int *src, float mul, int len);
|
|
||||||
|
|
||||||
/* C version: convert floats from the range [384.0,386.0] to ints in [-32768,32767]
|
|
||||||
* simd versions: convert floats from [-32768.0,32767.0] without rescaling and arrays are 16byte aligned */
|
|
||||||
void (*float_to_int16)(int16_t *dst, const float *src, long len);
|
|
||||||
void (*float_to_int16_interleave)(int16_t *dst, const float **src, long len, int channels);
|
|
||||||
|
|
||||||
/* (I)DCT */
|
|
||||||
void (*fdct)(DCTELEM *block/* align 16*/);
|
|
||||||
void (*fdct248)(DCTELEM *block/* align 16*/);
|
|
||||||
|
|
||||||
/* IDCT really*/
|
|
||||||
void (*idct)(DCTELEM *block/* align 16*/);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* block -> idct -> clip to unsigned 8 bit -> dest.
|
|
||||||
* (-1392, 0, 0, ...) -> idct -> (-174, -174, ...) -> put -> (0, 0, ...)
|
|
||||||
* @param line_size size in bytes of a horizontal line of dest
|
|
||||||
*/
|
|
||||||
void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* block -> idct -> add dest -> clip to unsigned 8 bit -> dest.
|
|
||||||
* @param line_size size in bytes of a horizontal line of dest
|
|
||||||
*/
|
|
||||||
void (*idct_add)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* idct input permutation.
|
|
||||||
* several optimized IDCTs need a permutated input (relative to the normal order of the reference
|
|
||||||
* IDCT)
|
|
||||||
* this permutation must be performed before the idct_put/add, note, normally this can be merged
|
|
||||||
* with the zigzag/alternate scan<br>
|
|
||||||
* an example to avoid confusion:
|
|
||||||
* - (->decode coeffs -> zigzag reorder -> dequant -> reference idct ->...)
|
|
||||||
* - (x -> referece dct -> reference idct -> x)
|
|
||||||
* - (x -> referece dct -> simple_mmx_perm = idct_permutation -> simple_idct_mmx -> x)
|
|
||||||
* - (->decode coeffs -> zigzag reorder -> simple_mmx_perm -> dequant -> simple_idct_mmx ->...)
|
|
||||||
*/
|
|
||||||
uint8_t idct_permutation[64];
|
|
||||||
int idct_permutation_type;
|
|
||||||
#define FF_NO_IDCT_PERM 1
|
|
||||||
#define FF_LIBMPEG2_IDCT_PERM 2
|
|
||||||
#define FF_SIMPLE_IDCT_PERM 3
|
|
||||||
#define FF_TRANSPOSE_IDCT_PERM 4
|
|
||||||
#define FF_PARTTRANS_IDCT_PERM 5
|
|
||||||
#define FF_SSE2_IDCT_PERM 6
|
|
||||||
|
|
||||||
int (*try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale);
|
|
||||||
void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale);
|
|
||||||
#define BASIS_SHIFT 16
|
|
||||||
#define RECON_SHIFT 6
|
|
||||||
|
|
||||||
void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w);
|
|
||||||
#define EDGE_WIDTH 16
|
|
||||||
|
|
||||||
/* h264 functions */
|
|
||||||
/* NOTE!!! if you implement any of h264_idct8_add, h264_idct8_add4 then you must implement all of them
|
|
||||||
NOTE!!! if you implement any of h264_idct_add, h264_idct_add16, h264_idct_add16intra, h264_idct_add8 then you must implement all of them
|
|
||||||
The reason for above, is that no 2 out of one list may use a different permutation.
|
|
||||||
*/
|
|
||||||
void (*h264_idct_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride);
|
|
||||||
void (*h264_idct8_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride);
|
|
||||||
void (*h264_idct_dc_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride);
|
|
||||||
void (*h264_idct8_dc_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride);
|
|
||||||
void (*h264_dct)(DCTELEM block[4][4]);
|
|
||||||
void (*h264_idct_add16)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
|
|
||||||
void (*h264_idct8_add4)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
|
|
||||||
void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
|
|
||||||
void (*h264_idct_add16intra)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
|
|
||||||
|
|
||||||
/* snow wavelet */
|
|
||||||
void (*vertical_compose97i)(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
|
|
||||||
void (*horizontal_compose97i)(IDWTELEM *b, int width);
|
|
||||||
void (*inner_add_yblock)(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
|
|
||||||
|
|
||||||
void (*prefetch)(void *mem, int stride, int h);
|
|
||||||
|
|
||||||
void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
|
|
||||||
|
|
||||||
/* vc1 functions */
|
|
||||||
void (*vc1_inv_trans_8x8)(DCTELEM *b);
|
|
||||||
void (*vc1_inv_trans_8x4)(uint8_t *dest, int line_size, DCTELEM *block);
|
|
||||||
void (*vc1_inv_trans_4x8)(uint8_t *dest, int line_size, DCTELEM *block);
|
|
||||||
void (*vc1_inv_trans_4x4)(uint8_t *dest, int line_size, DCTELEM *block);
|
|
||||||
void (*vc1_v_overlap)(uint8_t* src, int stride);
|
|
||||||
void (*vc1_h_overlap)(uint8_t* src, int stride);
|
|
||||||
/* put 8x8 block with bicubic interpolation and quarterpel precision
|
|
||||||
* last argument is actually round value instead of height
|
|
||||||
*/
|
|
||||||
op_pixels_func put_vc1_mspel_pixels_tab[16];
|
|
||||||
|
|
||||||
/* intrax8 functions */
|
|
||||||
void (*x8_spatial_compensation[12])(uint8_t *src , uint8_t *dst, int linesize);
|
|
||||||
void (*x8_setup_spatial_compensation)(uint8_t *src, uint8_t *dst, int linesize,
|
|
||||||
int * range, int * sum, int edges);
|
|
||||||
|
|
||||||
/* ape functions */
|
|
||||||
/**
|
|
||||||
* Add contents of the second vector to the first one.
|
|
||||||
* @param len length of vectors, should be multiple of 16
|
|
||||||
*/
|
|
||||||
void (*add_int16)(int16_t *v1/*align 16*/, int16_t *v2, int len);
|
|
||||||
/**
|
|
||||||
* Add contents of the second vector to the first one.
|
|
||||||
* @param len length of vectors, should be multiple of 16
|
|
||||||
*/
|
|
||||||
void (*sub_int16)(int16_t *v1/*align 16*/, int16_t *v2, int len);
|
|
||||||
/**
|
|
||||||
* Calculate scalar product of two vectors.
|
|
||||||
* @param len length of vectors, should be multiple of 16
|
|
||||||
* @param shift number of bits to discard from product
|
|
||||||
*/
|
|
||||||
int32_t (*scalarproduct_int16)(int16_t *v1, int16_t *v2/*align 16*/, int len, int shift);
|
|
||||||
|
|
||||||
/* rv30 functions */
|
|
||||||
qpel_mc_func put_rv30_tpel_pixels_tab[4][16];
|
|
||||||
qpel_mc_func avg_rv30_tpel_pixels_tab[4][16];
|
|
||||||
|
|
||||||
/* rv40 functions */
|
|
||||||
qpel_mc_func put_rv40_qpel_pixels_tab[4][16];
|
|
||||||
qpel_mc_func avg_rv40_qpel_pixels_tab[4][16];
|
|
||||||
h264_chroma_mc_func put_rv40_chroma_pixels_tab[3];
|
|
||||||
h264_chroma_mc_func avg_rv40_chroma_pixels_tab[3];
|
|
||||||
} DSPContext;
|
|
||||||
|
|
||||||
void dsputil_static_init(void);
|
|
||||||
void dsputil_init(DSPContext* p);
|
|
||||||
|
|
||||||
int ff_check_alignment(void);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* permute block according to permuatation.
|
|
||||||
* @param last last non zero element in scantable order
|
|
||||||
*/
|
|
||||||
void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last);
|
|
||||||
|
|
||||||
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type);
|
|
||||||
|
|
||||||
#define BYTE_VEC32(c) ((c)*0x01010101UL)
|
|
||||||
|
|
||||||
static inline uint32_t rnd_avg32(uint32_t a, uint32_t b)
|
|
||||||
{
|
|
||||||
return (a | b) - (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline uint32_t no_rnd_avg32(uint32_t a, uint32_t b)
|
|
||||||
{
|
|
||||||
return (a & b) + (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline int get_penalty_factor(int lambda, int lambda2, int type){
|
|
||||||
switch(type&0xFF){
|
|
||||||
default:
|
|
||||||
case FF_CMP_SAD:
|
|
||||||
return lambda>>FF_LAMBDA_SHIFT;
|
|
||||||
case FF_CMP_DCT:
|
|
||||||
return (3*lambda)>>(FF_LAMBDA_SHIFT+1);
|
|
||||||
case FF_CMP_W53:
|
|
||||||
return (4*lambda)>>(FF_LAMBDA_SHIFT);
|
|
||||||
case FF_CMP_W97:
|
|
||||||
return (2*lambda)>>(FF_LAMBDA_SHIFT);
|
|
||||||
case FF_CMP_SATD:
|
|
||||||
case FF_CMP_DCT264:
|
|
||||||
return (2*lambda)>>FF_LAMBDA_SHIFT;
|
|
||||||
case FF_CMP_RD:
|
|
||||||
case FF_CMP_PSNR:
|
|
||||||
case FF_CMP_SSE:
|
|
||||||
case FF_CMP_NSSE:
|
|
||||||
return lambda2>>FF_LAMBDA_SHIFT;
|
|
||||||
case FF_CMP_BIT:
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Empty mmx state.
|
|
||||||
* this must be called between any dsp function and float/double code.
|
|
||||||
* for example sin(); dsp->idct_put(); emms_c(); cos()
|
|
||||||
*/
|
|
||||||
#define emms_c()
|
|
||||||
|
|
||||||
/* should be defined by architectures supporting
|
|
||||||
one or more MultiMedia extension */
|
|
||||||
int mm_support(void);
|
|
||||||
|
|
||||||
void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx);
|
|
||||||
void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx);
|
|
||||||
void dsputil_init_bfin(DSPContext* c, AVCodecContext *avctx);
|
|
||||||
void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx);
|
|
||||||
void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx);
|
|
||||||
void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx);
|
|
||||||
void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx);
|
|
||||||
void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx);
|
|
||||||
void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx);
|
|
||||||
|
|
||||||
#define DECLARE_ALIGNED_16(t, v) DECLARE_ALIGNED(16, t, v)
|
|
||||||
|
|
||||||
#if HAVE_MMX
|
|
||||||
|
|
||||||
#undef emms_c
|
|
||||||
|
|
||||||
extern int mm_flags;
|
|
||||||
|
|
||||||
void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
|
|
||||||
void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
|
|
||||||
void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
|
|
||||||
|
|
||||||
static inline void emms(void)
|
|
||||||
{
|
|
||||||
__asm__ volatile ("emms;":::"memory");
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#define emms_c() \
|
|
||||||
{\
|
|
||||||
if (mm_flags & FF_MM_MMX)\
|
|
||||||
emms();\
|
|
||||||
}
|
|
||||||
|
|
||||||
void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx);
|
|
||||||
|
|
||||||
#elif ARCH_ARM
|
|
||||||
|
|
||||||
extern int mm_flags;
|
|
||||||
|
|
||||||
#if HAVE_NEON
|
|
||||||
# define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(16, t, v)
|
|
||||||
# define STRIDE_ALIGN 16
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#elif ARCH_PPC
|
|
||||||
|
|
||||||
extern int mm_flags;
|
|
||||||
|
|
||||||
#define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(16, t, v)
|
|
||||||
#define STRIDE_ALIGN 16
|
|
||||||
|
|
||||||
#elif HAVE_MMI
|
|
||||||
|
|
||||||
#define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(16, t, v)
|
|
||||||
#define STRIDE_ALIGN 16
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
#define mm_flags 0
|
|
||||||
#define mm_support() 0
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef DECLARE_ALIGNED_8
|
|
||||||
# define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(8, t, v)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef STRIDE_ALIGN
|
|
||||||
# define STRIDE_ALIGN 8
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* PSNR */
|
|
||||||
void get_psnr(uint8_t *orig_image[3], uint8_t *coded_image[3],
|
|
||||||
int orig_linesize[3], int coded_linesize,
|
|
||||||
AVCodecContext *avctx);
|
|
||||||
|
|
||||||
/* FFT computation */
|
|
||||||
|
|
||||||
/* NOTE: soon integer code will be added, so you must use the
|
|
||||||
FFTSample type */
|
|
||||||
typedef float FFTSample;
|
|
||||||
|
|
||||||
struct MDCTContext;
|
|
||||||
|
|
||||||
typedef struct FFTComplex {
|
|
||||||
FFTSample re, im;
|
|
||||||
} FFTComplex;
|
|
||||||
|
|
||||||
typedef struct FFTContext {
|
|
||||||
int nbits;
|
|
||||||
int inverse;
|
|
||||||
uint16_t *revtab;
|
|
||||||
FFTComplex *exptab;
|
|
||||||
FFTComplex *exptab1; /* only used by SSE code */
|
|
||||||
FFTComplex *tmp_buf;
|
|
||||||
void (*fft_permute)(struct FFTContext *s, FFTComplex *z);
|
|
||||||
void (*fft_calc)(struct FFTContext *s, FFTComplex *z);
|
|
||||||
void (*imdct_calc)(struct MDCTContext *s, FFTSample *output, const FFTSample *input);
|
|
||||||
void (*imdct_half)(struct MDCTContext *s, FFTSample *output, const FFTSample *input);
|
|
||||||
} FFTContext;
|
|
||||||
|
|
||||||
extern FFTSample* ff_cos_tabs[13];
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Sets up a complex FFT.
|
|
||||||
* @param nbits log2 of the length of the input array
|
|
||||||
* @param inverse if 0 perform the forward transform, if 1 perform the inverse
|
|
||||||
*/
|
|
||||||
int ff_fft_init(FFTContext *s, int nbits, int inverse);
|
|
||||||
void ff_fft_permute_c(FFTContext *s, FFTComplex *z);
|
|
||||||
void ff_fft_permute_sse(FFTContext *s, FFTComplex *z);
|
|
||||||
void ff_fft_calc_c(FFTContext *s, FFTComplex *z);
|
|
||||||
void ff_fft_calc_sse(FFTContext *s, FFTComplex *z);
|
|
||||||
void ff_fft_calc_3dn(FFTContext *s, FFTComplex *z);
|
|
||||||
void ff_fft_calc_3dn2(FFTContext *s, FFTComplex *z);
|
|
||||||
void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Do the permutation needed BEFORE calling ff_fft_calc().
|
|
||||||
*/
|
|
||||||
static inline void ff_fft_permute(FFTContext *s, FFTComplex *z)
|
|
||||||
{
|
|
||||||
s->fft_permute(s, z);
|
|
||||||
}
|
|
||||||
/**
|
|
||||||
* Do a complex FFT with the parameters defined in ff_fft_init(). The
|
|
||||||
* input data must be permuted before. No 1.0/sqrt(n) normalization is done.
|
|
||||||
*/
|
|
||||||
static inline void ff_fft_calc(FFTContext *s, FFTComplex *z)
|
|
||||||
{
|
|
||||||
s->fft_calc(s, z);
|
|
||||||
}
|
|
||||||
void ff_fft_end(FFTContext *s);
|
|
||||||
|
|
||||||
/* MDCT computation */
|
|
||||||
|
|
||||||
typedef struct MDCTContext {
|
|
||||||
int n; /* size of MDCT (i.e. number of input data * 2) */
|
|
||||||
int nbits; /* n = 2^nbits */
|
|
||||||
/* pre/post rotation tables */
|
|
||||||
FFTSample *tcos;
|
|
||||||
FFTSample *tsin;
|
|
||||||
FFTContext fft;
|
|
||||||
} MDCTContext;
|
|
||||||
|
|
||||||
static inline void ff_imdct_calc(MDCTContext *s, FFTSample *output, const FFTSample *input)
|
|
||||||
{
|
|
||||||
s->fft.imdct_calc(s, output, input);
|
|
||||||
}
|
|
||||||
static inline void ff_imdct_half(MDCTContext *s, FFTSample *output, const FFTSample *input)
|
|
||||||
{
|
|
||||||
s->fft.imdct_half(s, output, input);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Generate a Kaiser-Bessel Derived Window.
|
|
||||||
* @param window pointer to half window
|
|
||||||
* @param alpha determines window shape
|
|
||||||
* @param n size of half window
|
|
||||||
*/
|
|
||||||
void ff_kbd_window_init(float *window, float alpha, int n);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Generate a sine window.
|
|
||||||
* @param window pointer to half window
|
|
||||||
* @param n size of half window
|
|
||||||
*/
|
|
||||||
void ff_sine_window_init(float *window, int n);
|
|
||||||
extern float ff_sine_128 [ 128];
|
|
||||||
extern float ff_sine_256 [ 256];
|
|
||||||
extern float ff_sine_512 [ 512];
|
|
||||||
extern float ff_sine_1024[1024];
|
|
||||||
extern float ff_sine_2048[2048];
|
|
||||||
extern float ff_sine_4096[4096];
|
|
||||||
extern float *ff_sine_windows[6];
|
|
||||||
|
|
||||||
int ff_mdct_init(MDCTContext *s, int nbits, int inverse);
|
|
||||||
void ff_imdct_calc_c(MDCTContext *s, FFTSample *output, const FFTSample *input);
|
|
||||||
void ff_imdct_half_c(MDCTContext *s, FFTSample *output, const FFTSample *input);
|
|
||||||
void ff_imdct_calc_3dn(MDCTContext *s, FFTSample *output, const FFTSample *input);
|
|
||||||
void ff_imdct_half_3dn(MDCTContext *s, FFTSample *output, const FFTSample *input);
|
|
||||||
void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *input);
|
|
||||||
void ff_imdct_half_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *input);
|
|
||||||
void ff_imdct_calc_sse(MDCTContext *s, FFTSample *output, const FFTSample *input);
|
|
||||||
void ff_imdct_half_sse(MDCTContext *s, FFTSample *output, const FFTSample *input);
|
|
||||||
void ff_mdct_calc(MDCTContext *s, FFTSample *out, const FFTSample *input);
|
|
||||||
void ff_mdct_end(MDCTContext *s);
|
|
||||||
|
|
||||||
/* Real Discrete Fourier Transform */
|
|
||||||
|
|
||||||
enum RDFTransformType {
|
|
||||||
RDFT,
|
|
||||||
IRDFT,
|
|
||||||
RIDFT,
|
|
||||||
IRIDFT,
|
|
||||||
};
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
int nbits;
|
|
||||||
int inverse;
|
|
||||||
int sign_convention;
|
|
||||||
|
|
||||||
/* pre/post rotation tables */
|
|
||||||
FFTSample *tcos;
|
|
||||||
FFTSample *tsin;
|
|
||||||
FFTContext fft;
|
|
||||||
} RDFTContext;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Sets up a real FFT.
|
|
||||||
* @param nbits log2 of the length of the input array
|
|
||||||
* @param trans the type of transform
|
|
||||||
*/
|
|
||||||
int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans);
|
|
||||||
void ff_rdft_calc(RDFTContext *s, FFTSample *data);
|
|
||||||
void ff_rdft_end(RDFTContext *s);
|
|
||||||
|
|
||||||
#define WRAPPER8_16(name8, name16)\
|
|
||||||
static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
|
|
||||||
return name8(s, dst , src , stride, h)\
|
|
||||||
+name8(s, dst+8 , src+8 , stride, h);\
|
|
||||||
}
|
|
||||||
|
|
||||||
#define WRAPPER8_16_SQ(name8, name16)\
|
|
||||||
static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
|
|
||||||
int score=0;\
|
|
||||||
score +=name8(s, dst , src , stride, 8);\
|
|
||||||
score +=name8(s, dst+8 , src+8 , stride, 8);\
|
|
||||||
if(h==16){\
|
|
||||||
dst += 8*stride;\
|
|
||||||
src += 8*stride;\
|
|
||||||
score +=name8(s, dst , src , stride, 8);\
|
|
||||||
score +=name8(s, dst+8 , src+8 , stride, 8);\
|
|
||||||
}\
|
|
||||||
return score;\
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
static inline void copy_block2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
for(i=0; i<h; i++)
|
|
||||||
{
|
|
||||||
AV_WN16(dst , AV_RN16(src ));
|
|
||||||
dst+=dstStride;
|
|
||||||
src+=srcStride;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
for(i=0; i<h; i++)
|
|
||||||
{
|
|
||||||
AV_WN32(dst , AV_RN32(src ));
|
|
||||||
dst+=dstStride;
|
|
||||||
src+=srcStride;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void copy_block8(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
for(i=0; i<h; i++)
|
|
||||||
{
|
|
||||||
AV_WN32(dst , AV_RN32(src ));
|
|
||||||
AV_WN32(dst+4 , AV_RN32(src+4 ));
|
|
||||||
dst+=dstStride;
|
|
||||||
src+=srcStride;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void copy_block9(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
for(i=0; i<h; i++)
|
|
||||||
{
|
|
||||||
AV_WN32(dst , AV_RN32(src ));
|
|
||||||
AV_WN32(dst+4 , AV_RN32(src+4 ));
|
|
||||||
dst[8]= src[8];
|
|
||||||
dst+=dstStride;
|
|
||||||
src+=srcStride;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void copy_block16(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
for(i=0; i<h; i++)
|
|
||||||
{
|
|
||||||
AV_WN32(dst , AV_RN32(src ));
|
|
||||||
AV_WN32(dst+4 , AV_RN32(src+4 ));
|
|
||||||
AV_WN32(dst+8 , AV_RN32(src+8 ));
|
|
||||||
AV_WN32(dst+12, AV_RN32(src+12));
|
|
||||||
dst+=dstStride;
|
|
||||||
src+=srcStride;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void copy_block17(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
for(i=0; i<h; i++)
|
|
||||||
{
|
|
||||||
AV_WN32(dst , AV_RN32(src ));
|
|
||||||
AV_WN32(dst+4 , AV_RN32(src+4 ));
|
|
||||||
AV_WN32(dst+8 , AV_RN32(src+8 ));
|
|
||||||
AV_WN32(dst+12, AV_RN32(src+12));
|
|
||||||
dst[16]= src[16];
|
|
||||||
dst+=dstStride;
|
|
||||||
src+=srcStride;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* AVCODEC_DSPUTIL_H */
|
|
|
@ -1,374 +0,0 @@
|
||||||
/*
|
|
||||||
* FFT/IFFT transforms
|
|
||||||
* Copyright (c) 2008 Loren Merritt
|
|
||||||
* Copyright (c) 2002 Fabrice Bellard
|
|
||||||
* Partly based on libdjbfft by D. J. Bernstein
|
|
||||||
*
|
|
||||||
* This file is part of FFmpeg.
|
|
||||||
*
|
|
||||||
* FFmpeg is free software; you can redistribute it and/or
|
|
||||||
* modify it under the terms of the GNU Lesser General Public
|
|
||||||
* License as published by the Free Software Foundation; either
|
|
||||||
* version 2.1 of the License, or (at your option) any later version.
|
|
||||||
*
|
|
||||||
* FFmpeg is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
||||||
* Lesser General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU Lesser General Public
|
|
||||||
* License along with FFmpeg; if not, write to the Free Software
|
|
||||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @file libavcodec/fft.c
|
|
||||||
* FFT/IFFT transforms.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "dsputil.h"
|
|
||||||
|
|
||||||
/* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */
|
|
||||||
DECLARE_ALIGNED_16(FFTSample, ff_cos_16[8]);
|
|
||||||
DECLARE_ALIGNED_16(FFTSample, ff_cos_32[16]);
|
|
||||||
DECLARE_ALIGNED_16(FFTSample, ff_cos_64[32]);
|
|
||||||
DECLARE_ALIGNED_16(FFTSample, ff_cos_128[64]);
|
|
||||||
DECLARE_ALIGNED_16(FFTSample, ff_cos_256[128]);
|
|
||||||
DECLARE_ALIGNED_16(FFTSample, ff_cos_512[256]);
|
|
||||||
DECLARE_ALIGNED_16(FFTSample, ff_cos_1024[512]);
|
|
||||||
DECLARE_ALIGNED_16(FFTSample, ff_cos_2048[1024]);
|
|
||||||
DECLARE_ALIGNED_16(FFTSample, ff_cos_4096[2048]);
|
|
||||||
DECLARE_ALIGNED_16(FFTSample, ff_cos_8192[4096]);
|
|
||||||
DECLARE_ALIGNED_16(FFTSample, ff_cos_16384[8192]);
|
|
||||||
DECLARE_ALIGNED_16(FFTSample, ff_cos_32768[16384]);
|
|
||||||
DECLARE_ALIGNED_16(FFTSample, ff_cos_65536[32768]);
|
|
||||||
FFTSample *ff_cos_tabs[] = {
|
|
||||||
ff_cos_16, ff_cos_32, ff_cos_64, ff_cos_128, ff_cos_256, ff_cos_512, ff_cos_1024,
|
|
||||||
ff_cos_2048, ff_cos_4096, ff_cos_8192, ff_cos_16384, ff_cos_32768, ff_cos_65536,
|
|
||||||
};
|
|
||||||
|
|
||||||
static int split_radix_permutation(int i, int n, int inverse)
|
|
||||||
{
|
|
||||||
int m;
|
|
||||||
if(n <= 2) return i&1;
|
|
||||||
m = n >> 1;
|
|
||||||
if(!(i&m)) return split_radix_permutation(i, m, inverse)*2;
|
|
||||||
m >>= 1;
|
|
||||||
if(inverse == !(i&m)) return split_radix_permutation(i, m, inverse)*4 + 1;
|
|
||||||
else return split_radix_permutation(i, m, inverse)*4 - 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
|
|
||||||
{
|
|
||||||
int i, j, m, n;
|
|
||||||
float alpha, c1, s1, s2;
|
|
||||||
int split_radix = 1;
|
|
||||||
int av_unused has_vectors;
|
|
||||||
|
|
||||||
if (nbits < 2 || nbits > 16)
|
|
||||||
goto fail;
|
|
||||||
s->nbits = nbits;
|
|
||||||
n = 1 << nbits;
|
|
||||||
|
|
||||||
s->tmp_buf = NULL;
|
|
||||||
s->exptab = av_malloc((n / 2) * sizeof(FFTComplex));
|
|
||||||
if (!s->exptab)
|
|
||||||
goto fail;
|
|
||||||
s->revtab = av_malloc(n * sizeof(uint16_t));
|
|
||||||
if (!s->revtab)
|
|
||||||
goto fail;
|
|
||||||
s->inverse = inverse;
|
|
||||||
|
|
||||||
s2 = inverse ? 1.0 : -1.0;
|
|
||||||
|
|
||||||
s->fft_permute = ff_fft_permute_c;
|
|
||||||
s->fft_calc = ff_fft_calc_c;
|
|
||||||
s->imdct_calc = ff_imdct_calc_c;
|
|
||||||
s->imdct_half = ff_imdct_half_c;
|
|
||||||
s->exptab1 = NULL;
|
|
||||||
|
|
||||||
#if HAVE_MMX && HAVE_YASM
|
|
||||||
has_vectors = mm_support();
|
|
||||||
if (has_vectors & FF_MM_SSE && HAVE_SSE) {
|
|
||||||
/* SSE for P3/P4/K8 */
|
|
||||||
s->imdct_calc = ff_imdct_calc_sse;
|
|
||||||
s->imdct_half = ff_imdct_half_sse;
|
|
||||||
s->fft_permute = ff_fft_permute_sse;
|
|
||||||
s->fft_calc = ff_fft_calc_sse;
|
|
||||||
} else if (has_vectors & FF_MM_3DNOWEXT && HAVE_AMD3DNOWEXT) {
|
|
||||||
/* 3DNowEx for K7 */
|
|
||||||
s->imdct_calc = ff_imdct_calc_3dn2;
|
|
||||||
s->imdct_half = ff_imdct_half_3dn2;
|
|
||||||
s->fft_calc = ff_fft_calc_3dn2;
|
|
||||||
} else if (has_vectors & FF_MM_3DNOW && HAVE_AMD3DNOW) {
|
|
||||||
/* 3DNow! for K6-2/3 */
|
|
||||||
s->imdct_calc = ff_imdct_calc_3dn;
|
|
||||||
s->imdct_half = ff_imdct_half_3dn;
|
|
||||||
s->fft_calc = ff_fft_calc_3dn;
|
|
||||||
}
|
|
||||||
#elif HAVE_ALTIVEC
|
|
||||||
has_vectors = mm_support();
|
|
||||||
if (has_vectors & FF_MM_ALTIVEC) {
|
|
||||||
s->fft_calc = ff_fft_calc_altivec;
|
|
||||||
split_radix = 0;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (split_radix) {
|
|
||||||
for(j=4; j<=nbits; j++) {
|
|
||||||
int m = 1<<j;
|
|
||||||
double freq = 2*M_PI/m;
|
|
||||||
FFTSample *tab = ff_cos_tabs[j-4];
|
|
||||||
for(i=0; i<=m/4; i++)
|
|
||||||
tab[i] = cos(i*freq);
|
|
||||||
for(i=1; i<m/4; i++)
|
|
||||||
tab[m/2-i] = tab[i];
|
|
||||||
}
|
|
||||||
for(i=0; i<n; i++)
|
|
||||||
s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = i;
|
|
||||||
s->tmp_buf = av_malloc(n * sizeof(FFTComplex));
|
|
||||||
} else {
|
|
||||||
int np, nblocks, np2, l;
|
|
||||||
FFTComplex *q;
|
|
||||||
|
|
||||||
for(i=0; i<(n/2); i++) {
|
|
||||||
alpha = 2 * M_PI * (float)i / (float)n;
|
|
||||||
c1 = cos(alpha);
|
|
||||||
s1 = sin(alpha) * s2;
|
|
||||||
s->exptab[i].re = c1;
|
|
||||||
s->exptab[i].im = s1;
|
|
||||||
}
|
|
||||||
|
|
||||||
np = 1 << nbits;
|
|
||||||
nblocks = np >> 3;
|
|
||||||
np2 = np >> 1;
|
|
||||||
s->exptab1 = av_malloc(np * 2 * sizeof(FFTComplex));
|
|
||||||
if (!s->exptab1)
|
|
||||||
goto fail;
|
|
||||||
q = s->exptab1;
|
|
||||||
do {
|
|
||||||
for(l = 0; l < np2; l += 2 * nblocks) {
|
|
||||||
*q++ = s->exptab[l];
|
|
||||||
*q++ = s->exptab[l + nblocks];
|
|
||||||
|
|
||||||
q->re = -s->exptab[l].im;
|
|
||||||
q->im = s->exptab[l].re;
|
|
||||||
q++;
|
|
||||||
q->re = -s->exptab[l + nblocks].im;
|
|
||||||
q->im = s->exptab[l + nblocks].re;
|
|
||||||
q++;
|
|
||||||
}
|
|
||||||
nblocks = nblocks >> 1;
|
|
||||||
} while (nblocks != 0);
|
|
||||||
av_freep(&s->exptab);
|
|
||||||
|
|
||||||
/* compute bit reverse table */
|
|
||||||
for(i=0;i<n;i++) {
|
|
||||||
m=0;
|
|
||||||
for(j=0;j<nbits;j++) {
|
|
||||||
m |= ((i >> j) & 1) << (nbits-j-1);
|
|
||||||
}
|
|
||||||
s->revtab[i]=m;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
fail:
|
|
||||||
av_freep(&s->revtab);
|
|
||||||
av_freep(&s->exptab);
|
|
||||||
av_freep(&s->exptab1);
|
|
||||||
av_freep(&s->tmp_buf);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
void ff_fft_permute_c(FFTContext *s, FFTComplex *z)
|
|
||||||
{
|
|
||||||
int j, k, np;
|
|
||||||
FFTComplex tmp;
|
|
||||||
const uint16_t *revtab = s->revtab;
|
|
||||||
np = 1 << s->nbits;
|
|
||||||
|
|
||||||
if (s->tmp_buf) {
|
|
||||||
/* TODO: handle split-radix permute in a more optimal way, probably in-place */
|
|
||||||
for(j=0;j<np;j++) s->tmp_buf[revtab[j]] = z[j];
|
|
||||||
memcpy(z, s->tmp_buf, np * sizeof(FFTComplex));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* reverse */
|
|
||||||
for(j=0;j<np;j++) {
|
|
||||||
k = revtab[j];
|
|
||||||
if (k < j) {
|
|
||||||
tmp = z[k];
|
|
||||||
z[k] = z[j];
|
|
||||||
z[j] = tmp;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
av_cold void ff_fft_end(FFTContext *s)
|
|
||||||
{
|
|
||||||
av_freep(&s->revtab);
|
|
||||||
av_freep(&s->exptab);
|
|
||||||
av_freep(&s->exptab1);
|
|
||||||
av_freep(&s->tmp_buf);
|
|
||||||
}
|
|
||||||
|
|
||||||
#define sqrthalf (float)M_SQRT1_2
|
|
||||||
|
|
||||||
#define BF(x,y,a,b) {\
|
|
||||||
x = a - b;\
|
|
||||||
y = a + b;\
|
|
||||||
}
|
|
||||||
|
|
||||||
#define BUTTERFLIES(a0,a1,a2,a3) {\
|
|
||||||
BF(t3, t5, t5, t1);\
|
|
||||||
BF(a2.re, a0.re, a0.re, t5);\
|
|
||||||
BF(a3.im, a1.im, a1.im, t3);\
|
|
||||||
BF(t4, t6, t2, t6);\
|
|
||||||
BF(a3.re, a1.re, a1.re, t4);\
|
|
||||||
BF(a2.im, a0.im, a0.im, t6);\
|
|
||||||
}
|
|
||||||
|
|
||||||
// force loading all the inputs before storing any.
|
|
||||||
// this is slightly slower for small data, but avoids store->load aliasing
|
|
||||||
// for addresses separated by large powers of 2.
|
|
||||||
#define BUTTERFLIES_BIG(a0,a1,a2,a3) {\
|
|
||||||
FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\
|
|
||||||
BF(t3, t5, t5, t1);\
|
|
||||||
BF(a2.re, a0.re, r0, t5);\
|
|
||||||
BF(a3.im, a1.im, i1, t3);\
|
|
||||||
BF(t4, t6, t2, t6);\
|
|
||||||
BF(a3.re, a1.re, r1, t4);\
|
|
||||||
BF(a2.im, a0.im, i0, t6);\
|
|
||||||
}
|
|
||||||
|
|
||||||
#define TRANSFORM(a0,a1,a2,a3,wre,wim) {\
|
|
||||||
t1 = a2.re * wre + a2.im * wim;\
|
|
||||||
t2 = a2.im * wre - a2.re * wim;\
|
|
||||||
t5 = a3.re * wre - a3.im * wim;\
|
|
||||||
t6 = a3.im * wre + a3.re * wim;\
|
|
||||||
BUTTERFLIES(a0,a1,a2,a3)\
|
|
||||||
}
|
|
||||||
|
|
||||||
#define TRANSFORM_ZERO(a0,a1,a2,a3) {\
|
|
||||||
t1 = a2.re;\
|
|
||||||
t2 = a2.im;\
|
|
||||||
t5 = a3.re;\
|
|
||||||
t6 = a3.im;\
|
|
||||||
BUTTERFLIES(a0,a1,a2,a3)\
|
|
||||||
}
|
|
||||||
|
|
||||||
/* z[0...8n-1], w[1...2n-1] */
|
|
||||||
#define PASS(name)\
|
|
||||||
static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\
|
|
||||||
{\
|
|
||||||
FFTSample t1, t2, t3, t4, t5, t6;\
|
|
||||||
int o1 = 2*n;\
|
|
||||||
int o2 = 4*n;\
|
|
||||||
int o3 = 6*n;\
|
|
||||||
const FFTSample *wim = wre+o1;\
|
|
||||||
n--;\
|
|
||||||
\
|
|
||||||
TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\
|
|
||||||
TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
|
|
||||||
do {\
|
|
||||||
z += 2;\
|
|
||||||
wre += 2;\
|
|
||||||
wim -= 2;\
|
|
||||||
TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\
|
|
||||||
TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
|
|
||||||
} while(--n);\
|
|
||||||
}
|
|
||||||
|
|
||||||
PASS(pass)
|
|
||||||
#undef BUTTERFLIES
|
|
||||||
#define BUTTERFLIES BUTTERFLIES_BIG
|
|
||||||
PASS(pass_big)
|
|
||||||
|
|
||||||
#define DECL_FFT(n,n2,n4)\
|
|
||||||
static void fft##n(FFTComplex *z)\
|
|
||||||
{\
|
|
||||||
fft##n2(z);\
|
|
||||||
fft##n4(z+n4*2);\
|
|
||||||
fft##n4(z+n4*3);\
|
|
||||||
pass(z,ff_cos_##n,n4/2);\
|
|
||||||
}
|
|
||||||
|
|
||||||
static void fft4(FFTComplex *z)
|
|
||||||
{
|
|
||||||
FFTSample t1, t2, t3, t4, t5, t6, t7, t8;
|
|
||||||
|
|
||||||
BF(t3, t1, z[0].re, z[1].re);
|
|
||||||
BF(t8, t6, z[3].re, z[2].re);
|
|
||||||
BF(z[2].re, z[0].re, t1, t6);
|
|
||||||
BF(t4, t2, z[0].im, z[1].im);
|
|
||||||
BF(t7, t5, z[2].im, z[3].im);
|
|
||||||
BF(z[3].im, z[1].im, t4, t8);
|
|
||||||
BF(z[3].re, z[1].re, t3, t7);
|
|
||||||
BF(z[2].im, z[0].im, t2, t5);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void fft8(FFTComplex *z)
|
|
||||||
{
|
|
||||||
FFTSample t1, t2, t3, t4, t5, t6, t7, t8;
|
|
||||||
|
|
||||||
fft4(z);
|
|
||||||
|
|
||||||
BF(t1, z[5].re, z[4].re, -z[5].re);
|
|
||||||
BF(t2, z[5].im, z[4].im, -z[5].im);
|
|
||||||
BF(t3, z[7].re, z[6].re, -z[7].re);
|
|
||||||
BF(t4, z[7].im, z[6].im, -z[7].im);
|
|
||||||
BF(t8, t1, t3, t1);
|
|
||||||
BF(t7, t2, t2, t4);
|
|
||||||
BF(z[4].re, z[0].re, z[0].re, t1);
|
|
||||||
BF(z[4].im, z[0].im, z[0].im, t2);
|
|
||||||
BF(z[6].re, z[2].re, z[2].re, t7);
|
|
||||||
BF(z[6].im, z[2].im, z[2].im, t8);
|
|
||||||
|
|
||||||
TRANSFORM(z[1],z[3],z[5],z[7],sqrthalf,sqrthalf);
|
|
||||||
}
|
|
||||||
|
|
||||||
#if !CONFIG_SMALL
|
|
||||||
static void fft16(FFTComplex *z)
|
|
||||||
{
|
|
||||||
FFTSample t1, t2, t3, t4, t5, t6;
|
|
||||||
|
|
||||||
fft8(z);
|
|
||||||
fft4(z+8);
|
|
||||||
fft4(z+12);
|
|
||||||
|
|
||||||
TRANSFORM_ZERO(z[0],z[4],z[8],z[12]);
|
|
||||||
TRANSFORM(z[2],z[6],z[10],z[14],sqrthalf,sqrthalf);
|
|
||||||
TRANSFORM(z[1],z[5],z[9],z[13],ff_cos_16[1],ff_cos_16[3]);
|
|
||||||
TRANSFORM(z[3],z[7],z[11],z[15],ff_cos_16[3],ff_cos_16[1]);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
DECL_FFT(16,8,4)
|
|
||||||
#endif
|
|
||||||
DECL_FFT(32,16,8)
|
|
||||||
DECL_FFT(64,32,16)
|
|
||||||
DECL_FFT(128,64,32)
|
|
||||||
DECL_FFT(256,128,64)
|
|
||||||
DECL_FFT(512,256,128)
|
|
||||||
#if !CONFIG_SMALL
|
|
||||||
#define pass pass_big
|
|
||||||
#endif
|
|
||||||
DECL_FFT(1024,512,256)
|
|
||||||
DECL_FFT(2048,1024,512)
|
|
||||||
DECL_FFT(4096,2048,1024)
|
|
||||||
DECL_FFT(8192,4096,2048)
|
|
||||||
DECL_FFT(16384,8192,4096)
|
|
||||||
DECL_FFT(32768,16384,8192)
|
|
||||||
DECL_FFT(65536,32768,16384)
|
|
||||||
|
|
||||||
static void (*fft_dispatch[])(FFTComplex*) = {
|
|
||||||
fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512, fft1024,
|
|
||||||
fft2048, fft4096, fft8192, fft16384, fft32768, fft65536,
|
|
||||||
};
|
|
||||||
|
|
||||||
void ff_fft_calc_c(FFTContext *s, FFTComplex *z)
|
|
||||||
{
|
|
||||||
fft_dispatch[s->nbits-2](z);
|
|
||||||
}
|
|
||||||
|
|
66
apps/codecs/libatrac/fixp_math.c
Normal file
66
apps/codecs/libatrac/fixp_math.c
Normal file
|
@ -0,0 +1,66 @@
|
||||||
|
#include "fixp_math.h"
|
||||||
|
|
||||||
|
inline int32_t fixmul31(int32_t x, int32_t y)
|
||||||
|
{
|
||||||
|
int64_t temp;
|
||||||
|
temp = x;
|
||||||
|
temp *= y;
|
||||||
|
|
||||||
|
temp >>= 31; //16+31-16 = 31 bits
|
||||||
|
|
||||||
|
return (int32_t)temp;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Fast integer square root adapted from algorithm,
|
||||||
|
* Martin Guy @ UKC, June 1985.
|
||||||
|
* Originally from a book on programming abaci by Mr C. Woo.
|
||||||
|
* This is taken from :
|
||||||
|
* http://wiki.forum.nokia.com/index.php/How_to_use_fixed_point_maths#How_to_get_square_root_for_integers
|
||||||
|
* with a added shift up of the result by 8 bits to return result in 16.16 fixed-point representation.
|
||||||
|
*/
|
||||||
|
inline int32_t fastSqrt(int32_t n)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Logically, these are unsigned.
|
||||||
|
* We need the sign bit to test
|
||||||
|
* whether (op - res - one) underflowed.
|
||||||
|
*/
|
||||||
|
int32_t op, res, one;
|
||||||
|
op = n;
|
||||||
|
res = 0;
|
||||||
|
/* "one" starts at the highest power of four <= than the argument. */
|
||||||
|
one = 1 << 30; /* second-to-top bit set */
|
||||||
|
while (one > op) one >>= 2;
|
||||||
|
while (one != 0)
|
||||||
|
{
|
||||||
|
if (op >= res + one)
|
||||||
|
{
|
||||||
|
op = op - (res + one);
|
||||||
|
res = res + (one<<1);
|
||||||
|
}
|
||||||
|
res >>= 1;
|
||||||
|
one >>= 2;
|
||||||
|
}
|
||||||
|
return(res << 8);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline int32_t fixmul16(int32_t x, int32_t y)
|
||||||
|
{
|
||||||
|
int64_t temp;
|
||||||
|
temp = x;
|
||||||
|
temp *= y;
|
||||||
|
|
||||||
|
temp >>= 16;
|
||||||
|
|
||||||
|
return (int32_t)temp;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline int32_t fixdiv16(int32_t x, int32_t y)
|
||||||
|
{
|
||||||
|
int64_t temp;
|
||||||
|
temp = x << 16;
|
||||||
|
temp /= y;
|
||||||
|
|
||||||
|
return (int32_t)temp;
|
||||||
|
}
|
14
apps/codecs/libatrac/fixp_math.h
Normal file
14
apps/codecs/libatrac/fixp_math.h
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
/* Macros for converting between various fixed-point representations and floating point. */
|
||||||
|
#define ONE_16 (1L << 16)
|
||||||
|
#define fixtof64(x) (float)((float)(x) / (float)(1 << 16)) //does not work on int64_t!
|
||||||
|
#define ftofix32(x) ((int32_t)((x) * (float)(1 << 16) + ((x) < 0 ? -0.5 : 0.5)))
|
||||||
|
#define ftofix31(x) ((int32_t)((x) * (float)(1 << 31) + ((x) < 0 ? -0.5 : 0.5)))
|
||||||
|
#define fix31tof64(x) (float)((float)(x) / (float)(1 << 31))
|
||||||
|
|
||||||
|
/* Fixed point math routines for use in atrac3.c */
|
||||||
|
inline int32_t fixdiv16(int32_t x, int32_t y);
|
||||||
|
inline int32_t fixmul16(int32_t x, int32_t y);
|
||||||
|
inline int32_t fixmul31(int32_t x, int32_t y);
|
||||||
|
inline int32_t fastSqrt(int32_t n);
|
|
@ -1,245 +0,0 @@
|
||||||
/*
|
|
||||||
* MDCT/IMDCT transforms
|
|
||||||
* Copyright (c) 2002 Fabrice Bellard
|
|
||||||
*
|
|
||||||
* This file is part of FFmpeg.
|
|
||||||
*
|
|
||||||
* FFmpeg is free software; you can redistribute it and/or
|
|
||||||
* modify it under the terms of the GNU Lesser General Public
|
|
||||||
* License as published by the Free Software Foundation; either
|
|
||||||
* version 2.1 of the License, or (at your option) any later version.
|
|
||||||
*
|
|
||||||
* FFmpeg is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
||||||
* Lesser General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU Lesser General Public
|
|
||||||
* License along with FFmpeg; if not, write to the Free Software
|
|
||||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
||||||
*/
|
|
||||||
#include "dsputil.h"
|
|
||||||
|
|
||||||
#ifndef M_E
|
|
||||||
#define M_E 2.7182818284590452354 /* e */
|
|
||||||
#endif
|
|
||||||
#ifndef M_LN2
|
|
||||||
#define M_LN2 0.69314718055994530942 /* log_e 2 */
|
|
||||||
#endif
|
|
||||||
#ifndef M_LN10
|
|
||||||
#define M_LN10 2.30258509299404568402 /* log_e 10 */
|
|
||||||
#endif
|
|
||||||
#ifndef M_PI
|
|
||||||
#define M_PI 3.14159265358979323846 /* pi */
|
|
||||||
#endif
|
|
||||||
#ifndef M_SQRT1_2
|
|
||||||
#define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @file libavcodec/mdct.c
|
|
||||||
* MDCT/IMDCT transforms.
|
|
||||||
*/
|
|
||||||
|
|
||||||
// Generate a Kaiser-Bessel Derived Window.
|
|
||||||
#define BESSEL_I0_ITER 50 // default: 50 iterations of Bessel I0 approximation
|
|
||||||
av_cold void ff_kbd_window_init(float *window, float alpha, int n)
|
|
||||||
{
|
|
||||||
int i, j;
|
|
||||||
double sum = 0.0, bessel, tmp;
|
|
||||||
double local_window[n];
|
|
||||||
double alpha2 = (alpha * M_PI / n) * (alpha * M_PI / n);
|
|
||||||
|
|
||||||
for (i = 0; i < n; i++) {
|
|
||||||
tmp = i * (n - i) * alpha2;
|
|
||||||
bessel = 1.0;
|
|
||||||
for (j = BESSEL_I0_ITER; j > 0; j--)
|
|
||||||
bessel = bessel * tmp / (j * j) + 1;
|
|
||||||
sum += bessel;
|
|
||||||
local_window[i] = sum;
|
|
||||||
}
|
|
||||||
|
|
||||||
sum++;
|
|
||||||
for (i = 0; i < n; i++)
|
|
||||||
window[i] = sqrt(local_window[i] / sum);
|
|
||||||
}
|
|
||||||
|
|
||||||
DECLARE_ALIGNED(16, float, ff_sine_128 [ 128]);
|
|
||||||
DECLARE_ALIGNED(16, float, ff_sine_256 [ 256]);
|
|
||||||
DECLARE_ALIGNED(16, float, ff_sine_512 [ 512]);
|
|
||||||
DECLARE_ALIGNED(16, float, ff_sine_1024[1024]);
|
|
||||||
DECLARE_ALIGNED(16, float, ff_sine_2048[2048]);
|
|
||||||
DECLARE_ALIGNED(16, float, ff_sine_4096[4096]);
|
|
||||||
float *ff_sine_windows[6] = {
|
|
||||||
ff_sine_128, ff_sine_256, ff_sine_512, ff_sine_1024, ff_sine_2048, ff_sine_4096
|
|
||||||
};
|
|
||||||
|
|
||||||
// Generate a sine window.
|
|
||||||
av_cold void ff_sine_window_init(float *window, int n) {
|
|
||||||
int i;
|
|
||||||
for(i = 0; i < n; i++)
|
|
||||||
window[i] = sinf((i + 0.5) * (M_PI / (2.0 * n)));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* init MDCT or IMDCT computation.
|
|
||||||
*/
|
|
||||||
av_cold int ff_mdct_init(MDCTContext *s, int nbits, int inverse)
|
|
||||||
{
|
|
||||||
int n, n4, i;
|
|
||||||
double alpha;
|
|
||||||
|
|
||||||
memset(s, 0, sizeof(*s));
|
|
||||||
n = 1 << nbits;
|
|
||||||
s->nbits = nbits;
|
|
||||||
s->n = n;
|
|
||||||
n4 = n >> 2;
|
|
||||||
s->tcos = av_malloc(n4 * sizeof(FFTSample));
|
|
||||||
if (!s->tcos)
|
|
||||||
goto fail;
|
|
||||||
s->tsin = av_malloc(n4 * sizeof(FFTSample));
|
|
||||||
if (!s->tsin)
|
|
||||||
goto fail;
|
|
||||||
|
|
||||||
for(i=0;i<n4;i++) {
|
|
||||||
alpha = 2 * M_PI * (i + 1.0 / 8.0) / n;
|
|
||||||
s->tcos[i] = -cos(alpha);
|
|
||||||
s->tsin[i] = -sin(alpha);
|
|
||||||
}
|
|
||||||
if (ff_fft_init(&s->fft, s->nbits - 2, inverse) < 0)
|
|
||||||
goto fail;
|
|
||||||
return 0;
|
|
||||||
fail:
|
|
||||||
av_freep(&s->tcos);
|
|
||||||
av_freep(&s->tsin);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* complex multiplication: p = a * b */
|
|
||||||
#define CMUL(pre, pim, are, aim, bre, bim) \
|
|
||||||
{\
|
|
||||||
FFTSample _are = (are);\
|
|
||||||
FFTSample _aim = (aim);\
|
|
||||||
FFTSample _bre = (bre);\
|
|
||||||
FFTSample _bim = (bim);\
|
|
||||||
(pre) = _are * _bre - _aim * _bim;\
|
|
||||||
(pim) = _are * _bim + _aim * _bre;\
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Compute the middle half of the inverse MDCT of size N = 2^nbits,
|
|
||||||
* thus excluding the parts that can be derived by symmetry
|
|
||||||
* @param output N/2 samples
|
|
||||||
* @param input N/2 samples
|
|
||||||
*/
|
|
||||||
void ff_imdct_half_c(MDCTContext *s, FFTSample *output, const FFTSample *input)
|
|
||||||
{
|
|
||||||
int k, n8, n4, n2, n, j;
|
|
||||||
const uint16_t *revtab = s->fft.revtab;
|
|
||||||
const FFTSample *tcos = s->tcos;
|
|
||||||
const FFTSample *tsin = s->tsin;
|
|
||||||
const FFTSample *in1, *in2;
|
|
||||||
FFTComplex *z = (FFTComplex *)output;
|
|
||||||
|
|
||||||
n = 1 << s->nbits;
|
|
||||||
n2 = n >> 1;
|
|
||||||
n4 = n >> 2;
|
|
||||||
n8 = n >> 3;
|
|
||||||
|
|
||||||
/* pre rotation */
|
|
||||||
in1 = input;
|
|
||||||
in2 = input + n2 - 1;
|
|
||||||
for(k = 0; k < n4; k++) {
|
|
||||||
j=revtab[k];
|
|
||||||
CMUL(z[j].re, z[j].im, *in2, *in1, tcos[k], tsin[k]);
|
|
||||||
in1 += 2;
|
|
||||||
in2 -= 2;
|
|
||||||
}
|
|
||||||
ff_fft_calc(&s->fft, z);
|
|
||||||
|
|
||||||
/* post rotation + reordering */
|
|
||||||
output += n4;
|
|
||||||
for(k = 0; k < n8; k++) {
|
|
||||||
FFTSample r0, i0, r1, i1;
|
|
||||||
CMUL(r0, i1, z[n8-k-1].im, z[n8-k-1].re, tsin[n8-k-1], tcos[n8-k-1]);
|
|
||||||
CMUL(r1, i0, z[n8+k ].im, z[n8+k ].re, tsin[n8+k ], tcos[n8+k ]);
|
|
||||||
z[n8-k-1].re = r0;
|
|
||||||
z[n8-k-1].im = i0;
|
|
||||||
z[n8+k ].re = r1;
|
|
||||||
z[n8+k ].im = i1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Compute inverse MDCT of size N = 2^nbits
|
|
||||||
* @param output N samples
|
|
||||||
* @param input N/2 samples
|
|
||||||
*/
|
|
||||||
void ff_imdct_calc_c(MDCTContext *s, FFTSample *output, const FFTSample *input)
|
|
||||||
{
|
|
||||||
int k;
|
|
||||||
int n = 1 << s->nbits;
|
|
||||||
int n2 = n >> 1;
|
|
||||||
int n4 = n >> 2;
|
|
||||||
|
|
||||||
ff_imdct_half_c(s, output+n4, input);
|
|
||||||
|
|
||||||
for(k = 0; k < n4; k++) {
|
|
||||||
output[k] = -output[n2-k-1];
|
|
||||||
output[n-k-1] = output[n2+k];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Compute MDCT of size N = 2^nbits
|
|
||||||
* @param input N samples
|
|
||||||
* @param out N/2 samples
|
|
||||||
*/
|
|
||||||
void ff_mdct_calc(MDCTContext *s, FFTSample *out, const FFTSample *input)
|
|
||||||
{
|
|
||||||
int i, j, n, n8, n4, n2, n3;
|
|
||||||
FFTSample re, im;
|
|
||||||
const uint16_t *revtab = s->fft.revtab;
|
|
||||||
const FFTSample *tcos = s->tcos;
|
|
||||||
const FFTSample *tsin = s->tsin;
|
|
||||||
FFTComplex *x = (FFTComplex *)out;
|
|
||||||
|
|
||||||
n = 1 << s->nbits;
|
|
||||||
n2 = n >> 1;
|
|
||||||
n4 = n >> 2;
|
|
||||||
n8 = n >> 3;
|
|
||||||
n3 = 3 * n4;
|
|
||||||
|
|
||||||
/* pre rotation */
|
|
||||||
for(i=0;i<n8;i++) {
|
|
||||||
re = -input[2*i+3*n4] - input[n3-1-2*i];
|
|
||||||
im = -input[n4+2*i] + input[n4-1-2*i];
|
|
||||||
j = revtab[i];
|
|
||||||
CMUL(x[j].re, x[j].im, re, im, -tcos[i], tsin[i]);
|
|
||||||
|
|
||||||
re = input[2*i] - input[n2-1-2*i];
|
|
||||||
im = -(input[n2+2*i] + input[n-1-2*i]);
|
|
||||||
j = revtab[n8 + i];
|
|
||||||
CMUL(x[j].re, x[j].im, re, im, -tcos[n8 + i], tsin[n8 + i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
ff_fft_calc(&s->fft, x);
|
|
||||||
|
|
||||||
/* post rotation */
|
|
||||||
for(i=0;i<n8;i++) {
|
|
||||||
FFTSample r0, i0, r1, i1;
|
|
||||||
CMUL(i1, r0, x[n8-i-1].re, x[n8-i-1].im, -tsin[n8-i-1], -tcos[n8-i-1]);
|
|
||||||
CMUL(i0, r1, x[n8+i ].re, x[n8+i ].im, -tsin[n8+i ], -tcos[n8+i ]);
|
|
||||||
x[n8-i-1].re = r0;
|
|
||||||
x[n8-i-1].im = i0;
|
|
||||||
x[n8+i ].re = r1;
|
|
||||||
x[n8+i ].im = i1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
av_cold void ff_mdct_end(MDCTContext *s)
|
|
||||||
{
|
|
||||||
av_freep(&s->tcos);
|
|
||||||
av_freep(&s->tsin);
|
|
||||||
ff_fft_end(&s->fft);
|
|
||||||
}
|
|
Loading…
Add table
Add a link
Reference in a new issue