forked from len0rd/rockbox
Further optimization of atrac3 codec. Refacturate gainCompensateAndOverlap(), avoid multiplication if not needed, unroll loops. Speeds up codec by 1.1 MHz (+2%) on ARM.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@24668 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
ad1ba429b9
commit
51a8be1a0f
1 changed files with 145 additions and 38 deletions
|
@ -59,6 +59,7 @@ static int32_t qmf_window[48] IBSS_ATTR;
|
|||
static VLC spectral_coeff_tab[7];
|
||||
static channel_unit channel_units[2] IBSS_ATTR_LARGE_IRAM;
|
||||
|
||||
|
||||
/**
|
||||
* Matrixing within quadrature mirror synthesis filter.
|
||||
*
|
||||
|
@ -91,6 +92,7 @@ static channel_unit channel_units[2] IBSS_ATTR_LARGE_IRAM;
|
|||
}
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* Matrixing within quadrature mirror synthesis filter.
|
||||
*
|
||||
|
@ -195,6 +197,7 @@ static channel_unit channel_units[2] IBSS_ATTR_LARGE_IRAM;
|
|||
}
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* IMDCT windowing.
|
||||
*
|
||||
|
@ -214,6 +217,7 @@ atrac3_imdct_windowing(int32_t *buffer,
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Quadrature mirror synthesis filter.
|
||||
*
|
||||
|
@ -240,12 +244,13 @@ static void iqmf (int32_t *inlo, int32_t *inhi, unsigned int nIn, int32_t *pOut,
|
|||
memcpy(delayBuf, temp + (nIn << 1), 46*sizeof(int32_t));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Regular 512 points IMDCT without overlapping, with the exception of the swapping of odd bands
|
||||
* caused by the reverse spectra of the QMF.
|
||||
*
|
||||
* @param pInput float input
|
||||
* @param pOutput float output
|
||||
* @param pInput input
|
||||
* @param pOutput output
|
||||
* @param odd_band 1 if the band is an odd band
|
||||
*/
|
||||
|
||||
|
@ -274,7 +279,7 @@ static int decode_bytes(const uint8_t* inbuffer, uint8_t* out, int bytes){
|
|||
uint32_t* obuf = (uint32_t*) out;
|
||||
|
||||
#if ((defined(TEST) || defined(SIMULATOR)) && !defined(CPU_ARM))
|
||||
off = 0; //no check for memory alignment of inbuffer
|
||||
off = 0; /* no check for memory alignment of inbuffer */
|
||||
#else
|
||||
off = (intptr_t)inbuffer & 3;
|
||||
#endif /* TEST */
|
||||
|
@ -306,6 +311,7 @@ static void init_atrac3_transforms(void) {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Mantissa decoding
|
||||
*
|
||||
|
@ -338,7 +344,7 @@ static void readQuantSpectralCoeffs (GetBitContext *gb, int selector, int coding
|
|||
} else {
|
||||
for (cnt = 0; cnt < numCodes; cnt++) {
|
||||
if (numBits)
|
||||
code = get_bits(gb, numBits); //numBits is always 4 in this case
|
||||
code = get_bits(gb, numBits); /* numBits is always 4 in this case */
|
||||
else
|
||||
code = 0;
|
||||
mantissas[cnt*2] = seTab_0[code >> 2];
|
||||
|
@ -366,6 +372,7 @@ static void readQuantSpectralCoeffs (GetBitContext *gb, int selector, int coding
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Restore the quantized band spectrum coefficients
|
||||
*
|
||||
|
@ -382,8 +389,8 @@ int decodeSpectrum (GetBitContext *gb, int32_t *pOut)
|
|||
int mantissas[128];
|
||||
int32_t SF;
|
||||
|
||||
numSubbands = get_bits(gb, 5); // number of coded subbands
|
||||
codingMode = get_bits1(gb); // coding Mode: 0 - VLC/ 1-CLC
|
||||
numSubbands = get_bits(gb, 5); /* number of coded subbands */
|
||||
codingMode = get_bits1(gb); /* coding Mode: 0 - VLC/ 1-CLC */
|
||||
|
||||
/* Get the VLC selector table for the subbands, 0 means not coded. */
|
||||
for (cnt = 0; cnt <= numSubbands; cnt++)
|
||||
|
@ -437,6 +444,7 @@ int decodeSpectrum (GetBitContext *gb, int32_t *pOut)
|
|||
return numSubbands;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Restore the quantized tonal components
|
||||
*
|
||||
|
@ -517,6 +525,7 @@ static int decodeTonalComponents (GetBitContext *gb, tonal_component *pComponent
|
|||
return component_count;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Decode gain parameters for the coded bands
|
||||
*
|
||||
|
@ -554,21 +563,125 @@ static int decodeGainControl (GetBitContext *gb, gain_block *pGb, int numBands)
|
|||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Apply fix (constant) gain and overlap for sample[start...255].
|
||||
*
|
||||
* @param pIn input buffer
|
||||
* @param pPrev previous buffer to perform overlap against
|
||||
* @param pOut output buffer
|
||||
* @param start index to start with (always a multiple of 8)
|
||||
* @param gain gain to apply
|
||||
*/
|
||||
|
||||
static void applyFixGain (int32_t *pIn, int32_t *pPrev, int32_t *pOut,
|
||||
int32_t start, int32_t gain)
|
||||
{
|
||||
int32_t i = start;
|
||||
|
||||
/* start is always a multiple of 8 and therefore allows us to unroll the
|
||||
* loop to 8 calculation per loop
|
||||
*/
|
||||
if (ONE_16 == gain) {
|
||||
/* gain1 = 1.0 -> no multiplication needed, just adding */
|
||||
/* Remark: This path is called >90%. */
|
||||
do {
|
||||
pOut[i] = pIn[i] + pPrev[i]; i++;
|
||||
pOut[i] = pIn[i] + pPrev[i]; i++;
|
||||
pOut[i] = pIn[i] + pPrev[i]; i++;
|
||||
pOut[i] = pIn[i] + pPrev[i]; i++;
|
||||
pOut[i] = pIn[i] + pPrev[i]; i++;
|
||||
pOut[i] = pIn[i] + pPrev[i]; i++;
|
||||
pOut[i] = pIn[i] + pPrev[i]; i++;
|
||||
pOut[i] = pIn[i] + pPrev[i]; i++;
|
||||
} while (i<256);
|
||||
} else {
|
||||
/* gain1 != 1.0 -> we need to do a multiplication */
|
||||
/* Remark: This path is called seldom. */
|
||||
do {
|
||||
pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++;
|
||||
pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++;
|
||||
pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++;
|
||||
pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++;
|
||||
pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++;
|
||||
pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++;
|
||||
pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++;
|
||||
pOut[i] = fixmul16(pIn[i], gain) + pPrev[i]; i++;
|
||||
} while (i<256);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Apply variable gain and overlap. Returns sample index after applying gain,
|
||||
* resulting sample index is always a multiple of 8.
|
||||
*
|
||||
* @param pIn input buffer
|
||||
* @param pPrev previous buffer to perform overlap against
|
||||
* @param pOut output buffer
|
||||
* @param start index to start with (always a multiple of 8)
|
||||
* @param end end index for first loop (always a multiple of 8)
|
||||
* @param gain1 current bands gain to apply
|
||||
* @param gain2 next bands gain to apply
|
||||
* @param gain_inc stepwise adaption from gain1 to gain2
|
||||
*/
|
||||
|
||||
static int applyVariableGain (int32_t *pIn, int32_t *pPrev, int32_t *pOut,
|
||||
int32_t start, int32_t end,
|
||||
int32_t gain1, int32_t gain2, int32_t gain_inc)
|
||||
{
|
||||
int32_t i = start;
|
||||
|
||||
/* Apply fix gains until end index is reached */
|
||||
do {
|
||||
pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
|
||||
pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
|
||||
pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
|
||||
pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
|
||||
pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
|
||||
pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
|
||||
pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
|
||||
pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
|
||||
} while (i < end);
|
||||
|
||||
/* Interpolation is done over next eight samples */
|
||||
pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
|
||||
gain2 = fixmul16(gain2, gain_inc);
|
||||
pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
|
||||
gain2 = fixmul16(gain2, gain_inc);
|
||||
pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
|
||||
gain2 = fixmul16(gain2, gain_inc);
|
||||
pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
|
||||
gain2 = fixmul16(gain2, gain_inc);
|
||||
pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
|
||||
gain2 = fixmul16(gain2, gain_inc);
|
||||
pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
|
||||
gain2 = fixmul16(gain2, gain_inc);
|
||||
pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
|
||||
gain2 = fixmul16(gain2, gain_inc);
|
||||
pOut[i] = fixmul16((fixmul16(pIn[i], gain1) + pPrev[i]), gain2); i++;
|
||||
gain2 = fixmul16(gain2, gain_inc);
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Apply gain parameters and perform the MDCT overlapping part
|
||||
*
|
||||
* @param pIn input float buffer
|
||||
* @param pPrev previous float buffer to perform overlap against
|
||||
* @param pOut output float buffer
|
||||
* @param pIn input buffer
|
||||
* @param pPrev previous buffer to perform overlap against
|
||||
* @param pOut output buffer
|
||||
* @param pGain1 current band gain info
|
||||
* @param pGain2 next band gain info
|
||||
*/
|
||||
|
||||
static void gainCompensateAndOverlap (int32_t *pIn, int32_t *pPrev, int32_t *pOut, gain_info *pGain1, gain_info *pGain2)
|
||||
static void gainCompensateAndOverlap (int32_t *pIn, int32_t *pPrev, int32_t *pOut,
|
||||
gain_info *pGain1, gain_info *pGain2)
|
||||
{
|
||||
/* gain compensation function */
|
||||
int32_t gain1, gain2, gain_inc;
|
||||
int cnt, numdata, nsample, startLoc, endLoc;
|
||||
int cnt, numdata, nsample, startLoc;
|
||||
|
||||
if (pGain2->num_gain_data == 0)
|
||||
gain1 = ONE_16;
|
||||
|
@ -576,41 +689,35 @@ static void gainCompensateAndOverlap (int32_t *pIn, int32_t *pPrev, int32_t *pOu
|
|||
gain1 = gain_tab1[pGain2->levcode[0]];
|
||||
|
||||
if (pGain1->num_gain_data == 0) {
|
||||
for (cnt = 0; cnt < 256; cnt++)
|
||||
pOut[cnt] = fixmul16(pIn[cnt], gain1) + pPrev[cnt];
|
||||
/* Remark: This path is called >90%. */
|
||||
/* Apply gain for all samples from 0...255 */
|
||||
applyFixGain(pIn, pPrev, pOut, 0, gain1);
|
||||
} else {
|
||||
/* Remark: This path is called seldom. */
|
||||
numdata = pGain1->num_gain_data;
|
||||
pGain1->loccode[numdata] = 32;
|
||||
pGain1->levcode[numdata] = 4;
|
||||
|
||||
nsample = 0; // current sample = 0
|
||||
nsample = 0; /* starting loop with =0 */
|
||||
|
||||
for (cnt = 0; cnt < numdata; cnt++) {
|
||||
startLoc = pGain1->loccode[cnt] * 8;
|
||||
endLoc = startLoc + 8;
|
||||
|
||||
gain2 = gain_tab1[pGain1->levcode[cnt]];
|
||||
gain2 = gain_tab1[pGain1->levcode[cnt]];
|
||||
gain_inc = gain_tab2[(pGain1->levcode[cnt+1] - pGain1->levcode[cnt])+15];
|
||||
|
||||
/* interpolate */
|
||||
for (; nsample < startLoc; nsample++)
|
||||
pOut[nsample] = fixmul16((fixmul16(pIn[nsample], gain1) + pPrev[nsample]), gain2);
|
||||
|
||||
/* interpolation is done over eight samples */
|
||||
for (; nsample < endLoc; nsample++) {
|
||||
pOut[nsample] = fixmul16((fixmul16(pIn[nsample], gain1) + pPrev[nsample]),gain2);
|
||||
gain2 = fixmul16(gain2, gain_inc);
|
||||
}
|
||||
/* Apply variable gain (gain1 -> gain2) to samples */
|
||||
nsample = applyVariableGain(pIn, pPrev, pOut, nsample, startLoc, gain1, gain2, gain_inc);
|
||||
}
|
||||
|
||||
for (; nsample < 256; nsample++)
|
||||
pOut[nsample] = fixmul16(pIn[nsample], gain1) + pPrev[nsample];
|
||||
/* Apply gain for the residual samples from nsample...255 */
|
||||
applyFixGain(pIn, pPrev, pOut, nsample, gain1);
|
||||
}
|
||||
|
||||
/* Delay for the overlapping part. */
|
||||
memcpy(pPrev, &pIn[256], 256*sizeof(int32_t));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Combine the tonal band spectrum and regular band spectrum
|
||||
* Return position of the last tonal coefficient
|
||||
|
@ -639,6 +746,7 @@ static int addTonalComponents (int32_t *pSpectrum, int numComponents, tonal_comp
|
|||
return lastPos;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Linear equidistant interpolation between two points x and y. 7 interpolation
|
||||
* points can be calculated. Result is scaled by <<16.
|
||||
|
@ -712,7 +820,7 @@ static void reverseMatrixing(int32_t *su1, int32_t *su2, int *pPrevCode, int *pC
|
|||
}
|
||||
break;
|
||||
default:
|
||||
//assert(0);
|
||||
/* assert(0) */;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -755,18 +863,16 @@ static void channelWeighting (int32_t *su1, int32_t *su2, int *p3)
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Decode a Sound Unit
|
||||
*
|
||||
* @param gb the GetBit context
|
||||
* @param pSnd the channel unit to be used
|
||||
* @param pOut the decoded samples before IQMF in float representation
|
||||
* @param pOut the decoded samples before IQMF
|
||||
* @param channelNum channel number
|
||||
* @param codingMode the coding mode (JOINT_STEREO or regular stereo/mono)
|
||||
*/
|
||||
|
||||
|
||||
static int decodeChannelSoundUnit (GetBitContext *gb, channel_unit *pSnd, int32_t *pOut, int channelNum, int codingMode)
|
||||
{
|
||||
int band, result=0, numSubbands, lastTonal, numBands;
|
||||
|
@ -807,8 +913,9 @@ static int decodeChannelSoundUnit (GetBitContext *gb, channel_unit *pSnd, int32_
|
|||
/* Perform the IMDCT step without overlapping. */
|
||||
if (band <= numBands) {
|
||||
IMLT(&(pSnd->spectrum[band*256]), pSnd->IMDCT_buf);
|
||||
} else
|
||||
} else {
|
||||
memset(pSnd->IMDCT_buf, 0, 512 * sizeof(int32_t));
|
||||
}
|
||||
|
||||
/* gain compensation and overlapping */
|
||||
gainCompensateAndOverlap (pSnd->IMDCT_buf, &(pSnd->prevFrame[band*256]), &(pOut[band*256]),
|
||||
|
@ -982,12 +1089,12 @@ int atrac3_decode_init(ATRAC3Context *q, RMContext *rmctx)
|
|||
/* Take care of the codec-specific extradata. */
|
||||
if (rmctx->extradata_size == 14) {
|
||||
/* Parse the extradata, WAV format */
|
||||
DEBUGF("[0-1] %d\n",rm_get_uint16le(&edata_ptr[0])); //Unknown value always 1
|
||||
DEBUGF("[0-1] %d\n",rm_get_uint16le(&edata_ptr[0])); /* Unknown value always 1 */
|
||||
q->samples_per_channel = rm_get_uint32le(&edata_ptr[2]);
|
||||
q->codingMode = rm_get_uint16le(&edata_ptr[6]);
|
||||
DEBUGF("[8-9] %d\n",rm_get_uint16le(&edata_ptr[8])); //Dupe of coding mode
|
||||
q->frame_factor = rm_get_uint16le(&edata_ptr[10]); //Unknown always 1
|
||||
DEBUGF("[12-13] %d\n",rm_get_uint16le(&edata_ptr[12])); //Unknown always 0
|
||||
DEBUGF("[8-9] %d\n",rm_get_uint16le(&edata_ptr[8])); /* Dupe of coding mode */
|
||||
q->frame_factor = rm_get_uint16le(&edata_ptr[10]); /* Unknown always 1 */
|
||||
DEBUGF("[12-13] %d\n",rm_get_uint16le(&edata_ptr[12])); /* Unknown always 0 */
|
||||
|
||||
/* setup */
|
||||
q->samples_per_frame = 1024 * q->channels;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue