forked from len0rd/rockbox
Adding ICODE_ATTR to the IMDCT (and its constituent IFFT functions) gives a 0.5 MHz boost on ARM (iPod Video) and about a 5 MHz boost on ColdFire (H120)
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@24786 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
9f63f4f2fc
commit
730a618fae
2 changed files with 16 additions and 3 deletions
|
@ -57,6 +57,10 @@
|
||||||
/* asm-optimised functions and/or macros */
|
/* asm-optimised functions and/or macros */
|
||||||
#include "fft-ffmpeg_arm.h"
|
#include "fft-ffmpeg_arm.h"
|
||||||
|
|
||||||
|
#ifndef ICODE_ATTR_TREMOR_MDCT
|
||||||
|
#define ICODE_ATTR_TREMOR_MDCT ICODE_ATTR
|
||||||
|
#endif
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
static int split_radix_permutation(int i, int n, int inverse)
|
static int split_radix_permutation(int i, int n, int inverse)
|
||||||
{
|
{
|
||||||
|
@ -268,7 +272,8 @@ static inline void TRANSFORM_ZERO(FFTComplex * z, unsigned int n)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* z[0...8n-1], w[1...2n-1] */
|
/* z[0...8n-1], w[1...2n-1] */
|
||||||
static void pass(FFTComplex *z_arg, unsigned int STEP_arg, unsigned int n_arg)
|
void pass(FFTComplex *z_arg, unsigned int STEP_arg, unsigned int n_arg) ICODE_ATTR_TREMOR_MDCT;
|
||||||
|
void pass(FFTComplex *z_arg, unsigned int STEP_arg, unsigned int n_arg)
|
||||||
{
|
{
|
||||||
register FFTComplex * z = z_arg;
|
register FFTComplex * z = z_arg;
|
||||||
register unsigned int STEP = STEP_arg;
|
register unsigned int STEP = STEP_arg;
|
||||||
|
@ -314,7 +319,8 @@ static void pass(FFTComplex *z_arg, unsigned int STEP_arg, unsigned int n_arg)
|
||||||
8192/16 (from "ff_cos_16") is 512 bytes.
|
8192/16 (from "ff_cos_16") is 512 bytes.
|
||||||
i.e. for fft16, STEP = 8192/16 */
|
i.e. for fft16, STEP = 8192/16 */
|
||||||
#define DECL_FFT(n,n2,n4)\
|
#define DECL_FFT(n,n2,n4)\
|
||||||
static void fft##n(FFTComplex *z)\
|
void fft##n(FFTComplex *z) ICODE_ATTR_TREMOR_MDCT;\
|
||||||
|
void fft##n(FFTComplex *z)\
|
||||||
{\
|
{\
|
||||||
fft##n2(z);\
|
fft##n2(z);\
|
||||||
fft##n4(z+n4*2);\
|
fft##n4(z+n4*2);\
|
||||||
|
@ -374,7 +380,8 @@ static void fft8_dispatch(FFTComplex *z)
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef CONFIG_SMALL
|
#ifndef CONFIG_SMALL
|
||||||
static void fft16(FFTComplex *z)
|
void fft16(FFTComplex *z) ICODE_ATTR_TREMOR_MDCT;
|
||||||
|
void fft16(FFTComplex *z)
|
||||||
{
|
{
|
||||||
fft8(z);
|
fft8(z);
|
||||||
fft4(z+8);
|
fft4(z+8);
|
||||||
|
|
|
@ -25,6 +25,10 @@
|
||||||
#include "codeclib_misc.h"
|
#include "codeclib_misc.h"
|
||||||
#include "mdct_lookup.h"
|
#include "mdct_lookup.h"
|
||||||
|
|
||||||
|
#ifndef ICODE_ATTR_TREMOR_MDCT
|
||||||
|
#define ICODE_ATTR_TREMOR_MDCT ICODE_ATTR
|
||||||
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Compute the middle half of the inverse MDCT of size N = 2^nbits
|
* Compute the middle half of the inverse MDCT of size N = 2^nbits
|
||||||
* thus excluding the parts that can be derived by symmetry
|
* thus excluding the parts that can be derived by symmetry
|
||||||
|
@ -34,6 +38,7 @@
|
||||||
* NOTE - CANNOT CURRENTLY OPERATE IN PLACE (input and output must
|
* NOTE - CANNOT CURRENTLY OPERATE IN PLACE (input and output must
|
||||||
* not overlap or intersect at all)
|
* not overlap or intersect at all)
|
||||||
*/
|
*/
|
||||||
|
void ff_imdct_half(unsigned int nbits, fixed32 *output, const fixed32 *input) ICODE_ATTR_TREMOR_MDCT;
|
||||||
void ff_imdct_half(unsigned int nbits, fixed32 *output, const fixed32 *input)
|
void ff_imdct_half(unsigned int nbits, fixed32 *output, const fixed32 *input)
|
||||||
{
|
{
|
||||||
int n8, n4, n2, n, j;
|
int n8, n4, n2, n, j;
|
||||||
|
@ -244,6 +249,7 @@ void ff_imdct_half(unsigned int nbits, fixed32 *output, const fixed32 *input)
|
||||||
* <-----------output----------->
|
* <-----------output----------->
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
void ff_imdct_calc(unsigned int nbits, fixed32 *output, const fixed32 *input) ICODE_ATTR_TREMOR_MDCT;
|
||||||
void ff_imdct_calc(unsigned int nbits, fixed32 *output, const fixed32 *input)
|
void ff_imdct_calc(unsigned int nbits, fixed32 *output, const fixed32 *input)
|
||||||
{
|
{
|
||||||
const int n = (1<<nbits);
|
const int n = (1<<nbits);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue