forked from len0rd/rockbox
Move ARM assembler of musepack synthesis filter to own file. Additionally add ICONST_ATTR to noise generator data.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@17562 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
c769cf586f
commit
18f13b149a
5 changed files with 265 additions and 161 deletions
|
@ -5,4 +5,6 @@ mpc_decoder.c
|
|||
requant.c
|
||||
streaminfo.c
|
||||
synth_filter.c
|
||||
|
||||
#if defined(CPU_ARM)
|
||||
synth_filter_arm.S
|
||||
#endif
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
#ifndef _mpcdec_math_h_
|
||||
#define _mpcdec_math_h_
|
||||
|
||||
#define MPC_FIXED_POINT
|
||||
#include "mpc_config.h"
|
||||
|
||||
#define MPC_FIXED_POINT_SHIFT 16
|
||||
|
||||
|
|
48
apps/codecs/libmusepack/mpc_config.h
Executable file
48
apps/codecs/libmusepack/mpc_config.h
Executable file
|
@ -0,0 +1,48 @@
|
|||
/***************************************************************************
|
||||
* __________ __ ___.
|
||||
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
|
||||
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
|
||||
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
|
||||
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
|
||||
* \/ \/ \/ \/ \/
|
||||
* $Id$
|
||||
*
|
||||
* Copyright (C) 2008 by Andree Buschmann
|
||||
*
|
||||
* All files in this archive are subject to the GNU General Public License.
|
||||
* See the file COPYING in the source tree root for full license agreement.
|
||||
*
|
||||
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
|
||||
* KIND, either express or implied.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef _mpc_config_h_
|
||||
#define _mpc_config_h_
|
||||
|
||||
#include "config.h"
|
||||
|
||||
/* choose fixed point or floating point */
|
||||
#define MPC_FIXED_POINT
|
||||
|
||||
#ifndef MPC_FIXED_POINT
|
||||
#error FIXME, mpc will not with floating point now
|
||||
#endif
|
||||
|
||||
/* Choose speed vs. accuracy for MPC_FIXED_POINT.
|
||||
* The speed setting increases decoding speed on ARM only (+20%); the loss of
|
||||
* accuracy equals about 5 dB SNR (15 bit output precision). To not use the
|
||||
* speed optimization, comment out OPTIMIZE_FOR_SPEED below for the desired target. */
|
||||
#if defined(MPC_FIXED_POINT)
|
||||
#if defined(CPU_COLDFIRE)
|
||||
// Coldfire: OPTIMIZE_FOR_SPEED is left undefined
|
||||
#elif defined(CPU_ARM)
|
||||
#define OPTIMIZE_FOR_SPEED
|
||||
#else
|
||||
#define OPTIMIZE_FOR_SPEED
|
||||
#endif
|
||||
#else
|
||||
// not fixed point: OPTIMIZE_FOR_SPEED is left undefined
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -39,22 +39,6 @@
|
|||
#include "musepack.h"
|
||||
#include "internal.h"
|
||||
|
||||
/* S E T T I N G S */
|
||||
// choose speed vs. accuracy for MPC_FIXED_POINT
|
||||
// speed-setting will increase decoding speed on ARM only (+20%), loss of accuracy equals about 5 dB SNR (15bit output precision)
|
||||
// to not use the speed-optimization -> comment OPTIMIZE_FOR_SPEED
|
||||
#if defined(MPC_FIXED_POINT)
|
||||
#if defined(CPU_COLDFIRE)
|
||||
// do nothing
|
||||
#elif defined(CPU_ARM)
|
||||
#define OPTIMIZE_FOR_SPEED
|
||||
#else
|
||||
#define OPTIMIZE_FOR_SPEED
|
||||
#endif
|
||||
#else
|
||||
// do nothing
|
||||
#endif
|
||||
|
||||
/* C O N S T A N T S */
|
||||
#undef _
|
||||
|
||||
|
@ -82,40 +66,40 @@
|
|||
#endif
|
||||
|
||||
// Di_opt coefficients are +/- 2^17
|
||||
static const MPC_SAMPLE_FORMAT Di_opt [32] [16] ICONST_ATTR = {
|
||||
/* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */
|
||||
{ _( 0), _( -29), _( 213), _( -459), _( 2037), _(-5153), _( 6574), _(-37489), _(75038), _(37489), _(6574), _( 5153), _(2037), _( 459), _(213), _(29) },
|
||||
{ _( -1), _( -31), _( 218), _( -519), _( 2000), _(-5517), _( 5959), _(-39336), _(74992), _(35640), _(7134), _( 4788), _(2063), _( 401), _(208), _(26) },
|
||||
{ _( -1), _( -35), _( 222), _( -581), _( 1952), _(-5879), _( 5288), _(-41176), _(74856), _(33791), _(7640), _( 4425), _(2080), _( 347), _(202), _(24) },
|
||||
{ _( -1), _( -38), _( 225), _( -645), _( 1893), _(-6237), _( 4561), _(-43006), _(74630), _(31947), _(8092), _( 4063), _(2087), _( 294), _(196), _(21) },
|
||||
{ _( -1), _( -41), _( 227), _( -711), _( 1822), _(-6589), _( 3776), _(-44821), _(74313), _(30112), _(8492), _( 3705), _(2085), _( 244), _(190), _(19) },
|
||||
{ _( -1), _( -45), _( 228), _( -779), _( 1739), _(-6935), _( 2935), _(-46617), _(73908), _(28289), _(8840), _( 3351), _(2075), _( 197), _(183), _(17) },
|
||||
{ _( -1), _( -49), _( 228), _( -848), _( 1644), _(-7271), _( 2037), _(-48390), _(73415), _(26482), _(9139), _( 3004), _(2057), _( 153), _(176), _(16) },
|
||||
{ _( -2), _( -53), _( 227), _( -919), _( 1535), _(-7597), _( 1082), _(-50137), _(72835), _(24694), _(9389), _( 2663), _(2032), _( 111), _(169), _(14) },
|
||||
{ _( -2), _( -58), _( 224), _( -991), _( 1414), _(-7910), _( 70), _(-51853), _(72169), _(22929), _(9592), _( 2330), _(2001), _( 72), _(161), _(13) },
|
||||
{ _( -2), _( -63), _( 221), _(-1064), _( 1280), _(-8209), _( -998), _(-53534), _(71420), _(21189), _(9750), _( 2006), _(1962), _( 36), _(154), _(11) },
|
||||
{ _( -2), _( -68), _( 215), _(-1137), _( 1131), _(-8491), _( -2122), _(-55178), _(70590), _(19478), _(9863), _( 1692), _(1919), _( 2), _(147), _(10) },
|
||||
{ _( -3), _( -73), _( 208), _(-1210), _( 970), _(-8755), _( -3300), _(-56778), _(69679), _(17799), _(9935), _( 1388), _(1870), _( -29), _(139), _( 9) },
|
||||
{ _( -3), _( -79), _( 200), _(-1283), _( 794), _(-8998), _( -4533), _(-58333), _(68692), _(16155), _(9966), _( 1095), _(1817), _( -57), _(132), _( 8) },
|
||||
{ _( -4), _( -85), _( 189), _(-1356), _( 605), _(-9219), _( -5818), _(-59838), _(67629), _(14548), _(9959), _( 814), _(1759), _( -83), _(125), _( 7) },
|
||||
{ _( -4), _( -91), _( 177), _(-1428), _( 402), _(-9416), _( -7154), _(-61289), _(66494), _(12980), _(9916), _( 545), _(1698), _(-106), _(117), _( 7) },
|
||||
{ _( -5), _( -97), _( 163), _(-1498), _( 185), _(-9585), _( -8540), _(-62684), _(65290), _(11455), _(9838), _( 288), _(1634), _(-127), _(111), _( 6) },
|
||||
{ _( -5), _(-104), _( 146), _(-1567), _( -45), _(-9727), _( -9975), _(-64019), _(64019), _( 9975), _(9727), _( 45), _(1567), _(-146), _(104), _( 5) },
|
||||
{ _( -6), _(-111), _( 127), _(-1634), _( -288), _(-9838), _(-11455), _(-65290), _(62684), _( 8540), _(9585), _( -185), _(1498), _(-163), _( 97), _( 5) },
|
||||
{ _( -7), _(-117), _( 106), _(-1698), _( -545), _(-9916), _(-12980), _(-66494), _(61289), _( 7154), _(9416), _( -402), _(1428), _(-177), _( 91), _( 4) },
|
||||
{ _( -7), _(-125), _( 83), _(-1759), _( -814), _(-9959), _(-14548), _(-67629), _(59838), _( 5818), _(9219), _( -605), _(1356), _(-189), _( 85), _( 4) },
|
||||
{ _( -8), _(-132), _( 57), _(-1817), _(-1095), _(-9966), _(-16155), _(-68692), _(58333), _( 4533), _(8998), _( -794), _(1283), _(-200), _( 79), _( 3) },
|
||||
{ _( -9), _(-139), _( 29), _(-1870), _(-1388), _(-9935), _(-17799), _(-69679), _(56778), _( 3300), _(8755), _( -970), _(1210), _(-208), _( 73), _( 3) },
|
||||
{ _(-10), _(-147), _( -2), _(-1919), _(-1692), _(-9863), _(-19478), _(-70590), _(55178), _( 2122), _(8491), _(-1131), _(1137), _(-215), _( 68), _( 2) },
|
||||
{ _(-11), _(-154), _( -36), _(-1962), _(-2006), _(-9750), _(-21189), _(-71420), _(53534), _( 998), _(8209), _(-1280), _(1064), _(-221), _( 63), _( 2) },
|
||||
{ _(-13), _(-161), _( -72), _(-2001), _(-2330), _(-9592), _(-22929), _(-72169), _(51853), _( -70), _(7910), _(-1414), _( 991), _(-224), _( 58), _( 2) },
|
||||
{ _(-14), _(-169), _(-111), _(-2032), _(-2663), _(-9389), _(-24694), _(-72835), _(50137), _(-1082), _(7597), _(-1535), _( 919), _(-227), _( 53), _( 2) },
|
||||
{ _(-16), _(-176), _(-153), _(-2057), _(-3004), _(-9139), _(-26482), _(-73415), _(48390), _(-2037), _(7271), _(-1644), _( 848), _(-228), _( 49), _( 1) },
|
||||
{ _(-17), _(-183), _(-197), _(-2075), _(-3351), _(-8840), _(-28289), _(-73908), _(46617), _(-2935), _(6935), _(-1739), _( 779), _(-228), _( 45), _( 1) },
|
||||
{ _(-19), _(-190), _(-244), _(-2085), _(-3705), _(-8492), _(-30112), _(-74313), _(44821), _(-3776), _(6589), _(-1822), _( 711), _(-227), _( 41), _( 1) },
|
||||
{ _(-21), _(-196), _(-294), _(-2087), _(-4063), _(-8092), _(-31947), _(-74630), _(43006), _(-4561), _(6237), _(-1893), _( 645), _(-225), _( 38), _( 1) },
|
||||
{ _(-24), _(-202), _(-347), _(-2080), _(-4425), _(-7640), _(-33791), _(-74856), _(41176), _(-5288), _(5879), _(-1952), _( 581), _(-222), _( 35), _( 1) },
|
||||
{ _(-26), _(-208), _(-401), _(-2063), _(-4788), _(-7134), _(-35640), _(-74992), _(39336), _(-5959), _(5517), _(-2000), _( 519), _(-218), _( 31), _( 1) }
|
||||
static const MPC_SAMPLE_FORMAT Di_opt [512] ICONST_ATTR = {
|
||||
/* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */
|
||||
/* 0 */ _( 0), _( -29), _( 213), _( -459), _( 2037), _(-5153), _( 6574), _(-37489), _(75038), _(37489), _(6574), _( 5153), _(2037), _( 459), _(213), _(29),
|
||||
/* 1 */ _( -1), _( -31), _( 218), _( -519), _( 2000), _(-5517), _( 5959), _(-39336), _(74992), _(35640), _(7134), _( 4788), _(2063), _( 401), _(208), _(26),
|
||||
/* 2 */ _( -1), _( -35), _( 222), _( -581), _( 1952), _(-5879), _( 5288), _(-41176), _(74856), _(33791), _(7640), _( 4425), _(2080), _( 347), _(202), _(24),
|
||||
/* 3 */ _( -1), _( -38), _( 225), _( -645), _( 1893), _(-6237), _( 4561), _(-43006), _(74630), _(31947), _(8092), _( 4063), _(2087), _( 294), _(196), _(21),
|
||||
/* 4 */ _( -1), _( -41), _( 227), _( -711), _( 1822), _(-6589), _( 3776), _(-44821), _(74313), _(30112), _(8492), _( 3705), _(2085), _( 244), _(190), _(19),
|
||||
/* 5 */ _( -1), _( -45), _( 228), _( -779), _( 1739), _(-6935), _( 2935), _(-46617), _(73908), _(28289), _(8840), _( 3351), _(2075), _( 197), _(183), _(17),
|
||||
/* 6 */ _( -1), _( -49), _( 228), _( -848), _( 1644), _(-7271), _( 2037), _(-48390), _(73415), _(26482), _(9139), _( 3004), _(2057), _( 153), _(176), _(16),
|
||||
/* 7 */ _( -2), _( -53), _( 227), _( -919), _( 1535), _(-7597), _( 1082), _(-50137), _(72835), _(24694), _(9389), _( 2663), _(2032), _( 111), _(169), _(14),
|
||||
/* 8 */ _( -2), _( -58), _( 224), _( -991), _( 1414), _(-7910), _( 70), _(-51853), _(72169), _(22929), _(9592), _( 2330), _(2001), _( 72), _(161), _(13),
|
||||
/* 9 */ _( -2), _( -63), _( 221), _(-1064), _( 1280), _(-8209), _( -998), _(-53534), _(71420), _(21189), _(9750), _( 2006), _(1962), _( 36), _(154), _(11),
|
||||
/* 10 */ _( -2), _( -68), _( 215), _(-1137), _( 1131), _(-8491), _( -2122), _(-55178), _(70590), _(19478), _(9863), _( 1692), _(1919), _( 2), _(147), _(10),
|
||||
/* 11 */ _( -3), _( -73), _( 208), _(-1210), _( 970), _(-8755), _( -3300), _(-56778), _(69679), _(17799), _(9935), _( 1388), _(1870), _( -29), _(139), _( 9),
|
||||
/* 12 */ _( -3), _( -79), _( 200), _(-1283), _( 794), _(-8998), _( -4533), _(-58333), _(68692), _(16155), _(9966), _( 1095), _(1817), _( -57), _(132), _( 8),
|
||||
/* 13 */ _( -4), _( -85), _( 189), _(-1356), _( 605), _(-9219), _( -5818), _(-59838), _(67629), _(14548), _(9959), _( 814), _(1759), _( -83), _(125), _( 7),
|
||||
/* 14 */ _( -4), _( -91), _( 177), _(-1428), _( 402), _(-9416), _( -7154), _(-61289), _(66494), _(12980), _(9916), _( 545), _(1698), _(-106), _(117), _( 7),
|
||||
/* 15 */ _( -5), _( -97), _( 163), _(-1498), _( 185), _(-9585), _( -8540), _(-62684), _(65290), _(11455), _(9838), _( 288), _(1634), _(-127), _(111), _( 6),
|
||||
/* 16 */ _( -5), _(-104), _( 146), _(-1567), _( -45), _(-9727), _( -9975), _(-64019), _(64019), _( 9975), _(9727), _( 45), _(1567), _(-146), _(104), _( 5),
|
||||
/* 17 */ _( -6), _(-111), _( 127), _(-1634), _( -288), _(-9838), _(-11455), _(-65290), _(62684), _( 8540), _(9585), _( -185), _(1498), _(-163), _( 97), _( 5),
|
||||
/* 18 */ _( -7), _(-117), _( 106), _(-1698), _( -545), _(-9916), _(-12980), _(-66494), _(61289), _( 7154), _(9416), _( -402), _(1428), _(-177), _( 91), _( 4),
|
||||
/* 19 */ _( -7), _(-125), _( 83), _(-1759), _( -814), _(-9959), _(-14548), _(-67629), _(59838), _( 5818), _(9219), _( -605), _(1356), _(-189), _( 85), _( 4),
|
||||
/* 20 */ _( -8), _(-132), _( 57), _(-1817), _(-1095), _(-9966), _(-16155), _(-68692), _(58333), _( 4533), _(8998), _( -794), _(1283), _(-200), _( 79), _( 3),
|
||||
/* 21 */ _( -9), _(-139), _( 29), _(-1870), _(-1388), _(-9935), _(-17799), _(-69679), _(56778), _( 3300), _(8755), _( -970), _(1210), _(-208), _( 73), _( 3),
|
||||
/* 22 */ _(-10), _(-147), _( -2), _(-1919), _(-1692), _(-9863), _(-19478), _(-70590), _(55178), _( 2122), _(8491), _(-1131), _(1137), _(-215), _( 68), _( 2),
|
||||
/* 23 */ _(-11), _(-154), _( -36), _(-1962), _(-2006), _(-9750), _(-21189), _(-71420), _(53534), _( 998), _(8209), _(-1280), _(1064), _(-221), _( 63), _( 2),
|
||||
/* 24 */ _(-13), _(-161), _( -72), _(-2001), _(-2330), _(-9592), _(-22929), _(-72169), _(51853), _( -70), _(7910), _(-1414), _( 991), _(-224), _( 58), _( 2),
|
||||
/* 25 */ _(-14), _(-169), _(-111), _(-2032), _(-2663), _(-9389), _(-24694), _(-72835), _(50137), _(-1082), _(7597), _(-1535), _( 919), _(-227), _( 53), _( 2),
|
||||
/* 26 */ _(-16), _(-176), _(-153), _(-2057), _(-3004), _(-9139), _(-26482), _(-73415), _(48390), _(-2037), _(7271), _(-1644), _( 848), _(-228), _( 49), _( 1),
|
||||
/* 27 */ _(-17), _(-183), _(-197), _(-2075), _(-3351), _(-8840), _(-28289), _(-73908), _(46617), _(-2935), _(6935), _(-1739), _( 779), _(-228), _( 45), _( 1),
|
||||
/* 28 */ _(-19), _(-190), _(-244), _(-2085), _(-3705), _(-8492), _(-30112), _(-74313), _(44821), _(-3776), _(6589), _(-1822), _( 711), _(-227), _( 41), _( 1),
|
||||
/* 29 */ _(-21), _(-196), _(-294), _(-2087), _(-4063), _(-8092), _(-31947), _(-74630), _(43006), _(-4561), _(6237), _(-1893), _( 645), _(-225), _( 38), _( 1),
|
||||
/* 30 */ _(-24), _(-202), _(-347), _(-2080), _(-4425), _(-7640), _(-33791), _(-74856), _(41176), _(-5288), _(5879), _(-1952), _( 581), _(-222), _( 35), _( 1),
|
||||
/* 31 */ _(-26), _(-208), _(-401), _(-2063), _(-4788), _(-7134), _(-35640), _(-74992), _(39336), _(-5959), _(5517), _(-2000), _( 519), _(-218), _( 31), _( 1)
|
||||
};
|
||||
|
||||
#undef _
|
||||
|
@ -457,58 +441,20 @@ mpc_calculate_new_V ( const MPC_SAMPLE_FORMAT * Sample, MPC_SAMPLE_FORMAT * V )
|
|||
// total: 111 adds, 107 subs, 80 muls, 80 shifts
|
||||
}
|
||||
|
||||
#if defined(CPU_ARM)
|
||||
extern void
|
||||
mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data,
|
||||
const MPC_SAMPLE_FORMAT * V,
|
||||
const MPC_SAMPLE_FORMAT * D);
|
||||
#else
|
||||
static void
|
||||
mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data, const MPC_SAMPLE_FORMAT * V)
|
||||
mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data,
|
||||
const MPC_SAMPLE_FORMAT * V,
|
||||
const MPC_SAMPLE_FORMAT * D)
|
||||
{
|
||||
const MPC_SAMPLE_FORMAT *D = (const MPC_SAMPLE_FORMAT *) &Di_opt;
|
||||
mpc_int32_t k;
|
||||
|
||||
#if defined(OPTIMIZE_FOR_SPEED)
|
||||
#if defined(CPU_ARM)
|
||||
// 32=32x32-multiply assembler for ARM
|
||||
for ( k = 0; k < 32; k++, V++ )
|
||||
{
|
||||
asm volatile (
|
||||
"ldmia %[D]!, { r0-r7 } \n\t"
|
||||
"ldr r8, [%[V]] \n\t"
|
||||
"mul r9, r0, r8 \n\t"
|
||||
"ldr r8, [%[V], #96*4] \n\t"
|
||||
"mla r9, r1, r8, r9 \n\t"
|
||||
"ldr r8, [%[V], #128*4] \n\t"
|
||||
"mla r9, r2, r8, r9 \n\t"
|
||||
"ldr r8, [%[V], #224*4] \n\t"
|
||||
"mla r9, r3, r8, r9 \n\t"
|
||||
"ldr r8, [%[V], #256*4] \n\t"
|
||||
"mla r9, r4, r8, r9 \n\t"
|
||||
"ldr r8, [%[V], #352*4] \n\t"
|
||||
"mla r9, r5, r8, r9 \n\t"
|
||||
"ldr r8, [%[V], #384*4] \n\t"
|
||||
"mla r9, r6, r8, r9 \n\t"
|
||||
"ldr r8, [%[V], #480*4] \n\t"
|
||||
"mla r9, r7, r8, r9 \n\t"
|
||||
"ldmia %[D]!, { r0-r7 } \n\t"
|
||||
"ldr r8, [%[V], #512*4] \n\t"
|
||||
"mla r9, r0, r8, r9 \n\t"
|
||||
"ldr r8, [%[V], #608*4] \n\t"
|
||||
"mla r9, r1, r8, r9 \n\t"
|
||||
"ldr r8, [%[V], #640*4] \n\t"
|
||||
"mla r9, r2, r8, r9 \n\t"
|
||||
"ldr r8, [%[V], #736*4] \n\t"
|
||||
"mla r9, r3, r8, r9 \n\t"
|
||||
"ldr r8, [%[V], #768*4] \n\t"
|
||||
"mla r9, r4, r8, r9 \n\t"
|
||||
"ldr r8, [%[V], #864*4] \n\t"
|
||||
"mla r9, r5, r8, r9 \n\t"
|
||||
"ldr r8, [%[V], #896*4] \n\t"
|
||||
"mla r9, r6, r8, r9 \n\t"
|
||||
"ldr r8, [%[V], #992*4] \n\t"
|
||||
"mla r9, r7, r8, r9 \n\t"
|
||||
"str r9, [%[Data]], #4 \n"
|
||||
: [Data] "+r" (Data), [D] "+r" (D)
|
||||
: [V] "r" (V)
|
||||
: "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9");
|
||||
}
|
||||
#else
|
||||
// 32=32x32-multiply (FIXED_POINT)
|
||||
for ( k = 0; k < 32; k++, D += 16, V++ )
|
||||
{
|
||||
|
@ -517,9 +463,8 @@ mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data, const MPC_SAMPLE_FORMAT * V)
|
|||
+ V[512]*D[ 8] + V[608]*D[ 9] + V[640]*D[10] + V[736]*D[11]
|
||||
+ V[768]*D[12] + V[864]*D[13] + V[896]*D[14] + V[992]*D[15];
|
||||
Data += 1;
|
||||
// total: 16 muls, 15 adds
|
||||
// total: 32 * (16 muls, 15 adds)
|
||||
}
|
||||
#endif
|
||||
#else
|
||||
#if defined(CPU_COLDFIRE)
|
||||
// 64=32x32-multiply assembler for Coldfire
|
||||
|
@ -553,71 +498,25 @@ mpc_decoder_windowing_D(MPC_SAMPLE_FORMAT * Data, const MPC_SAMPLE_FORMAT * V)
|
|||
: [V] "a" (V), [D] "a" (D)
|
||||
: "d0", "d1", "d2", "d3", "a5");
|
||||
}
|
||||
#elif defined(CPU_ARM)
|
||||
// 64=32x32-multiply assembler for ARM
|
||||
for ( k = 0; k < 32; k++, V++ )
|
||||
{
|
||||
asm volatile (
|
||||
"ldmia %[D]!, { r0-r3 } \n\t"
|
||||
"ldr r4, [%[V]] \n\t"
|
||||
"smull r5, r6, r0, r4 \n\t"
|
||||
"ldr r4, [%[V], #96*4] \n\t"
|
||||
"smlal r5, r6, r1, r4 \n\t"
|
||||
"ldr r4, [%[V], #128*4] \n\t"
|
||||
"smlal r5, r6, r2, r4 \n\t"
|
||||
"ldr r4, [%[V], #224*4] \n\t"
|
||||
"smlal r5, r6, r3, r4 \n\t"
|
||||
|
||||
"ldmia %[D]!, { r0-r3 } \n\t"
|
||||
"ldr r4, [%[V], #256*4] \n\t"
|
||||
"smlal r5, r6, r0, r4 \n\t"
|
||||
"ldr r4, [%[V], #352*4] \n\t"
|
||||
"smlal r5, r6, r1, r4 \n\t"
|
||||
"ldr r4, [%[V], #384*4] \n\t"
|
||||
"smlal r5, r6, r2, r4 \n\t"
|
||||
"ldr r4, [%[V], #480*4] \n\t"
|
||||
"smlal r5, r6, r3, r4 \n\t"
|
||||
|
||||
"ldmia %[D]!, { r0-r3 } \n\t"
|
||||
"ldr r4, [%[V], #512*4] \n\t"
|
||||
"smlal r5, r6, r0, r4 \n\t"
|
||||
"ldr r4, [%[V], #608*4] \n\t"
|
||||
"smlal r5, r6, r1, r4 \n\t"
|
||||
"ldr r4, [%[V], #640*4] \n\t"
|
||||
"smlal r5, r6, r2, r4 \n\t"
|
||||
"ldr r4, [%[V], #736*4] \n\t"
|
||||
"smlal r5, r6, r3, r4 \n\t"
|
||||
|
||||
"ldmia %[D]!, { r0-r3 } \n\t"
|
||||
"ldr r4, [%[V], #768*4] \n\t"
|
||||
"smlal r5, r6, r0, r4 \n\t"
|
||||
"ldr r4, [%[V], #864*4] \n\t"
|
||||
"smlal r5, r6, r1, r4 \n\t"
|
||||
"ldr r4, [%[V], #896*4] \n\t"
|
||||
"smlal r5, r6, r2, r4 \n\t"
|
||||
"ldr r4, [%[V], #992*4] \n\t"
|
||||
"smlal r5, r6, r3, r4 \n\t"
|
||||
"mov r4, r6, lsl #1 \n\t"
|
||||
"orr r4, r4, r5, lsr #31\n\t"
|
||||
"str r4, [%[Data]], #4 \n"
|
||||
: [Data] "+r" (Data), [D] "+r" (D)
|
||||
: [V] "r" (V)
|
||||
: "r0", "r1", "r2", "r3", "r4", "r5", "r6");
|
||||
}
|
||||
#else
|
||||
// 64=64x64-multiply (FIXED_POINT) or float=float*float (!FIXED_POINT) in C
|
||||
for ( k = 0; k < 32; k++, D += 16, V++ )
|
||||
{
|
||||
*Data = MPC_MULTIPLY_EX(V[ 0],D[ 0],31) + MPC_MULTIPLY_EX(V[ 96],D[ 1],31) + MPC_MULTIPLY_EX(V[128],D[ 2],31) + MPC_MULTIPLY_EX(V[224],D[ 3],31)
|
||||
+ MPC_MULTIPLY_EX(V[256],D[ 4],31) + MPC_MULTIPLY_EX(V[352],D[ 5],31) + MPC_MULTIPLY_EX(V[384],D[ 6],31) + MPC_MULTIPLY_EX(V[480],D[ 7],31)
|
||||
+ MPC_MULTIPLY_EX(V[512],D[ 8],31) + MPC_MULTIPLY_EX(V[608],D[ 9],31) + MPC_MULTIPLY_EX(V[640],D[10],31) + MPC_MULTIPLY_EX(V[736],D[11],31)
|
||||
+ MPC_MULTIPLY_EX(V[768],D[12],31) + MPC_MULTIPLY_EX(V[864],D[13],31) + MPC_MULTIPLY_EX(V[896],D[14],31) + MPC_MULTIPLY_EX(V[992],D[15],31);
|
||||
*Data = MPC_MULTIPLY_EX(V[ 0],D[ 0],31) + MPC_MULTIPLY_EX(V[ 96],D[ 1],31)
|
||||
+ MPC_MULTIPLY_EX(V[128],D[ 2],31) + MPC_MULTIPLY_EX(V[224],D[ 3],31)
|
||||
+ MPC_MULTIPLY_EX(V[256],D[ 4],31) + MPC_MULTIPLY_EX(V[352],D[ 5],31)
|
||||
+ MPC_MULTIPLY_EX(V[384],D[ 6],31) + MPC_MULTIPLY_EX(V[480],D[ 7],31)
|
||||
+ MPC_MULTIPLY_EX(V[512],D[ 8],31) + MPC_MULTIPLY_EX(V[608],D[ 9],31)
|
||||
+ MPC_MULTIPLY_EX(V[640],D[10],31) + MPC_MULTIPLY_EX(V[736],D[11],31)
|
||||
+ MPC_MULTIPLY_EX(V[768],D[12],31) + MPC_MULTIPLY_EX(V[864],D[13],31)
|
||||
+ MPC_MULTIPLY_EX(V[896],D[14],31) + MPC_MULTIPLY_EX(V[992],D[15],31);
|
||||
Data += 1;
|
||||
// total: 16 muls, 15 adds, 16 shifts
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
#endif /* CPU_ARM */
|
||||
|
||||
static void
|
||||
mpc_full_synthesis_filter(MPC_SAMPLE_FORMAT *OutData, MPC_SAMPLE_FORMAT *V, const MPC_SAMPLE_FORMAT *Y)
|
||||
|
@ -630,7 +529,7 @@ mpc_full_synthesis_filter(MPC_SAMPLE_FORMAT *OutData, MPC_SAMPLE_FORMAT *V, cons
|
|||
{
|
||||
V -= 64;
|
||||
mpc_calculate_new_V ( Y, V );
|
||||
mpc_decoder_windowing_D( OutData, V);
|
||||
mpc_decoder_windowing_D( OutData, V, Di_opt );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -661,7 +560,7 @@ mpc_decoder_synthese_filter_float(mpc_decoder *d, MPC_SAMPLE_FORMAT *OutData)
|
|||
/* */
|
||||
/*******************************************/
|
||||
|
||||
static const unsigned char Parity [256] = { // parity
|
||||
static const unsigned char Parity [256] ICONST_ATTR = { // parity
|
||||
0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,
|
||||
1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
|
||||
1,0,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,
|
||||
|
|
155
apps/codecs/libmusepack/synth_filter_arm.S
Executable file
155
apps/codecs/libmusepack/synth_filter_arm.S
Executable file
|
@ -0,0 +1,155 @@
|
|||
/***************************************************************************
|
||||
* __________ __ ___.
|
||||
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
|
||||
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
|
||||
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
|
||||
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
|
||||
* \/ \/ \/ \/ \/
|
||||
* $Id$
|
||||
*
|
||||
* Copyright (C) 2008 by Andree Buschmann
|
||||
*
|
||||
* All files in this archive are subject to the GNU General Public License.
|
||||
* See the file COPYING in the source tree root for full license agreement.
|
||||
*
|
||||
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
|
||||
* KIND, either express or implied.
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
#include "mpc_config.h"
|
||||
|
||||
.section .text, "ax", %progbits
|
||||
|
||||
/****************************************************************************
|
||||
* void mpc_decoder_windowing_D(...)
|
||||
*
|
||||
* 2nd step within synthesis filter. Does the dewindowing.
|
||||
* 32=32x32 multiplies (OPTIMIZE_FOR_SPEED)
|
||||
* Uses pre-shifted V[] and D[] values.
|
||||
****************************************************************************/
|
||||
#if defined(OPTIMIZE_FOR_SPEED)
|
||||
/* void mpc_decoder_windowing_D(Data, V, D)
 * Writes 32 words to Data[]. Each word is the sum of 16 products of a V[]
 * sample and a D[] coefficient, accumulated in a single 32 bit register. */
.align 2
|
||||
.global mpc_decoder_windowing_D
|
||||
.type mpc_decoder_windowing_D, %function
|
||||
mpc_decoder_windowing_D:
|
||||
/* r0 = Data[] (output pointer, post-incremented by every store) */
|
||||
/* r1 = V[] (advanced by one word per output sample) */
|
||||
/* r2 = D[] (coefficients, read sequentially, 16 per output sample) */
|
||||
/* lr = counter (output samples left); r3-r10 = coefficients, r11 = V sample, r12 = accumulator */
|
||||
|
||||
stmfd sp!, {r4-r12, lr} /* preserve the registers the loop overwrites */
|
||||
|
||||
mov lr, #32 /* 32 output samples per call */
|
||||
.loop32:
|
||||
ldmia r2!, { r3-r10 } /* load first 8 window coefficients, D += 8 words */
|
||||
ldr r11, [r1] /* tap 0: V[0] */
|
||||
mul r12, r3, r11 /* accumulator = coefficient 0 * sample (starts a new sum) */
|
||||
ldr r11, [r1, #96*4] /* tap 1: V[96] */
|
||||
mla r12, r4, r11, r12 /* accumulator += coefficient * sample */
|
||||
ldr r11, [r1, #128*4] /* tap 2: V[128] */
|
||||
mla r12, r5, r11, r12
|
||||
ldr r11, [r1, #224*4] /* tap 3: V[224] */
|
||||
mla r12, r6, r11, r12
|
||||
ldr r11, [r1, #256*4] /* tap 4: V[256] */
|
||||
mla r12, r7, r11, r12
|
||||
ldr r11, [r1, #352*4] /* tap 5: V[352] */
|
||||
mla r12, r8, r11, r12
|
||||
ldr r11, [r1, #384*4] /* tap 6: V[384] */
|
||||
mla r12, r9, r11, r12
|
||||
ldr r11, [r1, #480*4] /* tap 7: V[480] */
|
||||
mla r12, r10, r11, r12
|
||||
ldmia r2!, { r3-r10 } /* load last 8 window coefficients, D += 8 words */
|
||||
ldr r11, [r1, #512*4] /* tap 8: V[512] */
|
||||
mla r12, r3, r11, r12
|
||||
ldr r11, [r1, #608*4] /* tap 9: V[608] */
|
||||
mla r12, r4, r11, r12
|
||||
ldr r11, [r1, #640*4] /* tap 10: V[640] */
|
||||
mla r12, r5, r11, r12
|
||||
ldr r11, [r1, #736*4] /* tap 11: V[736] */
|
||||
mla r12, r6, r11, r12
|
||||
ldr r11, [r1, #768*4] /* tap 12: V[768] */
|
||||
mla r12, r7, r11, r12
|
||||
ldr r11, [r1, #864*4] /* tap 13: V[864] */
|
||||
mla r12, r8, r11, r12
|
||||
ldr r11, [r1, #896*4] /* tap 14: V[896] */
|
||||
mla r12, r9, r11, r12
|
||||
ldr r11, [r1, #992*4] /* tap 15: V[992] */
|
||||
mla r12, r10, r11, r12
|
||||
str r12, [r0], #4 /* store the finished sum to *Data, then Data++ */
|
||||
add r1, r1, #4 /* V++ (next output sample uses the next V word) */
|
||||
|
||||
subs lr, lr, #1 /* one output sample done */
|
||||
bgt .loop32 /* repeat while the counter is still above zero */
|
||||
|
||||
ldmfd sp!, {r4-r12, pc} /* restore registers; loading the saved lr into pc returns */
|
||||
.mpc_dewindowing_end:
|
||||
.size mpc_decoder_windowing_D,.mpc_dewindowing_end-mpc_decoder_windowing_D
|
||||
#else
|
||||
/****************************************************************************
|
||||
* void mpc_decoder_windowing_D(...)
|
||||
*
|
||||
* 2nd step within synthesis filter. Does the dewindowing.
|
||||
* 64=32x32 multiplies
|
||||
* Drops lo-part of 64bit multiply results and will therefore lose 1 bit
|
||||
* accuracy. The decoder output is binary identical as this imprecision is
|
||||
* far below the output's 16bit resolution.
|
||||
****************************************************************************/
|
||||
/* void mpc_decoder_windowing_D(Data, V, D)
 * Writes 32 words to Data[]. Each word is built from 16 signed 32x32->64 bit
 * multiply-accumulates of a V[] sample and a D[] coefficient; only the high
 * word of the 64 bit accumulator is kept between taps and for the result. */
.align 2
|
||||
.global mpc_decoder_windowing_D
|
||||
.type mpc_decoder_windowing_D, %function
|
||||
mpc_decoder_windowing_D:
|
||||
/* r0 = Data[] (output pointer, post-incremented by every store) */
|
||||
/* r1 = V[] (advanced by one word per output sample) */
|
||||
/* r2 = D[] (coefficients, read sequentially, 16 per output sample) */
|
||||
/* lr = counter (output samples left); r3-r10 = coefficients, r11 = V sample / low word, r12 = high word */
|
||||
|
||||
stmfd sp!, {r4-r12, lr} /* preserve the registers the loop overwrites */
|
||||
|
||||
mov lr, #32 /* 32 output samples per call */
|
||||
.loop32:
|
||||
ldmia r2!, { r3-r10 } /* load first 8 window coefficients, D += 8 words */
|
||||
ldr r11, [r1] /* tap 0: V[0] */
|
||||
smull r11, r12, r3, r11 /* r12:r11 = coefficient 0 * sample (signed, starts a new sum) */
|
||||
ldr r11, [r1, #96*4] /* tap 1: V[96]; this overwrites the low product word in r11 */
|
||||
smlal r11, r12, r4, r11 /* r12:r11 += coefficient * sample; only r12 is carried on */
|
||||
ldr r11, [r1, #128*4] /* tap 2: V[128] */
|
||||
smlal r11, r12, r5, r11
|
||||
ldr r11, [r1, #224*4] /* tap 3: V[224] */
|
||||
smlal r11, r12, r6, r11
|
||||
ldr r11, [r1, #256*4] /* tap 4: V[256] */
|
||||
smlal r11, r12, r7, r11
|
||||
ldr r11, [r1, #352*4] /* tap 5: V[352] */
|
||||
smlal r11, r12, r8, r11
|
||||
ldr r11, [r1, #384*4] /* tap 6: V[384] */
|
||||
smlal r11, r12, r9, r11
|
||||
ldr r11, [r1, #480*4] /* tap 7: V[480] */
|
||||
smlal r11, r12, r10, r11
|
||||
ldmia r2!, { r3-r10 } /* load last 8 window coefficients, D += 8 words */
|
||||
ldr r11, [r1, #512*4] /* tap 8: V[512] */
|
||||
smlal r11, r12, r3, r11
|
||||
ldr r11, [r1, #608*4] /* tap 9: V[608] */
|
||||
smlal r11, r12, r4, r11
|
||||
ldr r11, [r1, #640*4] /* tap 10: V[640] */
|
||||
smlal r11, r12, r5, r11
|
||||
ldr r11, [r1, #736*4] /* tap 11: V[736] */
|
||||
smlal r11, r12, r6, r11
|
||||
ldr r11, [r1, #768*4] /* tap 12: V[768] */
|
||||
smlal r11, r12, r7, r11
|
||||
ldr r11, [r1, #864*4] /* tap 13: V[864] */
|
||||
smlal r11, r12, r8, r11
|
||||
ldr r11, [r1, #896*4] /* tap 14: V[896] */
|
||||
smlal r11, r12, r9, r11
|
||||
ldr r11, [r1, #992*4] /* tap 15: V[992] */
|
||||
smlal r11, r12, r10, r11
|
||||
mov r4, r12, lsl #1 /* get result from hi-part: high word shifted left by one; r4 is free, its coefficient is already used */
|
||||
str r4, [r0], #4 /* store the result to *Data, then Data++ */
|
||||
add r1, r1, #4 /* V++ (next output sample uses the next V word) */
|
||||
|
||||
subs lr, lr, #1 /* one output sample done */
|
||||
bgt .loop32 /* repeat while the counter is still above zero */
|
||||
|
||||
ldmfd sp!, {r4-r12, pc} /* restore registers; loading the saved lr into pc returns */
|
||||
.mpc_dewindowing_end:
|
||||
.size mpc_decoder_windowing_D,.mpc_dewindowing_end-mpc_decoder_windowing_D
|
||||
#endif
|
Loading…
Add table
Add a link
Reference in a new issue