forked from len0rd/rockbox
libdemac: Add x86/x86_64 MMX asm for the filters. Not relevant for target but speeds up decoding on x86/x86_64 sims. Average speedup ranges from 25% for -c2000 to 3 times for -c5000; on Intel Atom it's even 45% for -c2000 to 6 times for -c5000.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@24663 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
1bef4c6650
commit
b8eb272e48
3 changed files with 238 additions and 6 deletions
|
|
@ -46,6 +46,9 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
|
||||||
#elif defined(CPU_ARM) && (ARM_ARCH >= 5)
|
#elif defined(CPU_ARM) && (ARM_ARCH >= 5)
|
||||||
/* Assume all our ARMv5 targets are ARMv5te(j) */
|
/* Assume all our ARMv5 targets are ARMv5te(j) */
|
||||||
#include "vector_math16_armv5te.h"
|
#include "vector_math16_armv5te.h"
|
||||||
|
#elif (defined(__i386__) || defined(__i486__)) && defined(__MMX__) \
|
||||||
|
|| defined(__x86_64__)
|
||||||
|
#include "vector_math16_mmx.h"
|
||||||
#else
|
#else
|
||||||
#include "vector_math_generic.h"
|
#include "vector_math_generic.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
219
apps/codecs/demac/libdemac/vector_math16_mmx.h
Normal file
219
apps/codecs/demac/libdemac/vector_math16_mmx.h
Normal file
|
|
@ -0,0 +1,219 @@
|
||||||
|
/*
|
||||||
|
|
||||||
|
libdemac - A Monkey's Audio decoder
|
||||||
|
|
||||||
|
$Id$
|
||||||
|
|
||||||
|
Copyright (C) Dave Chapman 2007
|
||||||
|
|
||||||
|
MMX vector math copyright (C) 2010 Jens Arnold
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* NOTE(review): presumably advertises to the including code that this header
   provides the fused vector_sp_add()/vector_sp_sub() helpers - confirm
   against the filter code that tests this macro. */
#define FUSED_VECTOR_MATH
|
||||||
|
|
||||||
|
/* Stringification helpers: __E(x) turns its argument into a string literal
   as written; __S(x) macro-expands x first and then stringifies the result,
   so __S(ORDER>>2 - 1) pastes the numeric value of ORDER into the asm text. */
#define __E(__e) #__e
|
||||||
|
#define __S(__e) __E(__e)
|
||||||
|
|
||||||
|
static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t *s2)
|
||||||
|
{
|
||||||
|
int res, t;
|
||||||
|
#if ORDER > 256
|
||||||
|
int cnt = ORDER>>8;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
asm volatile (
|
||||||
|
#if ORDER > 256
|
||||||
|
"pxor %%mm2, %%mm2 \n"
|
||||||
|
".set ofs, 0 \n"
|
||||||
|
"1: \n"
|
||||||
|
".rept 64 \n"
|
||||||
|
#else
|
||||||
|
"movq (%[v1]), %%mm2 \n"
|
||||||
|
"movq %%mm2, %%mm0 \n"
|
||||||
|
"pmaddwd (%[f2]), %%mm2 \n"
|
||||||
|
"paddw (%[s2]), %%mm0 \n"
|
||||||
|
"movq %%mm0, (%[v1]) \n"
|
||||||
|
".set ofs, 8 \n"
|
||||||
|
|
||||||
|
".rept " __S(ORDER>>2 - 1) "\n"
|
||||||
|
#endif
|
||||||
|
"movq ofs(%[v1]), %%mm1 \n"
|
||||||
|
"movq %%mm1, %%mm0 \n"
|
||||||
|
"pmaddwd ofs(%[f2]), %%mm1 \n"
|
||||||
|
"paddw ofs(%[s2]), %%mm0 \n"
|
||||||
|
"movq %%mm0, ofs(%[v1]) \n"
|
||||||
|
"paddd %%mm1, %%mm2 \n"
|
||||||
|
".set ofs, ofs + 8 \n"
|
||||||
|
".endr \n"
|
||||||
|
#if ORDER > 256
|
||||||
|
"add $512, %[v1] \n"
|
||||||
|
"add $512, %[s2] \n"
|
||||||
|
"add $512, %[f2] \n"
|
||||||
|
"dec %[cnt] \n"
|
||||||
|
"jne 1b \n"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
"movd %%mm2, %[t] \n"
|
||||||
|
"psrlq $32, %%mm2 \n"
|
||||||
|
"movd %%mm2, %[res] \n"
|
||||||
|
"add %[t], %[res] \n"
|
||||||
|
: /* outputs */
|
||||||
|
#if ORDER > 256
|
||||||
|
[cnt]"+r"(cnt),
|
||||||
|
[s2] "+r"(s2),
|
||||||
|
[res]"=r"(res),
|
||||||
|
[t] "=r"(t)
|
||||||
|
: /* inputs */
|
||||||
|
[v1]"2"(v1),
|
||||||
|
[f2]"3"(f2)
|
||||||
|
#else
|
||||||
|
[res]"=r"(res),
|
||||||
|
[t] "=r"(t)
|
||||||
|
: /* inputs */
|
||||||
|
[v1]"r"(v1),
|
||||||
|
[f2]"r"(f2),
|
||||||
|
[s2]"r"(s2)
|
||||||
|
#endif
|
||||||
|
: /* clobbers */
|
||||||
|
"mm0", "mm1", "mm2"
|
||||||
|
);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t *s2)
|
||||||
|
{
|
||||||
|
int res, t;
|
||||||
|
#if ORDER > 256
|
||||||
|
int cnt = ORDER>>8;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
asm volatile (
|
||||||
|
#if ORDER > 256
|
||||||
|
"pxor %%mm2, %%mm2 \n"
|
||||||
|
".set ofs, 0 \n"
|
||||||
|
"1: \n"
|
||||||
|
".rept 64 \n"
|
||||||
|
#else
|
||||||
|
"movq (%[v1]), %%mm2 \n"
|
||||||
|
"movq %%mm2, %%mm0 \n"
|
||||||
|
"pmaddwd (%[f2]), %%mm2 \n"
|
||||||
|
"psubw (%[s2]), %%mm0 \n"
|
||||||
|
"movq %%mm0, (%[v1]) \n"
|
||||||
|
".set ofs, 8 \n"
|
||||||
|
|
||||||
|
".rept " __S(ORDER>>2 - 1) "\n"
|
||||||
|
#endif
|
||||||
|
"movq ofs(%[v1]), %%mm1 \n"
|
||||||
|
"movq %%mm1, %%mm0 \n"
|
||||||
|
"pmaddwd ofs(%[f2]), %%mm1 \n"
|
||||||
|
"psubw ofs(%[s2]), %%mm0 \n"
|
||||||
|
"movq %%mm0, ofs(%[v1]) \n"
|
||||||
|
"paddd %%mm1, %%mm2 \n"
|
||||||
|
".set ofs, ofs + 8 \n"
|
||||||
|
".endr \n"
|
||||||
|
#if ORDER > 256
|
||||||
|
"add $512, %[v1] \n"
|
||||||
|
"add $512, %[s2] \n"
|
||||||
|
"add $512, %[f2] \n"
|
||||||
|
"dec %[cnt] \n"
|
||||||
|
"jne 1b \n"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
"movd %%mm2, %[t] \n"
|
||||||
|
"psrlq $32, %%mm2 \n"
|
||||||
|
"movd %%mm2, %[res] \n"
|
||||||
|
"add %[t], %[res] \n"
|
||||||
|
: /* outputs */
|
||||||
|
#if ORDER > 256
|
||||||
|
[cnt]"+r"(cnt),
|
||||||
|
[s2] "+r"(s2),
|
||||||
|
[res]"=r"(res),
|
||||||
|
[t] "=r"(t)
|
||||||
|
: /* inputs */
|
||||||
|
[v1]"2"(v1),
|
||||||
|
[f2]"3"(f2)
|
||||||
|
#else
|
||||||
|
[res]"=r"(res),
|
||||||
|
[t] "=r"(t)
|
||||||
|
: /* inputs */
|
||||||
|
[v1]"r"(v1),
|
||||||
|
[f2]"r"(f2),
|
||||||
|
[s2]"r"(s2)
|
||||||
|
#endif
|
||||||
|
: /* clobbers */
|
||||||
|
"mm0", "mm1", "mm2"
|
||||||
|
);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
|
||||||
|
{
|
||||||
|
int res, t;
|
||||||
|
#if ORDER > 256
|
||||||
|
int cnt = ORDER>>8;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
asm volatile (
|
||||||
|
#if ORDER > 256
|
||||||
|
"pxor %%mm1, %%mm1 \n"
|
||||||
|
".set ofs, 0 \n"
|
||||||
|
"1: \n"
|
||||||
|
".rept 64 \n"
|
||||||
|
#else
|
||||||
|
"movq (%[v1]), %%mm1 \n"
|
||||||
|
"pmaddwd (%[v2]), %%mm1 \n"
|
||||||
|
".set ofs, 8 \n"
|
||||||
|
|
||||||
|
".rept " __S(ORDER>>2 - 1) "\n"
|
||||||
|
#endif
|
||||||
|
"movq ofs(%[v1]), %%mm0 \n"
|
||||||
|
"pmaddwd ofs(%[v2]), %%mm0 \n"
|
||||||
|
"paddd %%mm0, %%mm1 \n"
|
||||||
|
".set ofs, ofs + 8 \n"
|
||||||
|
".endr \n"
|
||||||
|
#if ORDER > 256
|
||||||
|
"add $512, %[v1] \n"
|
||||||
|
"add $512, %[v2] \n"
|
||||||
|
"dec %[cnt] \n"
|
||||||
|
"jne 1b \n"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
"movd %%mm1, %[t] \n"
|
||||||
|
"psrlq $32, %%mm1 \n"
|
||||||
|
"movd %%mm1, %[res] \n"
|
||||||
|
"add %[t], %[res] \n"
|
||||||
|
: /* outputs */
|
||||||
|
#if ORDER > 256
|
||||||
|
[cnt]"+r"(cnt),
|
||||||
|
[res]"=r"(res),
|
||||||
|
[t] "=r"(t)
|
||||||
|
: /* inputs */
|
||||||
|
[v1]"1"(v1),
|
||||||
|
[v2]"2"(v2)
|
||||||
|
#else
|
||||||
|
[res]"=r"(res),
|
||||||
|
[t] "=r"(t)
|
||||||
|
: /* inputs */
|
||||||
|
[v1]"r"(v1),
|
||||||
|
[v2]"r"(v2)
|
||||||
|
#endif
|
||||||
|
: /* clobbers */
|
||||||
|
"mm0", "mm1"
|
||||||
|
);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
22
tools/configure
vendored
22
tools/configure
vendored
|
|
@ -171,12 +171,20 @@ simcc () {
|
||||||
GCCOPTS="$GCCOPTS -I\$(SIMDIR)"
|
GCCOPTS="$GCCOPTS -I\$(SIMDIR)"
|
||||||
|
|
||||||
if test "X$crosscompile" != "Xyes"; then
|
if test "X$crosscompile" != "Xyes"; then
|
||||||
if [ "`uname -m`" = "x86_64" ] || [ "`uname -m`" = "amd64" ]; then
|
case `uname -m` in
|
||||||
# fPIC is needed to make shared objects link
|
x86_64|amd64)
|
||||||
# setting visibility to hidden is necessary to avoid strange crashes
|
# fPIC is needed to make shared objects link
|
||||||
# due to symbol clashing
|
# setting visibility to hidden is necessary to avoid strange crashes
|
||||||
GCCOPTS="$GCCOPTS -fPIC -fvisibility=hidden"
|
# due to symbol clashing
|
||||||
fi
|
GCCOPTS="$GCCOPTS -fPIC -fvisibility=hidden"
|
||||||
|
# x86_64 supports MMX by default
|
||||||
|
;;
|
||||||
|
|
||||||
|
i686)
|
||||||
|
echo "Enabling MMX support"
|
||||||
|
GCCOPTS="$GCCOPTS -mmmx"
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
id=$$
|
id=$$
|
||||||
cat >$tmpdir/conftest-$id.c <<EOF
|
cat >$tmpdir/conftest-$id.c <<EOF
|
||||||
|
|
@ -218,6 +226,8 @@ EOF
|
||||||
LDOPTS="-mconsole $sdl_libs"
|
LDOPTS="-mconsole $sdl_libs"
|
||||||
output="rockboxui.exe" # use this as output binary name
|
output="rockboxui.exe" # use this as output binary name
|
||||||
endian="little" # windows is little endian
|
endian="little" # windows is little endian
|
||||||
|
echo "Enabling MMX support"
|
||||||
|
GCCOPTS="$GCCOPTS -mmmx"
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue