forked from len0rd/rockbox
Fractals: Have helper functions in header file to keep them inlined
- Should fix the performance degradation caused by the split. Thanks to all who noticed (amiconn et al.). git-svn-id: svn://svn.rockbox.org/rockbox/trunk@24266 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
d812362905
commit
563f2602f4
7 changed files with 118 additions and 204 deletions
|
@ -1,10 +1,3 @@
|
||||||
fractal.c
|
fractal.c
|
||||||
fractal_rect.c
|
fractal_rect.c
|
||||||
mandelbrot_set.c
|
mandelbrot_set.c
|
||||||
#if CONFIG_CPU == SH7034
|
|
||||||
cpu_sh7043.c
|
|
||||||
#elif defined CPU_COLDFIRE
|
|
||||||
cpu_coldfire.c
|
|
||||||
#elif defined CPU_ARM
|
|
||||||
cpu_arm.c
|
|
||||||
#endif
|
|
||||||
|
|
|
@ -1,40 +0,0 @@
|
||||||
/***************************************************************************
|
|
||||||
* __________ __ ___.
|
|
||||||
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
|
|
||||||
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
|
|
||||||
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
|
|
||||||
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
|
|
||||||
* \/ \/ \/ \/ \/
|
|
||||||
* $Id$
|
|
||||||
*
|
|
||||||
* Copyright (C) 2009 Tomer Shalev
|
|
||||||
*
|
|
||||||
*
|
|
||||||
* This program is free software; you can redistribute it and/or
|
|
||||||
* modify it under the terms of the GNU General Public License
|
|
||||||
* as published by the Free Software Foundation; either version 2
|
|
||||||
* of the License, or (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
|
|
||||||
* KIND, either express or implied.
|
|
||||||
*
|
|
||||||
****************************************************************************/
|
|
||||||
#include "cpu_arm.h"
|
|
||||||
|
|
||||||
/*
 * Fixed-point multiply for ARM: returns the low 32 bits of the signed
 * 64-bit product a * b shifted right by 26.
 *
 * smull leaves the low word of the product in r and the high word in t1;
 * the result is then assembled as (low >> 26) | (high << 6).
 *
 * Operand constraints: both outputs are early-clobber, a and b are marked
 * commutative ('%'), and the second and third constraint alternatives let
 * b share a register with r or t1 respectively.
 * NOTE(review): this presumably works around the SMULL restriction on older
 * ARM cores that Rm must differ from RdLo/RdHi - confirm.
 */
inline long muls32_asr26(long a, long b)
|
|
||||||
{
|
|
||||||
long r, t1;
|
|
||||||
asm (
|
|
||||||
"smull %[r], %[t1], %[a], %[b] \n"
|
|
||||||
"mov %[r], %[r], lsr #26 \n"
|
|
||||||
"orr %[r], %[r], %[t1], lsl #6 \n"
|
|
||||||
: /* outputs */
|
|
||||||
[r] "=&r,&r,&r"(r),
|
|
||||||
[t1]"=&r,&r,&r"(t1)
|
|
||||||
: /* inputs */
|
|
||||||
[a] "%r,%r,%r" (a),
|
|
||||||
[b] "r,0,1" (b)
|
|
||||||
);
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
|
|
|
@ -22,6 +22,21 @@
|
||||||
#ifndef _CPU_ARM_H
|
#ifndef _CPU_ARM_H
|
||||||
#define _CPU_ARM_H
|
#define _CPU_ARM_H
|
||||||
|
|
||||||
inline long muls32_asr26(long a, long b);
|
/*
 * Fixed-point multiply for ARM: returns the low 32 bits of the signed
 * 64-bit product a * b shifted right by 26.
 *
 * smull leaves the low word of the product in r and the high word in t1;
 * the result is then assembled as (low >> 26) | (high << 6).
 *
 * Operand constraints: both outputs are early-clobber, a and b are marked
 * commutative ('%'), and the second and third constraint alternatives let
 * b share a register with r or t1 respectively.
 * NOTE(review): this presumably works around the SMULL restriction on older
 * ARM cores that Rm must differ from RdLo/RdHi - confirm.
 */
inline static long muls32_asr26(long a, long b)
|
||||||
|
{
|
||||||
|
long r, t1;
|
||||||
|
asm (
|
||||||
|
"smull %[r], %[t1], %[a], %[b] \n"
|
||||||
|
"mov %[r], %[r], lsr #26 \n"
|
||||||
|
"orr %[r], %[r], %[t1], lsl #6 \n"
|
||||||
|
: /* outputs */
|
||||||
|
[r] "=&r,&r,&r"(r),
|
||||||
|
[t1]"=&r,&r,&r"(t1)
|
||||||
|
: /* inputs */
|
||||||
|
[a] "%r,%r,%r" (a),
|
||||||
|
[b] "r,0,1" (b)
|
||||||
|
);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,58 +0,0 @@
|
||||||
/***************************************************************************
|
|
||||||
* __________ __ ___.
|
|
||||||
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
|
|
||||||
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
|
|
||||||
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
|
|
||||||
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
|
|
||||||
* \/ \/ \/ \/ \/
|
|
||||||
* $Id$
|
|
||||||
*
|
|
||||||
* Copyright (C) 2009 Tomer Shalev
|
|
||||||
*
|
|
||||||
*
|
|
||||||
* This program is free software; you can redistribute it and/or
|
|
||||||
* modify it under the terms of the GNU General Public License
|
|
||||||
* as published by the Free Software Foundation; either version 2
|
|
||||||
* of the License, or (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
|
|
||||||
* KIND, either express or implied.
|
|
||||||
*
|
|
||||||
****************************************************************************/
|
|
||||||
#include "cpu_coldfire.h"
|
|
||||||
|
|
||||||
/*
 * Fixed-point multiply for ColdFire: signed 16 x 16 bit multiply of a and b
 * (muls.w), arithmetically shifted right by 10 in total; the value is
 * returned as a short.
 *
 * b is a read-write data-register operand ("+d") that receives the product
 * and then the shifted result. The shift is done in two steps (8 + 2).
 * NOTE(review): the split is presumably because an immediate shift count
 * cannot exceed 8 - confirm against the ColdFire reference manual.
 */
inline short muls16_asr10(short a, short b)
|
|
||||||
{
|
|
||||||
asm (
|
|
||||||
"muls.w %[a],%[b] \n"
|
|
||||||
"asr.l #8,%[b] \n"
|
|
||||||
"asr.l #2,%[b] \n"
|
|
||||||
: /* outputs */
|
|
||||||
[b]"+d"(b)
|
|
||||||
: /* inputs */
|
|
||||||
[a]"d" (a)
|
|
||||||
);
|
|
||||||
return b;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Fixed-point multiply for ColdFire using the EMAC accumulator: multiplies
 * a by b into acc0, then builds the return value from the accumulator
 * (high part, shifted left by 5) and from accext01 (low part, shifted right
 * by 3 and masked to 5 bits).
 *
 * NOTE(review): mac.l accumulates, so this presumably assumes acc0 is zero
 * on entry (movclr.l clears it again on the way out), and the "plus one
 * free" shift presumably relies on the EMAC being in fractional mode -
 * confirm against the caller's EMAC setup.
 */
inline long muls32_asr26(long a, long b)
|
|
||||||
{
|
|
||||||
long r, t1;
|
|
||||||
asm (
|
|
||||||
"mac.l %[a], %[b], %%acc0 \n" /* multiply */
|
|
||||||
"move.l %%accext01, %[t1] \n" /* get low part */
|
|
||||||
"movclr.l %%acc0, %[r] \n" /* get high part */
|
|
||||||
"asl.l #5, %[r] \n" /* hi <<= 5, plus one free */
|
|
||||||
"lsr.l #3, %[t1] \n" /* lo >>= 3 */
|
|
||||||
"and.l #0x1f, %[t1] \n" /* mask out unrelated bits */
|
|
||||||
"or.l %[t1], %[r] \n" /* combine result */
|
|
||||||
: /* outputs */
|
|
||||||
[r] "=d"(r),
|
|
||||||
[t1]"=d"(t1)
|
|
||||||
: /* inputs */
|
|
||||||
[a] "d" (a),
|
|
||||||
[b] "d" (b)
|
|
||||||
);
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
|
|
|
@ -22,8 +22,39 @@
|
||||||
#ifndef _CPU_COLDFIRE_H
|
#ifndef _CPU_COLDFIRE_H
|
||||||
#define _CPU_COLDFIRE_H
|
#define _CPU_COLDFIRE_H
|
||||||
|
|
||||||
inline short muls16_asr10(short a, short b);
|
/*
 * Fixed-point multiply for ColdFire: signed 16 x 16 bit multiply of a and b
 * (muls.w), arithmetically shifted right by 10 in total; the value is
 * returned as a short.
 *
 * b is a read-write data-register operand ("+d") that receives the product
 * and then the shifted result. The shift is done in two steps (8 + 2).
 * NOTE(review): the split is presumably because an immediate shift count
 * cannot exceed 8 - confirm against the ColdFire reference manual.
 */
inline static short muls16_asr10(short a, short b)
|
||||||
|
{
|
||||||
|
asm (
|
||||||
|
"muls.w %[a],%[b] \n"
|
||||||
|
"asr.l #8,%[b] \n"
|
||||||
|
"asr.l #2,%[b] \n"
|
||||||
|
: /* outputs */
|
||||||
|
[b]"+d"(b)
|
||||||
|
: /* inputs */
|
||||||
|
[a]"d" (a)
|
||||||
|
);
|
||||||
|
return b;
|
||||||
|
}
|
||||||
|
|
||||||
inline long muls32_asr26(long a, long b);
|
/*
 * Fixed-point multiply for ColdFire using the EMAC accumulator: multiplies
 * a by b into acc0, then builds the return value from the accumulator
 * (high part, shifted left by 5) and from accext01 (low part, shifted right
 * by 3 and masked to 5 bits).
 *
 * NOTE(review): mac.l accumulates, so this presumably assumes acc0 is zero
 * on entry (movclr.l clears it again on the way out), and the "plus one
 * free" shift presumably relies on the EMAC being in fractional mode -
 * confirm against the caller's EMAC setup.
 */
inline static long muls32_asr26(long a, long b)
|
||||||
|
{
|
||||||
|
long r, t1;
|
||||||
|
asm (
|
||||||
|
"mac.l %[a], %[b], %%acc0 \n" /* multiply */
|
||||||
|
"move.l %%accext01, %[t1] \n" /* get low part */
|
||||||
|
"movclr.l %%acc0, %[r] \n" /* get high part */
|
||||||
|
"asl.l #5, %[r] \n" /* hi <<= 5, plus one free */
|
||||||
|
"lsr.l #3, %[t1] \n" /* lo >>= 3 */
|
||||||
|
"and.l #0x1f, %[t1] \n" /* mask out unrelated bits */
|
||||||
|
"or.l %[t1], %[r] \n" /* combine result */
|
||||||
|
: /* outputs */
|
||||||
|
[r] "=d"(r),
|
||||||
|
[t1]"=d"(t1)
|
||||||
|
: /* inputs */
|
||||||
|
[a] "d" (a),
|
||||||
|
[b] "d" (b)
|
||||||
|
);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,94 +0,0 @@
|
||||||
/***************************************************************************
|
|
||||||
* __________ __ ___.
|
|
||||||
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
|
|
||||||
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
|
|
||||||
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
|
|
||||||
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
|
|
||||||
* \/ \/ \/ \/ \/
|
|
||||||
* $Id$
|
|
||||||
*
|
|
||||||
* Copyright (C) 2009 Tomer Shalev
|
|
||||||
*
|
|
||||||
*
|
|
||||||
* This program is free software; you can redistribute it and/or
|
|
||||||
* modify it under the terms of the GNU General Public License
|
|
||||||
* as published by the Free Software Foundation; either version 2
|
|
||||||
* of the License, or (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
|
|
||||||
* KIND, either express or implied.
|
|
||||||
*
|
|
||||||
****************************************************************************/
|
|
||||||
#include "cpu_sh7043.h"
|
|
||||||
|
|
||||||
/*
 * Fixed-point multiply for SH: signed 16 x 16 bit multiply of a and b
 * (muls, result read back from MACL with sts), shifted right by 10 in
 * total (shlr8 + shlr2); the value is returned as a short.
 *
 * NOTE(review): the asm statement has no clobber list although muls writes
 * MACL - check whether the compiler needs to be told.
 */
inline short muls16_asr10(short a, short b)
|
|
||||||
{
|
|
||||||
short r;
|
|
||||||
asm (
|
|
||||||
"muls %[a],%[b] \n"
|
|
||||||
"sts macl,%[r] \n"
|
|
||||||
"shlr8 %[r] \n"
|
|
||||||
"shlr2 %[r] \n"
|
|
||||||
: /* outputs */
|
|
||||||
[r]"=r"(r)
|
|
||||||
: /* inputs */
|
|
||||||
[a]"r"(a),
|
|
||||||
[b]"r"(b)
|
|
||||||
);
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Fixed-point multiply for SH: computes the signed 64-bit product a * b
 * from four unsigned 16 x 16 bit multiplies (mulu) plus a sign correction
 * of the high word, then returns the low 32 bits of the product shifted
 * right by 26.
 *
 * r accumulates the high word and t2 the low word of the product; t1 and
 * t3 are scratch. All four are early-clobber outputs, so they are kept in
 * registers separate from a and b, which are read throughout the sequence.
 *
 * NOTE(review): the asm statement has no clobber list although it writes
 * MACL and the T bit - check whether the compiler needs to be told.
 */
inline long muls32_asr26(long a, long b)
|
|
||||||
{
|
|
||||||
long r, t1, t2, t3;
|
|
||||||
asm (
|
|
||||||
/* Signed 32bit * 32bit -> 64bit multiplication.
|
|
||||||
Notation: xxab * xxcd, where each letter represents 16 bits.
|
|
||||||
xx is the 64 bit sign extension. */
|
|
||||||
"swap.w %[a],%[t1] \n" /* t1 = ba */
|
|
||||||
"mulu %[t1],%[b] \n" /* a * d */
|
|
||||||
"swap.w %[b],%[t3] \n" /* t3 = dc */
|
|
||||||
"sts macl,%[t2] \n" /* t2 = a * d */
|
|
||||||
"mulu %[t1],%[t3] \n" /* a * c */
|
|
||||||
"sts macl,%[r] \n" /* hi = a * c */
|
|
||||||
"mulu %[a],%[t3] \n" /* b * c */
|
|
||||||
"clrt \n"
|
|
||||||
"sts macl,%[t3] \n" /* t3 = b * c */
|
|
||||||
"addc %[t2],%[t3] \n" /* t3 += t2, carry -> t2 */
|
|
||||||
"movt %[t2] \n"
|
|
||||||
"mulu %[a],%[b] \n" /* b * d */
|
|
||||||
"mov %[t3],%[t1] \n" /* t1t3 = t2t3 << 16 */
|
|
||||||
"xtrct %[t2],%[t1] \n"
|
|
||||||
"shll16 %[t3] \n"
|
|
||||||
"sts macl,%[t2] \n" /* lo = b * d */
|
|
||||||
"clrt \n" /* hi.lo += t1t3 */
|
|
||||||
"addc %[t3],%[t2] \n"
|
|
||||||
"addc %[t1],%[r] \n"
|
|
||||||
"cmp/pz %[a] \n" /* ab >= 0 ? */
|
|
||||||
"bt 1f \n"
|
|
||||||
"sub %[b],%[r] \n" /* no: hi -= cd (sign extension of ab is -1) */
|
|
||||||
"1: \n"
|
|
||||||
"cmp/pz %[b] \n" /* cd >= 0 ? */
|
|
||||||
"bt 2f \n"
|
|
||||||
"sub %[a],%[r] \n" /* no: hi -= ab (sign extension of cd is -1) */
|
|
||||||
"2: \n"
|
|
||||||
/* Shift right by 26 and return low 32 bits */
|
|
||||||
"shll2 %[r] \n" /* hi <<= 6 */
|
|
||||||
"shll2 %[r] \n"
|
|
||||||
"shll2 %[r] \n"
|
|
||||||
"shlr16 %[t2] \n" /* (unsigned)lo >>= 26 */
|
|
||||||
"shlr8 %[t2] \n"
|
|
||||||
"shlr2 %[t2] \n"
|
|
||||||
"or %[t2],%[r] \n" /* combine result */
|
|
||||||
: /* outputs */
|
|
||||||
[r] "=&r"(r),
|
|
||||||
[t1]"=&r"(t1),
|
|
||||||
[t2]"=&r"(t2),
|
|
||||||
[t3]"=&r"(t3)
|
|
||||||
: /* inputs */
|
|
||||||
[a] "r" (a),
|
|
||||||
[b] "r" (b)
|
|
||||||
);
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
|
|
|
@ -22,8 +22,75 @@
|
||||||
#ifndef _CPU_SH7043_H
|
#ifndef _CPU_SH7043_H
|
||||||
#define _CPU_SH7043_H
|
#define _CPU_SH7043_H
|
||||||
|
|
||||||
inline short muls16_asr10(short a, short b);
|
/*
 * Fixed-point multiply for SH: signed 16 x 16 bit multiply of a and b
 * (muls, result read back from MACL with sts), shifted right by 10 in
 * total (shlr8 + shlr2); the value is returned as a short.
 *
 * NOTE(review): the asm statement has no clobber list although muls writes
 * MACL - check whether the compiler needs to be told.
 */
inline static short muls16_asr10(short a, short b)
|
||||||
|
{
|
||||||
|
short r;
|
||||||
|
asm (
|
||||||
|
"muls %[a],%[b] \n"
|
||||||
|
"sts macl,%[r] \n"
|
||||||
|
"shlr8 %[r] \n"
|
||||||
|
"shlr2 %[r] \n"
|
||||||
|
: /* outputs */
|
||||||
|
[r]"=r"(r)
|
||||||
|
: /* inputs */
|
||||||
|
[a]"r"(a),
|
||||||
|
[b]"r"(b)
|
||||||
|
);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
inline long muls32_asr26(long a, long b);
|
/*
 * Fixed-point multiply for SH: computes the signed 64-bit product a * b
 * from four unsigned 16 x 16 bit multiplies (mulu) plus a sign correction
 * of the high word, then returns the low 32 bits of the product shifted
 * right by 26.
 *
 * r accumulates the high word and t2 the low word of the product; t1 and
 * t3 are scratch. All four are early-clobber outputs, so they are kept in
 * registers separate from a and b, which are read throughout the sequence.
 *
 * NOTE(review): the asm statement has no clobber list although it writes
 * MACL and the T bit - check whether the compiler needs to be told.
 */
inline static long muls32_asr26(long a, long b)
|
||||||
|
{
|
||||||
|
long r, t1, t2, t3;
|
||||||
|
asm (
|
||||||
|
/* Signed 32bit * 32bit -> 64bit multiplication.
|
||||||
|
Notation: xxab * xxcd, where each letter represents 16 bits.
|
||||||
|
xx is the 64 bit sign extension. */
|
||||||
|
"swap.w %[a],%[t1] \n" /* t1 = ba */
|
||||||
|
"mulu %[t1],%[b] \n" /* a * d */
|
||||||
|
"swap.w %[b],%[t3] \n" /* t3 = dc */
|
||||||
|
"sts macl,%[t2] \n" /* t2 = a * d */
|
||||||
|
"mulu %[t1],%[t3] \n" /* a * c */
|
||||||
|
"sts macl,%[r] \n" /* hi = a * c */
|
||||||
|
"mulu %[a],%[t3] \n" /* b * c */
|
||||||
|
"clrt \n"
|
||||||
|
"sts macl,%[t3] \n" /* t3 = b * c */
|
||||||
|
"addc %[t2],%[t3] \n" /* t3 += t2, carry -> t2 */
|
||||||
|
"movt %[t2] \n"
|
||||||
|
"mulu %[a],%[b] \n" /* b * d */
|
||||||
|
"mov %[t3],%[t1] \n" /* t1t3 = t2t3 << 16 */
|
||||||
|
"xtrct %[t2],%[t1] \n"
|
||||||
|
"shll16 %[t3] \n"
|
||||||
|
"sts macl,%[t2] \n" /* lo = b * d */
|
||||||
|
"clrt \n" /* hi.lo += t1t3 */
|
||||||
|
"addc %[t3],%[t2] \n"
|
||||||
|
"addc %[t1],%[r] \n"
|
||||||
|
"cmp/pz %[a] \n" /* ab >= 0 ? */
|
||||||
|
"bt 1f \n"
|
||||||
|
"sub %[b],%[r] \n" /* no: hi -= cd (sign extension of ab is -1) */
|
||||||
|
"1: \n"
|
||||||
|
"cmp/pz %[b] \n" /* cd >= 0 ? */
|
||||||
|
"bt 2f \n"
|
||||||
|
"sub %[a],%[r] \n" /* no: hi -= ab (sign extension of cd is -1) */
|
||||||
|
"2: \n"
|
||||||
|
/* Shift right by 26 and return low 32 bits */
|
||||||
|
"shll2 %[r] \n" /* hi <<= 6 */
|
||||||
|
"shll2 %[r] \n"
|
||||||
|
"shll2 %[r] \n"
|
||||||
|
"shlr16 %[t2] \n" /* (unsigned)lo >>= 26 */
|
||||||
|
"shlr8 %[t2] \n"
|
||||||
|
"shlr2 %[t2] \n"
|
||||||
|
"or %[t2],%[r] \n" /* combine result */
|
||||||
|
: /* outputs */
|
||||||
|
[r] "=&r"(r),
|
||||||
|
[t1]"=&r"(t1),
|
||||||
|
[t2]"=&r"(t2),
|
||||||
|
[t3]"=&r"(t3)
|
||||||
|
: /* inputs */
|
||||||
|
[a] "r" (a),
|
||||||
|
[b] "r" (b)
|
||||||
|
);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue