forked from len0rd/rockbox
Assembler optimised memset16() for ARM, by Thom Johansen. Should speed up LCD clearing and solid rectangle drawing on colour iPods somewhat.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@10900 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
825fb8a264
commit
9d2f7b5c6d
4 changed files with 83 additions and 7 deletions
|
|
@ -53,7 +53,7 @@ target/sh/memset-sh.S
|
||||||
common/memcpy.c
|
common/memcpy.c
|
||||||
common/memmove.c
|
common/memmove.c
|
||||||
target/arm/memset-arm.S
|
target/arm/memset-arm.S
|
||||||
common/memset16.c
|
target/arm/memset16-arm.S
|
||||||
#else
|
#else
|
||||||
common/memcpy.c
|
common/memcpy.c
|
||||||
common/memmove.c
|
common/memmove.c
|
||||||
|
|
|
||||||
|
|
@ -22,15 +22,13 @@
|
||||||
#define UNALIGNED(X) ((long)X & (sizeof(long) - 1))
|
#define UNALIGNED(X) ((long)X & (sizeof(long) - 1))
|
||||||
#define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE)
|
#define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE)
|
||||||
|
|
||||||
void *memset16(void *dst, int val, size_t len)
|
void memset16(void *dst, int val, size_t len)
|
||||||
{
|
{
|
||||||
#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
|
#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
|
||||||
unsigned short *p = (unsigned short *)dst;
|
unsigned short *p = (unsigned short *)dst;
|
||||||
|
|
||||||
while (len--)
|
while (len--)
|
||||||
*p++ = val;
|
*p++ = val;
|
||||||
|
|
||||||
return dst;
|
|
||||||
#else
|
#else
|
||||||
unsigned short *p = (unsigned short *)dst;
|
unsigned short *p = (unsigned short *)dst;
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
|
|
@ -73,7 +71,5 @@ void *memset16(void *dst, int val, size_t len)
|
||||||
|
|
||||||
while (len--)
|
while (len--)
|
||||||
*p++ = val;
|
*p++ = val;
|
||||||
|
|
||||||
return dst;
|
|
||||||
#endif /* not PREFER_SIZE_OVER_SPEED */
|
#endif /* not PREFER_SIZE_OVER_SPEED */
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -22,6 +22,6 @@
|
||||||
|
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
|
|
||||||
void *memset16(void *dst, int val, size_t len);
|
void memset16(void *dst, int val, size_t len);
|
||||||
|
|
||||||
#endif /* _MEMORY_H_ */
|
#endif /* _MEMORY_H_ */
|
||||||
|
|
|
||||||
80
firmware/target/arm/memset16-arm.S
Executable file
80
firmware/target/arm/memset16-arm.S
Executable file
|
|
@ -0,0 +1,80 @@
|
||||||
|
/***************************************************************************
|
||||||
|
* __________ __ ___.
|
||||||
|
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
|
||||||
|
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
|
||||||
|
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
|
||||||
|
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
|
||||||
|
* \/ \/ \/ \/ \/
|
||||||
|
* $Id$
|
||||||
|
*
|
||||||
|
* Copyright (C) 2006 by Thom Johansen
|
||||||
|
*
|
||||||
|
* All files in this archive are subject to the GNU General Public License.
|
||||||
|
* See the file COPYING in the source tree root for full license agreement.
|
||||||
|
*
|
||||||
|
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
|
||||||
|
* KIND, either express or implied.
|
||||||
|
*
|
||||||
|
****************************************************************************/
|
||||||
|
#include "config.h"
|
||||||
|
|
||||||
|
.section .icode,"ax",%progbits
|
||||||
|
|
||||||
|
.align 2
|
||||||
|
|
||||||
|
/* The following code is based on code from the Linux kernel version 2.6.15.3,
|
||||||
|
* linux/arch/arm/lib/memset.S
|
||||||
|
*
|
||||||
|
* Copyright (C) 1995-2000 Russell King
|
||||||
|
*/
|
||||||
|
|
||||||
|
.global memset16
|
||||||
|
.type memset16,%function
|
||||||
|
/*
 * void memset16(void *dst, int val, size_t len)
 *
 * Fill len halfwords (16-bit units, not bytes) starting at dst with the low
 * 16 bits of val.
 *
 * In:     r0 = dst. Only bit 1 of the address is examined, so dst is
 *              expected to be halfword aligned.
 *         r1 = val (only the low 16 bits are used)
 *         r2 = len, counted in halfwords
 * Out:    no return value; r0 is left pointing just past the last
 *         halfword written.
 * Clobb:  r0-r3, ip, flags. lr is used as a scratch register in the bulk
 *         loop, but is saved on the stack first and restored afterwards.
 */
memset16:
        tst     r0, #2                  @ unaligned?
        cmpne   r2, #0                  @ ...and is there anything to store?
        strneh  r1, [r0], #2            @ store one halfword to align
        subne   r2, r2, #1

/*
 * r0 is now word aligned, or the count is zero and nothing will be stored.
 *
 * Replicate the low halfword of val into both halves of r1. Shifting up and
 * then OR-ing the logically shifted-down copy discards the upper 16 bits of
 * val first, so a sign-extended or out-of-range value cannot leak into the
 * upper halfword of the word stores below.
 */
        mov     r1, r1, lsl #16
        orr     r1, r1, r1, lsr #16
        mov     r3, r1
        cmp     r2, #8
        blt     4f                      @ fewer than 8 halfwords: tail only

/*
 * We need an extra register for this loop - save the return address and
 * use the LR
 */
        str     lr, [sp, #-4]!
        mov     ip, r1
        mov     lr, r1

2:      subs    r2, r2, #32             @ 32 halfwords = 64 bytes per pass
        stmgeia r0!, {r1, r3, ip, lr}   @ 4 x 16 bytes, only if >= 32 were left
        stmgeia r0!, {r1, r3, ip, lr}
        stmgeia r0!, {r1, r3, ip, lr}
        stmgeia r0!, {r1, r3, ip, lr}
        bgt     2b                      @ more than 32 were left: go again
        ldmeqfd sp!, {pc}               @ exactly 32 were left: pop saved lr
                                        @ straight into pc and return

/*
 * r2 is negative here, but its low five bits still equal the number of
 * halfwords left (1..31). No need to correct the count; we're only testing
 * bits from now on.
 */
        tst     r2, #16                 @ 16 halfwords = 32 bytes
        stmneia r0!, {r1, r3, ip, lr}
        stmneia r0!, {r1, r3, ip, lr}
        tst     r2, #8                  @ 8 halfwords = 16 bytes
        stmneia r0!, {r1, r3, ip, lr}
        ldr     lr, [sp], #4            @ restore the return address

4:      tst     r2, #4                  @ 4 halfwords = 8 bytes
        stmneia r0!, {r1, r3}
        tst     r2, #2                  @ 2 halfwords = one word
        strne   r1, [r0], #4

        tst     r2, #1                  @ final odd halfword
        strneh  r1, [r0], #2
        bx      lr

        .size   memset16, .-memset16
|
||||||
Loading…
Add table
Add a link
Reference in a new issue