forked from len0rd/rockbox
Assembler optimised memset16() for ARM, by Thom Johansen. Should speed up LCD clearing and solid rectangle drawing on colour iPods somewhat.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@10900 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
825fb8a264
commit
9d2f7b5c6d
4 changed files with 83 additions and 7 deletions
|
|
@ -53,7 +53,7 @@ target/sh/memset-sh.S
|
|||
common/memcpy.c
|
||||
common/memmove.c
|
||||
target/arm/memset-arm.S
|
||||
common/memset16.c
|
||||
target/arm/memset16-arm.S
|
||||
#else
|
||||
common/memcpy.c
|
||||
common/memmove.c
|
||||
|
|
|
|||
|
|
@ -22,15 +22,13 @@
|
|||
#define UNALIGNED(X) ((long)X & (sizeof(long) - 1))
|
||||
#define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE)
|
||||
|
||||
void *memset16(void *dst, int val, size_t len)
|
||||
void memset16(void *dst, int val, size_t len)
|
||||
{
|
||||
#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
|
||||
unsigned short *p = (unsigned short *)dst;
|
||||
|
||||
while (len--)
|
||||
*p++ = val;
|
||||
|
||||
return dst;
|
||||
#else
|
||||
unsigned short *p = (unsigned short *)dst;
|
||||
unsigned int i;
|
||||
|
|
@ -73,7 +71,5 @@ void *memset16(void *dst, int val, size_t len)
|
|||
|
||||
while (len--)
|
||||
*p++ = val;
|
||||
|
||||
return dst;
|
||||
#endif /* not PREFER_SIZE_OVER_SPEED */
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,6 +22,6 @@
|
|||
|
||||
#include <sys/types.h>
|
||||
|
||||
void *memset16(void *dst, int val, size_t len);
|
||||
void memset16(void *dst, int val, size_t len);
|
||||
|
||||
#endif /* _MEMORY_H_ */
|
||||
|
|
|
|||
80
firmware/target/arm/memset16-arm.S
Executable file
80
firmware/target/arm/memset16-arm.S
Executable file
|
|
@ -0,0 +1,80 @@
|
|||
/***************************************************************************
|
||||
* __________ __ ___.
|
||||
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
|
||||
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
|
||||
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
|
||||
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
|
||||
* \/ \/ \/ \/ \/
|
||||
* $Id$
|
||||
*
|
||||
* Copyright (C) 2006 by Thom Johansen
|
||||
*
|
||||
* All files in this archive are subject to the GNU General Public License.
|
||||
* See the file COPYING in the source tree root for full license agreement.
|
||||
*
|
||||
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
|
||||
* KIND, either express or implied.
|
||||
*
|
||||
****************************************************************************/
|
||||
#include "config.h"
|
||||
|
||||
.section .icode,"ax",%progbits
|
||||
|
||||
.align 2
|
||||
|
||||
/* The following code is based on code from the Linux kernel version 2.6.15.3,
|
||||
* linux/arch/arm/lib/memset.S
|
||||
*
|
||||
* Copyright (C) 1995-2000 Russell King
|
||||
*/
|
||||
|
||||
/*
 * void memset16(void *dst, int val, size_t len)
 *
 * Store the low 16 bits of val into len consecutive halfwords starting at
 * dst.  ARM mode, pre-UAL GNU as syntax.
 *
 * In:    r0 = dst  (assumed halfword aligned: only bit 1 of the address is
 *                   examined before word/multi-word stores are used)
 *        r1 = val  (bits 16..31 are ignored)
 *        r2 = len  (count of halfwords, NOT bytes; compared as a signed
 *                   value, so counts >= 2^31 are not handled)
 * Out:   no return value; r0 is advanced past the last halfword stored
 * Clobb: r0-r3, ip, flags.  lr is pushed and restored when the bulk loop
 *        is entered (4 bytes of stack).
 */
        .global memset16
        .type   memset16,%function
memset16:
        tst     r0, #2                  @ dst not on a word boundary?
        cmpne   r2, #0                  @ ...and at least one halfword to set?
        strneh  r1, [r0], #2            @ yes: one halfword store aligns r0
        subne   r2, r2, #1

        /*
         * r0 is now word aligned (or nothing is left to store).
         * Build the 32-bit fill pattern val16:val16.  The shift pair first
         * discards bits 16..31 of val so that a sign-extended or otherwise
         * out-of-range argument cannot leak into the upper halfword.
         */
        mov     r1, r1, lsl #16         @ r1 = val16 << 16
        orr     r1, r1, r1, lsr #16     @ r1 = (val16 << 16) | val16
        mov     r3, r1
        cmp     r2, #8
        blt     4f                      @ fewer than 8 halfwords: tail only

        /*
         * Four pattern registers are needed for 16-byte stores, so lr is
         * saved on the stack and reused as the fourth one.
         */
        str     lr, [sp, #-4]!
        mov     ip, r1
        mov     lr, r1

2:      subs    r2, r2, #32             @ 32 halfwords per iteration
        stmgeia r0!, {r1, r3, ip, lr}   @ 4 x 16 bytes = 64 bytes at a time
        stmgeia r0!, {r1, r3, ip, lr}
        stmgeia r0!, {r1, r3, ip, lr}
        stmgeia r0!, {r1, r3, ip, lr}
        bgt     2b
        ldmeqfd sp!, {pc}               @ count hit exactly zero: return now

        /*
         * Fewer than 32 halfwords remain.  r2 went negative, but its low
         * five bits still equal the remaining count, and only those bits
         * are tested from here on, so no correction is needed.
         */
        tst     r2, #16                 @ 16 halfwords = 32 bytes
        stmneia r0!, {r1, r3, ip, lr}
        stmneia r0!, {r1, r3, ip, lr}
        tst     r2, #8                  @ 8 halfwords = 16 bytes
        stmneia r0!, {r1, r3, ip, lr}
        ldr     lr, [sp], #4            @ restore the return address

4:      tst     r2, #4                  @ 4 halfwords = 8 bytes
        stmneia r0!, {r1, r3}
        tst     r2, #2                  @ 2 halfwords = one word
        strne   r1, [r0], #4

        tst     r2, #1                  @ final odd halfword
        strneh  r1, [r0], #2
        bx      lr
        .size   memset16, . - memset16
|
||||
Loading…
Add table
Add a link
Reference in a new issue