1
0
Fork 0
forked from len0rd/rockbox

Assembler optimised memset16() for ARM, by Thom Johansen. Should speed up LCD clearing and solid rectangle drawing on colour iPods somewhat.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@10900 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Jens Arnold 2006-09-07 00:16:04 +00:00
parent 825fb8a264
commit 9d2f7b5c6d
4 changed files with 83 additions and 7 deletions

View file

@ -53,7 +53,7 @@ target/sh/memset-sh.S
common/memcpy.c
common/memmove.c
target/arm/memset-arm.S
common/memset16.c
target/arm/memset16-arm.S
#else
common/memcpy.c
common/memmove.c

View file

@ -22,15 +22,13 @@
#define UNALIGNED(X) ((long)X & (sizeof(long) - 1))
#define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE)
void *memset16(void *dst, int val, size_t len)
void memset16(void *dst, int val, size_t len)
{
#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
unsigned short *p = (unsigned short *)dst;
while (len--)
*p++ = val;
return dst;
#else
unsigned short *p = (unsigned short *)dst;
unsigned int i;
@ -73,7 +71,5 @@ void *memset16(void *dst, int val, size_t len)
while (len--)
*p++ = val;
return dst;
#endif /* not PREFER_SIZE_OVER_SPEED */
}

View file

@ -22,6 +22,6 @@
#include <sys/types.h>
void *memset16(void *dst, int val, size_t len);
void memset16(void *dst, int val, size_t len);
#endif /* _MEMORY_H_ */

View file

@ -0,0 +1,80 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2006 by Thom Johansen
*
* All files in this archive are subject to the GNU General Public License.
* See the file COPYING in the source tree root for full license agreement.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include "config.h"
/*
 * void memset16(void *dst, int val, size_t len)
 *
 * Store the low 16 bits of val into len consecutive halfwords starting at
 * dst.
 *
 * In:     r0 = dst (only bit 1 is examined, so the pointer is assumed to be
 *              halfword aligned)
 *         r1 = val (bits 16..31 are ignored)
 *         r2 = len, counted in halfwords
 * Out:    nothing; r0 is left pointing just past the last halfword written
 * Clobb:  r0-r3, ip, flags (lr is saved on the stack and restored)
 */
        .section    .icode,"ax",%progbits
        .align      2

/* The following code is based on code from the Linux kernel version 2.6.15.3,
 * linux/arch/arm/lib/memset.S
 *
 * Copyright (C) 1995-2000 Russell King
 */
        .global     memset16
        .type       memset16,%function
memset16:
        tst     r0, #2                  @ dst not word aligned?
        cmpne   r2, #0                  @ ...and at least one halfword to do?
        strneh  r1, [r0], #2            @ store one halfword to align
        subne   r2, r2, #1

/*
 * r0 is now word aligned (or there is nothing left to store).
 *
 * Build the 32-bit fill pattern from the low halfword of val only. Shifting
 * left first throws away bits 16..31; a plain "orr r1, r1, r1, lsl #16"
 * would merge them into the upper halfword of every word store whenever val
 * is negative or otherwise wider than 16 bits.
 */
        mov     r1, r1, lsl #16
        orr     r1, r1, r1, lsr #16     @ r1 = val16 | (val16 << 16)
        mov     r3, r1

        cmp     r2, #8
        blt     4f                      @ fewer than 8 halfwords: tail only

/*
 * We need an extra register for this loop - save the return address and
 * use the LR
 */
        str     lr, [sp, #-4]!
        mov     ip, r1
        mov     lr, r1

2:      subs    r2, r2, #32             @ 32 halfwords per iteration
        stmgeia r0!, {r1, r3, ip, lr}   @ 64 bytes at a time.
        stmgeia r0!, {r1, r3, ip, lr}
        stmgeia r0!, {r1, r3, ip, lr}
        stmgeia r0!, {r1, r3, ip, lr}
        bgt     2b
        ldmeqfd sp!, {pc}               @ Now <64 bytes to go.

/*
 * No need to correct the count; we're only testing bits from now on.
 * r2 is negative here, but subtracting multiples of 32 left its low five
 * bits equal to the number of halfwords still to be written.
 */
        tst     r2, #16                 @ 16 halfwords = 32 bytes
        stmneia r0!, {r1, r3, ip, lr}
        stmneia r0!, {r1, r3, ip, lr}
        tst     r2, #8                  @ 8 halfwords = 16 bytes
        stmneia r0!, {r1, r3, ip, lr}
        ldr     lr, [sp], #4            @ done with lr as a data register

4:      tst     r2, #4                  @ 4 halfwords = 8 bytes
        stmneia r0!, {r1, r3}
        tst     r2, #2                  @ 2 halfwords = one word
        strne   r1, [r0], #4
        tst     r2, #1                  @ final odd halfword
        strneh  r1, [r0], #2
        bx      lr
        .size   memset16, .-memset16