Move optimized memcpy and friends and strlen to firmware/asm,

using the new automatic-asm-picking infrastructure.
This commit is contained in:
Thomas Martitz 2012-01-07 19:56:09 +01:00
parent 8e8e978de6
commit a035261089
22 changed files with 7 additions and 34 deletions

View file

@ -1,117 +0,0 @@
/*
FUNCTION
<<memcpy>>---copy memory regions
ANSI_SYNOPSIS
#include <string.h>
void* memcpy(void *<[out]>, const void *<[in]>, size_t <[n]>);
TRAD_SYNOPSIS
void *memcpy(<[out]>, <[in]>, <[n]>
void *<[out]>;
void *<[in]>;
size_t <[n]>;
DESCRIPTION
This function copies <[n]> bytes from the memory region
pointed to by <[in]> to the memory region pointed to by
<[out]>.
If the regions overlap, the behavior is undefined.
RETURNS
<<memcpy>> returns a pointer to the first byte of the <[out]>
region.
PORTABILITY
<<memcpy>> is ANSI C.
<<memcpy>> requires no supporting OS subroutines.
QUICKREF
memcpy ansi pure
*/
#include "config.h"
#include "_ansi.h" /* for _DEFUN */
#include <string.h>
/* Nonzero if either X or Y is not aligned on a "long" boundary. */
#define UNALIGNED(X, Y) \
(((long)X & (sizeof (long) - 1)) | ((long)Y & (sizeof (long) - 1)))
/* How many bytes are copied each iteration of the 4X unrolled loop. */
#define BIGBLOCKSIZE (sizeof (long) << 2)
/* How many bytes are copied each iteration of the word copy loop. */
#define LITTLEBLOCKSIZE (sizeof (long))
/* Threshold for punting to the byte copier. */
#define TOO_SMALL(LEN) ((LEN) < BIGBLOCKSIZE)
_PTR
_DEFUN (memcpy, (dst0, src0, len0),
_PTR dst0 _AND
_CONST _PTR src0 _AND
size_t len0) ICODE_ATTR;
_PTR
_DEFUN (memcpy, (dst0, src0, len0),
_PTR dst0 _AND
_CONST _PTR src0 _AND
size_t len0)
{
#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
char *dst = (char *) dst0;
char *src = (char *) src0;
_PTR save = dst0;
while (len0--)
{
*dst++ = *src++;
}
return save;
#else
char *dst = dst0;
_CONST char *src = src0;
long *aligned_dst;
_CONST long *aligned_src;
unsigned int len = len0;
/* If the size is small, or either SRC or DST is unaligned,
then punt into the byte copy loop. This should be rare. */
if (!TOO_SMALL(len) && !UNALIGNED (src, dst))
{
aligned_dst = (long*)dst;
aligned_src = (long*)src;
/* Copy 4X long words at a time if possible. */
while (len >= BIGBLOCKSIZE)
{
*aligned_dst++ = *aligned_src++;
*aligned_dst++ = *aligned_src++;
*aligned_dst++ = *aligned_src++;
*aligned_dst++ = *aligned_src++;
len -= (unsigned int)BIGBLOCKSIZE;
}
/* Copy one long word at a time if possible. */
while (len >= LITTLEBLOCKSIZE)
{
*aligned_dst++ = *aligned_src++;
len -= LITTLEBLOCKSIZE;
}
/* Pick up any residual with a byte copier. */
dst = (char*)aligned_dst;
src = (char*)aligned_src;
}
while (len--)
*dst++ = *src++;
return dst0;
#endif /* not PREFER_SIZE_OVER_SPEED */
}

View file

@ -1,147 +0,0 @@
/*
FUNCTION
<<memmove>>---move possibly overlapping memory
INDEX
memmove
ANSI_SYNOPSIS
#include <string.h>
void *memmove(void *<[dst]>, const void *<[src]>, size_t <[length]>);
TRAD_SYNOPSIS
#include <string.h>
void *memmove(<[dst]>, <[src]>, <[length]>)
void *<[dst]>;
void *<[src]>;
size_t <[length]>;
DESCRIPTION
This function moves <[length]> characters from the block of
memory starting at <<*<[src]>>> to the memory starting at
<<*<[dst]>>>. <<memmove>> reproduces the characters correctly
at <<*<[dst]>>> even if the two areas overlap.
RETURNS
The function returns <[dst]> as passed.
PORTABILITY
<<memmove>> is ANSI C.
<<memmove>> requires no supporting OS subroutines.
QUICKREF
memmove ansi pure
*/
#include "config.h"
#include <_ansi.h>
#include <string.h>
/* Nonzero if either X or Y is not aligned on a "long" boundary. */
#define UNALIGNED(X, Y) \
(((long)X & (sizeof (long) - 1)) | ((long)Y & (sizeof (long) - 1)))
/* How many bytes are copied each iteration of the 4X unrolled loop. */
#define BIGBLOCKSIZE (sizeof (long) << 2)
/* How many bytes are copied each iteration of the word copy loop. */
#define LITTLEBLOCKSIZE (sizeof (long))
/* Threshhold for punting to the byte copier. */
#define TOO_SMALL(LEN) ((LEN) < BIGBLOCKSIZE)
_PTR
_DEFUN (memmove, (dst_void, src_void, length),
_PTR dst_void _AND
_CONST _PTR src_void _AND
size_t length) ICODE_ATTR;
_PTR
_DEFUN (memmove, (dst_void, src_void, length),
_PTR dst_void _AND
_CONST _PTR src_void _AND
size_t length)
{
#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
char *dst = dst_void;
_CONST char *src = src_void;
if (src < dst && dst < src + length)
{
/* Have to copy backwards */
src += length;
dst += length;
while (length--)
{
*--dst = *--src;
}
}
else
{
while (length--)
{
*dst++ = *src++;
}
}
return dst_void;
#else
char *dst = dst_void;
_CONST char *src = src_void;
long *aligned_dst;
_CONST long *aligned_src;
unsigned int len = length;
if (src < dst && dst < src + len)
{
/* Destructive overlap...have to copy backwards */
src += len;
dst += len;
while (len--)
{
*--dst = *--src;
}
}
else
{
/* Use optimizing algorithm for a non-destructive copy to closely
match memcpy. If the size is small or either SRC or DST is unaligned,
then punt into the byte copy loop. This should be rare. */
if (!TOO_SMALL(len) && !UNALIGNED (src, dst))
{
aligned_dst = (long*)dst;
aligned_src = (long*)src;
/* Copy 4X long words at a time if possible. */
while (len >= BIGBLOCKSIZE)
{
*aligned_dst++ = *aligned_src++;
*aligned_dst++ = *aligned_src++;
*aligned_dst++ = *aligned_src++;
*aligned_dst++ = *aligned_src++;
len -= BIGBLOCKSIZE;
}
/* Copy one long word at a time if possible. */
while (len >= LITTLEBLOCKSIZE)
{
*aligned_dst++ = *aligned_src++;
len -= LITTLEBLOCKSIZE;
}
/* Pick up any residual with a byte copier. */
dst = (char*)aligned_dst;
src = (char*)aligned_src;
}
while (len--)
{
*dst++ = *src++;
}
}
return dst_void;
#endif /* not PREFER_SIZE_OVER_SPEED */
}

View file

@ -1,110 +0,0 @@
/*
FUNCTION
<<memset>>---set an area of memory
INDEX
memset
ANSI_SYNOPSIS
#include <string.h>
void *memset(const void *<[dst]>, int <[c]>, size_t <[length]>);
TRAD_SYNOPSIS
#include <string.h>
void *memset(<[dst]>, <[c]>, <[length]>)
void *<[dst]>;
int <[c]>;
size_t <[length]>;
DESCRIPTION
This function converts the argument <[c]> into an unsigned
char and fills the first <[length]> characters of the array
pointed to by <[dst]> to the value.
RETURNS
<<memset>> returns the value of <[m]>.
PORTABILITY
<<memset>> is ANSI C.
<<memset>> requires no supporting OS subroutines.
QUICKREF
memset ansi pure
*/
#include <string.h>
#include "_ansi.h"
#define LBLOCKSIZE (sizeof(long))
#define UNALIGNED(X) ((long)X & (LBLOCKSIZE - 1))
#define TOO_SMALL(LEN) ((LEN) < LBLOCKSIZE)
_PTR
_DEFUN (memset, (m, c, n),
_PTR m _AND
int c _AND
size_t n)
{
#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
char *s = (char *) m;
while (n-- != 0)
{
*s++ = (char) c;
}
return m;
#else
char *s = (char *) m;
unsigned int i;
unsigned long buffer;
unsigned long *aligned_addr;
if (!TOO_SMALL (n) && !UNALIGNED (m))
{
/* If we get this far, we know that n is large and m is word-aligned. */
aligned_addr = (unsigned long*)m;
/* Store C into each char sized location in BUFFER so that
we can set large blocks quickly. */
c &= 0xff;
if (LBLOCKSIZE == 4)
{
buffer = (c << 8) | c;
buffer |= (buffer << 16);
}
else
{
buffer = 0;
for (i = 0; i < LBLOCKSIZE; i++)
buffer = (buffer << 8) | c;
}
while (n >= LBLOCKSIZE*4)
{
*aligned_addr++ = buffer;
*aligned_addr++ = buffer;
*aligned_addr++ = buffer;
*aligned_addr++ = buffer;
n -= 4*LBLOCKSIZE;
}
while (n >= LBLOCKSIZE)
{
*aligned_addr++ = buffer;
n -= LBLOCKSIZE;
}
/* Pick up the remainder with a bytewise loop. */
s = (char*)aligned_addr;
}
while (n--)
{
*s++ = (char)c;
}
return m;
#endif /* not PREFER_SIZE_OVER_SPEED */
}

View file

@ -1,93 +0,0 @@
/*
FUNCTION
<<strlen>>---character string length
INDEX
strlen
ANSI_SYNOPSIS
#include <string.h>
size_t strlen(const char *<[str]>);
TRAD_SYNOPSIS
#include <string.h>
size_t strlen(<[str]>)
char *<[src]>;
DESCRIPTION
The <<strlen>> function works out the length of the string
starting at <<*<[str]>>> by counting chararacters until it
reaches a <<NULL>> character.
RETURNS
<<strlen>> returns the character count.
PORTABILITY
<<strlen>> is ANSI C.
<<strlen>> requires no supporting OS subroutines.
QUICKREF
strlen ansi pure
*/
#include "config.h"
#include "_ansi.h"
#include <string.h>
#include <limits.h>
#define LBLOCKSIZE (sizeof (long))
#define UNALIGNED(X) ((long)X & (LBLOCKSIZE - 1))
#if LONG_MAX == 2147483647L
#define DETECTNULL(X) (((X) - 0x01010101) & ~(X) & 0x80808080)
#else
#if LONG_MAX == 9223372036854775807L
/* Nonzero if X (a long int) contains a NULL byte. */
#define DETECTNULL(X) (((X) - 0x0101010101010101) & ~(X) & 0x8080808080808080)
#else
#error long int is not a 32bit or 64bit type.
#endif
#endif
#ifndef DETECTNULL
#error long int is not a 32bit or 64bit byte
#endif
size_t
_DEFUN (strlen, (str),
_CONST char *str) ICODE_ATTR;
size_t
_DEFUN (strlen, (str),
_CONST char *str)
{
#if defined(PREFER_SIZE_OVER_SPEED) || defined(__OPTIMIZE_SIZE__)
_CONST char *start = str;
while (*str)
str++;
return str - start;
#else
_CONST char *start = str;
unsigned long *aligned_addr;
if (!UNALIGNED (str))
{
/* If the string is word-aligned, we can check for the presence of
a null in each word-sized block. */
aligned_addr = (unsigned long*)str;
while (!DETECTNULL (*aligned_addr))
aligned_addr++;
/* Once a null is detected, we check each byte in that block for a
precise position of the null. */
str = (char*)aligned_addr;
}
while (*str)
str++;
return str - start;
#endif /* not PREFER_SIZE_OVER_SPEED */
}