Change the alignment macros to allow further performance optimization. Define the CACHEALIGN macros for all ARM CPUs; the alignment size used is derived from CACHEALIGN_BITS, which was defined for each supported ARM CPU in r28619. The default alignment size for ARM is set to 32 bytes, as new (not yet supported) ARM CPUs will most probably need this alignment. To distinguish ARM from other CPUs, a new macro called MEM_ALIGN_ATTR is introduced. It equals CACHEALIGN_ATTR on ARM, expands to a fixed 16-byte alignment on Coldfire, and is left empty for other CPUs. MEM_ALIGN_ATTR is available system-wide. Measurements suggest that using MEM_ALIGN_ATTR can yield a significant performance gain on ARM11 CPUs.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@28625 a1c6a512-1295-4272-9138-f99709370657
Andree Buschmann 2010-11-21 12:27:01 +00:00
parent f54cbfa164
commit eef21cb18a
2 changed files with 64 additions and 49 deletions

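To make the intended use concrete: a minimal sketch, assuming the new system.h is included; the buffer name and size are invented for illustration and do not come from this commit.

/* Hypothetical decode buffer. On ARM this picks up cacheline alignment,
 * on Coldfire a fixed 16-byte alignment, and on other CPUs no attribute
 * at all, per the MEM_ALIGN_ATTR definition in the diff below. */
static int32_t samples[1024] MEM_ALIGN_ATTR;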

@@ -309,58 +309,77 @@ static inline void cpucache_flush(void)
 }
 #endif
 
-#ifndef CACHEALIGN_SIZE /* could be elsewhere for a particular reason */
-#ifdef CACHEALIGN_BITS
-/* 2^CACHEALIGN_BITS = the byte size */
-#define CACHEALIGN_SIZE (1u << CACHEALIGN_BITS)
-#else
-#define CACHEALIGN_SIZE 16 /* FIXME */
-#endif
-#endif /* CACHEALIGN_SIZE */
+/* Define this, if the CPU may take advantage of cache alignment. Is enabled
+ * for all ARM CPUs. */
+#ifdef CPU_ARM
+#define HAVE_CPU_CACHE_ALIGN
+#endif
+
+/* Calculate CACHEALIGN_SIZE from CACHEALIGN_BITS */
+#ifdef CACHEALIGN_SIZE
+/* undefine, if defined. always calculate from CACHEALIGN_BITS */
+#undef CACHEALIGN_SIZE
+#endif
+#ifdef CACHEALIGN_BITS
+/* CACHEALIGN_SIZE = 2 ^ CACHEALIGN_BITS */
+#define CACHEALIGN_SIZE (1u << CACHEALIGN_BITS)
+#else
+/* FIXME: set to maximum known cache alignment of supported CPUs */
+#define CACHEALIGN_BITS 5
+#define CACHEALIGN_SIZE 32
+#endif
 
-#ifdef PROC_NEEDS_CACHEALIGN
+#ifdef HAVE_CPU_CACHE_ALIGN
 /* Cache alignment attributes and sizes are enabled */
 #define CACHEALIGN_ATTR __attribute__((aligned(CACHEALIGN_SIZE)))
 /* Aligns x up to a CACHEALIGN_SIZE boundary */
 #define CACHEALIGN_UP(x) \
     ((typeof (x))ALIGN_UP_P2((uintptr_t)(x), CACHEALIGN_BITS))
 /* Aligns x down to a CACHEALIGN_SIZE boundary */
 #define CACHEALIGN_DOWN(x) \
     ((typeof (x))ALIGN_DOWN_P2((uintptr_t)(x), CACHEALIGN_BITS))
 /* Aligns at least to the greater of size x or CACHEALIGN_SIZE */
 #define CACHEALIGN_AT_LEAST_ATTR(x) \
     __attribute__((aligned(CACHEALIGN_UP(x))))
 /* Aligns a buffer pointer and size to proper boundaries */
 #define CACHEALIGN_BUFFER(start, size) \
     ALIGN_BUFFER((start), (size), CACHEALIGN_SIZE)
-#else /* ndef PROC_NEEDS_CACHEALIGN */
+#else
 /* Cache alignment attributes and sizes are not enabled */
 #define CACHEALIGN_ATTR
-#define CACHEALIGN_AT_LEAST_ATTR(x) \
-    __attribute__((aligned(x)))
+#define CACHEALIGN_AT_LEAST_ATTR(x) __attribute__((aligned(x)))
 #define CACHEALIGN_UP(x) (x)
 #define CACHEALIGN_DOWN(x) (x)
 /* Make no adjustments */
 #define CACHEALIGN_BUFFER(start, size)
-#endif /* PROC_NEEDS_CACHEALIGN */
+#endif
+
+/* Define MEM_ALIGN_ATTR which may be used to align e.g. buffers for faster
+ * access. */
+#if defined(CPU_ARM)
+/* Use ARM's cache alignment. */
+#define MEM_ALIGN_ATTR CACHEALIGN_ATTR
+#elif defined(CPU_COLDFIRE)
+/* Use fixed alignment of 16 bytes. Speed up only for 'movem' in DRAM. */
+#define MEM_ALIGN_ATTR __attribute__((aligned(16)))
+#else
+/* Do nothing. */
+#define MEM_ALIGN_ATTR
+#endif
 
 #ifdef STORAGE_WANTS_ALIGN
 #define STORAGE_ALIGN_ATTR __attribute__((aligned(CACHEALIGN_SIZE)))
 #define STORAGE_ALIGN_DOWN(x) \
     ((typeof (x))ALIGN_DOWN_P2((uintptr_t)(x), CACHEALIGN_BITS))
 /* Pad a size so the buffer can be aligned later */
 #define STORAGE_PAD(x) ((x) + CACHEALIGN_SIZE - 1)
 /* Number of bytes in the last cacheline assuming buffer of size x is aligned */
 #define STORAGE_OVERLAP(x) ((x) & (CACHEALIGN_SIZE - 1))
 #else
 #define STORAGE_ALIGN_ATTR
 #define STORAGE_ALIGN_DOWN(x) (x)
 #define STORAGE_PAD(x) (x)
 #define STORAGE_OVERLAP(x) 0
 #endif
 
 /* Double-cast to avoid 'dereferencing type-punned pointer will

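To make the alignment arithmetic above concrete, here is a small standalone sketch. The ALIGN_UP_P2/ALIGN_DOWN_P2 helpers are reimplemented with the usual power-of-two masking for self-containment and are assumptions, not copies of the Rockbox versions; CACHEALIGN_BITS = 5 matches the new ARM default, so CACHEALIGN_SIZE is 32.

#include <stdint.h>
#include <stdio.h>

/* Assumed power-of-two helpers (Rockbox defines its own equivalents). */
#define ALIGN_DOWN_P2(n, bits) ((n) & ~(((uintptr_t)1 << (bits)) - 1))
#define ALIGN_UP_P2(n, bits)   ALIGN_DOWN_P2((n) + (((uintptr_t)1 << (bits)) - 1), bits)

#define CACHEALIGN_BITS 5                       /* new ARM default */
#define CACHEALIGN_SIZE (1u << CACHEALIGN_BITS) /* 32 bytes */

int main(void)
{
    uintptr_t x = 50;
    /* 50 rounds up to 64 and down to 32 on a 32-byte grid. */
    printf("up:   %lu\n", (unsigned long)ALIGN_UP_P2(x, CACHEALIGN_BITS));
    printf("down: %lu\n", (unsigned long)ALIGN_DOWN_P2(x, CACHEALIGN_BITS));
    /* STORAGE_PAD reserves SIZE-1 extra bytes so a pointer inside the
     * padded region can always be aligned; STORAGE_OVERLAP is the tail
     * beyond the last full cacheline. */
    printf("pad(100):     %u\n", 100u + CACHEALIGN_SIZE - 1);   /* 131 */
    printf("overlap(100): %u\n", 100u & (CACHEALIGN_SIZE - 1)); /* 4 */
    return 0;
}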

@@ -158,10 +158,6 @@ static inline void wake_core(int core)
 ((typeof (a))((uintptr_t)(a) | UNCACHED_BASE_ADDR))
 #endif /* BOOTLOADER */
 
-/* Certain data needs to be out of the way of cache line interference
- * such as data for COP use or for use with UNCACHED_ADDR */
-#define PROC_NEEDS_CACHEALIGN
-
 #if defined(CPU_PP502x) && defined(HAVE_ATA_DMA)
 #define STORAGE_WANTS_ALIGN
 #endif
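Since STORAGE_WANTS_ALIGN stays defined for PP502x targets with ATA DMA, storage buffers there still get the cacheline treatment from the first hunk. As a hedged sketch of the two usual patterns (buffer names and the 512-byte sector size are invented, and system.h is assumed to be included):

/* Statically aligned DMA-capable buffer. */
static unsigned char sector_buf[512] STORAGE_ALIGN_ATTR;

/* Or reserve padded space and align the pointer at init time: rounding
 * (raw + CACHEALIGN_SIZE - 1) down lands on the first cacheline
 * boundary inside the padded region. */
static unsigned char raw[STORAGE_PAD(512)];

static unsigned char *storage_buf_init(void)
{
    return (unsigned char *)
        STORAGE_ALIGN_DOWN((uintptr_t)raw + CACHEALIGN_SIZE - 1);
}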