1
0
Fork 0
forked from len0rd/rockbox

Thom Johansen's first EMAC optimisation for the Coldfire - about a 3%-4% speedup

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@6024 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Dave Chapman 2005-02-19 22:11:29 +00:00
parent a3ed6e9c7a
commit e9edc8f82d
4 changed files with 222 additions and 0 deletions

View file

@ -10,3 +10,6 @@ md5.c
memory.c
seekable_stream_decoder.c
stream_decoder.c
#if CONFIG_CPU==MCF5249
coldfire.c
#endif

View file

@ -0,0 +1,165 @@
#ifndef SIMULATOR
#include <private/coldfire.h>
/*
 * Restore a signal from its LPC residual, using the ColdFire EMAC unit
 * (mac.l / movclr.l on %acc0) for predictor orders 2..8.
 *
 * For every i in [0, data_len):
 *
 *   data[i] = residual[i] +
 *             ((qlp_coeff[0]*data[i-1] + ... + qlp_coeff[order-1]*data[i-order])
 *              >> lp_quantization)
 *
 *   residual        - data_len residual values
 *   data_len        - number of samples to reconstruct
 *   qlp_coeff       - `order` quantized predictor coefficients
 *   order           - predictor order; 1..8 take the unrolled paths below,
 *                     any other value takes the plain C loop in `default`
 *   lp_quantization - right shift applied to the accumulated sum
 *   data            - output buffer; data[-order] .. data[-1] are read as
 *                     history and must already be valid
 *
 * Side effect: MACSR is written with 0 (none of EMAC_SATURATE,
 * EMAC_FRACTIONAL or EMAC_ROUND set) and is not restored afterwards.
 */
void FLAC__lpc_restore_signal_order8_mac(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[])
{
    /* Points at the LAST coefficient; the asm walks it downwards (%1). */
    const FLAC__int32 *qlp0 = &qlp_coeff[order - 1];
    /* Points at the OLDEST history sample; the asm walks it upwards (%2).
     * Plain pointer subtraction instead of indexing with a negated
     * unsigned value, which only worked through 32-bit wrap-around. */
    const FLAC__int32 *history = data - order;
    FLAC__int32 sum;
    unsigned j;

    SET_MACSR(0);
    SET_ACC(0, acc0);

    /*
     * Every asm block below has the same shape:
     *   d0 <- coefficient, d1 <- history sample, then a chain of
     *   "mac.l d0, d1, <next history>, d1, acc0" (multiply-accumulate with a
     *   parallel load of the next history sample), finishing with movclr.l
     *   which hands the accumulator to `sum` and clears it for the next
     *   sample.
     *
     * The "memory" clobber is required: the asm reads through %1 and %2,
     * and %2 covers samples that were stored through `data` on previous
     * iterations. Without it the compiler is not told about that
     * dependency. "cc" is listed because mov.l updates the condition codes.
     */
    switch (order) {
    case 8:
        for ( ; data_len != 0; --data_len) {
            asm volatile(
                "mov.l (%1), %%d0\n\t"
                "mov.l (%2), %%d1\n\t"
                "mac.l %%d0, %%d1, 4(%2), %%d1, %%acc0\n\t"
                "mov.l -4(%1), %%d0\n\t"
                "mac.l %%d0, %%d1, 8(%2), %%d1, %%acc0\n\t"
                "mov.l -8(%1), %%d0\n\t"
                "mac.l %%d0, %%d1, 12(%2), %%d1, %%acc0\n\t"
                "mov.l -12(%1), %%d0\n\t"
                "mac.l %%d0, %%d1, 16(%2), %%d1, %%acc0\n\t"
                "mov.l -16(%1), %%d0\n\t"
                "mac.l %%d0, %%d1, 20(%2), %%d1, %%acc0\n\t"
                "mov.l -20(%1), %%d0\n\t"
                "mac.l %%d0, %%d1, 24(%2), %%d1, %%acc0\n\t"
                "mov.l -24(%1), %%d0\n\t"
                "mac.l %%d0, %%d1, 28(%2), %%d1, %%acc0\n\t"
                "mov.l -28(%1), %%d0\n\t"
                "mac.l %%d0, %%d1, %%acc0\n\t"
                "movclr.l %%acc0, %0"
                : "=ad" (sum)
                : "a" (qlp0), "a" (history)
                : "d0", "d1", "cc", "memory");
            ++history;
            *(data++) = *(residual++) + (sum >> lp_quantization);
        }
        return;
    case 7:
        for ( ; data_len != 0; --data_len) {
            asm volatile(
                "mov.l (%1), %%d0\n\t"
                "mov.l (%2), %%d1\n\t"
                "mac.l %%d0, %%d1, 4(%2), %%d1, %%acc0\n\t"
                "mov.l -4(%1), %%d0\n\t"
                "mac.l %%d0, %%d1, 8(%2), %%d1, %%acc0\n\t"
                "mov.l -8(%1), %%d0\n\t"
                "mac.l %%d0, %%d1, 12(%2), %%d1, %%acc0\n\t"
                "mov.l -12(%1), %%d0\n\t"
                "mac.l %%d0, %%d1, 16(%2), %%d1, %%acc0\n\t"
                "mov.l -16(%1), %%d0\n\t"
                "mac.l %%d0, %%d1, 20(%2), %%d1, %%acc0\n\t"
                "mov.l -20(%1), %%d0\n\t"
                "mac.l %%d0, %%d1, 24(%2), %%d1, %%acc0\n\t"
                "mov.l -24(%1), %%d0\n\t"
                "mac.l %%d0, %%d1, %%acc0\n\t"
                "movclr.l %%acc0, %0"
                : "=ad" (sum)
                : "a" (qlp0), "a" (history)
                : "d0", "d1", "cc", "memory");
            ++history;
            *(data++) = *(residual++) + (sum >> lp_quantization);
        }
        return;
    case 6:
        for ( ; data_len != 0; --data_len) {
            asm volatile(
                "mov.l (%1), %%d0\n\t"
                "mov.l (%2), %%d1\n\t"
                "mac.l %%d0, %%d1, 4(%2), %%d1, %%acc0\n\t"
                "mov.l -4(%1), %%d0\n\t"
                "mac.l %%d0, %%d1, 8(%2), %%d1, %%acc0\n\t"
                "mov.l -8(%1), %%d0\n\t"
                "mac.l %%d0, %%d1, 12(%2), %%d1, %%acc0\n\t"
                "mov.l -12(%1), %%d0\n\t"
                "mac.l %%d0, %%d1, 16(%2), %%d1, %%acc0\n\t"
                "mov.l -16(%1), %%d0\n\t"
                "mac.l %%d0, %%d1, 20(%2), %%d1, %%acc0\n\t"
                "mov.l -20(%1), %%d0\n\t"
                "mac.l %%d0, %%d1, %%acc0\n\t"
                "movclr.l %%acc0, %0"
                : "=ad" (sum)
                : "a" (qlp0), "a" (history)
                : "d0", "d1", "cc", "memory");
            ++history;
            *(data++) = *(residual++) + (sum >> lp_quantization);
        }
        return;
    case 5:
        for ( ; data_len != 0; --data_len) {
            asm volatile(
                "mov.l (%1), %%d0\n\t"
                "mov.l (%2), %%d1\n\t"
                "mac.l %%d0, %%d1, 4(%2), %%d1, %%acc0\n\t"
                "mov.l -4(%1), %%d0\n\t"
                "mac.l %%d0, %%d1, 8(%2), %%d1, %%acc0\n\t"
                "mov.l -8(%1), %%d0\n\t"
                "mac.l %%d0, %%d1, 12(%2), %%d1, %%acc0\n\t"
                "mov.l -12(%1), %%d0\n\t"
                "mac.l %%d0, %%d1, 16(%2), %%d1, %%acc0\n\t"
                "mov.l -16(%1), %%d0\n\t"
                "mac.l %%d0, %%d1, %%acc0\n\t"
                "movclr.l %%acc0, %0"
                : "=ad" (sum)
                : "a" (qlp0), "a" (history)
                : "d0", "d1", "cc", "memory");
            ++history;
            *(data++) = *(residual++) + (sum >> lp_quantization);
        }
        return;
    case 4:
        for ( ; data_len != 0; --data_len) {
            asm volatile(
                "mov.l (%1), %%d0\n\t"
                "mov.l (%2), %%d1\n\t"
                "mac.l %%d0, %%d1, 4(%2), %%d1, %%acc0\n\t"
                "mov.l -4(%1), %%d0\n\t"
                "mac.l %%d0, %%d1, 8(%2), %%d1, %%acc0\n\t"
                "mov.l -8(%1), %%d0\n\t"
                "mac.l %%d0, %%d1, 12(%2), %%d1, %%acc0\n\t"
                "mov.l -12(%1), %%d0\n\t"
                "mac.l %%d0, %%d1, %%acc0\n\t"
                "movclr.l %%acc0, %0"
                : "=ad" (sum)
                : "a" (qlp0), "a" (history)
                : "d0", "d1", "cc", "memory");
            ++history;
            *(data++) = *(residual++) + (sum >> lp_quantization);
        }
        return;
    case 3:
        for ( ; data_len != 0; --data_len) {
            asm volatile(
                "mov.l (%1), %%d0\n\t"
                "mov.l (%2), %%d1\n\t"
                "mac.l %%d0, %%d1, 4(%2), %%d1, %%acc0\n\t"
                "mov.l -4(%1), %%d0\n\t"
                "mac.l %%d0, %%d1, 8(%2), %%d1, %%acc0\n\t"
                "mov.l -8(%1), %%d0\n\t"
                "mac.l %%d0, %%d1, %%acc0\n\t"
                "movclr.l %%acc0, %0"
                : "=ad" (sum)
                : "a" (qlp0), "a" (history)
                : "d0", "d1", "cc", "memory");
            ++history;
            *(data++) = *(residual++) + (sum >> lp_quantization);
        }
        return;
    case 2:
        for ( ; data_len != 0; --data_len) {
            asm volatile(
                "mov.l (%1), %%d0\n\t"
                "mov.l (%2), %%d1\n\t"
                "mac.l %%d0, %%d1, 4(%2), %%d1, %%acc0\n\t"
                "mov.l -4(%1), %%d0\n\t"
                "mac.l %%d0, %%d1, %%acc0\n\t"
                "movclr.l %%acc0, %0"
                : "=ad" (sum)
                : "a" (qlp0), "a" (history)
                : "d0", "d1", "cc", "memory");
            ++history;
            *(data++) = *(residual++) + (sum >> lp_quantization);
        }
        return;
    case 1:
        /* A single product: won't gain anything by using mac here. */
        for ( ; data_len != 0; --data_len) {
            sum = (qlp0[0] * (*(history++)));
            *(data++) = *(residual++) + (sum >> lp_quantization);
        }
        return;
    default:
        /* No unrolled EMAC path for this order: compute the same
         * prediction in plain C rather than silently leaving `data`
         * unwritten. */
        for ( ; data_len != 0; --data_len) {
            const FLAC__int32 *past = data;

            sum = 0;
            for (j = 0; j < order; j++)
                sum += qlp_coeff[j] * (*--past);
            *(data++) = *(residual++) + (sum >> lp_quantization);
        }
        return;
    }
}
#endif

View file

@ -0,0 +1,46 @@
/*
 * Inline-assembly helpers for the ColdFire EMAC unit plus the prototype of
 * the EMAC-based LPC restore routine. The whole header is compiled out when
 * SIMULATOR is defined.
 *
 * NOTE(review): every asm macro below already ends in ';'. Writing
 * `MACRO(...);` therefore yields an extra empty statement, which matters in
 * an unbraced if/else. The semicolons are kept because callers may rely on
 * either spelling.
 */
#ifndef SIMULATOR
#ifndef _FLAC_COLDFIRE_H
#define _FLAC_COLDFIRE_H
#include <FLAC/ordinals.h>
/* Emit `mac.l x, y, %acc`. x and y may be placed in an address or a data
 * register ("ad"); acc is pasted in as a register name (e.g. acc0). */
#define MACL(x, y, acc) \
asm volatile ("mac.l %0, %1, %%" #acc \
: : "ad" ((x)), "ad" ((y)));
/* As MACL, with `#shift` pasted in as an immediate scale operand of mac.l.
 * The set of legal shift values is defined by the instruction, not here. */
#define MACL_SHIFT(x, y, shift, acc) \
asm volatile ("mac.l %0, %1, #" #shift ", %%" #acc \
: : "ad" ((x)), "ad" ((y)));
/* Emit `msac.l x, y, %acc` (the subtracting counterpart of mac.l). */
#define MSACL(x, y, acc) \
asm volatile ("msac.l %0, %1, %%" #acc \
: : "ad" ((x)), "ad" ((y)));
/* As MSACL, with `#shift` pasted in as an immediate scale operand. */
#define MSACL_SHIFT(x, y, shift, acc) \
asm volatile ("msac.l %0, %1, #" #shift ", %%" #acc \
: : "ad" ((x)), "ad" ((y)));
/* Write x to the MACSR register; x may be a register or an immediate
 * ("adi"). */
#define SET_MACSR(x) \
asm volatile ("mov.l %0, %%macsr" : : "adi" ((x)));
/* Emit `mov.l %acca, %accb`. This asm has no operand list, so it is basic
 * asm and a single '%' is correct here (the other macros need "%%"). */
#define TRANSFER_ACC(acca, accb) \
asm volatile ("mov.l %" #acca ", %" #accb);
/* Load accumulator `acc` with x (register or immediate). */
#define SET_ACC(x, acc) \
asm volatile ("mov.l %0, %%" #acc : : "adi" ((x)));
/* Copy accumulator `acc` into the lvalue x. */
#define GET_ACC(x, acc) \
asm volatile ("mov.l %%" #acc ", %0\n\t" : "=ad" ((x)));
/* Copy accumulator `acc` into the lvalue x using movclr.l, which also
 * clears the accumulator. */
#define GET_ACC_CLR(x, acc) \
asm volatile ("movclr.l %%" #acc ", %0\n\t" : "=ad" ((x)));
/* Bit masks, presumably meant to be OR-ed into the value passed to
 * SET_MACSR; the names suggest the saturation, fractional and rounding
 * modes. TODO confirm the bit positions against the CPU manual. */
#define EMAC_SATURATE 0x00000080
#define EMAC_FRACTIONAL 0x00000020
#define EMAC_ROUND 0x00000010
/* EMAC-accelerated LPC signal restore; same parameter list as the generic
 * FLAC restore routines it is installed in place of. Defined outside this
 * header. */
void FLAC__lpc_restore_signal_order8_mac(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
#endif
#endif

View file

@ -43,6 +43,10 @@
#include "private/lpc.h" #include "private/lpc.h"
#include "private/memory.h" #include "private/memory.h"
#if CONFIG_CPU==MCF5249
#include <private/coldfire.h>
#endif
#ifdef HAVE_CONFIG_H #ifdef HAVE_CONFIG_H
#include <config.h> #include <config.h>
#endif #endif
@ -298,7 +302,11 @@ FLAC_API FLAC__StreamDecoderState FLAC__stream_decoder_init(FLAC__StreamDecoder
decoder->private_->local_lpc_restore_signal = FLAC__lpc_restore_signal; decoder->private_->local_lpc_restore_signal = FLAC__lpc_restore_signal;
decoder->private_->local_lpc_restore_signal_64bit = FLAC__lpc_restore_signal_wide; decoder->private_->local_lpc_restore_signal_64bit = FLAC__lpc_restore_signal_wide;
decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal; decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal;
#if CONFIG_CPU==MCF5249 && !SIMULATOR
decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal_order8_mac;
#else
decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal; decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal;
#endif
/* now override with asm where appropriate */ /* now override with asm where appropriate */
#ifndef FLAC__NO_ASM #ifndef FLAC__NO_ASM
if(decoder->private_->cpuinfo.use_asm) { if(decoder->private_->cpuinfo.use_asm) {