Add codecs to librbcodec.

Change-Id: Id7f4717d51ed02d67cb9f9cb3c0ada4a81843f97
Reviewed-on: http://gerrit.rockbox.org/137
Reviewed-by: Nils Wallménius <nils@rockbox.org>
Tested-by: Nils Wallménius <nils@rockbox.org>
This commit is contained in:
Sean Bartell 2011-06-25 21:32:25 -04:00 committed by Nils Wallménius
parent a0009907de
commit f40bfc9267
757 changed files with 122 additions and 122 deletions

View file

@ -0,0 +1,54 @@
/* List of codec source files; the preprocessor conditionals below select
   which entries are part of a given build. */
/* decoders */
vorbis.c
mpa.c
flac.c
wav.c
a52.c
wavpack.c
/* alac.c is left out of builds that define RB_PROFILE */
#ifndef RB_PROFILE
alac.c
#endif
cook.c
raac.c
a52_rm.c
atrac3_rm.c
atrac3_oma.c
mpc.c
wma.c
sid.c
ape.c
asap.c
aac.c
spc.c
mod.c
shorten.c
aiff.c
speex.c
adx.c
smaf.c
au.c
vox.c
wav64.c
tta.c
wmapro.c
ay.c
gbs.c
hes.c
nsf.c
sgc.c
vgm.c
/* kss.c is only listed when MEMORYSIZE is greater than 2 */
#if MEMORYSIZE > 2
kss.c
#endif
/* the encoders are only listed when HAVE_RECORDING is defined */
#ifdef HAVE_RECORDING
/* encoders */
aiff_enc.c
mp3_enc.c
wav_enc.c
wavpack_enc.c
#endif /* HAVE_RECORDING */

192
lib/rbcodec/codecs/a52.c Normal file
View file

@ -0,0 +1,192 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2005 Dave Chapman
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include "codeclib.h"
#include <inttypes.h> /* Needed by a52.h */
#include <codecs/liba52/config-a52.h>
#include <codecs/liba52/a52.h>
CODEC_HEADER
/* Number of input bytes requested from the codec API per decode-loop pass. */
#define BUFFER_SIZE 4096
/* Samples in one A52 frame: 6 blocks of 256 samples each. */
#define A52_SAMPLESPERFRAME (6*256)
/* Decoder instance returned by a52_init(); released on CODEC_UNLOAD. */
static a52_state_t *state;
/* Running sample count, advanced by 256 for every decoded block; used to
   derive the elapsed time. */
static unsigned long samplesdone;
/* Sample rate reported by a52_syncinfo() for the most recently decoded frame. */
static unsigned long frequency;
/* used outside liba52 */
/* Staging area in which a52_decode_data() assembles one frame before decoding.
   NOTE(review): 3840 is presumably the largest possible A52 frame - confirm. */
static uint8_t buf[3840] IBSS_ATTR;
/* Hand one decoded block to the PCM buffer: 256 samples for the first
 * channel starting at samples[0], 256 for the second starting at
 * samples[256]. Yields to other threads first. */
static inline void output_audio(sample_t *samples)
{
    sample_t *first_channel = samples;
    sample_t *second_channel = samples + 256;

    ci->yield();
    ci->pcmbuf_insert(first_channel, second_channel, 256);
}
/*
 * Feed a run of raw A52 stream bytes, [start, end), to the decoder.
 *
 * Input is accumulated in buf across calls: first the 7 bytes that
 * a52_syncinfo() inspects, then the remainder of the frame whose length it
 * reports. Each completed frame is decoded and its six 256-sample blocks are
 * passed to output_audio(). A frame that fails to decode is dropped and the
 * search for the next sync header starts over.
 */
static void a52_decode_data(uint8_t *start, uint8_t *end)
{
    /* Persistent across calls: a frame may straddle two input chunks. */
    static uint8_t *bufptr = buf;     /* next free byte in buf */
    static uint8_t *bufpos = buf + 7; /* fill target: end of header or frame */
    /*
     * sample_rate and flags are static because the header may be parsed in
     * one call and the frame decoded in a later one, and the same values
     * are needed when we get back.
     */
    static int sample_rate;
    static int flags;
    int bit_rate;

    while (start != end) {
        int len = end - start;

        if (len > bufpos - bufptr)
            len = bufpos - bufptr;
        memcpy(bufptr, start, len);
        bufptr += len;
        start += len;

        /* Target not reached yet: the input is exhausted, loop will exit. */
        if (bufptr != bufpos)
            continue;

        if (bufpos == buf + 7) {
            /* Header complete: look for a valid sync frame. */
            int length = a52_syncinfo(buf, &flags, &sample_rate, &bit_rate);

            if (!length) {
                /* No sync: drop the oldest byte and retry with one more.
                 * The loop leaves bufptr at buf + 6. */
                for (bufptr = buf; bufptr < buf + 6; bufptr++)
                    bufptr[0] = bufptr[1];
                continue;
            }
            bufpos = buf + length;
        } else {
            /* Unity gain is 1 << 26, and we want to end up on 28 bits
               of precision instead of the default 30.
             */
            level_t level = 1 << 24;
            sample_t bias = 0;
            int i;

            /* This is the configuration for the downmixing: */
            flags = A52_STEREO | A52_ADJUST_LEVEL;

            if (a52_frame(state, buf, &flags, &level, bias) == 0) {
                a52_dynrng(state, NULL, NULL);
                frequency = sample_rate;

                /* An A52 frame consists of 6 blocks of 256 samples
                   So we decode and output them one block at a time */
                for (i = 0; i < 6; i++) {
                    if (a52_block(state))
                        break; /* decode error: abandon the rest of the frame */
                    output_audio(a52_samples(state));
                    samplesdone += 256;
                }

                /* Only report progress for a fully decoded frame. Use 64-bit
                 * maths so that rates which are not a multiple of 1000
                 * (e.g. 44100 Hz) do not drift, and never divide by zero. */
                if (i == 6 && frequency != 0)
                    ci->set_elapsed((unsigned long)
                                    (((uint64_t)samplesdone * 1000) / frequency));
            }

            /* Success or failure, start collecting the next header. */
            bufptr = buf;
            bufpos = buf + 7;
        }
    }
}
/* this is the codec entry point */
/* Codec entry point: configures the DSP when the codec is loaded and frees
 * the decoder instance when it is unloaded. Always reports CODEC_OK. */
enum codec_status codec_main(enum codec_entry_call_reason reason)
{
    switch (reason) {
    case CODEC_LOAD:
        /* Generic codec initialisation */
        ci->configure(DSP_SET_STEREO_MODE, STEREO_NONINTERLEAVED);
        ci->configure(DSP_SET_SAMPLE_DEPTH, 28);
        break;

    case CODEC_UNLOAD:
        if (state)
            a52_free(state);
        break;

    default:
        break;
    }

    return CODEC_OK;
}
/* this is called for each file to process */
/*
 * Decode one file. Called for each file to process.
 *
 * Sets up the DSP and the A52 decoder, positions the input either at the
 * resume offset or at the first frame, then loops: handle a pending command
 * (halt / seek), request up to BUFFER_SIZE bytes and feed them to
 * a52_decode_data().
 *
 * Returns CODEC_ERROR if codec_init() or a52_init() fails, CODEC_OK when the
 * stream ends or playback is halted.
 */
enum codec_status codec_run(void)
{
    size_t n;
    unsigned char *filebuf;
    int sample_loc;
    intptr_t param;

    if (codec_init())
        return CODEC_ERROR;

    ci->configure(DSP_SWITCH_FREQUENCY, ci->id3->frequency);
    codec_set_replaygain(ci->id3);

    /* Initialise the A52 decoder and check for success */
    state = a52_init(0);
    if (!state)
        return CODEC_ERROR;

    samplesdone = 0;

    if (ci->id3->offset) {
        /* Resume: convert the byte offset to whole frames, then to samples. */
        if (ci->seek_buffer(ci->id3->offset)) {
            samplesdone = (ci->id3->offset / ci->id3->bytesperframe) *
                          A52_SAMPLESPERFRAME;
            /* 64-bit maths keeps the result exact for rates that are not a
             * multiple of 1000. */
            ci->set_elapsed((unsigned long)
                            (((uint64_t)samplesdone * 1000) / ci->id3->frequency));
        }
    }
    else {
        ci->seek_buffer(ci->id3->first_frame_offset);
        ci->set_elapsed(0);
    }

    /* The main decoding loop */
    while (1) {
        enum codec_command_action action = ci->get_command(&param);

        if (action == CODEC_ACTION_HALT)
            break;

        if (action == CODEC_ACTION_SEEK_TIME) {
            /* param is the target time in ms. Multiply before dividing so
             * the sub-second part of the request is not thrown away. */
            sample_loc = (int)(((int64_t)param * ci->id3->frequency) / 1000);

            if (ci->seek_buffer((sample_loc/A52_SAMPLESPERFRAME)*ci->id3->bytesperframe)) {
                samplesdone = sample_loc;
                ci->set_elapsed((unsigned long)
                                (((uint64_t)samplesdone * 1000) / ci->id3->frequency));
            }
            ci->seek_complete();
        }

        filebuf = ci->request_buffer(&n, BUFFER_SIZE);

        if (n == 0) /* End of Stream */
            break;

        a52_decode_data(filebuf, filebuf + n);
        ci->advance_buffer(n);
    }

    return CODEC_OK;
}

227
lib/rbcodec/codecs/a52_rm.c Normal file
View file

@ -0,0 +1,227 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2009 Mohamed Tarek
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include "codeclib.h"
#include <codecs/librm/rm.h>
#include <inttypes.h> /* Needed by a52.h */
#include <codecs/liba52/config-a52.h>
#include <codecs/liba52/a52.h>
CODEC_HEADER
/* NOTE(review): BUFFER_SIZE does not appear to be referenced in the visible
   part of this file; the decode loop sizes its requests from block_align. */
#define BUFFER_SIZE 4096
/* Samples in one A52 frame: 6 blocks of 256 samples each. */
#define A52_SAMPLESPERFRAME (6*256)
/* Decoder instance returned by a52_init(); released on CODEC_UNLOAD. */
static a52_state_t *state;
/* Running sample count, advanced by 256 for every decoded block and reset
   on seek; used to derive the elapsed time. */
static unsigned long samplesdone;
/* Sample rate reported by a52_syncinfo() for the most recently decoded frame;
   stays 0 until the first frame has been decoded. */
static unsigned long frequency;
/* RealMedia container context, filled in by init_rm(). */
static RMContext rmctx;
/* Packet descriptor filled in by rm_get_packet() in the decode loop. */
static RMPacket pkt;
/* Fill *rmctx with the RMContext stored in the track's id3v2buf, read from
 * the first 4-byte aligned address at or after the start of that buffer. */
static void init_rm(RMContext *rmctx)
{
    /* round the buffer address up to the next multiple of 4 */
    intptr_t aligned = ((intptr_t)ci->id3->id3v2buf + 3) & ~3;

    memcpy(rmctx, (void *)aligned, sizeof(RMContext));
}
/* used outside liba52 */
/* Staging area in which a52_decode_data() assembles one frame before decoding.
   NOTE(review): 3840 is presumably the largest possible A52 frame - confirm. */
static uint8_t buf[3840] IBSS_ATTR;
/* The following two functions, a52_decode_data and output_audio are taken from a52.c */
/* Hand one decoded block to the PCM buffer: 256 samples for the first
 * channel starting at samples[0], 256 for the second starting at
 * samples[256]. Yields to other threads first. */
static inline void output_audio(sample_t *samples)
{
    sample_t *first_channel = samples;
    sample_t *second_channel = samples + 256;

    ci->yield();
    ci->pcmbuf_insert(first_channel, second_channel, 256);
}
/*
 * Feed a run of raw A52 stream bytes, [start, end), to the decoder.
 *
 * Input is accumulated in buf across calls: first the 7 bytes that
 * a52_syncinfo() inspects, then the remainder of the frame whose length it
 * reports. Each completed frame is decoded and its six 256-sample blocks are
 * passed to output_audio(). A frame that fails to decode is dropped and the
 * search for the next sync header starts over.
 */
static void a52_decode_data(uint8_t *start, uint8_t *end)
{
    /* Persistent across calls: a frame may straddle two input chunks. */
    static uint8_t *bufptr = buf;     /* next free byte in buf */
    static uint8_t *bufpos = buf + 7; /* fill target: end of header or frame */
    /*
     * sample_rate and flags are static because the header may be parsed in
     * one call and the frame decoded in a later one, and the same values
     * are needed when we get back.
     */
    static int sample_rate;
    static int flags;
    int bit_rate;

    while (start != end) {
        int len = end - start;

        if (len > bufpos - bufptr)
            len = bufpos - bufptr;
        memcpy(bufptr, start, len);
        bufptr += len;
        start += len;

        /* Target not reached yet: the input is exhausted, loop will exit. */
        if (bufptr != bufpos)
            continue;

        if (bufpos == buf + 7) {
            /* Header complete: look for a valid sync frame. */
            int length = a52_syncinfo(buf, &flags, &sample_rate, &bit_rate);

            if (!length) {
                /* No sync: drop the oldest byte and retry with one more.
                 * The loop leaves bufptr at buf + 6. */
                for (bufptr = buf; bufptr < buf + 6; bufptr++)
                    bufptr[0] = bufptr[1];
                continue;
            }
            bufpos = buf + length;
        } else {
            /* Unity gain is 1 << 26, and we want to end up on 28 bits
               of precision instead of the default 30.
             */
            level_t level = 1 << 24;
            sample_t bias = 0;
            int i;

            /* This is the configuration for the downmixing: */
            flags = A52_STEREO | A52_ADJUST_LEVEL;

            if (a52_frame(state, buf, &flags, &level, bias) == 0) {
                a52_dynrng(state, NULL, NULL);
                frequency = sample_rate;

                /* An A52 frame consists of 6 blocks of 256 samples
                   So we decode and output them one block at a time */
                for (i = 0; i < 6; i++) {
                    if (a52_block(state))
                        break; /* decode error: abandon the rest of the frame */
                    output_audio(a52_samples(state));
                    samplesdone += 256;
                }

                /* Only report progress for a fully decoded frame. Use 64-bit
                 * maths so that rates which are not a multiple of 1000
                 * (e.g. 44100 Hz) do not drift, and never divide by zero. */
                if (i == 6 && frequency != 0)
                    ci->set_elapsed((unsigned long)
                                    (((uint64_t)samplesdone * 1000) / frequency));
            }

            /* Success or failure, start collecting the next header. */
            bufptr = buf;
            bufpos = buf + 7;
        }
    }
}
/* this is the codec entry point */
/* Codec entry point: configures the DSP when the codec is loaded and frees
 * the decoder instance when it is unloaded. Always reports CODEC_OK. */
enum codec_status codec_main(enum codec_entry_call_reason reason)
{
    switch (reason) {
    case CODEC_LOAD:
        /* Generic codec initialisation */
        ci->configure(DSP_SET_STEREO_MODE, STEREO_NONINTERLEAVED);
        ci->configure(DSP_SET_SAMPLE_DEPTH, 28);
        break;

    case CODEC_UNLOAD:
        if (state)
            a52_free(state);
        break;

    default:
        break;
    }

    return CODEC_OK;
}
/* this is called for each file to process */
/*
 * Decode one file. Called for each file to process.
 *
 * Reads the RealMedia context prepared by the metadata parser, turns a
 * mid-track resume offset into a forced time seek, then loops over the audio
 * packets: handle a pending command (halt / seek), fetch one packet with
 * rm_get_packet() and feed its payload to a52_decode_data().
 *
 * Returns CODEC_ERROR if codec_init() or a52_init() fails, or if the very
 * first packet cannot be parsed; CODEC_OK otherwise.
 */
enum codec_status codec_run(void)
{
    size_t n;
    uint8_t *filebuf;
    int consumed, packet_offset;
    int playback_on = -1; /* -1 until the first packet has been parsed */
    size_t resume_offset;
    intptr_t param;
    enum codec_command_action action = CODEC_ACTION_NULL;

    if (codec_init()) {
        return CODEC_ERROR;
    }

    resume_offset = ci->id3->offset;

    ci->configure(DSP_SWITCH_FREQUENCY, ci->id3->frequency);
    codec_set_replaygain(ci->id3);

    ci->seek_buffer(ci->id3->first_frame_offset);

    /* Initializations */
    state = a52_init(0);
    if (!state) {
        return CODEC_ERROR;
    }
    ci->memset(&rmctx,0,sizeof(RMContext));
    ci->memset(&pkt,0,sizeof(RMPacket));
    init_rm(&rmctx);

    samplesdone = 0;

    /* check for a mid-track resume and force a seek time accordingly */
    if(resume_offset > rmctx.data_offset + DATA_HEADER_SIZE) {
        resume_offset -= rmctx.data_offset + DATA_HEADER_SIZE;
        /* put number of subpackets to skip in resume_offset */
        resume_offset /= (rmctx.block_align + PACKET_HEADER_SIZE);
        param = (int)resume_offset * ((rmctx.block_align * 8 * 1000)/rmctx.bit_rate);
        action = CODEC_ACTION_SEEK_TIME;
    }
    else {
        /* Seek to the first packet */
        ci->set_elapsed(0);
        ci->advance_buffer(rmctx.data_offset + DATA_HEADER_SIZE );
    }

    /* The main decoding loop */
    while((unsigned)rmctx.audio_pkt_cnt < rmctx.nb_packets) {
        /* A forced resume seek takes priority over polling for commands. */
        if (action == CODEC_ACTION_NULL)
            action = ci->get_command(&param);

        if (action == CODEC_ACTION_HALT)
            break;

        if (action == CODEC_ACTION_SEEK_TIME) {
            /* param is the target time in ms; convert it to a packet index. */
            packet_offset = param / ((rmctx.block_align*8*1000)/rmctx.bit_rate);
            ci->seek_buffer(rmctx.data_offset + DATA_HEADER_SIZE +
                            packet_offset*(rmctx.block_align + PACKET_HEADER_SIZE));
            rmctx.audio_pkt_cnt = packet_offset;
            /* Multiply before dividing so rates that are not a multiple of
             * 1000 are not truncated. */
            samplesdone = (unsigned long)(((uint64_t)rmctx.sample_rate * param) / 1000);
            /* Report the requested time directly. The decoder's 'frequency'
             * is still 0 when this runs for a resume (no frame has been
             * decoded yet), so deriving the time from it divided by zero. */
            ci->set_elapsed(param);
            ci->seek_complete();
        }

        action = CODEC_ACTION_NULL;

        filebuf = ci->request_buffer(&n, rmctx.block_align + PACKET_HEADER_SIZE);
        consumed = rm_get_packet(&filebuf, &rmctx, &pkt);

        if(consumed < 0 && playback_on != 0) {
            if(playback_on == -1) {
                /* Error only if packet-parsing failed and playback hadn't started */
                DEBUGF("rm_get_packet failed\n");
                return CODEC_ERROR;
            }
            else {
                break;
            }
        }

        playback_on = 1;
        a52_decode_data(filebuf, filebuf + rmctx.block_align);
        ci->advance_buffer(pkt.length);
    }

    return CODEC_OK;
}

297
lib/rbcodec/codecs/aac.c Normal file
View file

@ -0,0 +1,297 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2005 Dave Chapman
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include "codeclib.h"
#include "libm4a/m4a.h"
#include "libfaad/common.h"
#include "libfaad/structs.h"
#include "libfaad/decoder.h"
CODEC_HEADER
/* The maximum buffer size handled by faad. 12 bytes are required by libfaad
* as headroom (see libfaad/bits.c). FAAD_BYTE_BUFFER_SIZE bytes are buffered
* for each frame. */
#define FAAD_BYTE_BUFFER_SIZE (2048-12)
/* this is the codec entry point */
/* Codec entry point: on load, selects non-interleaved stereo output with a
 * sample depth of 29. Every other call reason is a no-op. Always reports
 * CODEC_OK. */
enum codec_status codec_main(enum codec_entry_call_reason reason)
{
    if (reason != CODEC_LOAD)
        return CODEC_OK;

    /* Generic codec initialisation */
    ci->configure(DSP_SET_STEREO_MODE, STEREO_NONINTERLEAVED);
    ci->configure(DSP_SET_SAMPLE_DEPTH, 29);

    return CODEC_OK;
}
/* this is called for each file to process */
/*
 * Decode one file. Called for each file to process.
 *
 * Parses the MP4 container with qtmovie_read(), opens and initialises the
 * AAC decoder, optionally resumes at a byte offset, then decodes one frame
 * per loop iteration, trimming lead/tail samples as described by the track
 * metadata before handing the audio to the PCM buffer.
 *
 * Returns CODEC_ERROR on any initialisation, demux or decode failure and
 * CODEC_OK when all frames are decoded or playback is halted.
 */
enum codec_status codec_run(void)
{
    /* Note that when dealing with QuickTime/MPEG4 files, terminology is
     * a bit confusing. Files with sound are split up in chunks, where
     * each chunk contains one or more samples. Each sample in turn
     * contains a number of "sound samples" (the kind you refer to with
     * the sampling frequency).
     */
    size_t n;
    demux_res_t demux_res;
    stream_t input_stream;
    uint32_t sound_samples_done;
    uint32_t elapsed_time;
    int file_offset;
    int framelength;
    int lead_trim = 0;
    unsigned int frame_samples;
    unsigned int i;
    unsigned char* buffer;
    NeAACDecFrameInfo frame_info;
    NeAACDecHandle decoder;
    int err;
    uint32_t seek_idx = 0;
    uint32_t s = 0;
    uint32_t sbr_fac = 1;
    unsigned char c = 0;
    void *ret;
    intptr_t param;
    bool empty_first_frame = false;

    /* Clean and initialize decoder structures */
    memset(&demux_res , 0, sizeof(demux_res));
    if (codec_init()) {
        LOGF("FAAD: Codec init error\n");
        return CODEC_ERROR;
    }

    file_offset = ci->id3->offset;

    ci->configure(DSP_SWITCH_FREQUENCY, ci->id3->frequency);
    codec_set_replaygain(ci->id3);

    stream_create(&input_stream,ci);

    ci->seek_buffer(ci->id3->first_frame_offset);

    /* if qtmovie_read returns successfully, the stream is up to
     * the movie data, which can be used directly by the decoder */
    if (!qtmovie_read(&input_stream, &demux_res)) {
        LOGF("FAAD: File init error\n");
        return CODEC_ERROR;
    }

    /* initialise the sound converter */
    decoder = NeAACDecOpen();

    if (!decoder) {
        LOGF("FAAD: Decode open error\n");
        return CODEC_ERROR;
    }

    NeAACDecConfigurationPtr conf = NeAACDecGetCurrentConfiguration(decoder);
    conf->outputFormat = FAAD_FMT_24BIT; /* irrelevant, we don't convert */
    NeAACDecSetConfiguration(decoder, conf);

    err = NeAACDecInit2(decoder, demux_res.codecdata, demux_res.codecdata_len, &s, &c);
    if (err) {
        LOGF("FAAD: DecInit: %d, %d\n", err, decoder->object_type);
        return CODEC_ERROR;
    }

#ifdef SBR_DEC
    /* Check for need of special handling for seek/resume and elapsed time. */
    if (ci->id3->needs_upsampling_correction) {
        sbr_fac = 2;
    } else {
        sbr_fac = 1;
    }
#endif

    i = 0;

    if (file_offset > 0) {
        /* Resume the desired (byte) position. Important: When resuming SBR
         * upsampling files the resulting sound_samples_done must be expanded
         * by a factor of 2. This is done via using sbr_fac. */
        if (m4a_seek_raw(&demux_res, &input_stream, file_offset,
                         &sound_samples_done, (int*) &i)) {
            sound_samples_done *= sbr_fac;
        } else {
            sound_samples_done = 0;
        }
        NeAACDecPostSeekReset(decoder, i);
    } else {
        sound_samples_done = 0;
    }

    /* Same 64-bit formula as the per-frame update in the decode loop:
     * the 32-bit "(samples * 10) / (frequency / 100)" form overflows on long
     * tracks and is inexact for rates that are not a multiple of 100. */
    elapsed_time = ((uint64_t) sound_samples_done * 1000) /
                   ci->id3->frequency;
    ci->set_elapsed(elapsed_time);

    if (i == 0)
    {
        lead_trim = ci->id3->lead_trim;
    }

    /* The main decoding loop */
    while (i < demux_res.num_sample_byte_sizes) {
        enum codec_command_action action = ci->get_command(&param);

        if (action == CODEC_ACTION_HALT)
            break;

        /* Deal with any pending seek requests */
        if (action == CODEC_ACTION_SEEK_TIME) {
            /* Seek to the desired time position. Important: When seeking in SBR
             * upsampling files the seek_time must be divided by 2 when calling
             * m4a_seek and the resulting sound_samples_done must be expanded
             * by a factor 2. This is done via using sbr_fac. */
            if (m4a_seek(&demux_res, &input_stream,
                         (param/10/sbr_fac)*(ci->id3->frequency/100),
                         &sound_samples_done, (int*) &i)) {
                sound_samples_done *= sbr_fac;
                /* 64-bit, exact: see the resume case before the loop. */
                elapsed_time = ((uint64_t) sound_samples_done * 1000) /
                               ci->id3->frequency;
                ci->set_elapsed(elapsed_time);
                seek_idx = 0;

                if (i == 0)
                {
                    lead_trim = ci->id3->lead_trim;
                }
            }
            NeAACDecPostSeekReset(decoder, i);
            ci->seek_complete();
        }

        /* There can be gaps between chunks, so skip ahead if needed. It
         * doesn't seem to happen much, but it probably means that a
         * "proper" file can have chunks out of order. Why one would want
         * that is a good question (but files with gaps do exist, so who
         * knows?), so we don't support that - for now, at least.
         */
        file_offset = m4a_check_sample_offset(&demux_res, i, &seek_idx);

        if (file_offset > ci->curpos)
        {
            ci->advance_buffer(file_offset - ci->curpos);
        }
        else if (file_offset == 0)
        {
            LOGF("AAC: get_sample_offset error\n");
            return CODEC_ERROR;
        }

        /* Request the required number of bytes from the input buffer */
        buffer=ci->request_buffer(&n, FAAD_BYTE_BUFFER_SIZE);

        /* Decode one block - returned samples will be host-endian */
        ret = NeAACDecDecode(decoder, &frame_info, buffer, n);

        /* NeAACDecDecode may sometimes return NULL without setting error. */
        if (ret == NULL || frame_info.error > 0) {
            LOGF("FAAD: decode error '%s'\n", NeAACDecGetErrorMessage(frame_info.error));
            return CODEC_ERROR;
        }

        /* Advance codec buffer (no need to call set_offset because of this) */
        ci->advance_buffer(frame_info.bytesconsumed);

        /* Output the audio */
        ci->yield();

        /* frame_info.samples is halved to get the per-channel count used
         * for trimming and output below. */
        frame_samples = frame_info.samples >> 1;

        if (empty_first_frame)
        {
            /* Remove the first frame from lead_trim, under the assumption
             * that it had the same size as this frame
             */
            empty_first_frame = false;
            lead_trim -= frame_samples;

            if (lead_trim < 0)
            {
                lead_trim = 0;
            }
        }

        /* Gather number of samples for the decoded frame. */
        framelength = frame_samples - lead_trim;

        if (i == demux_res.num_sample_byte_sizes - 1)
        {
            /* Size of the last frame */
            const uint32_t sample_duration = (demux_res.num_time_to_samples > 0) ?
                demux_res.time_to_sample[demux_res.num_time_to_samples - 1].sample_duration :
                frame_samples;

            /* Currently limited to at most one frame of tail_trim.
             * Seems to be enough.
             */
            if (ci->id3->tail_trim == 0 && sample_duration < frame_samples)
            {
                /* Subtract lead_trim just in case we decode a file with only
                 * one audio frame with actual data (lead_trim is usually zero
                 * here).
                 */
                framelength = sample_duration - lead_trim;
            }
            else
            {
                framelength -= ci->id3->tail_trim;
            }
        }

        if (framelength > 0)
        {
            ci->pcmbuf_insert(&decoder->time_out[0][lead_trim],
                              &decoder->time_out[1][lead_trim],
                              framelength);
            sound_samples_done += framelength;
            /* Update the elapsed-time indicator */
            elapsed_time = ((uint64_t) sound_samples_done * 1000) /
                           ci->id3->frequency;
            ci->set_elapsed(elapsed_time);
        }

        if (lead_trim > 0)
        {
            /* frame_info.samples can be 0 for frame 0. We still want to
             * remove it from lead_trim, so do that during frame 1.
             */
            if (0 == i && 0 == frame_info.samples)
            {
                empty_first_frame = true;
            }

            lead_trim -= frame_samples;

            if (lead_trim < 0)
            {
                lead_trim = 0;
            }
        }

        ++i;
    }

    LOGF("AAC: Decoded %lu samples\n", (unsigned long)sound_samples_done);
    return CODEC_OK;
}

404
lib/rbcodec/codecs/adx.c Normal file
View file

@ -0,0 +1,404 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
*
* Copyright (C) 2006-2008 Adam Gashlin (hcs)
* Copyright (C) 2006 Jens Arnold
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include <limits.h>
#include "codeclib.h"
#include "inttypes.h"
#include "math.h"
#include "lib/fixedpoint.h"
CODEC_HEADER
/* Maximum number of bytes to process in one iteration */
/* Used below as the element count of the samples[] output buffer. */
#define WAV_CHUNK_SIZE (1024*2)
/* Number of times to loop looped tracks when repeat is disabled */
#define LOOP_TIMES 2
/* Length of fade-out for looped tracks (milliseconds) */
#define FADE_LENGTH 10000L
/* Default high pass filter cutoff frequency is 500 Hz.
* Others can be set, but the default is nearly always used,
* and there is no way to determine if another was used, anyway.
*/
static const long cutoff = 500;
/* Decoded 16-bit output, filled by codec_run() and passed to pcmbuf_insert(). */
static int16_t samples[WAV_CHUNK_SIZE] IBSS_ATTR;
/* this is the codec entry point */
/* Codec entry point: on load, sets the DSP sample depth to 16 bits. Every
 * other call reason is a no-op. Always reports CODEC_OK. */
enum codec_status codec_main(enum codec_entry_call_reason reason)
{
    if (reason != CODEC_LOAD)
        return CODEC_OK;

    /* Generic codec initialisation */
    /* we only render 16 bits */
    ci->configure(DSP_SET_SAMPLE_DEPTH, 16);

    return CODEC_OK;
}
/* this is called for each file to process */
/*
 * Decode one 18-byte ADX frame for a single channel: a 16-bit big-endian
 * scale followed by 16 data bytes holding 32 signed 4-bit values, high
 * nibble first. Writes 32 samples to out[0], out[stride], out[2*stride], ...
 * and updates the two-sample prediction history in *hist1 (newest) and
 * *hist2.
 */
static void adx_decode_channel(const uint8_t *frame, int16_t *out, int stride,
                               long coef1, long coef2,
                               int32_t *hist1, int32_t *hist2)
{
    const int32_t scale = ((frame[0] << 8) | (frame[1])) + 1;
    int32_t h1 = *hist1;
    int32_t h2 = *hist2;
    int pos = 0;
    int i;

    for (i = 2; i < 18; i++)
    {
        int shift;

        for (shift = 4; shift >= 0; shift -= 4)
        {
            int32_t d = (frame[i] >> shift) & 15;
            int32_t sample;

            if (d & 8) d -= 16; /* sign-extend the nibble */

            sample = d*scale + ((coef1*h1 + coef2*h2) >> 12);
            if (sample > 32767) sample = 32767;
            else if (sample < -32768) sample = -32768;

            out[pos] = sample;
            pos += stride;
            h2 = h1; h1 = sample;
        }
    }

    *hist1 = h1;
    *hist2 = h2;
}

/*
 * Decode one file. Called for each file to process.
 *
 * Parses the ADX header (data start, channel count, loop points), derives
 * the prediction filter coefficients from the sample rate, then decodes
 * 18-byte frames into samples[] and passes them to the PCM buffer. Looped
 * tracks are replayed LOOP_TIMES times and then faded out over FADE_LENGTH
 * ms, unless ci->loop_track() asks for endless looping.
 *
 * Returns CODEC_ERROR for unreadable, unknown-type, encrypted or
 * more-than-stereo files, CODEC_OK otherwise.
 */
enum codec_status codec_run(void)
{
    int channels;
    int sampleswritten, i;
    uint8_t *buf;
    int32_t ch1_1, ch1_2, ch2_1, ch2_2; /* ADPCM history */
    size_t n;
    int endofstream; /* end of stream flag */
    uint32_t avgbytespersec;
    int looping; /* looping flag */
    int loop_count; /* number of loops done so far */
    int fade_count; /* countdown for fadeout */
    int fade_frames; /* length of fade in frames */
    off_t start_adr, end_adr; /* loop points */
    off_t chanstart, bufoff;
    /*long coef1=0x7298L,coef2=-0x3350L;*/
    long coef1, coef2;
    intptr_t param;

    DEBUGF("ADX: next_track\n");
    if (codec_init()) {
        return CODEC_ERROR;
    }
    DEBUGF("ADX: after init\n");

    /* init history */
    ch1_1=ch1_2=ch2_1=ch2_2=0;

    codec_set_replaygain(ci->id3);

    /* Get header */
    DEBUGF("ADX: request initial buffer\n");
    ci->seek_buffer(0);
    buf = ci->request_buffer(&n, 0x38);
    if (!buf || n < 0x38) {
        return CODEC_ERROR;
    }
    bufoff = 0;
    DEBUGF("ADX: read size = %lx\n",(unsigned long)n);

    /* Get file header for starting offset, channel count */
    chanstart = ((buf[2] << 8) | buf[3]) + 4;
    channels = buf[7];

    /* useful for seeking and reporting current playback position */
    avgbytespersec = ci->id3->frequency * 18 * channels / 32;
    DEBUGF("avgbytespersec=%ld\n",(unsigned long)avgbytespersec);

    /* calculate filter coefficients */

    /**
     * A simple table of these coefficients would be nice, but
     * some very odd frequencies are used and if I'm going to
     * interpolate I might as well just go all the way and
     * calculate them precisely.
     * Speed is not an issue as this only needs to be done once per file.
     */
    {
        const int64_t big28 = 0x10000000LL;
        const int64_t big32 = 0x100000000LL;
        int64_t frequency = ci->id3->frequency;
        int64_t phasemultiple = cutoff*big32/frequency;

        long z;
        int64_t a;
        const int64_t b = (M_SQRT2*big28)-big28;
        int64_t c;
        int64_t d;

        fp_sincos((unsigned long)phasemultiple,&z);

        a = (M_SQRT2*big28) - (z >> 3);

        /**
         * In the long passed to fsqrt there are only 4 nonfractional bits,
         * which is sufficient here, but this is the only reason why I don't
         * use 32 fractional bits everywhere.
         */
        d = fp_sqrt((a+b)*(a-b)/big28,28);
        c = (a-d)*big28/b;

        coef1 = (c*8192) >> 28;
        coef2 = (c*c/big28*-4096) >> 28;
        DEBUGF("ADX: samprate=%ld ",(long)frequency);
        DEBUGF("coef1 %04x ",(unsigned int)(coef1*4));
        DEBUGF("coef2 %04x\n",(unsigned int)(coef2*-4));
    }

    /* Get loop data */

    looping = 0; start_adr = 0; end_adr = 0;
    if (!memcmp(buf+0x10,"\x01\xF4\x03",3)) {
        /* Soul Calibur 2 style (type 03) */
        DEBUGF("ADX: type 03 found\n");
        /* check if header is too small for loop data */
        if (chanstart-6 < 0x2c) looping=0;
        else {
            looping = (buf[0x18]) ||
                      (buf[0x19]) ||
                      (buf[0x1a]) ||
                      (buf[0x1b]);
            end_adr = (buf[0x28]<<24) |
                      (buf[0x29]<<16) |
                      (buf[0x2a]<<8) |
                      (buf[0x2b]);

            start_adr = (
                         (buf[0x1c]<<24) |
                         (buf[0x1d]<<16) |
                         (buf[0x1e]<<8) |
                         (buf[0x1f])
                        )/32*channels*18+chanstart;
        }
    } else if (!memcmp(buf+0x10,"\x01\xF4\x04",3)) {
        /* Standard (type 04) */
        DEBUGF("ADX: type 04 found\n");
        /* check if header is too small for loop data */
        if (chanstart-6 < 0x38) looping=0;
        else {
            looping = (buf[0x24]) ||
                      (buf[0x25]) ||
                      (buf[0x26]) ||
                      (buf[0x27]);
            end_adr = (buf[0x34]<<24) |
                      (buf[0x35]<<16) |
                      (buf[0x36]<<8) |
                      buf[0x37];
            start_adr = (
                         (buf[0x28]<<24) |
                         (buf[0x29]<<16) |
                         (buf[0x2a]<<8) |
                         (buf[0x2b])
                        )/32*channels*18+chanstart;
        }
    } else {
        DEBUGF("ADX: error, couldn't determine ADX type\n");
        return CODEC_ERROR;
    }

    /* is file using encryption */
    if (buf[0x13]==0x08) {
        DEBUGF("ADX: error, encrypted ADX not supported\n");
        /* Must be a codec_status: 'false' is 0, not CODEC_ERROR. */
        return CODEC_ERROR;
    }

    if (looping) {
        DEBUGF("ADX: looped, start: %lx end: %lx\n",
               (unsigned long)start_adr,(unsigned long)end_adr);
    } else {
        DEBUGF("ADX: not looped\n");
    }

    /* advance to first frame */
    DEBUGF("ADX: first frame at %lx\n",(unsigned long)chanstart);
    bufoff = chanstart;

    /* get in position */
    ci->seek_buffer(bufoff);
    ci->set_elapsed(0);

    /* setup pcm buffer format */
    ci->configure(DSP_SWITCH_FREQUENCY, ci->id3->frequency);
    if (channels == 2) {
        ci->configure(DSP_SET_STEREO_MODE, STEREO_INTERLEAVED);
    } else if (channels == 1) {
        ci->configure(DSP_SET_STEREO_MODE, STEREO_MONO);
    } else {
        DEBUGF("ADX CODEC_ERROR: more than 2 channels\n");
        return CODEC_ERROR;
    }

    endofstream = 0;
    loop_count = 0;
    fade_count = -1; /* disable fade */
    fade_frames = 1;

    /* The main decoder loop */

    while (!endofstream) {
        enum codec_command_action action = ci->get_command(&param);

        if (action == CODEC_ACTION_HALT)
            break;

        /* do we need to loop? */
        if (bufoff > end_adr-18*channels && looping) {
            DEBUGF("ADX: loop!\n");
            /* check for endless looping */
            if (ci->loop_track()) {
                loop_count=0;
                fade_count = -1; /* disable fade */
            } else {
                /* otherwise start fade after LOOP_TIMES loops */
                loop_count++;
                if (loop_count >= LOOP_TIMES && fade_count < 0) {
                    /* frames to fade over */
                    fade_frames = FADE_LENGTH*ci->id3->frequency/32/1000;
                    /* volume relative to fade_frames */
                    fade_count = fade_frames;
                    DEBUGF("ADX: fade_frames = %d\n",fade_frames);
                }
            }
            bufoff = start_adr;
            ci->seek_buffer(bufoff);
        }

        /* do we need to seek? */
        if (action == CODEC_ACTION_SEEK_TIME) {
            uint32_t newpos;

            DEBUGF("ADX: seek to %ldms\n", (long)param);

            endofstream = 0;
            loop_count = 0;
            fade_count = -1; /* disable fade */
            fade_frames = 1;

            /* Byte offset of the target time, rounded down to a whole
             * frame group (18 bytes per channel). */
            newpos = (((uint64_t)avgbytespersec*param)
                      / (1000LL*18*channels))*(18*channels);
            bufoff = chanstart + newpos;
            /* Fold a target beyond the loop end back into the loop region.
             * Only do so for looped tracks with a non-empty region: without
             * loop points end_adr - start_adr is not positive, the
             * subtraction makes no progress and this loop would never
             * terminate. */
            if (looping && end_adr > start_adr) {
                while (bufoff > end_adr-18*channels) {
                    bufoff-=end_adr-start_adr;
                    loop_count++;
                }
            }
            ci->seek_buffer(bufoff);

            ci->set_elapsed(
                ((end_adr-start_adr)*loop_count + bufoff-chanstart)*
                1000LL/avgbytespersec);

            ci->seek_complete();
        }

        if (bufoff>ci->filesize-channels*18) break; /* End of stream */

        sampleswritten=0;

        while (
               /* Is there data left in the file? */
               (bufoff <= ci->filesize-(18*channels)) &&
               /* Is there space in the output buffer? */
               (sampleswritten <= WAV_CHUNK_SIZE-(32*channels)) &&
               /* Should we be looping? */
               ((!looping) || bufoff <= end_adr-18*channels))
        {
            /* fetch a frame and decode the first/only channel */
            buf = ci->request_buffer(&n, 18);

            if (!buf || n!=18) {
                DEBUGF("ADX: couldn't get buffer at %lx\n",
                       (unsigned long)bufoff);
                return CODEC_ERROR;
            }

            adx_decode_channel(buf, &samples[sampleswritten], channels,
                               coef1, coef2, &ch1_1, &ch1_2);
            bufoff+=18;
            ci->advance_buffer(18);

            if (channels == 2) {
                /* decode second channel into the odd (interleaved) slots */
                buf = ci->request_buffer(&n, 18);

                if (!buf || n!=18) {
                    DEBUGF("ADX: couldn't get buffer at %lx\n",
                           (unsigned long)bufoff);
                    return CODEC_ERROR;
                }

                adx_decode_channel(buf, &samples[sampleswritten+1], 2,
                                   coef1, coef2, &ch2_1, &ch2_2);
                bufoff+=18;
                ci->advance_buffer(18);
            }

            /* 32 samples were produced for each channel */
            sampleswritten += 32*channels;

            if (fade_count>0) {
                /* scale the frame just decoded by the remaining fade level */
                fade_count--;
                for (i=0;i<(channels==1?32:64);i++) samples[sampleswritten-i-1]=
                    ((int32_t)samples[sampleswritten-i-1])*fade_count/fade_frames;
                if (fade_count==0) {endofstream=1; break;}
            }
        }

        if (channels == 2)
            sampleswritten >>= 1; /* make samples/channel */

        ci->pcmbuf_insert(samples, NULL, sampleswritten);

        ci->set_elapsed(
            ((end_adr-start_adr)*loop_count + bufoff-chanstart)*
            1000LL/avgbytespersec);
    }

    return CODEC_OK;
}

350
lib/rbcodec/codecs/aiff.c Normal file
View file

@ -0,0 +1,350 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (c) 2005 Jvo Studer
* Copyright (c) 2009 Yoshihisa Uchida
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include "codeclib.h"
#include "codecs/libpcm/support_formats.h"
CODEC_HEADER
/* Pack four character codes into one big-endian 32-bit tag (c1 ends up in
 * the most significant byte). Each argument is parenthesized before the
 * cast so that an expression argument is converted as a whole. */
#define FOURCC(c1, c2, c3, c4) \
    ((((uint32_t)(c1))<<24)|(((uint32_t)(c2))<<16)|(((uint32_t)(c3))<<8)|((uint32_t)(c4)))
/* This codec supports the following AIFC compressionType formats */
/* Each value is the four-character compressionType code packed by FOURCC(). */
enum {
AIFC_FORMAT_PCM = FOURCC('N', 'O', 'N', 'E'), /* AIFC PCM Format (big endian) */
AIFC_FORMAT_ALAW = FOURCC('a', 'l', 'a', 'w'), /* AIFC ALaw compressed */
AIFC_FORMAT_MULAW = FOURCC('u', 'l', 'a', 'w'), /* AIFC uLaw compressed */
AIFC_FORMAT_IEEE_FLOAT32 = FOURCC('f', 'l', '3', '2'), /* AIFC IEEE float 32 bit */
AIFC_FORMAT_IEEE_FLOAT64 = FOURCC('f', 'l', '6', '4'), /* AIFC IEEE float 64 bit */
AIFC_FORMAT_QT_IMA_ADPCM = FOURCC('i', 'm', 'a', '4'), /* AIFC QuickTime IMA ADPCM */
};
/* Lookup table searched by get_codec(): format tag -> function returning the
   matching decoder. Both float widths share get_ieee_float_codec. */
static const struct pcm_entry pcm_codecs[] = {
{ AIFC_FORMAT_PCM, get_linear_pcm_codec },
{ AIFC_FORMAT_ALAW, get_itut_g711_alaw_codec },
{ AIFC_FORMAT_MULAW, get_itut_g711_mulaw_codec },
{ AIFC_FORMAT_IEEE_FLOAT32, get_ieee_float_codec },
{ AIFC_FORMAT_IEEE_FLOAT64, get_ieee_float_codec },
{ AIFC_FORMAT_QT_IMA_ADPCM, get_qt_ima_adpcm_codec },
};
/* Element count of the samples[] buffer below. */
#define PCM_SAMPLE_SIZE (1024*2)
/* NOTE(review): presumably the decoded-output buffer used by codec_run() -
   its use is outside the visible part of the file; confirm. */
static int32_t samples[PCM_SAMPLE_SIZE] IBSS_ATTR;
static const struct pcm_codec *get_codec(uint32_t formattag)
{
unsigned i;
for (i = 0; i < sizeof(pcm_codecs)/sizeof(pcm_codecs[0]); i++)
if (pcm_codecs[i].format_tag == formattag)
return pcm_codecs[i].get_codec();
return NULL;
}
/* this is the codec entry point */
/* Codec entry point: on load, sets the DSP sample depth to
 * PCM_OUTPUT_DEPTH-1. Every other call reason is a no-op. Always reports
 * CODEC_OK. */
enum codec_status codec_main(enum codec_entry_call_reason reason)
{
    if (reason != CODEC_LOAD)
        return CODEC_OK;

    /* Generic codec initialisation */
    ci->configure(DSP_SET_SAMPLE_DEPTH, PCM_OUTPUT_DEPTH-1);

    return CODEC_OK;
}
/* Read a big-endian 32-bit value from four consecutive bytes.  Each byte is
 * widened to uint32_t before shifting so a top byte >= 0x80 is never shifted
 * into the sign bit of an int. */
static inline uint32_t aiff_be32(const unsigned char *p)
{
    return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
           ((uint32_t)p[2] << 8)  |  (uint32_t)p[3];
}

/* Read a big-endian 16-bit value from two consecutive bytes. */
static inline uint32_t aiff_be16(const unsigned char *p)
{
    return ((uint32_t)p[0] << 8) | (uint32_t)p[1];
}

/* this is called for each file to process
 *
 * Parses the FORM/AIFF or FORM/AIFC header from the first (up to) 1024 bytes,
 * selects a decoder by compressionType, optionally resumes at
 * ci->id3->offset, then decodes format.chunksize bytes at a time until the
 * sound data is exhausted or a HALT command arrives.
 *
 * Returns CODEC_ERROR for init failures, malformed/unsupported headers and
 * decode errors; CODEC_OK otherwise. */
enum codec_status codec_run(void)
{
    struct pcm_format format;
    uint32_t bytesdone, decodedsamples;
    size_t n;
    size_t headerlen;         /* bytes actually buffered for header parsing */
    int bufcount;
    int endofstream;
    unsigned char *buf;
    uint8_t *aifbuf;
    uint32_t offset2snd = 0;
    off_t firstblockposn;     /* position of the first block in file */
    bool is_aifc = false;
    const struct pcm_codec *codec;
    uint32_t size;
    intptr_t param;

    if (codec_init()) {
        return CODEC_ERROR;
    }

    codec_set_replaygain(ci->id3);

    /* Need to save offset for later use (cleared indirectly by advance_buffer) */
    bytesdone = ci->id3->offset;

    /* assume the AIFF header is less than 1024 bytes */
    ci->seek_buffer(0);
    buf = ci->request_buffer(&n, 1024);
    if (n < 54) {
        return CODEC_ERROR;
    }
    /* Fewer than 1024 bytes may have been returned (short file); remember
       the real amount so the consumed length can be computed later. */
    headerlen = n;

    if (memcmp(buf, "FORM", 4) != 0)
    {
        DEBUGF("CODEC_ERROR: does not aiff format %4.4s\n", (char*)&buf[0]);
        return CODEC_ERROR;
    }
    if (memcmp(&buf[8], "AIFF", 4) == 0)
        is_aifc = false;
    else if (memcmp(&buf[8], "AIFC", 4) == 0)
        is_aifc = true;
    else
    {
        DEBUGF("CODEC_ERROR: does not aiff format %4.4s\n", (char*)&buf[8]);
        return CODEC_ERROR;
    }

    buf += 12;
    n -= 12;

    ci->memset(&format, 0, sizeof(struct pcm_format));
    format.is_signed = true;
    format.is_little_endian = false;

    decodedsamples = 0;

    /* read until 'SSND' chunk, which typically is last */
    while (format.numbytes == 0 && n >= 8)
    {
        /* chunkSize */
        size = aiff_be32(&buf[4]);
        if (memcmp(buf, "COMM", 4) == 0) {
            if ((!is_aifc && size < 18) || (is_aifc && size < 22))
            {
                DEBUGF("CODEC_ERROR: 'COMM' chunk size=%lu < %d\n",
                       (unsigned long)size, (is_aifc)?22:18);
                return CODEC_ERROR;
            }
            /* Fields up to buf[21] (AIFF) or buf[29] (AIFC) are read below;
               refuse to look past the bytes that were actually buffered. */
            if (n < (size_t)(is_aifc ? 30 : 22)) {
                DEBUGF("CODEC_ERROR: AIFF header size > 1024\n");
                return CODEC_ERROR;
            }
            /* num_channels */
            format.channels = aiff_be16(&buf[8]);
            /* num_sample_frames (buf[10..13]) is not needed */
            /* sample_size */
            format.bitspersample = aiff_be16(&buf[14]);
            /* sample_rate (don't use last 4 bytes, only integer fs) */
            if (buf[16] != 0x40) {
                DEBUGF("CODEC_ERROR: weird sampling rate (no @)\n");
                return CODEC_ERROR;
            }
            /* The shift count below is 30 - buf[17]; a larger exponent byte
               would make it negative, which is undefined. */
            if (buf[17] > 16 + 14) {
                DEBUGF("CODEC_ERROR: weird sampling rate (exponent too big)\n");
                return CODEC_ERROR;
            }
            format.samplespersec = aiff_be32(&buf[18]) + 1;
            format.samplespersec >>= (16 + 14 - buf[17]);
            /* compressionType (AIFC only) */
            if (is_aifc)
            {
                format.formattag = aiff_be32(&buf[26]);

                /*
                 * aiff's sample_size is uncompressed sound data size.
                 * But format.bitspersample is compressed sound data size.
                 */
                if (format.formattag == AIFC_FORMAT_ALAW ||
                    format.formattag == AIFC_FORMAT_MULAW)
                    format.bitspersample = 8;
                else if (format.formattag == AIFC_FORMAT_QT_IMA_ADPCM)
                    format.bitspersample = 4;
            }
            else
                format.formattag = AIFC_FORMAT_PCM;
            /* calc average bytes per second */
            format.avgbytespersec = format.samplespersec*format.channels*format.bitspersample/8;
        } else if (memcmp(buf, "SSND", 4)==0) {
            if (format.bitspersample == 0) {
                DEBUGF("CODEC_ERROR: unsupported chunk order\n");
                return CODEC_ERROR;
            }
            /* offset and blockSize occupy buf[8..15] */
            if (n < 16) {
                DEBUGF("CODEC_ERROR: AIFF header size > 1024\n");
                return CODEC_ERROR;
            }
            /* offset2snd */
            offset2snd = aiff_be32(&buf[8]);
            /* block_size */
            format.blockalign = aiff_be32(&buf[12]) >> 3;
            if (format.blockalign == 0)
                format.blockalign = format.channels * format.bitspersample >> 3;
            /* NOTE(review): wraps to a huge value when size < 8 + offset2snd;
               left unchanged because playback then simply runs to the end
               of the buffered stream. */
            format.numbytes = size - 8 - offset2snd;
            size = 8 + offset2snd; /* advance to the beginning of data */
        } else if (is_aifc && (memcmp(buf, "FVER", 4)==0)) {
            /* Format Version Chunk (AIFC only chunk) */
            /* skip this chunk */
        } else {
            DEBUGF("unsupported AIFF chunk: '%c%c%c%c', size=%lu\n",
                   buf[0], buf[1], buf[2], buf[3], (unsigned long)size);
        }

        size += 8 + (size & 0x01); /* odd chunk sizes must be padded */

        if (n < size) {
            DEBUGF("CODEC_ERROR: AIFF header size > 1024\n");
            return CODEC_ERROR;
        }
        buf += size;
        n -= size;
    } /* while 'SSND' */

    if (format.channels == 0) {
        DEBUGF("CODEC_ERROR: 'COMM' chunk not found or 0-channels file\n");
        return CODEC_ERROR;
    }
    if (format.numbytes == 0) {
        DEBUGF("CODEC_ERROR: 'SSND' chunk not found or has zero length\n");
        return CODEC_ERROR;
    }

    codec = get_codec(format.formattag);
    if (codec == 0)
    {
        DEBUGF("CODEC_ERROR: AIFC does not support compressionType: 0x%x\n",
            (unsigned int)format.formattag);
        return CODEC_ERROR;
    }

    if (!codec->set_format(&format))
    {
        return CODEC_ERROR;
    }

    ci->configure(DSP_SWITCH_FREQUENCY, ci->id3->frequency);

    if (format.channels == 2) {
        ci->configure(DSP_SET_STEREO_MODE, STEREO_INTERLEAVED);
    } else if (format.channels == 1) {
        ci->configure(DSP_SET_STEREO_MODE, STEREO_MONO);
    } else {
        DEBUGF("CODEC_ERROR: more than 2 channels unsupported\n");
        return CODEC_ERROR;
    }

    if (format.samplesperblock == 0)
    {
        DEBUGF("CODEC_ERROR: samplesperblock is 0\n");
        return CODEC_ERROR;
    }
    if (format.blockalign == 0)
    {
        DEBUGF("CODEC_ERROR: blockalign is 0\n");
        return CODEC_ERROR;
    }

    /* check chunksize: one chunk must not decode to more than samples[] holds */
    if ((format.chunksize / format.blockalign) * format.samplesperblock * format.channels
           > PCM_SAMPLE_SIZE)
        format.chunksize = (PCM_SAMPLE_SIZE / format.blockalign) * format.blockalign;
    if (format.chunksize == 0)
    {
        DEBUGF("CODEC_ERROR: chunksize is 0\n");
        return CODEC_ERROR;
    }

    /* Bytes consumed by the header = bytes buffered - bytes left over.
       (Using a fixed 1024 here is wrong when fewer bytes were buffered.) */
    firstblockposn = headerlen - n;
    ci->advance_buffer(firstblockposn);

    /* make sure we're at the correct offset */
    if (bytesdone > (uint32_t) firstblockposn) {
        /* Round down to previous block */
        struct pcm_pos *newpos = codec->get_seek_pos(bytesdone - firstblockposn,
                                                     PCM_SEEK_POS, NULL);

        if (newpos->pos > format.numbytes)
            return CODEC_OK;

        if (ci->seek_buffer(firstblockposn + newpos->pos))
        {
            bytesdone      = newpos->pos;
            decodedsamples = newpos->samples;
        }
    } else {
        /* already where we need to be */
        bytesdone = 0;
    }

    ci->set_elapsed(decodedsamples*1000LL/ci->id3->frequency);

    /* The main decoder loop */
    endofstream = 0;

    while (!endofstream) {
        enum codec_command_action action = ci->get_command(&param);

        if (action == CODEC_ACTION_HALT)
            break;

        if (action == CODEC_ACTION_SEEK_TIME) {
            /* The 3rd argument (read_buffer) is not needed for any format
               this codec supports, so NULL is passed. */
            struct pcm_pos *newpos = codec->get_seek_pos(param, PCM_SEEK_TIME, NULL);

            if (newpos->pos > format.numbytes)
            {
                /* seeking past the end: report the full length and stop */
                ci->set_elapsed(ci->id3->length);
                ci->seek_complete();
                break;
            }

            if (ci->seek_buffer(firstblockposn + newpos->pos))
            {
                bytesdone      = newpos->pos;
                decodedsamples = newpos->samples;
            }

            ci->set_elapsed(decodedsamples*1000LL/ci->id3->frequency);
            ci->seek_complete();
        }

        aifbuf = (uint8_t *)ci->request_buffer(&n, format.chunksize);
        if (n == 0)
            break; /* End of stream */
        if (bytesdone + n > format.numbytes) {
            /* never decode past the end of the sound data */
            n = format.numbytes - bytesdone;
            endofstream = 1;
        }

        if (codec->decode(aifbuf, n, samples, &bufcount) == CODEC_ERROR)
        {
            DEBUGF("codec error\n");
            return CODEC_ERROR;
        }

        ci->pcmbuf_insert(samples, NULL, bufcount);

        ci->advance_buffer(n);
        bytesdone += n;
        decodedsamples += bufcount;
        if (bytesdone >= format.numbytes)
            endofstream = 1;

        ci->set_elapsed(decodedsamples*1000LL/ci->id3->frequency);
    }

    return CODEC_OK;
}

View file

@ -0,0 +1,400 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2006 Antonius Hellmann
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include <inttypes.h>
#include "codeclib.h"
CODEC_ENC_HEADER
/* On-disk layout of the fixed AIFF header this encoder writes: the FORM
 * container header, one COMM chunk and the start of the SSND chunk.  The
 * struct is packed so that sizeof() matches the 36h (54) byte file layout;
 * the offset of every field is given in its comment.  Multi-byte fields are
 * stored big-endian in the file (see the htobe16/htobe32 calls where they are
 * filled in). */
struct aiff_header
{
uint8_t form_id[4]; /* 00h - 'FORM' */
uint32_t form_size; /* 04h - size of file - 8 */
uint8_t aiff_id[4]; /* 08h - 'AIFF' */
uint8_t comm_id[4]; /* 0Ch - 'COMM' */
int32_t comm_size; /* 10h - num_channels through sample_rate
(18) */
int16_t num_channels; /* 14h - 1=M, 2=S, etc. */
uint32_t num_sample_frames; /* 16h - num samples for each channel */
int16_t sample_size; /* 1ah - 1-32 bits per sample */
uint8_t sample_rate[10]; /* 1ch - IEEE 754 80-bit floating point */
uint8_t ssnd_id[4]; /* 26h - "SSND" */
int32_t ssnd_size; /* 2ah - size of chunk from offset to
end of pcm data */
uint32_t offset; /* 2eh - data offset from end of header */
uint32_t block_size; /* 32h - pcm data alignment */
/* 36h */
} __attribute__((packed));
#define PCM_DEPTH_BYTES 2
#define PCM_DEPTH_BITS 16
#define PCM_SAMP_PER_CHUNK 2048
#define PCM_CHUNK_SIZE (PCM_SAMP_PER_CHUNK*4)
/* Template headers */
struct aiff_header aiff_header =
{
{ 'F', 'O', 'R', 'M' }, /* form_id */
0, /* form_size (*) */
{ 'A', 'I', 'F', 'F' }, /* aiff_id */
{ 'C', 'O', 'M', 'M' }, /* comm_id */
htobe32(18), /* comm_size */
0, /* num_channels (*) */
0, /* num_sample_frames (*) */
htobe16(PCM_DEPTH_BITS), /* sample_size */
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* sample_rate (*) */
{ 'S', 'S', 'N', 'D' }, /* ssnd_id */
0, /* ssnd_size (*) */
htobe32(0), /* offset */
htobe32(0), /* block_size */
};
/* (*) updated when finalizing file */
static int num_channels IBSS_ATTR;
static int rec_mono_mode IBSS_ATTR;
static uint32_t sample_rate;
static uint32_t enc_size;
static int32_t err IBSS_ATTR;
/* Convert an unsigned 32-bit host-order value to an 80-bit IEEE 754 extended
 * precision number, stored big-endian in f[0..9].
 *
 * f: destination, 10 bytes; fully overwritten.
 * l: value to convert; 0 produces all-zero bytes.
 *
 * The biased exponent is tracked as a full 15-bit quantity.  Deriving only
 * its low byte from a fixed 0x40 high byte breaks for l == 1, whose biased
 * exponent is 0x3FFF rather than 0x40xx. */
STATICIRAM void uint32_h_to_ieee754_extended_be(uint8_t f[10], uint32_t l)
                                                ICODE_ATTR;
STATICIRAM void uint32_h_to_ieee754_extended_be(uint8_t f[10], uint32_t l)
{
    uint32_t exp;

    ci->memset(f, 0, 10);

    if (l == 0)
        return;

    /* Normalize: move the most significant set bit up to bit 31, lowering
       the exponent once per shift.  A value with bit 31 already set has the
       unbiased exponent 31; the bias is 16383. */
    for (exp = 16383 + 31; (l & (1ul << 31)) == 0; exp--)
        l <<= 1;

    /* sign always zero - bit 79 */
    /* biased exponent (31 - shift + 16383) - bits 64-78 */
    f[0] = (uint8_t)(exp >> 8);
    f[1] = (uint8_t)(exp >> 0);
    /* mantissa is value left justified with most significant non-zero
       bit stored in bit 63 - bits 0-63 */
    f[2] = (uint8_t)(l >> 24);
    f[3] = (uint8_t)(l >> 16);
    f[4] = (uint8_t)(l >> 8);
    f[5] = (uint8_t)(l >> 0);
} /* uint32_h_to_ieee754_extended_be */
/* Called often, hence inline.
 * Returns true when the event data carries a non-negative file handle and
 * the chunk flags, viewed as a signed long, are non-negative (presumably
 * the top bit is the error flag - confirm against the flag definitions). */
static inline bool is_file_data_ok(struct enc_file_event_data *data) ICODE_ATTR;
static inline bool is_file_data_ok(struct enc_file_event_data *data)
{
    if (data->rec_file < 0)
        return false;

    return (long)data->chunk->flags >= 0;
} /* is_file_data_ok */
/* Called often, hence inline.
 * Appends one encoded chunk to the recording file.
 * Returns false when the event data is unusable or the write comes up short;
 * true otherwise.  A chunk without data is skipped and counts as success.
 * On a successful write the chunk's PCM sample count is added to
 * data->num_pcm_samples. */
static inline bool on_write_chunk(struct enc_file_event_data *data) ICODE_ATTR;
static inline bool on_write_chunk(struct enc_file_event_data *data)
{
    ssize_t written;

    if (!is_file_data_ok(data))
        return false;

    if (data->chunk->enc_data == NULL)
    {
        /* nothing to write for this chunk */
#ifdef ROCKBOX_HAS_LOGF
        ci->logf("aiff enc: NULL data");
#endif
        return true;
    }

    written = ci->write(data->rec_file, data->chunk->enc_data,
                        data->chunk->enc_size);
    if (written != (ssize_t)data->chunk->enc_size)
        return false;

    data->num_pcm_samples += data->chunk->num_pcm;
    return true;
} /* on_write_chunk */
/* Create the recording file and write the template header into it.
 * Returns false when the chunk is already flagged with CHUNKF_ERROR, the
 * filename is empty, the file cannot be opened or the header write is
 * short; true otherwise.  On success the sample counter is reset and the
 * header size is added to data->new_enc_size. */
static bool on_start_file(struct enc_file_event_data *data)
{
    bool startable = !(data->chunk->flags & CHUNKF_ERROR) &&
                     *data->filename != '\0';

    if (!startable)
        return false;

    data->rec_file = ci->open(data->filename, O_RDWR|O_CREAT|O_TRUNC, 0666);
    if (data->rec_file < 0)
        return false;

    /* reset sample count */
    data->num_pcm_samples = 0;

    /* write template headers */
    if (ci->write(data->rec_file, &aiff_header, sizeof (aiff_header))
            == sizeof (aiff_header))
    {
        data->new_enc_size += sizeof(aiff_header);
        return true;
    }

    return false;
} /* on_start_file */
/* Finalize a recording: read the header back, fill in the size, channel,
 * frame-count and sample-rate fields, rewrite it and close the file.
 * Returns false as soon as any check, seek, read, write or close fails;
 * on success data->rec_file is set to -1 and true is returned. */
static bool on_end_file(struct enc_file_event_data *data)
{
    struct aiff_header hdr;
    uint32_t data_size;

    if (!is_file_data_ok(data))
        return false;

    /* fetch the header written when the file was started */
    if (ci->lseek(data->rec_file, 0, SEEK_SET) != 0)
        return false;
    if (ci->read(data->rec_file, &hdr, sizeof (hdr)) != sizeof (hdr))
        return false;

    data_size = data->num_pcm_samples*num_channels*PCM_DEPTH_BYTES;

    /* 'FORM' chunk */
    hdr.form_size         = htobe32(data_size + sizeof (hdr) - 8);

    /* 'COMM' chunk */
    hdr.num_channels      = htobe16(num_channels);
    hdr.num_sample_frames = htobe32(data->num_pcm_samples);
    uint32_h_to_ieee754_extended_be(hdr.sample_rate, sample_rate);

    /* 'SSND' chunk */
    hdr.ssnd_size         = htobe32(data_size + 8);

    /* put the completed header back and close the file */
    if (ci->lseek(data->rec_file, 0, SEEK_SET) != 0)
        return false;
    if (ci->write(data->rec_file, &hdr, sizeof (hdr)) != sizeof (hdr))
        return false;
    if (ci->close(data->rec_file) != 0)
        return false;

    data->rec_file = -1;

    return true;
} /* on_end_file */
/* Encoder event dispatcher registered through enc_set_parameters().
 * Routes write/start/end file events to their handlers and sets
 * CHUNKF_ERROR on the chunk when a handler reports failure.  Any other
 * event is ignored. */
STATICIRAM void enc_events_callback(enum enc_events event, void *data)
                                    ICODE_ATTR;
STATICIRAM void enc_events_callback(enum enc_events event, void *data)
{
    struct enc_file_event_data *file_data = (struct enc_file_event_data *)data;
    bool handled;

    switch (event)
    {
    case ENC_WRITE_CHUNK:
        handled = on_write_chunk(file_data);
        break;

    case ENC_START_FILE:
        handled = on_start_file(file_data);
        break;

    case ENC_END_FILE:
        handled = on_end_file(file_data);
        break;

    default:
        return;
    }

    if (handled)
        return;

    /* Something failed above. Signal error back to core. */
    file_data->chunk->flags |= CHUNKF_ERROR;
} /* enc_events_callback */
/* Reduce two native 32-bit stereo frames to two 16-bit mono samples and
 * store them as one big-endian 32-bit word.
 *
 * src: in/out read cursor; advanced by two words.
 * dst: in/out write cursor; advanced by one word.
 *
 * rec_mono_mode selects which half of each word is used: 1 takes the upper
 * 16 bits, 2 the lower 16 bits, anything else averages both halves and
 * carries the dropped rounding bit in err to the next sample. */
static inline void sample_to_mono(uint32_t **src, uint32_t **dst)
{
    int32_t lr1, lr2;

    switch(rec_mono_mode)
    {
        case 1:
            /* mono = L */
            lr1 = *(*src)++;
            lr1 = lr1 >> 16;
            lr2 = *(*src)++;
            lr2 = lr2 >> 16;
            break;
        case 2:
            /* mono = R */
            lr1 = *(*src)++;
            lr1 = (int16_t)lr1;
            lr2 = *(*src)++;
            lr2 = (int16_t)lr2;
            break;
        case 0:
        default:
            /* mono = (L+R)/2 */
            lr1 = *(*src)++;
            lr1 = (int16_t)lr1 + (lr1 >> 16) + err;
            err = lr1 & 1;
            lr1 >>= 1;

            lr2 = *(*src)++;
            lr2 = (int16_t)lr2 + (lr2 >> 16) + err;
            err = lr2 & 1;
            lr2 >>= 1;
            break;
    }

    /* lr1 may be negative; shift it as an unsigned value because left
       shifting a negative signed integer is undefined behaviour. */
    *(*dst)++ = htobe32(((uint32_t)lr1 << 16) | (uint16_t)lr2);
} /* sample_to_mono */
/* Convert one chunk of native PCM (PCM_SAMP_PER_CHUNK 32-bit words at src)
 * to AIFF sample order at dst.  With one output channel the words are
 * reduced pairwise through sample_to_mono(); otherwise they are copied on
 * big endian builds and passed through swap_odd_even32() on little endian
 * builds.  The loops are unrolled eight times. */
STATICIRAM void chunk_to_aiff_format(uint32_t *src, uint32_t *dst) ICODE_ATTR;
STATICIRAM void chunk_to_aiff_format(uint32_t *src, uint32_t *dst)
{
    if (num_channels != 1)
    {
#ifdef ROCKBOX_BIG_ENDIAN
        /* |LLLLLLLLllllllll|RRRRRRRRrrrrrrrr| =>
         * |LLLLLLLLllllllll|RRRRRRRRrrrrrrrr|
         */
        ci->memcpy(dst, src, PCM_CHUNK_SIZE);
#else
        /* |llllllllLLLLLLLL|rrrrrrrrRRRRRRRR| =>
         * |LLLLLLLLllllllll|RRRRRRRRrrrrrrrr|
         */
        uint32_t * const stop = src + PCM_SAMP_PER_CHUNK;

        while (src < stop)
        {
            *dst++ = swap_odd_even32(*src++);
            *dst++ = swap_odd_even32(*src++);
            *dst++ = swap_odd_even32(*src++);
            *dst++ = swap_odd_even32(*src++);
            *dst++ = swap_odd_even32(*src++);
            *dst++ = swap_odd_even32(*src++);
            *dst++ = swap_odd_even32(*src++);
            *dst++ = swap_odd_even32(*src++);
        }
#endif
    }
    else
    {
        /* On big endian:
         *  |LLLLLLLLllllllll|RRRRRRRRrrrrrrrr|
         *  |LLLLLLLLllllllll|RRRRRRRRrrrrrrrr| =>
         *  |MMMMMMMMmmmmmmmm|MMMMMMMMmmmmmmmm|
         *
         * On little endian:
         *  |llllllllLLLLLLLL|rrrrrrrrRRRRRRRR|
         *  |llllllllLLLLLLLL|rrrrrrrrRRRRRRRR| =>
         *  |MMMMMMMMmmmmmmmm|MMMMMMMMmmmmmmmm|
         */
        uint32_t * const stop = src + PCM_SAMP_PER_CHUNK;

        while (src < stop)
        {
            sample_to_mono(&src, &dst);
            sample_to_mono(&src, &dst);
            sample_to_mono(&src, &dst);
            sample_to_mono(&src, &dst);
            sample_to_mono(&src, &dst);
            sample_to_mono(&src, &dst);
            sample_to_mono(&src, &dst);
            sample_to_mono(&src, &dst);
        }
    }
} /* chunk_to_aiff_format */
/* Query the recording inputs and configure the encoder buffer system.
 * Returns false when any required encoder API entry is missing or the
 * configured audio format is not AFMT_AIFF; true once the module state and
 * the encoder parameters have been set. */
static bool init_encoder(void)
{
    struct enc_inputs     inputs;
    struct enc_parameters params;

    /* every encoder API entry used by this codec must be available */
    if (!ci->enc_get_inputs     ||
        !ci->enc_set_parameters ||
        !ci->enc_get_chunk      ||
        !ci->enc_finish_chunk   ||
        !ci->enc_get_pcm_data)
    {
        return false;
    }

    ci->enc_get_inputs(&inputs);

    if (inputs.config->afmt != AFMT_AIFF)
        return false;

    /* remember the recording setup */
    sample_rate   = inputs.sample_rate;
    num_channels  = inputs.num_channels;
    rec_mono_mode = inputs.rec_mono_mode;
    err           = 0;
    enc_size      = PCM_CHUNK_SIZE*inputs.num_channels / 2;

    /* configure the buffer system */
    params.afmt            = AFMT_AIFF;
    params.chunk_size      = enc_size;
    params.enc_sample_rate = sample_rate;
    params.reserve_bytes   = 0;
    params.events_callback = enc_events_callback;
    ci->enc_set_parameters(&params);

    return true;
} /* init_encoder */
/* Codec entry point.  CODEC_LOAD initializes the encoder and fails with
 * CODEC_ERROR when that is not possible; CODEC_UNLOAD resets the encoder
 * parameters.  Every other reason is a no-op returning CODEC_OK. */
enum codec_status codec_main(enum codec_entry_call_reason reason)
{
    switch (reason)
    {
    case CODEC_LOAD:
        if (!init_encoder())
            return CODEC_ERROR;
        break;

    case CODEC_UNLOAD:
        /* reset parameters to initial state */
        ci->enc_set_parameters(NULL);
        break;

    default:
        break;
    }

    return CODEC_OK;
}
/* Main encoding loop, run once per file: until a HALT command arrives, take
 * one chunk of PCM data (when available), convert it into the next encoder
 * chunk and hand that chunk back.  Always returns CODEC_OK. */
enum codec_status codec_run(void)
{
    for (;;)
    {
        struct enc_chunk_hdr *chunk;
        uint32_t *pcm;

        if (ci->get_command(NULL) == CODEC_ACTION_HALT)
            break;

        pcm = (uint32_t *)ci->enc_get_pcm_data(PCM_CHUNK_SIZE);
        if (pcm != NULL)
        {
            chunk           = ci->enc_get_chunk();
            chunk->enc_size = enc_size;
            chunk->num_pcm  = PCM_SAMP_PER_CHUNK;
            chunk->enc_data = ENC_CHUNK_SKIP_HDR(chunk->enc_data, chunk);

            chunk_to_aiff_format(pcm, (uint32_t *)chunk->enc_data);

            ci->enc_finish_chunk();
        }
    }

    return CODEC_OK;
}

146
lib/rbcodec/codecs/alac.c Normal file
View file

@ -0,0 +1,146 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2005 Dave Chapman
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include "codeclib.h"
#include "libm4a/m4a.h"
#include "libalac/decomp.h"
CODEC_HEADER
/* Largest number of input bytes requested from the buffer for one frame. */
#define ALAC_BYTE_BUFFER_SIZE 32768

/* Decoded samples, one row per channel; filled by alac_decode_frame(). */
static int32_t outputbuffer[ALAC_MAX_CHANNELS][ALAC_BLOCKSIZE] IBSS_ATTR;

/* Codec entry point.  On CODEC_LOAD the DSP is set to non-interleaved stereo
 * and the ALAC output sample depth; other reasons are no-ops.  Always
 * returns CODEC_OK. */
enum codec_status codec_main(enum codec_entry_call_reason reason)
{
    if (reason != CODEC_LOAD)
        return CODEC_OK;

    /* Generic codec initialisation */
    ci->configure(DSP_SET_STEREO_MODE, STEREO_NONINTERLEAVED);
    ci->configure(DSP_SET_SAMPLE_DEPTH, ALAC_OUTPUT_DEPTH-1);

    return CODEC_OK;
}
/* Convert a sample count to milliseconds.  The product is formed in 64 bits
 * so it cannot wrap, and the division uses the exact frequency.
 * frequency must be non-zero. */
static inline uint32_t alac_samples_to_ms(uint32_t samples, uint32_t frequency)
{
    return (uint32_t)(((uint64_t)samples * 1000) / frequency);
}

/* this is called for each file to process
 *
 * Demuxes the MP4 container, optionally resumes near ci->id3->offset, then
 * decodes one ALAC frame per loop iteration until all frames are done or a
 * HALT command arrives.
 *
 * Returns CODEC_ERROR when codec init fails, the sample rate is zero or the
 * container cannot be parsed; CODEC_OK otherwise. */
enum codec_status codec_run(void)
{
    size_t n;
    demux_res_t demux_res;
    stream_t input_stream;
    uint32_t samplesdone = 0;
    uint32_t elapsedtime = 0;
    int samplesdecoded;
    unsigned int i;
    unsigned char* buffer;
    alac_file alac;
    intptr_t param;

    /* Clean and initialize decoder structures */
    memset(&demux_res , 0, sizeof(demux_res));
    if (codec_init()) {
        LOGF("ALAC: Error initialising codec\n");
        return CODEC_ERROR;
    }

    /* Every elapsed-time calculation below divides by the frequency */
    if (ci->id3->frequency == 0) {
        LOGF("ALAC: Invalid sample rate\n");
        return CODEC_ERROR;
    }

    ci->configure(DSP_SWITCH_FREQUENCY, ci->id3->frequency);
    codec_set_replaygain(ci->id3);

    ci->seek_buffer(0);

    stream_create(&input_stream,ci);

    /* Read from ci->id3->offset before calling qtmovie_read.  Without a
       bitrate the byte offset cannot be converted; start from the beginning
       instead of dividing by zero. */
    if (ci->id3->bitrate != 0) {
        samplesdone = (uint32_t)(((uint64_t)(ci->id3->offset) * ci->id3->frequency) /
                                 (ci->id3->bitrate*128));
    }

    /* if qtmovie_read returns successfully, the stream is up to
     * the movie data, which can be used directly by the decoder */
    if (!qtmovie_read(&input_stream, &demux_res)) {
        LOGF("ALAC: Error initialising file\n");
        return CODEC_ERROR;
    }

    /* initialise the sound converter */
    alac_set_info(&alac, demux_res.codecdata);

    /* Set i for first frame, seek to desired sample position for resuming. */
    i=0;
    if (samplesdone > 0) {
        if (m4a_seek(&demux_res, &input_stream, samplesdone,
                     &samplesdone, (int*) &i)) {
            elapsedtime = alac_samples_to_ms(samplesdone, ci->id3->frequency);
        } else {
            samplesdone = 0;
        }
    }

    ci->set_elapsed(elapsedtime);

    /* The main decoding loop */
    while (i < demux_res.num_sample_byte_sizes) {
        enum codec_command_action action = ci->get_command(&param);

        if (action == CODEC_ACTION_HALT)
            break;

        /* Request the required number of bytes from the input buffer.
           NOTE(review): this result is overwritten by the request after the
           seek handling; it looks redundant - confirm before removing. */
        buffer=ci->request_buffer(&n, ALAC_BYTE_BUFFER_SIZE);

        /* Deal with any pending seek requests */
        if (action == CODEC_ACTION_SEEK_TIME) {
            if (m4a_seek(&demux_res, &input_stream,
                         (param/10) * (ci->id3->frequency/100),
                         &samplesdone, (int *)&i)) {
                elapsedtime = alac_samples_to_ms(samplesdone, ci->id3->frequency);
            }
            ci->set_elapsed(elapsedtime);
            ci->seek_complete();
        }

        /* Request the required number of bytes from the input buffer */
        buffer=ci->request_buffer(&n, ALAC_BYTE_BUFFER_SIZE);

        /* Decode one block - returned samples will be host-endian */
        samplesdecoded=alac_decode_frame(&alac, buffer, outputbuffer, ci->yield);
        ci->yield();

        /* Advance codec buffer by amount of consumed bytes */
        ci->advance_buffer(alac.bytes_consumed);

        /* Output the audio */
        ci->pcmbuf_insert(outputbuffer[0], outputbuffer[1], samplesdecoded);

        /* Update the elapsed-time indicator */
        samplesdone+=samplesdecoded;
        elapsedtime = alac_samples_to_ms(samplesdone, ci->id3->frequency);
        ci->set_elapsed(elapsedtime);

        i++;
    }

    LOGF("ALAC: Decoded %lu samples\n",(unsigned long)samplesdone);
    return CODEC_OK;
}

330
lib/rbcodec/codecs/ape.c Normal file
View file

@ -0,0 +1,330 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2007 Dave Chapman
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include "codeclib.h"
#include <codecs/demac/libdemac/demac.h>
CODEC_HEADER
/* Number of blocks decoded per decode_chunk() call; also the size of the two
   per-channel output buffers below. */
#define BLOCKS_PER_LOOP 1024
/* NOTE(review): MAX_CHANNELS and MAX_BYTESPERSAMPLE are not referenced in the
   part of this file visible here - confirm whether they are still needed. */
#define MAX_CHANNELS 2
#define MAX_BYTESPERSAMPLE 3
/* Monkey's Audio files have one seekpoint per frame. The framesize
varies between 73728 and 1179648 samples.
At the smallest framesize, 30000 frames would be 50155 seconds of
audio - almost 14 hours. This should be enough for any file a user
would want to play in Rockbox, given the 2GB FAT filesize (and 4GB
seektable entry size) limit.
This means the seektable is 120000 bytes, but we have a lot of
spare room in the codec buffer - the APE codec itself is small.
*/
#define MAX_SEEKPOINTS 30000
/* Storage for the seektable; codec_run() points ape_ctx.seektable at it and
   clamps numseekpoints to MAX_SEEKPOINTS before reading the table. */
static uint32_t seektablebuf[MAX_SEEKPOINTS];
/* Number of input bytes requested from the buffer at a time. */
#define INPUT_CHUNKSIZE (32*1024)
/* 1024*4 = 4096 bytes per channel */
static int32_t decoded0[BLOCKS_PER_LOOP] IBSS_ATTR;
static int32_t decoded1[BLOCKS_PER_LOOP] IBSS_ATTR;
/* NOTE(review): not referenced in the part of this file visible here. */
#define MAX_SUPPORTED_SEEKTABLE_SIZE 5000
/* Translate a target sample into a seek position.
 *
 * ape_ctx:       parsed file context (blocksperframe, seektable).
 * new_sample:    sample to seek to.
 * newframe:      out - index of the frame containing that sample.
 * filepos:       out - seektable file position of that frame.
 * samplestoskip: out - samples to discard from the start of that frame.
 *
 * Returns false, leaving the outputs untouched, when no seekpoint exists
 * for the frame; true otherwise. */
static bool ape_calc_seekpos(struct ape_ctx_t* ape_ctx,
                             uint32_t new_sample,
                             uint32_t* newframe,
                             uint32_t* filepos,
                             uint32_t* samplestoskip)
{
    const uint32_t frame = new_sample / ape_ctx->blocksperframe;

    if (frame < ape_ctx->numseekpoints)
    {
        *newframe = frame;
        *filepos = ape_ctx->seektable[frame];
        *samplestoskip = new_sample - (frame * ape_ctx->blocksperframe);
        return true;
    }

    /* We don't have a seekpoint for that frame */
    return false;
}
/* The resume offset is a value in bytes - we need to
   turn it into a frame number and samplestoskip value.

   ape_ctx:       parsed file context with a loaded seektable.
   resume_offset: byte position in the file to resume from.
   currentframe:  out - frame containing the resume offset.
   samplesdone:   out - samples contained in all earlier frames.
   samplestoskip: out - estimated samples to discard inside that frame.
   firstbyte:     out - byte phase within the 32-bit aligned input word.

   The input buffer is moved to the (32-bit aligned) start of the chosen
   frame.  Only seektable entries below both totalframes and numseekpoints
   are ever read; when there is no such entry the buffer is left where it is
   and decoding starts from the first frame. */
static void ape_resume(struct ape_ctx_t* ape_ctx, size_t resume_offset,
                       uint32_t* currentframe, uint32_t* samplesdone,
                       uint32_t* samplestoskip, int* firstbyte)
{
    off_t newfilepos;
    int64_t framesize;
    int64_t offset;
    uint32_t usable;    /* frames that have a seektable entry */

    *currentframe = 0;
    *samplesdone = 0;
    *samplestoskip = 0;
    *firstbyte = 3;     /* same phase as a start from the first frame */

    usable = ape_ctx->totalframes;
    if (ape_ctx->numseekpoints < usable)
        usable = ape_ctx->numseekpoints;

    if (usable == 0)
        return;

    /* Advance to the last frame whose seekpoint is not beyond the resume
       offset, never indexing past the last usable entry. */
    while ((*currentframe + 1 < usable) &&
           (resume_offset >= ape_ctx->seektable[*currentframe + 1]))
    {
        ++*currentframe;
        *samplesdone += ape_ctx->blocksperframe;
    }

    newfilepos = ape_ctx->seektable[*currentframe];

    /* APE's bytestream is weird... */
    *firstbyte = 3 - (newfilepos & 3);
    newfilepos &= ~3;

    ci->seek_buffer(newfilepos);

    /* We estimate where we were in the current frame, based on the
       byte offset.  This needs the next frame's seekpoint and a resume
       offset that really lies inside the frame. */
    if ((*currentframe + 1 < usable) &&
        (resume_offset > ape_ctx->seektable[*currentframe])) {
        framesize = (int64_t)ape_ctx->seektable[*currentframe+1] -
                    (int64_t)ape_ctx->seektable[*currentframe];
        offset = (int64_t)resume_offset -
                 (int64_t)ape_ctx->seektable[*currentframe];

        /* a non-increasing seektable would give a zero or negative size */
        if (framesize > 0)
            *samplestoskip = (offset * ape_ctx->blocksperframe) / framesize;
    }
}
/* Codec entry point.  On CODEC_LOAD the DSP sample depth is set for APE
 * output; other reasons are no-ops.  Always returns CODEC_OK. */
enum codec_status codec_main(enum codec_entry_call_reason reason)
{
    if (reason != CODEC_LOAD)
        return CODEC_OK;

    /* Generic codec initialisation */
    ci->configure(DSP_SET_SAMPLE_DEPTH, APE_OUTPUT_DEPTH-1);

    return CODEC_OK;
}
/* this is called for each file to process */
/* Decodes one Monkey's Audio file: parses the header, loads the seektable,
   optionally resumes at the saved byte offset, then decodes frame by frame
   in chunks of at most BLOCKS_PER_LOOP blocks.
   Returns CODEC_ERROR when codec init, header parsing or a chunk decode
   fails; CODEC_OK when all frames are done or a HALT command arrives. */
enum codec_status codec_run(void)
{
struct ape_ctx_t ape_ctx;
uint32_t samplesdone;
uint32_t elapsedtime;
size_t bytesleft;
uint32_t currentframe;
uint32_t newfilepos;
uint32_t samplestoskip;
int nblocks;
int bytesconsumed;
unsigned char* inbuffer;
uint32_t blockstodecode;
int res;
int firstbyte;
size_t resume_offset;
intptr_t param;
if (codec_init()) {
LOGF("APE: Error initialising codec\n");
return CODEC_ERROR;
}
/* Remember the resume position - when the codec is opened, the
playback engine will reset it. */
resume_offset = ci->id3->offset;
ci->seek_buffer(0);
inbuffer = ci->request_buffer(&bytesleft, INPUT_CHUNKSIZE);
/* Read the file headers to populate the ape_ctx struct */
if (ape_parseheaderbuf(inbuffer,&ape_ctx) < 0) {
LOGF("APE: Error reading header\n");
return CODEC_ERROR;
}
/* Initialise the seektable for this file */
ape_ctx.seektable = seektablebuf;
/* Clamp so the read below can never overflow seektablebuf */
ape_ctx.numseekpoints = MIN(MAX_SEEKPOINTS,ape_ctx.numseekpoints);
ci->advance_buffer(ape_ctx.seektablefilepos);
/* The seektable may be bigger than the guard buffer (32KB), so we
do a read() */
ci->read_filebuf(ape_ctx.seektable, ape_ctx.numseekpoints * sizeof(uint32_t));
#ifdef ROCKBOX_BIG_ENDIAN
/* Byte-swap the little-endian seekpoints */
{
uint32_t i;
for(i = 0; i < ape_ctx.numseekpoints; i++)
ape_ctx.seektable[i] = swap32(ape_ctx.seektable[i]);
}
#endif
/* Now advance the file position to the first frame */
ci->advance_buffer(ape_ctx.firstframe -
(ape_ctx.seektablefilepos +
ape_ctx.numseekpoints * sizeof(uint32_t)));
ci->configure(DSP_SWITCH_FREQUENCY, ape_ctx.samplerate);
ci->configure(DSP_SET_STEREO_MODE, ape_ctx.channels == 1 ?
STEREO_MONO : STEREO_NONINTERLEAVED);
codec_set_replaygain(ci->id3);
/* Work out the starting frame: resume position or start of file */
if (resume_offset) {
/* The resume offset is a value in bytes - we need to
turn it into a frame number and samplestoskip value */
ape_resume(&ape_ctx, resume_offset,
&currentframe, &samplesdone, &samplestoskip, &firstbyte);
} else {
currentframe = 0;
samplesdone = 0;
samplestoskip = 0;
firstbyte = 3; /* Take account of the little-endian 32-bit byte ordering */
}
/* NOTE(review): samplesdone*10 is evaluated in 32 bits and wraps once
samplesdone exceeds about 429 million; samplerate/100 is zero for rates
below 100 - consider a 64-bit intermediate as aiff.c uses. */
elapsedtime = (samplesdone*10)/(ape_ctx.samplerate/100);
ci->set_elapsed(elapsedtime);
/* Initialise the buffer */
inbuffer = ci->request_buffer(&bytesleft, INPUT_CHUNKSIZE);
/* The main decoding loop - we decode the frames a small chunk at a time */
while (currentframe < ape_ctx.totalframes)
{
/* Re-entry point after a successful seek (see the goto below) */
frame_start:
/* Calculate how many blocks there are in this frame */
if (currentframe == (ape_ctx.totalframes - 1))
nblocks = ape_ctx.finalframeblocks;
else
nblocks = ape_ctx.blocksperframe;
ape_ctx.currentframeblocks = nblocks;
/* Initialise the frame decoder */
init_frame_decoder(&ape_ctx, inbuffer, &firstbyte, &bytesconsumed);
ci->advance_buffer(bytesconsumed);
inbuffer = ci->request_buffer(&bytesleft, INPUT_CHUNKSIZE);
/* Decode the frame a chunk at a time */
while (nblocks > 0)
{
enum codec_command_action action = ci->get_command(&param);
if (action == CODEC_ACTION_HALT)
goto done;
/* Deal with any pending seek requests */
if (action == CODEC_ACTION_SEEK_TIME)
{
if (ape_calc_seekpos(&ape_ctx,
(param/10) * (ci->id3->frequency/100),
&currentframe,
&newfilepos,
&samplestoskip))
{
samplesdone = currentframe * ape_ctx.blocksperframe;
/* APE's bytestream is weird... */
firstbyte = 3 - (newfilepos & 3);
newfilepos &= ~3;
ci->seek_buffer(newfilepos);
inbuffer = ci->request_buffer(&bytesleft, INPUT_CHUNKSIZE);
elapsedtime = (samplesdone*10)/(ape_ctx.samplerate/100);
ci->set_elapsed(elapsedtime);
ci->seek_complete();
/* NOTE(review): this jump restarts the frame setup without
re-testing currentframe < totalframes. */
goto frame_start; /* Sorry... */
}
/* No seekpoint for the requested time: acknowledge and carry on */
ci->seek_complete();
}
blockstodecode = MIN(BLOCKS_PER_LOOP, nblocks);
if ((res = decode_chunk(&ape_ctx, inbuffer, &firstbyte,
&bytesconsumed,
decoded0, decoded1,
blockstodecode)) < 0)
{
/* Frame decoding error, abort */
LOGF("APE: Frame %lu, error %d\n",(unsigned long)currentframe,res);
return CODEC_ERROR;
}
ci->yield();
/* Drop samples that precede the resume/seek target, output the rest */
if (samplestoskip > 0) {
if (samplestoskip < blockstodecode) {
ci->pcmbuf_insert(decoded0 + samplestoskip,
decoded1 + samplestoskip,
blockstodecode - samplestoskip);
samplestoskip = 0;
} else {
samplestoskip -= blockstodecode;
}
} else {
ci->pcmbuf_insert(decoded0, decoded1, blockstodecode);
}
samplesdone += blockstodecode;
if (!samplestoskip) {
/* Update the elapsed-time indicator */
elapsedtime = (samplesdone*10)/(ape_ctx.samplerate/100);
ci->set_elapsed(elapsedtime);
}
ci->advance_buffer(bytesconsumed);
inbuffer = ci->request_buffer(&bytesleft, INPUT_CHUNKSIZE);
/* Decrement the block count */
nblocks -= blockstodecode;
}
currentframe++;
}
done:
LOGF("APE: Decoded %lu samples\n",(unsigned long)samplesdone);
return CODEC_OK;
}

140
lib/rbcodec/codecs/asap.c Normal file
View file

@ -0,0 +1,140 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2008 Dominik Wenger
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include "codeclib.h"
#include "libasap/asap.h"
CODEC_HEADER
/* Size in bytes of the buffer filled by one ASAP_Generate() call */
#define CHUNK_SIZE (1024*2)

static byte samples[CHUNK_SIZE] IBSS_ATTR;   /* The sample buffer */
static ASAP_State asap IBSS_ATTR;            /* asap codec state */

/* Codec entry point.  Nothing has to be done for any call reason; always
 * returns CODEC_OK. */
enum codec_status codec_main(enum codec_entry_call_reason reason)
{
    (void)reason;   /* intentionally unused */

    return CODEC_OK;
}
/* this is called for each file to process
 *
 * Loads the whole module into ASAP, configures the DSP for 44.1 kHz 16-bit
 * output and generates audio one buffer at a time until ASAP returns a
 * short buffer or a HALT command arrives.
 *
 * Progress is counted in PCM frames (one sample per channel) so that the
 * elapsed time is right for mono and stereo modules alike; at 44100 Hz,
 * 441 frames last exactly 10 ms.
 *
 * Returns CODEC_ERROR when init, loading or format detection fails;
 * CODEC_OK otherwise. */
enum codec_status codec_run(void)
{
    int n_bytes;
    int song;
    int duration;
    char* module;
    int bytesPerSample;         /* bytes per PCM frame, all channels */
    uint32_t frames_done = 0;   /* PCM frames generated so far */
    size_t filesize;
    intptr_t param;

    if (codec_init()) {
        DEBUGF("codec init failed\n");
        return CODEC_ERROR;
    }

    codec_set_replaygain(ci->id3);

    /* The whole file has to be available in the buffer at once */
    ci->seek_buffer(0);
    module = ci->request_buffer(&filesize, ci->filesize);
    if (!module || (size_t)filesize < (size_t)ci->filesize)
    {
        DEBUGF("loading error\n");
        return CODEC_ERROR;
    }

    /*Init ASAP */
    if (!ASAP_Load(&asap, ci->id3->path, module, filesize))
    {
        DEBUGF("%s: format not supported",ci->id3->path);
        return CODEC_ERROR;
    }

    /* Make use of 44.1khz */
    ci->configure(DSP_SET_FREQUENCY, 44100);
    /* Sample depth is 16 bit little endian */
    ci->configure(DSP_SET_SAMPLE_DEPTH, 16);
    /* Stereo or Mono output ? */
    if(asap.module_info->channels ==1)
    {
        ci->configure(DSP_SET_STEREO_MODE, STEREO_MONO);
        bytesPerSample = 2;
    }
    else
    {
        ci->configure(DSP_SET_STEREO_MODE, STEREO_INTERLEAVED);
        bytesPerSample = 4;
    }
    /* reset elapsed */
    ci->set_elapsed(0);

    song = asap.module_info->default_song;
    duration = asap.module_info->durations[song];
    if (duration < 0)
        duration = 180 * 1000;  /* no duration given: fall back to 180 s */

    /* set id3 length, because metadata parse might not have done it */
    ci->id3->length = duration;

    ASAP_PlaySong(&asap, song, duration);
    ASAP_MutePokeyChannels(&asap, 0);

    /* The main decoder loop */
    while (1) {
        enum codec_command_action action = ci->get_command(&param);

        if (action == CODEC_ACTION_HALT)
            break;

        if (action == CODEC_ACTION_SEEK_TIME) {
            /* New time is ready in param */

            /* seek to pos */
            ASAP_Seek(&asap,param);
            /* update the frame counter: milliseconds -> frames */
            frames_done = (uint32_t)(((int64_t)param * 441) / 10);
            /* update elapsed */
            ci->set_elapsed((unsigned long)(((uint64_t)frames_done * 10) / 441));
            /* seek ready */
            ci->seek_complete();
        }

        /* Generate a buffer full of Audio */
    #ifdef ROCKBOX_LITTLE_ENDIAN
        n_bytes = ASAP_Generate(&asap, samples, sizeof(samples), ASAP_FORMAT_S16_LE);
    #else
        n_bytes = ASAP_Generate(&asap, samples, sizeof(samples), ASAP_FORMAT_S16_BE);
    #endif

        ci->pcmbuf_insert(samples, NULL, n_bytes /bytesPerSample);

        frames_done += n_bytes / bytesPerSample;
        ci->set_elapsed((unsigned long)(((uint64_t)frames_done * 10) / 441));

        /* a short buffer means the song has ended */
        if(n_bytes != sizeof(samples))
            break;
    }

    return CODEC_OK;
}

View file

@ -0,0 +1,153 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2009 Mohamed Tarek
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include <string.h>
#include "logf.h"
#include "codeclib.h"
#include "inttypes.h"
#include "libatrac/atrac3.h"
CODEC_HEADER
/* Size in bytes of one ATRAC3 frame, read from the track metadata */
#define FRAMESIZE ci->id3->bytesperframe
/* Bitrate from the track metadata.
 * NOTE(review): presumably kbit/s, since (FRAMESIZE * 8) / BITRATE is used
 * below as the per-frame step for set_elapsed() - confirm. */
#define BITRATE ci->id3->bitrate
/* Decoder state shared by atrac3_decode_init() and atrac3_decode_frame() */
static ATRAC3Context q IBSS_ATTR;
/* Codec entry point: this decoder has no load/unload work to perform, so
 * every call reason is acknowledged with CODEC_OK. */
enum codec_status codec_main(enum codec_entry_call_reason reason)
{
    (void)reason; /* intentionally unused */
    return CODEC_OK;
}
/*
 * Per-file decode entry point for ATRAC3 audio in an OMA container.
 *
 * Initialises the decoder from the track metadata, optionally converts a
 * resume byte offset into an initial seek request, and then decodes one
 * FRAMESIZE-byte frame per loop iteration until all frames between
 * first_frame_offset and the end of the file have been played or a halt is
 * requested.
 *
 * Returns CODEC_OK on normal end/halt, CODEC_ERROR if initialisation fails
 * or the decoder does not consume exactly one frame.
 */
enum codec_status codec_run(void)
{
    static size_t buff_size;
    int datasize, res, frame_counter, total_frames, seek_frame_offset;
    uint8_t *bit_buffer;
    int elapsed = 0;
    size_t resume_offset;
    intptr_t param;
    enum codec_command_action action = CODEC_ACTION_NULL;

    if (codec_init()) {
        DEBUGF("codec init failed\n");
        return CODEC_ERROR;
    }

    resume_offset = ci->id3->offset;

    codec_set_replaygain(ci->id3);
    ci->memset(&q,0,sizeof(ATRAC3Context));

    ci->configure(DSP_SET_FREQUENCY, ci->id3->frequency);
    ci->configure(DSP_SET_SAMPLE_DEPTH, 17); /* Remark: atrac3 uses s15.0 by default, s15.2 was hacked. */
    ci->configure(DSP_SET_STEREO_MODE, ci->id3->channels == 1 ?
                  STEREO_MONO : STEREO_NONINTERLEAVED);

    ci->seek_buffer(0);

    res = atrac3_decode_init(&q, ci->id3);
    if(res < 0) {
        DEBUGF("failed to initialize OMA atrac decoder\n");
        return CODEC_ERROR;
    }

    total_frames = (ci->id3->filesize - ci->id3->first_frame_offset) / FRAMESIZE;
    frame_counter = 0;

    /* check for a mid-track resume and force a seek time accordingly */
    if(resume_offset > ci->id3->first_frame_offset) {
        resume_offset -= ci->id3->first_frame_offset;
        /* calculate resume_offset in frames */
        resume_offset = (int)resume_offset / FRAMESIZE;
        param = (int)resume_offset * ((FRAMESIZE * 8)/BITRATE);
        /* handled by the seek branch of the first loop iteration */
        action = CODEC_ACTION_SEEK_TIME;
    }
    else {
        ci->set_elapsed(0);
        ci->seek_buffer(ci->id3->first_frame_offset);
    }

    /* The main decoder loop */
    while(frame_counter < total_frames)
    {
        /* A pending (resume) action takes precedence over polling */
        if (action == CODEC_ACTION_NULL)
            action = ci->get_command(&param);

        if (action == CODEC_ACTION_HALT)
            break;

        bit_buffer = (uint8_t *) ci->request_buffer(&buff_size, FRAMESIZE);

        if (action == CODEC_ACTION_SEEK_TIME) {
            /* Do not allow seeking beyond the file's length */
            if ((unsigned) param > ci->id3->length) {
                ci->set_elapsed(ci->id3->length);
                ci->seek_complete();
                break;
            }

            /* Seek to the start of the track */
            if (param == 0) {
                elapsed = 0;
                /* Restart the frame count as well; otherwise the loop
                 * condition would end playback early by the number of
                 * frames already decoded before the seek. */
                frame_counter = 0;
                ci->set_elapsed(0);
                ci->seek_buffer(ci->id3->first_frame_offset);
                ci->seek_complete();
                action = CODEC_ACTION_NULL;
                continue;
            }

            /* Translate the requested time into a frame index */
            seek_frame_offset = (param * BITRATE) / (8 * FRAMESIZE);
            frame_counter = seek_frame_offset;
            ci->seek_buffer(ci->id3->first_frame_offset + seek_frame_offset* FRAMESIZE);
            /* The earlier buffer pointer is stale after seek_buffer() */
            bit_buffer = (uint8_t *) ci->request_buffer(&buff_size, FRAMESIZE);
            elapsed = param;
            ci->set_elapsed(elapsed);
            ci->seek_complete();
        }

        action = CODEC_ACTION_NULL;

        res = atrac3_decode_frame(FRAMESIZE, &q, &datasize, bit_buffer, FRAMESIZE);

        /* The decoder must consume exactly one frame */
        if(res != (int)FRAMESIZE) {
            DEBUGF("codec error\n");
            return CODEC_ERROR;
        }

        if(datasize)
            ci->pcmbuf_insert(q.outSamples, q.outSamples + 1024,
                              q.samples_per_frame / ci->id3->channels);

        elapsed += (FRAMESIZE * 8) / BITRATE;
        ci->set_elapsed(elapsed);

        ci->advance_buffer(FRAMESIZE);
        frame_counter++;
    }

    return CODEC_OK;
}

View file

@ -0,0 +1,215 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2009 Mohamed Tarek
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include <string.h>
#include "logf.h"
#include "codeclib.h"
#include "inttypes.h"
#include "libatrac/atrac3.h"
CODEC_HEADER
/* RealMedia container context; filled from the metadata buffer by init_rm() */
static RMContext rmctx IBSS_ATTR_LARGE_IRAM;
/* Packet filled by rm_get_packet(); its frames[] are fed to the decoder */
static RMPacket pkt IBSS_ATTR_LARGE_IRAM;
/* ATRAC3 decoder state */
static ATRAC3Context q IBSS_ATTR;
/*
 * Restore the RMContext that was stashed in the id3v2 buffer (at the first
 * 4-byte aligned address inside it), then overwrite the start of that
 * buffer with the codec extradata, which is where the atrac3 decoder
 * expects to find it.
 */
static void init_rm(RMContext *rmctx)
{
    /* round the buffer address up to the next multiple of 4 */
    intptr_t aligned_addr = ((intptr_t)ci->id3->id3v2buf + 3) & ~3;

    /* initialize the RMContext */
    memcpy(rmctx, (void *)aligned_addr, sizeof(RMContext));

    /* hand the extradata over to atrac3 through id3v2buf */
    memcpy(ci->id3->id3v2buf,
           (char *)rmctx->codec_extradata,
           rmctx->extradata_size * sizeof(char));
}
/* Codec entry point: nothing has to be set up or torn down for this
 * decoder, so the call reason is ignored and CODEC_OK is returned. */
enum codec_status codec_main(enum codec_entry_call_reason reason)
{
    (void)reason; /* intentionally unused */
    return CODEC_OK;
}
/*
 * Per-file decode entry point for ATRAC3 audio in a RealMedia container.
 *
 * Restores the RMContext via init_rm(), configures the DSP, and then reads
 * the stream one scrambling unit at a time with rm_get_packet(), decoding
 * every frame of the resulting packet. Runs until `elapsed` reaches
 * rmctx.duration, a halt is requested, or the packet holds no more frames.
 *
 * Returns CODEC_ERROR if initialisation fails, if packet parsing fails
 * before any frame was played, or if the decoder does not consume exactly
 * block_align bytes; CODEC_OK otherwise.
 */
enum codec_status codec_run(void)
{
static size_t buff_size;
int datasize, res, consumed, i, time_offset;
uint8_t *bit_buffer;
/* fs: audio frame size, sps: sub-packet size, h: sub_packet_h (see below) */
uint16_t fs,sps,h;
uint32_t packet_count;
int scrambling_unit_size, num_units, elapsed = 0;
/* -1 until the first frame has been decoded, 1 afterwards */
int playback_on = -1;
size_t resume_offset;
intptr_t param;
enum codec_command_action action = CODEC_ACTION_NULL;
if (codec_init()) {
DEBUGF("codec init failed\n");
return CODEC_ERROR;
}
resume_offset = ci->id3->offset;
codec_set_replaygain(ci->id3);
ci->memset(&rmctx,0,sizeof(RMContext));
ci->memset(&pkt,0,sizeof(RMPacket));
ci->memset(&q,0,sizeof(ATRAC3Context));
ci->seek_buffer(0);
init_rm(&rmctx);
ci->configure(DSP_SET_FREQUENCY, ci->id3->frequency);
ci->configure(DSP_SET_SAMPLE_DEPTH, 17); /* Remark: atrac3 uses s15.0 by default, s15.2 was hacked. */
ci->configure(DSP_SET_STEREO_MODE, rmctx.nb_channels == 1 ?
STEREO_MONO : STEREO_NONINTERLEAVED);
packet_count = rmctx.nb_packets;
/* From here on block_align holds the sub-packet size (the amount handed to
 * the decoder per frame) and audio_framesize holds the old block_align. */
rmctx.audio_framesize = rmctx.block_align;
rmctx.block_align = rmctx.sub_packet_size;
fs = rmctx.audio_framesize;
sps= rmctx.block_align;
h = rmctx.sub_packet_h;
/* Bytes requested from the buffer for each rm_get_packet() call */
scrambling_unit_size = h * (fs + PACKET_HEADER_SIZE);
res = atrac3_decode_init(&q, ci->id3);
if(res < 0) {
DEBUGF("failed to initialize RM atrac decoder\n");
return CODEC_ERROR;
}
/* check for a mid-track resume and force a seek time accordingly */
if(resume_offset > rmctx.data_offset + DATA_HEADER_SIZE) {
resume_offset -= rmctx.data_offset + DATA_HEADER_SIZE;
/* NOTE(review): this num_units value is overwritten in the seek branch
 * before it is read. */
num_units = (int)resume_offset / scrambling_unit_size;
/* put number of subpackets to skip in resume_offset */
resume_offset /= (sps + PACKET_HEADER_SIZE);
/* sub-packet count times the per-sub-packet time step gives the seek time */
param = (int)resume_offset * ((sps * 8 * 1000)/rmctx.bit_rate);
action = CODEC_ACTION_SEEK_TIME;
}
else {
ci->set_elapsed(0);
}
ci->advance_buffer(rmctx.data_offset + DATA_HEADER_SIZE);
/* The main decoder loop */
/* A seek to time 0 jumps back here to restart with the first packet */
seek_start :
while((unsigned)elapsed < rmctx.duration)
{
bit_buffer = (uint8_t *) ci->request_buffer(&buff_size, scrambling_unit_size);
consumed = rm_get_packet(&bit_buffer, &rmctx, &pkt);
if(consumed < 0 && playback_on != 0) {
if(playback_on == -1) {
/* Error only if packet-parsing failed and playback hadn't started */
DEBUGF("rm_get_packet failed\n");
return CODEC_ERROR;
}
else
return CODEC_OK;
}
/* One iteration per frame carried by the packet that was just read */
for(i = 0; i < rmctx.audio_pkt_cnt*(fs/sps) ; i++)
{
/* A pending (resume) action takes precedence over polling */
if (action == CODEC_ACTION_NULL)
action = ci->get_command(&param);
if (action == CODEC_ACTION_HALT)
return CODEC_OK;
if (action == CODEC_ACTION_SEEK_TIME) {
/* Do not allow seeking beyond the file's length */
if ((unsigned) param > ci->id3->length) {
ci->set_elapsed(ci->id3->length);
ci->seek_complete();
return CODEC_OK;
}
/* Rewind to the first packet and reset the packet bookkeeping */
ci->seek_buffer(rmctx.data_offset + DATA_HEADER_SIZE);
packet_count = rmctx.nb_packets;
rmctx.audio_pkt_cnt = 0;
rmctx.frame_number = 0;
/* Seek to the start of the track */
if (param == 0) {
ci->set_elapsed(0);
ci->seek_complete();
action = CODEC_ACTION_NULL;
goto seek_start;
}
/* Number of whole scrambling units that precede the requested time */
num_units = (param/(sps*1000*8/rmctx.bit_rate))/(h*(fs/sps));
/* `consumed` still holds the size of the most recently parsed unit.
 * NOTE(review): presumably all units have the same size - confirm. */
ci->seek_buffer(rmctx.data_offset + DATA_HEADER_SIZE + consumed * num_units);
bit_buffer = (uint8_t *) ci->request_buffer(&buff_size, scrambling_unit_size);
consumed = rm_get_packet(&bit_buffer, &rmctx, &pkt);
if(consumed < 0 && playback_on != 0) {
if(playback_on == -1) {
/* Error only if packet-parsing failed and playback hadn't started */
DEBUGF("rm_get_packet failed\n");
return CODEC_ERROR;
}
else
return CODEC_OK;
}
packet_count = rmctx.nb_packets - rmctx.audio_pkt_cnt * num_units;
rmctx.frame_number = (param/(sps*1000*8/rmctx.bit_rate));
/* Step back one unit at a time while the unit's timestamp is still past
 * the requested time */
while(rmctx.audiotimestamp > (unsigned) param) {
rmctx.audio_pkt_cnt = 0;
ci->seek_buffer(rmctx.data_offset + DATA_HEADER_SIZE + consumed * (num_units-1));
bit_buffer = (uint8_t *) ci->request_buffer(&buff_size, scrambling_unit_size);
consumed = rm_get_packet(&bit_buffer, &rmctx, &pkt);
packet_count += rmctx.audio_pkt_cnt;
num_units--;
}
/* Continue decoding at the frame inside this unit that matches the
 * requested time: i becomes that frame's index */
time_offset = param - rmctx.audiotimestamp;
i = (time_offset/((sps * 8 * 1000)/rmctx.bit_rate));
elapsed = rmctx.audiotimestamp+(1000*8*sps/rmctx.bit_rate)*i;
ci->set_elapsed(elapsed);
ci->seek_complete();
}
action = CODEC_ACTION_NULL;
if(pkt.length)
res = atrac3_decode_frame(rmctx.block_align, &q, &datasize, pkt.frames[i], rmctx.block_align);
else /* indicates that there are no remaining frames */
return CODEC_OK;
/* The decoder must consume exactly one sub-packet */
if(res != rmctx.block_align) {
DEBUGF("codec error\n");
return CODEC_ERROR;
}
if(datasize)
ci->pcmbuf_insert(q.outSamples, q.outSamples + 1024, q.samples_per_frame / rmctx.nb_channels);
playback_on = 1;
/* Unit timestamp plus the time step of the i frames decoded within it */
elapsed = rmctx.audiotimestamp+(1000*8*sps/rmctx.bit_rate)*i;
ci->set_elapsed(elapsed);
rmctx.frame_number++;
}
packet_count -= rmctx.audio_pkt_cnt;
rmctx.audio_pkt_cnt = 0;
/* Skip past the unit that was just decoded */
ci->advance_buffer(consumed);
}
return CODEC_OK;
}

314
lib/rbcodec/codecs/au.c Normal file
View file

@ -0,0 +1,314 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2010 Yoshihisa Uchida
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include "codeclib.h"
#include "codecs/libpcm/support_formats.h"
CODEC_HEADER
/* Sun Audio file (Au file format) codec
*
* References
* [1] Sun Microsystems, Inc., Header file for Audio, .au, 1992
* URL http://www.opengroup.org/public/pubs/external/auformat.html
* [2] Wikipedia, Au file format, URL: http://en.wikipedia.org/wiki/Sun_Audio
*/
/* Number of int32_t entries in the decode output buffer */
#define PCM_SAMPLE_SIZE (1024*2)
/* Output buffer filled by the selected pcm codec's decode() */
static int32_t samples[PCM_SAMPLE_SIZE] IBSS_ATTR;
/* Internal format tags; these select an entry of au_codecs[] */
enum
{
AU_FORMAT_UNSUPPORT = 0, /* unsupported format */
AU_FORMAT_MULAW, /* G.711 MULAW */
AU_FORMAT_PCM, /* Linear PCM */
AU_FORMAT_IEEE_FLOAT, /* IEEE float */
AU_FORMAT_ALAW, /* G.711 ALAW */
};
/*
 * Maps the header's encoding field to { format tag, bits per sample }.
 * Rows 0-7 are indexed directly by the encoding value; row 8 is used for
 * encoding 27 (see convert_au_format()).
 */
static const char support_formats[9][2] = {
{ AU_FORMAT_UNSUPPORT, 0 }, /* 0: not a supported encoding */
{ AU_FORMAT_MULAW, 8 }, /* 1: G.711 MULAW */
{ AU_FORMAT_PCM, 8 }, /* 2: Linear PCM 8bit (signed) */
{ AU_FORMAT_PCM, 16 }, /* 3: Linear PCM 16bit (signed, big endian) */
{ AU_FORMAT_PCM, 24 }, /* 4: Linear PCM 24bit (signed, big endian) */
{ AU_FORMAT_PCM, 32 }, /* 5: Linear PCM 32bit (signed, big endian) */
{ AU_FORMAT_IEEE_FLOAT, 32 }, /* 6: Linear PCM float 32bit (signed, big endian) */
{ AU_FORMAT_IEEE_FLOAT, 64 }, /* 7: Linear PCM float 64bit (signed, big endian) */
/* encoding 8 - 26 unsupported. */
{ AU_FORMAT_ALAW, 8 }, /* 27: G.711 ALAW */
};
/* Format tag -> getter for the pcm codec implementing that format */
static const struct pcm_entry au_codecs[] = {
{ AU_FORMAT_MULAW, get_itut_g711_mulaw_codec },
{ AU_FORMAT_PCM, get_linear_pcm_codec },
{ AU_FORMAT_IEEE_FLOAT, get_ieee_float_codec },
{ AU_FORMAT_ALAW, get_itut_g711_alaw_codec },
};
/* Number of entries in au_codecs[]; keep in sync with the table above */
#define NUM_FORMATS 4
static const struct pcm_codec *get_au_codec(uint32_t formattag)
{
int i;
for (i = 0; i < NUM_FORMATS; i++)
{
if (au_codecs[i].format_tag == formattag)
{
if (au_codecs[i].get_codec)
return au_codecs[i].get_codec();
return 0;
}
}
return 0;
}
/*
 * Read a 32-bit big-endian value from the four bytes at `buf`.
 *
 * Each byte is widened to uint32_t before shifting: shifting the promoted
 * (signed) int left by 24 is undefined when the top byte is >= 0x80, which
 * happens for instance with the 0xffffffff "unknown data size" marker.
 */
static unsigned int get_be32(const uint8_t *buf)
{
    return ((uint32_t)buf[0] << 24) |
           ((uint32_t)buf[1] << 16) |
           ((uint32_t)buf[2] << 8)  |
            (uint32_t)buf[3];
}
/*
 * Translate the header's `encoding` value into a format tag and sample
 * width stored in `fmt`.
 *
 * Encodings 0-7 and 27 are looked up in support_formats[]; for any other
 * value only fmt->formattag is set (to AU_FORMAT_UNSUPPORT) and
 * fmt->bitspersample is left untouched. Returns the resulting format tag.
 */
static int convert_au_format(unsigned int encoding, struct pcm_format *fmt)
{
    int row = -1; /* index into support_formats[], -1 = no entry */

    if (encoding < 8)
        row = (int)encoding;
    else if (encoding == 27)
        row = 8;

    fmt->formattag = AU_FORMAT_UNSUPPORT;
    if (row >= 0)
    {
        fmt->formattag     = support_formats[row][0];
        fmt->bitspersample = support_formats[row][1];
    }

    return fmt->formattag;
}
/* Codec entry point: on load, set the DSP sample depth once for all files;
 * every other call reason needs no work. Always returns CODEC_OK. */
enum codec_status codec_main(enum codec_entry_call_reason reason)
{
    if (reason != CODEC_LOAD)
        return CODEC_OK;

    /* Generic codec initialisation */
    ci->configure(DSP_SET_SAMPLE_DEPTH, PCM_OUTPUT_DEPTH-1);
    return CODEC_OK;
}
/*
 * Per-file decode entry point for Sun Audio (.au) files.
 *
 * Parses the 24-byte header (or assumes mono 8-bit mu-law when the ".snd"
 * magic is missing), selects the matching pcm codec, optionally resumes at
 * the byte offset stored in the track metadata, and then decodes
 * chunk by chunk until format.numbytes have been consumed, the buffer runs
 * dry, or a halt is requested.
 *
 * Returns CODEC_ERROR for header/format/decoder errors, CODEC_OK otherwise.
 */
enum codec_status codec_run(void)
{
struct pcm_format format;
/* bytesdone: audio bytes consumed so far; decodedsamples: samples output */
uint32_t bytesdone, decodedsamples;
size_t n;
int bufcount;
int endofstream;
unsigned char *buf;
uint8_t *aubuf;
off_t firstblockposn; /* position of the first block in file */
const struct pcm_codec *codec;
int offset = 0;
intptr_t param;
if (codec_init()) {
DEBUGF("codec_init() error\n");
return CODEC_ERROR;
}
codec_set_replaygain(ci->id3);
/* Need to save offset for later use (cleared indirectly by advance_buffer) */
bytesdone = ci->id3->offset;
ci->memset(&format, 0, sizeof(struct pcm_format));
format.is_signed = true;
format.is_little_endian = false;
/* set format */
ci->seek_buffer(0);
buf = ci->request_buffer(&n, 24);
if (n < 24 || (memcmp(buf, ".snd", 4) != 0))
{
/*
* headerless sun audio file
* It is decoded under the following assumptions.
* format: G.711 mu-law
* channel: mono
* frequency: 8000 Hz
*/
offset = 0;
format.formattag = AU_FORMAT_MULAW;
format.channels = 1;
format.bitspersample = 8;
format.numbytes = ci->id3->filesize;
}
else
{
/* parse header */
/* data offset */
offset = get_be32(buf + 4);
if (offset < 24)
{
DEBUGF("CODEC_ERROR: sun audio offset size is small: %d\n", offset);
return CODEC_ERROR;
}
/* data size */
format.numbytes = get_be32(buf + 8);
/* 0xffffffff means the size is unknown: use the rest of the file */
if (format.numbytes == (uint32_t)0xffffffff)
format.numbytes = ci->id3->filesize - offset;
/* encoding */
format.formattag = convert_au_format(get_be32(buf + 12), &format);
if (format.formattag == AU_FORMAT_UNSUPPORT)
{
DEBUGF("CODEC_ERROR: sun audio unsupport format: %d\n", get_be32(buf + 12));
return CODEC_ERROR;
}
/* skip sample rate (header bytes 16-19); ci->id3->frequency is used instead */
format.channels = get_be32(buf + 20);
}
/* advance to the start of the audio data */
ci->advance_buffer(offset);
firstblockposn = offset;
decodedsamples = 0;
codec = 0;
/* get codec */
codec = get_au_codec(format.formattag);
if (!codec)
{
DEBUGF("CODEC_ERROR: unsupport sun audio format: %x\n", (int)format.formattag);
return CODEC_ERROR;
}
if (!codec->set_format(&format))
{
return CODEC_ERROR;
}
if (format.numbytes == 0) {
DEBUGF("CODEC_ERROR: data size is 0\n");
return CODEC_ERROR;
}
/* check chunksize: shrink it when one chunk would decode to more samples
 * than the output buffer holds */
if ((format.chunksize / format.blockalign) * format.samplesperblock * format.channels
> PCM_SAMPLE_SIZE)
format.chunksize = (PCM_SAMPLE_SIZE / format.blockalign) * format.blockalign;
if (format.chunksize == 0)
{
DEBUGF("CODEC_ERROR: chunksize is 0\n");
return CODEC_ERROR;
}
ci->configure(DSP_SWITCH_FREQUENCY, ci->id3->frequency);
if (format.channels == 2) {
ci->configure(DSP_SET_STEREO_MODE, STEREO_INTERLEAVED);
} else if (format.channels == 1) {
ci->configure(DSP_SET_STEREO_MODE, STEREO_MONO);
} else {
DEBUGF("CODEC_ERROR: more than 2 channels\n");
return CODEC_ERROR;
}
/* make sure we're at the correct offset */
if (bytesdone > (uint32_t) firstblockposn) {
/* Resuming mid-track: bytesdone still holds the saved file offset here */
/* Round down to previous block */
struct pcm_pos *newpos = codec->get_seek_pos(bytesdone - firstblockposn,
PCM_SEEK_POS, NULL);
if (newpos->pos > format.numbytes)
goto done;
if (ci->seek_buffer(firstblockposn + newpos->pos))
{
bytesdone = newpos->pos;
decodedsamples = newpos->samples;
}
} else {
/* already where we need to be */
bytesdone = 0;
}
ci->set_elapsed(decodedsamples*1000LL/ci->id3->frequency);
/* The main decoder loop */
endofstream = 0;
while (!endofstream) {
enum codec_command_action action = ci->get_command(&param);
if (action == CODEC_ACTION_HALT)
break;
if (action == CODEC_ACTION_SEEK_TIME) {
/* 3rd args(read_buffer) is unnecessary in the format which Sun Audio supports. */
struct pcm_pos *newpos = codec->get_seek_pos(param, PCM_SEEK_TIME, NULL);
/* Seeking past the audio data ends playback */
if (newpos->pos > format.numbytes)
{
ci->set_elapsed(ci->id3->length);
ci->seek_complete();
break;
}
if (ci->seek_buffer(firstblockposn + newpos->pos))
{
bytesdone = newpos->pos;
decodedsamples = newpos->samples;
}
ci->set_elapsed(decodedsamples*1000LL/ci->id3->frequency);
ci->seek_complete();
}
aubuf = (uint8_t *)ci->request_buffer(&n, format.chunksize);
if (n == 0)
break; /* End of stream */
/* Do not decode past the declared data size */
if (bytesdone + n > format.numbytes) {
n = format.numbytes - bytesdone;
endofstream = 1;
}
if (codec->decode(aubuf, n, samples, &bufcount) == CODEC_ERROR)
{
DEBUGF("codec error\n");
return CODEC_ERROR;
}
ci->pcmbuf_insert(samples, NULL, bufcount);
ci->advance_buffer(n);
bytesdone += n;
decodedsamples += bufcount;
if (bytesdone >= format.numbytes)
endofstream = 1;
ci->set_elapsed(decodedsamples*1000LL/ci->id3->frequency);
}
done:
return CODEC_OK;
}

137
lib/rbcodec/codecs/ay.c Normal file
View file

@ -0,0 +1,137 @@
/* Ripped off from Game_Music_Emu 0.5.2. http://www.slack.net/~ant/ */
#include <codecs/lib/codeclib.h>
#include "libgme/ay_emu.h"
CODEC_HEADER
/* Number of int16_t samples rendered per Ay_play() call. The buffer is
 * inserted as CHUNK_SIZE/2 interleaved stereo frames. */
#define CHUNK_SIZE (1024*2)
/* Render buffer handed to Ay_play() and pcmbuf_insert() */
static int16_t samples[CHUNK_SIZE] IBSS_ATTR;
/* Emulator state, initialised once in codec_main() */
static struct Ay_Emu ay_emu;
/****************** rockbox interface ******************/
/*
 * Start emulating track `t` and reset the displayed position.
 * Unless the track is being looped, a 4000 ms fade is scheduled to begin
 * 4000 ms before the track length. For multitrack files the elapsed time
 * is used to display the track number (t seconds), otherwise it is 0.
 */
static void set_codec_track(int t, int multitrack)
{
    Ay_start_track(&ay_emu, t);

    /* for loop mode we disable track limits */
    if (!ci->loop_track()) {
        Track_set_fade(&ay_emu, Track_get_length(&ay_emu, t) - 4000, 4000);
    }

    /* t is track no to display */
    ci->set_elapsed(multitrack ? t*1000 : 0);
}
/* Codec entry point: on load, configure the DSP for 16-bit 44.1 kHz
 * interleaved stereo and initialise the emulator at the same sample rate.
 * Other call reasons need no work. Always returns CODEC_OK. */
enum codec_status codec_main(enum codec_entry_call_reason reason)
{
    if (reason != CODEC_LOAD)
        return CODEC_OK;

    /* we only render 16 bits */
    ci->configure(DSP_SET_SAMPLE_DEPTH, 16);

    /* 44 Khz, Interleaved stereo */
    ci->configure(DSP_SET_FREQUENCY, 44100);
    ci->configure(DSP_SET_STEREO_MODE, STEREO_INTERLEAVED);

    Ay_init(&ay_emu);
    Ay_set_sample_rate(&ay_emu, 44100);

    return CODEC_OK;
}
/*
 * Per-file entry point of the AY decoder.
 *
 * Loads the complete file into the emulator and renders CHUNK_SIZE samples
 * per iteration. For multitrack files a seek selects a track (one track
 * per second of seek time) and playback advances to the next track when
 * one ends; single-track files seek by time and report elapsed time.
 *
 * Returns CODEC_ERROR if initialisation or loading fails, else CODEC_OK.
 */
enum codec_status codec_run(void)
{
    blargg_err_t err;
    uint8_t *file_data;
    size_t file_len;
    intptr_t param;
    int track = 0;
    int is_multitrack = 0;
    uint32_t elapsed_time = 0;

    DEBUGF("AY: next_track\n");
    if (codec_init()) {
        return CODEC_ERROR;
    }

    codec_set_replaygain(ci->id3);

    /* The emulator works on the whole file in memory */
    DEBUGF("AY: request file\n");
    ci->seek_buffer(0);
    file_data = ci->request_buffer(&file_len, ci->filesize);
    if (!file_data || file_len < (size_t)ci->filesize) {
        DEBUGF("AY: file load failed\n");
        return CODEC_ERROR;
    }

    err = Ay_load_mem(&ay_emu, file_data, ci->filesize);
    if (err) {
        DEBUGF("AY: Ay_load_mem failed (%s)\n", err);
        return CODEC_ERROR;
    }

    /* A playlist, when present, determines the track count */
    if (ay_emu.m3u.size > 0)
        ay_emu.track_count = ay_emu.m3u.size;

    /* More than one track switches to track-based seeking */
    is_multitrack = (ay_emu.track_count > 1) ? 1 : 0;

next_track:
    set_codec_track(track, is_multitrack);

    /* The main decoder loop */
    for (;;) {
        enum codec_command_action action = ci->get_command(&param);

        if (action == CODEC_ACTION_HALT)
            break;

        if (action == CODEC_ACTION_SEEK_TIME) {
            if (is_multitrack) {
                /* every second of seek time selects one track */
                track = param/1000;
                ci->seek_complete();
                if (track < ay_emu.track_count)
                    goto next_track;
                break;
            }

            ci->set_elapsed(param);
            elapsed_time = param;
            Track_seek(&ay_emu, param);
            ci->seek_complete();

            /* Set fade again */
            if (!ci->loop_track()) {
                Track_set_fade(&ay_emu, Track_get_length(&ay_emu, track) - 4000, 4000);
            }
        }

        /* Generate audio buffer */
        err = Ay_play(&ay_emu, CHUNK_SIZE, samples);
        if (err || Track_ended(&ay_emu)) {
            track++;
            if (track < ay_emu.track_count)
                goto next_track;
            break;
        }

        ci->pcmbuf_insert(samples, NULL, CHUNK_SIZE / 2);

        /* Set elapsed time for one track files */
        if (!is_multitrack) {
            elapsed_time += (CHUNK_SIZE / 2) * 10 / 441;
            ci->set_elapsed(elapsed_time);
        }
    }

    return CODEC_OK;
}

View file

@ -0,0 +1,74 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2006 Tomasz Malesinski
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include "config.h"
#include "codecs.h"
/* Pointer to the core's API table; every codec reaches the firmware through
 * it. Its address is stored in the codec header (see CODEC_HEADER). */
struct codec_api *ci DATA_ATTR;
/* Bounds used by codec_start() to clear the codec's BSS.
 * NOTE(review): presumably provided by the linker script - confirm. */
extern unsigned char plugin_bss_start[];
extern unsigned char plugin_end_addr[];
/* Stub that only exists to provide a _start symbol and always returns 0.
 * The real entry point, codec_start(), is called via its reference in
 * __header; this avoids a warning with certain compilers. */
int _start(void) {return 0;}
/*
 * Entry point referenced from the codec header.
 *
 * On native targets, a CODEC_LOAD call first prepares the codec's memory:
 * with USE_IRAM the IRAM image is copied into place and the IRAM BSS is
 * zeroed, then the regular BSS is zeroed and the data cache is committed.
 * Afterwards, for every call reason, control is passed to the codec's own
 * codec_main() and its status is returned.
 */
enum codec_status codec_start(enum codec_entry_call_reason reason)
{
#if (CONFIG_PLATFORM & PLATFORM_NATIVE)
if (reason == CODEC_LOAD)
{
#ifdef USE_IRAM
extern char iramcopy[], iramstart[], iramend[], iedata[], iend[];
size_t iram_size = iramend - iramstart;
size_t ibss_size = iend - iedata;
if (iram_size > 0 || ibss_size > 0)
{
ci->memcpy(iramstart, iramcopy, iram_size);
ci->memset(iedata, 0, ibss_size);
/* make the icache (if it exists) up to date with the new code */
ci->commit_discard_idcache();
/* barrier to prevent reordering iram copy and BSS clearing,
* because the BSS segment aliases the IRAM copy.
*/
asm volatile ("" ::: "memory");
}
#endif /* USE_IRAM */
ci->memset(plugin_bss_start, 0, plugin_end_addr - plugin_bss_start);
/* Some parts of bss may be used via a no-cache alias (at least
* portalplayer has this). If we don't clear the cache, those aliases
* may read garbage */
ci->commit_dcache();
}
#endif /* CONFIG_PLATFORM */
/* Note: If for any reason codec_main would not be called with CODEC_LOAD
* because the above code failed then it must not ever be called with
* any other value and some strategy to avoid doing so must be conceived */
return codec_main(reason);
}
#if defined(CPU_ARM) && (CONFIG_PLATFORM & PLATFORM_NATIVE)
/* Division-by-zero hook for native ARM builds: branches to the handler the
 * core exports as ci->__div0. Declared naked, so the body consists of the
 * single branch instruction only. */
void __attribute__((naked)) __div0(void)
{
asm volatile("bx %0" : : "r"(ci->__div0));
}
#endif

291
lib/rbcodec/codecs/codecs.h Normal file
View file

@ -0,0 +1,291 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2002 Björn Stenberg
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#ifndef _CODECS_H_
#define _CODECS_H_
/* instruct simulator code to not redefine any symbols when compiling codecs.
(the CODEC macro is defined in codecs.make) */
#ifdef CODEC
#define NO_REDEFINES_PLEASE
#endif
#include <stdbool.h>
#include <stdlib.h>
#include "strlcpy.h"
#include "config.h"
#include "system.h"
#include "metadata.h"
#include "audio.h"
#ifdef RB_PROFILE
#include "profile.h"
#include "thread.h"
#endif
#if (CONFIG_CODEC == SWCODEC)
#ifdef HAVE_RECORDING
#include "pcm_record.h"
#endif
#include "dsp.h"
#include "dsp-util.h"
#endif
#include "gcc_extensions.h"
#include "load_code.h"
#ifdef CODEC
#if defined(DEBUG) || defined(SIMULATOR)
#undef DEBUGF
#define DEBUGF ci->debugf
#undef LDEBUGF
#define LDEBUGF ci->debugf
#else
#define DEBUGF(...)
#define LDEBUGF(...)
#endif
#ifdef ROCKBOX_HAS_LOGF
#undef LOGF
#define LOGF ci->logf
#else
#define LOGF(...)
#endif
#endif
/* magic for normal codecs */
#define CODEC_MAGIC 0x52434F44 /* RCOD */
/* magic for encoder codecs */
#define CODEC_ENC_MAGIC 0x52454E43 /* RENC */
/* increase this every time the api struct changes */
#define CODEC_API_VERSION 44
/* update this to latest version if a change to the api struct breaks
backwards compatibility (and please take the opportunity to sort in any
new function which are "waiting" at the end of the function table) */
#define CODEC_MIN_API_VERSION 43
/* Why the loader is invoking the codec's main entry point */
enum codec_entry_call_reason {
    CODEC_LOAD   = 0, /* codec has just been loaded */
    CODEC_UNLOAD = 1  /* codec is about to be unloaded */
};
/* Status values returned by the codec entry points */
enum codec_status {
    CODEC_ERROR = -1, /* failure */
    CODEC_OK    = 0   /* success */
};
/* Actions a codec can receive from get_command() */
enum codec_command_action {
    CODEC_ACTION_NULL      = 0,  /* nothing pending */
    CODEC_ACTION_SEEK_TIME = 1,  /* seek requested; target is in the param */
    CODEC_ACTION_HALT      = -1  /* stop decoding */
};
/* NOTE: To support backwards compatibility, only add new functions at
the end of the structure. Every time you add a new function,
remember to increase CODEC_API_VERSION. If you make changes to the
existing APIs then also update CODEC_MIN_API_VERSION to current
version
*/
/* Table of data and function pointers through which a codec reaches the
 * core; codecs access it via their `ci` pointer. Which members exist
 * depends on the build configuration (see the #if blocks below), so core
 * and codecs must be built with the same settings. */
struct codec_api {
off_t filesize; /* Total file length */
off_t curpos; /* Current buffer position */
struct mp3entry *id3; /* TAG metadata pointer */
int audio_hid; /* Current audio handle */
/* The dsp instance to be used for audio output */
struct dsp_config *dsp;
/* Returns buffer to malloc array. Only codeclib should need this. */
void* (*codec_get_buffer)(size_t *size);
/* Insert PCM data into audio buffer for playback. Playback will start
automatically. */
void (*pcmbuf_insert)(const void *ch1, const void *ch2, int count);
/* Set song position in WPS (value in ms). */
void (*set_elapsed)(unsigned long value);
/* Read next <size> amount bytes from file buffer to <ptr>.
Will return number of bytes read or 0 if end of file. */
size_t (*read_filebuf)(void *ptr, size_t size);
/* Request pointer to file buffer which can be used to read
<realsize> amount of data. <reqsize> tells the buffer system
how much data it should try to allocate. If <realsize> is 0,
end of file is reached. */
void* (*request_buffer)(size_t *realsize, size_t reqsize);
/* Advance file buffer position by <amount> amount of bytes. */
void (*advance_buffer)(size_t amount);
/* Seek file buffer to position <newpos> beginning of file. */
bool (*seek_buffer)(size_t newpos);
/* Codec should call this function when it has done the seeking. */
void (*seek_complete)(void);
/* Update the current position */
void (*set_offset)(size_t value);
/* Configure different codec buffer parameters. */
void (*configure)(int setting, intptr_t value);
/* Obtain command action on what to do next */
enum codec_command_action (*get_command)(intptr_t *param);
/* Determine whether the track should be looped, if applicable. */
bool (*loop_track)(void);
/* kernel/ system */
#if defined(CPU_ARM) && CONFIG_PLATFORM & PLATFORM_NATIVE
/* Division-by-zero handler; only present on native ARM builds */
void (*__div0)(void);
#endif
unsigned (*sleep)(unsigned ticks);
void (*yield)(void);
/* Threading and semaphore helpers; only present on multi-core builds */
#if NUM_CORES > 1
unsigned int
(*create_thread)(void (*function)(void), void* stack,
size_t stack_size, unsigned flags, const char *name
IF_PRIO(, int priority)
IF_COP(, unsigned int core));
void (*thread_thaw)(unsigned int thread_id);
void (*thread_wait)(unsigned int thread_id);
void (*semaphore_init)(struct semaphore *s, int max, int start);
int (*semaphore_wait)(struct semaphore *s, int timeout);
void (*semaphore_release)(struct semaphore *s);
#endif /* NUM_CORES */
/* Data cache maintenance */
void (*commit_dcache)(void);
void (*commit_discard_dcache)(void);
/* strings and memory */
char* (*strcpy)(char *dst, const char *src);
size_t (*strlen)(const char *str);
int (*strcmp)(const char *, const char *);
char *(*strcat)(char *s1, const char *s2);
void* (*memset)(void *dst, int c, size_t length);
void* (*memcpy)(void *out, const void *in, size_t n);
void* (*memmove)(void *out, const void *in, size_t n);
int (*memcmp)(const void *s1, const void *s2, size_t n);
void *(*memchr)(const void *s1, int c, size_t n);
/* printf-style debug output; backs the DEBUGF/LDEBUGF macros in codecs */
#if defined(DEBUG) || defined(SIMULATOR)
void (*debugf)(const char *fmt, ...) ATTRIBUTE_PRINTF(1, 2);
#endif
/* printf-style logging; backs the LOGF macro in codecs */
#ifdef ROCKBOX_HAS_LOGF
void (*logf)(const char *fmt, ...) ATTRIBUTE_PRINTF(1, 2);
#endif
/* Tremor requires qsort */
void (*qsort)(void *base, size_t nmemb, size_t size,
int(*compar)(const void *, const void *));
/* Profiling hooks; only present in RB_PROFILE builds */
#ifdef RB_PROFILE
void (*profile_thread)(void);
void (*profstop)(void);
void (*profile_func_enter)(void *this_fn, void *call_site);
void (*profile_func_exit)(void *this_fn, void *call_site);
#endif
/* Encoder and file access functions; only present with HAVE_RECORDING */
#ifdef HAVE_RECORDING
void (*enc_get_inputs)(struct enc_inputs *inputs);
void (*enc_set_parameters)(struct enc_parameters *params);
struct enc_chunk_hdr * (*enc_get_chunk)(void);
void (*enc_finish_chunk)(void);
unsigned char * (*enc_get_pcm_data)(size_t size);
size_t (*enc_unget_pcm_data)(size_t size);
/* file */
int (*open)(const char* pathname, int flags, ...);
int (*close)(int fd);
ssize_t (*read)(int fd, void* buf, size_t count);
off_t (*lseek)(int fd, off_t offset, int whence);
ssize_t (*write)(int fd, const void* buf, size_t count);
int (*round_value_to_list32)(unsigned long value,
const unsigned long list[],
int count,
bool signd);
#endif
/* new stuff at the end, sort into place next time
the API gets incompatible */
void (*commit_discard_idcache)(void);
};
/* codec header: emitted into every codec by the CODEC_HEADER and
 * CODEC_ENC_HEADER macros */
struct codec_header {
struct lc_header lc_hdr; /* must be first */
/* load/unload entry point; the header macros set this to codec_start */
enum codec_status(*entry_point)(enum codec_entry_call_reason reason);
/* per-file run function; the header macros set this to codec_run */
enum codec_status(*run_proc)(void);
/* address of the codec's `ci` pointer (the header macros store &ci) */
struct codec_api **api;
};
#ifdef CODEC
/*
 * Header-emitting macros, only available when compiling a codec (-DCODEC).
 *
 * Each macro defines the codec's `__header` object and fills it with the
 * magic, target id, API version, load range (native targets only) and the
 * codec_start/codec_run entry points plus the address of `ci`.
 */
#if (CONFIG_PLATFORM & PLATFORM_NATIVE)
/* plugin_* is correct, codecs use the plugin linker script */
extern unsigned char plugin_start_addr[];
extern unsigned char plugin_end_addr[];
/* decoders */
#define CODEC_HEADER \
        const struct codec_header __header \
        __attribute__ ((section (".header")))= { \
        { CODEC_MAGIC, TARGET_ID, CODEC_API_VERSION, \
        plugin_start_addr, plugin_end_addr }, codec_start, \
        codec_run, &ci };
/* encoders */
#define CODEC_ENC_HEADER \
        const struct codec_header __header \
        __attribute__ ((section (".header")))= { \
        { CODEC_ENC_MAGIC, TARGET_ID, CODEC_API_VERSION, \
        plugin_start_addr, plugin_end_addr }, codec_start, \
        codec_run, &ci };
#else /* def SIMULATOR */
/* On hosted builds no load range is recorded (NULL, NULL). The header
 * symbol is given default visibility so it stays exported even when the
 * codec is compiled with hidden symbol visibility. */
/* decoders */
#define CODEC_HEADER \
        const struct codec_header __header \
        __attribute__((visibility("default"))) = { \
        { CODEC_MAGIC, TARGET_ID, CODEC_API_VERSION, NULL, NULL }, \
        codec_start, codec_run, &ci };
/* encoders: same visibility as the decoder header, for consistency */
#define CODEC_ENC_HEADER \
        const struct codec_header __header \
        __attribute__((visibility("default"))) = { \
        { CODEC_ENC_MAGIC, TARGET_ID, CODEC_API_VERSION, NULL, NULL }, \
        codec_start, codec_run, &ci };
#endif /* SIMULATOR */
#endif /* CODEC */
/* create full codec path from root filenames in audio_formats[]
assumes buffer size is MAX_PATH */
void codec_get_full_path(char *path, const char *codec_root_fn);
/* Returns pointer to and size of free codec RAM
(the size is written through *size) */
void *codec_get_buffer_callback(size_t *size);
/* defined by the codec loader (codec.c) */
/* NOTE(review): hid is presumably a buffering handle id - confirm in codec.c */
int codec_load_buf(int hid, struct codec_api *api);
int codec_load_file(const char* codec, struct codec_api *api);
int codec_run_proc(void);
int codec_halt(void);
int codec_close(void);
/* defined by the codec */
/* codec_start and codec_run are the entry points stored in the codec header
by the CODEC_HEADER / CODEC_ENC_HEADER macros */
enum codec_status codec_start(enum codec_entry_call_reason reason);
enum codec_status codec_main(enum codec_entry_call_reason reason);
enum codec_status codec_run(void);
#endif /* _CODECS_H_ */

View file

@ -0,0 +1,206 @@
# __________ __ ___.
# Open \______ \ ____ ____ | | _\_ |__ _______ ___
# Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
# Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
# Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
# \/ \/ \/ \/ \/
# $Id$
#
# build directory that receives codec objects, libraries and .codec files
CODECDIR = $(RBCODEC_BLD)/codecs
# codec source list: the SOURCES file passed through the preprocess helper
CODECS_SRC := $(call preprocess, $(RBCODECLIB_DIR)/codecs/SOURCES)
OTHER_SRC += $(CODECS_SRC)
# one .codec per source file, relocated from the source tree to the build tree
CODECS := $(CODECS_SRC:.c=.codec)
CODECS := $(subst $(RBCODECLIB_DIR),$(RBCODEC_BLD),$(CODECS))
# the codec helper library
include $(RBCODECLIB_DIR)/codecs/lib/libcodec.make
OTHER_INC += -I$(RBCODECLIB_DIR)/codecs/lib
# extra libraries
# (every codec depends on these, see the "$(CODECS): $(CODEC_LIBS)" rule below)
CODEC_LIBS := $(EXTRA_LIBS) $(CODECLIB)
# the codec libraries
# One make fragment per decoder/encoder support library.
# NOTE(review): each fragment presumably defines the matching *LIB variable
# used by the CODECFLAGS assignments further down - confirm per fragment.
include $(RBCODECLIB_DIR)/codecs/demac/libdemac.make
include $(RBCODECLIB_DIR)/codecs/liba52/liba52.make
include $(RBCODECLIB_DIR)/codecs/libalac/libalac.make
include $(RBCODECLIB_DIR)/codecs/libasap/libasap.make
include $(RBCODECLIB_DIR)/codecs/libasf/libasf.make
include $(RBCODECLIB_DIR)/codecs/libfaad/libfaad.make
include $(RBCODECLIB_DIR)/codecs/libffmpegFLAC/libffmpegFLAC.make
include $(RBCODECLIB_DIR)/codecs/libm4a/libm4a.make
include $(RBCODECLIB_DIR)/codecs/libmad/libmad.make
include $(RBCODECLIB_DIR)/codecs/libmusepack/libmusepack.make
include $(RBCODECLIB_DIR)/codecs/libspc/libspc.make
include $(RBCODECLIB_DIR)/codecs/libspeex/libspeex.make
include $(RBCODECLIB_DIR)/codecs/libtremor/libtremor.make
include $(RBCODECLIB_DIR)/codecs/libwavpack/libwavpack.make
include $(RBCODECLIB_DIR)/codecs/libwma/libwma.make
include $(RBCODECLIB_DIR)/codecs/libwmapro/libwmapro.make
include $(RBCODECLIB_DIR)/codecs/libcook/libcook.make
include $(RBCODECLIB_DIR)/codecs/librm/librm.make
include $(RBCODECLIB_DIR)/codecs/libatrac/libatrac.make
include $(RBCODECLIB_DIR)/codecs/libpcm/libpcm.make
include $(RBCODECLIB_DIR)/codecs/libtta/libtta.make
# game music emulator libraries (all live in libgme/)
include $(RBCODECLIB_DIR)/codecs/libgme/libay.make
include $(RBCODECLIB_DIR)/codecs/libgme/libgbs.make
include $(RBCODECLIB_DIR)/codecs/libgme/libhes.make
include $(RBCODECLIB_DIR)/codecs/libgme/libnsf.make
include $(RBCODECLIB_DIR)/codecs/libgme/libsgc.make
include $(RBCODECLIB_DIR)/codecs/libgme/libvgm.make
include $(RBCODECLIB_DIR)/codecs/libgme/libkss.make
include $(RBCODECLIB_DIR)/codecs/libgme/libemu2413.make
# compile flags for codecs
# (global CFLAGS plus the rbcodec flags, the codec include paths and -DCODEC)
CODECFLAGS = $(CFLAGS) $(RBCODEC_CFLAGS) -fstrict-aliasing \
-I$(RBCODECLIB_DIR)/codecs -I$(RBCODECLIB_DIR)/codecs/lib -DCODEC
# set CODECFLAGS per codec lib, since gcc takes the last -Ox and the last
# in a -ffoo -fno-foo pair, there is no need to filter them out
# Baseline optimisation level for each library (target-specific variables):
$(A52LIB) : CODECFLAGS += -O1
$(ALACLIB) : CODECFLAGS += -O1
$(ASAPLIB) : CODECFLAGS += -O1
$(ASFLIB) : CODECFLAGS += -O2
$(ATRACLIB) : CODECFLAGS += -O1
$(AYLIB) : CODECFLAGS += -O2
$(COOKLIB): CODECFLAGS += -O1
$(DEMACLIB) : CODECFLAGS += -O3
$(FAADLIB) : CODECFLAGS += -O2
$(FFMPEGFLACLIB) : CODECFLAGS += -O2
$(GBSLIB) : CODECFLAGS += -O2
$(HESLIB) : CODECFLAGS += -O2
$(KSSLIB) : CODECFLAGS += -O2
$(M4ALIB) : CODECFLAGS += -O3
$(MUSEPACKLIB) : CODECFLAGS += -O1
$(NSFLIB) : CODECFLAGS += -O2
$(PCMSLIB) : CODECFLAGS += -O1
$(RMLIB) : CODECFLAGS += -O3
$(SGCLIB) : CODECFLAGS += -O2
$(SPCLIB) : CODECFLAGS += -O1
$(TREMORLIB) : CODECFLAGS += -O2
$(TTALIB) : CODECFLAGS += -O2
$(VGMLIB) : CODECFLAGS += -O2
$(EMU2413LIB) : CODECFLAGS += -O3
$(WAVPACKLIB) : CODECFLAGS += -O1
$(WMALIB) : CODECFLAGS += -O2
$(WMAPROLIB) : CODECFLAGS += -O1
# NOTE(review): no libwmavoice make fragment is included by this file;
# confirm that WMAVOICELIB is defined elsewhere.
$(WMAVOICELIB) : CODECFLAGS += -O1
# fine-tuning of CODECFLAGS per cpu arch
# (appended after the baseline above, so these levels take precedence)
ifeq ($(ARCH),arch_arm)
# redo per arm generation
$(ALACLIB) : CODECFLAGS += -O2
$(AYLIB) : CODECFLAGS += -O1
$(GBSLIB) : CODECFLAGS += -O1
$(HESLIB) : CODECFLAGS += -O1
$(KSSLIB) : CODECFLAGS += -O1
$(NSFLIB) : CODECFLAGS += -O1
$(SGCLIB) : CODECFLAGS += -O1
$(VGMLIB) : CODECFLAGS += -O1
$(EMU2413LIB) : CODECFLAGS += -O3
$(WAVPACKLIB) : CODECFLAGS += -O3
else ifeq ($(ARCH),arch_m68k)
$(A52LIB) : CODECFLAGS += -O2
$(ASFLIB) : CODECFLAGS += -O3
$(ATRACLIB) : CODECFLAGS += -O2
$(COOKLIB): CODECFLAGS += -O2
$(DEMACLIB) : CODECFLAGS += -O2
$(SPCLIB) : CODECFLAGS += -O3
$(WMAPROLIB) : CODECFLAGS += -O3
$(WMAVOICELIB) : CODECFLAGS += -O2
endif
# when MEMORYSIZE is 2, build the ASF/WMA libraries for size instead
ifeq ($(MEMORYSIZE),2)
$(ASFLIB) : CODECFLAGS += -Os
$(WMALIB) : CODECFLAGS += -Os
endif
# Linker script handling. Only builds without APP_TYPE use a linker script;
# it is the plugin script preprocessed with -DCODEC into $(CODECLINK_LDS).
ifndef APP_TYPE
CONFIGFILE := $(FIRMDIR)/export/config/$(MODELNAME).h
CODEC_LDS := $(APPSDIR)/plugins/plugin.lds # codecs and plugins use same file
CODECLINK_LDS := $(CODECDIR)/codec.link
endif

CODEC_CRT0 := $(CODECDIR)/codec_crt0.o

# every codec links against the startup object and the generated script
$(CODECS): $(CODEC_CRT0) $(CODECLINK_LDS)

# Create the output directory as a normal recipe command (as the compile
# rules in this file do) rather than via $(shell ...), which would run at
# recipe-expansion time, including during a dry run.
$(CODECLINK_LDS): $(CODEC_LDS) $(CONFIGFILE)
	$(call PRINTS,PP $(@F))
	$(SILENT)mkdir -p $(dir $@)
	$(call preprocess2file, $<, $@, -DCODEC)
# codec/library dependencies
# Each line lists the static libraries a codec is linked against; the link
# rule picks up every %.a prerequisite, so listing a library here is what
# puts it on that codec's link line.
$(CODECDIR)/spc.codec : $(CODECDIR)/libspc.a
$(CODECDIR)/mpa.codec : $(CODECDIR)/libmad.a
$(CODECDIR)/a52.codec : $(CODECDIR)/liba52.a
$(CODECDIR)/flac.codec : $(CODECDIR)/libffmpegFLAC.a
$(CODECDIR)/vorbis.codec : $(CODECDIR)/libtremor.a
$(CODECDIR)/speex.codec : $(CODECDIR)/libspeex.a
$(CODECDIR)/mpc.codec : $(CODECDIR)/libmusepack.a
$(CODECDIR)/wavpack.codec : $(CODECDIR)/libwavpack.a
$(CODECDIR)/alac.codec : $(CODECDIR)/libalac.a $(CODECDIR)/libm4a.a
$(CODECDIR)/aac.codec : $(CODECDIR)/libfaad.a $(CODECDIR)/libm4a.a
$(CODECDIR)/shorten.codec : $(CODECDIR)/libffmpegFLAC.a
# ape-pre.map is produced by the %-pre.map pre-link rule further down
# NOTE(review): presumably consumed by libdemac.make - confirm there
$(CODECDIR)/ape-pre.map : $(CODECDIR)/libdemac-pre.a
$(CODECDIR)/ape.codec : $(CODECDIR)/libdemac.a
$(CODECDIR)/wma.codec : $(CODECDIR)/libwma.a $(CODECDIR)/libasf.a
$(CODECDIR)/wmapro.codec : $(CODECDIR)/libwmapro.a $(CODECDIR)/libasf.a
$(CODECDIR)/wavpack_enc.codec: $(CODECDIR)/libwavpack.a
$(CODECDIR)/asap.codec : $(CODECDIR)/libasap.a
$(CODECDIR)/cook.codec : $(CODECDIR)/libcook.a $(CODECDIR)/librm.a
$(CODECDIR)/raac.codec : $(CODECDIR)/libfaad.a $(CODECDIR)/librm.a
$(CODECDIR)/a52_rm.codec : $(CODECDIR)/liba52.a $(CODECDIR)/librm.a
$(CODECDIR)/atrac3_rm.codec : $(CODECDIR)/libatrac.a $(CODECDIR)/librm.a
$(CODECDIR)/atrac3_oma.codec : $(CODECDIR)/libatrac.a
$(CODECDIR)/aiff.codec : $(CODECDIR)/libpcm.a
$(CODECDIR)/wav.codec : $(CODECDIR)/libpcm.a
$(CODECDIR)/smaf.codec : $(CODECDIR)/libpcm.a
$(CODECDIR)/au.codec : $(CODECDIR)/libpcm.a
$(CODECDIR)/vox.codec : $(CODECDIR)/libpcm.a
$(CODECDIR)/wav64.codec : $(CODECDIR)/libpcm.a
$(CODECDIR)/tta.codec : $(CODECDIR)/libtta.a
$(CODECDIR)/ay.codec : $(CODECDIR)/libay.a
$(CODECDIR)/gbs.codec : $(CODECDIR)/libgbs.a
$(CODECDIR)/hes.codec : $(CODECDIR)/libhes.a
$(CODECDIR)/nsf.codec : $(CODECDIR)/libnsf.a $(CODECDIR)/libemu2413.a
$(CODECDIR)/sgc.codec : $(CODECDIR)/libsgc.a $(CODECDIR)/libemu2413.a
$(CODECDIR)/vgm.codec : $(CODECDIR)/libvgm.a $(CODECDIR)/libemu2413.a
$(CODECDIR)/kss.codec : $(CODECDIR)/libkss.a $(CODECDIR)/libemu2413.a
# the shared libraries go after the codec-specific ones in the prerequisite
# list, which is the order the link rule passes them to the linker
$(CODECS): $(CODEC_LIBS) # this must be last in codec dependency list
# pattern rule for compiling codecs (C sources)
# the source file's own directory is added to the include path
$(CODECDIR)/%.o: $(RBCODECLIB_DIR)/codecs/%.c
$(SILENT)mkdir -p $(dir $@)
$(call PRINTS,CC $(subst $(ROOTDIR)/,,$<))$(CC) \
-I$(dir $<) $(CODECFLAGS) -c $< -o $@
# pattern rule for assembling codecs (.S sources, passed through $(CC)
# with $(ASMFLAGS) added)
$(CODECDIR)/%.o: $(RBCODECLIB_DIR)/codecs/%.S
$(SILENT)mkdir -p $(dir $@)
$(call PRINTS,CC $(subst $(ROOTDIR)/,,$<))$(CC) \
-I$(dir $<) $(CODECFLAGS) $(ASMFLAGS) -c $< -o $@
# Link flags. Application builds produce shared objects; all other builds
# link with the generated linker script and compile without DEBUG.
# $* in the -Map option is expanded per target when the link recipe runs.
ifdef APP_TYPE
CODECLDFLAGS = $(SHARED_LDFLAG) -Wl,--gc-sections -Wl,-Map,$(CODECDIR)/$*.map
CODECFLAGS += $(SHARED_CFLAGS) # <-- from Makefile
else
CODECLDFLAGS = -T$(CODECLINK_LDS) -Wl,--gc-sections -Wl,-Map,$(CODECDIR)/$*.map
CODECFLAGS += -UDEBUG -DNDEBUG
endif
CODECLDFLAGS += $(GLOBAL_LDOPTS)

# Pre-link: links <codec>-pre.elf and writes <codec>-pre.map.
# The prerequisite list uses $(CODEC_LIBS) (the variable defined in this
# file), so the map is regenerated when the helper library changes.
# $(CODECLIB) is moved to the end of the library list so that it is searched
# after the codec-specific libraries.
$(CODECDIR)/%-pre.map: $(CODEC_CRT0) $(CODECLINK_LDS) $(CODECDIR)/%.o $(CODEC_LIBS)
	$(call PRINTS,LD $(@F))$(CC) $(CODECFLAGS) -o $(CODECDIR)/$*-pre.elf \
		$(filter %.o, $^) \
		$(filter-out $(CODECLIB),$(filter %.a, $+)) $(CODECLIB) \
		-lgcc $(subst .map,-pre.map,$(CODECLDFLAGS))

# Final link: every %.o and %.a prerequisite goes on the link line in
# prerequisite order, then the ELF is converted to the .codec file.
$(CODECDIR)/%.codec: $(CODECDIR)/%.o
	$(call PRINTS,LD $(@F))$(CC) $(CODECFLAGS) -o $(CODECDIR)/$*.elf \
		$(filter %.o, $^) \
		$(filter %.a, $+) \
		-lgcc $(CODECLDFLAGS)
	$(SILENT)$(call objcopy,$(CODECDIR)/$*.elf,$@)

202
lib/rbcodec/codecs/cook.c Normal file
View file

@ -0,0 +1,202 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2009 Mohamed Tarek
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include <string.h>
#include "logf.h"
#include "codeclib.h"
#include "inttypes.h"
#include "libcook/cook.h"
CODEC_HEADER
/* container context; zeroed and then filled by init_rm() in codec_run() */
static RMContext rmctx IBSS_ATTR_COOK_LARGE_IRAM;
/* packet filled by rm_get_packet(); its frames[] feed the decoder */
static RMPacket pkt IBSS_ATTR_COOK_LARGE_IRAM;
/* decoder state passed to cook_decode_init() and cook_decode_frame() */
static COOKContext q IBSS_ATTR;
/* decoder output; codec_run() hands rm_outbuf and
 * rm_outbuf + q.samples_per_channel to pcmbuf_insert() as the two channels */
static int32_t rm_outbuf[2048] IBSS_ATTR_COOK_LARGE_IRAM MEM_ALIGN_ATTR;
/* Fill *rmctx with the RMContext image stored in the track's id3v2buf.
 * The image is read from the first 4-byte aligned address at or after the
 * start of that buffer. */
static void init_rm(RMContext *rmctx)
{
    /* round the buffer address up to the next multiple of four */
    intptr_t aligned_addr = ((intptr_t)ci->id3->id3v2buf + 3) & ~3;
    const void *stored_ctx = (void *)aligned_addr;

    memcpy(rmctx, stored_ctx, sizeof(RMContext));
}
/* Codec entry point. This codec needs no work for any call reason, so the
 * argument is ignored and CODEC_OK is always returned. */
enum codec_status codec_main(enum codec_entry_call_reason reason)
{
    (void)reason; /* intentionally unused */

    return CODEC_OK;
}
/* this is called for each file to process
 *
 * Loads the container context from the metadata buffer, configures the DSP,
 * initialises the cook decoder and then decodes packet by packet, inserting
 * the output into the PCM buffer. Seek and halt commands are polled before
 * every frame.
 *
 * Returns CODEC_OK when all packets are consumed, on CODEC_ACTION_HALT, or
 * when a seek beyond the track length is requested; CODEC_ERROR when
 * codec_init(), cook_decode_init(), rm_get_packet() or frame decoding fails.
 */
enum codec_status codec_run(void)
{
static size_t buff_size;
int datasize, res, consumed, i, time_offset;
uint8_t *bit_buffer;
uint16_t fs,sps,h;
uint32_t packet_count;
int scrambling_unit_size, num_units;
size_t resume_offset;
intptr_t param = 0;
enum codec_command_action action = CODEC_ACTION_NULL;
if (codec_init()) {
DEBUGF("codec init failed\n");
return CODEC_ERROR;
}
/* remember the resume position before any state is reset */
resume_offset = ci->id3->offset;
codec_set_replaygain(ci->id3);
ci->memset(&rmctx,0,sizeof(RMContext));
ci->memset(&pkt,0,sizeof(RMPacket));
ci->memset(&q,0,sizeof(COOKContext));
ci->seek_buffer(0);
init_rm(&rmctx);
ci->configure(DSP_SET_FREQUENCY, ci->id3->frequency);
/* cook's sample representation is 21.11
* DSP_SET_SAMPLE_DEPTH = 11 (FRACT) + 16 (NATIVE) - 1 (SIGN) = 26 */
ci->configure(DSP_SET_SAMPLE_DEPTH, 26);
ci->configure(DSP_SET_STEREO_MODE, rmctx.nb_channels == 1 ?
STEREO_MONO : STEREO_NONINTERLEAVED);
packet_count = rmctx.nb_packets;
/* shuffle the container fields: block_align becomes the audio frame size
* and is replaced by the sub-packet size */
rmctx.audio_framesize = rmctx.block_align;
rmctx.block_align = rmctx.sub_packet_size;
fs = rmctx.audio_framesize;
sps= rmctx.block_align;
h = rmctx.sub_packet_h;
/* bytes requested from the buffer per rm_get_packet() call */
scrambling_unit_size = h * (fs + PACKET_HEADER_SIZE);
res =cook_decode_init(&rmctx, &q);
if(res < 0) {
DEBUGF("failed to initialize cook decoder\n");
return CODEC_ERROR;
}
/* check for a mid-track resume and force a seek time accordingly */
if(resume_offset > rmctx.data_offset + DATA_HEADER_SIZE) {
resume_offset -= rmctx.data_offset + DATA_HEADER_SIZE;
/* NOTE(review): this num_units value is not read before the seek code
* below assigns num_units again - looks like a dead store, confirm */
num_units = (int)resume_offset / scrambling_unit_size;
/* put number of subpackets to skip in resume_offset */
resume_offset /= (sps + PACKET_HEADER_SIZE);
/* NOTE(review): (sps * 8 * 1000)/rmctx.bit_rate is presumably the duration
* of one sub-packet in ms; bit_rate comes from the file and a zero value
* would divide by zero here - confirm it is validated upstream */
param = (int)resume_offset * ((sps * 8 * 1000)/rmctx.bit_rate);
action = CODEC_ACTION_SEEK_TIME;
}
else {
ci->set_elapsed(0);
}
ci->advance_buffer(rmctx.data_offset + DATA_HEADER_SIZE);
/* The main decoder loop */
seek_start :
while(packet_count)
{
/* NOTE(review): buff_size (the size actually returned) is not checked
* against scrambling_unit_size - confirm rm_get_packet copes with a short
* buffer */
bit_buffer = (uint8_t *) ci->request_buffer(&buff_size, scrambling_unit_size);
consumed = rm_get_packet(&bit_buffer, &rmctx, &pkt);
if(consumed < 0) {
DEBUGF("rm_get_packet failed\n");
return CODEC_ERROR;
}
/* one iteration per frame of the packet just read */
for(i = 0; i < rmctx.audio_pkt_cnt*(fs/sps) ; i++)
{
/* a pending action (e.g. the forced resume seek) takes priority over
* polling for a new command */
if (action == CODEC_ACTION_NULL)
action = ci->get_command(&param);
if (action == CODEC_ACTION_HALT)
return CODEC_OK;
if (action == CODEC_ACTION_SEEK_TIME) {
/* Do not allow seeking beyond the file's length */
if ((unsigned) param > ci->id3->length) {
ci->set_elapsed(ci->id3->length);
ci->seek_complete();
return CODEC_OK;
}
/* rewind to the first data packet and reset the packet bookkeeping */
ci->seek_buffer(rmctx.data_offset + DATA_HEADER_SIZE);
packet_count = rmctx.nb_packets;
rmctx.audio_pkt_cnt = 0;
rmctx.frame_number = 0;
/* Seek to the start of the track */
if (param == 0) {
ci->set_elapsed(0);
ci->seek_complete();
action = CODEC_ACTION_NULL;
goto seek_start;
}
/* number of whole units to skip; `consumed` still holds the size of the
* previously read unit and is used as the unit size below
* NOTE(review): the divisor sps*1000*8/rmctx.bit_rate is zero when
* bit_rate exceeds sps*8000 - confirm this cannot happen */
num_units = (param/(sps*1000*8/rmctx.bit_rate))/(h*(fs/sps));
ci->seek_buffer(rmctx.data_offset + DATA_HEADER_SIZE + consumed * num_units);
bit_buffer = (uint8_t *) ci->request_buffer(&buff_size, scrambling_unit_size);
consumed = rm_get_packet(&bit_buffer, &rmctx, &pkt);
if(consumed < 0) {
DEBUGF("rm_get_packet failed\n");
ci->seek_complete();
return CODEC_ERROR;
}
packet_count = rmctx.nb_packets - rmctx.audio_pkt_cnt * num_units;
rmctx.frame_number = (param/(sps*1000*8/rmctx.bit_rate));
/* step back one unit at a time while the packet read lies past the
* requested time
* NOTE(review): the rm_get_packet() result is not checked in this loop and
* num_units has no lower bound - confirm this is safe */
while(rmctx.audiotimestamp > (unsigned) param) {
rmctx.audio_pkt_cnt = 0;
ci->seek_buffer(rmctx.data_offset + DATA_HEADER_SIZE + consumed * (num_units-1));
bit_buffer = (uint8_t *) ci->request_buffer(&buff_size, scrambling_unit_size);
consumed = rm_get_packet(&bit_buffer, &rmctx, &pkt);
packet_count += rmctx.audio_pkt_cnt;
num_units--;
}
/* continue decoding at the frame inside this packet that matches the
* requested time: the loop index i is overwritten on purpose */
time_offset = param - rmctx.audiotimestamp;
i = (time_offset/((sps * 8 * 1000)/rmctx.bit_rate));
ci->set_elapsed(rmctx.audiotimestamp+(1000*8*sps/rmctx.bit_rate)*i);
ci->seek_complete();
}
action = CODEC_ACTION_NULL;
res = cook_decode_frame(&rmctx,&q, rm_outbuf, &datasize, pkt.frames[i], rmctx.block_align);
rmctx.frame_number++;
/* skip the first two frames; no valid audio */
if(rmctx.frame_number < 3) continue;
/* the decoder is expected to report exactly one sub-packet as consumed */
if(res != rmctx.block_align) {
DEBUGF("codec error\n");
return CODEC_ERROR;
}
/* second channel starts samples_per_channel entries into rm_outbuf */
ci->pcmbuf_insert(rm_outbuf,
rm_outbuf+q.samples_per_channel,
q.samples_per_channel);
ci->set_elapsed(rmctx.audiotimestamp+(1000*8*sps/rmctx.bit_rate)*i);
}
/* account for the packets just decoded and move past the consumed bytes */
packet_count -= rmctx.audio_pkt_cnt;
rmctx.audio_pkt_cnt = 0;
ci->advance_buffer(consumed);
}
return CODEC_OK;
}

View file

@ -0,0 +1,339 @@
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Lesser General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) year name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
<signature of Ty Coon>, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License.

View file

@ -0,0 +1,42 @@
# $Id$
# Makefile for the standalone demac command-line decoder (demac.c plus
# wavwrite.c, linked against the libdemac objects listed below).
# One object per filter variant; each is declared further down as also
# depending on libdemac/filter.c.
FILTERS = libdemac/filter_16_11.o libdemac/filter_64_11.o libdemac/filter_256_13.o libdemac/filter_32_10.o libdemac/filter_1280_15.o
LIBOBJS = libdemac/parser.o libdemac/decoder.o libdemac/entropy.o libdemac/predictor.o libdemac/crc.o $(FILTERS)
OBJS = demac.o wavwrite.o $(LIBOBJS)
CFLAGS = -Wall -g -O3 -Ilibdemac
# Toolchain selection:
#  - when `uname` reports CYGWIN: native gcc with -mno-cygwin, .exe suffix
#  - when WIN is defined: cross-compile with the i586-mingw32msvc- prefix
#  - otherwise: native gcc, no suffix
ifeq ($(findstring CYGWIN,$(shell uname)),CYGWIN)
EXT = .exe
CROSS =
CFLAGS += -mno-cygwin
else
ifdef WIN
EXT = .exe
CROSS = i586-mingw32msvc-
else
EXT =
CROSS =
endif
endif
CC = $(CROSS)gcc
# STRIP is defined for convenience; no rule in this Makefile uses it.
STRIP = $(CROSS)strip
OUTPUT = demac$(EXT)
all: $(OUTPUT)
$(OUTPUT): $(OBJS)
$(CC) $(CFLAGS) -o $(OUTPUT) $(OBJS)
# Suffix rule for compiling every .c file. $(INC) is not set in this
# Makefile; presumably it may be supplied on the make command line.
.c.o :
$(CC) $(CFLAGS) $(INC) -c -o $@ $<
# Extra prerequisite: rebuild every filter variant when filter.c changes.
libdemac/filter_16_11.o: libdemac/filter.c
libdemac/filter_64_11.o: libdemac/filter.c
libdemac/filter_256_13.o: libdemac/filter.c
libdemac/filter_1280_15.o: libdemac/filter.c
libdemac/filter_32_10.o: libdemac/filter.c
# Remove the executable, all objects and editor backup files.
clean:
rm -f $(OUTPUT) $(OBJS) *~ */*~

View file

@ -0,0 +1,69 @@
demac - a decoder for Monkey's Audio files.
Introduction
demac is an implementation in portable ANSI C of a decoder for the
Monkey's Audio lossless compression format. It has the following
features:
* Open source (GNU GPL)
* Written in portable ANSI C
* Designed for use on low memory and embedded devices. All internal
buffers are statically declared - the core library doesn't require
malloc/free. This has the disadvantage that the library isn't
re-entrant.
Compatibility
libdemac is still in the early stages of development but has been
relatively well tested with v3.99 files at all compression levels.
v3.97 files have received less testing - 16-bit files seem to work,
but 24-bit files are causing problems in the range decoder.
Files earlier than v3.97 are not supported by libdemac, but support
might be added in the future.
Source Code
The source code in this directory is structured as follows:
demac/Makefile - Makefile for the standalone demac decoder
demac/demac.c - Simple standalone test program to decode an APE file to WAV
demac/wavwrite.[ch] - Helper functions for demac.c
demac/libdemac/Makefile - A Makefile for use in Rockbox
demac/libdemac/*.[ch] - The main libdemac code
Latest Version
The latest version of demac and libdemac can always be found in the
"lib/rbcodec/codecs/demac/" directory in the Rockbox source. You can check
this out from svn with the command:
svn co svn://svn.rockbox.org/rockbox/trunk/lib/rbcodec/codecs/demac demac
Or browse the source code online here:
http://svn.rockbox.org/viewvc.cgi/trunk/lib/rbcodec/codecs/demac
Acknowledgements
Thanks to Matt. T. Ashland for writing Monkey's Audio. His website
can be found here: http://www.monkeysaudio.com
Copyright and license
libdemac is (C) 2007 Dave Chapman and is licensed under the GNU
GPL. See the COPYING file in this directory.
The exception is the majority of rangecoding.h, which is (C) 1997,
1998, 1999, 2000 Michael Schindler and is also licensed under the GPL.
See that source file for full details.

View file

@ -0,0 +1,281 @@
/*
demac - A Monkey's Audio decoder
$Id$
Copyright (C) Dave Chapman 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
*/
/*
This example is intended to demonstrate how the decoder can be used in
embedded devices - there is no usage of dynamic memory (i.e. no
malloc/free) and small buffer sizes are chosen to minimise both the
memory usage and decoding latency.
This implementation requires the following memory and supports decoding of all APE files up to 24-bit Stereo.
32768 - data from the input stream to be presented to the decoder in one contiguous chunk.
18432 - decoding buffer (left channel)
18432 - decoding buffer (right channel)
17408+5120+2240 - buffers used for filter histories (compression levels 2000-5000)
In addition, this example uses a static 27648 byte buffer as temporary
storage for outputting the data to a WAV file but that could be
avoided by writing the decoded data one sample at a time.
*/
#include <stdio.h>
#include <inttypes.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include "demac.h"
#include "wavwrite.h"
/* O_BINARY is OR-ed into the open() flags below; define it as a no-op
when __WIN32__ is not defined. */
#ifndef __WIN32__
#define O_BINARY 0
#endif
/* Set to 0 to compile out the per-frame CRC verification in ape_decode() */
#define CALC_CRC 1
/* Maximum number of blocks (one sample per channel) decoded per call to
decode_chunk() */
#define BLOCKS_PER_LOOP 4608
#define MAX_CHANNELS 2
#define MAX_BYTESPERSAMPLE 3
/* Size of the compressed-data window kept in inbuffer */
#define INPUT_CHUNKSIZE (32*1024)
#ifndef MIN
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#endif
/* Output staging buffer holding one chunk in WAV sample layout:
4608*2*3 = 27648 bytes */
static unsigned char wavbuffer[BLOCKS_PER_LOOP*MAX_CHANNELS*MAX_BYTESPERSAMPLE];
/* Decoded samples, one array per channel: 4608*4 = 18432 bytes per channel */
static int32_t decoded0[BLOCKS_PER_LOOP];
static int32_t decoded1[BLOCKS_PER_LOOP];
/* We assume that 32KB of compressed data is enough to extract up to
27648 bytes of decompressed data. */
static unsigned char inbuffer[INPUT_CHUNKSIZE];
int ape_decode(char* infile, char* outfile)
{
int fd;
int fdwav;
int currentframe;
int nblocks;
int bytesconsumed;
struct ape_ctx_t ape_ctx;
int i, n;
unsigned char* p;
int bytesinbuffer;
int blockstodecode;
int res;
int firstbyte;
int16_t sample16;
int32_t sample32;
uint32_t frame_crc;
int crc_errors = 0;
fd = open(infile,O_RDONLY|O_BINARY);
if (fd < 0) return -1;
/* Read the file headers to populate the ape_ctx struct */
if (ape_parseheader(fd,&ape_ctx) < 0) {
printf("Cannot read header\n");
close(fd);
return -1;
}
if ((ape_ctx.fileversion < APE_MIN_VERSION) || (ape_ctx.fileversion > APE_MAX_VERSION)) {
printf("Unsupported file version - %.2f\n", ape_ctx.fileversion/1000.0);
close(fd);
return -2;
}
//ape_dumpinfo(&ape_ctx);
printf("Decoding file - v%.2f, compression level %d\n",ape_ctx.fileversion/1000.0,ape_ctx.compressiontype);
/* Open the WAV file and write a canonical 44-byte WAV header
based on the audio format information in the ape_ctx struct.
NOTE: This example doesn't write the original WAV header and
tail data which are (optionally) stored in the APE file.
*/
fdwav = open_wav(&ape_ctx,outfile);
currentframe = 0;
/* Initialise the buffer */
lseek(fd, ape_ctx.firstframe, SEEK_SET);
bytesinbuffer = read(fd, inbuffer, INPUT_CHUNKSIZE);
firstbyte = 3; /* Take account of the little-endian 32-bit byte ordering */
/* The main decoding loop - we decode the frames a small chunk at a time */
while (currentframe < ape_ctx.totalframes)
{
/* Calculate how many blocks there are in this frame */
if (currentframe == (ape_ctx.totalframes - 1))
nblocks = ape_ctx.finalframeblocks;
else
nblocks = ape_ctx.blocksperframe;
ape_ctx.currentframeblocks = nblocks;
/* Initialise the frame decoder */
init_frame_decoder(&ape_ctx, inbuffer, &firstbyte, &bytesconsumed);
/* Update buffer */
memmove(inbuffer,inbuffer + bytesconsumed, bytesinbuffer - bytesconsumed);
bytesinbuffer -= bytesconsumed;
n = read(fd, inbuffer + bytesinbuffer, INPUT_CHUNKSIZE - bytesinbuffer);
bytesinbuffer += n;
#if CALC_CRC
frame_crc = ape_initcrc();
#endif
/* Decode the frame a chunk at a time */
while (nblocks > 0)
{
blockstodecode = MIN(BLOCKS_PER_LOOP, nblocks);
if ((res = decode_chunk(&ape_ctx, inbuffer, &firstbyte,
&bytesconsumed,
decoded0, decoded1,
blockstodecode)) < 0)
{
/* Frame decoding error, abort */
close(fd);
return res;
}
/* Convert the output samples to WAV format and write to output file */
p = wavbuffer;
if (ape_ctx.bps == 8) {
for (i = 0 ; i < blockstodecode ; i++)
{
/* 8 bit WAV uses unsigned samples */
*(p++) = (decoded0[i] + 0x80) & 0xff;
if (ape_ctx.channels == 2) {
*(p++) = (decoded1[i] + 0x80) & 0xff;
}
}
} else if (ape_ctx.bps == 16) {
for (i = 0 ; i < blockstodecode ; i++)
{
sample16 = decoded0[i];
*(p++) = sample16 & 0xff;
*(p++) = (sample16 >> 8) & 0xff;
if (ape_ctx.channels == 2) {
sample16 = decoded1[i];
*(p++) = sample16 & 0xff;
*(p++) = (sample16 >> 8) & 0xff;
}
}
} else if (ape_ctx.bps == 24) {
for (i = 0 ; i < blockstodecode ; i++)
{
sample32 = decoded0[i];
*(p++) = sample32 & 0xff;
*(p++) = (sample32 >> 8) & 0xff;
*(p++) = (sample32 >> 16) & 0xff;
if (ape_ctx.channels == 2) {
sample32 = decoded1[i];
*(p++) = sample32 & 0xff;
*(p++) = (sample32 >> 8) & 0xff;
*(p++) = (sample32 >> 16) & 0xff;
}
}
}
#if CALC_CRC
frame_crc = ape_updatecrc(wavbuffer, p - wavbuffer, frame_crc);
#endif
write(fdwav,wavbuffer,p - wavbuffer);
/* Update the buffer */
memmove(inbuffer,inbuffer + bytesconsumed, bytesinbuffer - bytesconsumed);
bytesinbuffer -= bytesconsumed;
n = read(fd, inbuffer + bytesinbuffer, INPUT_CHUNKSIZE - bytesinbuffer);
bytesinbuffer += n;
/* Decrement the block count */
nblocks -= blockstodecode;
}
#if CALC_CRC
frame_crc = ape_finishcrc(frame_crc);
if (ape_ctx.CRC != frame_crc)
{
fprintf(stderr,"CRC error in frame %d\n",currentframe);
crc_errors++;
}
#endif
currentframe++;
}
close(fd);
close(fdwav);
if (crc_errors > 0)
return -1;
else
return 0;
}
/* Command-line entry point:  demac infile.ape outfile.wav

   Exits with EXIT_SUCCESS when the file decoded cleanly, and with
   EXIT_FAILURE on a usage error or when ape_decode() reports an error, so
   that scripts can detect failures from the exit status. */
int main(int argc, char* argv[])
{
    int res;

    if (argc != 3) {
        fprintf(stderr,"Usage: demac infile.ape outfile.wav\n");
        return EXIT_FAILURE;
    }

    res = ape_decode(argv[1], argv[2]);

    if (res < 0)
    {
        fprintf(stderr,"DECODING ERROR %d, ABORTING\n", res);
        return EXIT_FAILURE;
    }

    fprintf(stderr,"DECODED OK - NO CRC ERRORS.\n");
    return EXIT_SUCCESS;
}

View file

@ -0,0 +1,35 @@
# __________ __ ___.
# Open \______ \ ____ ____ | | _\_ |__ _______ ___
# Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
# Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
# Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
# \/ \/ \/ \/ \/
# $Id$
#
# libdemac
# Static library built from the (preprocessed) libdemac SOURCES list.
DEMACLIB := $(CODECDIR)/libdemac.a
DEMACLIB_SRC := $(call preprocess, $(RBCODECLIB_DIR)/codecs/demac/libdemac/SOURCES)
DEMACLIB_OBJ := $(call c2obj, $(DEMACLIB_SRC))
OTHER_SRC += $(DEMACLIB_SRC)
# udiv32_arm-pre.S is not listed in SOURCES, so register it separately on ARM.
ifeq ($(ARCH),arch_arm)
OTHER_SRC += $(RBCODECLIB_DIR)/codecs/demac/libdemac/udiv32_arm-pre.S
endif
# "-pre" variant of the library: same objects, except that udiv32_arm.o is
# replaced by udiv32_arm-pre.o.
DEMACLIB_PRE := $(subst .a,-pre.a,$(DEMACLIB))
DEMACLIB_OBJ_PRE := $(subst udiv32_arm.o,udiv32_arm-pre.o,$(DEMACLIB_OBJ))
$(DEMACLIB_PRE): $(DEMACLIB_OBJ_PRE)
$(SILENT)$(shell rm -f $@)
$(call PRINTS,AR $(@F))$(AR) rcs $@ $^ >/dev/null
$(DEMACLIB): $(DEMACLIB_OBJ)
$(SILENT)$(shell rm -f $@)
$(call PRINTS,AR $(@F))$(AR) rcs $@ $^ >/dev/null
# Generate ape_free_iram.h from the ape-pre.map linker map: the perl script
# takes the start and length from the PLUGIN_IRAM line and the address from
# the "iend = " line, then emits
#   #define FREE_IRAM <length + start - iend>
# (nothing is emitted when the length is zero).
$(CODECDIR)/ape_free_iram.h: $(CODECDIR)/ape-pre.map
$(call PRINTS,GEN $(@F))perl -an \
-e 'if(/^PLUGIN_IRAM/){$$istart=hex($$F[1]);$$ilen=hex($$F[2])}' \
-e 'if(/iend = /){$$iend=hex($$F[0]);}' \
-e '}{if($$ilen){print"#define FREE_IRAM ".($$ilen+$$istart-$$iend)."\n";}' \
$(CODECDIR)/ape-pre.map \
> $@

View file

@ -0,0 +1,15 @@
/* Source list for libdemac; run through the preprocessor by libdemac.make */
predictor.c
/* Architecture-specific assembler files */
#ifdef CPU_ARM
predictor-arm.S
udiv32_arm.S
#elif defined CPU_COLDFIRE
predictor-cf.S
#endif
entropy.c
decoder.c
parser.c
/* One file per filter variant */
filter_1280_15.c
filter_16_11.c
filter_256_13.c
filter_32_10.c
filter_64_11.c

View file

@ -0,0 +1,120 @@
/*
libdemac - A Monkey's Audio decoder
$Id$
Copyright (C) Dave Chapman 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
*/
#include <inttypes.h>
#include "demac.h"
/* 256-entry lookup table for the byte-at-a-time CRC in ape_updatecrc(),
indexed by (crc ^ data byte) & 0xff.  These are the values of the common
reflected CRC-32 table (polynomial 0xEDB88320, visible as entry 0x80). */
static const uint32_t crctab32[] =
{
0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA,
0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3,
0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988,
0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91,
0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE,
0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7,
0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC,
0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5,
0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172,
0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B,
0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940,
0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59,
0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116,
0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F,
0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924,
0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D,
0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A,
0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433,
0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818,
0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01,
0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E,
0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457,
0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C,
0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65,
0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2,
0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB,
0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0,
0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9,
0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086,
0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F,
0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4,
0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD,
0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A,
0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683,
0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8,
0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1,
0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE,
0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7,
0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC,
0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5,
0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252,
0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B,
0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60,
0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79,
0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236,
0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F,
0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04,
0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D,
0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A,
0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713,
0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38,
0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21,
0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E,
0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777,
0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C,
0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45,
0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2,
0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB,
0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0,
0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9,
0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6,
0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF,
0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94,
0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D
};
/* Return the starting value for a new CRC computation (all 32 bits set). */
uint32_t ape_initcrc(void)
{
    const uint32_t seed = UINT32_C(0xffffffff);

    return seed;
}
/* Update the CRC from a block of WAV-format audio data.

   block - first byte of the data to add to the checksum
   count - number of bytes at `block`; zero or a negative value leaves the
           CRC unchanged (a negative count previously ran off the end of
           the buffer)
   crc   - running CRC, as returned by ape_initcrc() or a previous call

   Returns the updated running CRC. */
uint32_t ape_updatecrc(unsigned char *block, int count, uint32_t crc)
{
    while (count-- > 0)
        crc = (crc >> 8) ^ crctab32[(crc & 0xff) ^ *block++];

    return crc;
}
/* Turn a running CRC into its final form: invert every bit, then drop the
   least significant bit by shifting right once.  The result is what
   demac.c compares against the frame's stored CRC. */
uint32_t ape_finishcrc(uint32_t crc)
{
    const uint32_t inverted = crc ^ UINT32_C(0xffffffff);

    return inverted >> 1;
}

View file

@ -0,0 +1,216 @@
/*
libdemac - A Monkey's Audio decoder
$Id$
Copyright (C) Dave Chapman 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
*/
#include <inttypes.h>
#include <string.h>
#include "demac.h"
#include "predictor.h"
#include "entropy.h"
#include "filter.h"
#include "demac_config.h"
/* Statically allocate the filter buffers */
/* Each buffer holds (taps*3 + FILTER_HISTORY_SIZE) * 2 filter_int elements.
The byte counts quoted below assume the default FILTER_HISTORY_SIZE of 512
and are given for 16-bit and 32-bit filter_int respectively.  The
FILTERBUFnn macros name the buffer passed to each init_filter_nn_*() call
in init_frame_decoder(). */
#ifdef FILTER256_IRAM
static filter_int filterbuf32[(32*3 + FILTER_HISTORY_SIZE) * 2]
IBSS_ATTR_DEMAC MEM_ALIGN_ATTR;
/* 2432 or 4864 bytes */
static filter_int filterbuf256[(256*3 + FILTER_HISTORY_SIZE) * 2]
IBSS_ATTR_DEMAC MEM_ALIGN_ATTR;
/* 5120 or 10240 bytes */
/* The 64-tap filter borrows the (larger) 256-tap buffer in this layout */
#define FILTERBUF64 filterbuf256
#define FILTERBUF32 filterbuf32
#define FILTERBUF16 filterbuf32
#else
static filter_int filterbuf64[(64*3 + FILTER_HISTORY_SIZE) * 2]
IBSS_ATTR_DEMAC MEM_ALIGN_ATTR;
/* 2816 or 5632 bytes */
static filter_int filterbuf256[(256*3 + FILTER_HISTORY_SIZE) * 2]
MEM_ALIGN_ATTR; /* 5120 or 10240 bytes */
/* The 16-, 32- and 64-tap filters all share filterbuf64 in this layout */
#define FILTERBUF64 filterbuf64
#define FILTERBUF32 filterbuf64
#define FILTERBUF16 filterbuf64
#endif
/* This is only needed for "insane" files, and no current Rockbox targets
can hope to decode them in realtime, except the Gigabeat S (at 528MHz). */
static filter_int filterbuf1280[(1280*3 + FILTER_HISTORY_SIZE) * 2]
IBSS_ATTR_DEMAC_INSANEBUF MEM_ALIGN_ATTR;
/* 17408 or 34816 bytes */
/* Prepare all decoder state for a new frame.

   ape_ctx       - decoder context; compressiontype selects which filters
                   are initialised and the embedded predictor is reset
   inbuffer      - compressed input data for the frame
   firstbyte     - in/out byte position, passed to init_entropy_decoder()
   bytesconsumed - out parameter, passed to init_entropy_decoder()

   The entropy decoder is initialised first, then the predictor, then the
   filters belonging to the file's compression level. */
void init_frame_decoder(struct ape_ctx_t* ape_ctx,
                        unsigned char* inbuffer, int* firstbyte,
                        int* bytesconsumed)
{
    init_entropy_decoder(ape_ctx, inbuffer, firstbyte, bytesconsumed);

    init_predictor_decoder(&ape_ctx->predictor);

    switch (ape_ctx->compressiontype)
    {
        case 2000:
            init_filter_16_11(FILTERBUF16);
            break;

        case 3000:
            init_filter_64_11(FILTERBUF64);
            break;

        case 4000:
            init_filter_256_13(filterbuf256);
            init_filter_32_10(FILTERBUF32);
            break;

        case 5000:
            init_filter_1280_15(filterbuf1280);
            init_filter_256_13(filterbuf256);
            /* FILTERBUF16 and FILTERBUF32 always name the same buffer;
               use the macro that matches the filter being initialised. */
            init_filter_16_11(FILTERBUF16);
            break;

        default:
            /* No filters to initialise for any other compression type */
            break;
    }
}
/* Decode `count` blocks of the current frame.
ape_ctx       - decoder context (channels, bps, frameflags, compressiontype,
fileversion and predictor state are read here)
inbuffer, firstbyte, bytesconsumed - passed straight to entropy_decode()
decoded0      - output samples for the first channel
decoded1      - output samples for the second channel (written only when
ape_ctx->channels is 2 in the paths visible here)
count         - number of blocks to decode
Processing order: entropy decoding, the filters for the compression level,
predictor decoding, then channel decorrelation/copy and output scaling.
Every return statement in this function returns 0. */
int ICODE_ATTR_DEMAC decode_chunk(struct ape_ctx_t* ape_ctx,
unsigned char* inbuffer, int* firstbyte,
int* bytesconsumed,
int32_t* decoded0, int32_t* decoded1,
int count)
{
int32_t left, right;
/* In Rockbox builds every sample is shifted left to APE_OUTPUT_DEPTH bits;
otherwise SCALE() leaves the sample unchanged.  Note that SCALE stays
defined for the rest of the translation unit. */
#ifdef ROCKBOX
int scale = (APE_OUTPUT_DEPTH - ape_ctx->bps);
#define SCALE(x) ((x) << scale)
#else
#define SCALE(x) (x)
#endif
/* Single-channel path: true mono files, and frames that have the
PSEUDO_STEREO flag set without also matching STEREO_SILENCE. */
if ((ape_ctx->channels==1) || ((ape_ctx->frameflags
& (APE_FRAMECODE_PSEUDO_STEREO|APE_FRAMECODE_STEREO_SILENCE))
== APE_FRAMECODE_PSEUDO_STEREO)) {
entropy_decode(ape_ctx, inbuffer, firstbyte, bytesconsumed,
decoded0, NULL, count);
if (ape_ctx->frameflags & APE_FRAMECODE_MONO_SILENCE) {
/* We are pure silence, so we're done. */
return 0;
}
/* Apply filters in order of increasing length; compression types not
listed here get none. */
switch (ape_ctx->compressiontype)
{
case 2000:
apply_filter_16_11(ape_ctx->fileversion,0,decoded0,count);
break;
case 3000:
apply_filter_64_11(ape_ctx->fileversion,0,decoded0,count);
break;
case 4000:
apply_filter_32_10(ape_ctx->fileversion,0,decoded0,count);
apply_filter_256_13(ape_ctx->fileversion,0,decoded0,count);
break;
case 5000:
apply_filter_16_11(ape_ctx->fileversion,0,decoded0,count);
apply_filter_256_13(ape_ctx->fileversion,0,decoded0,count);
apply_filter_1280_15(ape_ctx->fileversion,0,decoded0,count);
}
/* Now apply the predictor decoding */
predictor_decode_mono(&ape_ctx->predictor,decoded0,count);
if (ape_ctx->channels==2) {
/* Pseudo-stereo - copy left channel to right channel */
while (count--)
{
left = *decoded0;
*(decoded1++) = *(decoded0++) = SCALE(left);
}
}
/* Without ROCKBOX, SCALE() is the identity, so mono needs no extra pass */
#ifdef ROCKBOX
else {
/* Scale to output depth */
while (count--)
{
left = *decoded0;
*(decoded0++) = SCALE(left);
}
}
#endif
} else { /* Stereo */
entropy_decode(ape_ctx, inbuffer, firstbyte, bytesconsumed,
decoded0, decoded1, count);
if ((ape_ctx->frameflags & APE_FRAMECODE_STEREO_SILENCE)
== APE_FRAMECODE_STEREO_SILENCE) {
/* We are pure silence, so we're done. */
return 0;
}
/* Apply filters - compression type 1000 doesn't have any */
/* The second argument of each apply_filter_*() call is the channel
index: 0 for decoded0, 1 for decoded1. */
switch (ape_ctx->compressiontype)
{
case 2000:
apply_filter_16_11(ape_ctx->fileversion,0,decoded0,count);
apply_filter_16_11(ape_ctx->fileversion,1,decoded1,count);
break;
case 3000:
apply_filter_64_11(ape_ctx->fileversion,0,decoded0,count);
apply_filter_64_11(ape_ctx->fileversion,1,decoded1,count);
break;
case 4000:
apply_filter_32_10(ape_ctx->fileversion,0,decoded0,count);
apply_filter_32_10(ape_ctx->fileversion,1,decoded1,count);
apply_filter_256_13(ape_ctx->fileversion,0,decoded0,count);
apply_filter_256_13(ape_ctx->fileversion,1,decoded1,count);
break;
case 5000:
apply_filter_16_11(ape_ctx->fileversion,0,decoded0,count);
apply_filter_16_11(ape_ctx->fileversion,1,decoded1,count);
apply_filter_256_13(ape_ctx->fileversion,0,decoded0,count);
apply_filter_256_13(ape_ctx->fileversion,1,decoded1,count);
apply_filter_1280_15(ape_ctx->fileversion,0,decoded0,count);
apply_filter_1280_15(ape_ctx->fileversion,1,decoded1,count);
}
/* Now apply the predictor decoding */
predictor_decode_stereo(&ape_ctx->predictor,decoded0,decoded1,count);
/* Decorrelate and scale to output depth */
/* On entry to the loop decoded0/decoded1 hold the two decorrelated
signals; on exit they hold the left/right output samples. */
while (count--)
{
left = *decoded1 - (*decoded0 / 2);
right = left + *decoded0;
*(decoded0++) = SCALE(left);
*(decoded1++) = SCALE(right);
}
}
return 0;
}

View file

@ -0,0 +1,40 @@
/*
libdemac - A Monkey's Audio decoder
$Id$
Copyright (C) Dave Chapman 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
*/
#ifndef _APE_DECODER_H
#define _APE_DECODER_H
#include <inttypes.h>
#include "parser.h"
/* Prepare the entropy decoder, predictor and compression-level filters for
a new frame.  Call once per frame, before the first decode_chunk(). */
void init_frame_decoder(struct ape_ctx_t* ape_ctx,
unsigned char* inbuffer, int* firstbyte,
int* bytesconsumed);
/* Decode `count` blocks of the current frame into decoded0 (and decoded1
for two-channel output).  *bytesconsumed is an out parameter that demac.c
uses to advance its input buffer. */
int decode_chunk(struct ape_ctx_t* ape_ctx,
unsigned char* inbuffer, int* firstbyte,
int* bytesconsumed,
int32_t* decoded0, int32_t* decoded1,
int count);
#endif

View file

@ -0,0 +1,45 @@
/*
libdemac - A Monkey's Audio decoder
$Id$
Copyright (C) Dave Chapman 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
*/
/* Public interface of libdemac: frame decoding plus the CRC helpers.

   This guard must not be _APE_DECODER_H: decoder.h uses that name, and
   sharing it would hide the CRC prototypes below whenever decoder.h is
   included first. */
#ifndef _APE_DEMAC_H
#define _APE_DEMAC_H

#include <inttypes.h>
#include "parser.h"

/* Prepare the entropy decoder, predictor and compression-level filters for
   a new frame.  Call once per frame, before the first decode_chunk(). */
void init_frame_decoder(struct ape_ctx_t* ape_ctx,
                        unsigned char* inbuffer, int* firstbyte,
                        int* bytesconsumed);

/* Decode `count` blocks of the current frame into decoded0 (and decoded1
   for two-channel output). */
int decode_chunk(struct ape_ctx_t* ape_ctx,
                 unsigned char* inbuffer, int* firstbyte,
                 int* bytesconsumed,
                 int32_t* decoded0, int32_t* decoded1,
                 int count);

/* CRC over the WAV-format output of a frame:
   start with ape_initcrc(), feed data through ape_updatecrc(), and pass the
   result to ape_finishcrc() to obtain the final value. */
uint32_t ape_initcrc(void);
uint32_t ape_updatecrc(unsigned char *block, int count, uint32_t crc);
uint32_t ape_finishcrc(uint32_t crc);

#endif /* _APE_DEMAC_H */

View file

@ -0,0 +1,145 @@
/*
libdemac - A Monkey's Audio decoder
$Id$
Copyright (C) Dave Chapman 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
*/
#ifndef _DEMAC_CONFIG_H
#define _DEMAC_CONFIG_H
/* Build-time choices for libdemac.
* Note that this file is included by both .c and .S files. */
#ifdef ROCKBOX
#include "config.h"
#ifndef __ASSEMBLER__
#include "codeclib.h"
#include <codecs.h>
#endif
/* Fixed output sample depth in bits; decode_chunk() shifts samples left by
* (APE_OUTPUT_DEPTH - bps). */
#define APE_OUTPUT_DEPTH 29
/* On ARMv4, using 32 bit ints for the filters is faster. */
#if defined(CPU_ARM) && (ARM_ARCH == 4)
#define FILTER_BITS 32
#endif
/* Selects the filter buffer layout in decoder.c */
#if !defined(CPU_PP) && !defined(CPU_S5L870X)
#define FILTER256_IRAM
#endif
/* Per-CPU placement of code, constants and buffers:
*   ICODE_SECTION_DEMAC_ARM   - section name for use in the .S files
*   ICODE_ATTR_DEMAC          - attribute for C functions
*   ICONST_ATTR_DEMAC         - attribute for constant tables
*   IBSS_ATTR_DEMAC           - attribute for small zero-initialised data
*   IBSS_ATTR_DEMAC_INSANEBUF - attribute for the "insane" filter buffer */
#if CONFIG_CPU == PP5002 || defined(CPU_S5L870X)
/* Code and data IRAM for speed (PP5002 has a broken cache), not enough IRAM
* for the insane filter buffer. Reciprocal table for division in IRAM. */
#define ICODE_SECTION_DEMAC_ARM .icode
#define ICODE_ATTR_DEMAC ICODE_ATTR
#define ICONST_ATTR_DEMAC ICONST_ATTR
#define IBSS_ATTR_DEMAC IBSS_ATTR
#define IBSS_ATTR_DEMAC_INSANEBUF
#elif CONFIG_CPU == PP5020
/* Code and small data in DRAM for speed (PP5020 IRAM isn't completely single
* cycle). Insane filter buffer not in IRAM in favour of reciprocal table for
* division. Decoded data buffers should be in IRAM (defined by the caller). */
#define ICODE_SECTION_DEMAC_ARM .text
#define ICODE_ATTR_DEMAC
#define ICONST_ATTR_DEMAC
#define IBSS_ATTR_DEMAC
#define IBSS_ATTR_DEMAC_INSANEBUF
#elif CONFIG_CPU == PP5022
/* Code in DRAM, data in IRAM. Insane filter buffer not in IRAM in favour of
* reciprocal table for division */
#define ICODE_SECTION_DEMAC_ARM .text
#define ICODE_ATTR_DEMAC
#define ICONST_ATTR_DEMAC ICONST_ATTR
#define IBSS_ATTR_DEMAC IBSS_ATTR
#define IBSS_ATTR_DEMAC_INSANEBUF
#else
/* Code in DRAM, data in IRAM, including insane filter buffer. */
#define ICODE_SECTION_DEMAC_ARM .text
#define ICODE_ATTR_DEMAC
#define ICONST_ATTR_DEMAC ICONST_ATTR
#define IBSS_ATTR_DEMAC IBSS_ATTR
#define IBSS_ATTR_DEMAC_INSANEBUF IBSS_ATTR
#endif
#else /* !ROCKBOX */
/* Standalone build: output keeps the file's own bit depth.  This macro
* expands to an expression on a variable named ape_ctx, so it can only be
* used where such a pointer is in scope. */
#define APE_OUTPUT_DEPTH (ape_ctx->bps)
#define MEM_ALIGN_ATTR __attribute__((aligned(16)))
/* adjust to target architecture for best performance */
#define ICODE_ATTR_DEMAC
#define ICONST_ATTR_DEMAC
#define IBSS_ATTR_DEMAC
#define IBSS_ATTR_DEMAC_INSANEBUF
/* Use to give gcc hints on which branch is most likely taken */
#if defined(__GNUC__) && __GNUC__ >= 3
#define LIKELY(x) __builtin_expect(!!(x), 1)
#define UNLIKELY(x) __builtin_expect(!!(x), 0)
#else
#define LIKELY(x) (x)
#define UNLIKELY(x) (x)
#endif
#endif /* !ROCKBOX */
/* Defaults - each may be overridden by defining the macro beforehand */
#ifndef FILTER_HISTORY_SIZE
#define FILTER_HISTORY_SIZE 512
#endif

#ifndef PREDICTOR_HISTORY_SIZE
#define PREDICTOR_HISTORY_SIZE 512
#endif

/* Width in bits of the filter sample type (16 or 32) */
#ifndef FILTER_BITS
#define FILTER_BITS 16
#endif

/* Everything below is C only; this header is also included from .S files */
#ifndef __ASSEMBLER__

#if defined(CPU_ARM) && (ARM_ARCH < 5 || defined(USE_IRAM))
/* optimised unsigned integer division for ARMv4, in IRAM */
unsigned udiv32_arm(unsigned a, unsigned b);
#define UDIV32(a, b) udiv32_arm(a, b)
#else
/* default - arguments are parenthesised so that expressions such as
   UDIV32(x + 1, y + 1) divide the way the call reads */
#define UDIV32(a, b) ((a) / (b))
#endif

#include <inttypes.h>
#if FILTER_BITS == 32
typedef int32_t filter_int;
#elif FILTER_BITS == 16
typedef int16_t filter_int;
#else
/* Fail here rather than with "unknown type filter_int" somewhere else */
#error "FILTER_BITS must be 16 or 32"
#endif

#endif /* !__ASSEMBLER__ */
#endif /* _DEMAC_CONFIG_H */

View file

@ -0,0 +1,464 @@
/*
libdemac - A Monkey's Audio decoder
$Id$
Copyright (C) Dave Chapman 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
*/
#include <inttypes.h>
#include <string.h>
#include "parser.h"
#include "entropy.h"
#include "demac_config.h"
/* Number of symbols in the model.  Each counts_* table below has
MODEL_ELEMENTS + 1 entries and each counts_diff_* table MODEL_ELEMENTS. */
#define MODEL_ELEMENTS 64
/*
The following counts arrays for use with the range decoder are
hard-coded in the Monkey's Audio decoder.
*/
/* counts_*[i] is the cumulative frequency below symbol i; the tables rise
from 0 to 65536 (1 << 16), matching the 16-bit shift that
range_get_symbol_*() passes to range_decode_culshift(). */
static const int counts_3970[65] ICONST_ATTR_DEMAC =
{
0,14824,28224,39348,47855,53994,58171,60926,
62682,63786,64463,64878,65126,65276,65365,65419,
65450,65469,65480,65487,65491,65493,65494,65495,
65496,65497,65498,65499,65500,65501,65502,65503,
65504,65505,65506,65507,65508,65509,65510,65511,
65512,65513,65514,65515,65516,65517,65518,65519,
65520,65521,65522,65523,65524,65525,65526,65527,
65528,65529,65530,65531,65532,65533,65534,65535,
65536
};
/* counts_diff_3970[i] = counts_3970[i+1] - counts_3970[i] */
static const int counts_diff_3970[64] ICONST_ATTR_DEMAC =
{
14824,13400,11124,8507,6139,4177,2755,1756,
1104,677,415,248,150,89,54,31,
19,11,7,4,2,1,1,1,
1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1
};
/* Same layout as counts_3970; used by range_get_symbol_3980() */
static const int counts_3980[65] ICONST_ATTR_DEMAC =
{
0,19578,36160,48417,56323,60899,63265,64435,
64971,65232,65351,65416,65447,65466,65476,65482,
65485,65488,65490,65491,65492,65493,65494,65495,
65496,65497,65498,65499,65500,65501,65502,65503,
65504,65505,65506,65507,65508,65509,65510,65511,
65512,65513,65514,65515,65516,65517,65518,65519,
65520,65521,65522,65523,65524,65525,65526,65527,
65528,65529,65530,65531,65532,65533,65534,65535,
65536
};
/* counts_diff_3980[i] = counts_3980[i+1] - counts_3980[i] */
static const int counts_diff_3980[64] ICONST_ATTR_DEMAC =
{
19578,16582,12257,7906,4576,2366,1170,536,
261,119,65,31,19,10,6,3,
3,2,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1
};
/*
Range decoder adapted from rangecod.c included in:
http://www.compressconsult.com/rangecoder/rngcod13.zip
rangecod.c range encoding
(c) Michael Schindler
1997, 1998, 1999, 2000
http://www.compressconsult.com/
michael@compressconsult.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
The encoding functions were removed, and functions turned into "static
inline" functions. Some minor cosmetic changes were made (e.g. turning
pre-processor symbols into upper-case, removing the rc parameter from
each function (and the RNGC macro)).
*/
/* BITSTREAM READING FUNCTIONS */
/* We deal with the input data one byte at a time - to ensure
functionality on CPUs of any endianness regardless of any requirements
for aligned reads.
*/
/* bytebuffer points at the current 4-byte group of input and
bytebufferoffset (0..3) selects the byte within that group. */
static unsigned char* bytebuffer IBSS_ATTR_DEMAC;
static int bytebufferoffset IBSS_ATTR_DEMAC;
/* Advance to the next input byte without reading it.
The offset counts down 3,2,1,0 within a group.  When it steps from 0 to -1,
bit 2 of the (two's complement) value is set, so "& 4" yields 4 and moves
bytebuffer on to the next group, and "& 3" wraps the offset back to 3.
For offsets 3..1 "& 4" is 0 and the pointer stays put.  This keeps the
function free of branches. */
static inline void skip_byte(void)
{
bytebufferoffset--;
bytebuffer += bytebufferoffset & 4;
bytebufferoffset &= 3;
}
/* Fetch the byte at the current input position, then step past it. */
static inline int read_byte(void)
{
    const int value = bytebuffer[bytebufferoffset];

    skip_byte();

    return value;
}
/* RANGE DECODING FUNCTIONS */
/* SIZE OF RANGE ENCODING CODE VALUES. */
#define CODE_BITS 32
/* With CODE_BITS == 32: TOP_VALUE = 0x80000000, SHIFT_BITS = 23,
EXTRA_BITS = 7 and BOTTOM_VALUE = 0x00800000. */
#define TOP_VALUE ((unsigned int)1 << (CODE_BITS-1))
#define SHIFT_BITS (CODE_BITS - 9)
#define EXTRA_BITS ((CODE_BITS-2) % 8 + 1)
/* range_dec_normalize() pulls in input bytes while range <= BOTTOM_VALUE */
#define BOTTOM_VALUE (TOP_VALUE >> 8)
struct rangecoder_t
{
uint32_t low; /* low end of interval */
uint32_t range; /* length of interval */
uint32_t help; /* bytes_to_follow resp. intermediate value */
unsigned int buffer; /* buffer for input/output */
};
/* The single, file-wide decoder state used by all range_* functions below */
static struct rangecoder_t rc IBSS_ATTR_DEMAC;
/* Prime the range decoder state from the first byte of the input */
static inline void range_start_decoding(void)
{
    const unsigned int first = read_byte();

    rc.buffer = first;
    rc.range  = (uint32_t) 1 << EXTRA_BITS;
    /* Only the top EXTRA_BITS bits of the first byte seed the interval */
    rc.low    = first >> (8 - EXTRA_BITS);
}
/* Renormalise the decoder: while the interval is no longer than
   BOTTOM_VALUE, shift one more input byte into buffer and low and widen
   the range by 8 bits. */
static inline void range_dec_normalize(void)
{
    for (; rc.range <= BOTTOM_VALUE; rc.range <<= 8)
    {
        const unsigned int shifted_in = (rc.buffer << 8) | read_byte();

        rc.buffer = shifted_in;
        rc.low    = (rc.low << 8) | ((shifted_in >> 1) & 0xff);
    }
}
/* Calculate culmulative frequency for next symbol. Does NO update!*/
/* tot_f is the total frequency */
/* or: totf is (code_value)1<<shift */
/* returns the culmulative frequency */
/* Normalise, then split the current interval into tot_f equal steps.
   The step size is stored in rc.help for the following
   range_decode_update() call; the return value is rc.low expressed in
   steps.  The decoder state itself is not advanced here. */
static inline int range_decode_culfreq(int tot_f)
{
    uint32_t step;

    range_dec_normalize();
    step = UDIV32(rc.range, tot_f);
    rc.help = step;
    return UDIV32(rc.low, step);
}
/* Same as range_decode_culfreq() for a total frequency of 1 << shift:
   the step size is obtained with a shift instead of a division. */
static inline int range_decode_culshift(int shift)
{
    uint32_t step;

    range_dec_normalize();
    step = rc.range >> shift;
    rc.help = step;
    return UDIV32(rc.low, step);
}
/* Update decoding state */
/* sy_f is the interval length (frequency of the symbol) */
/* lt_f is the lower end (frequency sum of < symbols) */
/* Consume a symbol: using the step size left in rc.help by the preceding
   culfreq/culshift call, remove the lt_f steps below the symbol from
   rc.low and shrink the interval to the symbol's sy_f steps. */
static inline void range_decode_update(int sy_f, int lt_f)
{
    const uint32_t step = rc.help;

    rc.low   = rc.low - step * lt_f;
    rc.range = step * sy_f;
}
/* Decode a byte/short without modelling */
/* Decode 8 raw (unmodelled) bits from the range coder. */
static inline unsigned char decode_byte(void)
{
    const int value = range_decode_culshift(8);

    range_decode_update(1, value);
    return value;
}
/* Decode 16 raw (unmodelled) bits from the range coder. */
static inline unsigned short range_decode_short(void)
{
    const int value = range_decode_culshift(16);

    range_decode_update(1, value);
    return value;
}
/* Decode n bits (n <= 16) without modelling - based on range_decode_short */
/* Decode n raw (unmodelled) bits from the range coder; callers keep
   n <= 16. */
static inline int range_decode_bits(int n)
{
    const int value = range_decode_culshift(n);

    range_decode_update(1, value);
    return value;
}
/* Finish decoding */
/* Called once a frame has been fully decoded: a final normalisation pass
   consumes the input bytes that still belong to the range coder. */
static inline void range_done_decoding(void)
{
    range_dec_normalize();
}
/*
range_get_symbol_* functions based on main decoding loop in simple_d.c from
http://www.compressconsult.com/rangecoder/rngcod13.zip
(c) Michael Schindler
*/
/* Decode one symbol with the 3980 model.  A 16-bit cumulative frequency
   is read and the counts_3980 table (defined outside this function) is
   scanned linearly for the interval containing it; the decoder is then
   advanced with the matching entries of counts_diff_3980/counts_3980. */
static inline int range_get_symbol_3980(void)
{
    const int cf = range_decode_culshift(16);
    int symbol = 0;

    /* linear scan; a binary search would be much better */
    while (counts_3980[symbol + 1] <= cf)
        symbol++;

    range_decode_update(counts_diff_3980[symbol], counts_3980[symbol]);
    return symbol;
}
/* Decode one symbol with the 3970 model.  A 16-bit cumulative frequency
   is read and the counts_3970 table (defined outside this function) is
   scanned linearly for the interval containing it; the decoder is then
   advanced with the matching entries of counts_diff_3970/counts_3970. */
static inline int range_get_symbol_3970(void)
{
    const int cf = range_decode_culshift(16);
    int symbol = 0;

    /* linear scan; a binary search would be much better */
    while (counts_3970[symbol + 1] <= cf)
        symbol++;

    range_decode_update(counts_diff_3970[symbol], counts_3970[symbol]);
    return symbol;
}
/* MAIN DECODING FUNCTIONS */
/* Adaptive state of one entropy-decoder channel; maintained by
   update_rice(). */
struct rice_t
{
/* Current parameter; update_rice() moves it by at most one per sample. */
uint32_t k;
/* Running sum that update_rice() compares against 1 << (k + 4) to decide
   whether k should change. */
uint32_t ksum;
};
/* entropy_decode() uses riceY for the decoded0 output and riceX for the
   decoded1 output. */
static struct rice_t riceX IBSS_ATTR_DEMAC;
static struct rice_t riceY IBSS_ATTR_DEMAC;
/* Adapt the rice state after decoding the (unsigned-coded) value x.
   ksum tracks a decaying sum of (x + 1) / 2; k is stepped down when ksum
   falls below 1 << (k + 4) and stepped up when it reaches twice that
   limit.  A k of zero is always bumped to 1. */
static inline void update_rice(struct rice_t* rice, int x)
{
    rice->ksum += ((x + 1) / 2) - ((rice->ksum + 16) >> 5);

    if (UNLIKELY(rice->k == 0)) {
        rice->k = 1;
    } else {
        /* Shift an unsigned one: with a plain int the shift is undefined
           as soon as k + 4 reaches 31, which a stream with a very large
           ksum can provoke. */
        uint32_t lim = (uint32_t)1 << (rice->k + 4);

        if (UNLIKELY(rice->ksum < lim)) {
            rice->k--;
        } else if (UNLIKELY(rice->ksum >= 2 * lim)) {
            rice->k++;
        }
    }
}
/* Decode one signed value using the 3980 symbol model (entropy_decode()
   selects this path for fileversion > 3970).
   The value is coded as overflow * pivot + base, where pivot is derived
   from the rice state (ksum >> 5, at least 1).  Returns the value
   converted from its unsigned coding to a signed integer and updates
   *rice. */
static inline int entropy_decode3980(struct rice_t* rice)
{
    int base, x, pivot, overflow;

    pivot = rice->ksum >> 5;
    if (UNLIKELY(pivot == 0))
        pivot = 1;

    overflow = range_get_symbol_3980();

    if (UNLIKELY(overflow == (MODEL_ELEMENTS-1))) {
        /* Escape symbol: the real overflow follows as two raw 16-bit
           halves, high half first.  Assemble it in unsigned arithmetic;
           shifting the promoted (signed) short left by 16 is undefined
           when its top bit is set. */
        unsigned int wide = (unsigned int)range_decode_short() << 16;
        wide |= range_decode_short();
        overflow = (int)wide;
    }

    if (pivot >= 0x10000) {
        /* Codepath for 24-bit streams */
        int nbits, lo_bits, base_hi, base_lo;

        /* Count the number of bits in pivot */
        nbits = 17; /* We know there must be at least 17 bits */
        while ((pivot >> nbits) > 0) { nbits++; }

        /* base_lo is the low (nbits-16) bits of base
           base_hi is the high 16 bits of base
         */
        lo_bits = (nbits - 16);

        base_hi = range_decode_culfreq((pivot >> lo_bits) + 1);
        range_decode_update(1, base_hi);

        base_lo = range_decode_culshift(lo_bits);
        range_decode_update(1, base_lo);

        base = (base_hi << lo_bits) + base_lo;
    } else {
        /* Codepath for 16-bit streams */
        base = range_decode_culfreq(pivot);
        range_decode_update(1, base);
    }

    /* Combine in unsigned arithmetic so that a corrupt stream cannot
       trigger signed-overflow undefined behaviour; for in-range values the
       result is unchanged. */
    x = (int)((unsigned int)base + (unsigned int)overflow * (unsigned int)pivot);
    update_rice(rice, x);

    /* Convert to signed */
    if (x & 1)
        return (x >> 1) + 1;
    else
        return -(x >> 1);
}
/* Decode one signed value using the 3970 symbol model (used by
   entropy_decode() when fileversion is not above 3970).
   The value consists of an overflow count (from the symbol model) placed
   above nbits raw low bits.  For the escape symbol the bit count itself
   is read from the stream as 5 raw bits and the overflow is zero;
   otherwise it is k - 1 (or 0 when k is 0).  Updates *rice and returns
   the value converted from its unsigned coding to a signed integer. */
static inline int entropy_decode3970(struct rice_t* rice)
{
    int value, nbits;
    int overflow = range_get_symbol_3970();

    if (UNLIKELY(overflow == (MODEL_ELEMENTS - 1))) {
        nbits = range_decode_bits(5);
        overflow = 0;
    } else {
        nbits = (rice->k < 1) ? 0 : rice->k - 1;
    }

    if (nbits > 16) {
        /* More than 16 raw bits: low 16 first, then the remainder */
        value = range_decode_short();
        value |= (range_decode_bits(nbits - 16) << 16);
    } else {
        value = range_decode_bits(nbits);
    }

    value += (overflow << nbits);
    update_rice(rice, value);

    /* Odd codes map to positive values, even codes to zero/negative */
    return (value & 1) ? (value >> 1) + 1 : -(value >> 1);
}
/* Prepare the entropy decoder for a new frame.
   ape_ctx       - receives CRC, frameflags and a zeroed blocksdecoded
   inbuffer      - start of the frame data
   firstbyte     - in: byte offset within the first 4-byte group;
                   out: offset of the next unread byte
   bytesconsumed - out: number of bytes the buffer pointer advanced
   Reads the 4-byte CRC (most significant byte first) and, for files newer
   than version 3820 with the CRC's top bit set, the 4-byte frame flags.
   Both rice states are reset, one input byte is skipped and the range
   decoder is started. */
void init_entropy_decoder(struct ape_ctx_t* ape_ctx,
                          unsigned char* inbuffer, int* firstbyte,
                          int* bytesconsumed)
{
    int i;

    bytebuffer = inbuffer;
    bytebufferoffset = *firstbyte;

    /* Read the CRC */
    ape_ctx->CRC = read_byte();
    for (i = 0; i < 3; i++)
        ape_ctx->CRC = (ape_ctx->CRC << 8) | read_byte();

    /* Read the frame flags if they exist */
    ape_ctx->frameflags = 0;
    if ((ape_ctx->fileversion > 3820) && (ape_ctx->CRC & 0x80000000)) {
        ape_ctx->CRC &= ~0x80000000;

        ape_ctx->frameflags = read_byte();
        for (i = 0; i < 3; i++)
            ape_ctx->frameflags = (ape_ctx->frameflags << 8) | read_byte();
    }

    /* Keep a count of the blocks decoded in this frame */
    ape_ctx->blocksdecoded = 0;

    /* Both rice structs start from the same state */
    riceX.k = 10;
    riceX.ksum = (1 << riceX.k) * 16;
    riceY = riceX;

    /* The first 8 bits of input are ignored. */
    skip_byte();

    range_start_decoding();

    /* Return the new state of the buffer */
    *bytesconsumed = (intptr_t)bytebuffer - (intptr_t)inbuffer;
    *firstbyte = bytebufferoffset;
}
/* Entropy-decode blockstodecode samples of the current frame.
   decoded0 always receives output; decoded1 may be NULL, in which case
   only one channel is decoded.  inbuffer/firstbyte give the input
   position on entry; firstbyte and bytesconsumed report the new position
   on return.  When the frame flags mark the output as silent the buffers
   are simply zeroed and no input is consumed.  Once all blocks of the
   frame have been decoded the range decoder is flushed. */
void ICODE_ATTR_DEMAC entropy_decode(struct ape_ctx_t* ape_ctx,
                                     unsigned char* inbuffer, int* firstbyte,
                                     int* bytesconsumed,
                                     int32_t* decoded0, int32_t* decoded1,
                                     int blockstodecode)
{
    const int have_second = (decoded1 != NULL);

    bytebuffer = inbuffer;
    bytebufferoffset = *firstbyte;

    ape_ctx->blocksdecoded += blockstodecode;

    if ((ape_ctx->frameflags & APE_FRAMECODE_LEFT_SILENCE)
        && (!have_second
            || (ape_ctx->frameflags & APE_FRAMECODE_RIGHT_SILENCE))) {
        /* We are pure silence, just memset the output buffer. */
        memset(decoded0, 0, blockstodecode * sizeof(int32_t));
        if (have_second)
            memset(decoded1, 0, blockstodecode * sizeof(int32_t));
    } else if (ape_ctx->fileversion > 3970) {
        for (; LIKELY(blockstodecode != 0); blockstodecode--) {
            *(decoded0++) = entropy_decode3980(&riceY);
            if (have_second)
                *(decoded1++) = entropy_decode3980(&riceX);
        }
    } else {
        for (; LIKELY(blockstodecode != 0); blockstodecode--) {
            *(decoded0++) = entropy_decode3970(&riceY);
            if (have_second)
                *(decoded1++) = entropy_decode3970(&riceX);
        }
    }

    /* Last blocks of the frame: let the range decoder use up its bytes */
    if (ape_ctx->blocksdecoded == ape_ctx->currentframeblocks)
        range_done_decoding();

    /* Return the new state of the buffer */
    *bytesconsumed = bytebuffer - inbuffer;
    *firstbyte = bytebufferoffset;
}

View file

@ -0,0 +1,40 @@
/*
libdemac - A Monkey's Audio decoder
$Id$
Copyright (C) Dave Chapman 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
*/
#ifndef _APE_ENTROPY_H
#define _APE_ENTROPY_H
#include <inttypes.h>
/* Forward declaration so that this header does not depend on parser.h
   being included first: without it, struct ape_ctx_t would be declared
   with prototype scope inside the parameter lists below. */
struct ape_ctx_t;

/* Start decoding a new frame: reads the frame's CRC and optional flags
   from inbuffer into ape_ctx and primes the range decoder.  *firstbyte is
   the byte offset within the first 4-byte group on entry and is updated on
   return; *bytesconsumed receives the number of bytes the input position
   advanced. */
void init_entropy_decoder(struct ape_ctx_t* ape_ctx,
                          unsigned char* inbuffer, int* firstbyte,
                          int* bytesconsumed);

/* Decode blockstodecode samples into decoded0 and, unless it is NULL,
   decoded1.  inbuffer/firstbyte/bytesconsumed are used as for
   init_entropy_decoder(). */
void entropy_decode(struct ape_ctx_t* ape_ctx,
                    unsigned char* inbuffer, int* firstbyte,
                    int* bytesconsumed,
                    int32_t* decoded0, int32_t* decoded1,
                    int blockstodecode);
#endif

View file

@ -0,0 +1,296 @@
/*
libdemac - A Monkey's Audio decoder
$Id$
Copyright (C) Dave Chapman 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
*/
#include <string.h>
#include <inttypes.h>
#include "demac.h"
#include "filter.h"
#include "demac_config.h"
#if FILTER_BITS == 32
#if defined(CPU_ARM) && (ARM_ARCH == 4)
#include "vector_math32_armv4.h"
#else
#include "vector_math_generic.h"
#endif
#else /* FILTER_BITS == 16 */
#ifdef CPU_COLDFIRE
#include "vector_math16_cf.h"
#elif defined(CPU_ARM) && (ARM_ARCH >= 7)
#include "vector_math16_armv7.h"
#elif defined(CPU_ARM) && (ARM_ARCH >= 6)
#include "vector_math16_armv6.h"
#elif defined(CPU_ARM) && (ARM_ARCH >= 5)
/* Assume all our ARMv5 targets are ARMv5te(j) */
#include "vector_math16_armv5te.h"
#elif (defined(__i386__) || defined(__i486__)) && defined(__MMX__) \
|| defined(__x86_64__)
#include "vector_math16_mmx.h"
#else
#include "vector_math_generic.h"
#endif
#endif /* FILTER_BITS */
/* State of one adaptive filter.  All pointers refer to a single
   caller-supplied buffer laid out by do_init_filter(): ORDER coefficients
   at the start, followed by the history area that delay and adaptcoeffs
   walk through. */
struct filter_t {
filter_int* coeffs; /* ORDER entries */
/* We store all the filter delays in a single buffer */
filter_int* history_end; /* once delay reaches this, the history is moved back to the start */
filter_int* delay; /* next slot for a (saturated) output sample */
filter_int* adaptcoeffs; /* next slot for an adaption value; starts ORDER entries below delay */
int avg; /* running average of |output|; used by the 3980 filter only */
};
/* We name the functions according to the ORDER and FRACBITS
pre-processor symbols and build multiple .o files from this .c file
- this increases code-size but gives the compiler more scope for
optimising the individual functions, as well as replacing a lot of
variables with constants.
*/
/* Map the generic INIT_FILTER/APPLY_FILTER names onto the exported
   function names for the ORDER/FRACBITS pair chosen by the including
   file.
   NOTE(review): only FRACBITS == 11 looks at ORDER; for 13, 10 and 15 the
   order in the name (256, 32, 1280) is implied.  An unsupported
   combination leaves both macros undefined. */
#if FRACBITS == 11
#if ORDER == 16
#define INIT_FILTER init_filter_16_11
#define APPLY_FILTER apply_filter_16_11
#elif ORDER == 64
#define INIT_FILTER init_filter_64_11
#define APPLY_FILTER apply_filter_64_11
#endif
#elif FRACBITS == 13
#define INIT_FILTER init_filter_256_13
#define APPLY_FILTER apply_filter_256_13
#elif FRACBITS == 10
#define INIT_FILTER init_filter_32_10
#define APPLY_FILTER apply_filter_32_10
#elif FRACBITS == 15
#define INIT_FILTER init_filter_1280_15
#define APPLY_FILTER apply_filter_1280_15
#endif
/* Some macros to handle the fixed-point stuff */
/* Convert from (32-FRACBITS).FRACBITS fixed-point format to an
integer (rounding to nearest). */
/* 0.5 in (32-FRACBITS).FRACBITS fixed-point format. */
#define FP_HALF (1 << (FRACBITS - 1))
/* round(x): add one half and drop the fraction bits.  The argument is
   parenthesised so that expressions built from operators with lower
   precedence than '+' (e.g. a | b) are rounded as a whole. */
#define FP_TO_INT(x) (((x) + FP_HALF) >> FRACBITS)
/* SATURATE(x): clamp x to the signed 16-bit range.  Three
   implementations: the ARMv6+ ssat instruction, an ARM (pre-v6) asm
   sequence, and portable C.  Note that the C version evaluates x more
   than once. */
#ifdef CPU_ARM
#if ARM_ARCH >= 6
#define SATURATE(x) ({int __res; asm("ssat %0, #16, %1" : "=r"(__res) : "r"(x)); __res; })
#else /* ARM_ARCH < 6 */
/* Keeping the asr #31 outside of the asm allows loads to be scheduled between
it and the rest of the block on ARM9E, with the load's result latency filled
by the other calculations. */
#define SATURATE(x) ({ \
int __res = (x) >> 31; \
asm volatile ( \
"teq %0, %1, asr #15\n\t" \
"moveq %0, %1\n\t" \
"eorne %0, %0, #0xff\n\t" \
"eorne %0, %0, #0x7f00" \
: "+r" (__res) : "r" (x) : "cc" \
); \
__res; \
})
#endif /* ARM_ARCH */
#else /* CPU_ARM */
/* x unchanged if it already fits in an int16_t; otherwise the sign mask
   (x >> 31) XOR 0x7FFF, i.e. 0x7FFF for positive and -0x8000 for negative
   x (assuming >> on a negative int is an arithmetic shift). */
#define SATURATE(x) (LIKELY((x) == (int16_t)(x)) ? (x) : ((x) >> 31) ^ 0x7FFF)
#endif
/* Apply the filter with state f to count entries in data[] */
/* Run the adaptive filter f over count samples of data[], in place, using
   the adaption rule for version 3.98 and later files.
   For every sample: predict from the last ORDER delay entries, nudge the
   coefficients by the adaption values according to the sign of the input,
   add the prediction to the input, then record the output and a new
   adaption value in the history. */
static void ICODE_ATTR_DEMAC do_apply_filter_3980(struct filter_t* f,
int32_t* data, int count)
{
int res;
int absres;
#ifdef PREPARE_SCALARPRODUCT
PREPARE_SCALARPRODUCT
#endif
while(LIKELY(count--))
{
#ifdef FUSED_VECTOR_MATH
/* Fused variants: scalar product and coefficient update in one call
   (presumably one pass over the vectors; provided by the included
   vector_math header). */
if (LIKELY(*data != 0)) {
if (*data < 0)
res = vector_sp_add(f->coeffs, f->delay - ORDER,
f->adaptcoeffs - ORDER);
else
res = vector_sp_sub(f->coeffs, f->delay - ORDER,
f->adaptcoeffs - ORDER);
} else {
res = scalarproduct(f->coeffs, f->delay - ORDER);
}
res = FP_TO_INT(res);
#else
/* Prediction first, then the coefficient update: add the adaption
   values for a negative input, subtract them for a positive one, and
   leave the coefficients alone for zero. */
res = FP_TO_INT(scalarproduct(f->coeffs, f->delay - ORDER));
if (LIKELY(*data != 0)) {
if (*data < 0)
vector_add(f->coeffs, f->adaptcoeffs - ORDER);
else
vector_sub(f->coeffs, f->adaptcoeffs - ORDER);
}
#endif
res += *data;
*data++ = res;
/* Update the output history */
*f->delay++ = SATURATE(res);
/* Version 3.98 and later files */
/* Update the adaption coefficients */
/* Each masked shift below isolates the sign bit of res, so the new
   adaption value is +N for negative res and -N otherwise, with N = 32,
   16 or 8 chosen by how large |res| is relative to the running
   average. */
absres = (res < 0 ? -res : res);
if (UNLIKELY(absres > 3 * f->avg))
*f->adaptcoeffs = ((res >> 25) & 64) - 32;
else if (3 * absres > 4 * f->avg)
*f->adaptcoeffs = ((res >> 26) & 32) - 16;
else if (LIKELY(absres > 0))
*f->adaptcoeffs = ((res >> 27) & 16) - 8;
else
*f->adaptcoeffs = 0;
f->avg += (absres - f->avg) / 16;
/* Halve three of the older adaption values. */
f->adaptcoeffs[-1] >>= 1;
f->adaptcoeffs[-2] >>= 1;
f->adaptcoeffs[-8] >>= 1;
f->adaptcoeffs++;
/* Have we filled the history buffer? */
if (UNLIKELY(f->delay == f->history_end)) {
/* Move the newest 2*ORDER history entries to just after the
   coefficients and reset both pointers to the positions
   do_init_filter() gives them. */
memmove(f->coeffs + ORDER, f->delay - (ORDER*2),
(ORDER*2) * sizeof(filter_int));
f->adaptcoeffs = f->coeffs + ORDER*2;
f->delay = f->coeffs + ORDER*3;
}
}
}
/* Run the adaptive filter f over count samples of data[], in place, using
   the adaption rule for files older than version 3.98 (marked untested
   below).  Same structure as do_apply_filter_3980(), but the adaption
   value has a fixed magnitude and f->avg is not used. */
static void ICODE_ATTR_DEMAC do_apply_filter_3970(struct filter_t* f,
int32_t* data, int count)
{
int res;
#ifdef PREPARE_SCALARPRODUCT
PREPARE_SCALARPRODUCT
#endif
while(LIKELY(count--))
{
#ifdef FUSED_VECTOR_MATH
/* Fused variants: scalar product and coefficient update in one call. */
if (LIKELY(*data != 0)) {
if (*data < 0)
res = vector_sp_add(f->coeffs, f->delay - ORDER,
f->adaptcoeffs - ORDER);
else
res = vector_sp_sub(f->coeffs, f->delay - ORDER,
f->adaptcoeffs - ORDER);
} else {
res = scalarproduct(f->coeffs, f->delay - ORDER);
}
res = FP_TO_INT(res);
#else
/* Prediction first, then the coefficient update: add the adaption
   values for a negative input, subtract them for a positive one. */
res = FP_TO_INT(scalarproduct(f->coeffs, f->delay - ORDER));
if (LIKELY(*data != 0)) {
if (*data < 0)
vector_add(f->coeffs, f->adaptcoeffs - ORDER);
else
vector_sub(f->coeffs, f->adaptcoeffs - ORDER);
}
#endif
/* Convert res from (32-FRACBITS).FRACBITS fixed-point format to an
integer (rounding to nearest) and add the input value to
it */
/* (The conversion itself was already done by FP_TO_INT above; only the
   addition happens here.) */
res += *data;
*data++ = res;
/* Update the output history */
*f->delay++ = SATURATE(res);
/* Version ??? to < 3.98 files (untested) */
/* The masked shift isolates the sign bit of res: +4 for negative res,
   -4 for positive, 0 for zero. */
f->adaptcoeffs[0] = (res == 0) ? 0 : ((res >> 28) & 8) - 4;
/* Halve two of the older adaption values. */
f->adaptcoeffs[-4] >>= 1;
f->adaptcoeffs[-8] >>= 1;
f->adaptcoeffs++;
/* Have we filled the history buffer? */
if (UNLIKELY(f->delay == f->history_end)) {
/* Move the newest 2*ORDER history entries to just after the
   coefficients and reset both pointers to the positions
   do_init_filter() gives them. */
memmove(f->coeffs + ORDER, f->delay - (ORDER*2),
(ORDER*2) * sizeof(filter_int));
f->adaptcoeffs = f->coeffs + ORDER*2;
f->delay = f->coeffs + ORDER*3;
}
}
}
/* One filter state per channel; indexed by the channel argument of
   APPLY_FILTER(). */
static struct filter_t filter[2] IBSS_ATTR_DEMAC;
/* Bind filter state f to the buffer buf and reset it.
   buf must provide ORDER*3 + FILTER_HISTORY_SIZE entries: ORDER
   coefficients followed by the history area.  The first ORDER*3 entries
   are cleared; the adaption pointer starts at entry ORDER*2 and the delay
   pointer at entry ORDER*3. */
static void do_init_filter(struct filter_t* f, filter_int* buf)
{
    /* Zero coefficients and history buffer */
    memset(buf, 0, ORDER*3 * sizeof(filter_int));

    f->coeffs      = buf;
    f->adaptcoeffs = buf + ORDER*2;
    f->delay       = buf + ORDER*3;
    f->history_end = buf + ORDER*3 + FILTER_HISTORY_SIZE;

    /* Zero the running average */
    f->avg = 0;
}
/* Initialise both channel filters.  buf is split into two consecutive
   regions of ORDER*3 + FILTER_HISTORY_SIZE entries, one per channel. */
void INIT_FILTER(filter_int* buf)
{
    filter_int* second_half = buf + ORDER*3 + FILTER_HISTORY_SIZE;

    do_init_filter(&filter[0], buf);
    do_init_filter(&filter[1], second_half);
}
/* Filter count samples of data[] in place with the state of the given
   channel (0 or 1), choosing the adaption rule by file version: 3980 and
   later use the 3.98 rule, anything older the 3.97 rule. */
void ICODE_ATTR_DEMAC APPLY_FILTER(int fileversion, int channel,
                                   int32_t* data, int count)
{
    struct filter_t* state = &filter[channel];

    if (fileversion < 3980) {
        do_apply_filter_3970(state, data, count);
        return;
    }

    do_apply_filter_3980(state, data, count);
}

View file

@ -0,0 +1,50 @@
/*
libdemac - A Monkey's Audio decoder
$Id$
Copyright (C) Dave Chapman 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
*/
#ifndef _APE_FILTER_H
#define _APE_FILTER_H
#include "demac_config.h"
/* One init/apply pair per filter configuration, named
   <function>_<ORDER>_<FRACBITS>; each pair is generated from filter.c by
   the matching filter_<ORDER>_<FRACBITS>.c file.
   init_filter_*  - bind both channel filters to buf and reset them.
   apply_filter_* - filter count samples of decoded[] in place for the
                    given channel; fileversion selects the adaption rule. */
void init_filter_16_11(filter_int* buf);
void apply_filter_16_11(int fileversion, int channel,
int32_t* decoded, int count);
void init_filter_64_11(filter_int* buf);
void apply_filter_64_11(int fileversion, int channel,
int32_t* decoded, int count);
void init_filter_32_10(filter_int* buf);
void apply_filter_32_10(int fileversion, int channel,
int32_t* decoded, int count);
void init_filter_256_13(filter_int* buf);
void apply_filter_256_13(int fileversion, int channel,
int32_t* decoded, int count);
void init_filter_1280_15(filter_int* buf);
void apply_filter_1280_15(int fileversion, int channel,
int32_t* decoded, int count);
#endif

View file

@ -0,0 +1,32 @@
/*
libdemac - A Monkey's Audio decoder
$Id$
Copyright (C) Dave Chapman 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
*/
#include "demac_config.h"
#ifndef FILTER256_IRAM
#undef ICODE_ATTR_DEMAC
#define ICODE_ATTR_DEMAC
#endif
#define ORDER 1280
#define FRACBITS 15
#include "filter.c"

View file

@ -0,0 +1,27 @@
/*
libdemac - A Monkey's Audio decoder
$Id$
Copyright (C) Dave Chapman 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
*/
#define ORDER 16
#define FRACBITS 11
#include "filter.c"

View file

@ -0,0 +1,32 @@
/*
libdemac - A Monkey's Audio decoder
$Id$
Copyright (C) Dave Chapman 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
*/
#include "demac_config.h"
#ifndef FILTER256_IRAM
#undef ICODE_ATTR_DEMAC
#define ICODE_ATTR_DEMAC
#endif
#define ORDER 256
#define FRACBITS 13
#include "filter.c"

View file

@ -0,0 +1,27 @@
/*
libdemac - A Monkey's Audio decoder
$Id$
Copyright (C) Dave Chapman 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
*/
#define ORDER 32
#define FRACBITS 10
#include "filter.c"

View file

@ -0,0 +1,27 @@
/*
libdemac - A Monkey's Audio decoder
$Id$
Copyright (C) Dave Chapman 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
*/
#define ORDER 64
#define FRACBITS 11
#include "filter.c"

View file

@ -0,0 +1,402 @@
/*
libdemac - A Monkey's Audio decoder
$Id$
Copyright (C) Dave Chapman 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
*/
#include <inttypes.h>
#include <string.h>
#ifndef ROCKBOX
#include <stdio.h>
#include <stdlib.h>
#include "inttypes.h"
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#endif
#include "parser.h"
/* Local maximum macro; any earlier definition is dropped first.  Both
   arguments may be evaluated twice, so do not pass expressions with side
   effects. */
#ifdef APE_MAX
#undef APE_MAX
#endif
#define APE_MAX(a,b) ((a)>(b)?(a):(b))
/* Read a little-endian 16-bit value from buf and return it as int16_t. */
static inline int16_t get_int16(unsigned char* buf)
{
    const int low  = buf[0];
    const int high = buf[1] << 8;

    return (high | low);
}
/* Read a little-endian unsigned 16-bit value from buf. */
static inline uint16_t get_uint16(unsigned char* buf)
{
    const int low  = buf[0];
    const int high = buf[1] << 8;

    return (high | low);
}
/* Read a little-endian unsigned 32-bit value from buf.
   Each byte is widened to uint32_t before shifting: an unsigned char
   promotes to (signed) int, and shifting a value >= 0x80 left by 24 would
   overflow into the sign bit, which is undefined behaviour. */
static inline uint32_t get_uint32(unsigned char* buf)
{
    return (uint32_t)buf[0]
         | ((uint32_t)buf[1] << 8)
         | ((uint32_t)buf[2] << 16)
         | ((uint32_t)buf[3] << 24);
}
/* Parse an APE file header that is already in memory.
   buf     - start of the file data (must begin with the "MAC " magic)
   ape_ctx - cleared, then filled with the parsed and derived fields
   Returns 0 on success, -1 if the magic does not match.
   All multi-byte fields are little-endian.  Files with version >= 3980
   have a descriptor block followed by a header block; older files have a
   single fixed header with optional extra fields.
   NOTE(review): no buffer length is passed in, so every offset used here
   (including the file-supplied descriptorlength) relies on the caller
   having provided enough data - confirm against callers. */
int ape_parseheaderbuf(unsigned char* buf, struct ape_ctx_t* ape_ctx)
{
unsigned char* header;
memset(ape_ctx,0,sizeof(struct ape_ctx_t));
/* TODO: Skip any leading junk such as id3v2 tags */
ape_ctx->junklength = 0;
memcpy(ape_ctx->magic, buf, 4);
if (memcmp(ape_ctx->magic,"MAC ",4)!=0)
{
return -1;
}
ape_ctx->fileversion = get_int16(buf + 4);
if (ape_ctx->fileversion >= 3980)
{
/* Descriptor block */
ape_ctx->padding1 = get_int16(buf + 6);
ape_ctx->descriptorlength = get_uint32(buf + 8);
ape_ctx->headerlength = get_uint32(buf + 12);
ape_ctx->seektablelength = get_uint32(buf + 16);
ape_ctx->wavheaderlength = get_uint32(buf + 20);
ape_ctx->audiodatalength = get_uint32(buf + 24);
ape_ctx->audiodatalength_high = get_uint32(buf + 28);
ape_ctx->wavtaillength = get_uint32(buf + 32);
memcpy(ape_ctx->md5, buf + 36, 16);
/* The header block starts right after the descriptor, whose length is
   taken from the file. */
header = buf + ape_ctx->descriptorlength;
/* Read header data */
ape_ctx->compressiontype = get_uint16(header + 0);
ape_ctx->formatflags = get_uint16(header + 2);
ape_ctx->blocksperframe = get_uint32(header + 4);
ape_ctx->finalframeblocks = get_uint32(header + 8);
ape_ctx->totalframes = get_uint32(header + 12);
ape_ctx->bps = get_uint16(header + 16);
ape_ctx->channels = get_uint16(header + 18);
ape_ctx->samplerate = get_uint32(header + 20);
/* File layout assumed here: descriptor, header, seek table, wav header,
   then the first frame. */
ape_ctx->seektablefilepos = ape_ctx->junklength +
ape_ctx->descriptorlength +
ape_ctx->headerlength;
ape_ctx->firstframe = ape_ctx->junklength + ape_ctx->descriptorlength +
ape_ctx->headerlength + ape_ctx->seektablelength +
ape_ctx->wavheaderlength;
} else {
/* Old-style fixed header: 32 bytes plus optional fields. */
ape_ctx->headerlength = 32;
ape_ctx->compressiontype = get_uint16(buf + 6);
ape_ctx->formatflags = get_uint16(buf + 8);
ape_ctx->channels = get_uint16(buf + 10);
ape_ctx->samplerate = get_uint32(buf + 12);
ape_ctx->wavheaderlength = get_uint32(buf + 16);
ape_ctx->totalframes = get_uint32(buf + 24);
ape_ctx->finalframeblocks = get_uint32(buf + 28);
if (ape_ctx->formatflags & MAC_FORMAT_FLAG_HAS_PEAK_LEVEL)
{
/* The peak level itself is not read, only skipped. */
ape_ctx->headerlength += 4;
}
if (ape_ctx->formatflags & MAC_FORMAT_FLAG_HAS_SEEK_ELEMENTS)
{
/* The stored value is an element count; convert it to bytes. */
ape_ctx->seektablelength = get_uint32(buf + ape_ctx->headerlength);
ape_ctx->seektablelength *= sizeof(int32_t);
ape_ctx->headerlength += 4;
} else {
/* No explicit count: one seek entry per frame. */
ape_ctx->seektablelength = ape_ctx->totalframes * sizeof(int32_t);
}
if (ape_ctx->formatflags & MAC_FORMAT_FLAG_8_BIT)
ape_ctx->bps = 8;
else if (ape_ctx->formatflags & MAC_FORMAT_FLAG_24_BIT)
ape_ctx->bps = 24;
else
ape_ctx->bps = 16;
/* Old files do not store the frame size; it follows from the version
   (and, for 3800..3899, the compression type). */
if (ape_ctx->fileversion >= 3950)
ape_ctx->blocksperframe = 73728 * 4;
else if ((ape_ctx->fileversion >= 3900) || (ape_ctx->fileversion >= 3800 && ape_ctx->compressiontype >= 4000))
ape_ctx->blocksperframe = 73728;
else
ape_ctx->blocksperframe = 9216;
/* File layout assumed here: header, wav header, seek table, then the
   first frame. */
ape_ctx->seektablefilepos = ape_ctx->junklength + ape_ctx->headerlength +
ape_ctx->wavheaderlength;
ape_ctx->firstframe = ape_ctx->junklength + ape_ctx->headerlength +
ape_ctx->wavheaderlength + ape_ctx->seektablelength;
}
/* All frames but the last hold blocksperframe samples. */
ape_ctx->totalsamples = ape_ctx->finalframeblocks;
if (ape_ctx->totalframes > 1)
ape_ctx->totalsamples += ape_ctx->blocksperframe * (ape_ctx->totalframes-1);
/* NOTE(review): maxseekpoints was zeroed by the memset at the top, so
   this evaluates to seektablelength / 4 - confirm that a maximum (rather
   than a minimum) is what is intended here. */
ape_ctx->numseekpoints = APE_MAX(ape_ctx->maxseekpoints,
ape_ctx->seektablelength / sizeof(int32_t));
return 0;
}
#ifndef ROCKBOX
/* Helper functions */
/* Read a little-endian unsigned 16-bit value from file descriptor fd
   into *x.  Returns 0 on success, -1 if two bytes could not be read (in
   which case *x is left untouched). */
static int read_uint16(int fd, uint16_t* x)
{
    unsigned char raw[2];
    int got = read(fd, raw, 2);

    if (got != 2)
        return -1;

    *x = raw[0] | (raw[1] << 8);
    return 0;
}
/* Read a little-endian 16-bit value from fd into the signed *x by
   reusing read_uint16() on the same storage.  Returns 0 on success, -1 on
   a short read. */
static int read_int16(int fd, int16_t* x)
{
    uint16_t* raw = (uint16_t*)x;

    return read_uint16(fd, raw);
}
/* Read a little-endian unsigned 32-bit value from file descriptor fd
   into *x.  Returns 0 on success, -1 if four bytes could not be read (in
   which case *x is left untouched).
   The bytes are widened to uint32_t before shifting: an unsigned char
   promotes to (signed) int, and shifting a value >= 0x80 left by 24 would
   overflow into the sign bit, which is undefined behaviour. */
static int read_uint32(int fd, uint32_t* x)
{
    unsigned char tmp[4];

    if (read(fd, tmp, 4) != 4)
        return -1;

    *x = (uint32_t)tmp[0]
       | ((uint32_t)tmp[1] << 8)
       | ((uint32_t)tmp[2] << 16)
       | ((uint32_t)tmp[3] << 24);
    return 0;
}
/* Parse the APE header of the file open on fd (non-Rockbox builds only).
   fd      - readable, seekable file descriptor; parsing starts at offset 0
   ape_ctx - receives the parsed and derived fields
   Returns 0 on success, -1 on a short read, a bad magic or an allocation
   failure.
   On success with a non-empty seek table, ape_ctx->seektable points to a
   malloc()ed array that the caller owns and must free().  If reading the
   table fails, the array is released and the pointer is reset to NULL so
   that the caller can neither use nor free it again. */
int ape_parseheader(int fd, struct ape_ctx_t* ape_ctx)
{
    int i,n;

    /* TODO: Skip any leading junk such as id3v2 tags */
    ape_ctx->junklength = 0;

    lseek(fd,ape_ctx->junklength,SEEK_SET);

    n = read(fd,&ape_ctx->magic,4);
    if (n != 4) return -1;

    if (memcmp(ape_ctx->magic,"MAC ",4)!=0)
    {
        return -1;
    }

    if (read_int16(fd,&ape_ctx->fileversion) < 0)
        return -1;

    if (ape_ctx->fileversion >= 3980)
    {
        /* Descriptor block */
        if (read_int16(fd,&ape_ctx->padding1) < 0)
            return -1;
        if (read_uint32(fd,&ape_ctx->descriptorlength) < 0)
            return -1;
        if (read_uint32(fd,&ape_ctx->headerlength) < 0)
            return -1;
        if (read_uint32(fd,&ape_ctx->seektablelength) < 0)
            return -1;
        if (read_uint32(fd,&ape_ctx->wavheaderlength) < 0)
            return -1;
        if (read_uint32(fd,&ape_ctx->audiodatalength) < 0)
            return -1;
        if (read_uint32(fd,&ape_ctx->audiodatalength_high) < 0)
            return -1;
        if (read_uint32(fd,&ape_ctx->wavtaillength) < 0)
            return -1;
        if (read(fd,&ape_ctx->md5,16) != 16)
            return -1;

        /* Skip any unknown bytes at the end of the descriptor.  This is for future
           compatibility */
        if (ape_ctx->descriptorlength > 52)
            lseek(fd,ape_ctx->descriptorlength - 52, SEEK_CUR);

        /* Read header data */
        if (read_uint16(fd,&ape_ctx->compressiontype) < 0)
            return -1;
        if (read_uint16(fd,&ape_ctx->formatflags) < 0)
            return -1;
        if (read_uint32(fd,&ape_ctx->blocksperframe) < 0)
            return -1;
        if (read_uint32(fd,&ape_ctx->finalframeblocks) < 0)
            return -1;
        if (read_uint32(fd,&ape_ctx->totalframes) < 0)
            return -1;
        if (read_uint16(fd,&ape_ctx->bps) < 0)
            return -1;
        if (read_uint16(fd,&ape_ctx->channels) < 0)
            return -1;
        if (read_uint32(fd,&ape_ctx->samplerate) < 0)
            return -1;
    } else {
        /* Old-style fixed header: 32 bytes plus optional fields */
        ape_ctx->descriptorlength = 0;
        ape_ctx->headerlength = 32;

        if (read_uint16(fd,&ape_ctx->compressiontype) < 0)
            return -1;
        if (read_uint16(fd,&ape_ctx->formatflags) < 0)
            return -1;
        if (read_uint16(fd,&ape_ctx->channels) < 0)
            return -1;
        if (read_uint32(fd,&ape_ctx->samplerate) < 0)
            return -1;
        if (read_uint32(fd,&ape_ctx->wavheaderlength) < 0)
            return -1;
        if (read_uint32(fd,&ape_ctx->wavtaillength) < 0)
            return -1;
        if (read_uint32(fd,&ape_ctx->totalframes) < 0)
            return -1;
        if (read_uint32(fd,&ape_ctx->finalframeblocks) < 0)
            return -1;

        if (ape_ctx->formatflags & MAC_FORMAT_FLAG_HAS_PEAK_LEVEL)
        {
            lseek(fd, 4, SEEK_CUR);   /* Skip the peak level */
            ape_ctx->headerlength += 4;
        }

        if (ape_ctx->formatflags & MAC_FORMAT_FLAG_HAS_SEEK_ELEMENTS)
        {
            /* The stored value is an element count; convert it to bytes */
            if (read_uint32(fd,&ape_ctx->seektablelength) < 0)
                return -1;
            ape_ctx->headerlength += 4;
            ape_ctx->seektablelength *= sizeof(int32_t);
        } else {
            /* No explicit count: one seek entry per frame */
            ape_ctx->seektablelength = ape_ctx->totalframes * sizeof(int32_t);
        }

        if (ape_ctx->formatflags & MAC_FORMAT_FLAG_8_BIT)
            ape_ctx->bps = 8;
        else if (ape_ctx->formatflags & MAC_FORMAT_FLAG_24_BIT)
            ape_ctx->bps = 24;
        else
            ape_ctx->bps = 16;

        /* Old files do not store the frame size; it follows from the
           version (and, for 3800..3899, the compression type) */
        if (ape_ctx->fileversion >= 3950)
            ape_ctx->blocksperframe = 73728 * 4;
        else if ((ape_ctx->fileversion >= 3900) || (ape_ctx->fileversion >= 3800 && ape_ctx->compressiontype >= 4000))
            ape_ctx->blocksperframe = 73728;
        else
            ape_ctx->blocksperframe = 9216;

        /* Skip any stored wav header */
        if (!(ape_ctx->formatflags & MAC_FORMAT_FLAG_CREATE_WAV_HEADER))
        {
            lseek(fd, ape_ctx->wavheaderlength, SEEK_CUR);
        }
    }

    /* All frames but the last hold blocksperframe samples */
    ape_ctx->totalsamples = ape_ctx->finalframeblocks;
    if (ape_ctx->totalframes > 1)
        ape_ctx->totalsamples += ape_ctx->blocksperframe * (ape_ctx->totalframes-1);

    if (ape_ctx->seektablelength > 0)
    {
        ape_ctx->seektable = malloc(ape_ctx->seektablelength);
        if (ape_ctx->seektable == NULL)
            return -1;
        for (i=0; i < ape_ctx->seektablelength / sizeof(uint32_t); i++)
        {
            if (read_uint32(fd,&ape_ctx->seektable[i]) < 0)
            {
                /* Do not leave a dangling pointer behind in the context */
                free(ape_ctx->seektable);
                ape_ctx->seektable = NULL;
                return -1;
            }
        }
    }

    ape_ctx->firstframe = ape_ctx->junklength + ape_ctx->descriptorlength +
                          ape_ctx->headerlength + ape_ctx->seektablelength +
                          ape_ctx->wavheaderlength;

    return 0;
}
/* Print the parsed header fields of ape_ctx to stdout (non-Rockbox builds
   only).  The seek table is listed only when it holds exactly one entry
   per frame; otherwise "No seektable" is printed.
   32-bit unsigned fields are printed with the PRIu32 macros from
   <inttypes.h>: passing a uint32_t to "%d" is a type mismatch and shows
   values from 2^31 upwards as negative numbers. */
void ape_dumpinfo(struct ape_ctx_t* ape_ctx)
{
    int i;
    uint32_t entry;
    uint32_t entries = ape_ctx->seektablelength / sizeof(uint32_t);

    printf("Descriptor Block:\n\n");

    printf("magic                = \"%c%c%c%c\"\n",
           ape_ctx->magic[0],ape_ctx->magic[1],
           ape_ctx->magic[2],ape_ctx->magic[3]);
    printf("fileversion          = %d\n",ape_ctx->fileversion);
    printf("descriptorlength     = %" PRIu32 "\n",ape_ctx->descriptorlength);
    printf("headerlength         = %" PRIu32 "\n",ape_ctx->headerlength);
    printf("seektablelength      = %" PRIu32 "\n",ape_ctx->seektablelength);
    printf("wavheaderlength      = %" PRIu32 "\n",ape_ctx->wavheaderlength);
    printf("audiodatalength      = %" PRIu32 "\n",ape_ctx->audiodatalength);
    printf("audiodatalength_high = %" PRIu32 "\n",ape_ctx->audiodatalength_high);
    printf("wavtaillength        = %" PRIu32 "\n",ape_ctx->wavtaillength);
    printf("md5                  = ");
    for (i = 0; i < 16; i++)
        /* Force each digest byte into 0..255 so a signed char cannot be
           sign-extended by the %x conversion */
        printf("%02x",(unsigned int)(unsigned char)ape_ctx->md5[i]);
    printf("\n");

    printf("\nHeader Block:\n\n");

    printf("compressiontype      = %d\n",ape_ctx->compressiontype);
    printf("formatflags          = %d\n",ape_ctx->formatflags);
    printf("blocksperframe       = %" PRIu32 "\n",ape_ctx->blocksperframe);
    printf("finalframeblocks     = %" PRIu32 "\n",ape_ctx->finalframeblocks);
    printf("totalframes          = %" PRIu32 "\n",ape_ctx->totalframes);
    printf("bps                  = %d\n",ape_ctx->bps);
    printf("channels             = %d\n",ape_ctx->channels);
    printf("samplerate           = %" PRIu32 "\n",ape_ctx->samplerate);

    printf("\nSeektable\n\n");
    if (entries != ape_ctx->totalframes)
    {
        printf("No seektable\n");
    }
    else
    {
        for (entry = 0; entry < entries; entry++)
        {
            if (entry + 1 < ape_ctx->totalframes) {
                /* Frame size = distance to the next seek entry */
                printf("%8" PRIu32 "   %" PRIu32 " (%" PRIu32 " bytes)\n",
                       entry, ape_ctx->seektable[entry],
                       (uint32_t)(ape_ctx->seektable[entry+1] - ape_ctx->seektable[entry]));
            } else {
                printf("%8" PRIu32 "   %" PRIu32 "\n",
                       entry, ape_ctx->seektable[entry]);
            }
        }
    }
    printf("\nCalculated information:\n\n");
    printf("junklength           = %" PRIu32 "\n",ape_ctx->junklength);
    printf("firstframe           = %" PRIu32 "\n",ape_ctx->firstframe);
    printf("totalsamples         = %" PRIu32 "\n",ape_ctx->totalsamples);
}
#endif /* !ROCKBOX */

View file

@ -0,0 +1,137 @@
/*
libdemac - A Monkey's Audio decoder
$Id$
Copyright (C) Dave Chapman 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
*/
#ifndef _APE_PARSER_H
#define _APE_PARSER_H
#include <inttypes.h>
#include "demac_config.h"
/* The earliest and latest file formats supported by this library */
#define APE_MIN_VERSION 3970
#define APE_MAX_VERSION 3990
/* Format flag bits (each value is a distinct power of two).
   NOTE(review): presumably tested against ape_ctx_t.formatflags - confirm
   in the parser implementation. */
#define MAC_FORMAT_FLAG_8_BIT 1 // is 8-bit [OBSOLETE]
#define MAC_FORMAT_FLAG_CRC 2 // uses the new CRC32 error detection [OBSOLETE]
#define MAC_FORMAT_FLAG_HAS_PEAK_LEVEL 4 // uint32 nPeakLevel after the header [OBSOLETE]
#define MAC_FORMAT_FLAG_24_BIT 8 // is 24-bit [OBSOLETE]
#define MAC_FORMAT_FLAG_HAS_SEEK_ELEMENTS 16 // has the number of seek elements after the peak level
#define MAC_FORMAT_FLAG_CREATE_WAV_HEADER 32 // create the wave header on decompression (not stored)
/* Special frame codes:
MONO_SILENCE - All PCM samples in frame are zero (mono streams only)
LEFT_SILENCE - All PCM samples for left channel in frame are zero (stereo streams)
RIGHT_SILENCE - All PCM samples for right channel in frame are zero (stereo streams)
STEREO_SILENCE - LEFT_SILENCE and RIGHT_SILENCE combined (1 | 2)
PSEUDO_STEREO - Left and Right channels are identical
*/
#define APE_FRAMECODE_MONO_SILENCE 1
#define APE_FRAMECODE_LEFT_SILENCE 1 /* same as mono */
#define APE_FRAMECODE_RIGHT_SILENCE 2
#define APE_FRAMECODE_STEREO_SILENCE 3 /* combined */
#define APE_FRAMECODE_PSEUDO_STEREO 4
#define PREDICTOR_ORDER 8
/* Total size of all predictor histories - 50 * sizeof(int32_t).
   Counted in int32_t elements; it sizes the extra tail of
   predictor_t.historybuffer beyond PREDICTOR_HISTORY_SIZE. */
#define PREDICTOR_SIZE 50
/* NOTE: This struct is used in predictor-arm.S - any updates need to
be reflected there. */
/* Predictor state. The assembly predictors address the members through
   hard-coded byte offsets (listed per member below); those offsets assume a
   4-byte pointer, 4-byte int32_t and no padding, so neither the order nor
   the size of any member may change without updating the assembly. */
struct predictor_t
{
/* Filter histories */
int32_t* buf; /* offset 0: current position inside historybuffer */
int32_t YlastA; /* offset 4: last Y value fed into Y filter A */
int32_t XlastA; /* offset 8: last X value fed into X filter A */
/* NOTE: The order of the next four fields is important for
predictor-arm.S */
/* (the assembly loads YfilterB/XfilterA and XfilterB/YfilterA as adjacent
   register pairs) */
int32_t YfilterB; /* offset 12: previous XfilterA, as used by Y filter B */
int32_t XfilterA; /* offset 16: most recent X output sample */
int32_t XfilterB; /* offset 20: previous YfilterA, as used by X filter B */
int32_t YfilterA; /* offset 24: most recent Y output sample */
/* Adaption co-efficients */
int32_t YcoeffsA[4]; /* offset 28 */
int32_t XcoeffsA[4]; /* offset 44 */
int32_t YcoeffsB[5]; /* offset 60 */
int32_t XcoeffsB[5]; /* offset 80 */
/* offset 100: storage that buf walks through; the PREDICTOR_SIZE tail holds
   the taps that lie ahead of buf */
int32_t historybuffer[PREDICTOR_HISTORY_SIZE + PREDICTOR_SIZE];
};
/* Parsed header information plus decoder state for one APE stream.
   The grouping comments below name the file block each field is taken from.
   NOTE(review): whether any part of this struct is filled by reading the
   file directly into it is not visible here - treat the field order and
   widths as fixed until confirmed. */
struct ape_ctx_t
{
/* Derived fields */
uint32_t junklength;
uint32_t firstframe;
uint32_t totalsamples;
/* Info from Descriptor Block */
char magic[4];
int16_t fileversion;
int16_t padding1;
uint32_t descriptorlength;
uint32_t headerlength;
uint32_t seektablelength; /* in bytes: ape_dumpinfo divides it by sizeof(uint32_t) to count entries */
uint32_t wavheaderlength;
uint32_t audiodatalength;
uint32_t audiodatalength_high; /* presumably the upper 32 bits of the audio data length - confirm */
uint32_t wavtaillength;
uint8_t md5[16];
/* Info from Header Block */
uint16_t compressiontype;
uint16_t formatflags;
uint32_t blocksperframe;
uint32_t finalframeblocks;
uint32_t totalframes;
uint16_t bps;
uint16_t channels;
uint32_t samplerate;
/* Seektable */
/* ape_dumpinfo prints the difference of consecutive entries as a size in
   bytes, so entries are byte positions (presumably frame offsets in the
   file - confirm) */
uint32_t* seektable; /* Seektable buffer */
uint32_t maxseekpoints; /* Max seekpoints we can store (size of seektable buffer) */
uint32_t numseekpoints; /* Number of seekpoints */
int seektablefilepos; /* Location in .ape file of seektable */
/* Decoder state */
uint32_t CRC;
int frameflags; /* presumably holds the APE_FRAMECODE_* values - confirm */
int currentframeblocks;
int blocksdecoded;
struct predictor_t predictor;
};
/* Presumably fills *ape_ctx from the APE header read through file descriptor
   fd. NOTE(review): the definition and the meaning of the return value are
   not visible from this header - confirm in the parser implementation. */
int ape_parseheader(int fd, struct ape_ctx_t* ape_ctx);
/* Presumably the same as ape_parseheader, but reading the header from the
   memory buffer buf instead of a file - confirm, including how much of buf
   must be valid. */
int ape_parseheaderbuf(unsigned char* buf, struct ape_ctx_t* ape_ctx);
/* Prints the fields of *ape_ctx (descriptor, header, seektable and derived
   values) with printf. NOTE(review): the definition appears to sit inside a
   "!ROCKBOX" conditional, so it is presumably unavailable in Rockbox builds
   even though it is declared unconditionally here - confirm. */
void ape_dumpinfo(struct ape_ctx_t* ape_ctx);
#endif

View file

@ -0,0 +1,702 @@
/*
libdemac - A Monkey's Audio decoder
$Id$
Copyright (C) Dave Chapman 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
*/
#include "demac_config.h"
.section ICODE_SECTION_DEMAC_ARM,"ax",%progbits
.align 2
/* NOTE: The following need to be kept in sync with parser.h */
/* Byte offsets relative to p->buf (element index * 4): the code below uses
   e.g. YDELAYA-12 to reach p->buf[YDELAYA-3].
   NOTE(review): the delay/adapt constants presumably mirror element indices
   defined by the C predictor - confirm where the C-side values live. */
#define YDELAYA 200
#define YDELAYB 168
#define XDELAYA 136
#define XDELAYB 104
#define YADAPTCOEFFSA 72
#define XADAPTCOEFFSA 56
#define YADAPTCOEFFSB 40
#define XADAPTCOEFFSB 20
/* struct predictor_t members: */
/* Byte offsets from the start of struct predictor_t; they assume 4-byte
   pointers and int32_t with no padding between members. */
#define buf 0 /* int32_t* buf */
#define YlastA 4 /* int32_t YlastA; */
#define XlastA 8 /* int32_t XlastA; */
#define YfilterB 12 /* int32_t YfilterB; */
#define XfilterA 16 /* int32_t XfilterA; */
#define XfilterB 20 /* int32_t XfilterB; */
#define YfilterA 24 /* int32_t YfilterA; */
#define YcoeffsA 28 /* int32_t YcoeffsA[4]; */
#define XcoeffsA 44 /* int32_t XcoeffsA[4]; */
#define YcoeffsB 60 /* int32_t YcoeffsB[5]; */
#define XcoeffsB 80 /* int32_t XcoeffsB[5]; */
#define historybuffer 100 /* int32_t historybuffer[] */
@ Macro for loading 2 registers, for various ARM versions.
@ Registers must start with an even register, and must be consecutive.
@
@ Loads the word at [base + offset] into reg1 and the word at
@ [base + offset + 4] into reg2; base is left unchanged.
@   reg1, reg2 - destination registers (reg2 is implied, not named, in the
@                ldrd variant, hence the even/consecutive requirement)
@   base       - register holding the base address
@   offset     - immediate offset, written with its leading '#'
.macro LDR2OFS reg1, reg2, base, offset
#if ARM_ARCH >= 6
ldrd \reg1, [\base, \offset]
#else /* ARM_ARCH < 6 */
#ifdef CPU_ARM7TDMI
@ reg1 doubles as the temporary address register here, so it must not be
@ the same register as base if base is still needed afterwards
add \reg1, \base, \offset
ldmia \reg1, {\reg1, \reg2}
#else /* ARM9 (v4 and v5) is faster this way */
ldr \reg1, [\base, \offset]
ldr \reg2, [\base, \offset+4]
#endif
#endif /* ARM_ARCH */
.endm
@ Macro for storing 2 registers, for various ARM versions.
@ Registers must start with an even register, and must be consecutive.
@
@ Stores reg1 to [base + offset] and reg2 to [base + offset + 4]; no
@ register is modified and the condition flags are left untouched (the
@ callers rely on flags surviving across this macro).
@   offset - immediate offset, written with its leading '#'
.macro STR2OFS reg1, reg2, base, offset
#if ARM_ARCH >= 6
strd \reg1, [\base, \offset]
#else
str \reg1, [\base, \offset]
str \reg2, [\base, \offset+4]
#endif
.endm
.global predictor_decode_stereo
.type predictor_decode_stereo,%function
@ Stereo predictor stage of the Monkey's Audio decoder. Processes `count`
@ samples in place: each input word in decoded0[] / decoded1[] is replaced
@ by the corresponding output sample.
@
@ For every sample the Y predictor runs on *decoded0, then the X predictor
@ on *decoded1. With ? standing for Y or X, each one does:
@   predictionA = sum of p->buf[?DELAYA-k]  * p->?coeffsA[k], k = 0..3
@   predictionB = sum of p->buf[?DELAYB-k]  * p->?coeffsB[k], k = 0..4
@   p->?lastA   = *decoded + ((predictionA + (predictionB >> 1)) >> 10)
@   p->?filterA = p->?lastA + ((p->?filterA * 31) >> 5)
@   *decoded++  = p->?filterA
@ and, if the input word was non-zero, moves every coefficient by the
@ matching sign entry in p->buf[?ADAPTCOEFFS*] (subtracted for a positive
@ input, added for a negative one).
@
@ After each sample p->buf advances by one element; once it reaches
@ &p->historybuffer[PREDICTOR_HISTORY_SIZE] the 200 bytes starting at p->buf
@ are copied to the start of historybuffer and p->buf is reset there.
@
@ count must be at least 1: it is only tested after a sample has been
@ processed, so a count of 0 would wrap around instead of returning.
@
@ Register usage:
@
@ r0-r11 - scratch
@ r12 - struct predictor_t* p
@ r14 - int32_t* p->buf
@ void predictor_decode_stereo(struct predictor_t* p,
@ int32_t* decoded0,
@ int32_t* decoded1,
@ int count)
predictor_decode_stereo:
stmdb sp!, {r1-r11, lr}
@ r1 (decoded0) is [sp]
@ r2 (decoded1) is [sp, #4]
@ r3 (count) is [sp, #8]
mov r12, r0 @ r12 := p
ldr r14, [r0] @ r14 := p->buf
loop:
@@@@@@@@@@@@@@@@@@@@@@@@@@@ PREDICTOR Y
@ Predictor Y, Filter A
ldr r11, [r12, #YlastA] @ r11 := p->YlastA
add r2, r14, #YDELAYA-12 @ r2 := &p->buf[YDELAYA-3]
ldmia r2, {r2, r3, r10} @ r2 := p->buf[YDELAYA-3]
@ r3 := p->buf[YDELAYA-2]
@ r10 := p->buf[YDELAYA-1]
add r6, r12, #YcoeffsA
ldmia r6, {r6 - r9} @ r6 := p->YcoeffsA[0]
@ r7 := p->YcoeffsA[1]
@ r8 := p->YcoeffsA[2]
@ r9 := p->YcoeffsA[3]
subs r10, r11, r10 @ r10 := r11 - r10
STR2OFS r10, r11, r14, #YDELAYA-4
@ p->buf[YDELAYA-1] = r10
@ p->buf[YDELAYA] = r11
mul r0, r11, r6 @ r0 := p->buf[YDELAYA] * p->YcoeffsA[0]
mla r0, r10, r7, r0 @ r0 += p->buf[YDELAYA-1] * p->YcoeffsA[1]
mla r0, r3, r8, r0 @ r0 += p->buf[YDELAYA-2] * p->YcoeffsA[2]
mla r0, r2, r9, r0 @ r0 += p->buf[YDELAYA-3] * p->YcoeffsA[3]
@ flags were set above, in the subs instruction
@ (the stores and non-flag-setting mul/mla in between leave them intact;
@ when the result was zero neither conditional move executes and r10
@ keeps its value of 0)
mvngt r10, #0
movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro)
cmp r11, #0
mvngt r11, #0
movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro)
STR2OFS r10, r11, r14, #YADAPTCOEFFSA-4
@ p->buf[YADAPTCOEFFSA-1] := r10
@ p->buf[YADAPTCOEFFSA] := r11
@ NOTE: r0 now contains predictionA - don't overwrite.
@ Predictor Y, Filter B
LDR2OFS r6, r7, r12, #YfilterB @ r6 := p->YfilterB
@ r7 := p->XfilterA
add r2, r14, #YDELAYB-16 @ r2 := &p->buf[YDELAYB-4]
ldmia r2, {r2 - r4, r10} @ r2 := p->buf[YDELAYB-4]
@ r3 := p->buf[YDELAYB-3]
@ r4 := p->buf[YDELAYB-2]
@ r10 := p->buf[YDELAYB-1]
rsb r6, r6, r6, lsl #5 @ r6 := r6 * 32 - r6 ( == r6*31)
sub r11, r7, r6, asr #5 @ r11 (p->buf[YDELAYB]) := r7 - (r6 >> 5)
str r7, [r12, #YfilterB] @ p->YfilterB := r7 (p->XfilterA)
add r5, r12, #YcoeffsB
ldmia r5, {r5 - r9} @ r5 := p->YcoeffsB[0]
@ r6 := p->YcoeffsB[1]
@ r7 := p->YcoeffsB[2]
@ r8 := p->YcoeffsB[3]
@ r9 := p->YcoeffsB[4]
subs r10, r11, r10 @ r10 := r11 - r10
STR2OFS r10, r11, r14, #YDELAYB-4
@ p->buf[YDELAYB-1] = r10
@ p->buf[YDELAYB] = r11
mul r1, r11, r5 @ r1 := p->buf[YDELAYB] * p->YcoeffsB[0]
mla r1, r10, r6, r1 @ r1 += p->buf[YDELAYB-1] * p->YcoeffsB[1]
mla r1, r4, r7, r1 @ r1 += p->buf[YDELAYB-2] * p->YcoeffsB[2]
mla r1, r3, r8, r1 @ r1 += p->buf[YDELAYB-3] * p->YcoeffsB[3]
mla r1, r2, r9, r1 @ r1 += p->buf[YDELAYB-4] * p->YcoeffsB[4]
@ flags were set above, in the subs instruction
mvngt r10, #0
movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro)
cmp r11, #0
mvngt r11, #0
movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro)
STR2OFS r10, r11, r14, #YADAPTCOEFFSB-4
@ p->buf[YADAPTCOEFFSB-1] := r10
@ p->buf[YADAPTCOEFFSB] := r11
@ r0 still contains predictionA
@ r1 contains predictionB
@ Finish Predictor Y
ldr r2, [sp] @ r2 := decoded0
add r0, r0, r1, asr #1 @ r0 := r0 + (r1 >> 1)
ldr r4, [r12, #YfilterA] @ r4 := p->YfilterA
ldr r3, [r2] @ r3 := *decoded0
rsb r4, r4, r4, lsl #5 @ r4 := r4 * 32 - r4 ( == r4*31)
add r1, r3, r0, asr #10 @ r1 := r3 + (r0 >> 10)
str r1, [r12, #YlastA] @ p->YlastA := r1
add r1, r1, r4, asr #5 @ r1 := r1 + (r4 >> 5)
str r1, [r12, #YfilterA] @ p->YfilterA := r1
@ r1 contains p->YfilterA
@ r2 contains decoded0
@ r3 contains *decoded0
@ r5, r6, r7, r8, r9 contain p->YcoeffsB[0..4]
@ r10, r11 contain p->buf[YADAPTCOEFFSB-1] and p->buf[YADAPTCOEFFSB]
str r1, [r2], #4 @ *(decoded0++) := r1 (p->YfilterA)
str r2, [sp] @ save decoded0
@ Adapt the Y coefficients according to the sign of the input sample;
@ a zero input leaves all coefficients unchanged
cmp r3, #0
beq 3f
add r2, r14, #YADAPTCOEFFSB-16
ldmia r2, {r2 - r4} @ r2 := p->buf[YADAPTCOEFFSB-4]
@ r3 := p->buf[YADAPTCOEFFSB-3]
@ r4 := p->buf[YADAPTCOEFFSB-2]
@ flags from the cmp above are still valid (add/ldmia do not set them)
blt 1f
@ *decoded0 > 0
sub r5, r5, r11 @ r5 := p->YcoeffsB[0] - p->buf[YADAPTCOEFFSB]
sub r6, r6, r10 @ r6 := p->YcoeffsB[1] - p->buf[YADAPTCOEFFSB-1]
sub r9, r9, r2 @ r9 := p->YcoeffsB[4] - p->buf[YADAPTCOEFFSB-4]
sub r8, r8, r3 @ r8 := p->YcoeffsB[3] - p->buf[YADAPTCOEFFSB-3]
sub r7, r7, r4 @ r7 := p->YcoeffsB[2] - p->buf[YADAPTCOEFFSB-2]
add r0, r12, #YcoeffsB
stmia r0, {r5 - r9} @ Save p->YcoeffsB[]
add r1, r12, #YcoeffsA
ldmia r1, {r2 - r5} @ r2 := p->YcoeffsA[0]
@ r3 := p->YcoeffsA[1]
@ r4 := p->YcoeffsA[2]
@ r5 := p->YcoeffsA[3]
add r6, r14, #YADAPTCOEFFSA-12
ldmia r6, {r6 - r9} @ r6 := p->buf[YADAPTCOEFFSA-3]
@ r7 := p->buf[YADAPTCOEFFSA-2]
@ r8 := p->buf[YADAPTCOEFFSA-1]
@ r9 := p->buf[YADAPTCOEFFSA]
sub r5, r5, r6 @ r5 := p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3]
sub r4, r4, r7 @ r4 := p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2]
sub r3, r3, r8 @ r3 := p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1]
sub r2, r2, r9 @ r2 := p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA]
b 2f
1: @ *decoded0 < 0
add r5, r5, r11 @ r5 := p->YcoeffsB[0] + p->buf[YADAPTCOEFFSB]
add r6, r6, r10 @ r6 := p->YcoeffsB[1] + p->buf[YADAPTCOEFFSB-1]
add r9, r9, r2 @ r9 := p->YcoeffsB[4] + p->buf[YADAPTCOEFFSB-4]
add r8, r8, r3 @ r8 := p->YcoeffsB[3] + p->buf[YADAPTCOEFFSB-3]
add r7, r7, r4 @ r7 := p->YcoeffsB[2] + p->buf[YADAPTCOEFFSB-2]
add r0, r12, #YcoeffsB
stmia r0, {r5 - r9} @ Save p->YcoeffsB[]
add r1, r12, #YcoeffsA
ldmia r1, {r2 - r5} @ r2 := p->YcoeffsA[0]
@ r3 := p->YcoeffsA[1]
@ r4 := p->YcoeffsA[2]
@ r5 := p->YcoeffsA[3]
add r6, r14, #YADAPTCOEFFSA-12
ldmia r6, {r6 - r9} @ r6 := p->buf[YADAPTCOEFFSA-3]
@ r7 := p->buf[YADAPTCOEFFSA-2]
@ r8 := p->buf[YADAPTCOEFFSA-1]
@ r9 := p->buf[YADAPTCOEFFSA]
add r5, r5, r6 @ r5 := p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3]
add r4, r4, r7 @ r4 := p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2]
add r3, r3, r8 @ r3 := p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1]
add r2, r2, r9 @ r2 := p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA]
2:
stmia r1, {r2 - r5} @ Save p->YcoeffsA
3:
@@@@@@@@@@@@@@@@@@@@@@@@@@@ PREDICTOR X
@ Predictor X, Filter A
ldr r11, [r12, #XlastA] @ r11 := p->XlastA
add r2, r14, #XDELAYA-12 @ r2 := &p->buf[XDELAYA-3]
ldmia r2, {r2, r3, r10} @ r2 := p->buf[XDELAYA-3]
@ r3 := p->buf[XDELAYA-2]
@ r10 := p->buf[XDELAYA-1]
add r6, r12, #XcoeffsA
ldmia r6, {r6 - r9} @ r6 := p->XcoeffsA[0]
@ r7 := p->XcoeffsA[1]
@ r8 := p->XcoeffsA[2]
@ r9 := p->XcoeffsA[3]
subs r10, r11, r10 @ r10 := r11 - r10
STR2OFS r10, r11, r14, #XDELAYA-4
@ p->buf[XDELAYA-1] = r10
@ p->buf[XDELAYA] = r11
mul r0, r11, r6 @ r0 := p->buf[XDELAYA] * p->XcoeffsA[0]
mla r0, r10, r7, r0 @ r0 += p->buf[XDELAYA-1] * p->XcoeffsA[1]
mla r0, r3, r8, r0 @ r0 += p->buf[XDELAYA-2] * p->XcoeffsA[2]
mla r0, r2, r9, r0 @ r0 += p->buf[XDELAYA-3] * p->XcoeffsA[3]
@ flags were set above, in the subs instruction
mvngt r10, #0
movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro)
cmp r11, #0
mvngt r11, #0
movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro)
STR2OFS r10, r11, r14, #XADAPTCOEFFSA-4
@ p->buf[XADAPTCOEFFSA-1] := r10
@ p->buf[XADAPTCOEFFSA] := r11
@ NOTE: r0 now contains predictionA - don't overwrite.
@ Predictor X, Filter B
LDR2OFS r6, r7, r12, #XfilterB @ r6 := p->XfilterB
@ r7 := p->YfilterA
add r2, r14, #XDELAYB-16 @ r2 := &p->buf[XDELAYB-4]
ldmia r2, {r2 - r4, r10} @ r2 := p->buf[XDELAYB-4]
@ r3 := p->buf[XDELAYB-3]
@ r4 := p->buf[XDELAYB-2]
@ r10 := p->buf[XDELAYB-1]
rsb r6, r6, r6, lsl #5 @ r6 := r6 * 32 - r6 ( == r6*31)
sub r11, r7, r6, asr #5 @ r11 (p->buf[XDELAYB]) := r7 - (r6 >> 5)
str r7, [r12, #XfilterB] @ p->XfilterB := r7 (p->YfilterA)
add r5, r12, #XcoeffsB
ldmia r5, {r5 - r9} @ r5 := p->XcoeffsB[0]
@ r6 := p->XcoeffsB[1]
@ r7 := p->XcoeffsB[2]
@ r8 := p->XcoeffsB[3]
@ r9 := p->XcoeffsB[4]
subs r10, r11, r10 @ r10 := r11 - r10
STR2OFS r10, r11, r14, #XDELAYB-4
@ p->buf[XDELAYB-1] = r10
@ p->buf[XDELAYB] = r11
mul r1, r11, r5 @ r1 := p->buf[XDELAYB] * p->XcoeffsB[0]
mla r1, r10, r6, r1 @ r1 += p->buf[XDELAYB-1] * p->XcoeffsB[1]
mla r1, r4, r7, r1 @ r1 += p->buf[XDELAYB-2] * p->XcoeffsB[2]
mla r1, r3, r8, r1 @ r1 += p->buf[XDELAYB-3] * p->XcoeffsB[3]
mla r1, r2, r9, r1 @ r1 += p->buf[XDELAYB-4] * p->XcoeffsB[4]
@ flags were set above, in the subs instruction
mvngt r10, #0
movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro)
cmp r11, #0
mvngt r11, #0
movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro)
STR2OFS r10, r11, r14, #XADAPTCOEFFSB-4
@ p->buf[XADAPTCOEFFSB-1] := r10
@ p->buf[XADAPTCOEFFSB] := r11
@ r0 still contains predictionA
@ r1 contains predictionB
@ Finish Predictor X
ldr r2, [sp, #4] @ r2 := decoded1
add r0, r0, r1, asr #1 @ r0 := r0 + (r1 >> 1)
ldr r4, [r12, #XfilterA] @ r4 := p->XfilterA
ldr r3, [r2] @ r3 := *decoded1
rsb r4, r4, r4, lsl #5 @ r4 := r4 * 32 - r4 ( == r4*31)
add r1, r3, r0, asr #10 @ r1 := r3 + (r0 >> 10)
str r1, [r12, #XlastA] @ p->XlastA := r1
add r1, r1, r4, asr #5 @ r1 := r1 + (r4 >> 5)
str r1, [r12, #XfilterA] @ p->XfilterA := r1
@ r1 contains p->XfilterA
@ r2 contains decoded1
@ r3 contains *decoded1
@ r5, r6, r7, r8, r9 contain p->XcoeffsB[0..4]
@ r10, r11 contain p->buf[XADAPTCOEFFSB-1] and p->buf[XADAPTCOEFFSB]
str r1, [r2], #4 @ *(decoded1++) := r1 (p->XfilterA)
str r2, [sp, #4] @ save decoded1
@ Adapt the X coefficients according to the sign of the input sample;
@ a zero input leaves all coefficients unchanged
cmp r3, #0
beq 3f
add r2, r14, #XADAPTCOEFFSB-16
ldmia r2, {r2 - r4} @ r2 := p->buf[XADAPTCOEFFSB-4]
@ r3 := p->buf[XADAPTCOEFFSB-3]
@ r4 := p->buf[XADAPTCOEFFSB-2]
@ flags from the cmp above are still valid (add/ldmia do not set them)
blt 1f
@ *decoded1 > 0
sub r5, r5, r11 @ r5 := p->XcoeffsB[0] - p->buf[XADAPTCOEFFSB]
sub r6, r6, r10 @ r6 := p->XcoeffsB[1] - p->buf[XADAPTCOEFFSB-1]
sub r9, r9, r2 @ r9 := p->XcoeffsB[4] - p->buf[XADAPTCOEFFSB-4]
sub r8, r8, r3 @ r8 := p->XcoeffsB[3] - p->buf[XADAPTCOEFFSB-3]
sub r7, r7, r4 @ r7 := p->XcoeffsB[2] - p->buf[XADAPTCOEFFSB-2]
add r0, r12, #XcoeffsB
stmia r0, {r5 - r9} @ Save p->XcoeffsB[]
add r1, r12, #XcoeffsA
ldmia r1, {r2 - r5} @ r2 := p->XcoeffsA[0]
@ r3 := p->XcoeffsA[1]
@ r4 := p->XcoeffsA[2]
@ r5 := p->XcoeffsA[3]
add r6, r14, #XADAPTCOEFFSA-12
ldmia r6, {r6 - r9} @ r6 := p->buf[XADAPTCOEFFSA-3]
@ r7 := p->buf[XADAPTCOEFFSA-2]
@ r8 := p->buf[XADAPTCOEFFSA-1]
@ r9 := p->buf[XADAPTCOEFFSA]
sub r5, r5, r6 @ r5 := p->XcoeffsA[3] - p->buf[XADAPTCOEFFSA-3]
sub r4, r4, r7 @ r4 := p->XcoeffsA[2] - p->buf[XADAPTCOEFFSA-2]
sub r3, r3, r8 @ r3 := p->XcoeffsA[1] - p->buf[XADAPTCOEFFSA-1]
sub r2, r2, r9 @ r2 := p->XcoeffsA[0] - p->buf[XADAPTCOEFFSA]
b 2f
1: @ *decoded1 < 0
add r5, r5, r11 @ r5 := p->XcoeffsB[0] + p->buf[XADAPTCOEFFSB]
add r6, r6, r10 @ r6 := p->XcoeffsB[1] + p->buf[XADAPTCOEFFSB-1]
add r9, r9, r2 @ r9 := p->XcoeffsB[4] + p->buf[XADAPTCOEFFSB-4]
add r8, r8, r3 @ r8 := p->XcoeffsB[3] + p->buf[XADAPTCOEFFSB-3]
add r7, r7, r4 @ r7 := p->XcoeffsB[2] + p->buf[XADAPTCOEFFSB-2]
add r0, r12, #XcoeffsB
stmia r0, {r5 - r9} @ Save p->XcoeffsB[]
add r1, r12, #XcoeffsA
ldmia r1, {r2 - r5} @ r2 := p->XcoeffsA[0]
@ r3 := p->XcoeffsA[1]
@ r4 := p->XcoeffsA[2]
@ r5 := p->XcoeffsA[3]
add r6, r14, #XADAPTCOEFFSA-12
ldmia r6, {r6 - r9} @ r6 := p->buf[XADAPTCOEFFSA-3]
@ r7 := p->buf[XADAPTCOEFFSA-2]
@ r8 := p->buf[XADAPTCOEFFSA-1]
@ r9 := p->buf[XADAPTCOEFFSA]
add r5, r5, r6 @ r5 := p->XcoeffsA[3] + p->buf[XADAPTCOEFFSA-3]
add r4, r4, r7 @ r4 := p->XcoeffsA[2] + p->buf[XADAPTCOEFFSA-2]
add r3, r3, r8 @ r3 := p->XcoeffsA[1] + p->buf[XADAPTCOEFFSA-1]
add r2, r2, r9 @ r2 := p->XcoeffsA[0] + p->buf[XADAPTCOEFFSA]
2:
stmia r1, {r2 - r5} @ Save p->XcoeffsA
3:
@@@@@@@@@@@@@@@@@@@@@@@@@@@ COMMON
add r14, r14, #4 @ p->buf++
add r11, r12, #historybuffer @ r11 := &p->historybuffer[0]
sub r10, r14, #PREDICTOR_HISTORY_SIZE*4
@ r10 := p->buf - PREDICTOR_HISTORY_SIZE
ldr r0, [sp, #8] @ r0 := remaining count
cmp r10, r11
beq move_hist @ The history buffer is full, we need to do a memmove
@ Check loop count
subs r0, r0, #1
strne r0, [sp, #8]
bne loop
done:
str r14, [r12] @ Save value of p->buf
add sp, sp, #12 @ Don't bother restoring r1-r3
#ifdef ROCKBOX
ldmpc regs=r4-r11
#else
ldmia sp!, {r4 - r11, pc}
#endif
move_hist:
@ dest = r11 (p->historybuffer)
@ src = r14 (p->buf)
@ n = 200
@ (copied as five unrolled 10-register transfers; clobbers r0-r9)
ldmia r14!, {r0-r9} @ 40 bytes
stmia r11!, {r0-r9}
ldmia r14!, {r0-r9} @ 40 bytes
stmia r11!, {r0-r9}
ldmia r14!, {r0-r9} @ 40 bytes
stmia r11!, {r0-r9}
ldmia r14!, {r0-r9} @ 40 bytes
stmia r11!, {r0-r9}
ldmia r14!, {r0-r9} @ 40 bytes
stmia r11!, {r0-r9}
ldr r0, [sp, #8] @ reload count, r0 was clobbered by the copy
add r14, r12, #historybuffer @ p->buf = &p->historybuffer[0]
@ Check loop count
subs r0, r0, #1
strne r0, [sp, #8]
bne loop
b done
.size predictor_decode_stereo, .-predictor_decode_stereo
.global predictor_decode_mono
.type predictor_decode_mono,%function
@ Mono predictor stage of the Monkey's Audio decoder. Processes `count`
@ samples in place: each input word in decoded0[] is replaced by the
@ corresponding output sample. Only the Y filter A state is used:
@   predictionA = sum of p->buf[YDELAYA-k] * p->YcoeffsA[k], k = 0..3
@   p->YlastA   = *decoded0 + (predictionA >> 10)
@   p->YfilterA = p->YlastA + ((p->YfilterA * 31) >> 5)
@   *decoded0++ = p->YfilterA
@ and, if the input word was non-zero, each YcoeffsA entry is moved by the
@ matching sign entry in p->buf[YADAPTCOEFFSA-k] (subtracted for a positive
@ input, added for a negative one).
@
@ After each sample p->buf advances by one element; once it reaches
@ &p->historybuffer[PREDICTOR_HISTORY_SIZE] the 200 bytes starting at p->buf
@ are copied to the start of historybuffer and p->buf is reset there.
@
@ count must be at least 1: it is only tested after a sample has been
@ processed, so a count of 0 would wrap around instead of returning.
@
@ Register usage:
@
@ r0-r11 - scratch
@ r12 - struct predictor_t* p
@ r14 - int32_t* p->buf
@ void predictor_decode_mono(struct predictor_t* p,
@ int32_t* decoded0,
@ int count)
predictor_decode_mono:
stmdb sp!, {r1, r2, r4-r11, lr}
@ r1 (decoded0) is [sp]
@ r2 (count) is [sp, #4]
mov r12, r0 @ r12 := p
ldr r14, [r0] @ r14 := p->buf
loopm:
@@@@@@@@@@@@@@@@@@@@@@@@@@@ PREDICTOR
ldr r11, [r12, #YlastA] @ r11 := p->YlastA
add r2, r14, #YDELAYA-12 @ r2 := &p->buf[YDELAYA-3]
ldmia r2, {r2, r3, r10} @ r2 := p->buf[YDELAYA-3]
@ r3 := p->buf[YDELAYA-2]
@ r10 := p->buf[YDELAYA-1]
@ r5 is kept until label 2 below, where it is the store address
add r5, r12, #YcoeffsA @ r5 := &p->YcoeffsA[0]
ldmia r5, {r6 - r9} @ r6 := p->YcoeffsA[0]
@ r7 := p->YcoeffsA[1]
@ r8 := p->YcoeffsA[2]
@ r9 := p->YcoeffsA[3]
subs r10, r11, r10 @ r10 := r11 - r10
STR2OFS r10, r11, r14, #YDELAYA-4
@ p->buf[YDELAYA-1] = r10
@ p->buf[YDELAYA] = r11
mul r0, r11, r6 @ r0 := p->buf[YDELAYA] * p->YcoeffsA[0]
mla r0, r10, r7, r0 @ r0 += p->buf[YDELAYA-1] * p->YcoeffsA[1]
mla r0, r3, r8, r0 @ r0 += p->buf[YDELAYA-2] * p->YcoeffsA[2]
mla r0, r2, r9, r0 @ r0 += p->buf[YDELAYA-3] * p->YcoeffsA[3]
@ flags were set above, in the subs instruction
@ (when the result was zero neither conditional move executes and r10
@ keeps its value of 0)
mvngt r10, #0
movlt r10, #1 @ r10 := SIGN(r10) (see .c for SIGN macro)
cmp r11, #0
mvngt r11, #0
movlt r11, #1 @ r11 := SIGN(r11) (see .c for SIGN macro)
STR2OFS r10, r11, r14, #YADAPTCOEFFSA-4
@ p->buf[YADAPTCOEFFSA-1] := r10
@ p->buf[YADAPTCOEFFSA] := r11
ldr r2, [sp] @ r2 := decoded0
ldr r4, [r12, #YfilterA] @ r4 := p->YfilterA
ldr r3, [r2] @ r3 := *decoded0
rsb r4, r4, r4, lsl #5 @ r4 := r4 * 32 - r4 ( == r4*31)
add r1, r3, r0, asr #10 @ r1 := r3 + (r0 >> 10)
str r1, [r12, #YlastA] @ p->YlastA := r1
add r1, r1, r4, asr #5 @ r1 := r1 + (r4 >> 5)
str r1, [r12, #YfilterA] @ p->YfilterA := r1
@ r1 contains p->YfilterA
@ r2 contains decoded0
@ r3 contains *decoded0
@ r6, r7, r8, r9 contain p->YcoeffsA[0..3]
@ r10, r11 contain p->buf[YADAPTCOEFFSA-1] and p->buf[YADAPTCOEFFSA]
str r1, [r2], #4 @ *(decoded0++) := r1 (p->YfilterA)
str r2, [sp] @ save decoded0
@ Adapt the coefficients according to the sign of the input sample;
@ a zero input leaves all coefficients unchanged
cmp r3, #0
beq 3f
LDR2OFS r2, r3, r14, #YADAPTCOEFFSA-12
@ r2 := p->buf[YADAPTCOEFFSA-3]
@ r3 := p->buf[YADAPTCOEFFSA-2]
@ flags from the cmp above are still valid (LDR2OFS does not set them)
blt 1f
@ *decoded0 > 0
sub r6, r6, r11 @ r6 := p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA]
sub r7, r7, r10 @ r7 := p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1]
sub r9, r9, r2 @ r9 := p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3]
sub r8, r8, r3 @ r8 := p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2]
b 2f
1: @ *decoded0 < 0
add r6, r6, r11 @ r6 := p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA]
add r7, r7, r10 @ r7 := p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1]
add r9, r9, r2 @ r9 := p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3]
add r8, r8, r3 @ r8 := p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2]
2:
stmia r5, {r6 - r9} @ Save p->YcoeffsA
3:
@@@@@@@@@@@@@@@@@@@@@@@@@@@ COMMON
add r14, r14, #4 @ p->buf++
add r11, r12, #historybuffer @ r11 := &p->historybuffer[0]
sub r10, r14, #PREDICTOR_HISTORY_SIZE*4
@ r10 := p->buf - PREDICTOR_HISTORY_SIZE
ldr r0, [sp, #4] @ r0 := remaining count
cmp r10, r11
beq move_histm @ The history buffer is full, we need to do a memmove
@ Check loop count
subs r0, r0, #1
strne r0, [sp, #4]
bne loopm
donem:
str r14, [r12] @ Save value of p->buf
add sp, sp, #8 @ Don't bother restoring r1, r2
#ifdef ROCKBOX
ldmpc regs=r4-r11
#else
ldmia sp!, {r4 - r11, pc}
#endif
move_histm:
@ dest = r11 (p->historybuffer)
@ src = r14 (p->buf)
@ n = 200
@ (copied as five unrolled 10-register transfers; clobbers r0-r9)
ldmia r14!, {r0-r9} @ 40 bytes
stmia r11!, {r0-r9}
ldmia r14!, {r0-r9} @ 40 bytes
stmia r11!, {r0-r9}
ldmia r14!, {r0-r9} @ 40 bytes
stmia r11!, {r0-r9}
ldmia r14!, {r0-r9} @ 40 bytes
stmia r11!, {r0-r9}
ldmia r14!, {r0-r9} @ 40 bytes
stmia r11!, {r0-r9}
ldr r0, [sp, #4] @ reload count, r0 was clobbered by the copy
add r14, r12, #historybuffer @ p->buf = &p->historybuffer[0]
@ Check loop count
subs r0, r0, #1
strne r0, [sp, #4]
bne loopm
b donem
.size predictor_decode_mono, .-predictor_decode_mono

View file

@ -0,0 +1,660 @@
/*
libdemac - A Monkey's Audio decoder
$Id$
Copyright (C) Dave Chapman 2007
Coldfire predictor copyright (C) 2007 Jens Arnold
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
*/
#include "demac_config.h"
/* NOTE: The following need to be kept in sync with parser.h */
/* Byte offsets relative to p->buf (element index * 4): the code below uses
   e.g. YDELAYA-12 to reach p->buf[YDELAYA-3].
   NOTE(review): the delay/adapt constants presumably mirror element indices
   defined by the C predictor - confirm where the C-side values live. */
#define YDELAYA 200
#define YDELAYB 168
#define XDELAYA 136
#define XDELAYB 104
#define YADAPTCOEFFSA 72
#define XADAPTCOEFFSA 56
#define YADAPTCOEFFSB 40
#define XADAPTCOEFFSB 20
/* struct predictor_t members: */
/* Byte offsets from the start of struct predictor_t; they assume 4-byte
   pointers and int32_t with no padding between members. */
#define buf 0 /* int32_t* buf */
#define YlastA 4 /* int32_t YlastA; */
#define XlastA 8 /* int32_t XlastA; */
#define YfilterB 12 /* int32_t YfilterB; */
#define XfilterA 16 /* int32_t XfilterA; */
#define XfilterB 20 /* int32_t XfilterB; */
#define YfilterA 24 /* int32_t YfilterA; */
#define YcoeffsA 28 /* int32_t YcoeffsA[4]; */
#define XcoeffsA 44 /* int32_t XcoeffsA[4]; */
#define YcoeffsB 60 /* int32_t YcoeffsB[5]; */
#define XcoeffsB 80 /* int32_t XcoeffsB[5]; */
#define historybuffer 100 /* int32_t historybuffer[] */
.text
.align 2
.global predictor_decode_stereo
.type predictor_decode_stereo,@function
| void predictor_decode_stereo(struct predictor_t* p,
| int32_t* decoded0,
| int32_t* decoded1,
| int count)
predictor_decode_stereo:
lea.l (-12*4,%sp), %sp
movem.l %d2-%d7/%a2-%a6, (4,%sp)
movem.l (12*4+8,%sp), %a3-%a5 | %a3 = decoded0
| %a4 = decoded1
move.l %a5, (%sp) | (%sp) = count
move.l #0, %macsr | signed integer mode
move.l (12*4+4,%sp), %a6 | %a6 = p
move.l (%a6), %a5 | %a5 = p->buf
.loop:
| ***** PREDICTOR Y *****
| Predictor Y, Filter A
move.l (YlastA,%a6), %d3 | %d3 = p->YlastA
movem.l (YDELAYA-12,%a5), %d0-%d2 | %d0 = p->buf[YDELAYA-3]
| %d1 = p->buf[YDELAYA-2]
| %d2 = p->buf[YDELAYA-1]
move.l %d3, (YDELAYA,%a5) | p->buf[YDELAYA] = %d3
sub.l %d3, %d2
neg.l %d2 | %d2 = %d3 - %d2
move.l %d2, (YDELAYA-4,%a5) | p->buf[YDELAYA-1] = %d2
movem.l (YcoeffsA,%a6), %d4-%d7 | %d4 = p->YcoeffsA[0]
| %d5 = p->YcoeffsA[1]
| %d6 = p->YcoeffsA[2]
| %d7 = p->YcoeffsA[3]
mac.l %d3, %d4, %acc0 | %acc0 = p->buf[YDELAYA] * p->YcoeffsA[0]
mac.l %d2, %d5, %acc0 | %acc0 += p->buf[YDELAYA-1] * p->YcoeffsA[1]
mac.l %d1, %d6, %acc0 | %acc0 += p->buf[YDELAYA-2] * p->YcoeffsA[2]
mac.l %d0, %d7, %acc0 | %acc0 += p->buf[YDELAYA-3] * p->YcoeffsA[3]
tst.l %d2
beq.s 1f
spl.b %d2 | pos: 0x??????ff, neg: 0x??????00
extb.l %d2 | pos: 0xffffffff, neg: 0x00000000
or.l #1, %d2 | pos: 0xffffffff, neg: 0x00000001
1: | %d2 = SIGN(%d2)
move.l %d2, (YADAPTCOEFFSA-4,%a5) | p->buf[YADAPTCOEFFSA-1] = %d2
tst.l %d3
beq.s 1f
spl.b %d3
extb.l %d3
or.l #1, %d3
1: | %d3 = SIGN(%d3)
move.l %d3, (YADAPTCOEFFSA,%a5) | p->buf[YADAPTCOEFFSA] = %d3
| Predictor Y, Filter B
movem.l (YfilterB,%a6), %d2-%d3 | %d2 = p->YfilterB
| %d3 = p->XfilterA
move.l %d3, (YfilterB,%a6) | p->YfilterB = %d3
move.l %d2, %d1 | %d1 = %d2
lsl.l #5, %d2 | %d2 = %d2 * 32
sub.l %d1, %d2 | %d2 -= %d1 (== 31 * old_d2)
asr.l #5, %d2 | %d2 >>= 5
sub.l %d2, %d3 | %d3 -= %d2
movem.l (YDELAYB-16,%a5), %d4-%d7 | %d4 = p->buf[YDELAYB-4]
| %d5 = p->buf[YDELAYB-3]
| %d6 = p->buf[YDELAYB-2]
| %d7 = p->buf[YDELAYB-1]
sub.l %d3, %d7
neg.l %d7 | %d7 = %d3 - %d7
move.l %d7, (YDELAYB-4,%a5) | p->buf[YDELAYB-1] = %d7
movem.l (YcoeffsB,%a6), %d1-%d2/%a0-%a2 | %d1 = p->YcoeffsB[0]
| %d2 = p->YcoeffsB[1]
| %a0 = p->YcoeffsB[2]
| %a1 = p->YcoeffsB[3]
| %a2 = p->YcoeffsB[4]
mac.l %d3, %d1, %acc1 | %acc1 = p->buf[YDELAYB] * p->YcoeffsB[0]
mac.l %d7, %d2, %acc1 | %acc1 += p->buf[YDELAYB-1] * p->YcoeffsB[1]
mac.l %d6, %a0, %acc1 | %acc1 += p->buf[YDELAYB-2] * p->YcoeffsB[2]
mac.l %d5, %a1, %acc1 | %acc1 += p->buf[YDELAYB-3] * p->YcoeffsB[3]
mac.l %d4, %a2, %acc1 | %acc1 += p->buf[YDELAYB-4] * p->YcoeffsB[4]
move.l %d3, (YDELAYB, %a5) | p->buf[YDELAYB] = %d3
tst.l %d7
beq.s 1f
spl.b %d7
extb.l %d7
or.l #1, %d7
1: | %d7 = SIGN(%d7)
move.l %d7, (YADAPTCOEFFSB-4,%a5) | p->buf[YADAPTCOEFFSB-1] = %d7
tst.l %d3
beq.s 1f
spl.b %d3
extb.l %d3
or.l #1, %d3
1: | %d3 = SIGN(%d3)
move.l %d3, (YADAPTCOEFFSB, %a5) | p->buf[YADAPTCOEFFSB] = %d3
| %d1, %d2, %a0, %a1, %a2 contain p->YcoeffsB[0..4]
| %d7, %d3 contain p->buf[YADAPTCOEFFSB-1] and p->buf[YADAPTCOEFFSB]
move.l (%a3), %d0 | %d0 = *decoded0
beq.s 3f
movem.l (YADAPTCOEFFSB-16,%a5), %d4-%d6 | %d4 = p->buf[YADAPTCOEFFSB-4]
| %d5 = p->buf[YADAPTCOEFFSB-3]
| %d6 = p->buf[YADAPTCOEFFSB-2]
bmi.s 1f | flags still valid here
| *decoded0 > 0
sub.l %d3, %d1 | %d1 = p->YcoeffsB[0] - p->buf[YADAPTCOEFFSB]
sub.l %d7, %d2 | %d2 = p->YcoeffsB[1] - p->buf[YADAPTCOEFFSB-1]
sub.l %d6, %a0 | %a0 = p->YcoeffsB[2] - p->buf[YADAPTCOEFFSB-2]
sub.l %d5, %a1 | %a1 = p->YcoeffsB[3] - p->buf[YADAPTCOEFFSB-3]
sub.l %d4, %a2 | %a2 = p->YcoeffsB[4] - p->buf[YADAPTCOEFFSB-4]
movem.l %d1-%d2/%a0-%a2, (YcoeffsB,%a6) | Save p->YcoeffsB[]
movem.l (YcoeffsA,%a6), %d4-%d7 | %d4 = p->YcoeffsA[0]
| %d5 = p->YcoeffsA[1]
| %d6 = p->YcoeffsA[2]
| %d7 = p->YcoeffsA[3]
movem.l (YADAPTCOEFFSA-12,%a5), %d2/%a0-%a2
| %d2 = p->buf[YADAPTCOEFFSA-3]
| %a0 = p->buf[YADAPTCOEFFSA-2]
| %a1 = p->buf[YADAPTCOEFFSA-1]
| %a2 = p->buf[YADAPTCOEFFSA]
sub.l %a2, %d4 | %d4 = p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA]
sub.l %a1, %d5 | %d5 = p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1]
sub.l %a0, %d6 | %d6 = p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2]
sub.l %d2, %d7 | %d7 = p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3]
bra.s 2f
1: | *decoded0 < 0
add.l %d3, %d1 | %d1 = p->YcoeffsB[0] + p->buf[YADAPTCOEFFSB]
add.l %d7, %d2 | %d2 = p->YcoeffsB[1] + p->buf[YADAPTCOEFFSB-1]
add.l %d6, %a0 | %a0 = p->YcoeffsB[2] + p->buf[YADAPTCOEFFSB-2]
add.l %d5, %a1 | %a1 = p->YcoeffsB[3] + p->buf[YADAPTCOEFFSB-3]
add.l %d4, %a2 | %a2 = p->YcoeffsB[4] + p->buf[YADAPTCOEFFSB-4]
movem.l %d1-%d2/%a0-%a2, (YcoeffsB,%a6) | Save p->YcoeffsB[]
movem.l (YcoeffsA,%a6), %d4-%d7 | %d4 = p->YcoeffsA[0]
| %d5 = p->YcoeffsA[1]
| %d6 = p->YcoeffsA[2]
| %d7 = p->YcoeffsA[3]
movem.l (YADAPTCOEFFSA-12,%a5), %d2/%a0-%a2
| %d2 = p->buf[YADAPTCOEFFSA-3]
| %a0 = p->buf[YADAPTCOEFFSA-2]
| %a1 = p->buf[YADAPTCOEFFSA-1]
| %a2 = p->buf[YADAPTCOEFFSA]
add.l %a2, %d4 | %d4 = p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA]
add.l %a1, %d5 | %d5 = p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1]
add.l %a0, %d6 | %d6 = p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2]
add.l %d2, %d7 | %d7 = p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3]
2:
movem.l %d4-%d7, (YcoeffsA,%a6) | Save p->YcoeffsA[]
3:
| Finish Predictor Y
movclr.l %acc0, %d1 | %d1 = predictionA
movclr.l %acc1, %d2 | %d2 = predictionB
asr.l #1, %d2
add.l %d2, %d1 | %d1 += (%d2 >> 1)
asr.l #8, %d1
asr.l #2, %d1 | %d1 >>= 10
add.l %d0, %d1 | %d1 += %d0
move.l %d1, (YlastA,%a6) | p->YlastA = %d1
move.l (YfilterA,%a6), %d2 | %d2 = p->YfilterA
move.l %d2, %d0
lsl.l #5, %d2
sub.l %d0, %d2 | %d2 = 31 * %d2
asr.l #5, %d2 | %d2 >>= 5
add.l %d1, %d2
move.l %d2, (YfilterA,%a6) | p->YfilterA = %d2
| *decoded0 stored 2 instructions down, avoiding pipeline stall
| ***** PREDICTOR X *****
| Predictor X, Filter A
move.l (XlastA,%a6), %d3 | %d3 = p->XlastA
move.l %d2, (%a3)+ | *(decoded0++) = %d2 (p->YfilterA)
movem.l (XDELAYA-12,%a5), %d0-%d2 | %d0 = p->buf[XDELAYA-3]
| %d1 = p->buf[XDELAYA-2]
| %d2 = p->buf[XDELAYA-1]
move.l %d3, (XDELAYA,%a5) | p->buf[XDELAYA] = %d3
sub.l %d3, %d2
neg.l %d2 | %d2 = %d3 -%d2
move.l %d2, (XDELAYA-4,%a5) | p->buf[XDELAYA-1] = %d2
movem.l (XcoeffsA,%a6), %d4-%d7 | %d4 = p->XcoeffsA[0]
| %d5 = p->XcoeffsA[1]
| %d6 = p->XcoeffsA[2]
| %d7 = p->XcoeffsA[3]
mac.l %d3, %d4, %acc0 | %acc0 = p->buf[XDELAYA] * p->XcoeffsA[0]
mac.l %d2, %d5, %acc0 | %acc0 += p->buf[XDELAYA-1] * p->XcoeffsA[1]
mac.l %d1, %d6, %acc0 | %acc0 += p->buf[XDELAYA-2] * p->XcoeffsA[2]
mac.l %d0, %d7, %acc0 | %acc0 += p->buf[XDELAYA-3] * p->XcoeffsA[3]
tst.l %d2
beq.s 1f
spl.b %d2 | pos: 0x??????ff, neg: 0x??????00
extb.l %d2 | pos: 0xffffffff, neg: 0x00000000
or.l #1, %d2 | pos: 0xffffffff, neg: 0x00000001
1: | %d2 = SIGN(%d2)
move.l %d2, (XADAPTCOEFFSA-4,%a5) | p->buf[XADAPTCOEFFSA-1] = %d2
tst.l %d3
beq.s 1f
spl.b %d3
extb.l %d3
or.l #1, %d3
1: | %d3 = SIGN(%d3)
move.l %d3, (XADAPTCOEFFSA,%a5) | p->buf[XADAPTCOEFFSA] = %d3
| Predictor X, Filter B
movem.l (XfilterB,%a6), %d2-%d3 | %d2 = p->XfilterB
| %d3 = p->YfilterA
move.l %d3, (XfilterB,%a6) | p->XfilterB = %d3
move.l %d2, %d1 | %d1 = %d2
lsl.l #5, %d2 | %d2 = %d2 * 32
sub.l %d1, %d2 | %d2 -= %d1 (== 31 * old_d2)
asr.l #5, %d2 | %d2 >>= 5
sub.l %d2, %d3 | %d3 -= %d2
movem.l (XDELAYB-16,%a5), %d4-%d7 | %d4 = p->buf[XDELAYB-4]
| %d5 = p->buf[XDELAYB-3]
| %d6 = p->buf[XDELAYB-2]
| %d7 = p->buf[XDELAYB-1]
sub.l %d3, %d7
neg.l %d7 | %d7 = %d3 - %d7
move.l %d7, (XDELAYB-4,%a5) | p->buf[XDELAYB-1] = %d7
movem.l (XcoeffsB,%a6), %d1-%d2/%a0-%a2 | %d1 = p->XcoeffsB[0]
| %d2 = p->XcoeffsB[1]
| %a0 = p->XcoeffsB[2]
| %a1 = p->XcoeffsB[3]
| %a2 = p->XcoeffsB[4]
mac.l %d3, %d1, %acc1 | %acc1 = p->buf[XDELAYB] * p->XcoeffsB[0]
mac.l %d7, %d2, %acc1 | %acc1 += p->buf[XDELAYB-1] * p->XcoeffsB[1]
mac.l %d6, %a0, %acc1 | %acc1 += p->buf[XDELAYB-2] * p->XcoeffsB[2]
mac.l %d5, %a1, %acc1 | %acc1 += p->buf[XDELAYB-3] * p->XcoeffsB[3]
mac.l %d4, %a2, %acc1 | %acc1 += p->buf[XDELAYB-4] * p->XcoeffsB[4]
move.l %d3, (XDELAYB, %a5) | p->buf[XDELAYB] = %d3
tst.l %d7
beq.s 1f
spl.b %d7
extb.l %d7
or.l #1, %d7
1: | %d7 = SIGN(%d7)
move.l %d7, (XADAPTCOEFFSB-4,%a5) | p->buf[XADAPTCOEFFSB-1] = %d7
tst.l %d3
beq.s 1f
spl.b %d3
extb.l %d3
or.l #1, %d3
1: | %d3 = SIGN(%d3)
move.l %d3, (XADAPTCOEFFSB, %a5) | p->buf[XADAPTCOEFFSB] = %d3
| %d1, %d2, %a0, %a1, %a2 contain p->XcoeffsB[0..4]
| %d7, %d3 contain p->buf[XADAPTCOEFFSB-1] and p->buf[XADAPTCOEFFSB]
move.l (%a4), %d0 | %d0 = *decoded1
beq.s 3f
movem.l (XADAPTCOEFFSB-16,%a5), %d4-%d6 | %d4 = p->buf[XADAPTCOEFFSB-4]
| %d5 = p->buf[XADAPTCOEFFSB-3]
| %d6 = p->buf[XADAPTCOEFFSB-2]
bmi.s 1f | flags still valid here
| *decoded1 > 0
sub.l %d3, %d1 | %d1 = p->XcoeffsB[0] - p->buf[XADAPTCOEFFSB]
sub.l %d7, %d2 | %d2 = p->XcoeffsB[1] - p->buf[XADAPTCOEFFSB-1]
sub.l %d6, %a0 | %a0 = p->XcoeffsB[2] - p->buf[XADAPTCOEFFSB-2]
sub.l %d5, %a1 | %a1 = p->XcoeffsB[3] - p->buf[XADAPTCOEFFSB-3]
sub.l %d4, %a2 | %a2 = p->XcoeffsB[4] - p->buf[XADAPTCOEFFSB-4]
movem.l %d1-%d2/%a0-%a2, (XcoeffsB,%a6) | Save p->XcoeffsB[]
movem.l (XcoeffsA,%a6), %d4-%d7 | %d4 = p->XcoeffsA[0]
| %d5 = p->XcoeffsA[1]
| %d6 = p->XcoeffsA[2]
| %d7 = p->XcoeffsA[3]
movem.l (XADAPTCOEFFSA-12,%a5), %d2/%a0-%a2
| %d2 = p->buf[XADAPTCOEFFSA-3]
| %a0 = p->buf[XADAPTCOEFFSA-2]
| %a1 = p->buf[XADAPTCOEFFSA-1]
| %a2 = p->buf[XADAPTCOEFFSA]
sub.l %a2, %d4 | %d4 = p->XcoeffsA[0] - p->buf[XADAPTCOEFFSA]
sub.l %a1, %d5 | %d5 = p->XcoeffsA[1] - p->buf[XADAPTCOEFFSA-1]
sub.l %a0, %d6 | %d6 = p->XcoeffsA[2] - p->buf[XADAPTCOEFFSA-2]
sub.l %d2, %d7 | %d7 = p->XcoeffsA[3] - p->buf[XADAPTCOEFFSA-3]
bra.s 2f
1: | *decoded1 < 0
add.l %d3, %d1 | %d1 = p->XcoeffsB[0] + p->buf[XADAPTCOEFFSB]
add.l %d7, %d2 | %d2 = p->XcoeffsB[1] + p->buf[XADAPTCOEFFSB-1]
add.l %d6, %a0 | %a0 = p->XcoeffsB[2] + p->buf[XADAPTCOEFFSB-2]
add.l %d5, %a1 | %a1 = p->XcoeffsB[3] + p->buf[XADAPTCOEFFSB-3]
add.l %d4, %a2 | %a2 = p->XcoeffsB[4] + p->buf[XADAPTCOEFFSB-4]
movem.l %d1-%d2/%a0-%a2, (XcoeffsB,%a6) | Save p->XcoeffsB[]
movem.l (XcoeffsA,%a6), %d4-%d7 | %d4 = p->XcoeffsA[0]
| %d5 = p->XcoeffsA[1]
| %d6 = p->XcoeffsA[2]
| %d7 = p->XcoeffsA[3]
movem.l (XADAPTCOEFFSA-12,%a5), %d2/%a0-%a2
| %d2 = p->buf[XADAPTCOEFFSA-3]
| %a0 = p->buf[XADAPTCOEFFSA-2]
| %a1 = p->buf[XADAPTCOEFFSA-1]
| %a2 = p->buf[XADAPTCOEFFSA]
add.l %a2, %d4 | %d4 = p->XcoeffsA[0] + p->buf[XADAPTCOEFFSA]
add.l %a1, %d5 | %d5 = p->XcoeffsA[1] + p->buf[XADAPTCOEFFSA-1]
add.l %a0, %d6 | %d6 = p->XcoeffsA[2] + p->buf[XADAPTCOEFFSA-2]
add.l %d2, %d7 | %d7 = p->XcoeffsA[3] + p->buf[XADAPTCOEFFSA-3]
2:
movem.l %d4-%d7, (XcoeffsA,%a6) | Save p->XcoeffsA[]
3:
| Finish Predictor X
movclr.l %acc0, %d1 | %d1 = predictionA
movclr.l %acc1, %d2 | %d2 = predictionB
asr.l #1, %d2
add.l %d2, %d1 | %d1 += (%d2 >> 1)
asr.l #8, %d1
asr.l #2, %d1 | %d1 >>= 10
add.l %d0, %d1 | %d1 += %d0
move.l %d1, (XlastA,%a6) | p->XlastA = %d1
move.l (XfilterA,%a6), %d2 | %d2 = p->XfilterA
move.l %d2, %d0
lsl.l #5, %d2
sub.l %d0, %d2 | %d2 = 31 * %d2
asr.l #5, %d2 | %d6 >>= 2
add.l %d1, %d2
move.l %d2, (XfilterA,%a6) | p->XfilterA = %d2
| *decoded1 stored 3 instructions down, avoiding pipeline stall
| ***** COMMON *****
addq.l #4, %a5 | p->buf++
lea.l (historybuffer+PREDICTOR_HISTORY_SIZE*4,%a6), %a2
| %a2 = &p->historybuffer[PREDICTOR_HISTORY_SIZE]
move.l %d2, (%a4)+ | *(decoded1++) = %d2 (p->XfilterA)
cmp.l %a2, %a5
beq.s .move_hist | History buffer is full, we need to do a memmove
subq.l #1, (%sp) | decrease loop count
bne.w .loop
.done:
move.l %a5, (%a6) | Save value of p->buf
movem.l (4,%sp), %d2-%d7/%a2-%a6
lea.l (12*4,%sp), %sp
rts
.move_hist:
lea.l (historybuffer,%a6), %a2
| dest = %a2 (p->historybuffer)
| src = %a5 (p->buf)
| n = 200
movem.l (%a5), %d0-%d7/%a0-%a1 | 40 bytes
movem.l %d0-%d7/%a0-%a1, (%a2)
movem.l (40,%a5), %d0-%d7/%a0-%a1 | 40 bytes
movem.l %d0-%d7/%a0-%a1, (40,%a2)
movem.l (80,%a5), %d0-%d7/%a0-%a1 | 40 bytes
movem.l %d0-%d7/%a0-%a1, (80,%a2)
movem.l (120,%a5), %d0-%d7/%a0-%a1 | 40 bytes
movem.l %d0-%d7/%a0-%a1, (120,%a2)
movem.l (160,%a5), %d0-%d7/%a0-%a1 | 40 bytes
movem.l %d0-%d7/%a0-%a1, (160,%a2)
move.l %a2, %a5 | p->buf = &p->historybuffer[0]
subq.l #1, (%sp) | decrease loop count
bne.w .loop
bra.s .done
.size predictor_decode_stereo, .-predictor_decode_stereo
.global predictor_decode_mono
.type predictor_decode_mono,@function
| void predictor_decode_mono(struct predictor_t* p,
| int32_t* decoded0,
| int count)
|
| Runs the single-channel predictor over decoded0[0..count-1] in place:
| each input word is read from (%a4) and the filtered result is written
| back to the same slot.
|
| Register roles inside the loop:
| %a6 = p, %a5 = p->buf, %a4 = decoded0, %d7 = remaining count,
| %d3 = current "last A" value (kept in a register between iterations and
| written to p->YlastA on exit or before the history move).
|
| NOTE(review): the loop count is only tested after an iteration
| (subq/bne), so this routine appears to require count >= 1.
predictor_decode_mono:
lea.l (-11*4,%sp), %sp
movem.l %d2-%d7/%a2-%a6, (%sp) | save the 11 callee-saved registers used below
move.l #0, %macsr | signed integer mode
move.l (11*4+4,%sp), %a6 | %a6 = p
move.l (11*4+8,%sp), %a4 | %a4 = decoded0
move.l (11*4+12,%sp), %d7 | %d7 = count
move.l (%a6), %a5 | %a5 = p->buf
move.l (YlastA,%a6), %d3 | %d3 = p->YlastA
.loopm:
| ***** PREDICTOR *****
movem.l (YDELAYA-12,%a5), %d0-%d2 | %d0 = p->buf[YDELAYA-3]
| %d1 = p->buf[YDELAYA-2]
| %d2 = p->buf[YDELAYA-1]
move.l %d3, (YDELAYA,%a5) | p->buf[YDELAYA] = %d3
sub.l %d3, %d2
neg.l %d2 | %d2 = %d3 - %d2
move.l %d2, (YDELAYA-4,%a5) | p->buf[YDELAYA-1] = %d2
movem.l (YcoeffsA,%a6), %a0-%a3 | %a0 = p->YcoeffsA[0]
| %a1 = p->YcoeffsA[1]
| %a2 = p->YcoeffsA[2]
| %a3 = p->YcoeffsA[3]
mac.l %d3, %a0, %acc0 | %acc0 = p->buf[YDELAYA] * p->YcoeffsA[0]
mac.l %d2, %a1, %acc0 | %acc0 += p->buf[YDELAYA-1] * p->YcoeffsA[1]
mac.l %d1, %a2, %acc0 | %acc0 += p->buf[YDELAYA-2] * p->YcoeffsA[2]
mac.l %d0, %a3, %acc0 | %acc0 += p->buf[YDELAYA-3] * p->YcoeffsA[3]
tst.l %d2
beq.s 1f
spl.b %d2 | pos: 0x??????ff, neg: 0x??????00
extb.l %d2 | pos: 0xffffffff, neg: 0x00000000
or.l #1, %d2 | pos: 0xffffffff, neg: 0x00000001
1: | %d2 = SIGN(%d2): 0 for zero, -1 for positive, 1 for negative
move.l %d2, (YADAPTCOEFFSA-4,%a5) | p->buf[YADAPTCOEFFSA-1] = %d2
tst.l %d3
beq.s 1f
spl.b %d3
extb.l %d3
or.l #1, %d3
1: | %d3 = SIGN(%d3)
move.l %d3, (YADAPTCOEFFSA,%a5) | p->buf[YADAPTCOEFFSA] = %d3
move.l (%a4), %d0 | %d0 = *decoded0
beq.s 3f | *decoded0 == 0: leave the coefficients untouched
movem.l (YADAPTCOEFFSA-12,%a5),%d4-%d5 | %d4 = p->buf[YADAPTCOEFFSA-3]
| %d5 = p->buf[YADAPTCOEFFSA-2]
bmi.s 1f | flags still valid here
| *decoded0 > 0
sub.l %d3, %a0 | %a0 = p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA]
sub.l %d2, %a1 | %a1 = p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1]
sub.l %d5, %a2 | %a2 = p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2]
sub.l %d4, %a3 | %a3 = p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3]
bra.s 2f
1: | *decoded0 < 0
add.l %d3, %a0 | %a0 = p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA]
add.l %d2, %a1 | %a1 = p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1]
add.l %d5, %a2 | %a2 = p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2]
add.l %d4, %a3 | %a3 = p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3]
2:
movem.l %a0-%a3, (YcoeffsA,%a6) | save p->YcoeffsA[]
3:
| Finish Predictor
movclr.l %acc0, %d3 | %d3 = predictionA
asr.l #8, %d3
asr.l #2, %d3 | %d3 >>= 10
add.l %d0, %d3 | %d3 += %d0
move.l (YfilterA,%a6), %d2 | %d2 = p->YfilterA
move.l %d2, %d0
lsl.l #5, %d2
sub.l %d0, %d2 | %d2 = 31 * %d2
asr.l #5, %d2 | %d2 >>= 5
add.l %d3, %d2
move.l %d2, (YfilterA,%a6) | p->YfilterA = %d2
| *decoded0 stored 3 instructions down, avoiding pipeline stall
| ***** COMMON *****
addq.l #4, %a5 | p->buf++
lea.l (historybuffer+PREDICTOR_HISTORY_SIZE*4,%a6), %a3
| %a3 = &p->historybuffer[PREDICTOR_HISTORY_SIZE]
move.l %d2, (%a4)+ | *(decoded0++) = %d2 (p->YfilterA)
cmp.l %a3, %a5
beq.s .move_histm | History buffer is full, we need to do a memmove
subq.l #1, %d7 | decrease loop count
bne.w .loopm
move.l %d3, (YlastA,%a6) | p->YlastA = %d3
.donem:
move.l %a5, (%a6) | Save value of p->buf
movem.l (%sp), %d2-%d7/%a2-%a6
lea.l (11*4,%sp), %sp
rts
.move_histm:
move.l %d3, (YlastA,%a6) | p->YlastA = %d3 (%d3 is clobbered by the copy below)
lea.l (historybuffer,%a6), %a3
| dest = %a3 (p->historybuffer)
| src = %a5 (p->buf)
| n = 200 bytes, copied as 5 chunks of 10 registers (40 bytes each)
movem.l (%a5), %d0-%d6/%a0-%a2 | 40 bytes
movem.l %d0-%d6/%a0-%a2, (%a3)
movem.l (40,%a5), %d0-%d6/%a0-%a2 | 40 bytes
movem.l %d0-%d6/%a0-%a2, (40,%a3)
movem.l (80,%a5), %d0-%d6/%a0-%a2 | 40 bytes
movem.l %d0-%d6/%a0-%a2, (80,%a3)
movem.l (120,%a5), %d0-%d6/%a0-%a2 | 40 bytes
movem.l %d0-%d6/%a0-%a2, (120,%a3)
movem.l (160,%a5), %d0-%d6/%a0-%a2 | 40 bytes
movem.l %d0-%d6/%a0-%a2, (160,%a3)
move.l %a3, %a5 | p->buf = &p->historybuffer[0]
move.l (YlastA,%a6), %d3 | %d3 = p->YlastA
subq.l #1, %d7 | decrease loop count
bne.w .loopm
bra.s .donem | p->YlastA was already stored at the top of .move_histm
.size predictor_decode_mono, .-predictor_decode_mono

View file

@ -0,0 +1,271 @@
/*
libdemac - A Monkey's Audio decoder
$Id$
Copyright (C) Dave Chapman 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
*/
#include <inttypes.h>
#include <string.h>
#include "parser.h"
#include "predictor.h"
#include "demac_config.h"
/* Return 0 if x is zero, -1 if x is positive, 1 if x is negative.
 *
 * The whole expansion is parenthesised so the macro can be embedded in a
 * larger expression (e.g. "1 + SIGN(v)") without the surrounding operators
 * binding to the condition of the conditional expression.
 * Note that x is evaluated more than once; do not pass an argument with
 * side effects. */
#define SIGN(x) ((x) ? (((x) > 0) ? -1 : 1) : 0)
/* Starting values copied into both YcoeffsA and XcoeffsA by
   init_predictor_decoder(). */
static const int32_t initial_coeffs[4] = {
360, 317, -109, 98
};
/* Word offsets into p->buf. For each of the four delay lines the code
   accesses the slot at the named offset and a few slots below it
   (e.g. YDELAYA, YDELAYA-1 .. YDELAYA-3). PREDICTOR_ORDER is defined
   outside this file. */
#define YDELAYA (18 + PREDICTOR_ORDER*4)
#define YDELAYB (18 + PREDICTOR_ORDER*3)
#define XDELAYA (18 + PREDICTOR_ORDER*2)
#define XDELAYB (18 + PREDICTOR_ORDER)
/* Word offsets into p->buf of the stored SIGN() values that drive the
   coefficient adaptation; the A sets are read at offsets 0..-3 and the
   B sets at offsets 0..-4 relative to these. */
#define YADAPTCOEFFSA (18)
#define XADAPTCOEFFSA (14)
#define YADAPTCOEFFSB (10)
#define XADAPTCOEFFSB (5)
/* Put a predictor into its start-of-stream state.
 *
 * The A-stage coefficient sets of both channels are loaded from
 * initial_coeffs, the B-stage sets and all scalar filter state are zeroed,
 * the first PREDICTOR_SIZE words of the history buffer are cleared and
 * p->buf is pointed back at the start of that buffer. */
void init_predictor_decoder(struct predictor_t* p)
{
    /* A-stage coefficients start from the fixed table */
    memcpy(p->XcoeffsA, initial_coeffs, sizeof(initial_coeffs));
    memcpy(p->YcoeffsA, initial_coeffs, sizeof(initial_coeffs));

    /* B-stage coefficients start at zero */
    memset(p->XcoeffsB, 0, sizeof(p->XcoeffsB));
    memset(p->YcoeffsB, 0, sizeof(p->YcoeffsB));

    /* Scalar filter state, X then Y */
    p->XlastA = 0;
    p->XfilterA = 0;
    p->XfilterB = 0;

    p->YlastA = 0;
    p->YfilterA = 0;
    p->YfilterB = 0;

    /* Clear the live window of the history and rewind the cursor */
    p->buf = p->historybuffer;
    memset(p->historybuffer, 0, PREDICTOR_SIZE * sizeof(int32_t));
}
#if !defined(CPU_ARM) && !defined(CPU_COLDFIRE)
/* Run the two-channel predictor over count samples, in place.
 *
 * For every sample index the residuals in *decoded0 (Y) and *decoded1 (X)
 * are replaced by the reconstructed, filtered values p->YfilterA and
 * p->XfilterA.  Each channel uses a 4-tap "A" stage fed from its own
 * previous output and a 5-tap "B" stage fed from the other channel's
 * filtered output; the coefficients are then nudged by the stored sign
 * values according to the sign of the residual.
 *
 * p        predictor state, updated as samples are processed
 * decoded0 Y channel samples (read as residuals, overwritten with output)
 * decoded1 X channel samples (read as residuals, overwritten with output)
 * count    number of samples in each channel
 */
void ICODE_ATTR_DEMAC predictor_decode_stereo(struct predictor_t* p,
                                              int32_t* decoded0,
                                              int32_t* decoded1,
                                              int count)
{
    int32_t sumA, sumB;
    int tap;

    while (LIKELY(count--))
    {
        /* ---------------- Y channel ---------------- */

        /* Stage A input: previous output, its first difference, and the
           sign of each for the later coefficient update */
        p->buf[YDELAYA] = p->YlastA;
        p->buf[YADAPTCOEFFSA] = SIGN(p->buf[YDELAYA]);
        p->buf[YDELAYA-1] = p->buf[YDELAYA] - p->buf[YDELAYA-1];
        p->buf[YADAPTCOEFFSA-1] = SIGN(p->buf[YDELAYA-1]);

        sumA = p->buf[YDELAYA] * p->YcoeffsA[0];
        for (tap = 1; tap < 4; tap++)
            sumA += p->buf[YDELAYA-tap] * p->YcoeffsA[tap];

        /* Stage B input: X's filtered output through a scaled
           first-order filter */
        p->buf[YDELAYB] = p->XfilterA - ((p->YfilterB * 31) >> 5);
        p->buf[YADAPTCOEFFSB] = SIGN(p->buf[YDELAYB]);
        p->YfilterB = p->XfilterA;
        p->buf[YDELAYB-1] = p->buf[YDELAYB] - p->buf[YDELAYB-1];
        p->buf[YADAPTCOEFFSB-1] = SIGN(p->buf[YDELAYB-1]);

        sumB = p->buf[YDELAYB] * p->YcoeffsB[0];
        for (tap = 1; tap < 5; tap++)
            sumB += p->buf[YDELAYB-tap] * p->YcoeffsB[tap];

        p->YlastA = *decoded0 + ((sumA + (sumB >> 1)) >> 10);
        p->YfilterA = p->YlastA + ((p->YfilterA * 31) >> 5);

        /* ---------------- X channel ---------------- */

        p->buf[XDELAYA] = p->XlastA;
        p->buf[XADAPTCOEFFSA] = SIGN(p->buf[XDELAYA]);
        p->buf[XDELAYA-1] = p->buf[XDELAYA] - p->buf[XDELAYA-1];
        p->buf[XADAPTCOEFFSA-1] = SIGN(p->buf[XDELAYA-1]);

        sumA = p->buf[XDELAYA] * p->XcoeffsA[0];
        for (tap = 1; tap < 4; tap++)
            sumA += p->buf[XDELAYA-tap] * p->XcoeffsA[tap];

        /* Stage B input: Y's freshly updated filtered output through a
           scaled first-order filter */
        p->buf[XDELAYB] = p->YfilterA - ((p->XfilterB * 31) >> 5);
        p->buf[XADAPTCOEFFSB] = SIGN(p->buf[XDELAYB]);
        p->XfilterB = p->YfilterA;
        p->buf[XDELAYB-1] = p->buf[XDELAYB] - p->buf[XDELAYB-1];
        p->buf[XADAPTCOEFFSB-1] = SIGN(p->buf[XDELAYB-1]);

        sumB = p->buf[XDELAYB] * p->XcoeffsB[0];
        for (tap = 1; tap < 5; tap++)
            sumB += p->buf[XDELAYB-tap] * p->XcoeffsB[tap];

        p->XlastA = *decoded1 + ((sumA + (sumB >> 1)) >> 10);
        p->XfilterA = p->XlastA + ((p->XfilterA * 31) >> 5);

        /* ------- Adapt Y coefficients, emit Y ------- */

        if (LIKELY(*decoded0 != 0))
        {
            if (*decoded0 > 0)
            {
                for (tap = 0; tap < 4; tap++)
                    p->YcoeffsA[tap] -= p->buf[YADAPTCOEFFSA-tap];
                for (tap = 0; tap < 5; tap++)
                    p->YcoeffsB[tap] -= p->buf[YADAPTCOEFFSB-tap];
            }
            else
            {
                for (tap = 0; tap < 4; tap++)
                    p->YcoeffsA[tap] += p->buf[YADAPTCOEFFSA-tap];
                for (tap = 0; tap < 5; tap++)
                    p->YcoeffsB[tap] += p->buf[YADAPTCOEFFSB-tap];
            }
        }

        *(decoded0++) = p->YfilterA;

        /* ------- Adapt X coefficients, emit X ------- */

        if (LIKELY(*decoded1 != 0))
        {
            if (*decoded1 > 0)
            {
                for (tap = 0; tap < 4; tap++)
                    p->XcoeffsA[tap] -= p->buf[XADAPTCOEFFSA-tap];
                for (tap = 0; tap < 5; tap++)
                    p->XcoeffsB[tap] -= p->buf[XADAPTCOEFFSB-tap];
            }
            else
            {
                for (tap = 0; tap < 4; tap++)
                    p->XcoeffsA[tap] += p->buf[XADAPTCOEFFSA-tap];
                for (tap = 0; tap < 5; tap++)
                    p->XcoeffsB[tap] += p->buf[XADAPTCOEFFSB-tap];
            }
        }

        *(decoded1++) = p->XfilterA;

        /* ------- Advance the shared history window ------- */

        p->buf++;

        /* Once the window reaches the end of the history area, slide the
           live PREDICTOR_SIZE words back to the start */
        if (UNLIKELY(p->buf == &p->historybuffer[PREDICTOR_HISTORY_SIZE])) {
            memmove(p->historybuffer, p->buf,
                    PREDICTOR_SIZE * sizeof(int32_t));
            p->buf = p->historybuffer;
        }
    }
}
/* Run the single-channel predictor over count samples, in place.
 *
 * Each residual in decoded0[] is replaced by the reconstructed, filtered
 * value p->YfilterA.  Only the 4-tap "A" stage is used.  The running
 * reconstruction is held in a local for the duration of the call and
 * written back to p->YlastA on return.
 *
 * p        predictor state, updated as samples are processed
 * decoded0 samples (read as residuals, overwritten with output)
 * count    number of samples
 */
void ICODE_ATTR_DEMAC predictor_decode_mono(struct predictor_t* p,
                                            int32_t* decoded0,
                                            int count)
{
    int32_t residual, sum, last;
    int tap;

    last = p->YlastA;

    while (LIKELY(count--))
    {
        residual = *decoded0;

        /* Feed the previous output and its first difference to the taps */
        p->buf[YDELAYA] = last;
        p->buf[YDELAYA-1] = p->buf[YDELAYA] - p->buf[YDELAYA-1];

        sum = p->buf[YDELAYA] * p->YcoeffsA[0];
        for (tap = 1; tap < 4; tap++)
            sum += p->buf[YDELAYA-tap] * p->YcoeffsA[tap];

        last = residual + (sum >> 10);

        /* Record the signs that steer the coefficient update */
        p->buf[YADAPTCOEFFSA] = SIGN(p->buf[YDELAYA]);
        p->buf[YADAPTCOEFFSA-1] = SIGN(p->buf[YDELAYA-1]);

        if (LIKELY(residual != 0))
        {
            if (residual > 0)
            {
                for (tap = 0; tap < 4; tap++)
                    p->YcoeffsA[tap] -= p->buf[YADAPTCOEFFSA-tap];
            }
            else
            {
                for (tap = 0; tap < 4; tap++)
                    p->YcoeffsA[tap] += p->buf[YADAPTCOEFFSA-tap];
            }
        }

        p->buf++;

        /* Once the window reaches the end of the history area, slide the
           live PREDICTOR_SIZE words back to the start */
        if (UNLIKELY(p->buf == &p->historybuffer[PREDICTOR_HISTORY_SIZE])) {
            memmove(p->historybuffer, p->buf,
                    PREDICTOR_SIZE * sizeof(int32_t));
            p->buf = p->historybuffer;
        }

        /* Output stage: first-order filter on the reconstruction */
        p->YfilterA = last + ((p->YfilterA * 31) >> 5);
        *(decoded0++) = p->YfilterA;
    }

    p->YlastA = last;
}
#endif

View file

@ -0,0 +1,38 @@
/*
libdemac - A Monkey's Audio decoder
$Id$
Copyright (C) Dave Chapman 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
*/
#ifndef _APE_PREDICTOR_H
#define _APE_PREDICTOR_H
#include <inttypes.h>
#include "parser.h"
#include "filter.h"
/* Reset *p to its start-of-stream state (history cleared, coefficient
   sets re-initialised, filter state zeroed). */
void init_predictor_decoder(struct predictor_t* p);
/* Process count samples of a two-channel stream in place: decoded0 and
   decoded1 each hold count input values on entry and the predictor
   output on return. */
void predictor_decode_stereo(struct predictor_t* p, int32_t* decoded0,
int32_t* decoded1, int count);
/* Single-channel variant: decoded0 holds count input values on entry and
   the predictor output on return. */
void predictor_decode_mono(struct predictor_t* p, int32_t* decoded0,
int count);
#endif

View file

@ -0,0 +1,25 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2010 by Andrew Mahone
*
* Wrapper for udiv32_arm.S to test available IRAM by pre-linking the codec.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#define APE_PRE
#include "udiv32_arm.S"

View file

@ -0,0 +1,318 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2008 by Jens Arnold
* Copyright (C) 2009 by Andrew Mahone
*
* Optimised unsigned integer division for ARMv4
*
* Based on: libgcc routines for ARM cpu, additional algorithms from ARM System
* Developer's Guide
* Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
* Free Software Foundation, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include "config.h"
/* On targets with codec iram, a header file will be generated after an initial
link of the APE codec, stating the amount of IRAM remaining for use by the
reciprocal lookup table. */
#if !defined(APE_PRE) && defined(USE_IRAM) && ARM_ARCH < 5
#include "lib/rbcodec/codecs/ape_free_iram.h"
#endif
/* Codecs should not normally do this, but we need to check a macro, and
* codecs.h would confuse the assembler. */
#ifdef USE_IRAM
#define DIV_RECIP
.section .icode,"ax",%progbits
#else
.text
#endif
.align
.global udiv32_arm
.type udiv32_arm,%function
#if ARM_ARCH < 5
/* Adapted from an algorithm given in ARM System Developer's Guide (7.3.1.2)
for dividing a 30-bit value by a 15-bit value, with two operations per
iteration by storing quotient and remainder together and adding the previous
quotient bit during trial subtraction. Modified to work with any dividend
and divisor both less than 1 << 30, and skipping trials by calculating bits
in output. */
/* Macro operands (all registers):
     dividend  - numerator on entry
     divisor   - divisor on entry; the caller in this file passes it already
                 negated (see the rsbs at .L_udiv), so "adding" it subtracts
     result    - scratch holding the packed remainder/quotient during the loop
     bits      - scratch; 1 + number of positions the divisor was shifted left
     curbit    - scratch used to compute the jump into the unrolled loop
     quotient  - receives the quotient
     remainder - receives the remainder
   quotient/remainder may alias dividend/divisor, as at the call site. */
.macro ARM_DIV_31_BODY dividend, divisor, result, bits, curbit, quotient, remainder
mov \bits, #1
/* Shift the divisor left until it aligns with the numerator. If it already
has the high bit set, this is fine, everything inside .rept will be
skipped, and the add before and adcs after will set the one-bit result
to zero. */
cmn \divisor, \dividend, lsr #16
movcs \divisor, \divisor, lsl #16
addcs \bits, \bits, #16
cmn \divisor, \dividend, lsr #8
movcs \divisor, \divisor, lsl #8
addcs \bits, \bits, #8
cmn \divisor, \dividend, lsr #4
movcs \divisor, \divisor, lsl #4
addcs \bits, \bits, #4
cmn \divisor, \dividend, lsr #2
movcs \divisor, \divisor, lsl #2
addcs \bits, \bits, #2
cmn \divisor, \dividend, lsr #1
movcs \divisor, \divisor, lsl #1
addcs \bits, \bits, #1
/* First trial subtraction; undone again if it did not produce a carry */
adds \result, \dividend, \divisor
subcc \result, \result, \divisor
/* Skip (31 - bits) of the two-instruction (8-byte) steps below by adding
   to pc; the nop pads for the pc read-ahead. */
rsb \curbit, \bits, #31
add pc, pc, \curbit, lsl #3
nop
.rept 30
adcs \result, \divisor, \result, lsl #1
/* Fix the remainder portion of the result. This must be done because the
handler for 32-bit numerators needs the remainder. */
subcc \result, \result, \divisor
.endr
/* Shift remainder/quotient left one, add final quotient bit */
adc \result, \result, \result
/* Unpack: the remainder sits above the low 'bits' quotient bits */
mov \remainder, \result, lsr \bits
eor \quotient, \result, \remainder, lsl \bits
.endm
/* recip_max is the largest divisor served from the reciprocal table.
   The table starts at divisor 3 and has recip_max - 2 entries, so the
   value 2 used when FREE_IRAM is undefined yields an empty table. */
#ifndef FREE_IRAM
.set recip_max, 2
#else
/* Each table entry is one word. Since a compare is done against the maximum
entry as an immediate, the maximum entry must be a valid ARM immediate,
which means a byte shifted by an even number of places. */
.set recip_max, 2 + FREE_IRAM / 4
/* Count how many bits recip_max occupies above its low byte by halving the
   trial shift (16, 8, 4, 2, 1), then add one more if anything is left. */
.set recip_max_tmp, recip_max >> 8
.set recip_mask_shift, 0
.set tmp_shift, 16
.rept 5
.if recip_max_tmp >> tmp_shift
.set recip_max_tmp, recip_max_tmp >> tmp_shift
.set recip_mask_shift, recip_mask_shift + tmp_shift
.endif
.set tmp_shift, tmp_shift >> 1
.endr
.if recip_max_tmp
.set recip_mask_shift, recip_mask_shift + 1
.endif
/* Round the shift up to an even amount and keep only the 8 bits of
   recip_max that lie at that position. */
.set recip_mask_shift, (recip_mask_shift + 1) & 62
.set recip_max, recip_max & (255 << recip_mask_shift)
//.set recip_max, 2
#endif
/* udiv32_arm (ARM_ARCH < 5 variant)
   In:  r0 = numerator, r1 = divisor.
   Out: r0 = quotient. On the general (non-table) path r1 additionally
        holds the remainder produced by ARM_DIV_31_BODY.
   Scratch: r2, r3, ip.
   A zero divisor branches to .L_div0, which calls __div0. */
udiv32_arm:
#ifdef DIV_RECIP
/* Divisors 0..2 are handled separately; divisors above recip_max use the
   general shift/subtract routine. */
cmp r1, #3
bcc .L_udiv_tiny
cmp r1, #recip_max
bhi .L_udiv
/* Table is indexed from divisor 3, hence the -12 (3 words) bias */
adr r3, .L_udiv_recip_table-12
ldr r2, [r3, r1, lsl #2]
mov r3, r0
/* r0 = high word of numerator * table entry = quotient estimate */
umull ip, r0, r2, r0
/* If estimate * divisor exceeds the numerator the estimate is one too
   large; otherwise it is returned as is. */
mul r2, r0, r1
cmp r3, r2
bxcs lr
sub r0, r0, #1
bx lr
.L_udiv_tiny:
/* divisor 2: shift right by one; divisor 1: return numerator unchanged;
   divisor 0: fall through to the division-by-zero handler */
cmp r1, #1
movhi r0, r0, lsr #1
bxcs lr
b .L_div0
#endif
.L_udiv:
/* Invert divisor. ARM_DIV_31_BODY uses adc to both subtract the divisor
and add the next bit of the result. The correction code at .L_udiv32
does not need the divisor inverted, but can be modified to work with it,
and this allows the zero divisor test to be done early and without an
explicit comparison. */
rsbs r1, r1, #0
#ifndef DIV_RECIP
beq .L_div0
#endif
tst r0, r0
/* High bit must be unset, otherwise shift numerator right, calculate,
and correct results. As this case is very uncommon we want to avoid
any other delays on the main path in handling it, so the long divide
calls the short divide as a function. */
bmi .L_udiv32
.L_udiv31:
ARM_DIV_31_BODY r0, r1, r2, r3, ip, r0, r1
bx lr
.L_udiv32:
/* store original numerator and divisor, we'll need them to correct the
result, */
/* (stored below sp without writeback; .L_udiv31 does not touch the stack) */
stmdb sp, { r0, r1, lr }
/* Call __div0 here if divisor is zero, otherwise it would report the wrong
address. */
/* NOTE(review): no __div0 call follows at this point; the zero-divisor
   checks visible in this routine are at .L_udiv_tiny and after the rsbs
   above, so the comment above looks stale - confirm. */
mov r0, r0, lsr #1
bl .L_udiv31
ldmdb sp, { r2, r3, lr }
/* Move the low bit of the original numerator to the carry bit */
movs r2, r2, lsr #1
/* Shift the remainder left one and add in the carry bit */
adc r1, r1, r1
/* Subtract the original divisor from the remainder, setting carry if the
result is non-negative */
adds r1, r1, r3
/* Shift quotient left one and add carry bit */
adc r0, r0, r0
bx lr
.L_div0:
/* __div0 expects the calling address on the top of the stack */
stmdb sp!, { lr }
mov r0, #0
#if defined(__ARM_EABI__) || !defined(USE_IRAM)
bl __div0
#else
/* Jump through the literal word that follows */
ldr pc, [pc, #-4]
.word __div0
#endif
#ifdef DIV_RECIP
/* One word per divisor from 3 to recip_max, generated at assembly time.
   For a divisor that is not a power of two: start from 2^30 / div, extend
   the quotient by two more bits of long division, then add 1.
   For a power of two: (2^30 / div) * 4. */
.L_udiv_recip_table:
.set div, 3
.rept recip_max - 2
.if (div - 1) & div
.set q, 0x40000000 / div
.set r, (0x40000000 - (q * div))<<1
.set q, q << 1
.if r >= div
.set q, q + 1
.set r, r - div
.endif
.set r, r << 1
.set q, q << 1
.if r >= div
.set q, q + 1
.set r, r - div
.endif
.set q, q + 1
.else
.set q, 0x40000000 / div * 4
.endif
.word q
.set div, div+1
.endr
#endif
.size udiv32_arm, . - udiv32_arm
#else
/* Unsigned 32-bit division body for ARM_ARCH >= 5.
   Operands (all registers except the last):
     numerator - dividend on entry
     divisor   - divisor on entry, reused as scratch
     quotient  - receives the result (may alias numerator, as at the call
                 site in this file)
     bits, inv, neg - scratch
     div0label - optional label branched to when the divisor is zero
   Every path through the macro ends in "bx lr" or the branch to div0label.
   The reciprocal estimate is fetched from .L_udiv_est_table using the top
   bits of the divisor after it has been shifted left by its leading-zero
   count. */
.macro ARMV5_UDIV32_BODY numerator, divisor, quotient, bits, inv, neg, div0label
cmp \numerator, \divisor
clz \bits, \divisor
/* numerator < divisor: the quotient is 0 */
bcc 30f
mov \inv, \divisor, lsl \bits
add \neg, pc, \inv, lsr #25
cmp \inv, #1<<31
/* Only when the normalised divisor is above 1<<31: load the estimate byte.
   The index (inv >> 25) is then in 64..127, hence the -64 bias. */
ldrhib \inv, [\neg, #.L_udiv_est_table-.-64]
/* Normalised divisor <= 1<<31: divisor is zero or a power of two */
bls 20f
subs \bits, \bits, #7
rsb \neg, \divisor, #0
movpl \divisor, \inv, lsl \bits
/* Negative result: divisor has fewer than 7 leading zeros */
bmi 10f
mul \inv, \divisor, \neg
smlawt \divisor, \divisor, \inv, \divisor
mul \inv, \divisor, \neg
/* This will save a cycle on ARMv6, but requires that the numerator sign
bit is not set (that of inv is guaranteed unset). The branch should
predict very well, making it typically 1 cycle, and thus both the branch
and test fill delay cycles for the multiplies. Based on logging of
numerator sizes in the APE codec, the branch is taken about 1/10^7 of
the time. */
#if ARM_ARCH >= 6
tst \numerator, \numerator
smmla \divisor, \divisor, \inv, \divisor
bmi 40f
smmul \inv, \numerator, \divisor
#else
mov \bits, #0
smlal \bits, \divisor, \inv, \divisor
umull \bits, \inv, \numerator, \divisor
#endif
/* inv now holds the quotient estimate; derive the residual from it and
   add 0, 1 or 2 to the estimate depending on the flags */
add \numerator, \numerator, \neg
mla \divisor, \inv, \neg, \numerator
mov \quotient, \inv
cmn \divisor, \neg
addcc \quotient, \quotient, #1
addpl \quotient, \quotient, #2
bx lr
10:
/* Large-divisor path: the table estimate is used directly, shifted right */
rsb \bits, \bits, #0
sub \inv, \inv, #4
mov \divisor, \inv, lsr \bits
umull \bits, \inv, \numerator, \divisor
mla \divisor, \inv, \neg, \numerator
mov \quotient, \inv
cmn \neg, \divisor, lsr #1
addcs \divisor, \divisor, \neg, lsl #1
addcs \quotient, \quotient, #2
cmn \neg, \divisor
addcs \quotient, \quotient, #1
bx lr
20:
/* Flags here still come from "cmp inv, #1<<31": not-equal means the
   normalised divisor was 0, i.e. division by zero. Otherwise the divisor
   is a power of two and the quotient is a plain right shift. */
.ifnc "", "\div0label"
rsb \bits, \bits, #31
bne \div0label
.endif
mov \quotient, \numerator, lsr \bits
bx lr
30:
mov \quotient, #0
bx lr
#if ARM_ARCH >= 6
40:
/* Numerator has its top bit set: use the unsigned multiply instead of
   smmul for the estimate, then finish as on the main path */
umull \bits, \inv, \numerator, \divisor
add \numerator, \numerator, \neg
mla \divisor, \inv, \neg, \numerator
mov \quotient, \inv
cmn \divisor, \neg
addcc \quotient, \quotient, #1
addpl \quotient, \quotient, #2
bx lr
#endif
.endm
/* udiv32_arm (ARM_ARCH >= 5 variant)
   In:  r0 = numerator, r1 = divisor.  Out: r0 = quotient.
   Scratch: r2, r3, ip.  A zero divisor ends up at .L_div0. */
udiv32_arm:
ARMV5_UDIV32_BODY r0, r1, r0, r2, r3, ip, .L_div0
.L_div0:
/* __div0 expects the calling address on the top of the stack */
stmdb sp!, { lr }
mov r0, #0
#if defined(__ARM_EABI__) || !defined(USE_IRAM)
bl __div0
#else
/* Jump through the literal word that follows */
ldr pc, [pc, #-4]
.word __div0
#endif
/* 64 one-byte initial estimates, read by the ldrhib in ARMV5_UDIV32_BODY
   and indexed by the top 7 bits of the normalised divisor minus 64 */
.L_udiv_est_table:
.byte 0xff, 0xfc, 0xf8, 0xf4, 0xf0, 0xed, 0xea, 0xe6
.byte 0xe3, 0xe0, 0xdd, 0xda, 0xd7, 0xd4, 0xd2, 0xcf
.byte 0xcc, 0xca, 0xc7, 0xc5, 0xc3, 0xc0, 0xbe, 0xbc
.byte 0xba, 0xb8, 0xb6, 0xb4, 0xb2, 0xb0, 0xae, 0xac
.byte 0xaa, 0xa8, 0xa7, 0xa5, 0xa3, 0xa2, 0xa0, 0x9f
.byte 0x9d, 0x9c, 0x9a, 0x99, 0x97, 0x96, 0x94, 0x93
.byte 0x92, 0x90, 0x8f, 0x8e, 0x8d, 0x8c, 0x8a, 0x89
.byte 0x88, 0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81
#endif
.size udiv32_arm, . - udiv32_arm

View file

@ -0,0 +1,404 @@
/*
libdemac - A Monkey's Audio decoder
$Id$
Copyright (C) Dave Chapman 2007
ARMv5te vector math copyright (C) 2008 Jens Arnold
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
*/
/* This header provides the fused (scalar product + vector add/sub)
   routines; presumably tested by the including code - confirm. */
#define FUSED_VECTOR_MATH
/* Paste the argument (a run of asm string literals) three times */
#define REPEAT_3(x) x x x
/* Paste the argument seven times for ORDER > 16, three times otherwise */
#if ORDER > 16
#define REPEAT_MLA(x) x x x x x x x
#else
#define REPEAT_MLA(x) x x x
#endif
/* Calculate scalarproduct, then add a 2nd vector (fused for performance)
* This version fetches data as 32 bit words, and *requires* v1 to be
* 32 bit aligned. It also requires that f2 and s2 are either both 32 bit
* aligned or both unaligned. If either condition isn't met, it will either
* result in a data abort or incorrect results.
*
* v1 - 16-bit vector; each element is multiplied with the matching f2
*      element for the returned sum, and is then overwritten in place
*      with v1[i] + s2[i]
* f2 - 16-bit vector multiplied against v1 (read only)
* s2 - 16-bit vector added to v1 (read only)
*
* Each pass handles 16 elements (four groups of four halfwords); for
* ORDER > 16 the pass is repeated ORDER>>4 times.
* Returns the accumulated sum of products. */
static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
{
int res;
#if ORDER > 16
int cnt = ORDER>>4;
#endif
/* sum = per-halfword s1 + s2, both operands laid out the same way */
#define ADDHALFREGS(sum, s1, s2) /* Adds register */ \
"mov " #s1 ", " #s1 ", ror #16 \n" /* halves straight */ \
"add " #sum ", " #s1 ", " #s2 ", lsl #16 \n" /* Clobbers 's1' */ \
"add " #s1 ", " #s1 ", " #s2 ", lsr #16 \n" \
"mov " #s1 ", " #s1 ", lsl #16 \n" \
"orr " #sum ", " #s1 ", " #sum ", lsr #16 \n"
/* As above, but the second vector is offset by one halfword, so its
   halves come from two adjacent registers (s1 and s2) */
#define ADDHALFXREGS(sum, s1, s2) /* Adds register */ \
"add " #s1 ", " #s1 ", " #sum ", lsl #16 \n" /* halves across. */ \
"add " #sum ", " #s2 ", " #sum ", lsr #16 \n" /* Clobbers 's1'. */ \
"mov " #sum ", " #sum ", lsl #16 \n" \
"orr " #sum ", " #sum ", " #s1 ", lsr #16 \n"
asm volatile (
#if ORDER > 16
"mov %[res], #0 \n"
#endif
/* Bit 1 of f2 clear: f2 is word aligned, take the path at 20: */
"tst %[f2], #2 \n"
"beq 20f \n"
/* 10: f2/s2 are only halfword aligned. Fetch one leading halfword of
   each, then continue with word loads; the pending halfword is carried
   between groups in r3 (f2) and r4/r5 (s2). */
"10: \n"
"ldrh r4, [%[s2]], #2 \n"
"mov r4, r4, lsl #16 \n"
"ldrh r3, [%[f2]], #2 \n"
#if ORDER > 16
"mov r3, r3, lsl #16 \n"
"1: \n"
"ldmia %[v1], {r0,r1} \n"
"smlabt %[res], r0, r3, %[res] \n"
#else
"ldmia %[v1], {r0,r1} \n"
"smulbb %[res], r0, r3 \n"
#endif
"ldmia %[f2]!, {r2,r3} \n"
"smlatb %[res], r0, r2, %[res] \n"
"smlabt %[res], r1, r2, %[res] \n"
"smlatb %[res], r1, r3, %[res] \n"
"ldmia %[s2]!, {r2,r5} \n"
ADDHALFXREGS(r0, r4, r2)
ADDHALFXREGS(r1, r2, r5)
"stmia %[v1]!, {r0,r1} \n"
"ldmia %[v1], {r0,r1} \n"
"smlabt %[res], r0, r3, %[res] \n"
"ldmia %[f2]!, {r2,r3} \n"
"smlatb %[res], r0, r2, %[res] \n"
"smlabt %[res], r1, r2, %[res] \n"
"smlatb %[res], r1, r3, %[res] \n"
"ldmia %[s2]!, {r2,r4} \n"
ADDHALFXREGS(r0, r5, r2)
ADDHALFXREGS(r1, r2, r4)
"stmia %[v1]!, {r0,r1} \n"
"ldmia %[v1], {r0,r1} \n"
"smlabt %[res], r0, r3, %[res] \n"
"ldmia %[f2]!, {r2,r3} \n"
"smlatb %[res], r0, r2, %[res] \n"
"smlabt %[res], r1, r2, %[res] \n"
"smlatb %[res], r1, r3, %[res] \n"
"ldmia %[s2]!, {r2,r5} \n"
ADDHALFXREGS(r0, r4, r2)
ADDHALFXREGS(r1, r2, r5)
"stmia %[v1]!, {r0,r1} \n"
"ldmia %[v1], {r0,r1} \n"
"smlabt %[res], r0, r3, %[res] \n"
"ldmia %[f2]!, {r2,r3} \n"
"smlatb %[res], r0, r2, %[res] \n"
"smlabt %[res], r1, r2, %[res] \n"
"smlatb %[res], r1, r3, %[res] \n"
"ldmia %[s2]!, {r2,r4} \n"
ADDHALFXREGS(r0, r5, r2)
ADDHALFXREGS(r1, r2, r4)
"stmia %[v1]!, {r0,r1} \n"
#if ORDER > 16
"subs %[cnt], %[cnt], #1 \n"
"bne 1b \n"
#endif
"b 99f \n"
/* 20: f2 (and, per the contract above, s2) are word aligned */
"20: \n"
"1: \n"
"ldmia %[v1], {r1,r2} \n"
"ldmia %[f2]!, {r3,r4} \n"
#if ORDER > 16
"smlabb %[res], r1, r3, %[res] \n"
#else
"smulbb %[res], r1, r3 \n"
#endif
"smlatt %[res], r1, r3, %[res] \n"
"smlabb %[res], r2, r4, %[res] \n"
"smlatt %[res], r2, r4, %[res] \n"
"ldmia %[s2]!, {r3,r4} \n"
ADDHALFREGS(r0, r1, r3)
ADDHALFREGS(r1, r2, r4)
"stmia %[v1]!, {r0,r1} \n"
REPEAT_3(
"ldmia %[v1], {r1,r2} \n"
"ldmia %[f2]!, {r3,r4} \n"
"smlabb %[res], r1, r3, %[res] \n"
"smlatt %[res], r1, r3, %[res] \n"
"smlabb %[res], r2, r4, %[res] \n"
"smlatt %[res], r2, r4, %[res] \n"
"ldmia %[s2]!, {r3,r4} \n"
ADDHALFREGS(r0, r1, r3)
ADDHALFREGS(r1, r2, r4)
"stmia %[v1]!, {r0,r1} \n"
)
#if ORDER > 16
"subs %[cnt], %[cnt], #1 \n"
"bne 1b \n"
#endif
"99: \n"
: /* outputs */
#if ORDER > 16
[cnt]"+r"(cnt),
#endif
[v1] "+r"(v1),
[f2] "+r"(f2),
[s2] "+r"(s2),
[res]"=r"(res)
: /* inputs */
: /* clobbers */
"r0", "r1", "r2", "r3", "r4", "r5", "cc", "memory"
);
return res;
}
/* Calculate scalarproduct, then subtract a 2nd vector (fused for performance)
 *
 * Accumulates the products of the 16 bit elements of v1 and f2 into the
 * returned value and stores v1[i] - s2[i] back into v1. Each product uses
 * the v1 value that was loaded before its difference is stored.
 *
 * This version fetches data as 32 bit words, and *requires* v1 to be
 * 32 bit aligned. It also requires that f2 and s2 are either both 32 bit
 * aligned or both unaligned. If either condition isn't met, it will either
 * result in a data abort or incorrect results.
 *
 * With ORDER > 16 the unrolled sequence is run ORDER>>4 times, otherwise
 * it runs once. NOTE(review): REPEAT_3 is defined outside this function;
 * presumably it expands its argument three times, which would make one
 * pass of the aligned path cover 16 elements - confirm. */
static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
{
int res;
#if ORDER > 16
int cnt = ORDER>>4;
#endif
/* Asm fragment for the aligned path: leaves the two independent 16 bit
 * differences of 's1' and 's2' (upper minus upper, lower minus lower)
 * packed in 'dif'. */
#define SUBHALFREGS(dif, s1, s2) /* Subtracts reg. */ \
"mov " #s1 ", " #s1 ", ror #16 \n" /* halves straight */ \
"sub " #dif ", " #s1 ", " #s2 ", lsl #16 \n" /* Clobbers 's1' */ \
"sub " #s1 ", " #s1 ", " #s2 ", lsr #16 \n" \
"mov " #s1 ", " #s1 ", lsl #16 \n" \
"orr " #dif ", " #s1 ", " #dif ", lsr #16 \n"
/* Asm fragment for the unaligned path: subtracts the upper half of 's1'
 * from the lower half of 'dif' and the lower half of 's2' from the upper
 * half of 'dif', leaving the packed result in 'dif'. */
#define SUBHALFXREGS(dif, s1, s2, msk) /* Subtracts reg. */ \
"sub " #s1 ", " #dif ", " #s1 ", lsr #16 \n" /* halves across. */ \
"and " #s1 ", " #s1 ", " #msk " \n" /* Needs msk = */ \
"rsb " #dif ", " #s2 ", " #dif ", lsr #16 \n" /* 0x0000ffff, */ \
"orr " #dif ", " #s1 ", " #dif ", lsl #16 \n" /* clobbers 's1'. */
asm volatile (
#if ORDER > 16
/* The looped variant accumulates with smla*, so start from zero. */
"mov %[res], #0 \n"
#endif
/* Bit 1 of the f2 address clear: f2 is word aligned, use the path at 20: */
"tst %[f2], #2 \n"
"beq 20f \n"
"10: \n"
/* Unaligned f2/s2. r6 = 0x0000ffff, the mask SUBHALFXREGS needs. */
"mov r6, #0xff \n"
"orr r6, r6, #0xff00 \n"
/* Consume one leading halfword of s2 (kept in the upper half of r4) and
 * of f2 (r3) so that the word loads that follow are aligned. */
"ldrh r4, [%[s2]], #2 \n"
"mov r4, r4, lsl #16 \n"
"ldrh r3, [%[f2]], #2 \n"
#if ORDER > 16
"mov r3, r3, lsl #16 \n"
"1: \n"
"ldmia %[v1], {r0,r1} \n"
"smlabt %[res], r0, r3, %[res] \n"
#else
/* Single pass: the first product initialises res instead of adding to it;
 * r3 still holds the halfword in its lower half here. */
"ldmia %[v1], {r0,r1} \n"
"smulbb %[res], r0, r3 \n"
#endif
/* Each group below multiplies across register halves because the f2 data
 * is offset by one halfword relative to v1. r3 carries the last f2 word
 * into the next group; r4 and r5 alternate as the carried s2 word. */
"ldmia %[f2]!, {r2,r3} \n"
"smlatb %[res], r0, r2, %[res] \n"
"smlabt %[res], r1, r2, %[res] \n"
"smlatb %[res], r1, r3, %[res] \n"
"ldmia %[s2]!, {r2,r5} \n"
SUBHALFXREGS(r0, r4, r2, r6)
SUBHALFXREGS(r1, r2, r5, r6)
"stmia %[v1]!, {r0,r1} \n"
"ldmia %[v1], {r0,r1} \n"
"smlabt %[res], r0, r3, %[res] \n"
"ldmia %[f2]!, {r2,r3} \n"
"smlatb %[res], r0, r2, %[res] \n"
"smlabt %[res], r1, r2, %[res] \n"
"smlatb %[res], r1, r3, %[res] \n"
"ldmia %[s2]!, {r2,r4} \n"
SUBHALFXREGS(r0, r5, r2, r6)
SUBHALFXREGS(r1, r2, r4, r6)
"stmia %[v1]!, {r0,r1} \n"
"ldmia %[v1], {r0,r1} \n"
"smlabt %[res], r0, r3, %[res] \n"
"ldmia %[f2]!, {r2,r3} \n"
"smlatb %[res], r0, r2, %[res] \n"
"smlabt %[res], r1, r2, %[res] \n"
"smlatb %[res], r1, r3, %[res] \n"
"ldmia %[s2]!, {r2,r5} \n"
SUBHALFXREGS(r0, r4, r2, r6)
SUBHALFXREGS(r1, r2, r5, r6)
"stmia %[v1]!, {r0,r1} \n"
"ldmia %[v1], {r0,r1} \n"
"smlabt %[res], r0, r3, %[res] \n"
"ldmia %[f2]!, {r2,r3} \n"
"smlatb %[res], r0, r2, %[res] \n"
"smlabt %[res], r1, r2, %[res] \n"
"smlatb %[res], r1, r3, %[res] \n"
"ldmia %[s2]!, {r2,r4} \n"
SUBHALFXREGS(r0, r5, r2, r6)
SUBHALFXREGS(r1, r2, r4, r6)
"stmia %[v1]!, {r0,r1} \n"
#if ORDER > 16
"subs %[cnt], %[cnt], #1 \n"
"bne 1b \n"
#endif
"b 99f \n"
"20: \n"
/* Aligned f2/s2: halves line up, so multiply and subtract straight. */
"1: \n"
"ldmia %[v1], {r1,r2} \n"
"ldmia %[f2]!, {r3,r4} \n"
#if ORDER > 16
"smlabb %[res], r1, r3, %[res] \n"
#else
/* Single pass: the first product initialises res. */
"smulbb %[res], r1, r3 \n"
#endif
"smlatt %[res], r1, r3, %[res] \n"
"smlabb %[res], r2, r4, %[res] \n"
"smlatt %[res], r2, r4, %[res] \n"
"ldmia %[s2]!, {r3,r4} \n"
SUBHALFREGS(r0, r1, r3)
SUBHALFREGS(r1, r2, r4)
"stmia %[v1]!, {r0,r1} \n"
REPEAT_3(
"ldmia %[v1], {r1,r2} \n"
"ldmia %[f2]!, {r3,r4} \n"
"smlabb %[res], r1, r3, %[res] \n"
"smlatt %[res], r1, r3, %[res] \n"
"smlabb %[res], r2, r4, %[res] \n"
"smlatt %[res], r2, r4, %[res] \n"
"ldmia %[s2]!, {r3,r4} \n"
SUBHALFREGS(r0, r1, r3)
SUBHALFREGS(r1, r2, r4)
"stmia %[v1]!, {r0,r1} \n"
)
#if ORDER > 16
"subs %[cnt], %[cnt], #1 \n"
"bne 1b \n"
#endif
"99: \n"
: /* outputs */
#if ORDER > 16
[cnt]"+r"(cnt),
#endif
[v1] "+r"(v1),
[f2] "+r"(f2),
[s2] "+r"(s2),
[res]"=r"(res)
: /* inputs */
: /* clobbers */
"r0", "r1", "r2", "r3", "r4", "r5", "r6", "cc", "memory"
);
return res;
}
/* Returns the sum of the products of the 16 bit elements of v1 and v2.
 * Both arrays are only read.
 *
 * This version fetches data as 32 bit words, and *requires* v1 to be
 * 32 bit aligned, otherwise it will result either in a data abort, or
 * incorrect results (if ARM aligncheck is disabled).
 *
 * With ORDER > 32 the unrolled sequence is run ORDER>>5 times, otherwise
 * it runs once. NOTE(review): REPEAT_MLA is defined outside this function;
 * how many elements one pass covers depends on its expansion count -
 * confirm against its definition. */
static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
{
int res;
#if ORDER > 32
int cnt = ORDER>>5;
#endif
asm volatile (
#if ORDER > 32
/* The looped variant accumulates with smla*, so start from zero. */
"mov %[res], #0 \n"
#endif
/* Bit 1 of the v2 address clear: v2 is word aligned, use the path at 20: */
"tst %[v2], #2 \n"
"beq 20f \n"
"10: \n"
/* Unaligned v2: consume one leading halfword so that the following word
 * loads from v2 are aligned, then multiply across register halves. */
"ldrh r3, [%[v2]], #2 \n"
#if ORDER > 32
"mov r3, r3, lsl #16 \n"
"1: \n"
"ldmia %[v1]!, {r0,r1} \n"
"smlabt %[res], r0, r3, %[res] \n"
#else
/* Single pass: the first product initialises res; r3 still holds the
 * halfword in its lower half here. */
"ldmia %[v1]!, {r0,r1} \n"
"smulbb %[res], r0, r3 \n"
#endif
"ldmia %[v2]!, {r2,r3} \n"
"smlatb %[res], r0, r2, %[res] \n"
"smlabt %[res], r1, r2, %[res] \n"
"smlatb %[res], r1, r3, %[res] \n"
/* r3 carries the last v2 word from one group into the next. */
REPEAT_MLA(
"ldmia %[v1]!, {r0,r1} \n"
"smlabt %[res], r0, r3, %[res] \n"
"ldmia %[v2]!, {r2,r3} \n"
"smlatb %[res], r0, r2, %[res] \n"
"smlabt %[res], r1, r2, %[res] \n"
"smlatb %[res], r1, r3, %[res] \n"
)
#if ORDER > 32
"subs %[cnt], %[cnt], #1 \n"
"bne 1b \n"
#endif
"b 99f \n"
"20: \n"
/* Aligned v2: register halves line up, multiply them straight. */
"1: \n"
"ldmia %[v1]!, {r0,r1} \n"
"ldmia %[v2]!, {r2,r3} \n"
#if ORDER > 32
"smlabb %[res], r0, r2, %[res] \n"
#else
/* Single pass: the first product initialises res. */
"smulbb %[res], r0, r2 \n"
#endif
"smlatt %[res], r0, r2, %[res] \n"
"smlabb %[res], r1, r3, %[res] \n"
"smlatt %[res], r1, r3, %[res] \n"
REPEAT_MLA(
"ldmia %[v1]!, {r0,r1} \n"
"ldmia %[v2]!, {r2,r3} \n"
"smlabb %[res], r0, r2, %[res] \n"
"smlatt %[res], r0, r2, %[res] \n"
"smlabb %[res], r1, r3, %[res] \n"
"smlatt %[res], r1, r3, %[res] \n"
)
#if ORDER > 32
"subs %[cnt], %[cnt], #1 \n"
"bne 1b \n"
#endif
"99: \n"
: /* outputs */
#if ORDER > 32
[cnt]"+r"(cnt),
#endif
[v1] "+r"(v1),
[v2] "+r"(v2),
[res]"=r"(res)
: /* inputs */
: /* clobbers */
"r0", "r1", "r2", "r3", "cc", "memory"
);
return res;
}

View file

@ -0,0 +1,490 @@
/*
libdemac - A Monkey's Audio decoder
$Id$
Copyright (C) Dave Chapman 2007
ARMv6 vector math copyright (C) 2008 Jens Arnold
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
*/
/* Defined without a value. NOTE(review): presumably tells the including
 * code that this file provides the fused vector_sp_add()/vector_sp_sub()
 * routines - confirm against the includer. */
#define FUSED_VECTOR_MATH
/* Unrolling helper for the asm below: pastes its argument three times when
 * ORDER > 16 and once otherwise. */
#if ORDER > 16
#define REPEAT_BLOCK(x) x x x
#else
#define REPEAT_BLOCK(x) x
#endif
/* Calculate scalarproduct, then add a 2nd vector (fused for performance)
 *
 * Accumulates the products of the 16 bit elements of v1 and f2 into the
 * returned value and stores v1[i] + s2[i] back into v1. Each product uses
 * the v1 value that was loaded before its sum is stored.
 *
 * This version fetches data as 32 bit words, and *requires* v1 to be
 * 32 bit aligned. It also requires that f2 and s2 are either both 32 bit
 * aligned or both unaligned. If either condition isn't met, it will either
 * result in a data abort or incorrect results.
 *
 * One pass of the unrolled code covers 32 elements when REPEAT_BLOCK
 * expands three times and 16 when it expands once; with ORDER > 32 the
 * pass is repeated ORDER>>5 times. */
static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
{
int res;
#if ORDER > 32
int cnt = ORDER>>5;
#endif
asm volatile (
#if ORDER > 32
/* The looped variant accumulates with smlad*, so start from zero. */
"mov %[res], #0 \n"
#endif
/* Bit 1 of the f2 address clear: f2 is word aligned, use the path at 20: */
"tst %[f2], #2 \n"
"beq 20f \n"
"10: \n"
/* Unaligned f2/s2: consume one leading halfword of each so the following
 * word loads are aligned, and keep it in the upper half of r3 / r6. */
"ldrh r3, [%[f2]], #2 \n"
"ldrh r6, [%[s2]], #2 \n"
"ldmia %[f2]!, {r2,r4} \n"
"mov r3, r3, lsl #16 \n"
"mov r6, r6, lsl #16 \n"
"1: \n"
/* pkhtb/pkhbt re-pair halfwords taken from neighbouring registers so that
 * each register again holds the two values belonging to one v1 word;
 * smladx multiplies with the halves of its second operand exchanged. */
"ldmia %[s2]!, {r5,r7} \n"
"pkhtb r3, r3, r2 \n"
"pkhtb r2, r2, r4 \n"
"ldrd r0, [%[v1]] \n"
"mov r5, r5, ror #16 \n"
"pkhtb r6, r5, r6, asr #16 \n"
"pkhbt r5, r5, r7, lsl #16 \n"
#if ORDER > 32
"smladx %[res], r0, r3, %[res] \n"
#else
/* Single pass: the first dual product initialises res. */
"smuadx %[res], r0, r3 \n"
#endif
"smladx %[res], r1, r2, %[res] \n"
"ldmia %[f2]!, {r2,r3} \n"
/* sadd16 adds the two halfword lanes independently. */
"sadd16 r0, r0, r6 \n"
"sadd16 r1, r1, r5 \n"
"strd r0, [%[v1]], #8 \n"
/* The carried f2 word alternates between r4 and r3, the carried s2 word
 * between r7 and r6, from one group of four elements to the next. */
REPEAT_BLOCK(
"ldmia %[s2]!, {r5,r6} \n"
"pkhtb r4, r4, r2 \n"
"pkhtb r2, r2, r3 \n"
"ldrd r0, [%[v1]] \n"
"mov r5, r5, ror #16 \n"
"pkhtb r7, r5, r7, asr #16 \n"
"pkhbt r5, r5, r6, lsl #16 \n"
"smladx %[res], r0, r4, %[res] \n"
"smladx %[res], r1, r2, %[res] \n"
"ldmia %[f2]!, {r2,r4} \n"
"sadd16 r0, r0, r7 \n"
"sadd16 r1, r1, r5 \n"
"strd r0, [%[v1]], #8 \n"
"ldmia %[s2]!, {r5,r7} \n"
"pkhtb r3, r3, r2 \n"
"pkhtb r2, r2, r4 \n"
"ldrd r0, [%[v1]] \n"
"mov r5, r5, ror #16 \n"
"pkhtb r6, r5, r6, asr #16 \n"
"pkhbt r5, r5, r7, lsl #16 \n"
"smladx %[res], r0, r3, %[res] \n"
"smladx %[res], r1, r2, %[res] \n"
"ldmia %[f2]!, {r2,r3} \n"
"sadd16 r0, r0, r6 \n"
"sadd16 r1, r1, r5 \n"
"strd r0, [%[v1]], #8 \n"
)
"ldmia %[s2]!, {r5,r6} \n"
"pkhtb r4, r4, r2 \n"
"pkhtb r2, r2, r3 \n"
"ldrd r0, [%[v1]] \n"
"mov r5, r5, ror #16 \n"
"pkhtb r7, r5, r7, asr #16 \n"
"pkhbt r5, r5, r6, lsl #16 \n"
"smladx %[res], r0, r4, %[res] \n"
"smladx %[res], r1, r2, %[res] \n"
#if ORDER > 32
"subs %[cnt], %[cnt], #1 \n"
/* Executed only while cnt is still non-zero: preload f2 for the next
 * pass. The instructions up to the branch leave the flags untouched. */
"ldmneia %[f2]!, {r2,r4} \n"
"sadd16 r0, r0, r7 \n"
"sadd16 r1, r1, r5 \n"
"strd r0, [%[v1]], #8 \n"
"bne 1b \n"
#else
"sadd16 r0, r0, r7 \n"
"sadd16 r1, r1, r5 \n"
"strd r0, [%[v1]], #8 \n"
#endif
"b 99f \n"
"20: \n"
/* Aligned f2/s2: ldrd fetches register pairs (r4/r5 from f2, r6/r7 from
 * s2, r0/r1 or r2/r3 from v1); the next v1 pair is preloaded from
 * [v1, #8] before the current pair is stored. */
"ldrd r4, [%[f2]], #8 \n"
"ldrd r0, [%[v1]] \n"
#if ORDER > 32
"1: \n"
"smlad %[res], r0, r4, %[res] \n"
#else
/* Single pass: the first dual product initialises res. */
"smuad %[res], r0, r4 \n"
#endif
"ldrd r6, [%[s2]], #8 \n"
"smlad %[res], r1, r5, %[res] \n"
"ldrd r4, [%[f2]], #8 \n"
"ldrd r2, [%[v1], #8] \n"
"sadd16 r0, r0, r6 \n"
"sadd16 r1, r1, r7 \n"
"strd r0, [%[v1]], #8 \n"
REPEAT_BLOCK(
"smlad %[res], r2, r4, %[res] \n"
"ldrd r6, [%[s2]], #8 \n"
"smlad %[res], r3, r5, %[res] \n"
"ldrd r4, [%[f2]], #8 \n"
"ldrd r0, [%[v1], #8] \n"
"sadd16 r2, r2, r6 \n"
"sadd16 r3, r3, r7 \n"
"strd r2, [%[v1]], #8 \n"
"smlad %[res], r0, r4, %[res] \n"
"ldrd r6, [%[s2]], #8 \n"
"smlad %[res], r1, r5, %[res] \n"
"ldrd r4, [%[f2]], #8 \n"
"ldrd r2, [%[v1], #8] \n"
"sadd16 r0, r0, r6 \n"
"sadd16 r1, r1, r7 \n"
"strd r0, [%[v1]], #8 \n"
)
"smlad %[res], r2, r4, %[res] \n"
"ldrd r6, [%[s2]], #8 \n"
"smlad %[res], r3, r5, %[res] \n"
#if ORDER > 32
"subs %[cnt], %[cnt], #1 \n"
/* Preloads for the next pass, skipped once cnt has reached zero. */
"ldrned r4, [%[f2]], #8 \n"
"ldrned r0, [%[v1], #8] \n"
"sadd16 r2, r2, r6 \n"
"sadd16 r3, r3, r7 \n"
"strd r2, [%[v1]], #8 \n"
"bne 1b \n"
#else
"sadd16 r2, r2, r6 \n"
"sadd16 r3, r3, r7 \n"
"strd r2, [%[v1]], #8 \n"
#endif
"99: \n"
: /* outputs */
#if ORDER > 32
[cnt]"+r"(cnt),
#endif
[v1] "+r"(v1),
[f2] "+r"(f2),
[s2] "+r"(s2),
[res]"=r"(res)
: /* inputs */
: /* clobbers */
"r0", "r1", "r2", "r3", "r4",
"r5", "r6", "r7", "cc", "memory"
);
return res;
}
/* Calculate scalarproduct, then subtract a 2nd vector (fused for performance)
 *
 * Accumulates the products of the 16 bit elements of v1 and f2 into the
 * returned value and stores v1[i] - s2[i] back into v1. Each product uses
 * the v1 value that was loaded before its difference is stored.
 *
 * This version fetches data as 32 bit words, and *requires* v1 to be
 * 32 bit aligned. It also requires that f2 and s2 are either both 32 bit
 * aligned or both unaligned. If either condition isn't met, it will either
 * result in a data abort or incorrect results.
 *
 * One pass of the unrolled code covers 32 elements when REPEAT_BLOCK
 * expands three times and 16 when it expands once; with ORDER > 32 the
 * pass is repeated ORDER>>5 times. */
static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
{
int res;
#if ORDER > 32
int cnt = ORDER>>5;
#endif
asm volatile (
#if ORDER > 32
/* The looped variant accumulates with smlad*, so start from zero. */
"mov %[res], #0 \n"
#endif
/* Bit 1 of the f2 address clear: f2 is word aligned, use the path at 20: */
"tst %[f2], #2 \n"
"beq 20f \n"
"10: \n"
/* Unaligned f2/s2: consume one leading halfword of each so the following
 * word loads are aligned, and keep it in the upper half of r3 / r6. */
"ldrh r3, [%[f2]], #2 \n"
"ldrh r6, [%[s2]], #2 \n"
"ldmia %[f2]!, {r2,r4} \n"
"mov r3, r3, lsl #16 \n"
"mov r6, r6, lsl #16 \n"
"1: \n"
/* pkhtb/pkhbt re-pair halfwords taken from neighbouring registers so that
 * each register again holds the two values belonging to one v1 word;
 * smladx multiplies with the halves of its second operand exchanged. */
"ldmia %[s2]!, {r5,r7} \n"
"pkhtb r3, r3, r2 \n"
"pkhtb r2, r2, r4 \n"
"ldrd r0, [%[v1]] \n"
"mov r5, r5, ror #16 \n"
"pkhtb r6, r5, r6, asr #16 \n"
"pkhbt r5, r5, r7, lsl #16 \n"
#if ORDER > 32
"smladx %[res], r0, r3, %[res] \n"
#else
/* Single pass: the first dual product initialises res. */
"smuadx %[res], r0, r3 \n"
#endif
"smladx %[res], r1, r2, %[res] \n"
"ldmia %[f2]!, {r2,r3} \n"
/* ssub16 subtracts the two halfword lanes independently. */
"ssub16 r0, r0, r6 \n"
"ssub16 r1, r1, r5 \n"
"strd r0, [%[v1]], #8 \n"
/* The carried f2 word alternates between r4 and r3, the carried s2 word
 * between r7 and r6, from one group of four elements to the next. */
REPEAT_BLOCK(
"ldmia %[s2]!, {r5,r6} \n"
"pkhtb r4, r4, r2 \n"
"pkhtb r2, r2, r3 \n"
"ldrd r0, [%[v1]] \n"
"mov r5, r5, ror #16 \n"
"pkhtb r7, r5, r7, asr #16 \n"
"pkhbt r5, r5, r6, lsl #16 \n"
"smladx %[res], r0, r4, %[res] \n"
"smladx %[res], r1, r2, %[res] \n"
"ldmia %[f2]!, {r2,r4} \n"
"ssub16 r0, r0, r7 \n"
"ssub16 r1, r1, r5 \n"
"strd r0, [%[v1]], #8 \n"
"ldmia %[s2]!, {r5,r7} \n"
"pkhtb r3, r3, r2 \n"
"pkhtb r2, r2, r4 \n"
"ldrd r0, [%[v1]] \n"
"mov r5, r5, ror #16 \n"
"pkhtb r6, r5, r6, asr #16 \n"
"pkhbt r5, r5, r7, lsl #16 \n"
"smladx %[res], r0, r3, %[res] \n"
"smladx %[res], r1, r2, %[res] \n"
"ldmia %[f2]!, {r2,r3} \n"
"ssub16 r0, r0, r6 \n"
"ssub16 r1, r1, r5 \n"
"strd r0, [%[v1]], #8 \n"
)
"ldmia %[s2]!, {r5,r6} \n"
"pkhtb r4, r4, r2 \n"
"pkhtb r2, r2, r3 \n"
"ldrd r0, [%[v1]] \n"
"mov r5, r5, ror #16 \n"
"pkhtb r7, r5, r7, asr #16 \n"
"pkhbt r5, r5, r6, lsl #16 \n"
"smladx %[res], r0, r4, %[res] \n"
"smladx %[res], r1, r2, %[res] \n"
#if ORDER > 32
"subs %[cnt], %[cnt], #1 \n"
/* Executed only while cnt is still non-zero: preload f2 for the next
 * pass. The instructions up to the branch leave the flags untouched. */
"ldmneia %[f2]!, {r2,r4} \n"
"ssub16 r0, r0, r7 \n"
"ssub16 r1, r1, r5 \n"
"strd r0, [%[v1]], #8 \n"
"bne 1b \n"
#else
"ssub16 r0, r0, r7 \n"
"ssub16 r1, r1, r5 \n"
"strd r0, [%[v1]], #8 \n"
#endif
"b 99f \n"
"20: \n"
/* Aligned f2/s2: ldrd fetches register pairs (r4/r5 from f2, r6/r7 from
 * s2, r0/r1 or r2/r3 from v1); the next v1 pair is preloaded from
 * [v1, #8] before the current pair is stored. */
"ldrd r4, [%[f2]], #8 \n"
"ldrd r0, [%[v1]] \n"
#if ORDER > 32
"1: \n"
"smlad %[res], r0, r4, %[res] \n"
#else
/* Single pass: the first dual product initialises res. */
"smuad %[res], r0, r4 \n"
#endif
"ldrd r6, [%[s2]], #8 \n"
"smlad %[res], r1, r5, %[res] \n"
"ldrd r4, [%[f2]], #8 \n"
"ldrd r2, [%[v1], #8] \n"
"ssub16 r0, r0, r6 \n"
"ssub16 r1, r1, r7 \n"
"strd r0, [%[v1]], #8 \n"
REPEAT_BLOCK(
"smlad %[res], r2, r4, %[res] \n"
"ldrd r6, [%[s2]], #8 \n"
"smlad %[res], r3, r5, %[res] \n"
"ldrd r4, [%[f2]], #8 \n"
"ldrd r0, [%[v1], #8] \n"
"ssub16 r2, r2, r6 \n"
"ssub16 r3, r3, r7 \n"
"strd r2, [%[v1]], #8 \n"
"smlad %[res], r0, r4, %[res] \n"
"ldrd r6, [%[s2]], #8 \n"
"smlad %[res], r1, r5, %[res] \n"
"ldrd r4, [%[f2]], #8 \n"
"ldrd r2, [%[v1], #8] \n"
"ssub16 r0, r0, r6 \n"
"ssub16 r1, r1, r7 \n"
"strd r0, [%[v1]], #8 \n"
)
"smlad %[res], r2, r4, %[res] \n"
"ldrd r6, [%[s2]], #8 \n"
"smlad %[res], r3, r5, %[res] \n"
#if ORDER > 32
"subs %[cnt], %[cnt], #1 \n"
/* Preloads for the next pass, skipped once cnt has reached zero. */
"ldrned r4, [%[f2]], #8 \n"
"ldrned r0, [%[v1], #8] \n"
"ssub16 r2, r2, r6 \n"
"ssub16 r3, r3, r7 \n"
"strd r2, [%[v1]], #8 \n"
"bne 1b \n"
#else
"ssub16 r2, r2, r6 \n"
"ssub16 r3, r3, r7 \n"
"strd r2, [%[v1]], #8 \n"
#endif
"99: \n"
: /* outputs */
#if ORDER > 32
[cnt]"+r"(cnt),
#endif
[v1] "+r"(v1),
[f2] "+r"(f2),
[s2] "+r"(s2),
[res]"=r"(res)
: /* inputs */
: /* clobbers */
"r0", "r1", "r2", "r3", "r4",
"r5", "r6", "r7", "cc", "memory"
);
return res;
}
/* Returns the sum of the products of the 16 bit elements of v1 and v2.
 * Both arrays are only read.
 *
 * This version fetches data as 32 bit words, and *requires* v1 to be
 * 32 bit aligned, otherwise it will result either in a data abort, or
 * incorrect results (if ARM aligncheck is disabled).
 *
 * One pass of the unrolled code covers 32 elements when REPEAT_BLOCK
 * expands three times and 16 when it expands once; with ORDER > 32 the
 * pass is repeated ORDER>>5 times. */
static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
{
int res;
#if ORDER > 32
int cnt = ORDER>>5;
#endif
asm volatile (
#if ORDER > 32
/* The looped variant accumulates with smlad*, so start from zero. */
"mov %[res], #0 \n"
#endif
/* Bit 1 of the v2 address clear: v2 is word aligned, use the path at 20: */
"tst %[v2], #2 \n"
"beq 20f \n"
"10: \n"
/* Unaligned v2: clear bit 1 of the pointer so the loads below are word
 * aligned (this starts reading at the word that contains v2[0]), then
 * re-pair halfwords from adjacent registers with pkhtb and multiply with
 * smladx, which exchanges the halves of its second operand. */
"bic %[v2], %[v2], #2 \n"
"ldmia %[v2]!, {r5-r7} \n"
"ldrd r0, [%[v1]], #8 \n"
"1: \n"
"pkhtb r3, r5, r6 \n"
"ldrd r4, [%[v2]], #8 \n"
#if ORDER > 32
"smladx %[res], r0, r3, %[res] \n"
#else
/* Single pass: the first dual product initialises res. */
"smuadx %[res], r0, r3 \n"
#endif
REPEAT_BLOCK(
"pkhtb r0, r6, r7 \n"
"ldrd r2, [%[v1]], #8 \n"
"smladx %[res], r1, r0, %[res] \n"
"pkhtb r1, r7, r4 \n"
"ldrd r6, [%[v2]], #8 \n"
"smladx %[res], r2, r1, %[res] \n"
"pkhtb r2, r4, r5 \n"
"ldrd r0, [%[v1]], #8 \n"
"smladx %[res], r3, r2, %[res] \n"
"pkhtb r3, r5, r6 \n"
"ldrd r4, [%[v2]], #8 \n"
"smladx %[res], r0, r3, %[res] \n"
)
"pkhtb r0, r6, r7 \n"
"ldrd r2, [%[v1]], #8 \n"
"smladx %[res], r1, r0, %[res] \n"
"pkhtb r1, r7, r4 \n"
#if ORDER > 32
"subs %[cnt], %[cnt], #1 \n"
/* The conditional loads preload data for the next pass and are skipped
 * once cnt has reached zero; nothing up to the branch alters the flags. */
"ldrned r6, [%[v2]], #8 \n"
"smladx %[res], r2, r1, %[res] \n"
"pkhtb r2, r4, r5 \n"
"ldrned r0, [%[v1]], #8 \n"
"smladx %[res], r3, r2, %[res] \n"
"bne 1b \n"
#else
"pkhtb r4, r4, r5 \n"
"smladx %[res], r2, r1, %[res] \n"
"smladx %[res], r3, r4, %[res] \n"
#endif
"b 99f \n"
"20: \n"
/* Aligned v2: register halves line up, so plain smlad is used. */
"ldrd r0, [%[v1]], #8 \n"
"ldmia %[v2]!, {r5-r7} \n"
"1: \n"
"ldrd r2, [%[v1]], #8 \n"
#if ORDER > 32
"smlad %[res], r0, r5, %[res] \n"
#else
/* Single pass: the first dual product initialises res. */
"smuad %[res], r0, r5 \n"
#endif
REPEAT_BLOCK(
"ldrd r4, [%[v2]], #8 \n"
"smlad %[res], r1, r6, %[res] \n"
"ldrd r0, [%[v1]], #8 \n"
"smlad %[res], r2, r7, %[res] \n"
"ldrd r6, [%[v2]], #8 \n"
"smlad %[res], r3, r4, %[res] \n"
"ldrd r2, [%[v1]], #8 \n"
"smlad %[res], r0, r5, %[res] \n"
)
#if ORDER > 32
"ldrd r4, [%[v2]], #8 \n"
"smlad %[res], r1, r6, %[res] \n"
"subs %[cnt], %[cnt], #1 \n"
/* Preloads for the next pass, skipped once cnt has reached zero. */
"ldrned r0, [%[v1]], #8 \n"
"smlad %[res], r2, r7, %[res] \n"
"ldrned r6, [%[v2]], #8 \n"
"smlad %[res], r3, r4, %[res] \n"
"bne 1b \n"
#else
/* Single pass: only one more v2 word is needed, so a plain ldr is used. */
"ldr r4, [%[v2]], #4 \n"
"smlad %[res], r1, r6, %[res] \n"
"smlad %[res], r2, r7, %[res] \n"
"smlad %[res], r3, r4, %[res] \n"
#endif
"99: \n"
: /* outputs */
#if ORDER > 32
[cnt]"+r"(cnt),
#endif
[v1] "+r"(v1),
[v2] "+r"(v2),
[res]"=r"(res)
: /* inputs */
: /* clobbers */
"r0", "r1", "r2", "r3",
"r4", "r5", "r6", "r7", "cc", "memory"
);
return res;
}

View file

@ -0,0 +1,214 @@
/*
libdemac - A Monkey's Audio decoder
$Id$
Copyright (C) Dave Chapman 2007
ARMv7 neon vector math copyright (C) 2010 Jens Arnold
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
*/
/* Defined without a value. NOTE(review): presumably tells the including
 * code that this file provides the fused vector_sp_add()/vector_sp_sub()
 * routines - confirm against the includer. */
#define FUSED_VECTOR_MATH
/* Unrolling helper for the asm below: pastes its argument three times when
 * ORDER > 32, once when ORDER > 16, and not at all otherwise. */
#if ORDER > 32
#define REPEAT_BLOCK(x) x x x
#elif ORDER > 16
#define REPEAT_BLOCK(x) x
#else
#define REPEAT_BLOCK(x)
#endif
/* Calculate scalarproduct, then add a 2nd vector (fused for performance)
 *
 * Accumulates the products of the 16 bit elements of v1 and f2 and returns
 * the low 32 bits of the total; stores v1[i] + s2[i] back into v1. Each
 * product uses the v1 value that was loaded before its sum is stored.
 *
 * Every load/store group handles 16 elements (four d registers). One pass
 * consists of one group plus REPEAT_BLOCK further groups; with ORDER > 64
 * the pass is repeated ORDER>>6 times. */
static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
{
    int res;
#if ORDER > 64
    int cnt = ORDER>>6;
#endif

    asm volatile (
#if ORDER > 64
        /* Looped variant: clear the accumulator, then count down at the top
         * of the loop. None of the NEON instructions in the loop body touch
         * the flags, so the bne at the bottom still sees the subs result. */
        "vmov.i16 q0, #0 \n"
        "1: \n"
        "subs %[cnt], %[cnt], #1 \n"
#endif
        "vld1.16 {d6-d9}, [%[f2]]! \n"
        "vld1.16 {d2-d5}, [%[v1]] \n"
        "vld1.16 {d10-d13}, [%[s2]]! \n"
#if ORDER > 64
        "vmlal.s16 q0, d2, d6 \n"
#else
        /* Single pass: the first widening multiply initialises q0. */
        "vmull.s16 q0, d2, d6 \n"
#endif
        "vmlal.s16 q0, d3, d7 \n"
        "vmlal.s16 q0, d4, d8 \n"
        "vmlal.s16 q0, d5, d9 \n"
        /* q1/q2 alias d2-d5 (v1 data), q5/q6 alias d10-d13 (s2 data). */
        "vadd.i16 q1, q1, q5 \n"
        "vadd.i16 q2, q2, q6 \n"
        "vst1.16 {d2-d5}, [%[v1]]! \n"
        REPEAT_BLOCK(
        "vld1.16 {d6-d9}, [%[f2]]! \n"
        "vld1.16 {d2-d5}, [%[v1]] \n"
        "vld1.16 {d10-d13}, [%[s2]]! \n"
        "vmlal.s16 q0, d2, d6 \n"
        "vmlal.s16 q0, d3, d7 \n"
        "vmlal.s16 q0, d4, d8 \n"
        "vmlal.s16 q0, d5, d9 \n"
        "vadd.i16 q1, q1, q5 \n"
        "vadd.i16 q2, q2, q6 \n"
        "vst1.16 {d2-d5}, [%[v1]]! \n"
        )
#if ORDER > 64
        "bne 1b \n"
#endif
        /* Fold the four 32 bit partial sums in q0 into one value. */
        "vpadd.i32 d0, d0, d1 \n"
        "vpaddl.s32 d0, d0 \n"
        "vmov.32 %[res], d0[0] \n"
        : /* outputs */
#if ORDER > 64
        [cnt]"+r"(cnt),
#endif
        [v1] "+r"(v1),
        [f2] "+r"(f2),
        [s2] "+r"(s2),
        [res]"=r"(res)
        : /* inputs */
        : /* clobbers */
        /* "cc": subs changes the condition flags when ORDER > 64. */
        "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
        "d8", "d9", "d10", "d11", "d12", "d13", "cc", "memory"
    );
    return res;
}
/* Calculate scalarproduct, then subtract a 2nd vector (fused for performance)
 *
 * Accumulates the products of the 16 bit elements of v1 and f2 and returns
 * the low 32 bits of the total; stores v1[i] - s2[i] back into v1. Each
 * product uses the v1 value that was loaded before its difference is stored.
 *
 * Every load/store group handles 16 elements (four d registers). One pass
 * consists of one group plus REPEAT_BLOCK further groups; with ORDER > 64
 * the pass is repeated ORDER>>6 times. */
static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
{
    int res;
#if ORDER > 64
    int cnt = ORDER>>6;
#endif

    asm volatile (
#if ORDER > 64
        /* Looped variant: clear the accumulator, then count down at the top
         * of the loop. None of the NEON instructions in the loop body touch
         * the flags, so the bne at the bottom still sees the subs result. */
        "vmov.i16 q0, #0 \n"
        "1: \n"
        "subs %[cnt], %[cnt], #1 \n"
#endif
        "vld1.16 {d6-d9}, [%[f2]]! \n"
        "vld1.16 {d2-d5}, [%[v1]] \n"
        "vld1.16 {d10-d13}, [%[s2]]! \n"
#if ORDER > 64
        "vmlal.s16 q0, d2, d6 \n"
#else
        /* Single pass: the first widening multiply initialises q0. */
        "vmull.s16 q0, d2, d6 \n"
#endif
        "vmlal.s16 q0, d3, d7 \n"
        "vmlal.s16 q0, d4, d8 \n"
        "vmlal.s16 q0, d5, d9 \n"
        /* q1/q2 alias d2-d5 (v1 data), q5/q6 alias d10-d13 (s2 data). */
        "vsub.i16 q1, q1, q5 \n"
        "vsub.i16 q2, q2, q6 \n"
        "vst1.16 {d2-d5}, [%[v1]]! \n"
        REPEAT_BLOCK(
        "vld1.16 {d6-d9}, [%[f2]]! \n"
        "vld1.16 {d2-d5}, [%[v1]] \n"
        "vld1.16 {d10-d13}, [%[s2]]! \n"
        "vmlal.s16 q0, d2, d6 \n"
        "vmlal.s16 q0, d3, d7 \n"
        "vmlal.s16 q0, d4, d8 \n"
        "vmlal.s16 q0, d5, d9 \n"
        "vsub.i16 q1, q1, q5 \n"
        "vsub.i16 q2, q2, q6 \n"
        "vst1.16 {d2-d5}, [%[v1]]! \n"
        )
#if ORDER > 64
        "bne 1b \n"
#endif
        /* Fold the four 32 bit partial sums in q0 into one value. */
        "vpadd.i32 d0, d0, d1 \n"
        "vpaddl.s32 d0, d0 \n"
        "vmov.32 %[res], d0[0] \n"
        : /* outputs */
#if ORDER > 64
        [cnt]"+r"(cnt),
#endif
        [v1] "+r"(v1),
        [f2] "+r"(f2),
        [s2] "+r"(s2),
        [res]"=r"(res)
        : /* inputs */
        : /* clobbers */
        /* "cc": subs changes the condition flags when ORDER > 64. */
        "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
        "d8", "d9", "d10", "d11", "d12", "d13", "cc", "memory"
    );
    return res;
}
/* Returns the low 32 bits of the sum of the products of the 16 bit elements
 * of v1 and v2. Both arrays are only read.
 *
 * Every load group handles 16 elements (four d registers per vector). One
 * pass consists of one group plus REPEAT_BLOCK further groups; with
 * ORDER > 64 the pass is repeated ORDER>>6 times. */
static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
{
    int res;
#if ORDER > 64
    int cnt = ORDER>>6;
#endif

    asm volatile (
#if ORDER > 64
        /* Looped variant: clear the accumulator, then count down at the top
         * of the loop. None of the NEON instructions in the loop body touch
         * the flags, so the bne at the bottom still sees the subs result. */
        "vmov.i16 q0, #0 \n"
        "1: \n"
        "subs %[cnt], %[cnt], #1 \n"
#endif
        "vld1.16 {d2-d5}, [%[v1]]! \n"
        "vld1.16 {d6-d9}, [%[v2]]! \n"
#if ORDER > 64
        "vmlal.s16 q0, d2, d6 \n"
#else
        /* Single pass: the first widening multiply initialises q0. */
        "vmull.s16 q0, d2, d6 \n"
#endif
        "vmlal.s16 q0, d3, d7 \n"
        "vmlal.s16 q0, d4, d8 \n"
        "vmlal.s16 q0, d5, d9 \n"
        REPEAT_BLOCK(
        "vld1.16 {d2-d5}, [%[v1]]! \n"
        "vld1.16 {d6-d9}, [%[v2]]! \n"
        "vmlal.s16 q0, d2, d6 \n"
        "vmlal.s16 q0, d3, d7 \n"
        "vmlal.s16 q0, d4, d8 \n"
        "vmlal.s16 q0, d5, d9 \n"
        )
#if ORDER > 64
        "bne 1b \n"
#endif
        /* Fold the four 32 bit partial sums in q0 into one value. */
        "vpadd.i32 d0, d0, d1 \n"
        "vpaddl.s32 d0, d0 \n"
        "vmov.32 %[res], d0[0] \n"
        : /* outputs */
#if ORDER > 64
        [cnt]"+r"(cnt),
#endif
        [v1] "+r"(v1),
        [v2] "+r"(v2),
        [res]"=r"(res)
        : /* inputs */
        : /* clobbers */
        /* "cc": subs changes the condition flags when ORDER > 64.
         * "memory": the asm reads v1[]/v2[] through pointer registers only,
         * so the compiler must be told to have those stores completed. */
        "d0", "d1", "d2", "d3", "d4",
        "d5", "d6", "d7", "d8", "d9", "cc", "memory"
    );
    return res;
}

View file

@ -0,0 +1,364 @@
/*
libdemac - A Monkey's Audio decoder
$Id$
Copyright (C) Dave Chapman 2007
Coldfire vector math copyright (C) 2007 Jens Arnold
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
*/
/* Defined without a value. NOTE(review): presumably tells the including
 * code that this file provides the fused vector_sp_add()/vector_sp_sub()
 * routines - confirm against the includer. */
#define FUSED_VECTOR_MATH
/* Statement macro that calls coldfire_set_macsr(0). The routines in this
 * file state that they need the EMAC in signed integer mode.
 * NOTE(review): where this macro gets invoked is not visible here. */
#define PREPARE_SCALARPRODUCT coldfire_set_macsr(0); /* signed integer mode */
/* Unrolling helpers: paste the argument 2, 3 or 7 times. */
#define REPEAT_2(x) x x
#define REPEAT_3(x) x x x
#define REPEAT_7(x) x x x x x x x
/* Calculate scalarproduct, then add a 2nd vector (fused for performance)
 *
 * Accumulates the products of the 16 bit elements of v1 and f2 in EMAC
 * accumulator acc0, returns that sum (movclr.l also clears acc0), and
 * stores v1[i] + s2[i] back into v1.
 *
 * This version fetches data as 32 bit words, and *recommends* v1 to be
 * 32 bit aligned. It also assumes that f2 and s2 are either both 32 bit
 * aligned or both unaligned. Performance will suffer if either condition
 * isn't met. It also needs EMAC in signed integer mode.
 *
 * One pass covers 16 elements (two movem.l groups of four longwords); with
 * ORDER > 16 the pass is repeated ORDER>>4 times. */
static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
{
int res;
#if ORDER > 16
int cnt = ORDER>>4;
#endif
#define ADDHALFREGS(s1, s2, sum) /* Add register halves straight. */ \
"move.l " #s1 ", " #sum "\n" /* 's1' and 's2' can be A or D */ \
"add.l " #s2 ", " #s1 "\n" /* regs, 'sum' must be a D reg. */ \
"clr.w " #sum " \n" /* 's1' is clobbered! */ \
"add.l " #s2 ", " #sum "\n" \
"move.w " #s1 ", " #sum "\n"
#define ADDHALFXREGS(s1, s2, sum) /* Add register halves across. */ \
"clr.w " #sum " \n" /* Needs 'sum' pre-swapped, swaps */ \
"add.l " #s1 ", " #sum "\n" /* 's2', and clobbers 's1'. */ \
"swap " #s2 " \n" /* 's1' can be an A or D reg. */ \
"add.l " #s2 ", " #s1 "\n" /* 'sum' and 's2' must be D regs. */ \
"move.w " #s1 ", " #sum "\n"
asm volatile (
/* Bit 1 of the f2 address clear: f2 is longword aligned, go to 20: */
"move.l %[f2], %%d0 \n"
"and.l #2, %%d0 \n"
"jeq 20f \n"
"10: \n"
/* Unaligned f2/s2: consume one leading word of each so the following
 * longword loads are aligned. The s2 word is swapped into the upper half
 * of d1, as ADDHALFXREGS expects its 'sum' operand pre-swapped. */
"move.w (%[f2])+, %%d0 \n"
"move.w (%[s2])+, %%d1 \n"
"swap %%d1 \n"
"1: \n"
/* Each mac.w multiplies two register halves into acc0 and, in parallel,
 * loads the next longword from f2 or s2 with post-increment. The register
 * that receives the s2 data rotates through d2, d6, d7, d1. */
REPEAT_2(
"movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1 \n"
"mac.w %%d0l, %%d6u, (%[f2])+, %%d0, %%acc0\n"
"mac.w %%d0u, %%d6l, (%[s2])+, %%d2, %%acc0\n"
ADDHALFXREGS(%%d6, %%d2, %%d1)
"mac.w %%d0l, %%d7u, (%[f2])+, %%d0, %%acc0\n"
"mac.w %%d0u, %%d7l, (%[s2])+, %%d6, %%acc0\n"
"move.l %%d1, (%[v1])+ \n"
ADDHALFXREGS(%%d7, %%d6, %%d2)
"mac.w %%d0l, %%a0u, (%[f2])+, %%d0, %%acc0\n"
"mac.w %%d0u, %%a0l, (%[s2])+, %%d7, %%acc0\n"
"move.l %%d2, (%[v1])+ \n"
ADDHALFXREGS(%%a0, %%d7, %%d6)
"mac.w %%d0l, %%a1u, (%[f2])+, %%d0, %%acc0\n"
"mac.w %%d0u, %%a1l, (%[s2])+, %%d1, %%acc0\n"
"move.l %%d6, (%[v1])+ \n"
ADDHALFXREGS(%%a1, %%d1, %%d7)
"move.l %%d7, (%[v1])+ \n"
)
#if ORDER > 16
/* The loop counter lives in the register of %[res]: cnt is passed in
 * through an input tied to that operand. */
"subq.l #1, %[res] \n"
"bne.w 1b \n"
#endif
"jra 99f \n"
"20: \n"
/* Aligned f2/s2: halves line up, so multiply and add them straight. */
"move.l (%[f2])+, %%d0 \n"
"1: \n"
"movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1 \n"
"mac.w %%d0u, %%d6u, (%[s2])+, %%d1, %%acc0\n"
"mac.w %%d0l, %%d6l, (%[f2])+, %%d0, %%acc0\n"
ADDHALFREGS(%%d6, %%d1, %%d2)
"mac.w %%d0u, %%d7u, (%[s2])+, %%d1, %%acc0\n"
"mac.w %%d0l, %%d7l, (%[f2])+, %%d0, %%acc0\n"
"move.l %%d2, (%[v1])+ \n"
ADDHALFREGS(%%d7, %%d1, %%d2)
"mac.w %%d0u, %%a0u, (%[s2])+, %%d1, %%acc0\n"
"mac.w %%d0l, %%a0l, (%[f2])+, %%d0, %%acc0\n"
"move.l %%d2, (%[v1])+ \n"
ADDHALFREGS(%%a0, %%d1, %%d2)
"mac.w %%d0u, %%a1u, (%[s2])+, %%d1, %%acc0\n"
"mac.w %%d0l, %%a1l, (%[f2])+, %%d0, %%acc0\n"
"move.l %%d2, (%[v1])+ \n"
ADDHALFREGS(%%a1, %%d1, %%d2)
"move.l %%d2, (%[v1])+ \n"
"movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1 \n"
"mac.w %%d0u, %%d6u, (%[s2])+, %%d1, %%acc0\n"
"mac.w %%d0l, %%d6l, (%[f2])+, %%d0, %%acc0\n"
ADDHALFREGS(%%d6, %%d1, %%d2)
"mac.w %%d0u, %%d7u, (%[s2])+, %%d1, %%acc0\n"
"mac.w %%d0l, %%d7l, (%[f2])+, %%d0, %%acc0\n"
"move.l %%d2, (%[v1])+ \n"
ADDHALFREGS(%%d7, %%d1, %%d2)
"mac.w %%d0u, %%a0u, (%[s2])+, %%d1, %%acc0\n"
"mac.w %%d0l, %%a0l, (%[f2])+, %%d0, %%acc0\n"
"move.l %%d2, (%[v1])+ \n"
ADDHALFREGS(%%a0, %%d1, %%d2)
"mac.w %%d0u, %%a1u, (%[s2])+, %%d1, %%acc0\n"
#if ORDER > 16
"mac.w %%d0l, %%a1l, (%[f2])+, %%d0, %%acc0\n"
#else
/* Single pass: last product, so no further f2 longword is fetched. */
"mac.w %%d0l, %%a1l, %%acc0 \n"
#endif
"move.l %%d2, (%[v1])+ \n"
ADDHALFREGS(%%a1, %%d1, %%d2)
"move.l %%d2, (%[v1])+ \n"
#if ORDER > 16
"subq.l #1, %[res] \n"
"bne.w 1b \n"
#endif
"99: \n"
/* Fetch the accumulated sum into res and clear acc0. */
"movclr.l %%acc0, %[res] \n"
: /* outputs */
[v1]"+a"(v1),
[f2]"+a"(f2),
[s2]"+a"(s2),
[res]"=d"(res)
: /* inputs */
#if ORDER > 16
[cnt]"[res]"(cnt)
#endif
: /* clobbers */
"d0", "d1", "d2", "d6", "d7",
"a0", "a1", "memory"
);
return res;
}
/* Calculate scalarproduct, then subtract a 2nd vector (fused for performance)
 *
 * Accumulates the products of the 16 bit elements of v1 and f2 in EMAC
 * accumulator acc0, returns that sum (movclr.l also clears acc0), and
 * stores v1[i] - s2[i] back into v1.
 *
 * This version fetches data as 32 bit words, and *recommends* v1 to be
 * 32 bit aligned. It also assumes that f2 and s2 are either both 32 bit
 * aligned or both unaligned. Performance will suffer if either condition
 * isn't met. It also needs EMAC in signed integer mode.
 *
 * One pass covers 16 elements (two movem.l groups of four longwords); with
 * ORDER > 16 the pass is repeated ORDER>>4 times. */
static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
{
int res;
#if ORDER > 16
int cnt = ORDER>>4;
#endif
#define SUBHALFREGS(min, sub, dif) /* Subtract register halves straight. */ \
"move.l " #min ", " #dif "\n" /* 'min' can be an A or D reg */ \
"sub.l " #sub ", " #min "\n" /* 'sub' and 'dif' must be D regs */ \
"clr.w " #sub "\n" /* 'min' and 'sub' are clobbered! */ \
"sub.l " #sub ", " #dif "\n" \
"move.w " #min ", " #dif "\n"
#define SUBHALFXREGS(min, s2, s1d) /* Subtract register halves across. */ \
"clr.w " #s1d "\n" /* Needs 's1d' pre-swapped, swaps */ \
"sub.l " #s1d ", " #min "\n" /* 's2' and clobbers 'min'. */ \
"move.l " #min ", " #s1d "\n" /* 'min' can be an A or D reg, */ \
"swap " #s2 "\n" /* 's2' and 's1d' must be D regs. */ \
"sub.l " #s2 ", " #min "\n" \
"move.w " #min ", " #s1d "\n"
asm volatile (
/* Bit 1 of the f2 address clear: f2 is longword aligned, go to 20: */
"move.l %[f2], %%d0 \n"
"and.l #2, %%d0 \n"
"jeq 20f \n"
"10: \n"
/* Unaligned f2/s2: consume one leading word of each so the following
 * longword loads are aligned. The s2 word is swapped into the upper half
 * of d1, as SUBHALFXREGS expects its 's1d' operand pre-swapped. */
"move.w (%[f2])+, %%d0 \n"
"move.w (%[s2])+, %%d1 \n"
"swap %%d1 \n"
"1: \n"
/* Each mac.w multiplies two register halves into acc0 and, in parallel,
 * loads the next longword from f2 or s2 with post-increment. The register
 * that receives the s2 data rotates through d2, d6, d7, d1. */
REPEAT_2(
"movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1 \n"
"mac.w %%d0l, %%d6u, (%[f2])+, %%d0, %%acc0\n"
"mac.w %%d0u, %%d6l, (%[s2])+, %%d2, %%acc0\n"
SUBHALFXREGS(%%d6, %%d2, %%d1)
"mac.w %%d0l, %%d7u, (%[f2])+, %%d0, %%acc0\n"
"mac.w %%d0u, %%d7l, (%[s2])+, %%d6, %%acc0\n"
"move.l %%d1, (%[v1])+ \n"
SUBHALFXREGS(%%d7, %%d6, %%d2)
"mac.w %%d0l, %%a0u, (%[f2])+, %%d0, %%acc0\n"
"mac.w %%d0u, %%a0l, (%[s2])+, %%d7, %%acc0\n"
"move.l %%d2, (%[v1])+ \n"
SUBHALFXREGS(%%a0, %%d7, %%d6)
"mac.w %%d0l, %%a1u, (%[f2])+, %%d0, %%acc0\n"
"mac.w %%d0u, %%a1l, (%[s2])+, %%d1, %%acc0\n"
"move.l %%d6, (%[v1])+ \n"
SUBHALFXREGS(%%a1, %%d1, %%d7)
"move.l %%d7, (%[v1])+ \n"
)
#if ORDER > 16
/* The loop counter lives in the register of %[res]: cnt is passed in
 * through an input tied to that operand. */
"subq.l #1, %[res] \n"
"bne.w 1b \n"
#endif
"jra 99f \n"
"20: \n"
/* Aligned f2/s2: halves line up, so multiply and subtract them straight. */
"move.l (%[f2])+, %%d0 \n"
"1: \n"
"movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1 \n"
"mac.w %%d0u, %%d6u, (%[s2])+, %%d1, %%acc0\n"
"mac.w %%d0l, %%d6l, (%[f2])+, %%d0, %%acc0\n"
SUBHALFREGS(%%d6, %%d1, %%d2)
"mac.w %%d0u, %%d7u, (%[s2])+, %%d1, %%acc0\n"
"mac.w %%d0l, %%d7l, (%[f2])+, %%d0, %%acc0\n"
"move.l %%d2, (%[v1])+ \n"
SUBHALFREGS(%%d7, %%d1, %%d2)
"mac.w %%d0u, %%a0u, (%[s2])+, %%d1, %%acc0\n"
"mac.w %%d0l, %%a0l, (%[f2])+, %%d0, %%acc0\n"
"move.l %%d2, (%[v1])+ \n"
SUBHALFREGS(%%a0, %%d1, %%d2)
"mac.w %%d0u, %%a1u, (%[s2])+, %%d1, %%acc0\n"
"mac.w %%d0l, %%a1l, (%[f2])+, %%d0, %%acc0\n"
"move.l %%d2, (%[v1])+ \n"
SUBHALFREGS(%%a1, %%d1, %%d2)
"move.l %%d2, (%[v1])+ \n"
"movem.l (%[v1]), %%d6-%%d7/%%a0-%%a1 \n"
"mac.w %%d0u, %%d6u, (%[s2])+, %%d1, %%acc0\n"
"mac.w %%d0l, %%d6l, (%[f2])+, %%d0, %%acc0\n"
SUBHALFREGS(%%d6, %%d1, %%d2)
"mac.w %%d0u, %%d7u, (%[s2])+, %%d1, %%acc0\n"
"mac.w %%d0l, %%d7l, (%[f2])+, %%d0, %%acc0\n"
"move.l %%d2, (%[v1])+ \n"
SUBHALFREGS(%%d7, %%d1, %%d2)
"mac.w %%d0u, %%a0u, (%[s2])+, %%d1, %%acc0\n"
"mac.w %%d0l, %%a0l, (%[f2])+, %%d0, %%acc0\n"
"move.l %%d2, (%[v1])+ \n"
SUBHALFREGS(%%a0, %%d1, %%d2)
"mac.w %%d0u, %%a1u, (%[s2])+, %%d1, %%acc0\n"
#if ORDER > 16
"mac.w %%d0l, %%a1l, (%[f2])+, %%d0, %%acc0\n"
#else
/* Single pass: last product, so no further f2 longword is fetched. */
"mac.w %%d0l, %%a1l, %%acc0 \n"
#endif
"move.l %%d2, (%[v1])+ \n"
SUBHALFREGS(%%a1, %%d1, %%d2)
"move.l %%d2, (%[v1])+ \n"
#if ORDER > 16
"subq.l #1, %[res] \n"
"bne.w 1b \n"
#endif
"99: \n"
/* Fetch the accumulated sum into res and clear acc0. */
"movclr.l %%acc0, %[res] \n"
: /* outputs */
[v1]"+a"(v1),
[f2]"+a"(f2),
[s2]"+a"(s2),
[res]"=d"(res)
: /* inputs */
#if ORDER > 16
[cnt]"[res]"(cnt)
#endif
: /* clobbers */
"d0", "d1", "d2", "d6", "d7",
"a0", "a1", "memory"
);
return res;
}
/* Returns the sum of the products of the 16 bit elements of v1 and v2,
 * accumulated in EMAC accumulator acc0 (movclr.l also clears acc0 when the
 * result is fetched). Both arrays are only read.
 *
 * This version fetches data as 32 bit words, and *recommends* v1 to be
 * 32 bit aligned, otherwise performance will suffer. It also needs EMAC
 * in signed integer mode.
 *
 * One pass performs 16 multiply-accumulates; with ORDER > 16 the pass is
 * repeated ORDER>>4 times. */
static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
{
    int res;
#if ORDER > 16
    int cnt = ORDER>>4;
#endif

    asm volatile (
        /* Bit 1 of the v2 address clear: v2 is longword aligned, go to 20: */
        "move.l %[v2], %%d0 \n"
        "and.l #2, %%d0 \n"
        "jeq 20f \n"
    "10: \n"
        /* Unaligned v2: consume one leading word so the following longword
         * loads from v2 are aligned, then multiply across register halves.
         * Each mac.w also loads the next longword in parallel. */
        "move.l (%[v1])+, %%d0 \n"
        "move.w (%[v2])+, %%d1 \n"
    "1: \n"
        REPEAT_7(
        "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"
        "mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n"
        )
        "mac.w %%d0u, %%d1l, (%[v2])+, %%d1, %%acc0\n"
#if ORDER > 16
        "mac.w %%d0l, %%d1u, (%[v1])+, %%d0, %%acc0\n"
        /* The loop counter lives in the register of %[res]: cnt is passed
         * in through an input tied to that operand. */
        "subq.l #1, %[res] \n"
        "bne.b 1b \n"
#else
        /* Single pass: last product, nothing more to fetch. */
        "mac.w %%d0l, %%d1u, %%acc0 \n"
#endif
        "jra 99f \n"
    "20: \n"
        /* Aligned v2: halves line up, multiply them straight. */
        "move.l (%[v1])+, %%d0 \n"
        "move.l (%[v2])+, %%d1 \n"
    "1: \n"
        REPEAT_3(
        "mac.w %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n"
        "mac.w %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
        "mac.w %%d2u, %%d1u, (%[v1])+, %%d0, %%acc0\n"
        "mac.w %%d2l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
        )
        "mac.w %%d0u, %%d1u, (%[v1])+, %%d2, %%acc0\n"
        "mac.w %%d0l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
#if ORDER > 16
        "mac.w %%d2u, %%d1u, (%[v1])+, %%d0, %%acc0\n"
        "mac.w %%d2l, %%d1l, (%[v2])+, %%d1, %%acc0\n"
        "subq.l #1, %[res] \n"
        "bne.b 1b \n"
#else
        /* Single pass: last two products, nothing more to fetch. */
        "mac.w %%d2u, %%d1u, %%acc0 \n"
        "mac.w %%d2l, %%d1l, %%acc0 \n"
#endif
    "99: \n"
        /* Fetch the accumulated sum into res and clear acc0. */
        "movclr.l %%acc0, %[res] \n"
        : /* outputs */
        [v1]"+a"(v1),
        [v2]"+a"(v2),
        [res]"=d"(res)
        : /* inputs */
#if ORDER > 16
        [cnt]"[res]"(cnt)
#endif
        : /* clobbers */
        /* "memory": the asm reads v1[]/v2[] through address registers only,
         * so the compiler must be told to have those stores completed. */
        "d0", "d1", "d2", "memory"
    );
    return res;
}

View file

@ -0,0 +1,234 @@
/*
libdemac - A Monkey's Audio decoder
$Id$
Copyright (C) Dave Chapman 2007
MMX vector math copyright (C) 2010 Jens Arnold
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
*/
/* Defined without a value. NOTE(review): presumably tells the including
 * code that this file provides the fused vector_sp_add()/vector_sp_sub()
 * routines - confirm against the includer. */
#define FUSED_VECTOR_MATH
/* Unrolling helpers: invoke the one-argument macro 'x' 3, 7 or 8 times with
 * arguments n, n+8, n+16, ... The routines below use the argument as a byte
 * offset into the vectors, so each step is one 8 byte MMX register. */
#define REPEAT_MB3(x, n) x(n) x(n+8) x(n+16)
#define REPEAT_MB7(x, n) x(n) x(n+8) x(n+16) x(n+24) x(n+32) x(n+40) x(n+48)
#define REPEAT_MB8(x, n) REPEAT_MB7(x, n) x(n+56)
/* REPEAT_MB(x) emits all block invocations for one pass. For ORDER up to 256
 * the offsets start at 8, because the routines handle offset 0 separately;
 * for ORDER == 1280 a looped pass covers offsets 0..504. Any other ORDER is
 * rejected at compile time. */
#if ORDER == 16 /* 3 times */
#define REPEAT_MB(x) REPEAT_MB3(x, 8)
#elif ORDER == 32 /* 7 times */
#define REPEAT_MB(x) REPEAT_MB7(x, 8)
#elif ORDER == 64 /* 5*3 == 15 times */
#define REPEAT_MB(x) REPEAT_MB3(x, 8) REPEAT_MB3(x, 32) REPEAT_MB3(x, 56) \
REPEAT_MB3(x, 80) REPEAT_MB3(x, 104)
#elif ORDER == 256 /* 9*7 == 63 times */
#define REPEAT_MB(x) REPEAT_MB7(x, 8) REPEAT_MB7(x, 64) REPEAT_MB7(x, 120) \
REPEAT_MB7(x, 176) REPEAT_MB7(x, 232) REPEAT_MB7(x, 288) \
REPEAT_MB7(x, 344) REPEAT_MB7(x, 400) REPEAT_MB7(x, 456)
#elif ORDER == 1280 /* 8*8 == 64 times */
#define REPEAT_MB(x) REPEAT_MB8(x, 0) REPEAT_MB8(x, 64) REPEAT_MB8(x, 128) \
REPEAT_MB8(x, 192) REPEAT_MB8(x, 256) REPEAT_MB8(x, 320) \
REPEAT_MB8(x, 384) REPEAT_MB8(x, 448)
#else
#error unsupported order
#endif
/* Calculate scalarproduct, then add a 2nd vector (fused, MMX version).
 *
 * Accumulates the products of the 16 bit elements of v1 and f2 and returns
 * the sum of the two 32 bit partial sums kept in mm2; stores v1[i] + s2[i]
 * (16 bit wrapping add) back into v1. Each product uses the v1 value that
 * was loaded before its sum is stored.
 *
 * For ORDER <= 256 the whole vector is handled by straight-line code using
 * fixed byte offsets. For larger ORDER one pass covers 512 bytes per
 * vector, the pointers are advanced by 512, and the pass is repeated
 * ORDER>>8 times. */
static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t *s2)
{
    int res, t;
#if ORDER > 256
    int cnt = ORDER>>8;
#endif

    asm volatile (
#if ORDER > 256
        /* Looped variant: start with a cleared accumulator. */
        "pxor %%mm2, %%mm2 \n"
    "1: \n"
#else
        /* Offset 0 is handled here so that mm2 is initialised by the first
         * pmaddwd instead of being cleared and added to. */
        "movq (%[v1]), %%mm2 \n"
        "movq %%mm2, %%mm0 \n"
        "pmaddwd (%[f2]), %%mm2 \n"
        "paddw (%[s2]), %%mm0 \n"
        "movq %%mm0, (%[v1]) \n"
#endif

/* One 8 byte step at byte offset n: multiply-accumulate v1*f2 into mm2 and
 * write v1 + s2 back to v1. */
#define SP_ADD_BLOCK(n) \
        "movq " #n "(%[v1]), %%mm1 \n" \
        "movq %%mm1, %%mm0 \n" \
        "pmaddwd " #n "(%[f2]), %%mm1 \n" \
        "paddw " #n "(%[s2]), %%mm0 \n" \
        "movq %%mm0, " #n "(%[v1]) \n" \
        "paddd %%mm1, %%mm2 \n"

        REPEAT_MB(SP_ADD_BLOCK)

#if ORDER > 256
        "add $512, %[v1] \n"
        "add $512, %[s2] \n"
        "add $512, %[f2] \n"
        "dec %[cnt] \n"
        "jne 1b \n"
#endif

        /* Add the lower and upper 32 bit halves of mm2 to get the result. */
        "movd %%mm2, %[t] \n"
        "psrlq $32, %%mm2 \n"
        "movd %%mm2, %[res] \n"
        "add %[t], %[res] \n"
        : /* outputs */
#if ORDER > 256
        /* v1 and f2 are tied to the registers of res and t (operands 2 and
         * 3): the pointers may be advanced in place, and their registers
         * are reused for the results once the loop is done. */
        [cnt]"+r"(cnt),
        [s2] "+r"(s2),
        [res]"=r"(res),
        [t] "=r"(t)
        : /* inputs */
        [v1]"2"(v1),
        [f2]"3"(f2)
#else
        [res]"=r"(res),
        [t] "=r"(t)
        : /* inputs */
        [v1]"r"(v1),
        [f2]"r"(f2),
        [s2]"r"(s2)
#endif
        : /* clobbers */
        /* "memory": the asm stores to v1[] and reads f2[]/s2[] through
         * pointer registers that are not declared as memory operands. */
        "mm0", "mm1", "mm2", "memory"
    );
    return res;
}
/* Fused scalar product and vector subtract (MMX, 16-bit elements).
 *
 * In a single pass over ORDER elements this
 *   - accumulates the sum of v1[i] * f2[i] (pmaddwd, 32-bit partial sums), and
 *   - updates v1[i] -= s2[i] in place (psubw, 16-bit wrapping subtract).
 * The product always uses the value of v1[i] from before the update.
 *
 * v1: vector that is multiplied and then modified in place
 * f2: vector v1 is multiplied with (read only)
 * s2: vector subtracted from v1 (read only)
 * Returns the scalar product of the original v1 and f2.
 *
 * The asm accesses memory through the pointer operands only, so a "memory"
 * clobber is required to stop the compiler from caching or reordering
 * accesses to the vectors around the asm statement.
 *
 * NOTE(review): no emms is issued here; presumably the surrounding code does
 * not mix this with x87 floating point - confirm.
 */
static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t *s2)
{
    int res, t;
#if ORDER > 256
    int cnt = ORDER>>8;    /* one loop iteration per 256 elements (512 bytes) */
#endif

    asm volatile (
#if ORDER > 256
        /* Long filter: clear the accumulator, then loop over 512-byte chunks */
        "pxor %%mm2, %%mm2 \n"
    "1: \n"
#else
        /* The first four elements initialise the accumulator mm2 directly */
        "movq (%[v1]), %%mm2 \n"
        "movq %%mm2, %%mm0 \n"
        "pmaddwd (%[f2]), %%mm2 \n"
        "psubw (%[s2]), %%mm0 \n"
        "movq %%mm0, (%[v1]) \n"
#endif

#define SP_SUB_BLOCK(n) \
        "movq " #n "(%[v1]), %%mm1 \n" \
        "movq %%mm1, %%mm0 \n" \
        "pmaddwd " #n "(%[f2]), %%mm1 \n" \
        "psubw " #n "(%[s2]), %%mm0 \n" \
        "movq %%mm0, " #n "(%[v1]) \n" \
        "paddd %%mm1, %%mm2 \n"

    REPEAT_MB(SP_SUB_BLOCK)

#if ORDER > 256
        "add $512, %[v1] \n"
        "add $512, %[s2] \n"
        "add $512, %[f2] \n"
        "dec %[cnt] \n"
        "jne 1b \n"
#endif

        /* Horizontal add of the two 32-bit halves of the accumulator */
        "movd %%mm2, %[t] \n"
        "psrlq $32, %%mm2 \n"
        "movd %%mm2, %[res] \n"
        "add %[t], %[res] \n"
        : /* outputs */
#if ORDER > 256
        [cnt]"+r"(cnt),
        [s2] "+r"(s2),
        [res]"=r"(res),
        [t] "=r"(t)
        : /* inputs */
        /* v1 and f2 share registers with res and t: the pointers are dead
           by the time the results are written */
        [v1]"2"(v1),
        [f2]"3"(f2)
#else
        [res]"=r"(res),
        [t] "=r"(t)
        : /* inputs */
        [v1]"r"(v1),
        [f2]"r"(f2),
        [s2]"r"(s2)
#endif
        : /* clobbers */
        "mm0", "mm1", "mm2", "memory"
    );
    return res;
}
/* Scalar product of two ORDER-element vectors of 16-bit values (MMX).
 *
 * Each pmaddwd multiplies four pairs of 16-bit elements and leaves two 32-bit
 * partial sums, which are accumulated in mm1 and added together at the end.
 *
 * v1, v2: the input vectors (read only)
 * Returns the sum of v1[i] * v2[i].
 *
 * The asm reads memory through the pointer operands only, so a "memory"
 * clobber is required to make sure pending stores to the vectors are
 * completed before the asm statement runs.
 *
 * NOTE(review): no emms is issued here; presumably the surrounding code does
 * not mix this with x87 floating point - confirm.
 */
static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
{
    int res, t;
#if ORDER > 256
    int cnt = ORDER>>8;    /* one loop iteration per 256 elements (512 bytes) */
#endif

    asm volatile (
#if ORDER > 256
        /* Long filter: clear the accumulator, then loop over 512-byte chunks */
        "pxor %%mm1, %%mm1 \n"
    "1: \n"
#else
        /* The first four elements initialise the accumulator mm1 directly */
        "movq (%[v1]), %%mm1 \n"
        "pmaddwd (%[v2]), %%mm1 \n"
#endif

#define SP_BLOCK(n) \
        "movq " #n "(%[v1]), %%mm0 \n" \
        "pmaddwd " #n "(%[v2]), %%mm0 \n" \
        "paddd %%mm0, %%mm1 \n"

    REPEAT_MB(SP_BLOCK)

#if ORDER > 256
        "add $512, %[v1] \n"
        "add $512, %[v2] \n"
        "dec %[cnt] \n"
        "jne 1b \n"
#endif

        /* Horizontal add of the two 32-bit halves of the accumulator */
        "movd %%mm1, %[t] \n"
        "psrlq $32, %%mm1 \n"
        "movd %%mm1, %[res] \n"
        "add %[t], %[res] \n"
        : /* outputs */
#if ORDER > 256
        [cnt]"+r"(cnt),
        [res]"=r"(res),
        [t] "=r"(t)
        : /* inputs */
        /* v1 and v2 share registers with res and t: the pointers are dead
           by the time the results are written */
        [v1]"1"(v1),
        [v2]"2"(v2)
#else
        [res]"=r"(res),
        [t] "=r"(t)
        : /* inputs */
        [v1]"r"(v1),
        [v2]"r"(v2)
#endif
        : /* clobbers */
        "mm0", "mm1", "memory"
    );
    return res;
}

View file

@ -0,0 +1,201 @@
/*
libdemac - A Monkey's Audio decoder
$Id$
Copyright (C) Dave Chapman 2007
ARMv4 vector math copyright (C) 2008 Jens Arnold
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
*/
/* This header provides the fused vector_sp_add()/vector_sp_sub() routines. */
#define FUSED_VECTOR_MATH
/* REPEAT_BLOCK(x) repeats the asm text x; every repetition handles four
 * 32-bit elements.
 *   ORDER > 32:  8 repetitions (32 elements) inside a loop run ORDER>>5 times
 *   ORDER == 32: 7 repetitions following a separately written first block
 *   ORDER == 16: 3 repetitions following a separately written first block */
#if ORDER > 32
#define REPEAT_BLOCK(x) x x x x x x x x
#elif ORDER > 16
#define REPEAT_BLOCK(x) x x x x x x x
#else
#define REPEAT_BLOCK(x) x x x
#endif
/* Calculate scalarproduct, then add a 2nd vector (fused for performance)
 *
 * ARM version for 32-bit elements. Over ORDER elements it accumulates the
 * sum of v1[i] * f2[i] in res and stores v1[i] + s2[i] back to v1. The
 * product is computed from the v1 values loaded before the add.
 *
 * v1: vector that is multiplied and then updated in place
 * f2: vector v1 is multiplied with
 * s2: vector added to v1
 * Returns the scalar product of the original v1 and f2.
 */
static inline int32_t vector_sp_add(int32_t* v1, int32_t* f2, int32_t* s2)
{
int res;
#if ORDER > 32
int cnt = ORDER>>5;
#endif
/* v1 is loaded without writeback and advanced by the stmia that stores the
 * updated values; f2 and s2 are advanced by their loads. */
asm volatile (
#if ORDER > 32
"mov %[res], #0 \n"
"1: \n"
#else
"ldmia %[v1], {r0-r3} \n"
"ldmia %[f2]!, {r4-r7} \n"
"mul %[res], r4, r0 \n"
"mla %[res], r5, r1, %[res] \n"
"mla %[res], r6, r2, %[res] \n"
"mla %[res], r7, r3, %[res] \n"
"ldmia %[s2]!, {r4-r7} \n"
"add r0, r0, r4 \n"
"add r1, r1, r5 \n"
"add r2, r2, r6 \n"
"add r3, r3, r7 \n"
"stmia %[v1]!, {r0-r3} \n"
#endif
REPEAT_BLOCK(
"ldmia %[v1], {r0-r3} \n"
"ldmia %[f2]!, {r4-r7} \n"
"mla %[res], r4, r0, %[res] \n"
"mla %[res], r5, r1, %[res] \n"
"mla %[res], r6, r2, %[res] \n"
"mla %[res], r7, r3, %[res] \n"
"ldmia %[s2]!, {r4-r7} \n"
"add r0, r0, r4 \n"
"add r1, r1, r5 \n"
"add r2, r2, r6 \n"
"add r3, r3, r7 \n"
"stmia %[v1]!, {r0-r3} \n"
)
#if ORDER > 32
"subs %[cnt], %[cnt], #1 \n"
"bne 1b \n"
#endif
: /* outputs */
#if ORDER > 32
[cnt]"+r"(cnt),
#endif
[v1] "+r"(v1),
[f2] "+r"(f2),
[s2] "+r"(s2),
[res]"=r"(res)
: /* inputs */
: /* clobbers */
"r0", "r1", "r2", "r3", "r4",
"r5", "r6", "r7", "cc", "memory"
);
return res;
}
/* Calculate scalarproduct, then subtract a 2nd vector (fused for performance)
 *
 * ARM version for 32-bit elements. Over ORDER elements it accumulates the
 * sum of v1[i] * f2[i] in res and stores v1[i] - s2[i] back to v1. The
 * product is computed from the v1 values loaded before the subtract.
 *
 * v1: vector that is multiplied and then updated in place
 * f2: vector v1 is multiplied with
 * s2: vector subtracted from v1
 * Returns the scalar product of the original v1 and f2.
 */
static inline int32_t vector_sp_sub(int32_t* v1, int32_t* f2, int32_t* s2)
{
int res;
#if ORDER > 32
int cnt = ORDER>>5;
#endif
/* v1 is loaded without writeback and advanced by the stmia that stores the
 * updated values; f2 and s2 are advanced by their loads. */
asm volatile (
#if ORDER > 32
"mov %[res], #0 \n"
"1: \n"
#else
"ldmia %[v1], {r0-r3} \n"
"ldmia %[f2]!, {r4-r7} \n"
"mul %[res], r4, r0 \n"
"mla %[res], r5, r1, %[res] \n"
"mla %[res], r6, r2, %[res] \n"
"mla %[res], r7, r3, %[res] \n"
"ldmia %[s2]!, {r4-r7} \n"
"sub r0, r0, r4 \n"
"sub r1, r1, r5 \n"
"sub r2, r2, r6 \n"
"sub r3, r3, r7 \n"
"stmia %[v1]!, {r0-r3} \n"
#endif
REPEAT_BLOCK(
"ldmia %[v1], {r0-r3} \n"
"ldmia %[f2]!, {r4-r7} \n"
"mla %[res], r4, r0, %[res] \n"
"mla %[res], r5, r1, %[res] \n"
"mla %[res], r6, r2, %[res] \n"
"mla %[res], r7, r3, %[res] \n"
"ldmia %[s2]!, {r4-r7} \n"
"sub r0, r0, r4 \n"
"sub r1, r1, r5 \n"
"sub r2, r2, r6 \n"
"sub r3, r3, r7 \n"
"stmia %[v1]!, {r0-r3} \n"
)
#if ORDER > 32
"subs %[cnt], %[cnt], #1 \n"
"bne 1b \n"
#endif
: /* outputs */
#if ORDER > 32
[cnt]"+r"(cnt),
#endif
[v1] "+r"(v1),
[f2] "+r"(f2),
[s2] "+r"(s2),
[res]"=r"(res)
: /* inputs */
: /* clobbers */
"r0", "r1", "r2", "r3", "r4",
"r5", "r6", "r7", "cc", "memory"
);
return res;
}
/* Scalar product of two ORDER-element vectors of 32-bit values (ARM).
 *
 * Loads four elements of each vector per block and accumulates the four
 * products into res with mul/mla. Both pointers are advanced by the loads.
 *
 * v1, v2: the input vectors
 * Returns the sum of v1[i] * v2[i].
 */
static inline int32_t scalarproduct(int32_t* v1, int32_t* v2)
{
int res;
#if ORDER > 32
int cnt = ORDER>>5;
#endif
asm volatile (
#if ORDER > 32
"mov %[res], #0 \n"
"1: \n"
#else
"ldmia %[v1]!, {r0-r3} \n"
"ldmia %[v2]!, {r4-r7} \n"
"mul %[res], r4, r0 \n"
"mla %[res], r5, r1, %[res] \n"
"mla %[res], r6, r2, %[res] \n"
"mla %[res], r7, r3, %[res] \n"
#endif
REPEAT_BLOCK(
"ldmia %[v1]!, {r0-r3} \n"
"ldmia %[v2]!, {r4-r7} \n"
"mla %[res], r4, r0, %[res] \n"
"mla %[res], r5, r1, %[res] \n"
"mla %[res], r6, r2, %[res] \n"
"mla %[res], r7, r3, %[res] \n"
)
#if ORDER > 32
"subs %[cnt], %[cnt], #1 \n"
"bne 1b \n"
#endif
: /* outputs */
#if ORDER > 32
[cnt]"+r"(cnt),
#endif
[v1] "+r"(v1),
[v2] "+r"(v2),
[res]"=r"(res)
: /* inputs */
: /* clobbers */
"r0", "r1", "r2", "r3",
"r4", "r5", "r6", "r7", "cc", "memory"
);
return res;
}

View file

@ -0,0 +1,160 @@
/*
libdemac - A Monkey's Audio decoder
$Id$
Copyright (C) Dave Chapman 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
*/
#include "demac_config.h"
/* Element-wise in-place add: v1[i] += v2[i] for ORDER elements.
 *
 * The work is fully unrolled in groups of 16 statements: one group for
 * ORDER == 16, two groups for ORDER == 32, and two groups per iteration of a
 * loop that runs ORDER/32 times for larger orders. */
static inline void vector_add(filter_int* v1, filter_int* v2)
{
#define VADD_1  *v1++ += *v2++;
#define VADD_4  VADD_1 VADD_1 VADD_1 VADD_1
#define VADD_16 VADD_4 VADD_4 VADD_4 VADD_4

#if ORDER > 32
    int blocks = (ORDER >> 5);

    while (blocks--)
#endif
    {
        VADD_16
#if ORDER > 16
        VADD_16
#endif
    }

#undef VADD_16
#undef VADD_4
#undef VADD_1
}
/* Element-wise in-place subtract: v1[i] -= v2[i] for ORDER elements.
 *
 * The work is fully unrolled in groups of 16 statements: one group for
 * ORDER == 16, two groups for ORDER == 32, and two groups per iteration of a
 * loop that runs ORDER/32 times for larger orders. */
static inline void vector_sub(filter_int* v1, filter_int* v2)
{
#define VSUB_1  *v1++ -= *v2++;
#define VSUB_4  VSUB_1 VSUB_1 VSUB_1 VSUB_1
#define VSUB_16 VSUB_4 VSUB_4 VSUB_4 VSUB_4

#if ORDER > 32
    int blocks = (ORDER >> 5);

    while (blocks--)
#endif
    {
        VSUB_16
#if ORDER > 16
        VSUB_16
#endif
    }

#undef VSUB_16
#undef VSUB_4
#undef VSUB_1
}
/* Scalar product of two ORDER-element vectors: returns sum of v1[i] * v2[i].
 *
 * The multiply-accumulate is fully unrolled in groups of 16 statements: one
 * group for ORDER == 16, two groups for ORDER == 32, and two groups per
 * iteration of a loop that runs ORDER/32 times for larger orders. */
static inline int32_t scalarproduct(filter_int* v1, filter_int* v2)
{
#define SP_MAC_1  res += *v1++ * *v2++;
#define SP_MAC_4  SP_MAC_1 SP_MAC_1 SP_MAC_1 SP_MAC_1
#define SP_MAC_16 SP_MAC_4 SP_MAC_4 SP_MAC_4 SP_MAC_4

    int res = 0;
#if ORDER > 32
    int blocks = (ORDER >> 5);

    while (blocks--)
#endif
    {
        SP_MAC_16
#if ORDER > 16
        SP_MAC_16
#endif
    }

#undef SP_MAC_16
#undef SP_MAC_4
#undef SP_MAC_1
    return res;
}

View file

@ -0,0 +1,110 @@
/*
demac - A Monkey's Audio decoder
$Id$
Copyright (C) Dave Chapman 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
*/
#include <stdio.h>
#include <inttypes.h>
#include <stdlib.h>
#include "inttypes.h"
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include "parser.h"
#ifndef __WIN32__
#define O_BINARY 0
#endif
/* Template for a 44-byte PCM WAV (RIFF) header. The zeroed size and rate
 * fields, as well as the channel count, block alignment and bits per sample,
 * are overwritten by open_wav() before the header is written out. The number
 * in each comment is the byte offset of the field. */
static unsigned char wav_header[44]={
'R','I','F','F',// 0 - ChunkID
0,0,0,0, // 4 - ChunkSize (filesize-8)
'W','A','V','E',// 8 - Format
'f','m','t',' ',// 12 - Subchunk1ID
16,0,0,0, // 16 - Subchunk1Size (16 for PCM)
1,0, // 20 - AudioFormat (1=Uncompressed)
2,0, // 22 - NumChannels
0,0,0,0, // 24 - SampleRate in Hz
0,0,0,0, // 28 - ByteRate (SampleRate*NumChannels*(BitsPerSample/8))
4,0, // 32 - BlockAlign (== NumChannels * BitsPerSample/8)
16,0, // 34 - BitsPerSample
'd','a','t','a',// 36 - Subchunk2ID
0,0,0,0 // 40 - Subchunk2Size
};
/* Store v at p as four bytes in little-endian order. */
static void wav_put_le32(unsigned char *p, uint32_t v)
{
    p[0] = v & 0xff;
    p[1] = (v >> 8) & 0xff;
    p[2] = (v >> 16) & 0xff;
    p[3] = (v >> 24) & 0xff;
}

/* Create (or truncate) filename and write a WAV header describing the audio
 * in ape_ctx to it.
 *
 * ape_ctx:  supplies bps, channels, samplerate and totalsamples
 * filename: path of the output file
 *
 * Returns the open file descriptor, positioned just after the header, or a
 * negative value if the file could not be created or the header could not be
 * written completely (in which case the descriptor is closed again).
 */
int open_wav(struct ape_ctx_t* ape_ctx, char* filename)
{
    int fd;
    int filesize;
    int bytespersample;

    fd = open(filename, O_CREAT|O_WRONLY|O_TRUNC|O_BINARY, 0644);
    if (fd < 0)
        return fd;

    bytespersample = ape_ctx->bps/8;
    /* Size of the finished file: all PCM data plus the 44-byte header */
    filesize = ape_ctx->totalsamples*bytespersample*ape_ctx->channels+44;

    /* ChunkSize */
    wav_put_le32(&wav_header[4], filesize-8);

    /* Number of channels */
    wav_header[22] = ape_ctx->channels;

    /* Samplerate */
    wav_put_le32(&wav_header[24], ape_ctx->samplerate);

    /* ByteRate */
    wav_put_le32(&wav_header[28],
                 ape_ctx->samplerate*(ape_ctx->bps/8)*ape_ctx->channels);

    /* BlockAlign */
    wav_header[32] = (ape_ctx->bps/8)*ape_ctx->channels;

    /* Bits per sample */
    wav_header[34] = ape_ctx->bps;

    /* Subchunk2Size */
    wav_put_le32(&wav_header[40], filesize-44);

    /* A failed or short write would leave a file without a usable header;
       report that to the caller instead of handing back the descriptor. */
    if (write(fd, wav_header, sizeof(wav_header)) != (ssize_t)sizeof(wav_header)) {
        close(fd);
        return -1;
    }

    return fd;
}

View file

@ -0,0 +1,32 @@
/*
demac - A Monkey's Audio decoder
$Id$
Copyright (C) Dave Chapman 2007
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
*/
#ifndef _APE_WAVWRITE_H
#define _APE_WAVWRITE_H
#include "parser.h"
/* Create filename and write a WAV header built from the stream parameters in
 * ape_ctx. Returns the open file descriptor, or a negative value on failure. */
int open_wav(struct ape_ctx_t* ape_ctx, char* filename);
#endif

536
lib/rbcodec/codecs/flac.c Normal file
View file

@ -0,0 +1,536 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2005 Dave Chapman
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include "codeclib.h"
#include <codecs/libffmpegFLAC/decoder.h>
CODEC_HEADER
/* Decoder state shared by all functions in this file.
 * NOTE(review): the IBSS_ATTR_FLAC* macros are presumably placement
 * attributes selecting fast on-chip RAM on targets that have enough of it -
 * confirm against their definition. */
static FLACContext fc IBSS_ATTR_FLAC;
/* The output buffers containing the decoded samples, one per channel
   (channels 0 to 5). flac_init() clears them and stores their addresses in
   fc->decoded[0..5]. */
static int32_t decoded0[MAX_BLOCKSIZE] IBSS_ATTR_FLAC;
static int32_t decoded1[MAX_BLOCKSIZE] IBSS_ATTR_FLAC;
static int32_t decoded2[MAX_BLOCKSIZE] IBSS_ATTR_FLAC_LARGE_IRAM;
static int32_t decoded3[MAX_BLOCKSIZE] IBSS_ATTR_FLAC_LARGE_IRAM;
static int32_t decoded4[MAX_BLOCKSIZE] IBSS_ATTR_FLAC_XLARGE_IRAM;
static int32_t decoded5[MAX_BLOCKSIZE] IBSS_ATTR_FLAC_XLARGE_IRAM;
#define MAX_SUPPORTED_SEEKTABLE_SIZE 5000
/* Notes about seeking:
The full seek table consists of:
uint64_t sample (only 36 bits are used)
uint64_t offset
uint16_t blocksize (18 bytes per seek point in total)
We also limit the sample and offset values to 32-bits - Rockbox doesn't
support files bigger than 2GB on FAT32 filesystems.
The reference FLAC encoder produces a seek table with points every
10 seconds, but this can be overridden by the user when encoding a file.
With the default settings, a typical 4 minute track will contain
24 seek points.
Taking the extreme case of a Rockbox supported file to be a 2GB (compressed)
16-bit/44.1KHz mono stream with a likely uncompressed size of 4GB:
Total duration is: 48694 seconds (about 810 minutes - 13.5 hours)
Total number of seek points: 4869
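Therefore we limit the number of seek points to 5000. This is a
very extreme case, and requires 5000*sizeof(struct FLACseekpoints) bytes
of storage (10 bytes of fields per seek point, i.e. at least 50000 bytes,
more if the compiler pads the struct).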
If we come across a FLAC file with more than this number of seekpoints, we
just use the first 5000.
*/
/* One entry of the in-memory seek table. The 64-bit sample and offset
 * fields of the file's seek table are narrowed to 32 bits; flac_init() only
 * stores points whose upper 32 bits are zero. */
struct FLACseekpoints {
uint32_t sample; /* number of the first sample in the target frame */
uint32_t offset; /* byte offset of the frame; flac_seek() adds fc->metadatalength to it */
uint16_t blocksize; /* 16-bit block size field as read from the seek table */
};
/* Seek table of the current track and the number of valid entries in it */
static struct FLACseekpoints seekpoints[MAX_SUPPORTED_SEEKTABLE_SIZE];
static int nseekpoints;
/* Window onto the input buffer last obtained with ci->request_buffer(), and
 * its size in bytes; used while syncing to frames and seeking */
static int8_t *bit_buffer;
static size_t buff_size;
/* Read a big-endian 32-bit value. The bytes are widened to uint32_t before
 * shifting so that a top byte >= 0x80 is not shifted into the sign bit of
 * an int. */
static inline uint32_t flac_read_be32(const unsigned char *p)
{
    return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
           ((uint32_t)p[2] << 8)  |  (uint32_t)p[3];
}

/* Reset the decoder state and parse the FLAC metadata of the current track.
 *
 * fc:                 decoder context; cleared and then filled from the
 *                     STREAMINFO block (block sizes, frame sizes, sample rate,
 *                     channels, bits per sample, total samples, length,
 *                     bitrate)
 * first_frame_offset: file offset of the "fLaC" marker, i.e. the number of
 *                     bytes of foreign tags to skip at the start of the file
 *
 * As a side effect the seek table (seekpoints/nseekpoints) is rebuilt from
 * the SEEKTABLE block, if there is one, and the sample buffers are cleared.
 *
 * Returns true if the stream marker and a usable STREAMINFO block were
 * found, false on read errors or malformed/unsupported metadata.
 */
static bool flac_init(FLACContext* fc, int first_frame_offset)
{
    unsigned char buf[255];
    bool found_streaminfo=false;
    uint32_t seekpoint_hi,seekpoint_lo;
    uint32_t offset_hi,offset_lo;
    uint16_t blocksize;
    int endofmetadata=0;
    uint32_t blocklength;

    ci->memset(fc,0,sizeof(FLACContext));
    nseekpoints=0;

    fc->sample_skip = 0;

    /* Reset sample buffers */
    memset(decoded0, 0, sizeof(decoded0));
    memset(decoded1, 0, sizeof(decoded1));
    memset(decoded2, 0, sizeof(decoded2));
    memset(decoded3, 0, sizeof(decoded3));
    memset(decoded4, 0, sizeof(decoded4));
    memset(decoded5, 0, sizeof(decoded5));

    /* Set sample buffers in decoder structure */
    fc->decoded[0] = decoded0;
    fc->decoded[1] = decoded1;
    fc->decoded[2] = decoded2;
    fc->decoded[3] = decoded3;
    fc->decoded[4] = decoded4;
    fc->decoded[5] = decoded5;

    /* Skip any foreign tags at start of file */
    ci->seek_buffer(first_frame_offset);

    fc->metadatalength = first_frame_offset;

    if (ci->read_filebuf(buf, 4) < 4)
    {
        return false;
    }

    if (ci->memcmp(buf,"fLaC",4) != 0)
    {
        return false;
    }
    fc->metadatalength += 4;

    while (!endofmetadata) {
        /* Block header: last-block flag + 7-bit type, 24-bit length */
        if (ci->read_filebuf(buf, 4) < 4)
        {
            return false;
        }

        endofmetadata=(buf[0]&0x80);
        blocklength = (buf[1] << 16) | (buf[2] << 8) | buf[3];
        fc->metadatalength+=blocklength+4;

        if ((buf[0] & 0x7f) == 0) /* 0 is the STREAMINFO block */
        {
            /* The block is read into buf in one go, so its length (taken
               from the file) must fit there; the fields parsed below need
               the first 18 bytes. */
            if (blocklength < 18 || blocklength > sizeof(buf))
            {
                LOGF("FLAC: Bad STREAMINFO length (%lu)\n",
                     (unsigned long)blocklength);
                return false;
            }

            if (ci->read_filebuf(buf, blocklength) < blocklength) return false;

            fc->filesize = ci->filesize;
            fc->min_blocksize = (buf[0] << 8) | buf[1];
            int max_blocksize = (buf[2] << 8) | buf[3];
            if (max_blocksize > MAX_BLOCKSIZE)
            {
                LOGF("FLAC: Maximum blocksize is too large (%d > %d)\n",
                     max_blocksize, MAX_BLOCKSIZE);
                return false;
            }
            fc->max_blocksize = max_blocksize;
            fc->min_framesize = (buf[4] << 16) | (buf[5] << 8) | buf[6];
            fc->max_framesize = (buf[7] << 16) | (buf[8] << 8) | buf[9];
            fc->samplerate = (buf[10] << 12) | (buf[11] << 4)
                             | ((buf[12] & 0xf0) >> 4);
            fc->channels = ((buf[12]&0x0e)>>1) + 1;
            fc->bps = (((buf[12]&0x01) << 4) | ((buf[13]&0xf0)>>4) ) + 1;

            /* A zero sample rate cannot be played and would divide by zero
               in the length calculation below */
            if (fc->samplerate == 0)
            {
                return false;
            }

            /* totalsamples is a 36-bit field, but we assume <= 32 bits are
               used */
            fc->totalsamples = flac_read_be32(&buf[14]);

            /* Calculate track length (in ms) and estimate the bitrate
               (in kbit/s) */
            fc->length = ((int64_t) fc->totalsamples * 1000) / fc->samplerate;

            found_streaminfo=true;
        } else if ((buf[0] & 0x7f) == 3) { /* 3 is the SEEKTABLE block */
            while ((nseekpoints < MAX_SUPPORTED_SEEKTABLE_SIZE) &&
                   (blocklength >= 18)) {
                if (ci->read_filebuf(buf,18) < 18) return false;
                blocklength-=18;

                seekpoint_hi=flac_read_be32(&buf[0]);
                seekpoint_lo=flac_read_be32(&buf[4]);
                offset_hi=flac_read_be32(&buf[8]);
                offset_lo=flac_read_be32(&buf[12]);

                blocksize=(buf[16] << 8) | buf[17];

                /* Only store seekpoints where the high 32 bits are zero */
                if ((seekpoint_hi == 0) && (seekpoint_lo != 0xffffffff) &&
                    (offset_hi == 0)) {
                    seekpoints[nseekpoints].sample=seekpoint_lo;
                    seekpoints[nseekpoints].offset=offset_lo;
                    seekpoints[nseekpoints].blocksize=blocksize;
                    nseekpoints++;
                }
            }
            /* Skip any unread seekpoints */
            if (blocklength > 0)
                ci->advance_buffer(blocklength);
        } else {
            /* Skip to next metadata block */
            ci->advance_buffer(blocklength);
        }
    }

    if (!found_streaminfo) {
        return false;
    }

    /* The length is 0 when the total sample count is zero (e.g. not known
       to the encoder); no average bitrate can be estimated in that case. */
    if (fc->length > 0) {
        fc->bitrate = ((int64_t) (fc->filesize-fc->metadatalength) * 8)
                      / fc->length;
    } else {
        fc->bitrate = 0;
    }
    return true;
}
/* Synchronize to next frame in stream - adapted from libFLAC 1.1.3b2
 *
 * Scans the bit reader fc->gb byte by byte for the frame sync pattern (0xff
 * followed by a byte whose upper six bits are 0x3e), advances the codec input
 * buffer to the start of that pattern and decodes the frame found there.
 *
 * Returns true if a sync pattern was found and flac_decode_frame() accepted
 * the frame; false if the bit buffer ran out or decoding failed.
 */
static bool frame_sync(FLACContext* fc) {
unsigned int x = 0;
/* true when x already holds a 0xff that may start the next sync code */
bool cached = false;
/* Make sure we're byte aligned. */
align_get_bits(&fc->gb);
while(1) {
if(fc->gb.size_in_bits - get_bits_count(&fc->gb) < 8) {
/* Error, end of bitstream, a valid stream should never reach here
* since the buffer should contain at least one frame header.
*/
return false;
}
if(cached)
cached = false;
else
x = get_bits(&fc->gb, 8);
if(x == 0xff) { /* MAGIC NUMBER for first 8 frame sync bits. */
x = get_bits(&fc->gb, 8);
/* We have to check if we just read two 0xff's in a row; the second
* may actually be the beginning of the sync code.
*/
if(x == 0xff) { /* MAGIC NUMBER for first 8 frame sync bits. */
cached = true;
}
else if(x >> 2 == 0x3e) { /* MAGIC NUMBER for last 6 sync bits. */
/* Successfully synced. */
break;
}
}
}
/* Advance and init bit buffer to the new frame. */
/* The 16 bits subtracted are the two sync bytes just read, so the input
 * is advanced to the first byte of the sync code. */
ci->advance_buffer((get_bits_count(&fc->gb)-16)>>3); /* consumed bytes */
bit_buffer = ci->request_buffer(&buff_size, MAX_FRAMESIZE+16);
init_get_bits(&fc->gb, bit_buffer, buff_size*8);
/* Decode the frame to verify the frame crc and
* fill fc with its metadata.
*/
if(flac_decode_frame(fc,
bit_buffer, buff_size, ci->yield) < 0) {
return false;
}
return true;
}
/* Seek to sample - adapted from libFLAC 1.1.3b2+
 *
 * Searches for the frame containing target_sample. The search keeps a lower
 * and an upper bound, each as a (file position, sample number) pair. The
 * bounds start at the beginning and end of the audio data and are tightened
 * with the seek table when one is available. Each step then picks a file
 * position by linear interpolation between the bounds, syncs to and decodes
 * the frame found there, and narrows the bounds accordingly.
 *
 * On success fc->sample_skip is set to the offset of target_sample within
 * the decoded frame and true is returned. On failure false is returned; most
 * failure paths seek back to the position the input was at on entry.
 */
static bool flac_seek(FLACContext* fc, uint32_t target_sample) {
off_t orig_pos = ci->curpos;
off_t pos = -1;
unsigned long lower_bound, upper_bound;
unsigned long lower_bound_sample, upper_bound_sample;
int i;
unsigned approx_bytes_per_frame;
uint32_t this_frame_sample = fc->samplenumber;
unsigned this_block_size = fc->blocksize;
/* needs_seek: compute a new interpolated position on the next iteration.
 * first_seek: no frame has been examined yet in this search. */
bool needs_seek = true, first_seek = true;
/* We are just guessing here. */
if(fc->max_framesize > 0)
approx_bytes_per_frame = (fc->max_framesize + fc->min_framesize)/2 + 1;
/* Check if it's a known fixed-blocksize stream. */
else if(fc->min_blocksize == fc->max_blocksize && fc->min_blocksize > 0)
approx_bytes_per_frame = fc->min_blocksize*fc->channels*fc->bps/8 + 64;
else
approx_bytes_per_frame = 4608 * fc->channels * fc->bps/8 + 64;
/* Set an upper and lower bound on where in the stream we will search. */
lower_bound = fc->metadatalength;
lower_bound_sample = 0;
upper_bound = fc->filesize;
upper_bound_sample = fc->totalsamples>0 ? fc->totalsamples : target_sample;
/* Refine the bounds if we have a seektable with suitable points. */
if(nseekpoints > 0) {
/* Find the closest seek point <= target_sample, if it exists. */
for(i = nseekpoints-1; i >= 0; i--) {
if(seekpoints[i].sample <= target_sample)
break;
}
if(i >= 0) { /* i.e. we found a suitable seek point... */
lower_bound = fc->metadatalength + seekpoints[i].offset;
lower_bound_sample = seekpoints[i].sample;
}
/* Find the closest seek point > target_sample, if it exists. */
for(i = 0; i < nseekpoints; i++) {
if(seekpoints[i].sample > target_sample)
break;
}
if(i < nseekpoints) { /* i.e. we found a suitable seek point... */
upper_bound = fc->metadatalength + seekpoints[i].offset;
upper_bound_sample = seekpoints[i].sample;
}
}
while(1) {
/* Check if bounds are still ok. */
if(lower_bound_sample >= upper_bound_sample ||
lower_bound > upper_bound) {
return false;
}
/* Calculate new seek position */
if(needs_seek) {
/* Linear interpolation between the bounds, backed off by roughly one
 * frame so that the sync search can run forward to a frame start;
 * the result is clamped to [lower_bound, upper_bound-1]. */
pos = (off_t)(lower_bound +
(((target_sample - lower_bound_sample) *
(int64_t)(upper_bound - lower_bound)) /
(upper_bound_sample - lower_bound_sample)) -
approx_bytes_per_frame);
if(pos >= (off_t)upper_bound)
pos = (off_t)upper_bound-1;
if(pos < (off_t)lower_bound)
pos = (off_t)lower_bound;
}
if(!ci->seek_buffer(pos))
return false;
bit_buffer = ci->request_buffer(&buff_size, MAX_FRAMESIZE+16);
init_get_bits(&fc->gb, bit_buffer, buff_size*8);
/* Now we need to get a frame. It is possible for our seek
* to land in the middle of audio data that looks exactly like
* a frame header from a future version of an encoder. When
* that happens, frame_sync() will return false.
* But there is a remote possibility that it is properly
* synced at such a "future-codec frame", so to make sure,
* we wait to see several "unparseable" errors in a row before
* bailing out.
*/
{
unsigned unparseable_count;
bool got_a_frame = false;
for(unparseable_count = 0; !got_a_frame
&& unparseable_count < 10; unparseable_count++) {
if(frame_sync(fc))
got_a_frame = true;
}
if(!got_a_frame) {
ci->seek_buffer(orig_pos);
return false;
}
}
/* frame_sync() decoded a frame; fc now describes it */
this_frame_sample = fc->samplenumber;
this_block_size = fc->blocksize;
if(target_sample >= this_frame_sample
&& target_sample < this_frame_sample+this_block_size) {
/* Found the frame containing the target sample. */
fc->sample_skip = target_sample - this_frame_sample;
break;
}
if(this_frame_sample + this_block_size >= upper_bound_sample &&
!first_seek) {
if(pos == (off_t)lower_bound || !needs_seek) {
ci->seek_buffer(orig_pos);
return false;
}
/* Our last move backwards wasn't big enough, try again. */
approx_bytes_per_frame *= 2;
continue;
}
/* Allow one seek over upper bound,
* required for streams with unknown total samples.
*/
first_seek = false;
/* Make sure we are not seeking in a corrupted stream */
if(this_frame_sample < lower_bound_sample) {
ci->seek_buffer(orig_pos);
return false;
}
/* Re-estimate the frame size from the frame just decoded */
approx_bytes_per_frame = this_block_size*fc->channels*fc->bps/8 + 64;
/* We need to narrow the search. */
if(target_sample < this_frame_sample) {
upper_bound_sample = this_frame_sample;
upper_bound = ci->curpos;
}
else { /* Target is beyond this frame. */
/* We are close, continue in decoding next frames. */
if(target_sample < this_frame_sample + 4*this_block_size) {
pos = ci->curpos + fc->framesize;
needs_seek = false;
}
lower_bound_sample = this_frame_sample + this_block_size;
lower_bound = ci->curpos + fc->framesize;
}
}
return true;
}
/* Seek to file offset
 *
 * Moves the input to the given byte offset and tries to synchronize to a
 * frame from there, giving up after ten failed sync attempts. If no frame
 * can be found the input is moved to the first byte after the metadata.
 *
 * Returns true once a frame has been synced to and decoded, false if the
 * seek itself fails or no frame could be found.
 */
static bool flac_seek_offset(FLACContext* fc, uint32_t offset) {
    unsigned attempts = 0;

    if (!ci->seek_buffer(offset)) {
        return false;
    }

    bit_buffer = ci->request_buffer(&buff_size, MAX_FRAMESIZE);
    init_get_bits(&fc->gb, bit_buffer, buff_size*8);

    while (attempts < 10) {
        if (frame_sync(fc)) {
            return true;
        }
        attempts++;
    }

    /* Nothing decodable found: fall back to the start of the audio data */
    ci->seek_buffer(fc->metadatalength);
    return false;
}
/* this is the codec entry point
 *
 * The only call reason handled is CODEC_LOAD, on which the DSP sample depth
 * is configured for this codec's output. Always returns CODEC_OK.
 */
enum codec_status codec_main(enum codec_entry_call_reason reason)
{
    switch (reason) {
    case CODEC_LOAD:
        /* Generic codec initialisation */
        ci->configure(DSP_SET_SAMPLE_DEPTH, FLAC_OUTPUT_DEPTH-1);
        break;
    default:
        break;
    }

    return CODEC_OK;
}
/* Convert a sample count into milliseconds at the given sample rate.
 * The multiplication is done in 64 bits so that long or high-rate streams
 * do not overflow; a sample rate of 0 yields 0 instead of dividing by zero. */
static uint32_t flac_samples_to_ms(uint32_t samples, unsigned long frequency)
{
    if (frequency == 0)
        return 0;

    return (uint32_t)(((uint64_t)samples * 1000) / frequency);
}

/* this is called for each file to process
 *
 * Parses the metadata, resumes at the byte offset stored in the track's id3
 * data and then decodes frame by frame, handing the first two channels to
 * the PCM buffer, until the input runs out or the codec is told to halt.
 * Seek requests are served between frames.
 *
 * Returns CODEC_OK at the end of the track or on halt, CODEC_ERROR if
 * initialisation fails or a frame cannot be decoded.
 */
enum codec_status codec_run(void)
{
    int8_t *buf;
    uint32_t samplesdone;
    uint32_t elapsedtime;
    size_t bytesleft;
    int consumed;
    int res;
    int frame;
    intptr_t param;

    if (codec_init()) {
        LOGF("FLAC: Error initialising codec\n");
        return CODEC_ERROR;
    }

    /* Need to save offset for later use (cleared indirectly by flac_init) */
    samplesdone = ci->id3->offset;

    if (!flac_init(&fc,ci->id3->first_frame_offset)) {
        LOGF("FLAC: Error initialising codec\n");
        return CODEC_ERROR;
    }

    ci->configure(DSP_SWITCH_FREQUENCY, ci->id3->frequency);
    ci->configure(DSP_SET_STEREO_MODE, fc.channels == 1 ?
                  STEREO_MONO : STEREO_NONINTERLEAVED);
    codec_set_replaygain(ci->id3);

    /* samplesdone holds the resume byte offset at this point. The result is
       deliberately not checked: on failure the input has been moved to the
       start of the audio data. */
    flac_seek_offset(&fc, samplesdone);
    samplesdone=fc.samplenumber+fc.blocksize;
    elapsedtime=flac_samples_to_ms(samplesdone, ci->id3->frequency);
    ci->set_elapsed(elapsedtime);

    /* The main decoding loop */
    frame=0;
    buf = ci->request_buffer(&bytesleft, MAX_FRAMESIZE);
    while (bytesleft) {
        enum codec_command_action action = ci->get_command(&param);

        if (action == CODEC_ACTION_HALT)
            break;

        /* Deal with any pending seek requests */
        if (action == CODEC_ACTION_SEEK_TIME) {
            /* param is the target time in ms; convert it to a sample number */
            if (flac_seek(&fc,(uint32_t)(((uint64_t)param
                *ci->id3->frequency)/1000))) {
                /* Refill the input buffer */
                buf = ci->request_buffer(&bytesleft, MAX_FRAMESIZE);
            }

            ci->set_elapsed(param);
            ci->seek_complete();
        }

        if((res=flac_decode_frame(&fc,buf,
                                  bytesleft,ci->yield)) < 0) {
            LOGF("FLAC: Frame %d, error %d\n",frame,res);
            return CODEC_ERROR;
        }
        /* Number of whole input bytes the frame occupied */
        consumed=fc.gb.index/8;
        frame++;

        ci->yield();
        /* After a seek, sample_skip drops the samples in front of the target */
        ci->pcmbuf_insert(&fc.decoded[0][fc.sample_skip], &fc.decoded[1][fc.sample_skip],
                          fc.blocksize - fc.sample_skip);

        fc.sample_skip = 0;

        /* Update the elapsed-time indicator */
        samplesdone=fc.samplenumber+fc.blocksize;
        elapsedtime=flac_samples_to_ms(samplesdone, ci->id3->frequency);
        ci->set_elapsed(elapsedtime);

        ci->advance_buffer(consumed);

        buf = ci->request_buffer(&bytesleft, MAX_FRAMESIZE);
    }

    LOGF("FLAC: Decoded %lu samples\n",(unsigned long)samplesdone);
    return CODEC_OK;
}

108
lib/rbcodec/codecs/gbs.c Normal file
View file

@ -0,0 +1,108 @@
/* Ripped off from Game_Music_Emu 0.5.2. http://www.slack.net/~ant/ */
#include <codecs/lib/codeclib.h>
#include "libgme/gbs_emu.h"
CODEC_HEADER
/* Number of 16-bit samples generated per iteration. The output is
   interleaved stereo, so this is CHUNK_SIZE/2 sample pairs per call to
   pcmbuf_insert(). */
#define CHUNK_SIZE (1024*2)
/* Buffer the emulator renders into */
static int16_t samples[CHUNK_SIZE] IBSS_ATTR;
/* Emulator state for the file being played */
static struct Gbs_Emu gbs_emu;
/****************** rockbox interface ******************/
/* Start playing sub-track t of the loaded file.
 * Unless the current track is being looped, a fade-out is set up at the
 * track's length. The track number is then reported through the
 * elapsed-time display. */
static void set_codec_track(int t) {
    Gbs_start_track(&gbs_emu, t);

    if (ci->loop_track()) {
        /* for loop mode we disable track limits */
    } else {
        Track_set_fade(&gbs_emu, Track_get_length( &gbs_emu, t ), 4000);
    }

    /* t is track no to display */
    ci->set_elapsed(t*1000);
}
/* this is the codec entry point
 *
 * On CODEC_LOAD the DSP is configured for the emulator's output format and
 * the emulator is initialised; other call reasons do nothing. Always returns
 * CODEC_OK.
 */
enum codec_status codec_main(enum codec_entry_call_reason reason)
{
    if (reason != CODEC_LOAD) {
        return CODEC_OK;
    }

    /* we only render 16 bits */
    ci->configure(DSP_SET_SAMPLE_DEPTH, 16);

    /* 44 Khz, Interleaved stereo */
    ci->configure(DSP_SET_FREQUENCY, 44100);
    ci->configure(DSP_SET_STEREO_MODE, STEREO_INTERLEAVED);

    Gbs_init(&gbs_emu);
    Gbs_set_sample_rate(&gbs_emu, 44100);

    return CODEC_OK;
}
/* this is called for each file to process
 *
 * Loads the whole file into the emulator and plays its sub-tracks in order,
 * starting with track 0. A seek request selects a sub-track: the requested
 * time in seconds is used as the track number. Playback ends on a halt
 * command or once the track number runs past the last track.
 *
 * Returns CODEC_ERROR if the codec cannot be initialised or the file cannot
 * be loaded, CODEC_OK otherwise.
 */
enum codec_status codec_run(void)
{
    blargg_err_t err;
    uint8_t *file_data;
    size_t available;
    intptr_t param;
    int track = 0;
    int playing = 1;

    DEBUGF("GBS: next_track\n");
    if (codec_init()) {
        return CODEC_ERROR;
    }

    codec_set_replaygain(ci->id3);

    /* Read the entire file */
    DEBUGF("GBS: request file\n");
    ci->seek_buffer(0);
    file_data = ci->request_buffer(&available, ci->filesize);
    if (!file_data || available < (size_t)ci->filesize) {
        DEBUGF("GBS: file load failed\n");
        return CODEC_ERROR;
    }

    err = Gbs_load_mem(&gbs_emu, file_data, ci->filesize);
    if (err) {
        DEBUGF("GBS: Gbs_load_mem failed (%s)\n", err);
        return CODEC_ERROR;
    }

    /* Update internal track count */
    if (gbs_emu.m3u.size > 0)
        gbs_emu.track_count = gbs_emu.m3u.size;

    /* Outer loop: one pass per sub-track that gets started */
    while (playing) {
        set_codec_track(track);

        /* The main decoder loop */
        for (;;) {
            enum codec_command_action action = ci->get_command(&param);

            if (action == CODEC_ACTION_HALT) {
                playing = 0;
                break;
            }

            if (action == CODEC_ACTION_SEEK_TIME) {
                track = param/1000;
                ci->seek_complete();
                if (track >= gbs_emu.track_count)
                    playing = 0;
                break;
            }

            /* Generate audio buffer */
            err = Gbs_play(&gbs_emu, CHUNK_SIZE, samples);
            if (err || Track_ended(&gbs_emu)) {
                track++;
                if (track >= gbs_emu.track_count)
                    playing = 0;
                break;
            }

            ci->pcmbuf_insert(samples, NULL, CHUNK_SIZE >> 1);
        }
    }

    return CODEC_OK;
}

108
lib/rbcodec/codecs/hes.c Normal file
View file

@ -0,0 +1,108 @@
/* Ripped off from Game_Music_Emu 0.5.2. http://www.slack.net/~ant/ */
#include <string.h>
#include "codeclib.h"
#include "libgme/hes_emu.h"
CODEC_HEADER
/* Number of 16-bit samples generated per iteration. The output is
   interleaved stereo, so this is CHUNK_SIZE/2 sample pairs per call to
   pcmbuf_insert(). */
#define CHUNK_SIZE (1024*2)
/* Buffer the emulator renders into */
static int16_t samples[CHUNK_SIZE] IBSS_ATTR;
/* Emulator state for the file being played */
static struct Hes_Emu hes_emu;
/****************** rockbox interface ******************/
/* Start playing sub-track t of the loaded file.
 * Unless the current track is being looped, a fade-out is set up at the
 * track's length. The track number is then reported through the
 * elapsed-time display. */
static void set_codec_track(int t) {
    Hes_start_track(&hes_emu, t);

    if (ci->loop_track()) {
        /* for loop mode we disable track limits */
    } else {
        Track_set_fade(&hes_emu, Track_get_length( &hes_emu, t ), 4000);
    }

    /* t is track no to display */
    ci->set_elapsed(t*1000);
}
/* this is the codec entry point
 *
 * On CODEC_LOAD the DSP is configured for the emulator's output format and
 * the emulator is initialised; other call reasons do nothing. Always returns
 * CODEC_OK.
 */
enum codec_status codec_main(enum codec_entry_call_reason reason)
{
    if (reason != CODEC_LOAD) {
        return CODEC_OK;
    }

    /* we only render 16 bits */
    ci->configure(DSP_SET_SAMPLE_DEPTH, 16);

    /* 44 Khz, Interleaved stereo */
    ci->configure(DSP_SET_FREQUENCY, 44100);
    ci->configure(DSP_SET_STEREO_MODE, STEREO_INTERLEAVED);

    Hes_init(&hes_emu);
    Hes_set_sample_rate(&hes_emu, 44100);

    return CODEC_OK;
}
/* this is called for each file to process
 *
 * Loads the whole file into the emulator and plays its sub-tracks in order,
 * starting with track 0. A seek request selects a sub-track: the requested
 * time in seconds is used as the track number. Playback ends on a halt
 * command or once the track number runs past the last track.
 *
 * Returns CODEC_ERROR if the codec cannot be initialised or the file cannot
 * be loaded, CODEC_OK otherwise.
 */
enum codec_status codec_run(void)
{
    blargg_err_t err;
    uint8_t *file_data;
    size_t available;
    intptr_t param;
    int track = 0;
    int playing = 1;

    DEBUGF("HES: next_track\n");
    if (codec_init()) {
        return CODEC_ERROR;
    }

    codec_set_replaygain(ci->id3);

    /* Read the entire file */
    DEBUGF("HES: request file\n");
    ci->seek_buffer(0);
    file_data = ci->request_buffer(&available, ci->filesize);
    if (!file_data || available < (size_t)ci->filesize) {
        DEBUGF("HES: file load failed\n");
        return CODEC_ERROR;
    }

    err = Hes_load_mem(&hes_emu, file_data, ci->filesize);
    if (err) {
        DEBUGF("HES: Hes_load_mem failed (%s)\n", err);
        return CODEC_ERROR;
    }

    /* Update internal track count */
    if (hes_emu.m3u.size > 0)
        hes_emu.track_count = hes_emu.m3u.size;

    /* Outer loop: one pass per sub-track that gets started */
    while (playing) {
        set_codec_track(track);

        /* The main decoder loop */
        for (;;) {
            enum codec_command_action action = ci->get_command(&param);

            if (action == CODEC_ACTION_HALT) {
                playing = 0;
                break;
            }

            if (action == CODEC_ACTION_SEEK_TIME) {
                track = param/1000;
                ci->seek_complete();
                if (track >= hes_emu.track_count)
                    playing = 0;
                break;
            }

            /* Generate audio buffer */
            err = Hes_play(&hes_emu, CHUNK_SIZE, samples);
            if (err || Track_ended(&hes_emu)) {
                track++;
                if (track >= hes_emu.track_count)
                    playing = 0;
                break;
            }

            ci->pcmbuf_insert(samples, NULL, CHUNK_SIZE >> 1);
        }
    }

    return CODEC_OK;
}

111
lib/rbcodec/codecs/kss.c Normal file
View file

@ -0,0 +1,111 @@
/* Ripped off from Game_Music_Emu 0.5.2. http://www.slack.net/~ant/ */
#include <codecs/lib/codeclib.h>
#include "libgme/kss_emu.h"
CODEC_HEADER
/* Maximum number of 16-bit samples (interleaved stereo) rendered per iteration */
#define CHUNK_SIZE (1024*2)
static int16_t samples[CHUNK_SIZE] IBSS_ATTR;
static struct Kss_Emu kss_emu;
/****************** rockbox interface ******************/
/* Start sub-track t in the KSS emulator, arm the end-of-track fade unless
 * the player is looping the track, and publish the track number through the
 * elapsed-time field. */
static void set_codec_track(int t)
{
    const int fade_length = 4000; /* forwarded unchanged to Track_set_fade() */

    Kss_start_track(&kss_emu, t);

    /* in loop mode no track limit is installed */
    if (ci->loop_track() == 0)
        Track_set_fade(&kss_emu, Track_get_length(&kss_emu, t), fade_length);

    /* the UI shows t as the track number, so encode it as t "seconds" */
    ci->set_elapsed(1000 * t);
}
/* Codec entry point. On CODEC_LOAD the DSP output format is configured and
 * the KSS emulator is initialised; every other call reason is a no-op.
 * Always returns CODEC_OK. */
enum codec_status codec_main(enum codec_entry_call_reason reason)
{
    if (reason != CODEC_LOAD)
        return CODEC_OK;

    /* the emulator only renders 16-bit samples */
    ci->configure(DSP_SET_SAMPLE_DEPTH, 16);

    /* 44.1 kHz, interleaved stereo */
    ci->configure(DSP_SET_FREQUENCY, 44100);
    ci->configure(DSP_SET_STEREO_MODE, STEREO_INTERLEAVED);

    Kss_init(&kss_emu);
    Kss_set_sample_rate(&kss_emu, 44100);

    return CODEC_OK;
}
/* Called once per file: loads the whole file into the KSS emulator and
 * renders its sub-tracks one after another until a halt is requested or the
 * last sub-track has ended. Returns CODEC_ERROR if the file cannot be
 * buffered or parsed, CODEC_OK otherwise. */
enum codec_status codec_run(void)
{
    blargg_err_t err;
    uint8_t *buf;
    size_t n;
    intptr_t param;
    int track = 0;       /* always begin with the first sub-track */
    int start_track = 1; /* non-zero: (re)start `track` before decoding */

    DEBUGF("KSS: next_track\n");
    if (codec_init()) {
        return CODEC_ERROR;
    }

    codec_set_replaygain(ci->id3);

    /* The emulator needs the complete file in one contiguous buffer */
    DEBUGF("KSS: request file\n");
    ci->seek_buffer(0);
    buf = ci->request_buffer(&n, ci->filesize);
    if (!buf || n < (size_t)ci->filesize) {
        DEBUGF("KSS: file load failed\n");
        return CODEC_ERROR;
    }

    err = Kss_load_mem(&kss_emu, buf, ci->filesize);
    if (err) {
        DEBUGF("KSS: Kss_load failed (%s)\n", err);
        return CODEC_ERROR;
    }

    /* A non-empty m3u playlist overrides the internal track count */
    if (kss_emu.m3u.size > 0)
        kss_emu.track_count = kss_emu.m3u.size;

    /* The main decoder loop */
    for (;;) {
        enum codec_command_action action;

        if (start_track) {
            set_codec_track(track);
            start_track = 0;
        }

        action = ci->get_command(&param);
        if (action == CODEC_ACTION_HALT)
            break;

        if (action == CODEC_ACTION_SEEK_TIME) {
            /* seek position in units of 1000 selects the sub-track */
            track = param/1000;
            ci->seek_complete();
            if (track >= kss_emu.track_count)
                break;
            start_track = 1;
            continue;
        }

        /* Generate audio buffer */
        err = Kss_play(&kss_emu, CHUNK_SIZE, samples);
        if (err || Track_ended(&kss_emu)) {
            track++;
            if (track >= kss_emu.track_count)
                break;
            start_track = 1;
            continue;
        }

        /* CHUNK_SIZE interleaved samples == CHUNK_SIZE/2 stereo frames */
        ci->pcmbuf_insert(samples, NULL, CHUNK_SIZE >> 1);
    }

    return CODEC_OK;
}

View file

@ -0,0 +1,12 @@
#if CONFIG_CODEC == SWCODEC /* software codec platforms */
codeclib.c
fixedpoint.c
ffmpeg_bitstream.c
mdct_lookup.c
fft-ffmpeg.c
mdct.c
#elif (CONFIG_PLATFORM & PLATFORM_HOSTED) && defined(__APPLE__)
osx.dummy.c
#endif

View file

@ -0,0 +1,292 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE. *
* *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002 *
* BY THE Xiph.Org FOUNDATION http://www.xiph.org/ *
* *
********************************************************************
function: arm7 and later wide math functions
********************************************************************/
#ifdef CPU_ARM
#define INCL_OPTIMIZED_MULT32
#if ARM_ARCH >= 6
/* ARMv6+ variant: returns the most significant 32 bits of the signed 64-bit
 * product x*y, computed with a single SMMUL instruction. */
static inline int32_t MULT32(int32_t x, int32_t y) {
int32_t hi;
asm volatile("smmul %[hi], %[x], %[y] \n\t"
: [hi] "=&r" (hi)
: [x] "r" (x), [y] "r" (y) );
return(hi);
}
#else
/* Pre-ARMv6 variant: SMULL produces the full signed 64-bit product x*y in
 * lo:hi; only the high word is returned, the low word is discarded. */
static inline int32_t MULT32(int32_t x, int32_t y) {
int32_t lo, hi;
asm volatile("smull\t%0, %1, %2, %3 \n\t"
: "=&r"(lo),"=&r"(hi)
: "r"(x),"r"(y) );
return(hi);
}
#endif
#define INCL_OPTIMIZED_MULT31
/* Returns the high 32 bits of the 64-bit product x*y, shifted left by one
 * (bit 0 of the result is therefore always zero).
 * The shift is carried out on the unsigned representation: left-shifting a
 * negative signed value is undefined behaviour in ISO C, while the unsigned
 * shift yields the same bit pattern with well-defined semantics. */
static inline int32_t MULT31(int32_t x, int32_t y) {
    return (int32_t)((uint32_t)MULT32(x, y) << 1);
}
#define INCL_OPTIMIZED_MULT31_SHIFT15
/* Returns the 64-bit product x*y shifted right by 15 bits, truncated to 32
 * bits. Unlike a plain shift, the result is rounded: `movs ... lsr #15`
 * leaves the last bit shifted out of the low word in the carry flag and the
 * following `adc` adds that carry to (lo >> 15) + (hi << 17). */
static inline int32_t MULT31_SHIFT15(int32_t x, int32_t y) {
int32_t lo,hi;
asm volatile("smull %0, %1, %2, %3\n\t"
"movs %0, %0, lsr #15\n\t"
"adc %1, %0, %1, lsl #17\n\t"
: "=&r"(lo),"=&r"(hi)
: "r"(x),"r"(y)
: "cc" );
return(hi);
}
#define INCL_OPTIMIZED_MULT31_SHIFT16
/* Returns the 64-bit product x*y shifted right by 16 bits, truncated to 32
 * bits. The result is rounded: `movs ... lsr #16` leaves the last bit
 * shifted out of the low word in the carry flag and the following `adc`
 * adds that carry to (lo >> 16) + (hi << 16). */
static inline int32_t MULT31_SHIFT16(int32_t x, int32_t y) {
int32_t lo,hi;
asm volatile("smull %0, %1, %2, %3\n\t"
"movs %0, %0, lsr #16\n\t"
"adc %1, %0, %1, lsl #16\n\t"
: "=&r"(lo),"=&r"(hi)
: "r"(x),"r"(y)
: "cc" );
return(hi);
}
#define INCL_OPTIMIZED_XPROD32
/* Cross product using 64-bit accumulation (SMULL/SMLAL), keeping the high
 * 32 bits of each sum:
 *   x = hi32(a*t + b*v)
 *   y = hi32(b*t - a*v)   (computed as a*(-v) + b*t; the RSB negates v)
 * x and y must be int32_t lvalues; both are early-clobber outputs, so they
 * never share a register with an input. */
#define XPROD32(a, b, t, v, x, y) \
{ \
int32_t l; \
asm("smull %0, %1, %3, %5\n\t" \
"rsb %2, %6, #0\n\t" \
"smlal %0, %1, %4, %6\n\t" \
"smull %0, %2, %3, %2\n\t" \
"smlal %0, %2, %4, %5" \
: "=&r" (l), "=&r" (x), "=&r" (y) \
: "r" ((a)), "r" ((b)), "r" ((t)), "r" ((v)) ); \
}
#define INCL_OPTIMIZED_XPROD31_R
#define INCL_OPTIMIZED_XNPROD31_R
#if ARM_ARCH >= 6
/* These may yield slightly different result from the macros below
because only the high 32 bits of the multiplications are accumulated while
the below macros use a 64 bit accumulator that is truncated to 32 bits.*/
/* ARMv6+ cross product on the high words of the products (SMMUL/SMMLA/SMMLS):
 *   _x = (hi32(_t*_a) + hi32(_v*_b)) << 1
 *   _y = (hi32(_t*_b) - hi32(_v*_a)) << 1
 * _x and _y must be lvalues; results are built in the temporaries x1/y1 and
 * assigned only after the asm statement. */
#define XPROD31_R(_a, _b, _t, _v, _x, _y)\
{\
int32_t x1, y1;\
asm("smmul %[x1], %[t], %[a] \n\t"\
"smmul %[y1], %[t], %[b] \n\t"\
"smmla %[x1], %[v], %[b], %[x1] \n\t"\
"smmls %[y1], %[v], %[a], %[y1] \n\t"\
: [x1] "=&r" (x1), [y1] "=&r" (y1)\
: [a] "r" (_a), [b] "r" (_b), [t] "r" (_t), [v] "r" (_v) );\
_x = x1 << 1;\
_y = y1 << 1;\
}
/* ARMv6+ cross product with the opposite signs of XPROD31_R:
 *   _x = (hi32(_t*_a) - hi32(_v*_b)) << 1
 *   _y = (hi32(_t*_b) + hi32(_v*_a)) << 1
 * _x and _y must be lvalues; results are built in the temporaries x1/y1 and
 * assigned only after the asm statement. */
#define XNPROD31_R(_a, _b, _t, _v, _x, _y)\
{\
int32_t x1, y1;\
asm("smmul %[x1], %[t], %[a] \n\t"\
"smmul %[y1], %[t], %[b] \n\t"\
"smmls %[x1], %[v], %[b], %[x1] \n\t"\
"smmla %[y1], %[v], %[a], %[y1] \n\t"\
: [x1] "=&r" (x1), [y1] "=&r" (y1)\
: [a] "r" (_a), [b] "r" (_b), [t] "r" (_t), [v] "r" (_v) );\
_x = x1 << 1;\
_y = y1 << 1;\
}
#else
/* Pre-ARMv6 cross product with 64-bit accumulation (SMULL/SMLAL):
 *   _x = hi32(_t*_a + _v*_b) << 1
 *   _y = hi32(_t*_b - _v*_a) << 1   (the RSB negates _a for the subtraction)
 * _x and _y must be lvalues; `l` only receives the discarded low word. */
#define XPROD31_R(_a, _b, _t, _v, _x, _y)\
{\
int32_t x1, y1, l;\
asm("smull %0, %1, %5, %3\n\t"\
"rsb %2, %3, #0\n\t"\
"smlal %0, %1, %6, %4\n\t"\
"smull %0, %2, %6, %2\n\t"\
"smlal %0, %2, %5, %4"\
: "=&r" (l), "=&r" (x1), "=&r" (y1)\
: "r" (_a), "r" (_b), "r" (_t), "r" (_v) );\
_x = x1 << 1;\
_y = y1 << 1;\
}
/* Pre-ARMv6 cross product with 64-bit accumulation (SMULL/SMLAL):
 *   _x = hi32(_t*_a - _v*_b) << 1   (the RSB negates _b for the subtraction)
 *   _y = hi32(_t*_b + _v*_a) << 1
 * _x and _y must be lvalues; `l` only receives the discarded low word. */
#define XNPROD31_R(_a, _b, _t, _v, _x, _y)\
{\
int32_t x1, y1, l;\
asm("smull %0, %1, %5, %3\n\t"\
"rsb %2, %4, #0\n\t"\
"smlal %0, %1, %6, %2\n\t"\
"smull %0, %2, %5, %4\n\t"\
"smlal %0, %2, %6, %3"\
: "=&r" (l), "=&r" (x1), "=&r" (y1)\
: "r" (_a), "r" (_b), "r" (_t), "r" (_v) );\
_x = x1 << 1;\
_y = y1 << 1;\
}
#endif
#define INCL_OPTIMIZED_XPROD31
/* Pointer-returning wrapper around XPROD31_R(): stores the two cross-product
 * results for (a, b) and (t, v) in *x and *y. */
static inline void XPROD31(int32_t a, int32_t b,
                           int32_t t, int32_t v,
                           int32_t *x, int32_t *y)
{
    /* the macro assigns its last two arguments, so hand it the targets */
    XPROD31_R(a, b, t, v, *x, *y);
}
#define INCL_OPTIMIZED_XNPROD31
/* Pointer-returning wrapper around XNPROD31_R(): stores the two
 * cross-product results for (a, b) and (t, v) in *x and *y. */
static inline void XNPROD31(int32_t a, int32_t b,
                            int32_t t, int32_t v,
                            int32_t *x, int32_t *y)
{
    /* the macro assigns its last two arguments, so hand it the targets */
    XNPROD31_R(a, b, t, v, *x, *y);
}
#ifndef _V_VECT_OPS
#define _V_VECT_OPS
/* asm versions of vector operations for block.c, window.c */
/* Element-wise x[i] += y[i] for i in [0, n).
 * Four elements at a time are handled by load/store-multiple instructions
 * (clobbering r0-r7); the remaining 0-3 elements are added in C. */
static inline
void vect_add(int32_t *x, const int32_t *y, int n)
{
while (n>=4) {
/* x is read without writeback and advanced by the final stmia;
   y is advanced by its ldmia */
asm volatile ("ldmia %[x], {r0, r1, r2, r3};"
"ldmia %[y]!, {r4, r5, r6, r7};"
"add r0, r0, r4;"
"add r1, r1, r5;"
"add r2, r2, r6;"
"add r3, r3, r7;"
"stmia %[x]!, {r0, r1, r2, r3};"
: [x] "+r" (x), [y] "+r" (y)
: : "r0", "r1", "r2", "r3",
"r4", "r5", "r6", "r7",
"memory");
n -= 4;
}
/* add final elements */
while (n>0) {
*x++ += *y++;
n--;
}
}
/* Copies n 32-bit words from y to x in ascending address order.
 * Four words at a time are moved with ldmia/stmia (clobbering r0-r3); the
 * remaining 0-3 words are copied in C. */
static inline
void vect_copy(int32_t *x, const int32_t *y, int n)
{
while (n>=4) {
asm volatile ("ldmia %[y]!, {r0, r1, r2, r3};"
"stmia %[x]!, {r0, r1, r2, r3};"
: [x] "+r" (x), [y] "+r" (y)
: : "r0", "r1", "r2", "r3",
"memory");
n -= 4;
}
/* copy final elements */
while (n>0) {
*x++ = *y++;
n--;
}
}
/* In-place windowing with the window read forwards:
 * data[i] = hi32(data[i] * window[i]) << 1 for i in [0, n), i.e. the same
 * result as MULT31(). Four elements per asm iteration (clobbering r0-r9);
 * the remaining 0-3 elements go through MULT31() in C. */
static inline
void vect_mult_fw(int32_t *data, const int32_t *window, int n)
{
while (n>=4) {
/* r8 receives the unused low word of each product, r9 the high word */
asm volatile ("ldmia %[d], {r0, r1, r2, r3};"
"ldmia %[w]!, {r4, r5, r6, r7};"
"smull r8, r9, r0, r4;"
"mov r0, r9, lsl #1;"
"smull r8, r9, r1, r5;"
"mov r1, r9, lsl #1;"
"smull r8, r9, r2, r6;"
"mov r2, r9, lsl #1;"
"smull r8, r9, r3, r7;"
"mov r3, r9, lsl #1;"
"stmia %[d]!, {r0, r1, r2, r3};"
: [d] "+r" (data), [w] "+r" (window)
: : "r0", "r1", "r2", "r3",
"r4", "r5", "r6", "r7", "r8", "r9",
"memory" );
n -= 4;
}
while(n>0) {
*data = MULT31(*data, *window);
data++;
window++;
n--;
}
}
/* In-place windowing with the window read backwards:
 * data[i] = hi32(data[i] * window[-i]) << 1 for i in [0, n); `window` points
 * at the first (highest-address) coefficient to use. Four elements per asm
 * iteration (clobbering r0-r9); the remaining 0-3 elements go through
 * MULT31() in C. */
static inline
void vect_mult_bw(int32_t *data, const int32_t *window, int n)
{
while (n>=4) {
/* ldmda loads the four words ending at window: r7 = window[0] ...
   r4 = window[-3], hence the reversed register pairing below */
asm volatile ("ldmia %[d], {r0, r1, r2, r3};"
"ldmda %[w]!, {r4, r5, r6, r7};"
"smull r8, r9, r0, r7;"
"mov r0, r9, lsl #1;"
"smull r8, r9, r1, r6;"
"mov r1, r9, lsl #1;"
"smull r8, r9, r2, r5;"
"mov r2, r9, lsl #1;"
"smull r8, r9, r3, r4;"
"mov r3, r9, lsl #1;"
"stmia %[d]!, {r0, r1, r2, r3};"
: [d] "+r" (data), [w] "+r" (window)
: : "r0", "r1", "r2", "r3",
"r4", "r5", "r6", "r7", "r8", "r9",
"memory" );
n -= 4;
}
while(n>0) {
*data = MULT31(*data, *window);
data++;
window--;
n--;
}
}
#endif
/* not used anymore */
/*
#ifndef _V_CLIP_MATH
#define _V_CLIP_MATH
static inline int32_t CLIP_TO_15(int32_t x) {
int tmp;
asm volatile("subs %1, %0, #32768\n\t"
"movpl %0, #0x7f00\n\t"
"orrpl %0, %0, #0xff\n"
"adds %1, %0, #32768\n\t"
"movmi %0, #0x8000"
: "+r"(x),"=r"(tmp)
:
: "cc");
return(x);
}
#endif
*/
#endif

View file

@ -0,0 +1,353 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
*
* Copyright (C) 2005 by Pedro Vasconcelos
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
/* asm routines for wide math on the MCF5249 */
#if defined(CPU_COLDFIRE)
#define INCL_OPTIMIZED_MULT32
/* Multiplies x by y in EMAC accumulator acc0, reads the accumulator back
 * (clearing it) and shifts the value right by one bit.
 * NOTE(review): the value delivered by movclr depends on the EMAC mode set
 * up elsewhere; presumably fractional mode, making the result the high 32
 * bits of x*y - confirm. */
static inline int32_t MULT32(int32_t x, int32_t y) {
asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply & shift */
"movclr.l %%acc0, %[x];" /* move & clear acc */
"asr.l #1, %[x];" /* no overflow test */
: [x] "+&d" (x)
: [y] "r" (y)
: "cc");
return x;
}
#define INCL_OPTIMIZED_MULT31
/* Multiplies x by y in EMAC accumulator acc0 and returns the accumulator
 * value as delivered by movclr (which also clears acc0).
 * NOTE(review): presumably relies on the EMAC being in fractional mode so
 * that the result is (x*y) >> 31 - confirm against the EMAC setup code. */
static inline int32_t MULT31(int32_t x, int32_t y) {
asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply */
"movclr.l %%acc0, %[x];" /* move and clear */
: [x] "+&r" (x)
: [y] "r" (y)
: "cc");
return x;
}
#define INCL_OPTIMIZED_MULT31_SHIFT15
/* NOTE: this requires that the emac is *NOT* rounding */
/* Combines the EMAC result (upper part of the product) with the low half of
 * the product obtained from an integer mulu.l: after `swap`, the upper 16
 * bits of r come from the accumulator and the lower 16 bits are replaced by
 * the low product word shifted right by 15 (done as 8 + 7). */
static inline int32_t MULT31_SHIFT15(int32_t x, int32_t y) {
int32_t r;
asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply */
"mulu.l %[y], %[x];" /* get lower half, avoid emac stall */
"movclr.l %%acc0, %[r];" /* get higher half */
"swap %[r];" /* hi<<16, plus one free */
"lsr.l #8, %[x];" /* (unsigned)lo >> 15 */
"lsr.l #7, %[x];"
"move.w %[x], %[r];" /* logical-or results */
: [r] "=&d" (r), [x] "+d" (x)
: [y] "d" (y)
: "cc");
return r;
}
#define INCL_OPTIMIZED_MULT31_SHIFT16
/* Combines the EMAC result (upper part of the product, shifted right once to
 * undo the EMAC shift) with the low product word from mulu.l: the low 16
 * bits of x are overwritten with the low 16 bits of r and the two halves of
 * x are then swapped. Note that the result is returned in x, not r. */
static inline int32_t MULT31_SHIFT16(int32_t x, int32_t y) {
int32_t r;
asm volatile ("mac.l %[x], %[y], %%acc0;" /* multiply */
"mulu.l %[y], %[x];" /* get lower half, avoid emac stall */
"movclr.l %%acc0, %[r];" /* get higher half */
"lsr.l #1, %[r];" /* hi >> 1, to compensate emac shift */
"move.w %[r], %[x];" /* x = x & 0xffff0000 | r & 0xffff */
"swap %[x];" /* x = (unsigned)x << 16 | (unsigned)x >> 16 */
: [r] "=&d" (r), [x] "+d" (x)
: [y] "d" (y)
: "cc");
return x;
}
#define INCL_OPTIMIZED_XPROD31
/* Cross product on the EMAC: acc0 accumulates a*t + b*v and acc1 accumulates
 * b*t - a*v; the two accumulator values are stored to *x and *y.
 * Register a is reused as scratch for the stores once all multiplies have
 * been issued. Uses (and clears) acc0 and acc1. */
static inline
void XPROD31(int32_t a, int32_t b,
int32_t t, int32_t v,
int32_t *x, int32_t *y)
{
asm volatile ("mac.l %[a], %[t], %%acc0;"
"mac.l %[b], %[v], %%acc0;"
"mac.l %[b], %[t], %%acc1;"
"msac.l %[a], %[v], %%acc1;"
"movclr.l %%acc0, %[a];"
"move.l %[a], (%[x]);"
"movclr.l %%acc1, %[a];"
"move.l %[a], (%[y]);"
: [a] "+&r" (a)
: [x] "a" (x), [y] "a" (y),
[b] "r" (b), [t] "r" (t), [v] "r" (v)
: "cc", "memory");
}
#define INCL_OPTIMIZED_XNPROD31
/* Cross product on the EMAC with the opposite signs of XPROD31: acc0
 * accumulates a*t - b*v and acc1 accumulates b*t + a*v; the two accumulator
 * values are stored to *x and *y. Register a is reused as scratch for the
 * stores. Uses (and clears) acc0 and acc1. */
static inline
void XNPROD31(int32_t a, int32_t b,
int32_t t, int32_t v,
int32_t *x, int32_t *y)
{
asm volatile ("mac.l %[a], %[t], %%acc0;"
"msac.l %[b], %[v], %%acc0;"
"mac.l %[b], %[t], %%acc1;"
"mac.l %[a], %[v], %%acc1;"
"movclr.l %%acc0, %[a];"
"move.l %[a], (%[x]);"
"movclr.l %%acc1, %[a];"
"move.l %[a], (%[y]);"
: [a] "+&r" (a)
: [x] "a" (x), [y] "a" (y),
[b] "r" (b), [t] "r" (t), [v] "r" (v)
: "cc", "memory");
}
/* this could lose the LSB by overflow, but i don't think it'll ever happen.
if anyone think they can hear a bug caused by this, please try the above
version. */
#define INCL_OPTIMIZED_XPROD32
/* EMAC cross product: acc0 = _a*_t + _b*_v and acc1 = _b*_t - _a*_v; each
 * accumulator value is read back and shifted right by one into _x / _y.
 * _x and _y must be lvalues that can live in data registers. The expansion
 * is a complete statement that already ends in ';'. */
#define XPROD32(_a, _b, _t, _v, _x, _y) \
asm volatile ("mac.l %[a], %[t], %%acc0;" \
"mac.l %[b], %[v], %%acc0;" \
"mac.l %[b], %[t], %%acc1;" \
"msac.l %[a], %[v], %%acc1;" \
"movclr.l %%acc0, %[x];" \
"asr.l #1, %[x];" \
"movclr.l %%acc1, %[y];" \
"asr.l #1, %[y];" \
: [x] "=d" (_x), [y] "=d" (_y) \
: [a] "r" (_a), [b] "r" (_b), \
[t] "r" (_t), [v] "r" (_v) \
: "cc");
#define INCL_OPTIMIZED_XPROD31_R
/* EMAC cross product delivering its results in lvalues instead of through
 * pointers: acc0 = _a*_t + _b*_v is read into _x and acc1 = _b*_t - _a*_v
 * into _y. The expansion is a complete statement that already ends in ';'. */
#define XPROD31_R(_a, _b, _t, _v, _x, _y) \
asm volatile ("mac.l %[a], %[t], %%acc0;" \
"mac.l %[b], %[v], %%acc0;" \
"mac.l %[b], %[t], %%acc1;" \
"msac.l %[a], %[v], %%acc1;" \
"movclr.l %%acc0, %[x];" \
"movclr.l %%acc1, %[y];" \
: [x] "=r" (_x), [y] "=r" (_y) \
: [a] "r" (_a), [b] "r" (_b), \
[t] "r" (_t), [v] "r" (_v) \
: "cc");
#define INCL_OPTIMIZED_XNPROD31_R
/* EMAC cross product with the opposite signs of XPROD31_R:
 * acc0 = _a*_t - _b*_v is read into _x and acc1 = _b*_t + _a*_v into _y.
 * The expansion is a complete statement that already ends in ';'. */
#define XNPROD31_R(_a, _b, _t, _v, _x, _y) \
asm volatile ("mac.l %[a], %[t], %%acc0;" \
"msac.l %[b], %[v], %%acc0;" \
"mac.l %[b], %[t], %%acc1;" \
"mac.l %[a], %[v], %%acc1;" \
"movclr.l %%acc0, %[x];" \
"movclr.l %%acc1, %[y];" \
: [x] "=r" (_x), [y] "=r" (_y) \
: [a] "r" (_a), [b] "r" (_b), \
[t] "r" (_t), [v] "r" (_v) \
: "cc");
#ifndef _V_VECT_OPS
#define _V_VECT_OPS
/* asm versions of vector operations for block.c, window.c */
/* assumes MAC is initialized & accumulators cleared */
/* Element-wise x[i] += y[i] for i in [0, n).
 * Leading elements are added in C until x is 16-byte aligned, then groups of
 * four are processed with movem.l (clobbering d0-d3 and a0-a3), and the
 * remaining 0-3 elements are added in C again. */
static inline
void vect_add(int32_t *x, const int32_t *y, int n)
{
/* align to 16 bytes */
while(n>0 && (int)x&15) {
*x++ += *y++;
n--;
}
/* the loop condition sits at label 1 and is entered first, so fewer than
   four remaining elements skip the asm body entirely */
asm volatile ("bra 1f;"
"0:" /* loop start */
"movem.l (%[x]), %%d0-%%d3;" /* fetch values */
"movem.l (%[y]), %%a0-%%a3;"
/* add */
"add.l %%a0, %%d0;"
"add.l %%a1, %%d1;"
"add.l %%a2, %%d2;"
"add.l %%a3, %%d3;"
/* store and advance */
"movem.l %%d0-%%d3, (%[x]);"
"lea.l (4*4, %[x]), %[x];"
"lea.l (4*4, %[y]), %[y];"
"subq.l #4, %[n];" /* done 4 elements */
"1: cmpi.l #4, %[n];"
"bge 0b;"
: [n] "+d" (n), [x] "+a" (x), [y] "+a" (y)
: : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
"cc", "memory");
/* add final elements */
while (n>0) {
*x++ += *y++;
n--;
}
}
/* Copies n 32-bit words from y to x in ascending address order.
 * Leading words are copied in C until x is 16-byte aligned, then groups of
 * four are moved with movem.l (clobbering d0-d3), and the remaining 0-3
 * words are copied in C again. */
static inline
void vect_copy(int32_t *x, const int32_t *y, int n)
{
/* align to 16 bytes */
while(n>0 && (int)x&15) {
*x++ = *y++;
n--;
}
/* the loop condition sits at label 1 and is entered first, so fewer than
   four remaining words skip the asm body entirely */
asm volatile ("bra 1f;"
"0:" /* loop start */
"movem.l (%[y]), %%d0-%%d3;" /* fetch values */
"movem.l %%d0-%%d3, (%[x]);" /* store */
"lea.l (4*4, %[x]), %[x];" /* advance */
"lea.l (4*4, %[y]), %[y];"
"subq.l #4, %[n];" /* done 4 elements */
"1: cmpi.l #4, %[n];"
"bge 0b;"
: [n] "+d" (n), [x] "+a" (x), [y] "+a" (y)
: : "%d0", "%d1", "%d2", "%d3", "cc", "memory");
/* copy final elements */
while (n>0) {
*x++ = *y++;
n--;
}
}
/* In-place windowing with the window read forwards: each data[i] is replaced
 * by the EMAC product of data[i] and window[i], for i in [0, n).
 * Leading elements go through MULT31() until data is 16-byte aligned; the
 * asm then handles groups of four using acc0-acc3 and finishes the last 0-3
 * elements itself. Clobbers d0-d3 and a0-a3.
 * NOTE(review): four data words and four window words are pre-fetched before
 * n is tested, and four more data words after every group, so up to 16 bytes
 * beyond the processed range are read (never written) - confirm that callers
 * tolerate this over-read. */
static inline
void vect_mult_fw(int32_t *data, const int32_t *window, int n)
{
/* ensure data is aligned to 16-bytes */
while(n>0 && (int)data&15) {
*data = MULT31(*data, *window);
data++;
window++;
n--;
}
asm volatile ("movem.l (%[d]), %%d0-%%d3;" /* loop start */
"movem.l (%[w]), %%a0-%%a3;" /* pre-fetch registers */
"lea.l (4*4, %[w]), %[w];"
"bra 1f;" /* jump to loop condition */
"0:" /* loop body */
/* multiply and load next window values */
"mac.l %%d0, %%a0, (%[w])+, %%a0, %%acc0;"
"mac.l %%d1, %%a1, (%[w])+, %%a1, %%acc1;"
"mac.l %%d2, %%a2, (%[w])+, %%a2, %%acc2;"
"mac.l %%d3, %%a3, (%[w])+, %%a3, %%acc3;"
"movclr.l %%acc0, %%d0;" /* get the products */
"movclr.l %%acc1, %%d1;"
"movclr.l %%acc2, %%d2;"
"movclr.l %%acc3, %%d3;"
/* store and advance */
"movem.l %%d0-%%d3, (%[d]);"
"lea.l (4*4, %[d]), %[d];"
"movem.l (%[d]), %%d0-%%d3;"
"subq.l #4, %[n];" /* done 4 elements */
"1: cmpi.l #4, %[n];"
"bge 0b;"
/* multiply final elements */
"tst.l %[n];"
"beq 1f;" /* n=0 */
"mac.l %%d0, %%a0, %%acc0;"
"movclr.l %%acc0, %%d0;"
"move.l %%d0, (%[d])+;"
"subq.l #1, %[n];"
"beq 1f;" /* n=1 */
"mac.l %%d1, %%a1, %%acc0;"
"movclr.l %%acc0, %%d1;"
"move.l %%d1, (%[d])+;"
"subq.l #1, %[n];"
"beq 1f;" /* n=2 */
/* otherwise n = 3 */
"mac.l %%d2, %%a2, %%acc0;"
"movclr.l %%acc0, %%d2;"
"move.l %%d2, (%[d])+;"
"1:"
: [n] "+d" (n), [d] "+a" (data), [w] "+a" (window)
: : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
"cc", "memory");
}
/* In-place windowing with the window read backwards: each data[i] is
 * replaced by the EMAC product of data[i] and window[-i], for i in [0, n);
 * `window` points at the first (highest-address) coefficient to use.
 * Leading elements go through MULT31() until data is 16-byte aligned; the
 * asm then handles groups of four using acc0-acc3 and finishes the last 0-3
 * elements itself. Clobbers d0-d3 and a0-a3.
 * NOTE(review): data and window words are pre-fetched before n is tested, so
 * memory just outside the processed range is read (never written) - confirm
 * that callers tolerate this over-read. */
static inline
void vect_mult_bw(int32_t *data, const int32_t *window, int n)
{
/* ensure at least data is aligned to 16-bytes */
while(n>0 && (int)data&15) {
*data = MULT31(*data, *window);
data++;
window--;
n--;
}
/* w is first moved back three words so that the movem loads window[-3..0]
   into a0..a3; a3 (window[0]) therefore pairs with d0 (data[0]) */
asm volatile ("lea.l (-3*4, %[w]), %[w];" /* loop start */
"movem.l (%[d]), %%d0-%%d3;" /* pre-fetch registers */
"movem.l (%[w]), %%a0-%%a3;"
"bra 1f;" /* jump to loop condition */
"0:" /* loop body */
/* multiply and load next window value */
"mac.l %%d0, %%a3, -(%[w]), %%a3, %%acc0;"
"mac.l %%d1, %%a2, -(%[w]), %%a2, %%acc1;"
"mac.l %%d2, %%a1, -(%[w]), %%a1, %%acc2;"
"mac.l %%d3, %%a0, -(%[w]), %%a0, %%acc3;"
"movclr.l %%acc0, %%d0;" /* get the products */
"movclr.l %%acc1, %%d1;"
"movclr.l %%acc2, %%d2;"
"movclr.l %%acc3, %%d3;"
/* store and advance */
"movem.l %%d0-%%d3, (%[d]);"
"lea.l (4*4, %[d]), %[d];"
"movem.l (%[d]), %%d0-%%d3;"
"subq.l #4, %[n];" /* done 4 elements */
"1: cmpi.l #4, %[n];"
"bge 0b;"
/* multiply final elements */
"tst.l %[n];"
"beq 1f;" /* n=0 */
"mac.l %%d0, %%a3, %%acc0;"
"movclr.l %%acc0, %%d0;"
"move.l %%d0, (%[d])+;"
"subq.l #1, %[n];"
"beq 1f;" /* n=1 */
"mac.l %%d1, %%a2, %%acc0;"
"movclr.l %%acc0, %%d1;"
"move.l %%d1, (%[d])+;"
"subq.l #1, %[n];"
"beq 1f;" /* n=2 */
/* otherwise n = 3 */
"mac.l %%d2, %%a1, %%acc0;"
"movclr.l %%acc0, %%d2;"
"move.l %%d2, (%[d])+;"
"1:"
: [n] "+d" (n), [d] "+a" (data), [w] "+a" (window)
: : "%d0", "%d1", "%d2", "%d3", "%a0", "%a1", "%a2", "%a3",
"cc", "memory");
}
#endif
/* not used anymore */
/*
#ifndef _V_CLIP_MATH
#define _V_CLIP_MATH
* this is portable C and simple; why not use this as default?
static inline int32_t CLIP_TO_15(register int32_t x) {
register int32_t hi=32767, lo=-32768;
return (x>=hi ? hi : (x<=lo ? lo : x));
}
#endif
*/
#endif

View file

@ -0,0 +1,182 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2005 Dave Chapman
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
/* "helper functions" common to all codecs */
#include <string.h>
#include "codecs.h"
#include "dsp.h"
#include "codeclib.h"
#include "metadata.h"
/* The following variables are used by codec_malloc() to make use of free RAM
* within the statically allocated codec buffer. */
static size_t mem_ptr = 0;
static size_t bufsize = 0;
static unsigned char* mallocbuf = NULL;
/* Resets the codec_malloc() arena: fetches the codec buffer and its size
 * from the codec API and rewinds the allocation offset to the start.
 * Always returns 0. */
int codec_init(void)
{
    /* codec_get_buffer() aligns the resulting pointer to CACHEALIGN_SIZE. */
    mallocbuf = (unsigned char *)ci->codec_get_buffer(&bufsize);
    mem_ptr = 0;

    return 0;
}
/* Forwards the four replaygain fields of id3 (track/album gain and
 * track/album peak) to the DSP through ci->configure(). */
void codec_set_replaygain(const struct mp3entry *id3)
{
ci->configure(DSP_SET_TRACK_GAIN, id3->track_gain);
ci->configure(DSP_SET_ALBUM_GAIN, id3->album_gain);
ci->configure(DSP_SET_TRACK_PEAK, id3->track_peak);
ci->configure(DSP_SET_ALBUM_PEAK, id3->album_peak);
}
/* Various "helper functions" common to all the xxx2wav decoder plugins */
/* Bump allocator over the codec buffer obtained by codec_init().
 *
 * Returns a pointer to `size` bytes at the current allocation offset, or NULL
 * when the request does not fit into the remaining space. Memory is never
 * reclaimed individually; codec_init() resets the whole arena.
 *
 * The bounds check is written as a subtraction so that a huge `size` (e.g.
 * derived from a corrupt file header) cannot wrap `mem_ptr + size` around and
 * slip past the check. */
void* codec_malloc(size_t size)
{
    void* x;

    /* mem_ptr may sit slightly past bufsize after the last alignment
       round-up, so test that first to keep the subtraction from wrapping */
    if (mem_ptr > bufsize || size > bufsize - mem_ptr)
        return NULL;

    x=&mallocbuf[mem_ptr];

    /* Keep memory aligned to CACHEALIGN_SIZE. */
    mem_ptr += (size + (CACHEALIGN_SIZE-1)) & ~(CACHEALIGN_SIZE-1);

    return(x);
}
/* Allocates zero-initialised storage for nmemb elements of `size` bytes each
 * from the codec arena. Returns NULL if the byte count nmemb*size does not
 * fit in a size_t or if codec_malloc() cannot satisfy the request. */
void* codec_calloc(size_t nmemb, size_t size)
{
    void* x;
    size_t total;

    /* reject element counts whose byte size would wrap around */
    if (size != 0 && nmemb > (size_t)-1 / size)
        return NULL;

    total = nmemb*size;
    x = codec_malloc(total);
    if (x == NULL)
        return NULL;

    ci->memset(x,0,total);
    return(x);
}
/* Intentionally does nothing: allocations from the codec arena are not
 * released individually. */
void codec_free(void* ptr)
{
    (void)ptr;
}
void* codec_realloc(void* ptr, size_t size)
{
void* x;
(void)ptr;
x = codec_malloc(size);
return(x);
}
/* String functions: thin forwarders to the implementations exported through
 * the codec API, so codec code can use the standard names. */

size_t strlen(const char *s)
{
    return ci->strlen(s);
}

char *strcpy(char *dest, const char *src)
{
    return ci->strcpy(dest, src);
}

char *strcat(char *dest, const char *src)
{
    return ci->strcat(dest, src);
}

int strcmp(const char *s1, const char *s2)
{
    return ci->strcmp(s1, s2);
}
/* Memory functions: thin forwarders to the implementations exported through
 * the codec API, so codec code can use the standard names. */

void *memcpy(void *dest, const void *src, size_t n)
{
    return ci->memcpy(dest, src, n);
}

void *memset(void *s, int c, size_t n)
{
    return ci->memset(s, c, n);
}

int memcmp(const void *s1, const void *s2, size_t n)
{
    return ci->memcmp(s1, s2, n);
}

void* memchr(const void *s, int c, size_t n)
{
    return ci->memchr(s, c, n);
}

void *memmove(void *dest, const void *src, size_t n)
{
    return ci->memmove(dest, src, n);
}
/* Forwards to the qsort implementation exported through the codec API. */
void qsort(void *base, size_t nmemb, size_t size,
           int (*compar)(const void *, const void *))
{
    ci->qsort(base, nmemb, size, compar);
}
/* From ffmpeg - libavutil/common.h */
/* bs_log2_tab[i] is floor(log2(i)) for i in 1..255, i.e. the index of the
 * highest set bit of an 8-bit value; entry 0 is 0. */
const uint8_t bs_log2_tab[256] ICONST_ATTR = {
0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
};
/* bs_clz_tab[i] is the number of leading zero bits of i viewed as an 8-bit
 * value: 8 for 0, 7 for 1, ... and 0 for 128..255. */
const uint8_t bs_clz_tab[256] ICONST_ATTR = {
8,7,6,6,5,5,5,5,4,4,4,4,4,4,4,4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};
#ifdef RB_PROFILE
/* Function-entry profiling hook: forwards the entered function and its call
 * site to the profiler exported through the codec API. On ColdFire with
 * GCC < 4 the call_site argument is ignored and the caller's return address
 * is used instead. */
void __cyg_profile_func_enter(void *this_fn, void *call_site) {
/* This workaround is required for coldfire gcc 3.4 but is broken for 4.4
and 4.5, but for those the other way works. */
#if defined(CPU_COLDFIRE) && defined(__GNUC__) && __GNUC__ < 4
(void)call_site;
ci->profile_func_enter(this_fn, __builtin_return_address(1));
#else
ci->profile_func_enter(this_fn, call_site);
#endif
}
/* Function-exit profiling hook: forwards the exited function and its call
 * site to the profiler exported through the codec API. */
void __cyg_profile_func_exit(void *this_fn, void *call_site)
{
    ci->profile_func_exit(this_fn, call_site);
}
#endif

View file

@ -0,0 +1,163 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2005 Dave Chapman
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#ifndef __CODECLIB_H__
#define __CODECLIB_H__
#include <inttypes.h>
#include <string.h>
#include "config.h"
#include "codecs.h"
#include "mdct.h"
#include "fft.h"
extern struct codec_api *ci;
/* Standard library functions that are used by the codecs follow here */
/* Get these functions 'out of the way' of the standard functions. Not doing
* so confuses the cygwin linker, and maybe others. These functions need to
* be implemented elsewhere */
#define malloc(x) codec_malloc(x)
#define calloc(x,y) codec_calloc(x,y)
#define realloc(x,y) codec_realloc(x,y)
#define free(x) codec_free(x)
#undef alloca
#define alloca(x) __builtin_alloca(x)
void* codec_malloc(size_t size);
void* codec_calloc(size_t nmemb, size_t size);
void* codec_realloc(void* ptr, size_t size);
void codec_free(void* ptr);
void *memcpy(void *dest, const void *src, size_t n);
void *memset(void *s, int c, size_t n);
int memcmp(const void *s1, const void *s2, size_t n);
void *memmove(void *s1, const void *s2, size_t n);
size_t strlen(const char *s);
char *strcpy(char *dest, const char *src);
char *strcat(char *dest, const char *src);
/* on some platforms strcmp() seems to be a tricky define which
* breaks if we write down strcmp's prototype */
#undef strcmp
int strcmp(const char *s1, const char *s2);
void qsort(void *base, size_t nmemb, size_t size, int(*compar)(const void *, const void *));
/*MDCT library functions*/
/* -1- Tremor mdct */
extern void mdct_backward(int n, int32_t *in, int32_t *out);
/* -2- ffmpeg fft-based mdct */
extern void ff_imdct_half(unsigned int nbits, int32_t *output, const int32_t *input);
extern void ff_imdct_calc(unsigned int nbits, int32_t *output, const int32_t *input);
/*ffmpeg fft (can be used without mdct)*/
extern void ff_fft_calc_c(int nbits, FFTComplex *z);
#if !defined(CPU_ARM) || ARM_ARCH < 5
/* From libavutil/common.h */
extern const uint8_t bs_log2_tab[256] ICONST_ATTR;
extern const uint8_t bs_clz_tab[256] ICONST_ATTR;
#endif
#define BS_LOG2 0 /* default personality, equivalent floor(log2(x)) */
#define BS_CLZ 1 /* alternate personality, Count Leading Zeros */
#define BS_SHORT 2 /* input guaranteed not to exceed 16 bits */
#define BS_0_0 4 /* guarantee mapping of 0 input to 0 output */
/* Generic bit-scanning function, used to wrap platform CLZ instruction or
scan-and-lookup code, and to provide control over output for 0 inputs.

v    - value to scan
mode - bitwise OR of the BS_* flags:
       BS_LOG2 (0)  result is the index of the highest set bit
       BS_CLZ       result is the number of leading zero bits in 32 bits
       BS_SHORT     caller guarantees v < 0x10000 (table path skips a test)
       BS_0_0       force the result for v == 0 to be 0

On ARMv5+ the compiler's CLZ builtin is used; everywhere else the value is
narrowed to 8 bits and looked up in bs_log2_tab / bs_clz_tab. */
static inline unsigned int bs_generic(unsigned int v, int mode)
{
#if defined(CPU_ARM) && ARM_ARCH >= 5
/* NOTE(review): the BS_0_0 handling below only works if __builtin_clz(0)
   yields 32 here; GCC documents the builtin's result for 0 as undefined -
   confirm this holds for the targeted toolchain. */
unsigned int r = __builtin_clz(v);
if (mode & BS_CLZ)
{
if (mode & BS_0_0)
r &= 31;
} else {
r = 31 - r;
/* If mode is constant, this is a single conditional instruction */
if (mode & BS_0_0 && (signed)r < 0)
r += 1;
}
#else
const uint8_t *bs_tab;
unsigned int r;
unsigned int n = v;
int inc;
/* Set up table, increment, and initial result value based on
personality. */
if (mode & BS_CLZ)
{
bs_tab = bs_clz_tab;
r = 24;
inc = -16;
} else {
bs_tab = bs_log2_tab;
r = 0;
inc = 16;
}
/* narrow n to 8 bits, adjusting r by 16 and then 8 for the bits dropped */
if (!(mode & BS_SHORT) && n >= 0x10000) {
n >>= 16;
r += inc;
}
if (n > 0xff) {
n >>= 8;
r += inc / 2;
}
#ifdef CPU_COLDFIRE
/* The high 24 bits of n are guaranteed empty after the above, so a
superfluous ext.b instruction can be saved by loading the LUT value over
n with asm */
asm volatile (
"move.b (%1,%0.l),%0"
: "+d" (n)
: "a" (bs_tab)
);
#else
n = bs_tab[n];
#endif
r += n;
/* the CLZ table path yields 32 for v == 0; BS_0_0 maps that to 0 */
if (mode & BS_CLZ && mode & BS_0_0 && v == 0)
r = 0;
#endif
return r;
}
/* TODO figure out if we really need to care about calculating
av_log2(0) */
#define av_log2(v) bs_generic(v, BS_0_0)
/* Various codec helper functions */
int codec_init(void);
void codec_set_replaygain(const struct mp3entry *id3);
#ifdef RB_PROFILE
void __cyg_profile_func_enter(void *this_fn, void *call_site)
NO_PROF_ATTR ICODE_ATTR;
void __cyg_profile_func_exit(void *this_fn, void *call_site)
NO_PROF_ATTR ICODE_ATTR;
#endif
#endif /* __CODECLIB_H__ */

View file

@ -0,0 +1,310 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE. *
* *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002 *
* BY THE Xiph.Org FOUNDATION http://www.xiph.org/ *
* *
********************************************************************
function: miscellaneous math and prototypes
********************************************************************/
#ifndef _CODECLIB_MISC_H_
#define _CODECLIB_MISC_H_
#include <stdint.h>
#include "asm_arm.h"
#include "asm_mcf5249.h"
#ifndef _LOW_ACCURACY_
/* 64 bit multiply */
/* union magic overlays a 64-bit integer with its two 32-bit halves so that
 * the high and low words of a 64-bit product can be picked out directly.
 * The member order follows the target's byte order; if neither endianness
 * macro is defined the union is not declared at all. */
#ifdef ROCKBOX_LITTLE_ENDIAN
union magic {
struct {
int32_t lo;
int32_t hi;
} halves;
int64_t whole;
};
#elif defined(ROCKBOX_BIG_ENDIAN)
union magic {
struct {
int32_t hi;
int32_t lo;
} halves;
int64_t whole;
};
#endif
#ifndef INCL_OPTIMIZED_MULT32
#define INCL_OPTIMIZED_MULT32
/* Returns the most significant 32 bits of the signed 64-bit product x*y.
 * The high word is extracted with an unsigned shift instead of through
 * `union magic`, so the result does not depend on the endianness macros and
 * no right shift of a negative value is involved. */
static inline int32_t MULT32(int32_t x, int32_t y) {
    uint64_t product = (uint64_t)((int64_t)x * y);

    return (int32_t)(uint32_t)(product >> 32);
}
#endif
#ifndef INCL_OPTIMIZED_MULT31
#define INCL_OPTIMIZED_MULT31
/* Returns the high 32 bits of the 64-bit product x*y, shifted left by one
 * (bit 0 of the result is therefore always zero).
 * The shift is carried out on the unsigned representation: left-shifting a
 * negative signed value is undefined behaviour in ISO C, while the unsigned
 * shift yields the same bit pattern with well-defined semantics. */
static inline int32_t MULT31(int32_t x, int32_t y) {
    return (int32_t)((uint32_t)MULT32(x, y) << 1);
}
#endif
#ifndef INCL_OPTIMIZED_MULT31_SHIFT15
#define INCL_OPTIMIZED_MULT31_SHIFT15
/* Returns bits 15..46 of the signed 64-bit product x*y, i.e. the product
 * shifted right by 15 and truncated to 32 bits (no rounding).
 * Done as one unsigned 64-bit shift: this yields the same bits as combining
 * (lo >> 15) with (hi << 17), but avoids left-shifting a negative `hi`
 * (undefined in ISO C) and the endianness-dependent `union magic`. */
static inline int32_t MULT31_SHIFT15(int32_t x, int32_t y) {
    uint64_t product = (uint64_t)((int64_t)x * y);

    return (int32_t)(uint32_t)(product >> 15);
}
#endif
#ifndef INCL_OPTIMIZED_MULT31_SHIFT16
#define INCL_OPTIMIZED_MULT31_SHIFT16
/* Returns bits 16..47 of the signed 64-bit product x*y, i.e. the product
 * shifted right by 16 and truncated to 32 bits (no rounding).
 * Done as one unsigned 64-bit shift: this yields the same bits as combining
 * (lo >> 16) with (hi << 16), but avoids left-shifting a negative `hi`
 * (undefined in ISO C) and the endianness-dependent `union magic`. */
static inline int32_t MULT31_SHIFT16(int32_t x, int32_t y) {
    uint64_t product = (uint64_t)((int64_t)x * y);

    return (int32_t)(uint32_t)(product >> 16);
}
#endif
#else
/* Rockbox: unused */
#if 0
/* 32 bit multiply, more portable but less accurate */
/*
* Note: Precision is biased towards the first argument therefore ordering
* is important. Shift values were chosen for the best sound quality after
* many listening tests.
*/
/*
* For MULT32 and MULT31: The second argument is always a lookup table
* value already preshifted from 31 to 8 bits. We therefore take the
* opportunity to save on text space and use unsigned char for those
* tables in this case.
*/
static inline int32_t MULT32(int32_t x, int32_t y) {
return (x >> 9) * y; /* y preshifted >>23 */
}
static inline int32_t MULT31(int32_t x, int32_t y) {
return (x >> 8) * y; /* y preshifted >>23 */
}
static inline int32_t MULT31_SHIFT15(int32_t x, int32_t y) {
return (x >> 6) * y; /* y preshifted >>9 */
}
#endif
#endif
/*
* The XPROD functions are meant to optimize the cross products found all
* over the place in mdct.c by forcing memory operation ordering to avoid
* unnecessary register reloads as soon as memory is being written to.
* However this is only beneficial on CPUs with a sane number of general
* purpose registers which exclude the Intel x86. On Intel, better let the
* compiler actually reload registers directly from original memory by using
* macros.
*/
#ifndef INCL_OPTIMIZED_XPROD32
#define INCL_OPTIMIZED_XPROD32
/* replaced XPROD32 with a macro to avoid memory reference
_x, _y are the results (must be l-values) */
/* Portable cross product:
 *   _x = MULT32(_a,_t) + MULT32(_b,_v)
 *   _y = MULT32(_b,_t) - MULT32(_a,_v)
 * Both results are computed into temporaries before either output is
 * written, so passing the same lvalue as an input and an output (e.g. _x
 * aliasing _a) works, as it does with the ARM asm implementation. */
#define XPROD32(_a, _b, _t, _v, _x, _y) \
{ int32_t xprod32_x_ = MULT32(_a,_t)+MULT32(_b,_v); \
  int32_t xprod32_y_ = MULT32(_b,_t)-MULT32(_a,_v); \
  (_x)=xprod32_x_; \
  (_y)=xprod32_y_; }
#endif
/* Rockbox: Unused */
/*
#ifdef __i386__
#define XPROD31(_a, _b, _t, _v, _x, _y) \
{ *(_x)=MULT31(_a,_t)+MULT31(_b,_v); \
*(_y)=MULT31(_b,_t)-MULT31(_a,_v); }
#define XNPROD31(_a, _b, _t, _v, _x, _y) \
{ *(_x)=MULT31(_a,_t)-MULT31(_b,_v); \
*(_y)=MULT31(_b,_t)+MULT31(_a,_v); }
#else
*/
#ifndef INCL_OPTIMIZED_XPROD31
#define INCL_OPTIMIZED_XPROD31
/* Cross product through pointers:
 *   *x = a*t + b*v
 *   *y = b*t - a*v
 * where every product is formed with MULT31. */
static inline void XPROD31(int32_t a, int32_t b,
                           int32_t t, int32_t v,
                           int32_t *x, int32_t *y)
{
    const int32_t a_t = MULT31(a, t);
    const int32_t b_v = MULT31(b, v);
    const int32_t b_t = MULT31(b, t);
    const int32_t a_v = MULT31(a, v);

    *x = a_t + b_v;
    *y = b_t - a_v;
}
#endif
#ifndef INCL_OPTIMIZED_XNPROD31
#define INCL_OPTIMIZED_XNPROD31
/* Cross product with the opposite signs to XPROD31:
 *   *x = a*t - b*v
 *   *y = b*t + a*v
 * where every product is formed with MULT31. */
static inline void XNPROD31(int32_t a, int32_t b,
                            int32_t t, int32_t v,
                            int32_t *x, int32_t *y)
{
    const int32_t a_t = MULT31(a, t);
    const int32_t b_v = MULT31(b, v);
    const int32_t b_t = MULT31(b, t);
    const int32_t a_v = MULT31(a, v);

    *x = a_t - b_v;
    *y = b_t + a_v;
}
#endif
/*#endif*/
#ifndef INCL_OPTIMIZED_XPROD31_R
#define INCL_OPTIMIZED_XPROD31_R
/* Variant of XPROD31 that assigns directly to the l-values _x and _y
 * instead of writing through pointers:
 *   _x = MULT31(_a,_t) + MULT31(_b,_v),  _y = MULT31(_b,_t) - MULT31(_a,_v).
 * _x and _y are pasted unparenthesised and every input appears twice, so
 * pass plain variables without side effects. Expands to a bare { } block. */
#define XPROD31_R(_a, _b, _t, _v, _x, _y)\
{\
_x = MULT31(_a, _t) + MULT31(_b, _v);\
_y = MULT31(_b, _t) - MULT31(_a, _v);\
}
#endif
#ifndef INCL_OPTIMIZED_XNPROD31_R
#define INCL_OPTIMIZED_XNPROD31_R
/* Variant of XNPROD31 that assigns directly to the l-values _x and _y
 * instead of writing through pointers:
 *   _x = MULT31(_a,_t) - MULT31(_b,_v),  _y = MULT31(_b,_t) + MULT31(_a,_v).
 * _x and _y are pasted unparenthesised and every input appears twice, so
 * pass plain variables without side effects. Expands to a bare { } block. */
#define XNPROD31_R(_a, _b, _t, _v, _x, _y)\
{\
_x = MULT31(_a, _t) - MULT31(_b, _v);\
_y = MULT31(_b, _t) + MULT31(_a, _v);\
}
#endif
#ifndef _V_VECT_OPS
#define _V_VECT_OPS
static inline
void vect_add(int32_t *x, const int32_t *y, int n)
{
    /* element-wise x[i] += y[i] for the first n elements; n <= 0 does nothing */
    int i;

    for (i = 0; i < n; i++)
        x[i] += y[i];
}
static inline
void vect_copy(int32_t *x, const int32_t *y, int n)
{
    /* copies the first n elements of y into x front to back; n <= 0 does nothing */
    int i;

    for (i = 0; i < n; i++)
        x[i] = y[i];
}
static inline
void vect_mult_fw(int32_t *data, const int32_t *window, int n)
{
    /* data[i] = MULT31(data[i], window[i]), walking the window forwards */
    int i;

    for (i = 0; i < n; i++)
        data[i] = MULT31(data[i], window[i]);
}
static inline
void vect_mult_bw(int32_t *data, const int32_t *window, int n)
{
    /* data[i] = MULT31(data[i], window[-i]): data advances while the window
     * is read backwards starting from the element `window` points at */
    int i;

    for (i = 0; i < n; i++)
        data[i] = MULT31(data[i], window[-i]);
}
#endif
/* not used anymore */
/*
#ifndef _V_CLIP_MATH
#define _V_CLIP_MATH
static inline int32_t CLIP_TO_15(int32_t x) {
int ret=x;
ret-= ((x<=32767)-1)&(x-32767);
ret-= ((x>=-32768)-1)&(x+32768);
return(ret);
}
#endif
*/
/* Multiplies two (mantissa, exponent) pairs.
 * Returns the product mantissa and stores its exponent in *p.
 * If either mantissa is zero the result is 0 and *p is left untouched. */
static inline int32_t VFLOAT_MULT(int32_t a,int32_t ap,
                                  int32_t b,int32_t bp,
                                  int32_t *p){
    if(!a || !b)
        return 0;

#ifndef _LOW_ACCURACY_
    *p = ap + bp + 32;
    return MULT32(a, b);
#else
    *p = ap + bp + 31;
    return (a >> 15) * (b >> 16);
#endif
}
/*static inline int32_t VFLOAT_MULTI(int32_t a,int32_t ap,
int32_t i,
int32_t *p){
int ip=_ilog(abs(i))-31;
return VFLOAT_MULT(a,ap,i<<-ip,ip,p);
}
*/
/* Adds two (mantissa, exponent) pairs.
 * Returns the sum mantissa and stores its exponent in *p.
 * The operand with the smaller exponent is shifted down (with rounding) to
 * the larger exponent plus one guard bit; the sum is renormalised by one
 * bit when its two top bits are equal. */
static inline int32_t VFLOAT_ADD(int32_t a,int32_t ap,
                                 int32_t b,int32_t bp,
                                 int32_t *p){
    int32_t sum;
    uint32_t top_bits;

    /* adding zero: the result is simply the other operand */
    if(a == 0){
        *p = bp;
        return b;
    }
    if(b == 0){
        *p = ap;
        return a;
    }

    /* precision of the smaller operand may be lost here */
    if(ap > bp){
        const int shift = ap - bp + 1;
        *p = ap + 1;
        a >>= 1;
        b = (shift < 32) ? (b + (1 << (shift - 1))) >> shift : 0;
    }else{
        const int shift = bp - ap + 1;
        *p = bp + 1;
        b >>= 1;
        a = (shift < 32) ? (a + (1 << (shift - 1))) >> shift : 0;
    }

    sum = a + b;

    /* top two bits equal (both 0 or both 1): one bit of headroom is unused */
    top_bits = (uint32_t)sum & 0xc0000000;
    if(top_bits == 0xc0000000 || top_bits == 0){
        sum <<= 1;
        *p -= 1;
    }
    return sum;
}
#endif

View file

@ -0,0 +1,374 @@
/*
* Common bit i/o utils
* Copyright (c) 2000, 2001 Fabrice Bellard
* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
* Copyright (c) 2010 Loren Merritt
*
* alternative bitstream reader & writer by Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* bitstream api.
*/
//#include "avcodec.h"
#include "ffmpeg_get_bits.h"
#include "ffmpeg_put_bits.h"
#include "ffmpeg_intreadwrite.h"
#define av_log(...)
#ifdef ROCKBOX
#undef DEBUGF
#define DEBUGF(...)
#endif
/* 32-entry byte lookup table with external linkage; values rise
 * monotonically from 0 to 15.
 * NOTE(review): presumably a log2-of-run-length table consumed by other
 * codec sources -- no user is visible in this file, confirm before removing. */
const uint8_t ff_log2_run[32]={
0, 0, 0, 0, 1, 1, 1, 1,
2, 2, 2, 2, 3, 3, 3, 3,
4, 4, 5, 5, 6, 6, 7, 7,
8, 9,10,11,12,13,14,15
};
#if 0 // unused in rockbox
/* Writes zero bits to the PutBitContext: (-s->index) & 7 of them with
 * ALT_BITSTREAM_WRITER, s->bit_left & 7 otherwise.
 * Currently compiled out by the enclosing #if 0. */
void align_put_bits(PutBitContext *s)
{
#ifdef ALT_BITSTREAM_WRITER
put_bits(s,( - s->index) & 7,0);
#else
put_bits(s,s->bit_left & 7,0);
#endif
}
/* Writes every character of `string` as an 8-bit field, followed by a
 * zero byte when terminate_string is non-zero.
 * Currently compiled out by the enclosing #if 0. */
void ff_put_string(PutBitContext *pb, const char *string, int terminate_string)
{
    const char *cursor;

    for (cursor = string; *cursor; cursor++)
        put_bits(pb, 8, *cursor);

    if (terminate_string)
        put_bits(pb, 8, 0);
}
#endif
/* Appends `length` bits taken from `src` to the bit writer `pb`.
 *
 * Short inputs (fewer than 16 whole 16-bit words) or a writer that is not
 * byte aligned are copied 16 bits at a time. Otherwise bytes are written
 * until the writer's bit count is a multiple of 32, the writer is flushed,
 * and the rest of the whole words is copied with memcpy.
 * The remaining (length & 15) bits are written last.
 *
 * NOTE(review): the final statement reads the two bytes at src + 2*words
 * even when no tail bits remain -- callers presumably pad src; confirm. */
void ff_copy_bits(PutBitContext *pb, const uint8_t *src, int length)
{
    const int whole_words = length >> 4;
    const int tail_bits   = length & 15;
    int pos;

    if (length == 0)
        return;

    if (whole_words < 16 || (put_bits_count(pb) & 7)) {
        for (pos = 0; pos < whole_words; pos++)
            put_bits(pb, 16, AV_RB16(src + 2*pos));
    } else {
        /* byte-wise until the writer's bit count is a multiple of 32 */
        pos = 0;
        while (put_bits_count(pb) & 31) {
            put_bits(pb, 8, src[pos]);
            pos++;
        }
        flush_put_bits(pb);
        memcpy(put_bits_ptr(pb), src + pos, 2*whole_words - pos);
        skip_put_bytes(pb, 2*whole_words - pos);
    }

    put_bits(pb, tail_bits, AV_RB16(src + 2*whole_words) >> (16 - tail_bits));
}
/* VLC decoding */
//#define DEBUG_VLC
/* Loads element `i` of a strided table into `v`.
 * The element lives at byte offset i * wrap from `table` and is read as
 * uint8_t (size 1), uint16_t (size 2) or uint32_t (any other size).
 * `table`, `i` and `wrap` are pasted unparenthesised, and the 16/32-bit
 * loads go through a cast pointer, so the address is assumed to be
 * suitably aligned by the caller. Expands to a bare { } block. */
#define GET_DATA(v, table, i, wrap, size) \
{\
const uint8_t *ptr = (const uint8_t *)table + i * wrap;\
switch(size) {\
case 1:\
v = *(const uint8_t *)ptr;\
break;\
case 2:\
v = *(const uint16_t *)ptr;\
break;\
default:\
v = *(const uint32_t *)ptr;\
break;\
}\
}
/**
 * Reserve `size` consecutive entries at the end of vlc->table.
 *
 * The upstream FFmpeg code grew the table with av_realloc() here; that path
 * is not available in Rockbox, so a request that does not fit into
 * vlc->table_allocated can never be satisfied and always fails. (Previously
 * a non-static VLC with a non-NULL table was handed an index beyond its
 * allocation.)
 *
 * @param vlc        table descriptor; table_size is advanced by `size`
 *                   even when the call fails
 * @param size       number of entries to reserve
 * @param use_static non-zero when the table is caller-provided static
 *                   storage; only selects whether the failure is logged
 * @return index of the first reserved entry, or -1 if the table is too small
 */
static int alloc_table(VLC *vlc, int size, int use_static)
{
    const int index = vlc->table_size;

    vlc->table_size += size;
    if (vlc->table_size > vlc->table_allocated) {
        if (use_static) {
            DEBUGF("init_vlc() used with too little memory : table_size > allocated_memory\n");
        }
        /* no realloc available: the table cannot grow */
        return -1;
    }
    return index;
}
/*
static av_always_inline uint32_t bitswap_32(uint32_t x) {
return av_reverse[x&0xFF]<<24
| av_reverse[(x>>8)&0xFF]<<16
| av_reverse[(x>>16)&0xFF]<<8
| av_reverse[x>>24];
}
*/
/* One variable-length code as handled by init_vlc_sparse()/build_table(). */
typedef struct {
uint8_t bits; /* code length in bits */
uint16_t symbol; /* value stored in the table for this code */
/** codeword, with the first bit-to-be-read in the msb
* (even if intended for a little-endian bitstream reader) */
uint32_t code;
} __attribute__((__packed__)) VLCcode; /* packed to save space */
/* qsort() comparator ordering VLCcode entries by their codeword.
 * Both codewords are halved first so that their difference fits in an int. */
static int compare_vlcspec(const void *a, const void *b)
{
    const VLCcode *lhs = a;
    const VLCcode *rhs = b;
    const uint32_t lhs_key = lhs->code >> 1;
    const uint32_t rhs_key = rhs->code >> 1;

    return lhs_key - rhs_key;
}
/**
* Build VLC decoding tables suitable for use with get_vlc().
*
* Table entry layout: [0] holds the symbol, or for a subtable link the index
* of the subtable inside vlc->table; [1] holds the code length in bits, or
* the negated subtable size in bits for a subtable link.
*
* Note that codes[] is modified in place: entries delegated to a subtable
* have the consumed prefix shifted out of .code and subtracted from .bits.
*
* @param vlc the context to be initialized
*
* @param table_nb_bits max length of vlc codes to store directly in this table
* (Longer codes are delegated to subtables.)
*
* @param nb_codes number of elements in codes[]
*
* @param codes descriptions of the vlc codes
* These must be ordered such that codes going into the same subtable are contiguous.
* Sorting by VLCcode.code is sufficient, though not necessary.
*
* @param flags only INIT_VLC_USE_NEW_STATIC is examined (passed to alloc_table())
*
* @return index of this table inside vlc->table, or -1 on failure
*/
static int build_table(VLC *vlc, int table_nb_bits, int nb_codes,
VLCcode *codes, int flags)
{
int table_size, table_index, index, symbol, subtable_bits;
int i, j, k, n, nb, inc;
uint32_t code, code_prefix;
VLC_TYPE (*table)[2];
table_size = 1 << table_nb_bits;
table_index = alloc_table(vlc, table_size, flags & INIT_VLC_USE_NEW_STATIC);
#ifdef DEBUG_VLC
av_log(NULL,AV_LOG_DEBUG,"new table index=%d size=%d\n",
table_index, table_size);
#endif
if (table_index < 0)
return -1;
table = &vlc->table[table_index];
/* mark every slot as unused: length 0, symbol -1 */
for (i = 0; i < table_size; i++) {
table[i][1] = 0; //bits
table[i][0] = -1; //codes
}
/* first pass: map codes and compute auxiliary table sizes */
for (i = 0; i < nb_codes; i++) {
n = codes[i].bits;
code = codes[i].code;
symbol = codes[i].symbol;
#if defined(DEBUG_VLC) && 0
av_log(NULL,AV_LOG_DEBUG,"i=%d n=%d code=0x%x\n", i, n, code);
#endif
if (n <= table_nb_bits) {
/* no need to add another table */
/* the code occupies every slot whose top n index bits equal the code:
   2^(table_nb_bits - n) consecutive entries starting at j */
j = code >> (32 - table_nb_bits);
nb = 1 << (table_nb_bits - n);
inc = 1;
/* if (flags & INIT_VLC_LE) {
j = bitswap_32(code);
inc = 1 << n;
} */
for (k = 0; k < nb; k++) {
#ifdef DEBUG_VLC
av_log(NULL, AV_LOG_DEBUG, "%4x: code=%d n=%d\n",
j, i, n);
#endif
/* a slot that is already taken means two codes overlap */
if (table[j][1] /*bits*/ != 0) {
av_log(NULL, AV_LOG_ERROR, "incorrect codes\n");
return -1;
}
table[j][1] = n; //bits
table[j][0] = symbol;
j += inc;
}
} else {
/* fill auxiliary table recursively */
/* strip the table_nb_bits prefix from this code ... */
n -= table_nb_bits;
code_prefix = code >> (32 - table_nb_bits);
subtable_bits = n;
codes[i].bits = n;
codes[i].code = code << table_nb_bits;
/* ... and from every following code that shares the same prefix */
for (k = i+1; k < nb_codes; k++) {
n = codes[k].bits - table_nb_bits;
if (n <= 0)
break;
code = codes[k].code;
if (code >> (32 - table_nb_bits) != code_prefix)
break;
codes[k].bits = n;
codes[k].code = code << table_nb_bits;
subtable_bits = FFMAX(subtable_bits, n);
}
/* a subtable is never wider than its parent; longer codes recurse again */
subtable_bits = FFMIN(subtable_bits, table_nb_bits);
j = /*(flags & INIT_VLC_LE) ? bitswap_32(code_prefix) >> (32 - table_nb_bits) :*/ code_prefix;
table[j][1] = -subtable_bits;
#ifdef DEBUG_VLC
av_log(NULL,AV_LOG_DEBUG,"%4x: n=%d (subtable)\n",
j, codes[i].bits + table_nb_bits);
#endif
index = build_table(vlc, subtable_bits, k-i, codes+i, flags);
if (index < 0)
return -1;
/* note: realloc has been done, so reload tables */
table = &vlc->table[table_index];
table[j][0] = index; //code
/* codes i..k-1 were consumed by the subtable */
i = k-1;
}
}
return table_index;
}
/* Build VLC decoding tables suitable for use with get_vlc().
'nb_bits' sets the decoding table size (2^nb_bits entries). The
bigger it is, the faster is the decoding. But it should not be too
big to save memory and L1 cache. '9' is a good compromise.
'nb_codes' : number of vlcs codes
'bits' : table which gives the size (in bits) of each vlc code.
'codes' : table which gives the bit pattern of each vlc code.
'symbols' : table which gives the values to be returned from get_vlc().
'xxx_wrap' : give the number of bytes between each entry of the
'bits' or 'codes' tables.
'xxx_size' : gives the number of bytes of each entry of the 'bits'
or 'codes' tables.
'wrap' and 'size' allows to use any memory configuration and types
(byte/word/long) to store the 'bits', 'codes', and 'symbols' tables.
'use_static' should be set to 1 for tables, which should be freed
with av_free_static(), 0 if free_vlc() will be used.
*/
/* Rockbox: support for INIT_VLC_LE is currently disabled since none of our
codecs use it, there's a LUT based bit reverse function for this commented
out above (bitswap_32) and an inline asm version in libtremor/codebook.c
if we ever want this */
/* Scratch array holding the codes while the tables are built. It is a single
 * file-scope buffer shared by every call, so init_vlc_sparse() is not
 * reentrant. */
static VLCcode buf[1336+1]; /* worst case is wma, which has one table with 1336 entries */
/**
 * Collects the codes described by bits/codes/symbols into buf and builds the
 * decoding tables of `vlc` from them with build_table().
 *
 * With INIT_VLC_USE_NEW_STATIC the caller must have set vlc->table and
 * vlc->table_allocated beforehand; a table that is already completely
 * filled is left alone and 0 is returned.
 * Without that flag the table fields are reset to NULL/0 before building.
 *
 * @return 0 on success, -1 if nb_codes does not fit into buf, if a static
 *         table is partially initialized, or if build_table() fails
 */
int init_vlc_sparse(VLC *vlc, int nb_bits, int nb_codes,
const void *bits, int bits_wrap, int bits_size,
const void *codes, int codes_wrap, int codes_size,
const void *symbols, int symbols_wrap, int symbols_size,
int flags)
{
if (nb_codes+1 > (int)(sizeof (buf)/ sizeof (VLCcode)))
{
DEBUGF("Table is larger than temp buffer!\n");
return -1;
}
int i, j, ret;
vlc->bits = nb_bits;
if(flags & INIT_VLC_USE_NEW_STATIC){
if(vlc->table_size && vlc->table_size == vlc->table_allocated){
return 0;
}else if(vlc->table_size){
DEBUGF("fatal error, we are called on a partially initialized table\n");
return -1;
// abort(); // fatal error, we are called on a partially initialized table
}
}else {
vlc->table = NULL;
vlc->table_allocated = 0;
vlc->table_size = 0;
}
#ifdef DEBUG_VLC
av_log(NULL,AV_LOG_DEBUG,"build table nb_codes=%d\n", nb_codes);
#endif
// buf = av_malloc((nb_codes+1)*sizeof(VLCcode));
// assert(symbols_size <= 2 || !symbols);
j = 0;
/* COPY(condition) walks all nb_codes input entries and appends to buf[j++]
   those whose bit length satisfies `condition`. The codeword is left-aligned
   to bit 31; the symbol comes from `symbols` if given, else it is the entry
   index i. */
#define COPY(condition)\
for (i = 0; i < nb_codes; i++) {\
GET_DATA(buf[j].bits, bits, i, bits_wrap, bits_size);\
if (!(condition))\
continue;\
GET_DATA(buf[j].code, codes, i, codes_wrap, codes_size);\
/* if (flags & INIT_VLC_LE)*/\
/* buf[j].code = bitswap_32(buf[j].code);*/\
/* else*/\
buf[j].code <<= 32 - buf[j].bits;\
if (symbols)\
GET_DATA(buf[j].symbol, symbols, i, symbols_wrap, symbols_size)\
else\
buf[j].symbol = i;\
j++;\
}
/* first the codes longer than nb_bits (they need subtables), sorted so that
   codes sharing a prefix end up contiguous as build_table() requires */
COPY(buf[j].bits > nb_bits);
// qsort is the slowest part of init_vlc, and could probably be improved or avoided
qsort(buf, j, sizeof(VLCcode), compare_vlcspec);
/* then the non-empty codes that fit directly into the top-level table */
COPY(buf[j].bits && buf[j].bits <= nb_bits);
nb_codes = j;
ret = build_table(vlc, nb_bits, nb_codes, buf, flags);
// av_free(buf);
if (ret < 0) {
// av_freep(&vlc->table);
return -1;
}
/* av_log is defined to nothing in this file, so a static table that is
   larger than needed passes silently */
if((flags & INIT_VLC_USE_NEW_STATIC) && vlc->table_size != vlc->table_allocated) {
av_log(NULL, AV_LOG_ERROR, "needed %d had %d\n", vlc->table_size, vlc->table_allocated);
}
return 0;
}
/* not used in rockbox
void free_vlc(VLC *vlc)
{
av_freep(&vlc->table);
}
*/

View file

@ -0,0 +1,150 @@
/**
* @file bswap.h
* byte swap.
*/
#ifndef __BSWAP_H__
#define __BSWAP_H__
#ifdef HAVE_BYTESWAP_H
#include <byteswap.h>
#else
#ifdef ROCKBOX
#include "codecs.h"
/* rockbox' optimised inline functions */
#define bswap_16(x) swap16(x)
#define bswap_32(x) swap32(x)
/**
 * Reverse the byte order of a 64-bit value.
 *
 * The result is assembled with shifts so that it does not depend on the
 * host byte order. (The earlier union-based version placed the swapped low
 * word in the first struct member, which only yields a correct swap on
 * big-endian hosts.)
 *
 * @param x value to swap
 * @return x with its eight bytes in reverse order
 */
static inline uint64_t ByteSwap64(uint64_t x)
{
    const uint32_t low  = (uint32_t)x;
    const uint32_t high = (uint32_t)(x >> 32);

    /* the swapped low word becomes the high word and vice versa */
    return ((uint64_t)(uint32_t)bswap_32(low) << 32) |
           (uint64_t)(uint32_t)bswap_32(high);
}
#define bswap_64(x) ByteSwap64(x)
#elif defined(ARCH_X86)
/* x86: swaps the two bytes of x by exchanging the low and high byte
 * sub-registers of the operand (xchgb %b0,%h0). The "q" constraint
 * requests a register that has such byte halves. */
static inline unsigned short ByteSwap16(unsigned short x)
{
__asm("xchgb %b0,%h0" :
"=q" (x) :
"0" (x));
return x;
}
#define bswap_16(x) ByteSwap16(x)
/* x86: reverses the four bytes of x.
 * When __CPU__ > 386 a single bswap instruction is used; otherwise the two
 * bytes of the low half are exchanged, the register is rotated by 16 bits
 * and the bytes of the new low half are exchanged.
 * The two preprocessor branches share the trailing input operand line. */
static inline unsigned int ByteSwap32(unsigned int x)
{
#if __CPU__ > 386
__asm("bswap %0":
"=r" (x) :
#else
__asm("xchgb %b0,%h0\n"
" rorl $16,%0\n"
" xchgb %b0,%h0":
"=q" (x) :
#endif
"0" (x));
return x;
}
#define bswap_32(x) ByteSwap32(x)
/* x86: reverses the eight bytes of x.
 * Each 32-bit half is byte-swapped with bswap_32 and passed in as an input
 * operand; the asm statement then exchanges the two 32-bit registers, and
 * the result is read back through the union as one 64-bit value. */
static inline unsigned long long int ByteSwap64(unsigned long long int x)
{
register union { __extension__ uint64_t __ll;
uint32_t __l[2]; } __x;
asm("xchgl %0,%1":
"=r"(__x.__l[0]),"=r"(__x.__l[1]):
"0"(bswap_32((unsigned long)x)),"1"(bswap_32((unsigned long)(x>>32))));
return __x.__ll;
}
#define bswap_64(x) ByteSwap64(x)
#elif defined(ARCH_SH4)
/* SH4: byte-swaps a 16-bit value with a single swap.b instruction. */
static inline uint16_t ByteSwap16(uint16_t x) {
__asm__("swap.b %0,%0":"=r"(x):"0"(x));
return x;
}
/* SH4: byte-swaps a 32-bit value with the instruction sequence
 * swap.b, swap.w, swap.b applied in place to the same register. */
static inline uint32_t ByteSwap32(uint32_t x) {
__asm__(
"swap.b %0,%0\n"
"swap.w %0,%0\n"
"swap.b %0,%0\n"
:"=r"(x):"0"(x));
return x;
}
#define bswap_16(x) ByteSwap16(x)
#define bswap_32(x) ByteSwap32(x)
/**
 * Reverse the byte order of a 64-bit value.
 *
 * The result is assembled with shifts so that it does not depend on the
 * host byte order. (The earlier union-based version placed the swapped low
 * word in the first struct member, which only yields a correct swap on
 * big-endian hosts.)
 *
 * @param x value to swap
 * @return x with its eight bytes in reverse order
 */
static inline uint64_t ByteSwap64(uint64_t x)
{
    const uint32_t low  = (uint32_t)x;
    const uint32_t high = (uint32_t)(x >> 32);

    /* the swapped low word becomes the high word and vice versa */
    return ((uint64_t)(uint32_t)bswap_32(low) << 32) |
           (uint64_t)(uint32_t)bswap_32(high);
}
#define bswap_64(x) ByteSwap64(x)
#else
#define bswap_16(x) (((x) & 0x00ff) << 8 | ((x) & 0xff00) >> 8)
// code from bits/byteswap.h (C) 1997, 1998 Free Software Foundation, Inc.
#define bswap_32(x) \
((((x) & 0xff000000) >> 24) | (((x) & 0x00ff0000) >> 8) | \
(((x) & 0x0000ff00) << 8) | (((x) & 0x000000ff) << 24))
/* Portable 64-bit byte swap: each 32-bit half is reversed with bswap_32
 * and the two halves trade places. */
static inline uint64_t ByteSwap64(uint64_t x)
{
    const uint32_t low_word  = (uint32_t)x;
    const uint32_t high_word = (uint32_t)(x >> 32);
    const uint32_t new_high  = bswap_32(low_word);
    const uint32_t new_low   = bswap_32(high_word);

    return ((uint64_t)new_high << 32) | new_low;
}
#define bswap_64(x) ByteSwap64(x)
#endif /* !ARCH_X86 */
#endif /* !HAVE_BYTESWAP_H */
// be2me ... BigEndian to MachineEndian
// le2me ... LittleEndian to MachineEndian
// With ROCKBOX_BIG_ENDIAN defined the be2me_* macros are the identity and
// the le2me_* macros byte-swap; otherwise the roles are reversed.
#ifdef ROCKBOX_BIG_ENDIAN
#define be2me_16(x) (x)
#define be2me_32(x) (x)
#define be2me_64(x) (x)
#define le2me_16(x) bswap_16(x)
#define le2me_32(x) bswap_32(x)
#define le2me_64(x) bswap_64(x)
#else
#define be2me_16(x) bswap_16(x)
#define be2me_32(x) bswap_32(x)
#define be2me_64(x) bswap_64(x)
#define le2me_16(x) (x)
#define le2me_32(x) (x)
#define le2me_64(x) (x)
#endif
#endif /* __BSWAP_H__ */

View file

@ -0,0 +1,743 @@
/*
* copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* bitstream reader API header.
*/
#ifndef AVCODEC_GET_BITS_H
#define AVCODEC_GET_BITS_H
#include <stdint.h>
#include <stdlib.h>
#include "ffmpeg_intreadwrite.h"
//#include <assert.h>
//#include "libavutil/bswap.h"
//#include "libavutil/common.h"
//#include "libavutil/intreadwrite.h"
//#include "libavutil/log.h"
//#include "mathops.h"
#include "codecs.h"
/* rockbox' optimised inline functions */
#define bswap_16(x) swap16(x)
#define bswap_32(x) swap32(x)
/* Endianness helpers: with ROCKBOX_BIG_ENDIAN defined be2me_* is the
   identity and le2me_* byte-swaps; otherwise the roles are reversed.
   Only 16- and 32-bit variants are provided here. */
#ifdef ROCKBOX_BIG_ENDIAN
#define be2me_16(x) (x)
#define be2me_32(x) (x)
#define le2me_16(x) bswap_16(x)
#define le2me_32(x) bswap_32(x)
#else
#define be2me_16(x) bswap_16(x)
#define be2me_32(x) bswap_32(x)
#define le2me_16(x) (x)
#define le2me_32(x) (x)
#endif
#define av_const __attribute__((const))
#define av_always_inline inline __attribute__((always_inline))
/* The following is taken from mathops.h */
#ifndef sign_extend
/**
 * Sign-extend the low `bits` bits of val to a full int.
 *
 * The value is moved to the top of the word as an unsigned quantity
 * (left-shifting a signed int into or past the sign bit is undefined) and
 * then shifted back down as a signed int so the sign bit is replicated.
 * This relies on right shifts of negative ints being arithmetic.
 *
 * @param val  value whose low `bits` bits hold a two's complement number;
 *             higher bits are ignored
 * @param bits width of the field, 1 .. 8*sizeof(int)
 * @return the field interpreted as a signed number
 */
static inline av_const int sign_extend(int val, unsigned bits)
{
    const unsigned shift = (8 * sizeof(int)) - bits;
    union { unsigned u; int s; } v;

    v.u = (unsigned)val << shift;
    return v.s >> shift;
}
#endif
/* NEG_SSR32(a,s): the top s bits of a as a signed value (shift right by 32-s).
   The shift count is 32-(s), so s must lie in 1..32; s == 0 would shift a
   32-bit value by 32. */
#ifndef NEG_SSR32
# define NEG_SSR32(a,s) ((( int32_t)(a))>>(32-(s)))
#endif
/* NEG_USR32(a,s): the top s bits of a as an unsigned value; same limits on s. */
#ifndef NEG_USR32
# define NEG_USR32(a,s) (((uint32_t)(a))>>(32-(s)))
#endif
/* these 2 are from libavutil/common.h */
/* both evaluate their arguments more than once: avoid side effects */
#define FFMAX(a,b) ((a) > (b) ? (a) : (b))
#define FFMIN(a,b) ((a) > (b) ? (b) : (a))
#if defined(ALT_BITSTREAM_READER_LE) && !defined(ALT_BITSTREAM_READER)
# define ALT_BITSTREAM_READER
#endif
/*
#if !defined(LIBMPEG2_BITSTREAM_READER) && !defined(A32_BITSTREAM_READER) && !defined(ALT_BITSTREAM_READER)
# if ARCH_ARM && !HAVE_FAST_UNALIGNED
# define A32_BITSTREAM_READER
# else
*/
# define ALT_BITSTREAM_READER
/*
//#define LIBMPEG2_BITSTREAM_READER
//#define A32_BITSTREAM_READER
# endif
#endif
*/
/* bit input */
/* buffer, buffer_end and size_in_bits must be present and used by every reader */
/* The remaining fields depend on which bitstream reader is compiled in:
   the ALT reader keeps a bit index, the LIBMPEG2 and A32 readers keep a
   read pointer plus cached bits and a bit counter. */
typedef struct GetBitContext {
const uint8_t *buffer, *buffer_end;
#ifdef ALT_BITSTREAM_READER
int index; /* current read position in bits from the start of buffer */
#elif defined LIBMPEG2_BITSTREAM_READER
uint8_t *buffer_ptr;
uint32_t cache;
int bit_count;
#elif defined A32_BITSTREAM_READER
uint32_t *buffer_ptr;
uint32_t cache0;
uint32_t cache1;
int bit_count;
#endif
int size_in_bits; /* buffer size in bits as passed to init_get_bits() */
} GetBitContext;
/* Element type of the VLC decoding tables. */
#define VLC_TYPE int16_t
/* A variable-length-code decoding table as built by init_vlc_sparse(). */
typedef struct VLC {
int bits; /* index width of the top-level table (nb_bits given to init_vlc_sparse) */
VLC_TYPE (*table)[2]; ///< code, bits
int table_size, table_allocated; /* entries in use / entries available in table */
} VLC;
/* Table element read by GET_RL_VLC: a (level, run) pair plus the code length.
   As in GET_RL_VLC, a negative len marks a link to a subtable, in which case
   level is added to the subtable index. */
typedef struct RL_VLC_ELEM {
int16_t level;
int8_t len;
uint8_t run;
} RL_VLC_ELEM;
/* Bitstream reader API docs:
name
arbitrary name which is used as prefix for the internal variables
gb
getbitcontext
OPEN_READER(name, gb)
loads gb into local variables
CLOSE_READER(name, gb)
stores local vars in gb
UPDATE_CACHE(name, gb)
refills the internal cache from the bitstream
after this call at least MIN_CACHE_BITS will be available,
GET_CACHE(name, gb)
will output the contents of the internal cache, next bit is MSB of 32 or 64 bit (FIXME 64bit)
SHOW_UBITS(name, gb, num)
will return the next num bits
SHOW_SBITS(name, gb, num)
will return the next num bits and do sign extension
SKIP_BITS(name, gb, num)
will skip over the next num bits
note, this is equivalent to SKIP_CACHE; SKIP_COUNTER
SKIP_CACHE(name, gb, num)
will remove the next num bits from the cache (note SKIP_COUNTER MUST be called before UPDATE_CACHE / CLOSE_READER)
SKIP_COUNTER(name, gb, num)
will increment the internal bit counter (see SKIP_CACHE & SKIP_BITS)
LAST_SKIP_CACHE(name, gb, num)
will remove the next num bits from the cache if it is needed for UPDATE_CACHE otherwise it will do nothing
LAST_SKIP_BITS(name, gb, num)
is equivalent to LAST_SKIP_CACHE; SKIP_COUNTER
for examples see get_bits, show_bits, skip_bits, get_vlc
*/
#ifdef ALT_BITSTREAM_READER
# define MIN_CACHE_BITS 25
/* Reader macros for the ALT bitstream reader. The reader state is a bit
   index (name##_index) plus a 32-bit cache (name##_cache) reloaded from the
   buffer by UPDATE_CACHE.
   No macro body ends in a line continuation: a trailing backslash on the
   last line would splice the following preprocessor directive into the
   macro body. */
/* ROCKBOX: work around "set but not used" warning */
# define OPEN_READER(name, gb)\
    unsigned int name##_index= (gb)->index;\
    int name##_cache __attribute__((unused)) = 0;
# define CLOSE_READER(name, gb)\
    (gb)->index= name##_index;
# ifdef ALT_BITSTREAM_READER_LE
# define UPDATE_CACHE(name, gb)\
    name##_cache= AV_RL32( ((const uint8_t *)(gb)->buffer)+(name##_index>>3) ) >> (name##_index&0x07);
# define SKIP_CACHE(name, gb, num)\
    name##_cache >>= (num);
# else
# define UPDATE_CACHE(name, gb)\
    name##_cache= AV_RB32( ((const uint8_t *)(gb)->buffer)+(name##_index>>3) ) << (name##_index&0x07);
# define SKIP_CACHE(name, gb, num)\
    name##_cache <<= (num);
# endif
// FIXME name?
# define SKIP_COUNTER(name, gb, num)\
    name##_index += (num);
# define SKIP_BITS(name, gb, num)\
    {\
        SKIP_CACHE(name, gb, num)\
        SKIP_COUNTER(name, gb, num)\
    }
# define LAST_SKIP_BITS(name, gb, num) SKIP_COUNTER(name, gb, num)
# define LAST_SKIP_CACHE(name, gb, num) ;
# ifdef ALT_BITSTREAM_READER_LE
# define SHOW_UBITS(name, gb, num)\
    zero_extend(name##_cache, num)
# define SHOW_SBITS(name, gb, num)\
    sign_extend(name##_cache, num)
# else
# define SHOW_UBITS(name, gb, num)\
    NEG_USR32(name##_cache, num)
# define SHOW_SBITS(name, gb, num)\
    NEG_SSR32(name##_cache, num)
# endif
# define GET_CACHE(name, gb)\
    ((uint32_t)name##_cache)
/* ALT reader: the current read position in bits (the stored bit index). */
static inline int get_bits_count(const GetBitContext *s){
return s->index;
}
/* ALT reader: moves the read position by n bits by adding n to the bit
   index. No bounds check is performed. */
static inline void skip_bits_long(GetBitContext *s, int n){
s->index += n;
}
#elif defined LIBMPEG2_BITSTREAM_READER
//libmpeg2 like reader
/* Reader state: bit counter, 32-bit cache and a byte pointer; UPDATE_CACHE
   refills 16 bits at a time.
   No macro body ends in a line continuation: a trailing backslash on the
   last line would splice the following preprocessor directive into the
   macro body. */
# define MIN_CACHE_BITS 17
# define OPEN_READER(name, gb)\
    int name##_bit_count=(gb)->bit_count;\
    int name##_cache= (gb)->cache;\
    uint8_t * name##_buffer_ptr=(gb)->buffer_ptr;
# define CLOSE_READER(name, gb)\
    (gb)->bit_count= name##_bit_count;\
    (gb)->cache= name##_cache;\
    (gb)->buffer_ptr= name##_buffer_ptr;
# define UPDATE_CACHE(name, gb)\
    if(name##_bit_count >= 0){\
        name##_cache+= AV_RB16(name##_buffer_ptr) << name##_bit_count; \
        name##_buffer_ptr+=2;\
        name##_bit_count-= 16;\
    }
# define SKIP_CACHE(name, gb, num)\
    name##_cache <<= (num);
# define SKIP_COUNTER(name, gb, num)\
    name##_bit_count += (num);
# define SKIP_BITS(name, gb, num)\
    {\
        SKIP_CACHE(name, gb, num)\
        SKIP_COUNTER(name, gb, num)\
    }
# define LAST_SKIP_BITS(name, gb, num) SKIP_BITS(name, gb, num)
# define LAST_SKIP_CACHE(name, gb, num) SKIP_CACHE(name, gb, num)
# define SHOW_UBITS(name, gb, num)\
    NEG_USR32(name##_cache, num)
# define SHOW_SBITS(name, gb, num)\
    NEG_SSR32(name##_cache, num)
# define GET_CACHE(name, gb)\
    ((uint32_t)name##_cache)
/* LIBMPEG2 reader: the read position in bits, computed from the byte offset
   of buffer_ptr, minus 16, plus bit_count. */
static inline int get_bits_count(const GetBitContext *s){
return (s->buffer_ptr - s->buffer)*8 - 16 + s->bit_count;
}
/* LIBMPEG2 reader: moves the read position by n bits. The byte pointer is
   advanced by whole 16-bit units, the cache is rebuilt from the two bytes
   just before the new pointer, and UPDATE_CACHE then refills it. */
static inline void skip_bits_long(GetBitContext *s, int n){
OPEN_READER(re, s)
re_bit_count += n;
re_buffer_ptr += 2*(re_bit_count>>4);
re_bit_count &= 15;
re_cache = ((re_buffer_ptr[-2]<<8) + re_buffer_ptr[-1]) << (16+re_bit_count);
UPDATE_CACHE(re, s)
CLOSE_READER(re, s)
}
#elif defined A32_BITSTREAM_READER
# define MIN_CACHE_BITS 32
/* Reader macros for the A32 bitstream reader: the state is a bit counter,
   two 32-bit cache words and a uint32_t read pointer; UPDATE_CACHE refills
   32 bits at a time.
   No macro body ends in a line continuation: a trailing backslash on the
   last line would splice the following preprocessor directive (including
   the #if ARCH_X86 below) into the macro body.
   NOTE(review): av_be2ne32 is not defined in the visible part of this
   header -- confirm a definition exists before enabling this reader. */
# define OPEN_READER(name, gb)\
    int name##_bit_count=(gb)->bit_count;\
    uint32_t name##_cache0= (gb)->cache0;\
    uint32_t name##_cache1= (gb)->cache1;\
    uint32_t * name##_buffer_ptr=(gb)->buffer_ptr;
# define CLOSE_READER(name, gb)\
    (gb)->bit_count= name##_bit_count;\
    (gb)->cache0= name##_cache0;\
    (gb)->cache1= name##_cache1;\
    (gb)->buffer_ptr= name##_buffer_ptr;
# define UPDATE_CACHE(name, gb)\
    if(name##_bit_count > 0){\
        const uint32_t next= av_be2ne32( *name##_buffer_ptr );\
        name##_cache0 |= NEG_USR32(next,name##_bit_count);\
        name##_cache1 |= next<<name##_bit_count;\
        name##_buffer_ptr++;\
        name##_bit_count-= 32;\
    }
#if ARCH_X86
# define SKIP_CACHE(name, gb, num)\
    __asm__(\
        "shldl %2, %1, %0 \n\t"\
        "shll %2, %1 \n\t"\
        : "+r" (name##_cache0), "+r" (name##_cache1)\
        : "Ic" ((uint8_t)(num))\
    );
#else
# define SKIP_CACHE(name, gb, num)\
    name##_cache0 <<= (num);\
    name##_cache0 |= NEG_USR32(name##_cache1,num);\
    name##_cache1 <<= (num);
#endif
# define SKIP_COUNTER(name, gb, num)\
    name##_bit_count += (num);
# define SKIP_BITS(name, gb, num)\
    {\
        SKIP_CACHE(name, gb, num)\
        SKIP_COUNTER(name, gb, num)\
    }
# define LAST_SKIP_BITS(name, gb, num) SKIP_BITS(name, gb, num)
# define LAST_SKIP_CACHE(name, gb, num) SKIP_CACHE(name, gb, num)
# define SHOW_UBITS(name, gb, num)\
    NEG_USR32(name##_cache0, num)
# define SHOW_SBITS(name, gb, num)\
    NEG_SSR32(name##_cache0, num)
# define GET_CACHE(name, gb)\
    (name##_cache0)
/* A32 reader: the read position in bits, computed from the byte offset of
   buffer_ptr, minus 32, plus bit_count. */
static inline int get_bits_count(const GetBitContext *s){
return ((uint8_t*)s->buffer_ptr - s->buffer)*8 - 32 + s->bit_count;
}
/* A32 reader: moves the read position by n bits. The word pointer is
   advanced by whole 32-bit units, cache0 is rebuilt from the word just
   before the new pointer, cache1 is cleared and UPDATE_CACHE refills. */
static inline void skip_bits_long(GetBitContext *s, int n){
OPEN_READER(re, s)
re_bit_count += n;
re_buffer_ptr += re_bit_count>>5;
re_bit_count &= 31;
re_cache0 = av_be2ne32( re_buffer_ptr[-1] ) << re_bit_count;
re_cache1 = 0;
UPDATE_CACHE(re, s)
CLOSE_READER(re, s)
}
#endif
/**
* read mpeg1 dc style vlc (sign bit + mantissa with no MSB).
* if MSB not set it is negative
* @param n length in bits
* @return the decoded signed value; the reader is advanced by n bits
* @author BERO
*/
static inline int get_xbits(GetBitContext *s, int n){
register int sign;
register int32_t cache;
OPEN_READER(re, s)
UPDATE_CACHE(re, s)
cache = GET_CACHE(re,s);
/* all ones when the top cache bit is clear, zero otherwise
   (relies on an arithmetic right shift of a negative value) */
sign=(~cache)>>31;
LAST_SKIP_BITS(re, s, n)
CLOSE_READER(re, s)
return (NEG_USR32(sign ^ cache, n) ^ sign) - sign;
}
/* Reads the next n bits as a sign-extended value (SHOW_SBITS) and advances
   the reader by n bits. */
static inline int get_sbits(GetBitContext *s, int n){
register int tmp;
OPEN_READER(re, s)
UPDATE_CACHE(re, s)
tmp= SHOW_SBITS(re, s, n);
LAST_SKIP_BITS(re, s, n)
CLOSE_READER(re, s)
return tmp;
}
/**
* reads 1-17 bits.
* Note, the alt bitstream reader can read up to 25 bits, but the libmpeg2 reader can't
* @param n number of bits to read
* @return the bits as an unsigned value; the reader is advanced by n bits
*/
static inline unsigned int get_bits(GetBitContext *s, int n){
register int tmp;
OPEN_READER(re, s)
UPDATE_CACHE(re, s)
tmp= SHOW_UBITS(re, s, n);
LAST_SKIP_BITS(re, s, n)
CLOSE_READER(re, s)
return tmp;
}
/**
* shows 1-17 bits.
* Note, the alt bitstream reader can read up to 25 bits, but the libmpeg2 reader can't
* @param n number of bits to peek at
* @return the bits as an unsigned value; the reader state is not written
*         back (CLOSE_READER is commented out), so the position is unchanged
*/
static inline unsigned int show_bits(GetBitContext *s, int n){
register int tmp;
OPEN_READER(re, s)
UPDATE_CACHE(re, s)
tmp= SHOW_UBITS(re, s, n);
// CLOSE_READER(re, s)
return tmp;
}
/* Advances the reader by n bits without returning them. */
static inline void skip_bits(GetBitContext *s, int n){
//Note gcc seems to optimize this to s->index+=n for the ALT_READER :))
OPEN_READER(re, s)
UPDATE_CACHE(re, s)
LAST_SKIP_BITS(re, s, n)
CLOSE_READER(re, s)
}
/* Reads a single bit and advances the reader by one bit.
   With the ALT reader the bit is taken straight from the byte containing
   the current index: the lowest remaining bit for the little-endian
   variant, the highest remaining bit otherwise. Other readers fall back to
   get_bits(s, 1). */
static inline unsigned int get_bits1(GetBitContext *s){
#ifdef ALT_BITSTREAM_READER
unsigned int index= s->index;
uint8_t result= s->buffer[ index>>3 ];
#ifdef ALT_BITSTREAM_READER_LE
result>>= (index&0x07);
result&= 1;
#else
/* move the wanted bit to bit 7 (result is 8 bits wide), then down to bit 0 */
result<<= (index&0x07);
result>>= 8 - 1;
#endif
index++;
s->index= index;
return result;
#else
return get_bits(s, 1);
#endif
}
/* Peeks at the next bit without advancing; shorthand for show_bits(s, 1). */
static inline unsigned int show_bits1(GetBitContext *s){
return show_bits(s, 1);
}
/* Advances the reader by one bit; shorthand for skip_bits(s, 1). */
static inline void skip_bits1(GetBitContext *s){
skip_bits(s, 1);
}
/**
* reads 0-32 bits.
*
* Requests of up to MIN_CACHE_BITS bits are served by a single get_bits()
* call; longer ones are split into a 16-bit read followed by the remainder.
* A zero-length request returns 0 without touching the reader, because
* get_bits(s, 0) would end up shifting the 32-bit cache by 32 bits.
*
* @param n number of bits to read, 0..32
* @return the bits as an unsigned value; the reader is advanced by n bits
*/
static inline unsigned int get_bits_long(GetBitContext *s, int n){
    if(!n) return 0;
    if(n<=MIN_CACHE_BITS) return get_bits(s, n);
    else{
#ifdef ALT_BITSTREAM_READER_LE
        /* little-endian stream: the first 16 bits read are the low ones */
        unsigned int ret= get_bits(s, 16);
        return ret | (get_bits(s, n-16) << 16);
#else
        /* big-endian stream: the first 16 bits read are the high ones */
        unsigned int ret= get_bits(s, 16) << (n-16);
        return ret | get_bits(s, n-16);
#endif
    }
}
/**
* reads 0-32 bits as a signed integer.
* The n bits are read with get_bits_long() and sign-extended from bit n-1.
* NOTE(review): n is passed on to sign_extend(), whose shift count is
* 8*sizeof(int) - n, so n == 0 looks unsupported despite the range above --
* confirm.
*/
static inline int get_sbits_long(GetBitContext *s, int n) {
return sign_extend(get_bits_long(s, n), n);
}
/**
* shows 0-32 bits.
* Short requests (up to MIN_CACHE_BITS) are peeked directly; longer ones are
* read from a temporary copy of the context so that *s keeps its position.
*/
static inline unsigned int show_bits_long(GetBitContext *s, int n){
    GetBitContext scratch;

    if(n <= MIN_CACHE_BITS)
        return show_bits(s, n);

    scratch = *s;
    return get_bits_long(&scratch, n);
}
/* not used
static inline int check_marker(GetBitContext *s, const char *msg)
{
int bit= get_bits1(s);
if(!bit)
av_log(NULL, AV_LOG_INFO, "Marker bit missing %s\n", msg);
return bit;
}
*/
/**
* init GetBitContext.
* @param buffer bitstream buffer, must be FF_INPUT_BUFFER_PADDING_SIZE bytes larger than the actual read bits
* because some optimized bitstream readers read 32 or 64 bit at once and could read over the end
* @param bit_size the size of the buffer in bits
*
* While GetBitContext stores the buffer size, for performance reasons you are
* responsible for checking for the buffer end yourself (take advantage of the padding)!
*
* A negative bit_size (or a negative computed byte size) resets the context
* to an empty buffer: buffer = NULL, size 0.
*/
static inline void init_get_bits(GetBitContext *s,
const uint8_t *buffer, int bit_size)
{
/* size in bytes, rounded up */
int buffer_size= (bit_size+7)>>3;
if(buffer_size < 0 || bit_size < 0) {
buffer_size = bit_size = 0;
buffer = NULL;
}
s->buffer= buffer;
s->size_in_bits= bit_size;
s->buffer_end= buffer + buffer_size;
#ifdef ALT_BITSTREAM_READER
s->index=0;
#elif defined LIBMPEG2_BITSTREAM_READER
/* start from the 2-byte aligned address at or below buffer and account for
   the skipped byte in bit_count; skip_bits_long(s, 0) then loads the cache */
s->buffer_ptr = (uint8_t*)((intptr_t)buffer&(~1));
s->bit_count = 16 + 8*((intptr_t)buffer&1);
skip_bits_long(s, 0);
#elif defined A32_BITSTREAM_READER
/* same idea with 4-byte alignment */
s->buffer_ptr = (uint32_t*)((intptr_t)buffer&(~3));
s->bit_count = 32 + 8*((intptr_t)buffer&3);
skip_bits_long(s, 0);
#endif
}
/* Skips forward to the next byte boundary; does nothing when the reader is
   already byte aligned. */
static inline void align_get_bits(GetBitContext *s)
{
    const int pad_bits = (-get_bits_count(s)) & 7;

    if(pad_bits != 0)
        skip_bits(s, pad_bits);
}
/* Convenience form of init_vlc_sparse() without a symbols table: NULL, 0, 0
   are passed for symbols/symbols_wrap/symbols_size, so each code decodes to
   its own index in the bits/codes tables. */
#define init_vlc(vlc, nb_bits, nb_codes,\
bits, bits_wrap, bits_size,\
codes, codes_wrap, codes_size,\
flags)\
init_vlc_sparse(vlc, nb_bits, nb_codes,\
bits, bits_wrap, bits_size,\
codes, codes_wrap, codes_size,\
NULL, 0, 0, flags)
/* Builds the decoding tables of vlc from strided bits/codes/symbols arrays;
   returns 0 on success and -1 on failure. */
int init_vlc_sparse(VLC *vlc, int nb_bits, int nb_codes,
const void *bits, int bits_wrap, int bits_size,
const void *codes, int codes_wrap, int codes_size,
const void *symbols, int symbols_wrap, int symbols_size,
int flags);
/* flag bits for init_vlc()/init_vlc_sparse() */
/* NOTE(review): the Rockbox build_table()/init_vlc_sparse() shown in this
   commit have their INIT_VLC_LE handling commented out -- confirm before
   passing this flag. */
#define INIT_VLC_LE 2
#define INIT_VLC_USE_NEW_STATIC 4
/* NOTE(review): the only free_vlc() definition visible in this commit is
   commented out ("not used in rockbox"); a call would presumably fail to
   link -- confirm. */
void free_vlc(VLC *vlc);
/* Declares a function-local static table of static_size entries (with the
   extra declaration attributes given in attr), attaches it to vlc and
   initializes it with init_vlc(..., INIT_VLC_USE_NEW_STATIC).
   The return value of init_vlc() is discarded. */
#define INIT_VLC_STATIC(vlc, bits, a,b,c,d,e,f,g, static_size, attr)\
{\
static VLC_TYPE table[static_size][2] attr;\
(vlc)->table= table;\
(vlc)->table_allocated= static_size;\
init_vlc(vlc, bits, a,b,c,d,e,f,g, INIT_VLC_USE_NEW_STATIC);\
}
/**
* Decodes one VLC symbol into `code` from an already opened reader `name`.
*
* The next `bits` bits index the top-level table. A negative length entry
* is a subtable link: the bits are consumed, the cache is refilled and the
* next -n bits, offset by the stored subtable index, select the entry. Up
* to max_depth levels (at most 3) are followed. The bits of the final
* entry are then skipped.
*
* If the vlc code is invalid and max_depth=1, then no bits will be removed.
* If the vlc code is invalid and max_depth>1, then the number of bits removed
* is undefined.
*/
#define GET_VLC(code, name, gb, table, bits, max_depth)\
{\
int n, nb_bits;\
unsigned int index;\
\
index= SHOW_UBITS(name, gb, bits);\
code = table[index][0];\
n = table[index][1];\
\
if(max_depth > 1 && n < 0){\
LAST_SKIP_BITS(name, gb, bits)\
UPDATE_CACHE(name, gb)\
\
nb_bits = -n;\
\
index= SHOW_UBITS(name, gb, nb_bits) + code;\
code = table[index][0];\
n = table[index][1];\
if(max_depth > 2 && n < 0){\
LAST_SKIP_BITS(name, gb, nb_bits)\
UPDATE_CACHE(name, gb)\
\
nb_bits = -n;\
\
index= SHOW_UBITS(name, gb, nb_bits) + code;\
code = table[index][0];\
n = table[index][1];\
}\
}\
SKIP_BITS(name, gb, n)\
}
/* Decodes one (level, run) pair from an RL_VLC_ELEM table using an already
   opened reader `name`. The next `bits` bits index the table; a negative
   len marks a subtable link, followed once when max_depth > 1 (the cache is
   refilled first only if need_update is set). The bits of the final entry
   are then skipped. */
#define GET_RL_VLC(level, run, name, gb, table, bits, max_depth, need_update)\
{\
int n, nb_bits;\
unsigned int index;\
\
index= SHOW_UBITS(name, gb, bits);\
level = table[index].level;\
n = table[index].len;\
\
if(max_depth > 1 && n < 0){\
SKIP_BITS(name, gb, bits)\
if(need_update){\
UPDATE_CACHE(name, gb)\
}\
\
nb_bits = -n;\
\
index= SHOW_UBITS(name, gb, nb_bits) + level;\
level = table[index].level;\
n = table[index].len;\
}\
run= table[index].run;\
SKIP_BITS(name, gb, n)\
}
/**
* parses a vlc code, faster than get_vlc()
* @param bits is the number of bits which will be read at once, must be
* identical to nb_bits in init_vlc()
* @param max_depth is the number of times `bits` bits must be read to completely
* read the longest vlc code
* = (max_vlc_length + bits - 1) / bits
* @return the table value stored for the decoded code
*/
static av_always_inline int get_vlc2(GetBitContext *s, VLC_TYPE (*table)[2],
int bits, int max_depth)
{
int code;
OPEN_READER(re, s)
UPDATE_CACHE(re, s)
GET_VLC(code, re, s, table, bits, max_depth)
CLOSE_READER(re, s)
return code;
}
//#define TRACE
#ifdef TRACE
/* TRACE helper: logs the low n bits of `bits` as binary digits, most
   significant first, then pads with spaces up to 24 columns. */
static inline void print_bin(int bits, int n){
int i;
for(i=n-1; i>=0; i--){
av_log(NULL, AV_LOG_DEBUG, "%d", (bits>>i)&1);
}
for(i=n; i<24; i++)
av_log(NULL, AV_LOG_DEBUG, " ");
}
/* TRACE helper: get_bits() that also logs the value read, its width, the
   bit position it was read from and the calling location. */
static inline int get_bits_trace(GetBitContext *s, int n, char *file, const char *func, int line){
int r= get_bits(s, n);
print_bin(r, n);
av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d bit @%5d in %s %s:%d\n", r, n, r, get_bits_count(s)-n, file, func, line);
return r;
}
/* TRACE helper: get_vlc2() that also logs the raw code bits, their length,
   the decoded value, the start position and the calling location.
   The raw bits are recovered from a 24-bit peek taken before decoding, so
   the logged pattern is only meaningful for codes of at most 24 bits. */
static inline int get_vlc_trace(GetBitContext *s, VLC_TYPE (*table)[2], int bits, int max_depth, char *file, const char *func, int line){
int show= show_bits(s, 24);
int pos= get_bits_count(s);
int r= get_vlc2(s, table, bits, max_depth);
int len= get_bits_count(s) - pos;
int bits2= show>>(24-len);
print_bin(bits2, len);
av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d vlc @%5d in %s %s:%d\n", bits2, len, r, pos, file, func, line);
return r;
}
/* Tracing wrapper around get_xbits(): logs the n raw bits as peeked before
   the read, the value get_xbits() produced from them, the starting bit
   position and the caller's file/function/line, and returns that value. */
static inline int get_xbits_trace(GetBitContext *s, int n, char *file, const char *func, int line){
int show= show_bits(s, n);
int r= get_xbits(s, n);
print_bin(show, n);
/* get_bits_count() is sampled after the read, hence the "-n" */
av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d xbt @%5d in %s %s:%d\n", show, n, r, get_bits_count(s)-n, file, func, line);
return r;
}
#define get_bits(s, n) get_bits_trace(s, n, __FILE__, __PRETTY_FUNCTION__, __LINE__)
#define get_bits1(s) get_bits_trace(s, 1, __FILE__, __PRETTY_FUNCTION__, __LINE__)
#define get_xbits(s, n) get_xbits_trace(s, n, __FILE__, __PRETTY_FUNCTION__, __LINE__)
#define get_vlc(s, vlc) get_vlc_trace(s, (vlc)->table, (vlc)->bits, 3, __FILE__, __PRETTY_FUNCTION__, __LINE__)
#define get_vlc2(s, tab, bits, max) get_vlc_trace(s, tab, bits, max, __FILE__, __PRETTY_FUNCTION__, __LINE__)
#define tprintf(p, ...) av_log(p, AV_LOG_DEBUG, __VA_ARGS__)
#else //TRACE
#define tprintf(p, ...) {}
#endif
/* Decodes a 1- or 2-bit code: a first bit of 0 yields 0; otherwise a second
   bit is read and the result is 1 or 2 (second bit + 1). */
static inline int decode012(GetBitContext *gb){
    if (!get_bits1(gb))
        return 0;
    return 1 + get_bits1(gb);
}
/* Decodes a 1- or 2-bit code: a first bit of 1 yields 0; otherwise a second
   bit is read and the result is 2 minus that bit (2 or 1). */
static inline int decode210(GetBitContext *gb){
    int first_bit = get_bits1(gb);

    /* the second bit is only consumed when the first one was 0 */
    return first_bit ? 0 : 2 - get_bits1(gb);
}
/* Returns how many bits remain between the current read position and
   size_in_bits; the result is negative once the reader has gone past it. */
static inline int get_bits_left(GetBitContext *gb)
{
    int consumed = get_bits_count(gb);

    return gb->size_in_bits - consumed;
}
#endif /* AVCODEC_GET_BITS_H */

View file

@ -0,0 +1,484 @@
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVUTIL_INTREADWRITE_H
#define AVUTIL_INTREADWRITE_H
#include <stdint.h>
/*
* Arch-specific headers can provide any combination of
* AV_[RW][BLN](16|24|32|64) and AV_(COPY|SWAP|ZERO)(64|128) macros.
* Preprocessor symbols must be defined, even if these are implemented
* as inline functions.
*/
/*
* Map AV_RNXX <-> AV_R[BL]XX for all variants provided by per-arch headers.
*/
/* Hard-coded to 0, so only the little-endian mappings below are active and
 * the AV_RN and AV_WN fallbacks resolve to the AV_RL and AV_WL accessors.
 * NOTE(review): this is not derived from the build target; confirm nothing
 * relies on the native-endian macros being truly native on big-endian
 * targets. */
#define HAVE_BIGENDIAN 0
#if HAVE_BIGENDIAN
# if defined(AV_RN16) && !defined(AV_RB16)
# define AV_RB16(p) AV_RN16(p)
# elif !defined(AV_RN16) && defined(AV_RB16)
# define AV_RN16(p) AV_RB16(p)
# endif
# if defined(AV_WN16) && !defined(AV_WB16)
# define AV_WB16(p, v) AV_WN16(p, v)
# elif !defined(AV_WN16) && defined(AV_WB16)
# define AV_WN16(p, v) AV_WB16(p, v)
# endif
# if defined(AV_RN24) && !defined(AV_RB24)
# define AV_RB24(p) AV_RN24(p)
# elif !defined(AV_RN24) && defined(AV_RB24)
# define AV_RN24(p) AV_RB24(p)
# endif
# if defined(AV_WN24) && !defined(AV_WB24)
# define AV_WB24(p, v) AV_WN24(p, v)
# elif !defined(AV_WN24) && defined(AV_WB24)
# define AV_WN24(p, v) AV_WB24(p, v)
# endif
# if defined(AV_RN32) && !defined(AV_RB32)
# define AV_RB32(p) AV_RN32(p)
# elif !defined(AV_RN32) && defined(AV_RB32)
# define AV_RN32(p) AV_RB32(p)
# endif
# if defined(AV_WN32) && !defined(AV_WB32)
# define AV_WB32(p, v) AV_WN32(p, v)
# elif !defined(AV_WN32) && defined(AV_WB32)
# define AV_WN32(p, v) AV_WB32(p, v)
# endif
# if defined(AV_RN64) && !defined(AV_RB64)
# define AV_RB64(p) AV_RN64(p)
# elif !defined(AV_RN64) && defined(AV_RB64)
# define AV_RN64(p) AV_RB64(p)
# endif
# if defined(AV_WN64) && !defined(AV_WB64)
# define AV_WB64(p, v) AV_WN64(p, v)
# elif !defined(AV_WN64) && defined(AV_WB64)
# define AV_WN64(p, v) AV_WB64(p, v)
# endif
#else /* HAVE_BIGENDIAN */
# if defined(AV_RN16) && !defined(AV_RL16)
# define AV_RL16(p) AV_RN16(p)
# elif !defined(AV_RN16) && defined(AV_RL16)
# define AV_RN16(p) AV_RL16(p)
# endif
# if defined(AV_WN16) && !defined(AV_WL16)
# define AV_WL16(p, v) AV_WN16(p, v)
# elif !defined(AV_WN16) && defined(AV_WL16)
# define AV_WN16(p, v) AV_WL16(p, v)
# endif
# if defined(AV_RN24) && !defined(AV_RL24)
# define AV_RL24(p) AV_RN24(p)
# elif !defined(AV_RN24) && defined(AV_RL24)
# define AV_RN24(p) AV_RL24(p)
# endif
# if defined(AV_WN24) && !defined(AV_WL24)
# define AV_WL24(p, v) AV_WN24(p, v)
# elif !defined(AV_WN24) && defined(AV_WL24)
# define AV_WN24(p, v) AV_WL24(p, v)
# endif
# if defined(AV_RN32) && !defined(AV_RL32)
# define AV_RL32(p) AV_RN32(p)
# elif !defined(AV_RN32) && defined(AV_RL32)
# define AV_RN32(p) AV_RL32(p)
# endif
# if defined(AV_WN32) && !defined(AV_WL32)
# define AV_WL32(p, v) AV_WN32(p, v)
# elif !defined(AV_WN32) && defined(AV_WL32)
# define AV_WN32(p, v) AV_WL32(p, v)
# endif
# if defined(AV_RN64) && !defined(AV_RL64)
# define AV_RL64(p) AV_RN64(p)
# elif !defined(AV_RN64) && defined(AV_RL64)
# define AV_RN64(p) AV_RL64(p)
# endif
# if defined(AV_WN64) && !defined(AV_WL64)
# define AV_WL64(p, v) AV_WN64(p, v)
# elif !defined(AV_WN64) && defined(AV_WL64)
# define AV_WN64(p, v) AV_WL64(p, v)
# endif
#endif /* !HAVE_BIGENDIAN */
#define HAVE_ATTRIBUTE_PACKED 0
#define HAVE_FAST_UNALIGNED 0
/*
* Define AV_[RW]N helper macros to simplify definitions not provided
* by per-arch headers.
*/
#if HAVE_ATTRIBUTE_PACKED
union unaligned_64 { uint64_t l; } __attribute__((packed)) av_alias;
union unaligned_32 { uint32_t l; } __attribute__((packed)) av_alias;
union unaligned_16 { uint16_t l; } __attribute__((packed)) av_alias;
# define AV_RN(s, p) (((const union unaligned_##s *) (p))->l)
# define AV_WN(s, p, v) ((((union unaligned_##s *) (p))->l) = (v))
#elif defined(__DECC)
# define AV_RN(s, p) (*((const __unaligned uint##s##_t*)(p)))
# define AV_WN(s, p, v) (*((__unaligned uint##s##_t*)(p)) = (v))
#elif HAVE_FAST_UNALIGNED
# define AV_RN(s, p) (((const av_alias##s*)(p))->u##s)
# define AV_WN(s, p, v) (((av_alias##s*)(p))->u##s = (v))
#else
#ifndef AV_RB16
# define AV_RB16(x) \
((((const uint8_t*)(x))[0] << 8) | \
((const uint8_t*)(x))[1])
#endif
#ifndef AV_WB16
# define AV_WB16(p, d) do { \
((uint8_t*)(p))[1] = (d); \
((uint8_t*)(p))[0] = (d)>>8; \
} while(0)
#endif
#ifndef AV_RL16
# define AV_RL16(x) \
((((const uint8_t*)(x))[1] << 8) | \
((const uint8_t*)(x))[0])
#endif
#ifndef AV_WL16
# define AV_WL16(p, d) do { \
((uint8_t*)(p))[0] = (d); \
((uint8_t*)(p))[1] = (d)>>8; \
} while(0)
#endif
#ifndef AV_RB32
/* Coldfire and ARMv6 and above support unaligned long reads */
#if defined CPU_COLDFIRE || (defined CPU_ARM && ARM_ARCH >= 6)
#define AV_RB32(x) (htobe32(*(const uint32_t*)(x)))
#else
/*
 * Portable byte-wise read of a big-endian 32-bit value from a possibly
 * unaligned address.
 * The most significant byte is widened to uint32_t before shifting: a plain
 * uint8_t promotes to int, and shifting a value >= 0x80 left by 24 overflows
 * a signed int. The cast also makes the result unsigned, like the
 * word-load variant above.
 */
# define AV_RB32(x) \
    (((uint32_t)((const uint8_t*)(x))[0] << 24) | \
     (((const uint8_t*)(x))[1] << 16) | \
     (((const uint8_t*)(x))[2] << 8) | \
     ((const uint8_t*)(x))[3])
#endif
#endif
#ifndef AV_WB32
# define AV_WB32(p, d) do { \
((uint8_t*)(p))[3] = (d); \
((uint8_t*)(p))[2] = (d)>>8; \
((uint8_t*)(p))[1] = (d)>>16; \
((uint8_t*)(p))[0] = (d)>>24; \
} while(0)
#endif
#ifndef AV_RL32
/*
 * Portable byte-wise read of a little-endian 32-bit value from a possibly
 * unaligned address.
 * The most significant byte is widened to uint32_t before shifting: a plain
 * uint8_t promotes to int, and shifting a value >= 0x80 left by 24 overflows
 * a signed int. The cast also makes the result unsigned.
 */
# define AV_RL32(x) \
    (((uint32_t)((const uint8_t*)(x))[3] << 24) | \
     (((const uint8_t*)(x))[2] << 16) | \
     (((const uint8_t*)(x))[1] << 8) | \
     ((const uint8_t*)(x))[0])
#endif
#ifndef AV_WL32
# define AV_WL32(p, d) do { \
((uint8_t*)(p))[0] = (d); \
((uint8_t*)(p))[1] = (d)>>8; \
((uint8_t*)(p))[2] = (d)>>16; \
((uint8_t*)(p))[3] = (d)>>24; \
} while(0)
#endif
#ifndef AV_RB64
# define AV_RB64(x) \
(((uint64_t)((const uint8_t*)(x))[0] << 56) | \
((uint64_t)((const uint8_t*)(x))[1] << 48) | \
((uint64_t)((const uint8_t*)(x))[2] << 40) | \
((uint64_t)((const uint8_t*)(x))[3] << 32) | \
((uint64_t)((const uint8_t*)(x))[4] << 24) | \
((uint64_t)((const uint8_t*)(x))[5] << 16) | \
((uint64_t)((const uint8_t*)(x))[6] << 8) | \
(uint64_t)((const uint8_t*)(x))[7])
#endif
#ifndef AV_WB64
# define AV_WB64(p, d) do { \
((uint8_t*)(p))[7] = (d); \
((uint8_t*)(p))[6] = (d)>>8; \
((uint8_t*)(p))[5] = (d)>>16; \
((uint8_t*)(p))[4] = (d)>>24; \
((uint8_t*)(p))[3] = (d)>>32; \
((uint8_t*)(p))[2] = (d)>>40; \
((uint8_t*)(p))[1] = (d)>>48; \
((uint8_t*)(p))[0] = (d)>>56; \
} while(0)
#endif
#ifndef AV_RL64
# define AV_RL64(x) \
(((uint64_t)((const uint8_t*)(x))[7] << 56) | \
((uint64_t)((const uint8_t*)(x))[6] << 48) | \
((uint64_t)((const uint8_t*)(x))[5] << 40) | \
((uint64_t)((const uint8_t*)(x))[4] << 32) | \
((uint64_t)((const uint8_t*)(x))[3] << 24) | \
((uint64_t)((const uint8_t*)(x))[2] << 16) | \
((uint64_t)((const uint8_t*)(x))[1] << 8) | \
(uint64_t)((const uint8_t*)(x))[0])
#endif
#ifndef AV_WL64
# define AV_WL64(p, d) do { \
((uint8_t*)(p))[0] = (d); \
((uint8_t*)(p))[1] = (d)>>8; \
((uint8_t*)(p))[2] = (d)>>16; \
((uint8_t*)(p))[3] = (d)>>24; \
((uint8_t*)(p))[4] = (d)>>32; \
((uint8_t*)(p))[5] = (d)>>40; \
((uint8_t*)(p))[6] = (d)>>48; \
((uint8_t*)(p))[7] = (d)>>56; \
} while(0)
#endif
#if HAVE_BIGENDIAN
# define AV_RN(s, p) AV_RB##s(p)
# define AV_WN(s, p, v) AV_WB##s(p, v)
#else
# define AV_RN(s, p) AV_RL##s(p)
# define AV_WN(s, p, v) AV_WL##s(p, v)
#endif
#endif /* HAVE_FAST_UNALIGNED */
#ifndef AV_RN16
# define AV_RN16(p) AV_RN(16, p)
#endif
#ifndef AV_RN32
# define AV_RN32(p) AV_RN(32, p)
#endif
#ifndef AV_RN64
# define AV_RN64(p) AV_RN(64, p)
#endif
#ifndef AV_WN16
# define AV_WN16(p, v) AV_WN(16, p, v)
#endif
#ifndef AV_WN32
# define AV_WN32(p, v) AV_WN(32, p, v)
#endif
#ifndef AV_WN64
# define AV_WN64(p, v) AV_WN(64, p, v)
#endif
#if HAVE_BIGENDIAN
# define AV_RB(s, p) AV_RN##s(p)
# define AV_WB(s, p, v) AV_WN##s(p, v)
# define AV_RL(s, p) bswap_##s(AV_RN##s(p))
# define AV_WL(s, p, v) AV_WN##s(p, bswap_##s(v))
#else
# define AV_RB(s, p) bswap_##s(AV_RN##s(p))
# define AV_WB(s, p, v) AV_WN##s(p, bswap_##s(v))
# define AV_RL(s, p) AV_RN##s(p)
# define AV_WL(s, p, v) AV_WN##s(p, v)
#endif
#define AV_RB8(x) (((const uint8_t*)(x))[0])
#define AV_WB8(p, d) do { ((uint8_t*)(p))[0] = (d); } while(0)
#define AV_RL8(x) AV_RB8(x)
#define AV_WL8(p, d) AV_WB8(p, d)
#ifndef AV_RB16
# define AV_RB16(p) AV_RB(16, p)
#endif
#ifndef AV_WB16
# define AV_WB16(p, v) AV_WB(16, p, v)
#endif
#ifndef AV_RL16
# define AV_RL16(p) AV_RL(16, p)
#endif
#ifndef AV_WL16
# define AV_WL16(p, v) AV_WL(16, p, v)
#endif
#ifndef AV_RB32
# define AV_RB32(p) AV_RB(32, p)
#endif
#ifndef AV_WB32
# define AV_WB32(p, v) AV_WB(32, p, v)
#endif
#ifndef AV_RL32
# define AV_RL32(p) AV_RL(32, p)
#endif
#ifndef AV_WL32
# define AV_WL32(p, v) AV_WL(32, p, v)
#endif
#ifndef AV_RB64
# define AV_RB64(p) AV_RB(64, p)
#endif
#ifndef AV_WB64
# define AV_WB64(p, v) AV_WB(64, p, v)
#endif
#ifndef AV_RL64
# define AV_RL64(p) AV_RL(64, p)
#endif
#ifndef AV_WL64
# define AV_WL64(p, v) AV_WL(64, p, v)
#endif
#ifndef AV_RB24
# define AV_RB24(x) \
((((const uint8_t*)(x))[0] << 16) | \
(((const uint8_t*)(x))[1] << 8) | \
((const uint8_t*)(x))[2])
#endif
#ifndef AV_WB24
# define AV_WB24(p, d) do { \
((uint8_t*)(p))[2] = (d); \
((uint8_t*)(p))[1] = (d)>>8; \
((uint8_t*)(p))[0] = (d)>>16; \
} while(0)
#endif
#ifndef AV_RL24
# define AV_RL24(x) \
((((const uint8_t*)(x))[2] << 16) | \
(((const uint8_t*)(x))[1] << 8) | \
((const uint8_t*)(x))[0])
#endif
#ifndef AV_WL24
# define AV_WL24(p, d) do { \
((uint8_t*)(p))[0] = (d); \
((uint8_t*)(p))[1] = (d)>>8; \
((uint8_t*)(p))[2] = (d)>>16; \
} while(0)
#endif
/*
* The AV_[RW]NA macros access naturally aligned data
* in a type-safe way.
*/
#define AV_RNA(s, p) (((const av_alias##s*)(p))->u##s)
#define AV_WNA(s, p, v) (((av_alias##s*)(p))->u##s = (v))
#ifndef AV_RN16A
# define AV_RN16A(p) AV_RNA(16, p)
#endif
#ifndef AV_RN32A
# define AV_RN32A(p) AV_RNA(32, p)
#endif
#ifndef AV_RN64A
# define AV_RN64A(p) AV_RNA(64, p)
#endif
#ifndef AV_WN16A
# define AV_WN16A(p, v) AV_WNA(16, p, v)
#endif
#ifndef AV_WN32A
# define AV_WN32A(p, v) AV_WNA(32, p, v)
#endif
#ifndef AV_WN64A
# define AV_WN64A(p, v) AV_WNA(64, p, v)
#endif
/* Parameters for AV_COPY*, AV_SWAP*, AV_ZERO* must be
* naturally aligned. They may be implemented using MMX,
* so emms_c() must be called before using any float code
* afterwards.
*/
#define AV_COPY(n, d, s) \
(((av_alias##n*)(d))->u##n = ((const av_alias##n*)(s))->u##n)
#ifndef AV_COPY16
# define AV_COPY16(d, s) AV_COPY(16, d, s)
#endif
#ifndef AV_COPY32
# define AV_COPY32(d, s) AV_COPY(32, d, s)
#endif
#ifndef AV_COPY64
# define AV_COPY64(d, s) AV_COPY(64, d, s)
#endif
#ifndef AV_COPY128
# define AV_COPY128(d, s) \
do { \
AV_COPY64(d, s); \
AV_COPY64((char*)(d)+8, (char*)(s)+8); \
} while(0)
#endif
#define AV_SWAP(n, a, b) FFSWAP(av_alias##n, *(av_alias##n*)(a), *(av_alias##n*)(b))
#ifndef AV_SWAP64
# define AV_SWAP64(a, b) AV_SWAP(64, a, b)
#endif
#define AV_ZERO(n, d) (((av_alias##n*)(d))->u##n = 0)
#ifndef AV_ZERO16
# define AV_ZERO16(d) AV_ZERO(16, d)
#endif
#ifndef AV_ZERO32
# define AV_ZERO32(d) AV_ZERO(32, d)
#endif
#ifndef AV_ZERO64
# define AV_ZERO64(d) AV_ZERO(64, d)
#endif
#ifndef AV_ZERO128
# define AV_ZERO128(d) \
do { \
AV_ZERO64(d); \
AV_ZERO64((char*)(d)+8); \
} while(0)
#endif
#endif /* AVUTIL_INTREADWRITE_H */

View file

@ -0,0 +1,323 @@
/*
* copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file libavcodec/put_bits.h
* bitstream writer API
*/
#ifndef AVCODEC_PUT_BITS_H
#define AVCODEC_PUT_BITS_H
#include <stdint.h>
#include <stdlib.h>
#include "ffmpeg_bswap.h"
#include "ffmpeg_intreadwrite.h"
#define av_log(...)
#define HAVE_FAST_UNALIGNED 0
/* State of a bitstream writer.
 * buf and buf_end must be present and used by every alternative writer. */
typedef struct PutBitContext {
#ifdef ALT_BITSTREAM_WRITER
/* start of the output buffer and one past its last byte */
uint8_t *buf, *buf_end;
/* number of bits written so far (see put_bits_count()) */
int index;
#else
/* bits accepted by put_bits() but not yet stored in the buffer */
uint32_t bit_buf;
/* free bits remaining in bit_buf; 32 when bit_buf is empty */
int bit_left;
/* start of the output buffer, next byte to be written, one past the end */
uint8_t *buf, *buf_ptr, *buf_end;
#endif
/* buffer capacity in bits, set by init_put_bits() to 8 * buffer_size */
int size_in_bits;
} PutBitContext;
/**
 * Prepares a PutBitContext for writing into the given buffer.
 *
 * A negative buffer_size is treated as "no buffer": the context is set up
 * with a NULL buffer of size 0.
 *
 * @param s the context to initialize
 * @param buffer the buffer where to put bits
 * @param buffer_size the size in bytes of buffer
 */
static inline void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size)
{
    if (buffer_size < 0) {
        buffer      = NULL;
        buffer_size = 0;
    }

    s->buf          = buffer;
    s->buf_end      = buffer + buffer_size;
    s->size_in_bits = buffer_size * 8;

#ifdef ALT_BITSTREAM_WRITER
    s->index = 0;
    /* this writer ORs bits into the buffer, so the first word starts cleared */
    ((uint32_t*)(s->buf))[0] = 0;
#else
    /* start with an empty accumulator: all 32 bits are free */
    s->buf_ptr  = buffer;
    s->bit_buf  = 0;
    s->bit_left = 32;
#endif
}
/**
 * Reports how many bits have been written to the bitstream so far,
 * including bits still held in the accumulator.
 */
static inline int put_bits_count(PutBitContext *s)
{
#ifdef ALT_BITSTREAM_WRITER
    return s->index;
#else
    /* bits accepted by put_bits() that have not reached the buffer yet */
    const int pending_bits = 32 - s->bit_left;

    return (s->buf_ptr - s->buf) * 8 + pending_bits;
#endif
}
/**
 * Pads the end of the output stream with zeros.
 *
 * Writes the bits still held in the accumulator to the buffer one byte at a
 * time (the last byte is zero-padded), then resets the accumulator to empty.
 * Nothing is written when the accumulator is already empty.
 */
static inline void flush_put_bits(PutBitContext *s)
{
#ifdef ALT_BITSTREAM_WRITER
    align_put_bits(s);
#else
#ifndef BITSTREAM_WRITER_LE
    /* Move the pending bits to the top of the accumulator. With nothing
       pending bit_left is 32, and shifting a 32-bit value by 32 is undefined
       behaviour, so skip the shift in that case. */
    if (s->bit_left < 32)
        s->bit_buf <<= s->bit_left;
#endif
    while (s->bit_left < 32) {
        /* XXX: should test end of buffer */
#ifdef BITSTREAM_WRITER_LE
        *s->buf_ptr++ = s->bit_buf;
        s->bit_buf >>= 8;
#else
        *s->buf_ptr++ = s->bit_buf >> 24;
        s->bit_buf <<= 8;
#endif
        s->bit_left += 8;
    }
    s->bit_left = 32;
    s->bit_buf  = 0;
#endif
}
#if defined(ALT_BITSTREAM_WRITER) || defined(BITSTREAM_WRITER_LE)
#define align_put_bits align_put_bits_unsupported_here
#define ff_put_string ff_put_string_unsupported_here
#define ff_copy_bits ff_copy_bits_unsupported_here
#else
/**
* Pads the bitstream with zeros up to the next byte boundary.
*/
void align_put_bits(PutBitContext *s);
/**
* Puts the string string in the bitstream.
*
* @param terminate_string 0-terminates the written string if value is 1
*/
void ff_put_string(PutBitContext *pb, const char *string, int terminate_string);
/**
* Copies the content of src to the bitstream.
*
* @param length the number of bits of src to copy
*/
void ff_copy_bits(PutBitContext *pb, const uint8_t *src, int length);
#endif
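/**
 * Writes the n lowest bits of value into the bitstream; n may be at most 31.
 * value must not have any bit set above bit n-1 (value < (1U << n)),
 * as stated by the commented-out assert in the body.
 * Use put_bits32 to write 32 bits.
 */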
static inline void put_bits(PutBitContext *s, int n, unsigned int value)
#ifndef ALT_BITSTREAM_WRITER
{
unsigned int bit_buf;
int bit_left;
// printf("put_bits=%d %x\n", n, value);
//assert(n <= 31 && value < (1U << n));
bit_buf = s->bit_buf;
bit_left = s->bit_left;
// printf("n=%d value=%x cnt=%d buf=%x\n", n, value, bit_cnt, bit_buf);
/* XXX: optimize */
#ifdef BITSTREAM_WRITER_LE
bit_buf |= value << (32 - bit_left);
if (n >= bit_left) {
#if !HAVE_FAST_UNALIGNED
if (3 & (intptr_t) s->buf_ptr) {
AV_WL32(s->buf_ptr, bit_buf);
} else
#endif
*(uint32_t *)s->buf_ptr = le2me_32(bit_buf);
s->buf_ptr+=4;
bit_buf = (bit_left==32)?0:value >> bit_left;
bit_left+=32;
}
bit_left-=n;
#else
if (n < bit_left) {
bit_buf = (bit_buf<<n) | value;
bit_left-=n;
} else {
bit_buf<<=bit_left;
bit_buf |= value >> (n - bit_left);
#if !HAVE_FAST_UNALIGNED
if (3 & (intptr_t) s->buf_ptr) {
AV_WB32(s->buf_ptr, bit_buf);
} else
#endif
*(uint32_t *)s->buf_ptr = be2me_32(bit_buf);
//printf("bitbuf = %08x\n", bit_buf);
s->buf_ptr+=4;
bit_left+=32 - n;
bit_buf = value;
}
#endif
s->bit_buf = bit_buf;
s->bit_left = bit_left;
}
#else /* ALT_BITSTREAM_WRITER defined */
{
# ifdef ALIGNED_BITSTREAM_WRITER
# if ARCH_X86
__asm__ volatile(
"movl %0, %%ecx \n\t"
"xorl %%eax, %%eax \n\t"
"shrdl %%cl, %1, %%eax \n\t"
"shrl %%cl, %1 \n\t"
"movl %0, %%ecx \n\t"
"shrl $3, %%ecx \n\t"
"andl $0xFFFFFFFC, %%ecx \n\t"
"bswapl %1 \n\t"
"orl %1, (%2, %%ecx) \n\t"
"bswapl %%eax \n\t"
"addl %3, %0 \n\t"
"movl %%eax, 4(%2, %%ecx) \n\t"
: "=&r" (s->index), "=&r" (value)
: "r" (s->buf), "r" (n), "0" (s->index), "1" (value<<(-n))
: "%eax", "%ecx"
);
# else
int index= s->index;
uint32_t *ptr= ((uint32_t *)s->buf)+(index>>5);
value<<= 32-n;
ptr[0] |= be2me_32(value>>(index&31));
ptr[1] = be2me_32(value<<(32-(index&31)));
//if(n>24) printf("%d %d\n", n, value);
index+= n;
s->index= index;
# endif
# else //ALIGNED_BITSTREAM_WRITER
# if ARCH_X86
__asm__ volatile(
"movl $7, %%ecx \n\t"
"andl %0, %%ecx \n\t"
"addl %3, %%ecx \n\t"
"negl %%ecx \n\t"
"shll %%cl, %1 \n\t"
"bswapl %1 \n\t"
"movl %0, %%ecx \n\t"
"shrl $3, %%ecx \n\t"
"orl %1, (%%ecx, %2) \n\t"
"addl %3, %0 \n\t"
"movl $0, 4(%%ecx, %2) \n\t"
: "=&r" (s->index), "=&r" (value)
: "r" (s->buf), "r" (n), "0" (s->index), "1" (value)
: "%ecx"
);
# else
int index= s->index;
uint32_t *ptr= (uint32_t*)(((uint8_t *)s->buf)+(index>>3));
ptr[0] |= be2me_32(value<<(32-n-(index&7) ));
ptr[1] = 0;
//if(n>24) printf("%d %d\n", n, value);
index+= n;
s->index= index;
# endif
# endif //!ALIGNED_BITSTREAM_WRITER
}
#endif
/**
 * Writes the n least significant bits of a signed value into the bitstream.
 *
 * @param pb the bitstream writer
 * @param n number of bits to write, 0..31
 * @param value signed value; only its low n bits are written
 */
static inline void put_sbits(PutBitContext *pb, int n, int32_t value)
{
    //assert(n >= 0 && n <= 31);
    /* build the mask in unsigned arithmetic: (1<<n) overflows a signed int
       when n is 31 */
    put_bits(pb, n, (uint32_t)value & ((1U << n) - 1U));
}
/**
 * Gives the address of the byte where the bitstream writer will put
 * the next bit.
 */
static inline uint8_t* put_bits_ptr(PutBitContext *s)
{
#ifdef ALT_BITSTREAM_WRITER
    /* index counts bits; divide by 8 to get the byte offset */
    uint8_t *next_byte = &s->buf[s->index >> 3];
#else
    uint8_t *next_byte = s->buf_ptr;
#endif
    return next_byte;
}
/**
 * Skips the given number of bytes.
 * PutBitContext must be flushed & aligned to a byte boundary before calling this.
 *
 * @param s the bitstream writer
 * @param n number of bytes to skip
 */
static inline void skip_put_bytes(PutBitContext *s, int n)
{
    //assert((put_bits_count(s)&7)==0);
#ifdef ALT_BITSTREAM_WRITER
    /* This variant is unfinished (the buffer may need cleaning after the
       skip), so the build is stopped with an explicit diagnostic instead of
       an unexplained syntax error. */
#error "skip_put_bytes: FIXME may need some cleaning of the buffer (ALT_BITSTREAM_WRITER)"
    s->index += n<<3;
#else
    //assert(s->bit_left==32);
    s->buf_ptr += n;
#endif
}
/**
 * Skips the given number of bits.
 * Must only be used if the actual values in the bitstream do not matter.
 * If n is 0 the behavior is undefined.
 *
 * @param s the bitstream writer
 * @param n number of bits to skip
 */
static inline void skip_put_bits(PutBitContext *s, int n)
{
#ifdef ALT_BITSTREAM_WRITER
s->index += n;
#else
/* Consume n free bits. The count may drop to zero or below, meaning one or
   more whole 32-bit words have been skipped. */
s->bit_left -= n;
/* bit_left>>5 is the (negative) number of 32-bit words skipped, so this
   moves buf_ptr forward by 4 bytes per word. NOTE(review): relies on >> of a
   negative int being an arithmetic shift, which is implementation-defined. */
s->buf_ptr-= 4*(s->bit_left>>5);
/* Fold bit_left into 0..31. With n == 0 and an empty accumulator
   (bit_left == 32) the line above moves buf_ptr backwards by 4 and this
   leaves bit_left at 0, hence the restriction on n. */
s->bit_left &= 31;
#endif
}
/**
 * Changes the end of the buffer.
 *
 * Updates both the end pointer and the capacity in bits, so the context
 * stays consistent with what init_put_bits() would have set for this size.
 *
 * @param s the bitstream writer
 * @param size the new size in bytes of the buffer where to put bits
 */
static inline void set_put_bits_buffer_size(PutBitContext *s, int size)
{
    s->buf_end      = s->buf + size;
    s->size_in_bits = 8 * size;
}
#endif /* AVCODEC_PUT_BITS_H */

View file

@ -0,0 +1,473 @@
/*
* FFT/IFFT transforms converted to integer precision
* Copyright (c) 2010 Dave Hooper, Mohamed Tarek, Michael Giacomelli
* Copyright (c) 2008 Loren Merritt
* Copyright (c) 2002 Fabrice Bellard
* Partly based on libdjbfft by D. J. Bernstein
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file libavcodec/fft.c
* FFT/IFFT transforms.
*/
#ifdef CPU_ARM
// we definitely want CONFIG_SMALL undefined for ipod
// so we get the inlined version of fft16 (which is measurably faster)
#undef CONFIG_SMALL
#else
#undef CONFIG_SMALL
#endif
#include "fft.h"
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <inttypes.h>
#include <time.h>
#include <codecs/lib/codeclib.h>
#include "codeclib_misc.h"
#include "mdct_lookup.h"
/* constants for fft_16 (same constants as in mdct_arm.S ... ) */
#define cPI1_8 (0x7641af3d) /* cos(pi/8) s.31 */
#define cPI2_8 (0x5a82799a) /* cos(2pi/8) = 1/sqrt(2) s.31 */
#define cPI3_8 (0x30fbc54d) /* cos(3pi/8) s.31 */
/* asm-optimised functions and/or macros */
#include "fft-ffmpeg_arm.h"
#include "fft-ffmpeg_cf.h"
#ifndef ICODE_ATTR_TREMOR_MDCT
#define ICODE_ATTR_TREMOR_MDCT ICODE_ATTR
#endif
#if 0
static int split_radix_permutation(int i, int n, int inverse)
{
int m;
if(n <= 2) return i&1;
m = n >> 1;
if(!(i&m)) return split_radix_permutation(i, m, inverse)*2;
m >>= 1;
if(inverse == !(i&m)) return split_radix_permutation(i, m, inverse)*4 + 1;
else return split_radix_permutation(i, m, inverse)*4 - 1;
}
static void ff_fft_permute_c(FFTContext *s, FFTComplex *z)
{
int j, k, np;
FFTComplex tmp;
//const uint16_t *revtab = s->revtab;
np = 1 << s->nbits;
const int revtab_shift = (12 - s->nbits);
/* reverse */
for(j=0;j<np;j++) {
k = revtab[j]>>revtab_shift;
if (k < j) {
tmp = z[k];
z[k] = z[j];
z[j] = tmp;
}
}
}
#endif
#define BF(x,y,a,b) {\
x = a - b;\
y = a + b;\
}
#define BF_REV(x,y,a,b) {\
x = a + b;\
y = a - b;\
}
#ifndef FFT_FFMPEG_INCL_OPTIMISED_BUTTERFLIES
#define BUTTERFLIES(a0,a1,a2,a3) {\
{\
FFTSample temp1,temp2;\
BF(temp1, temp2, t5, t1);\
BF(a2.re, a0.re, a0.re, temp2);\
BF(a3.im, a1.im, a1.im, temp1);\
}\
{\
FFTSample temp1,temp2;\
BF(temp1, temp2, t2, t6);\
BF(a3.re, a1.re, a1.re, temp1);\
BF(a2.im, a0.im, a0.im, temp2);\
}\
}
// force loading all the inputs before storing any.
// this is slightly slower for small data, but avoids store->load aliasing
// for addresses separated by large powers of 2.
#define BUTTERFLIES_BIG(a0,a1,a2,a3) {\
FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\
{\
FFTSample temp1, temp2;\
BF(temp1, temp2, t5, t1);\
BF(a2.re, a0.re, r0, temp2);\
BF(a3.im, a1.im, i1, temp1);\
}\
{\
FFTSample temp1, temp2;\
BF(temp1, temp2, t2, t6);\
BF(a3.re, a1.re, r1, temp1);\
BF(a2.im, a0.im, i0, temp2);\
}\
}
#endif
/*
see conjugate pair description in
http://www.fftw.org/newsplit.pdf
a0 = z[k]
a1 = z[k+N/4]
a2 = z[k+2N/4]
a3 = z[k+3N/4]
result:
y[k] = z[k]+w(z[k+2N/4])+w'(z[k+3N/4])
y[k+N/4] = z[k+N/4]-iw(z[k+2N/4])+iw'(z[k+3N/4])
y[k+2N/4] = z[k]-w(z[k+2N/4])-w'(z[k+3N/4])
y[k+3N/4] = z[k+N/4]+iw(z[k+2N/4])-iw'(z[k+3N/4])
i.e.
a0 = a0 + (w.a2 + w'.a3)
a1 = a1 - i(w.a2 - w'.a3)
a2 = a0 - (w.a2 + w'.a3)
a3 = a1 + i(w.a2 - w'.a3)
note re(w') = re(w) and im(w') = -im(w)
so therefore
re(a0) = re(a0) + re(w.a2) + re(w.a3)
im(a0) = im(a0) + im(w.a2) - im(w.a3) etc
and remember also that
Re([s+it][u+iv]) = su-tv
Im([s+it][u+iv]) = sv+tu
so
Re(w'.(s+it)) = Re(w').s - Im(w').t = Re(w).s + Im(w).t
Im(w'.(s+it)) = Re(w').t + Im(w').s = Re(w).t - Im(w).s
For inverse dft we take the complex conjugate of all twiddle factors.
Hence
a0 = a0 + (w'.a2 + w.a3)
a1 = a1 - i(w'.a2 - w.a3)
a2 = a0 - (w'.a2 + w.a3)
a3 = a1 + i(w'.a2 - w.a3)
Define t1 = Re(w'.a2) = Re(w)*Re(a2) + Im(w)*Im(a2)
t2 = Im(w'.a2) = Re(w)*Im(a2) - Im(w)*Re(a2)
t5 = Re(w.a3) = Re(w)*Re(a3) - Im(w)*Im(a3)
t6 = Im(w.a3) = Re(w)*Im(a3) + Im(w)*Re(a3)
Then we just output:
a0.re = a0.re + ( t1 + t5 )
a0.im = a0.im + ( t2 + t6 )
a1.re = a1.re + ( t2 - t6 ) // since we multiply by -i and i(-i) = 1
a1.im = a1.im - ( t1 - t5 ) // since we multiply by -i and 1(-i) = -i
a2.re = a0.re - ( t1 + t5 )
a2.im = a0.im - ( t2 + t6 )
a3.re = a1.re - ( t2 - t6 ) // since we multiply by +i and i(+i) = -1
a3.im = a1.im + ( t1 - t5 ) // since we multiply by +i and 1(+i) = i
*/
#ifndef FFT_FFMPEG_INCL_OPTIMISED_TRANSFORM
/*
 * One butterfly step on the four points z[0], z[n], z[2n], z[3n] with an
 * explicit twiddle factor (wre, wim).
 * z[2n] is run through XPROD31_R to give (t1, t2) and z[3n] through
 * XNPROD31_R to give (t5, t6); BUTTERFLIES then combines them with z[0] and
 * z[n] and stores all four points in place.
 * Returns z+1 so the caller can step straight to the next point.
 */
static inline FFTComplex* TRANSFORM(FFTComplex * z, unsigned int n, FFTSample wre, FFTSample wim)
{
/* t1, t2, t5 and t6 are read by name inside the BUTTERFLIES macro */
register FFTSample t1,t2,t5,t6,r_re,r_im;
r_re = z[n*2].re;
r_im = z[n*2].im;
XPROD31_R(r_re, r_im, wre, wim, t1,t2);
r_re = z[n*3].re;
r_im = z[n*3].im;
XNPROD31_R(r_re, r_im, wre, wim, t5,t6);
BUTTERFLIES(z[0],z[n],z[n*2],z[n*3]);
return z+1;
}
/*
 * Same butterfly step as TRANSFORM, but the twiddle factor is loaded from a
 * table entry with the real part first: wre = w[0], wim = w[1].
 * Returns z+1.
 */
static inline FFTComplex* TRANSFORM_W01(FFTComplex * z, unsigned int n, const FFTSample * w)
{
register const FFTSample wre=w[0],wim=w[1];
/* t1, t2, t5 and t6 are read by name inside the BUTTERFLIES macro */
register FFTSample t1,t2,t5,t6,r_re,r_im;
r_re = z[n*2].re;
r_im = z[n*2].im;
XPROD31_R(r_re, r_im, wre, wim, t1,t2);
r_re = z[n*3].re;
r_im = z[n*3].im;
XNPROD31_R(r_re, r_im, wre, wim, t5,t6);
BUTTERFLIES(z[0],z[n],z[n*2],z[n*3]);
return z+1;
}
/*
 * Same butterfly step as TRANSFORM, but the twiddle factor is loaded from a
 * table entry with the imaginary part first: wim = w[0], wre = w[1].
 * Returns z+1.
 */
static inline FFTComplex* TRANSFORM_W10(FFTComplex * z, unsigned int n, const FFTSample * w)
{
register const FFTSample wim=w[0],wre=w[1];
/* t1, t2, t5 and t6 are read by name inside the BUTTERFLIES macro */
register FFTSample t1,t2,t5,t6,r_re,r_im;
r_re = z[n*2].re;
r_im = z[n*2].im;
XPROD31_R(r_re, r_im, wre, wim, t1,t2);
r_re = z[n*3].re;
r_im = z[n*3].im;
XNPROD31_R(r_re, r_im, wre, wim, t5,t6);
BUTTERFLIES(z[0],z[n],z[n*2],z[n*3]);
return z+1;
}
/*
 * Butterfly step for the twiddle factor whose real and imaginary parts are
 * both cPI2_8 (cos(2pi/8) = 1/sqrt(2)).
 * Each input component is scaled once by cPI2_8 and the products are then
 * formed as sums and differences, so four multiplies suffice.
 * Returns z+1.
 */
static inline FFTComplex* TRANSFORM_EQUAL(FFTComplex * z, unsigned int n)
{
/* t1, t2, t5 and t6 are read by name inside the BUTTERFLIES macro */
register FFTSample t1,t2,t5,t6,temp1,temp2;
/* view z as a flat FFTSample array; presumably two samples (re, im) per
   FFTComplex, so n*4 samples ahead is z[2n] -- TODO confirm layout */
register FFTSample * my_z = (FFTSample *)(z);
my_z += n*4;
t2 = MULT31(my_z[0], cPI2_8);
temp1 = MULT31(my_z[1], cPI2_8);
/* another n*2 samples ahead: z[3n] under the same assumption */
my_z += n*2;
temp2 = MULT31(my_z[0], cPI2_8);
t5 = MULT31(my_z[1], cPI2_8);
t1 = ( temp1 + t2 );
t2 = ( temp1 - t2 );
t6 = ( temp2 + t5 );
t5 = ( temp2 - t5 );
/* rewinds my_z to z; the pointer is not used again in this function */
my_z -= n*6;
BUTTERFLIES(z[0],z[n],z[n*2],z[n*3]);
return z+1;
}
/*
 * Butterfly step that needs no multiplication: z[2n] and z[3n] are fed to
 * BUTTERFLIES unchanged as (t1, t2) and (t5, t6).
 * Returns z+1.
 */
static inline FFTComplex* TRANSFORM_ZERO(FFTComplex * z, unsigned int n)
{
/* t1, t2, t5 and t6 are read by name inside the BUTTERFLIES macro */
FFTSample t1,t2,t5,t6;
t1 = z[n*2].re;
t2 = z[n*2].im;
t5 = z[n*3].re;
t6 = z[n*3].im;
BUTTERFLIES(z[0],z[n],z[n*2],z[n*3]);
return z+1;
}
#endif
/* z[0...8n-1], w[1...2n-1] */
/*
 * Combining pass of the FFT: applies one butterfly step to each successive
 * point of z, taking twiddle factors from sincos_lookup0 in strides of STEP.
 * The table is walked forwards up to sincos_lookup0+1024 and then backwards
 * down to sincos_lookup0; on the way back the two halves of each table pair
 * swap roles, so TRANSFORM_W01 replaces TRANSFORM_W10.
 *
 * z_arg    first point to process; points z, z+n, z+2n and z+3n are combined
 * STEP_arg stride through sincos_lookup0, in FFTSample units
 * n_arg    distance n between the four points of a butterfly
 *
 * The prototype is repeated so ICODE_ATTR_TREMOR_MDCT can be attached to it.
 */
static void pass(FFTComplex *z_arg, unsigned int STEP_arg, unsigned int n_arg) ICODE_ATTR_TREMOR_MDCT;
static void pass(FFTComplex *z_arg, unsigned int STEP_arg, unsigned int n_arg)
{
register FFTComplex * z = z_arg;
register unsigned int STEP = STEP_arg;
register unsigned int n = n_arg;
register const FFTSample *w = sincos_lookup0+STEP;
/* wre = *(wim+1) . ordering is sin,cos */
register const FFTSample *w_end = sincos_lookup0+1024;
/* first two are special (well, first one is special, but we need to do pairs) */
z = TRANSFORM_ZERO(z,n);
z = TRANSFORM_W10(z,n,w);
w += STEP;
/* first pass forwards through sincos_lookup0*/
/* do/while: at least one pair of points is always processed here */
do {
z = TRANSFORM_W10(z,n,w);
w += STEP;
z = TRANSFORM_W10(z,n,w);
w += STEP;
} while(LIKELY(w < w_end));
/* second half: pass backwards through sincos_lookup0*/
/* wim and wre are now in opposite places so ordering now [0],[1] */
w_end=sincos_lookup0;
while(LIKELY(w>w_end))
{
z = TRANSFORM_W01(z,n,w);
w -= STEP;
z = TRANSFORM_W01(z,n,w);
w -= STEP;
}
}
/* what is STEP?
sincos_lookup0 has sin,cos pairs for 1/4 cycle, in 1024 points
so half cycle would be 2048 points
ff_cos_16 has 8 elements corresponding to 4 cos points and 4 sin points
so each of the 4 points pairs corresponds to a 256*2-byte jump in sincos_lookup0
8192/16 (from "ff_cos_16") is 512 bytes.
i.e. for fft16, STEP = 8192/16 */
#define DECL_FFT(n,n2,n4)\
static void fft##n(FFTComplex *z) ICODE_ATTR_TREMOR_MDCT;\
static void fft##n(FFTComplex *z)\
{\
fft##n2(z);\
fft##n4(z+n4*2);\
fft##n4(z+n4*3);\
pass(z,8192/n,n4);\
}
#ifndef FFT_FFMPEG_INCL_OPTIMISED_FFT4
/*
 * In-place 4-point FFT on z[0..3], built only from BF add/subtract
 * butterflies (no multiplications).
 * The trailing comments give each step in terms of the flattened sample
 * numbering r1..r8, i.e. r1 = z[0].re, r2 = z[0].im, ..., r8 = z[3].im.
 */
static inline void fft4(FFTComplex *z)
{
FFTSample t1, t2, t3, t4, t5, t6, t7, t8;
BF(t3, t1, z[0].re, z[1].re); // t3=r1-r3 ; t1 = r1+r3
BF(t8, t6, z[3].re, z[2].re); // t8=r7-r5 ; t6 = r7+r5
BF(z[2].re, z[0].re, t1, t6); // r5=t1-t6 ; r1 = t1+t6
BF(t4, t2, z[0].im, z[1].im); // t4=r2-r4 ; t2 = r2+r4
BF(t7, t5, z[2].im, z[3].im); // t7=r6-r8 ; t5 = r6+r8
BF(z[3].im, z[1].im, t4, t8); // r8=t4-t8 ; r4 = t4+t8
BF(z[3].re, z[1].re, t3, t7); // r7=t3-t7 ; r3 = t3+t7
BF(z[2].im, z[0].im, t2, t5); // r6=t2-t5 ; r2 = t2+t5
}
#endif
/* Plain (non-inline) wrapper around fft4(); this is the function whose
   address is stored in fft_dispatch[]. Presumably needed because fft4 is
   inline here and may be supplied by an optimised header instead. */
static void fft4_dispatch(FFTComplex *z)
{
fft4(z);
}
#ifndef FFT_FFMPEG_INCL_OPTIMISED_FFT8
/*
 * In-place 8-point FFT on z[0..7]: a 4-point FFT of z[0..3], add/subtract
 * butterflies on z[4..7] that are folded into z[0], z[2], z[4] and z[6],
 * and finally a TRANSFORM_EQUAL step on z[1], z[3], z[5], z[7].
 */
static inline void fft8(FFTComplex *z)
{
fft4(z);
FFTSample t1,t2,t3,t4,t7,t8;
/* BF(x,y,a,b) computes x=a-b, y=a+b; passing -z[5] as b therefore gives
   t1 = z[4]+z[5] and z[5] = z[4]-z[5] (likewise for z[6]/z[7]) */
BF(t1, z[5].re, z[4].re, -z[5].re);
BF(t2, z[5].im, z[4].im, -z[5].im);
BF(t3, z[7].re, z[6].re, -z[7].re);
BF(t4, z[7].im, z[6].im, -z[7].im);
BF(t8, t1, t3, t1);
BF(t7, t2, t2, t4);
BF(z[4].re, z[0].re, z[0].re, t1);
BF(z[4].im, z[0].im, z[0].im, t2);
BF(z[6].re, z[2].re, z[2].re, t7);
BF(z[6].im, z[2].im, z[2].im, t8);
/* the remaining step works on the odd points: z[1], z[3], z[5], z[7] */
z++;
TRANSFORM_EQUAL(z,2);
}
#endif
/* Plain (non-inline) wrapper around fft8(); this is the function whose
   address is stored in fft_dispatch[]. Presumably needed because fft8 is
   inline here and may be supplied by an optimised header instead. */
static void fft8_dispatch(FFTComplex *z)
{
fft8(z);
}
#ifndef CONFIG_SMALL
/*
 * In-place 16-point FFT on z[0..15], hand-unrolled: an 8-point FFT of
 * z[0..7] and two 4-point FFTs of z[8..11] and z[12..15], followed by four
 * explicit butterfly steps with constant twiddle factors in place of the
 * pass() call that DECL_FFT would generate.
 *
 * The prototype is repeated so ICODE_ATTR_TREMOR_MDCT can be attached to it.
 */
static void fft16(FFTComplex *z) ICODE_ATTR_TREMOR_MDCT;
static void fft16(FFTComplex *z)
{
fft8(z);
fft4(z+8);
fft4(z+12);
/* butterfly steps are done in the order k = 0, 2, 1, 3; z is moved to the
   matching point before each call */
TRANSFORM_ZERO(z,4);
z+=2;
TRANSFORM_EQUAL(z,4);
z-=1;
TRANSFORM(z,4,cPI1_8,cPI3_8);
z+=2;
TRANSFORM(z,4,cPI3_8,cPI1_8);
}
#else
DECL_FFT(16,8,4)
#endif
DECL_FFT(32,16,8)
DECL_FFT(64,32,16)
DECL_FFT(128,64,32)
DECL_FFT(256,128,64)
DECL_FFT(512,256,128)
DECL_FFT(1024,512,256)
DECL_FFT(2048,1024,512)
DECL_FFT(4096,2048,1024)
/* FFT routines indexed by nbits-2: entry 0 is the 4-point transform
   (nbits = 2) and the last entry the 4096-point transform (nbits = 12). */
static void (*fft_dispatch[])(FFTComplex*) = {
fft4_dispatch, fft8_dispatch, fft16, fft32, fft64, fft128, fft256, fft512, fft1024,
fft2048, fft4096
};
/*
 * Runs the in-place FFT of size 2^nbits on z by calling the matching entry
 * of fft_dispatch[].
 * nbits must be in the range 2..12 (4 to 4096 points); the value is not
 * checked, so anything else indexes outside the table.
 */
void ff_fft_calc_c(int nbits, FFTComplex *z)
{
fft_dispatch[nbits-2](z);
}
#if 0
int main (void)
{
#define PRECISION 16
#define FFT_SIZE 1024
#define ftofix32(x) ((fixed32)((x) * (float)(1 << PRECISION) + ((x) < 0 ? -0.5 : 0.5)))
#define itofix32(x) ((x) << PRECISION)
#define fixtoi32(x) ((x) >> PRECISION)
int j;
const long N = FFT_SIZE;
double r[FFT_SIZE] = {0.0}, i[FFT_SIZE] = {0.0};
long n;
double t;
double amp, phase;
clock_t start, end;
double exec_time = 0;
FFTContext s;
FFTComplex z[FFT_SIZE];
memset(z, 0, 64*sizeof(FFTComplex));
/* Generate saw-tooth test data */
for (n = 0; n < FFT_SIZE; n++)
{
t = (2 * M_PI * n)/N;
/*z[n].re = 1.1 + sin( t) +
0.5 * sin(2.0 * t) +
(1.0/3.0) * sin(3.0 * t) +
0.25 * sin(4.0 * t) +
0.2 * sin(5.0 * t) +
(1.0/6.0) * sin(6.0 * t) +
(1.0/7.0) * sin(7.0 * t) ;*/
z[n].re = ftofix32(cos(2*M_PI*n/64));
//printf("z[%d] = %f\n", n, z[n].re);
//getchar();
}
ff_fft_init(&s, 10, 1);
//start = clock();
//for(n = 0; n < 1000000; n++)
ff_fft_permute_c(&s, z);
ff_fft_calc_c(&s, z);
//end = clock();
//exec_time = (((double)end-(double)start)/CLOCKS_PER_SEC);
for(j = 0; j < FFT_SIZE; j++)
{
printf("%8.4f\n", sqrt(pow(fixtof32(z[j].re),2)+ pow(fixtof32(z[j].im), 2)));
//getchar();
}
printf("muls = %d, adds = %d\n", muls, adds);
//printf(" Time elapsed = %f\n", exec_time);
//ff_fft_end(&s);
}
#endif

View file

@ -0,0 +1,456 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2010 Dave Hooper
*
* ARM optimisations for ffmpeg's fft (used in fft-ffmpeg.c)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#ifdef CPU_ARM
/* Start off with optimised variants of the butterflies that work
nicely on arm */
/* All three macros produce a sum and a difference of a and b, deriving the
   second result from the first one plus or minus (b<<1) instead of
   recomputing it from a and b. Because the first result is written before
   the second is computed, each variant is only valid for the operand
   aliasing named in its comment.
   NOTE(review): the arguments are not parenthesised and are evaluated more
   than once, so only pass simple lvalues. */
/* 1. where y and a share the same variable/register */
/* y = a + b; x = a - b */
#define BF_OPT(x,y,a,b) {\
y = a + b;\
x = y - (b<<1);\
}
/* 2. where y and b share the same variable/register */
/* x = a - b; y = a + b */
#define BF_OPT2(x,y,a,b) {\
x = a - b;\
y = x + (b<<1);\
}
/* 3. where y and b share the same variable/register (but y=(-b)) */
/* x = a + b; y = a - b */
#define BF_OPT2_REV(x,y,a,b) {\
x = a + b;\
y = x - (b<<1);\
}
/* standard BUTTERFLIES package. Note, we actually manually inline this
in all the TRANSFORM macros below anyway */
/* Expects variables t1, t2, t5 and t6 to be in scope at the expansion site
   (they are read and overwritten), and a0..a3 to be FFTComplex lvalues whose
   .re/.im members are updated in place. Defining
   FFT_FFMPEG_INCL_OPTIMISED_BUTTERFLIES presumably tells the generic FFT
   source not to define its own BUTTERFLIES -- confirm in fft-ffmpeg.c. */
#define FFT_FFMPEG_INCL_OPTIMISED_BUTTERFLIES
#define BUTTERFLIES(a0,a1,a2,a3) {\
{\
BF_OPT(t1, t5, t5, t1);\
BF_OPT(t6, t2, t2, t6);\
BF_OPT(a2.re, a0.re, a0.re, t5);\
BF_OPT(a2.im, a0.im, a0.im, t2);\
BF_OPT(a3.re, a1.re, a1.re, t6);\
BF_OPT(a3.im, a1.im, a1.im, t1);\
}\
}
#define FFT_FFMPEG_INCL_OPTIMISED_TRANSFORM
/* One combining step over the four elements z[0], z[n], z[2n] and z[3n]:
   z[2n] and z[3n] are multiplied by the twiddle (wre, wim) through
   XPROD31_R / XNPROD31_R, then the inlined BUTTERFLIES sequence updates all
   four elements in place.
   Returns z advanced by one element: the pointer is walked +2n, +n, -3n,
   +n, +n, +n, the final stmia uses writeback, and 3n is subtracted again
   (this assumes FFTComplex is two 32-bit words, matching the two-register
   stmia).
   The locals are pinned to r4..r9, presumably so that every ldmia/stmia
   register list is in ascending register order and the first-named operand
   lands at the lower address. */
static inline FFTComplex* TRANSFORM( FFTComplex* z, int n, FFTSample wre, FFTSample wim )
{
register FFTSample t1,t2 asm("r5"),t5 asm("r6"),t6 asm("r7"),r_re asm("r8"),r_im asm("r9");
z += n*2; /* z[o2] */
asm volatile( "ldmia %[my_z], {%[r_re],%[r_im]}\n\t":[r_re] "=r" (r_re), [r_im] "=r" (r_im):[my_z] "r" (z));
XPROD31_R(r_re, r_im, wre, wim, t1,t2);
z += n; /* z[o3] */
asm volatile( "ldmia %[my_z], {%[r_re],%[r_im]}\n\t":[r_re] "=r" (r_re), [r_im] "=r" (r_im):[my_z] "r" (z));
XNPROD31_R(r_re, r_im, wre, wim, t5,t6);
BF_OPT(t1, t5, t5, t1);
BF_OPT(t6, t2, t2, t6);
{
register FFTSample rt0temp asm("r4");
/* The commented-out lines below are the BUTTERFLIES sequence this block
   implements by hand (a0..a3 being z[0], z[n], z[2n], z[3n]). */
/*{*/
/* BF_OPT(t1, t5, t5, t1);*/
/* BF_OPT(t6, t2, t2, t6);*/
/* BF_OPT(a2.re, a0.re, a0.re, t5);*/
/* BF_OPT(a2.im, a0.im, a0.im, t2);*/
/* BF_OPT(a3.re, a1.re, a1.re, t6);*/
/* BF_OPT(a3.im, a1.im, a1.im, t1);*/
/*}*/
z -= n*3;
/* r_re = my_z[0]; r_im = my_z[1]; */
asm volatile( "ldmia %[my_z], {%[r_re],%[r_im]}\n\t":[r_re] "=r" (r_re), [r_im] "=r" (r_im):[my_z] "r" (z));
BF_OPT(rt0temp, r_re, r_re, t5);
BF_OPT(t2, r_im, r_im, t2);
/* my_z[0] = r_re; my_z[1] = r_im; */
asm volatile( "stmia %[my_z], {%[r_re],%[r_im]}\n\t"::[my_z] "r" (z), [r_re] "r" (r_re), [r_im] "r" (r_im):"memory" );
z += n;
/* r_re = my_z[0]; r_im = my_z[1]; */
asm volatile( "ldmia %[my_z], {%[r_re],%[r_im]}\n\t":[r_re] "=r" (r_re), [r_im] "=r" (r_im):[my_z] "r" (z));
BF_OPT(t5, r_re, r_re, t6);
BF_OPT(t6, r_im, r_im, t1);
/* my_z[0] = r_re; my_z[1] = r_im; */
asm volatile( "stmia %[my_z], {%[r_re],%[r_im]}\n\t"::[my_z] "r" (z), [r_re] "r" (r_re), [r_im] "r" (r_im):"memory");
z += n;
/* my_z[0] = rt0temp; my_z[1] = t2; */
asm volatile( "stmia %[my_z], {%[rt0temp],%[t2]}\n\t"::[my_z] "r" (z), [rt0temp] "r" (rt0temp), [t2] "r" (t2):"memory");
}
z += n;
/* my_z[0] = t5; my_z[1] = t6; */
/* writeback (!) also advances z past the element just stored */
asm volatile( "stmia %[my_z]!, {%[t5],%[t6]}\n\t":[my_z] "+r" (z) : [t5] "r" (t5), [t6] "r" (t6):"memory");
z -= n*3;
return(z);
}
/* Same combining step as TRANSFORM(), but the twiddle is read from memory:
   wre = w[0], wim = w[1]. Both are loaded with one ldmia into t5/t6.
   Returns z advanced by one element (see the pointer walk in the body; the
   final stmia uses writeback). */
static inline FFTComplex* TRANSFORM_W01( FFTComplex* z, int n, const FFTSample* w )
{
register FFTSample t1,t2 asm("r5"),t5 asm("r6"),t6 asm("r7"),r_re asm("r8"),r_im asm("r9");
/* load wre,wim into t5,t6 */
asm volatile( "ldmia %[w], {%[wre], %[wim]}\n\t":[wre] "=r" (t5), [wim] "=r" (t6):[w] "r" (w));
z += n*2; /* z[o2] -- 2n * 2 since complex numbers */
asm volatile( "ldmia %[my_z], {%[r_re],%[r_im]}\n\t":[r_re] "=r" (r_re), [r_im] "=r" (r_im):[my_z] "r" (z));
XPROD31_R(r_re, r_im, t5 /*wre*/, t6 /*wim*/, t1,t2);
z += n; /* z[o3] */
asm volatile( "ldmia %[my_z], {%[r_re],%[r_im]}\n\t":[r_re] "=r" (r_re), [r_im] "=r" (r_im):[my_z] "r" (z));
/* The twiddle registers double as the output operands here.
   NOTE(review): this relies on XNPROD31_R consuming both twiddle inputs
   before it writes its outputs -- confirm against the macro definition. */
XNPROD31_R(r_re, r_im, t5 /*wre*/, t6 /*wim*/, t5,t6);
BF_OPT(t1, t5, t5, t1);
BF_OPT(t6, t2, t2, t6);
{
register FFTSample rt0temp asm("r4");
/* The commented-out lines below are the BUTTERFLIES sequence this block
   implements by hand (a0..a3 being z[0], z[n], z[2n], z[3n]). */
/*{*/
/* BF_OPT(t1, t5, t5, t1);*/
/* BF_OPT(t6, t2, t2, t6);*/
/* BF_OPT(a2.re, a0.re, a0.re, t5);*/
/* BF_OPT(a2.im, a0.im, a0.im, t2);*/
/* BF_OPT(a3.re, a1.re, a1.re, t6);*/
/* BF_OPT(a3.im, a1.im, a1.im, t1);*/
/*}*/
z -= n*3;
/* r_re = my_z[0]; r_im = my_z[1]; */
asm volatile( "ldmia %[my_z], {%[r_re],%[r_im]}\n\t":[r_re] "=r" (r_re), [r_im] "=r" (r_im):[my_z] "r" (z));
BF_OPT(rt0temp, r_re, r_re, t5);
BF_OPT(t2, r_im, r_im, t2);
/* my_z[0] = r_re; my_z[1] = r_im; */
asm volatile( "stmia %[my_z], {%[r_re],%[r_im]}\n\t"::[my_z] "r" (z), [r_re] "r" (r_re), [r_im] "r" (r_im):"memory");
z += n;
/* r_re = my_z[0]; r_im = my_z[1]; */
asm volatile( "ldmia %[my_z], {%[r_re],%[r_im]}\n\t":[r_re] "=r" (r_re), [r_im] "=r" (r_im):[my_z] "r" (z));
BF_OPT(t5, r_re, r_re, t6);
BF_OPT(t6, r_im, r_im, t1);
/* my_z[0] = r_re; my_z[1] = r_im; */
asm volatile( "stmia %[my_z], {%[r_re],%[r_im]}\n\t"::[my_z] "r" (z), [r_re] "r" (r_re), [r_im] "r" (r_im):"memory");
z += n;
/* my_z[0] = rt0temp; my_z[1] = t2; */
asm volatile( "stmia %[my_z], {%[rt0temp],%[t2]}\n\t"::[my_z] "r" (z), [rt0temp] "r" (rt0temp), [t2] "r" (t2):"memory");
}
z += n;
/* my_z[0] = t5; my_z[1] = t6; */
/* writeback (!) also advances z past the element just stored */
asm volatile( "stmia %[my_z]!, {%[t5],%[t6]}\n\t":[my_z] "+r" (z) : [t5] "r" (t5), [t6] "r" (t6):"memory");
z -= n*3;
return(z);
}
/* Same combining step as TRANSFORM(), but the twiddle is read from memory
   with the two components swapped: wim = w[0], wre = w[1]. One ldmia puts
   w[0] in t5 (wim) and w[1] in t6 (wre).
   Returns z advanced by one element (see the pointer walk in the body; the
   final stmia uses writeback). */
static inline FFTComplex* TRANSFORM_W10( FFTComplex* z, int n, const FFTSample* w )
{
register FFTSample t1,t2 asm("r5"),t5 asm("r6"),t6 asm("r7"),r_re asm("r8"),r_im asm("r9");
/* load wim,wre into t5,t6 */
asm volatile( "ldmia %[w], {%[wim], %[wre]}\n\t":[wim] "=r" (t5), [wre] "=r" (t6):[w] "r" (w));
z += n*2; /* z[o2] -- 2n * 2 since complex numbers */
asm volatile( "ldmia %[my_z], {%[r_re],%[r_im]}\n\t":[r_re] "=r" (r_re), [r_im] "=r" (r_im):[my_z] "r" (z));
XPROD31_R(r_re, r_im, t6 /*wre*/, t5 /*wim*/, t1,t2);
z += n; /* z[o3] */
asm volatile( "ldmia %[my_z], {%[r_re],%[r_im]}\n\t":[r_re] "=r" (r_re), [r_im] "=r" (r_im):[my_z] "r" (z));
/* The twiddle registers double as the output operands here.
   NOTE(review): this relies on XNPROD31_R consuming both twiddle inputs
   before it writes its outputs -- confirm against the macro definition. */
XNPROD31_R(r_re, r_im, t6 /*wre*/, t5 /*wim*/, t5,t6);
BF_OPT(t1, t5, t5, t1);
BF_OPT(t6, t2, t2, t6);
{
register FFTSample rt0temp asm("r4");
/* The commented-out lines below are the BUTTERFLIES sequence this block
   implements by hand (a0..a3 being z[0], z[n], z[2n], z[3n]). */
/*{*/
/* BF_OPT(t1, t5, t5, t1);*/
/* BF_OPT(t6, t2, t2, t6);*/
/* BF_OPT(a2.re, a0.re, a0.re, t5);*/
/* BF_OPT(a2.im, a0.im, a0.im, t2);*/
/* BF_OPT(a3.re, a1.re, a1.re, t6);*/
/* BF_OPT(a3.im, a1.im, a1.im, t1);*/
/*}*/
z -= n*3;
/* r_re = my_z[0]; r_im = my_z[1]; */
asm volatile( "ldmia %[my_z], {%[r_re],%[r_im]}\n\t":[r_re] "=r" (r_re), [r_im] "=r" (r_im):[my_z] "r" (z));
BF_OPT(rt0temp, r_re, r_re, t5);
BF_OPT(t2, r_im, r_im, t2);
/* my_z[0] = r_re; my_z[1] = r_im; */
asm volatile( "stmia %[my_z], {%[r_re],%[r_im]}\n\t"::[my_z] "r" (z), [r_re] "r" (r_re), [r_im] "r" (r_im):"memory");
z += n;
/* r_re = my_z[0]; r_im = my_z[1]; */
asm volatile( "ldmia %[my_z], {%[r_re],%[r_im]}\n\t":[r_re] "=r" (r_re), [r_im] "=r" (r_im):[my_z] "r" (z));
BF_OPT(t5, r_re, r_re, t6);
BF_OPT(t6, r_im, r_im, t1);
/* my_z[0] = r_re; my_z[1] = r_im; */
asm volatile( "stmia %[my_z], {%[r_re],%[r_im]}\n\t"::[my_z] "r" (z), [r_re] "r" (r_re), [r_im] "r" (r_im):"memory");
z += n;
/* my_z[0] = rt0temp; my_z[1] = t2; */
asm volatile( "stmia %[my_z], {%[rt0temp],%[t2]}\n\t"::[my_z] "r" (z), [rt0temp] "r" (rt0temp), [t2] "r" (t2):"memory");
}
z += n;
/* my_z[0] = t5; my_z[1] = t6; */
/* writeback (!) also advances z past the element just stored */
asm volatile( "stmia %[my_z]!, {%[t5],%[t6]}\n\t":[my_z] "+r" (z) : [t5] "r" (t5), [t6] "r" (t6):"memory");
z -= n*3;
return(z);
}
/* Combining step for a twiddle whose two components are both cPI2_8
   (presumably what "EQUAL" refers to): instead of XPROD31_R / XNPROD31_R,
   the four input words of z[2n] and z[3n] are each scaled by cPI2_8 with
   MULT31 and then summed/differenced, followed by the same inlined
   BUTTERFLIES sequence as TRANSFORM().
   Returns z advanced by one element (see the pointer walk in the body; the
   final stmia uses writeback). */
static inline FFTComplex* TRANSFORM_EQUAL( FFTComplex* z, int n )
{
register FFTSample t1,t2 asm("r5"),t5 asm("r6"),t6 asm("r7"),r_re asm("r8"),r_im asm("r9");
z += n*2; /* z[o2] -- 2n * 2 since complex numbers */
asm volatile( "ldmia %[my_z], {%[t5],%[t6]}\n\t":[t5] "=r" (t5), [t6] "=r" (t6):[my_z] "r" (z));
z += n; /* z[o3] */
asm volatile( "ldmia %[my_z], {%[r_re],%[r_im]}\n\t":[r_re] "=r" (r_re), [r_im] "=r" (r_im):[my_z] "r" (z));
/* The commented-out block below is an alternative formulation mixing
   MULT32 and MULT31; only the MULT31 version after it is compiled. */
/**/
/*t2 = MULT32(cPI2_8, t5);*/
/*t1 = MULT31(cPI2_8, t6);*/
/*t6 = MULT31(cPI2_8, r_re);*/
/*t5 = MULT32(cPI2_8, r_im);*/
/*t1 = ( t1 + (t2<<1) );*/
/*t2 = ( t1 - (t2<<2) );*/
/*t6 = ( t6 + (t5<<1) );*/
/*t5 = ( t6 - (t5<<2) );*/
/**/
t2 = MULT31(cPI2_8, t5);
t6 = MULT31(cPI2_8, t6);
r_re = MULT31(cPI2_8, r_re);
t5 = MULT31(cPI2_8, r_im);
t1 = ( t6 + t2 );
t2 = ( t6 - t2 );
t6 = ( r_re + t5 );
t5 = ( r_re - t5 );
BF_OPT(t1, t5, t5, t1);
BF_OPT(t6, t2, t2, t6);
{
register FFTSample rt0temp asm("r4");
/* The commented-out lines below are the BUTTERFLIES sequence this block
   implements by hand (a0..a3 being z[0], z[n], z[2n], z[3n]). */
/*{*/
/* BF_OPT(t1, t5, t5, t1);*/
/* BF_OPT(t6, t2, t2, t6);*/
/* BF_OPT(a2.re, a0.re, a0.re, t5);*/
/* BF_OPT(a2.im, a0.im, a0.im, t2);*/
/* BF_OPT(a3.re, a1.re, a1.re, t6);*/
/* BF_OPT(a3.im, a1.im, a1.im, t1);*/
/*}*/
z -= n*3;
/* r_re = my_z[0]; r_im = my_z[1]; */
asm volatile( "ldmia %[my_z], {%[r_re],%[r_im]}\n\t":[r_re] "=r" (r_re), [r_im] "=r" (r_im):[my_z] "r" (z));
BF_OPT(rt0temp, r_re, r_re, t5);
BF_OPT(t2, r_im, r_im, t2);
/* my_z[0] = r_re; my_z[1] = r_im; */
asm volatile( "stmia %[my_z], {%[r_re],%[r_im]}\n\t"::[my_z] "r" (z), [r_re] "r" (r_re), [r_im] "r" (r_im):"memory");
z += n;
/* r_re = my_z[0]; r_im = my_z[1]; */
asm volatile( "ldmia %[my_z], {%[r_re],%[r_im]}\n\t":[r_re] "=r" (r_re), [r_im] "=r" (r_im):[my_z] "r" (z));
BF_OPT(t5, r_re, r_re, t6);
BF_OPT(t6, r_im, r_im, t1);
/* my_z[0] = r_re; my_z[1] = r_im; */
asm volatile( "stmia %[my_z], {%[r_re],%[r_im]}\n\t"::[my_z] "r" (z), [r_re] "r" (r_re), [r_im] "r" (r_im):"memory");
z += n;
/* my_z[0] = rt0temp; my_z[1] = t2; */
asm volatile( "stmia %[my_z], {%[rt0temp],%[t2]}\n\t"::[my_z] "r" (z), [rt0temp] "r" (rt0temp), [t2] "r" (t2):"memory");
}
z += n;
/* my_z[0] = t5; my_z[1] = t6; */
/* writeback (!) also advances z past the element just stored */
asm volatile( "stmia %[my_z]!, {%[t5],%[t6]}\n\t":[my_z] "+r" (z) : [t5] "r" (t5), [t6] "r" (t6):"memory");
z -= n*3;
return(z);
}
/* Combining step with no twiddle multiplication at all: z[2n] and z[3n] are
   loaded and fed straight into the butterflies (presumably the zero-angle
   twiddle case -- confirm against the generic TRANSFORM_ZERO).
   Returns z advanced by one element (see the pointer walk in the body; the
   final stmia uses writeback). */
static inline FFTComplex* TRANSFORM_ZERO( FFTComplex* z, int n )
{
register FFTSample t1,t2 asm("r5"),t5 asm("r6"),t6 asm("r7"), r_re asm("r8"), r_im asm("r9");
z += n*2; /* z[o2] -- 2n * 2 since complex numbers */
asm volatile( "ldmia %[my_z], {%[r_re],%[r_im]}\n\t":[r_re] "=r" (r_re), [r_im] "=r" (r_im):[my_z] "r" (z));
z += n; /* z[o3] */
asm volatile( "ldmia %[my_z], {%[t5],%[t6]}\n\t":[t5] "=r" (t5), [t6] "=r" (t6):[my_z] "r" (z));
/* t5 = z[3n].re + z[2n].re, t1 = z[3n].re - z[2n].re */
BF_OPT(t1, t5, t5, r_re);
/* t2 = z[2n].im + z[3n].im, t6 = z[2n].im - z[3n].im */
BF_OPT(t6, t2, r_im, t6);
{
register FFTSample rt0temp asm("r4");
/* The commented-out lines below are the BUTTERFLIES sequence; its first
   two steps are replaced by the two BF_OPT calls above, the remaining four
   are implemented by hand below. */
/*{*/
/* BF_OPT(t1, t5, t5, t1);*/
/* BF_OPT(t6, t2, t2, t6);*/
/* BF_OPT(a2.re, a0.re, a0.re, t5);*/
/* BF_OPT(a2.im, a0.im, a0.im, t2);*/
/* BF_OPT(a3.re, a1.re, a1.re, t6);*/
/* BF_OPT(a3.im, a1.im, a1.im, t1);*/
/*}*/
z -= n*3;
/* r_re = my_z[0]; r_im = my_z[1]; */
asm volatile( "ldmia %[my_z], {%[r_re],%[r_im]}\n\t":[r_re] "=r" (r_re), [r_im] "=r" (r_im):[my_z] "r" (z));
BF_OPT(rt0temp, r_re, r_re, t5);
BF_OPT(t2, r_im, r_im, t2);
/* my_z[0] = r_re; my_z[1] = r_im; */
asm volatile( "stmia %[my_z], {%[r_re],%[r_im]}\n\t"::[my_z] "r" (z), [r_re] "r" (r_re), [r_im] "r" (r_im):"memory");
z += n;
/* r_re = my_z[0]; r_im = my_z[1]; */
asm volatile( "ldmia %[my_z], {%[r_re],%[r_im]}\n\t":[r_re] "=r" (r_re), [r_im] "=r" (r_im):[my_z] "r" (z));
BF_OPT(t5, r_re, r_re, t6);
BF_OPT(t6, r_im, r_im, t1);
/* my_z[0] = r_re; my_z[1] = r_im; */
asm volatile( "stmia %[my_z], {%[r_re],%[r_im]}\n\t"::[my_z] "r" (z), [r_re] "r" (r_re), [r_im] "r" (r_im):"memory");
z += n;
/* my_z[0] = rt0temp; my_z[1] = t2; */
asm volatile( "stmia %[my_z], {%[rt0temp],%[t2]}\n\t"::[my_z] "r" (z), [rt0temp] "r" (rt0temp), [t2] "r" (t2):"memory");
}
z += n;
/* my_z[0] = t5; my_z[1] = t6; */
/* writeback (!) also advances z past the element just stored */
asm volatile( "stmia %[my_z]!, {%[t5],%[t6]}\n\t":[my_z] "+r" (z) : [t5] "r" (t5), [t6] "r" (t6):"memory");
z -= n*3;
return(z);
}
#define FFT_FFMPEG_INCL_OPTIMISED_FFT4
/* 4-point transform done entirely in registers: the eight words of
   z[0]..z[3] are loaded into r1-r8 with one ldmia, combined with add/sub
   (one extra scratch value lives in temp), and written back with one stmia.
   The stmia uses writeback, so the returned pointer is past the eight words
   just stored (i.e. &z[4] on entry terms, assuming 32-bit FFTSample).
   In the comments below, z[k] and o[k] index 32-bit words, not complex
   elements. */
static inline FFTComplex* fft4(FFTComplex * z)
{
FFTSample temp;
/* input[0..7] -> output[0..7] */
/* load r1=z[0],r2=z[1],...,r8=z[7] */
asm volatile(
"ldmia %[z], {r1-r8}\n\t"
"add r1,r1,r3\n\t" /* r1 :=t1 */
"sub r3,r1,r3, lsl #1\n\t" /* r3 :=t3 */
"sub r7,r7,r5\n\t" /* r7 :=t8 */
"add r5,r7,r5, lsl #1\n\t" /* r5 :=t6 */
"add r1,r1,r5\n\t" /* r1 = o[0] */
"sub r5,r1,r5, lsl #1\n\t" /* r5 = o[4] */
"add r2,r2,r4\n\t" /* r2 :=t2 */
"sub r4,r2,r4, lsl #1\n\t" /* r4 :=t4 */
"add %[temp],r6,r8\n\t" /* temp:=t5 */
"sub r6,r6,r8\n\t" /* r6 :=t7 */
"sub r8,r4,r7\n\t" /* r8 = o[7]*/
"add r4,r4,r7\n\t" /* r4 = o[3]*/
"sub r7,r3,r6\n\t" /* r7 = o[6]*/
"add r3,r3,r6\n\t" /* r3 = o[2]*/
"sub r6,r2,%[temp]\n\t" /* r6 = o[5]*/
"add r2,r2,%[temp]\n\t" /* r2 = o[1]*/
"stmia %[z]!, {r1-r8}\n\t"
: /* outputs */ [z] "+r" (z), [temp] "=r" (temp)
: /* inputs */
: /* clobbers */
"r1","r2","r3","r4","r5","r6","r7","r8","memory"
);
return z;
}
#define FFT_FFMPEG_INCL_OPTIMISED_FFT8
/* The chunk of asm below is equivalent to the following:
// first load in z[4].re thru z[7].im into local registers
// ...
BF_OPT2_REV(z[4].re, z[5].re, z[4].re, z[5].re); // x=a+b; y=x-(b<<1)
BF_OPT2_REV(z[4].im, z[5].im, z[4].im, z[5].im);
BF_REV (temp, z[7].re, z[6].re, z[7].re); // x=a+b; y=a-b;
BF_REV (z[6].re, z[7].im, z[6].im, z[7].im);
// save z[7].re and z[7].im as those are complete now
// z[5].re and z[5].im are also complete now but save these later on
BF(z[6].im, z[4].re, temp, z[4].re); // x=a-b; y=a+b
BF_OPT(z[6].re, z[4].im, z[4].im, z[6].re); // y=a+b; x=y-(b<<1)
// now load z[2].re and z[2].im
// ...
BF_OPT(z[6].re, z[2].re, z[2].re, z[6].re); // y=a+b; x=y-(b<<1)
BF_OPT(z[6].im, z[2].im, z[2].im, z[6].im); // y=a+b; x=y-(b<<1)
// Now save z[6].re and z[6].im, along with z[5].re and z[5].im
// for efficiency. Also save z[2].re and z[2].im.
// Now load z[0].re and z[0].im
// ...
BF_OPT(z[4].re, z[0].re, z[0].re, z[4].re); // y=a+b; x=y-(b<<1)
BF_OPT(z[4].im, z[0].im, z[0].im, z[4].im); // y=a+b; x=y-(b<<1)
// Finally save out z[4].re, z[4].im, z[0].re and z[0].im
// ...
*/
/* 8-point transform in three stages: fft4() on z[0..3] (its return value,
   the pointer just past those four elements, is used as the z[4] pointer),
   the asm block below on z[4..7] combined with z[0] and z[2], and finally
   TRANSFORM_EQUAL(z, 2), which is called with z already advanced to the
   second element by the writeback on the last z_ptr store. */
static inline void fft8(FFTComplex * z)
{
FFTComplex* m4 = fft4(z);
{
/* note that we increment z_ptr on the final stmia, which
leaves z_ptr pointing to z[1].re ready for the Transform step */
register FFTSample temp;
asm volatile(
/* read in z[4].re thru z[7].im */
"ldmia %[z4_ptr]!, {r1-r8}\n\t"
/* (now points one word past &z[7].im) */
"add r1,r1,r3\n\t"
"sub r3,r1,r3,lsl #1\n\t"
"add r2,r2,r4\n\t"
"sub r4,r2,r4,lsl #1\n\t"
"add %[temp],r5,r7\n\t"
"sub r7,r5,r7\n\t"
"add r5,r6,r8\n\t"
"sub r8,r6,r8\n\t"
"stmdb %[z4_ptr]!, {r7,r8}\n\t" /* write z[7].re,z[7].im straight away */
/* Note, registers r7 & r8 now free */
"sub r6,%[temp],r1\n\t"
"add r1,%[temp],r1\n\t"
"add r2,r2,r5\n\t"
"sub r5,r2,r5,lsl #1\n\t"
/* temp is reused as an address register from here on */
"add %[temp], %[z_ptr], #16\n\t" /* point to &z[2].re */
"ldmia %[temp],{r7,r8}\n\t" /* load z[2].re and z[2].im */
"add r7,r7,r5\n\t"
"sub r5,r7,r5,lsl #1\n\t"
"add r8,r8,r6\n\t"
"sub r6,r8,r6,lsl #1\n\t"
/* write out z[5].re, z[5].im, z[6].re, z[6].im in one go*/
"stmdb %[z4_ptr]!, {r3-r6}\n\t"
"stmia %[temp],{r7,r8}\n\t" /* write out z[2].re, z[2].im */
"ldmia %[z_ptr],{r7,r8}\n\t" /* load r[0].re, r[0].im */
"add r7,r7,r1\n\t"
"sub r1,r7,r1,lsl #1\n\t"
"add r8,r8,r2\n\t"
"sub r2,r8,r2,lsl #1\n\t"
"stmia %[z_ptr]!,{r7,r8}\n\t" /* write out z[0].re, z[0].im */
"stmdb %[z4_ptr], {r1,r2}\n\t" /* write out z[4].re, z[4].im */
: [z4_ptr] "+r" (m4), [temp] "=r" (temp), [z_ptr] "+r" (z)
:
: "r1","r2","r3","r4","r5","r6","r7","r8","memory"
);
}
TRANSFORM_EQUAL(z,2);
}
#endif // CPU_ARM

View file

@ -0,0 +1,370 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2010 Nils Wallménius
*
* Coldfire v2 optimisations for ffmpeg's fft (used in fft-ffmpeg.c)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#ifdef CPU_COLDFIRE
#define FFT_FFMPEG_INCL_OPTIMISED_FFT4
/* 4-point transform, in place: the eight words of z[0]..z[3] are loaded into
   d0-d7 with one movem.l, combined with add/sub/neg using a0 as the only
   scratch register, and written back with one movem.l. z is an input-only
   operand and is not modified. */
static inline void fft4(FFTComplex * z)
{
asm volatile ("movem.l (%[z]), %%d0-%%d7\n\t"
"move.l %%d0, %%a0\n\t"
"add.l %%d2, %%d0\n\t" /* d0 == t1 */
"neg.l %%d2\n\t"
"add.l %%a0, %%d2\n\t" /* d2 == t3, a0 free */
"move.l %%d6, %%a0\n\t"
"sub.l %%d4, %%d6\n\t" /* d6 == t8 */
"add.l %%d4, %%a0\n\t" /* a0 == t6 */
"move.l %%d0, %%d4\n\t"
"sub.l %%a0, %%d4\n\t" /* z[2].re done */
"add.l %%a0, %%d0\n\t" /* z[0].re done, a0 free */
"move.l %%d5, %%a0\n\t"
"sub.l %%d7, %%d5\n\t" /* d5 == t7 */
"add.l %%d7, %%a0\n\t" /* a0 == t5 */
"move.l %%d1, %%d7\n\t"
"sub.l %%d3, %%d7\n\t" /* d7 == t4 */
"add.l %%d3, %%d1\n\t" /* d1 == t2 */
"move.l %%d7, %%d3\n\t"
"sub.l %%d6, %%d7\n\t" /* z[3].im done */
"add.l %%d6, %%d3\n\t" /* z[1].im done */
"move.l %%d2, %%d6\n\t"
"sub.l %%d5, %%d6\n\t" /* z[3].re done */
"add.l %%d5, %%d2\n\t" /* z[1].re done */
"move.l %%d1, %%d5\n\t"
"sub.l %%a0, %%d5\n\t" /* z[2].im done */
"add.l %%a0, %%d1\n\t" /* z[0].im done */
"movem.l %%d0-%%d7, (%[z])\n\t"
: :[z] "a" (z)
: "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
"a0", "cc", "memory");
}
#define FFT_FFMPEG_INCL_OPTIMISED_FFT8
/* 8-point transform, in place, as a single asm block:
   1. load z[4]..z[7], form the sums/differences, and start the four
      cPI2_8 multiplications in the EMAC accumulators so that z[5] and z[7]
      never have to be stored and reloaded;
   2. run the 4-point transform on z[0]..z[3] in d0-d7 (same instruction
      sequence as the stand-alone fft4) without storing the result;
   3. combine with the values kept in a1-a4 and store z[0], z[2], z[4], z[6];
   4. read back the accumulators and finish the TRANSFORM_EQUAL step,
      storing z[1], z[3], z[5], z[7].
   cPI2_8 is passed with an "i" (immediate) constraint, so it must be a
   compile-time constant.
   NOTE(review): acc0-acc3 are used but do not appear in the clobber list --
   confirm nothing else expects the accumulators to be preserved across this
   call. */
static inline void fft8(FFTComplex *z)
{
asm volatile ("movem.l (4*8, %[z]), %%d0-%%d7\n\t"
"move.l %%d0, %%a1\n\t"
"add.l %%d2, %%a1\n\t" /* a1 == t1 */
"sub.l %%d2, %%d0\n\t" /* d0 == z[5].re */
"move.l %%d1, %%a2\n\t"
"add.l %%d3, %%a2\n\t" /* a2 == t2 */
"sub.l %%d3, %%d1\n\t" /* d1 == z[5].im */
"move.l %%d4, %%d2\n\t"
"add.l %%d6, %%d2\n\t" /* d2 == t3 */
"sub.l %%d6, %%d4\n\t" /* d4 == z[7].re */
"move.l %%d5, %%d3\n\t"
"add.l %%d7, %%d3\n\t" /* d3 == t4 */
"sub.l %%d7, %%d5\n\t" /* d5 == z[7].im */
"move.l %%d2, %%a4\n\t"
"sub.l %%a1, %%a4\n\t" /* a4 == t8 */
"add.l %%d2, %%a1\n\t" /* a1 == t1, d2 free */
"move.l %%a2, %%a3\n\t"
"sub.l %%d3, %%a3\n\t" /* a3 == t7 */
"add.l %%d3, %%a2\n\t" /* a2 == t2, d3 free */
/* emac block from TRANSFORM_EQUAL, do this now
so we don't need to store and load z[5] and z[7] */
"move.l %[_cPI2_8], %%d2\n\t"
"mac.l %%d2, %%d0, %%acc0\n\t"
"mac.l %%d2, %%d1, %%acc1\n\t"
"mac.l %%d2, %%d4, %%acc2\n\t"
"mac.l %%d2, %%d5, %%acc3\n\t"
/* fft4, clobbers all d regs and a0 */
"movem.l (%[z]), %%d0-%%d7\n\t"
"move.l %%d0, %%a0\n\t"
"add.l %%d2, %%d0\n\t" /* d0 == t1 */
"neg.l %%d2\n\t"
"add.l %%a0, %%d2\n\t" /* d2 == t3, a0 free */
"move.l %%d6, %%a0\n\t"
"sub.l %%d4, %%d6\n\t" /* d6 == t8 */
"add.l %%d4, %%a0\n\t" /* a0 == t6 */
"move.l %%d0, %%d4\n\t"
"sub.l %%a0, %%d4\n\t" /* z[2].re done */
"add.l %%a0, %%d0\n\t" /* z[0].re done, a0 free */
"move.l %%d5, %%a0\n\t"
"sub.l %%d7, %%d5\n\t" /* d5 == t7 */
"add.l %%d7, %%a0\n\t" /* a0 == t5 */
"move.l %%d1, %%d7\n\t"
"sub.l %%d3, %%d7\n\t" /* d7 == t4 */
"add.l %%d3, %%d1\n\t" /* d1 == t2 */
"move.l %%d7, %%d3\n\t"
"sub.l %%d6, %%d7\n\t" /* z[3].im done */
"add.l %%d6, %%d3\n\t" /* z[1].im done */
"move.l %%d2, %%d6\n\t"
"sub.l %%d5, %%d6\n\t" /* z[3].re done */
"add.l %%d5, %%d2\n\t" /* z[1].re done */
"move.l %%d1, %%d5\n\t"
"sub.l %%a0, %%d5\n\t" /* z[2].im done */
"add.l %%a0, %%d1\n\t" /* z[0].im done */
/* end of fft4, but don't store yet */
"move.l %%d0, %%a0\n\t"
"add.l %%a1, %%d0\n\t"
"sub.l %%a1, %%a0\n\t" /* z[4].re, z[0].re done, a1 free */
"move.l %%d1, %%a1\n\t"
"add.l %%a2, %%d1\n\t"
"sub.l %%a2, %%a1\n\t" /* z[4].im, z[0].im done, a2 free */
"move.l %%d4, %%a2\n\t"
"add.l %%a3, %%d4\n\t"
"sub.l %%a3, %%a2\n\t" /* z[6].re, z[2].re done, a3 free */
"move.l %%d5, %%a3\n\t"
"add.l %%a4, %%d5\n\t"
"sub.l %%a4, %%a3\n\t" /* z[6].im, z[2].im done, a4 free */
"movem.l %%d0-%%d1, (%[z])\n\t" /* save z[0] */
"movem.l %%d4-%%d5, (2*8, %[z])\n\t" /* save z[2] */
"movem.l %%a0-%%a1, (4*8, %[z])\n\t" /* save z[4] */
"movem.l %%a2-%%a3, (6*8, %[z])\n\t" /* save z[6] */
/* TRANSFORM_EQUAL */
"movclr.l %%acc0, %%d0\n\t"
"movclr.l %%acc1, %%d1\n\t"
"movclr.l %%acc2, %%d4\n\t"
"movclr.l %%acc3, %%d5\n\t"
"move.l %%d1, %%a0\n\t"
"add.l %%d0, %%a0\n\t" /* a0 == t1 */
"sub.l %%d0, %%d1\n\t" /* d1 == t2 */
"move.l %%d4, %%d0\n\t"
"add.l %%d5, %%d0\n\t" /* d0 == t6 */
"sub.l %%d5, %%d4\n\t" /* d4 == t5 */
"move.l %%d4, %%a1\n\t"
"sub.l %%a0, %%a1\n\t" /* a1 == temp1 */
"add.l %%a0, %%d4\n\t" /* d4 == temp2 */
"move.l %%d2, %%a2\n\t"
"sub.l %%d4, %%a2\n\t" /* a2 == z[5].re */
"add.l %%d4, %%d2\n\t" /* z[1].re done */
"move.l %%d7, %%d5\n\t"
"sub.l %%a1, %%d5\n\t" /* d5 == z[7].im */
"add.l %%a1, %%d7\n\t" /* z[3].im done */
"move.l %%d1, %%a0\n\t"
"sub.l %%d0, %%a0\n\t" /* a0 == temp1 */
"add.l %%d0, %%d1\n\t" /* d1 == temp2 */
"move.l %%d6, %%d4\n\t"
"sub.l %%a0, %%d4\n\t" /* d4 == z[7].re */
"add.l %%a0, %%d6\n\t" /* z[3].re done */
"move.l %%d3, %%a3\n\t"
"sub.l %%d1, %%a3\n\t" /* a3 == z[5].im */
"add.l %%d1, %%d3\n\t" /* z[1].im done */
"movem.l %%d2-%%d3, (1*8, %[z])\n\t" /* save z[1] */
"movem.l %%d6-%%d7, (3*8, %[z])\n\t" /* save z[3] */
"movem.l %%a2-%%a3, (5*8, %[z])\n\t" /* save z[5] */
"movem.l %%d4-%%d5, (7*8, %[z])\n\t" /* save z[7] */
: :[z] "a" (z), [_cPI2_8] "i" (cPI2_8)
: "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
"a0", "a1", "a2", "a3", "a4", "cc", "memory");
}
#define FFT_FFMPEG_INCL_OPTIMISED_TRANSFORM
/* One combining step over z[0], z[n], z[2n] and z[3n], in place. The four
   twiddle products of z[2n] and z[3n] with (wre, wim) are accumulated in
   acc0-acc3 while the next operands are fetched by the same mac
   instructions; the results are then butterflied with z[0] and z[n].
   z[0] is read with two post-incrementing loads, so the returned pointer is
   two words past the incoming z (its store uses the -8 displacement to
   compensate).
   NOTE(review): acc0-acc3 are used but do not appear in the clobber list. */
static inline FFTComplex* TRANSFORM(FFTComplex * z, unsigned int n, FFTSample wre, FFTSample wim)
{
asm volatile ("move.l (%[z2]), %%d5\n\t"
"mac.l %%d5, %[wre], (4, %[z2]), %%d4, %%acc0\n\t"
"mac.l %%d4, %[wim], %%acc0\n\t"
"mac.l %%d4, %[wre], (%[z3]), %%d6, %%acc1\n\t"
"msac.l %%d5, %[wim], (4, %[z3]), %%d7, %%acc1\n\t"
"mac.l %%d6, %[wre], (%[z])+, %%d4, %%acc2\n\t"
"msac.l %%d7, %[wim], (%[z])+, %%d5, %%acc2\n\t"
"mac.l %%d7, %[wre], %%acc3\n\t"
"mac.l %%d6, %[wim], %%acc3\n\t"
/* the twiddle registers are no longer needed and become scratch */
"movclr.l %%acc0, %[wre]\n\t" /* t1 */
"movclr.l %%acc2, %[wim]\n\t" /* t5 */
"move.l %%d4, %%d6\n\t"
"move.l %[wim], %%d7\n\t"
"sub.l %[wre], %[wim]\n\t" /* t5 = t5-t1 */
"add.l %[wre], %%d7\n\t"
"sub.l %%d7, %%d6\n\t" /* d6 = a0re - (t5+t1) => a2re */
"add.l %%d7, %%d4\n\t" /* d4 = a0re + (t5+t1) => a0re */
"movclr.l %%acc3, %%d7\n\t" /* t6 */
"movclr.l %%acc1, %%d3\n\t" /* t2 */
"move.l %%d3, %[wre]\n\t"
"add.l %%d7, %[wre]\n\t"
"sub.l %%d7, %%d3\n\t" /* t2 = t2-t6 */
"move.l %%d5, %%d7\n\t"
"sub.l %[wre], %%d7\n\t" /* d7 = a0im - (t2+t6) => a2im */
"movem.l %%d6-%%d7, (%[z2])\n\t" /* store z2 */
"add.l %[wre], %%d5\n\t" /* d5 = a0im + (t2+t6) => a0im */
"movem.l %%d4-%%d5, (-8, %[z])\n\t" /* store z0 */
"movem.l (%[z1]), %%d4-%%d5\n\t" /* load z1 */
"move.l %%d4, %%d6\n\t"
"sub.l %%d3, %%d6\n\t" /* d6 = a1re - (t2-t6) => a3re */
"add.l %%d3, %%d4\n\t" /* d4 = a1re + (t2-t6) => a1re */
"move.l %%d5, %%d7\n\t"
"sub.l %[wim], %%d7\n\t"
"movem.l %%d6-%%d7, (%[z3])\n\t" /* store z3 */
"add.l %[wim], %%d5\n\t"
"movem.l %%d4-%%d5, (%[z1])\n\t" /* store z1 */
: [wre] "+r" (wre), [wim] "+r" (wim), /* we clobber these after using them */
[z] "+a" (z)
: [z1] "a" (&z[n]), [z2] "a" (&z[2*n]), [z3] "a" (&z[3*n])
: "d3", "d4", "d5", "d6", "d7", "cc", "memory");
return z;
}
/* TRANSFORM() with the twiddle taken from a table: wre = w[0], wim = w[1]. */
static inline FFTComplex* TRANSFORM_W01(FFTComplex * z, unsigned int n, const FFTSample * w)
{
    const FFTSample wre = w[0];
    const FFTSample wim = w[1];

    return TRANSFORM(z, n, wre, wim);
}
/* TRANSFORM() with the twiddle taken from a table in swapped order:
   wre = w[1], wim = w[0]. */
static inline FFTComplex* TRANSFORM_W10(FFTComplex * z, unsigned int n, const FFTSample * w)
{
    const FFTSample wre = w[1];
    const FFTSample wim = w[0];

    return TRANSFORM(z, n, wre, wim);
}
/* Combining step over z[0], z[n], z[2n] and z[3n] with no twiddle
   multiplication: only loads, add/sub and stores are used (presumably the
   zero-angle twiddle case -- confirm against the generic TRANSFORM_ZERO).
   All four elements are updated in place; returns z+1. */
static inline FFTComplex* TRANSFORM_ZERO(FFTComplex * z, unsigned int n)
{
asm volatile("movem.l (%[z]), %%d4-%%d5\n\t" /* load z0 */
"move.l %%d4, %%d6\n\t"
"movem.l (%[z2]), %%d2-%%d3\n\t" /* load z2 */
"movem.l (%[z3]), %%d0-%%d1\n\t" /* load z3 */
"move.l %%d0, %%d7\n\t"
"sub.l %%d2, %%d0\n\t"
"add.l %%d2, %%d7\n\t"
"sub.l %%d7, %%d6\n\t" /* d6 = a0re - (t5+t1) => a2re */
"add.l %%d7, %%d4\n\t" /* d4 = a0re + (t5+t1) => a0re */
"move.l %%d5, %%d7\n\t"
"move.l %%d3, %%d2\n\t"
"add.l %%d1, %%d2\n\t"
"sub.l %%d2, %%d7\n\t" /* d7 = a0im - (t2+t6) => a2im */
"movem.l %%d6-%%d7, (%[z2])\n\t" /* store z2 */
"add.l %%d2, %%d5\n\t" /* d5 = a0im + (t2+t6) => a0im */
"movem.l %%d4-%%d5, (%[z])\n\t" /* store z0 */
"movem.l (%[z1]), %%d4-%%d5\n\t" /* load z1 */
"move.l %%d4, %%d6\n\t"
"sub.l %%d1, %%d3\n\t"
"sub.l %%d3, %%d6\n\t" /* d6 = a1re - (t2-t6) => a3re */
"add.l %%d3, %%d4\n\t" /* d4 = a1re + (t2-t6) => a1re */
"move.l %%d5, %%d7\n\t"
"sub.l %%d0, %%d7\n\t"
"movem.l %%d6-%%d7, (%[z3])\n\t" /* store z3 */
"add.l %%d0, %%d5\n\t"
"movem.l %%d4-%%d5, (%[z1])\n\t" /* store z1 */
:
: [z] "a" (z), [z1] "a" (&z[n]), [z2] "a" (&z[2*n]), [z3] "a" (&z[3*n])
: "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "cc", "memory");
return z+1;
}
/* Combining step over z[0], z[n], z[2n] and z[3n] for a twiddle whose two
   components are both cPI2_8 (presumably what "EQUAL" refers to): each of
   the four words of z[2n] and z[3n] is multiplied by cPI2_8 in its own
   accumulator, and the products are then summed/differenced and
   butterflied with z[0] and z[n]. All four elements are updated in place;
   returns z+1.
   cPI2_8 is passed with an "i" (immediate) constraint, so it must be a
   compile-time constant.
   NOTE(review): acc0-acc3 are used but do not appear in the clobber list. */
static inline FFTComplex* TRANSFORM_EQUAL(FFTComplex * z, unsigned int n)
{
asm volatile ("movem.l (%[z2]), %%d0-%%d1\n\t"
"move.l %[_cPI2_8], %%d2\n\t"
"mac.l %%d0, %%d2, (%[z3]), %%d0, %%acc0\n\t"
"mac.l %%d1, %%d2, (4, %[z3]), %%d1, %%acc1\n\t"
"mac.l %%d0, %%d2, (%[z]), %%d4, %%acc2\n\t"
"mac.l %%d1, %%d2, (4, %[z]), %%d5, %%acc3\n\t"
"movclr.l %%acc0, %%d0\n\t"
"movclr.l %%acc1, %%d1\n\t"
"movclr.l %%acc2, %%d2\n\t"
"movclr.l %%acc3, %%d3\n\t"
"move.l %%d0, %%d7\n\t"
"add.l %%d1, %%d0\n\t" /* d0 == t1 */
"sub.l %%d7, %%d1\n\t" /* d1 == t2 */
"move.l %%d3, %%d7\n\t"
"add.l %%d2, %%d3\n\t" /* d3 == t6 */
"sub.l %%d7, %%d2\n\t" /* d2 == t5 */
"move.l %%d4, %%d6\n\t"
"move.l %%d2, %%d7\n\t"
"sub.l %%d0, %%d2\n\t" /* t5 = t5-t1 */
"add.l %%d0, %%d7\n\t"
"sub.l %%d7, %%d6\n\t" /* d6 = a0re - (t5+t1) => a2re */
"add.l %%d7, %%d4\n\t" /* d4 = a0re + (t5+t1) => a0re */
"move.l %%d1, %%d0\n\t"
"add.l %%d3, %%d0\n\t"
"sub.l %%d3, %%d1\n\t" /* t2 = t2-t6 */
"move.l %%d5, %%d7\n\t"
"sub.l %%d0, %%d7\n\t" /* d7 = a0im - (t2+t6) => a2im */
"movem.l %%d6-%%d7, (%[z2])\n\t" /* store z2 */
"add.l %%d0, %%d5\n\t" /* d5 = a0im + (t2+t6) => a0im */
"movem.l %%d4-%%d5, (%[z])\n\t" /* store z0 */
"movem.l (%[z1]), %%d4-%%d5\n\t" /* load z1 */
"move.l %%d4, %%d6\n\t"
"sub.l %%d1, %%d6\n\t" /* d6 = a1re - (t2-t6) => a3re */
"add.l %%d1, %%d4\n\t" /* d4 = a1re + (t2-t6) => a1re */
"move.l %%d5, %%d7\n\t"
"sub.l %%d2, %%d7\n\t"
"movem.l %%d6-%%d7, (%[z3])\n\t" /* store z3 */
"add.l %%d2, %%d5\n\t"
"movem.l %%d4-%%d5, (%[z1])\n\t" /* store z1 */
:: [z] "a" (z), [z1] "a" (&z[n]), [z2] "a" (&z[2*n]), [z3] "a" (&z[3*n]),
[_cPI2_8] "i" (cPI2_8)
: "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "cc", "memory");
return z+1;
}
#endif /* CPU_COLDFIRE */

View file

@ -0,0 +1,64 @@
/*
* WMA compatible decoder
* Copyright (c) 2002 The FFmpeg Project.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef CODECLIB_FFT_H_INCLUDED
#define CODECLIB_FFT_H_INCLUDED
#include <inttypes.h>
/* 32-bit and 64-bit signed integer types used for fixed-point values. */
typedef int32_t fixed32;
typedef int64_t fixed64;
/* FFT_FIXED is defined unconditionally right here, so the float branch of
   the #ifdef below is never selected by this header as written. */
#define FFT_FIXED
#ifdef FFT_FIXED
typedef fixed32 FFTSample;
#else /* FFT_FIXED */
typedef float FFTSample;
#endif /* FFT_FIXED */
/* One complex sample: real part followed by imaginary part. */
typedef struct FFTComplex {
FFTSample re, im;
} FFTComplex;
/* Per-transform state. NOTE(review): nothing in this header shows which of
   these fields the fixed-point code actually uses; the only function
   declared here, ff_fft_calc_c(), takes nbits directly rather than an
   FFTContext, so its signature also does not match the fft_calc member. */
typedef struct FFTContext {
int nbits; /* presumably log2 of the FFT size -- TODO confirm */
int inverse;
uint16_t *revtab; /* presumably the bit-reversal index table -- TODO confirm */
int mdct_size; /* size of MDCT (i.e. number of input data * 2) */
int mdct_bits; /* n = 2^nbits */
/* pre/post rotation tables */
FFTSample *tcos;
FFTSample *tsin;
/* implementation hooks; each receives the context as its first argument */
void (*fft_permute)(struct FFTContext *s, FFTComplex *z);
void (*fft_calc)(struct FFTContext *s, FFTComplex *z);
void (*imdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
void (*imdct_half)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
void (*mdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
int split_radix;
int permutation;
/* presumably the values stored in 'permutation' -- TODO confirm */
#define FF_MDCT_PERM_NONE 0
#define FF_MDCT_PERM_INTERLEAVE 1
} FFTContext;
// internal api (fft<->mdct)
//int fft_calc_unscaled(FFTContext *s, FFTComplex *z);
//void ff_fft_permute_c(FFTContext *s, FFTComplex *z); // internal only?
/* Run the FFT on z. nbits selects the transform; NOTE(review): presumably
   log2 of the number of complex points, and the function returns nothing so
   the result is presumably left in z -- confirm against the implementation,
   including which nbits values it accepts. */
void ff_fft_calc_c(int nbits, FFTComplex *z);
#endif // CODECLIB_FFT_H_INCLUDED

View file

@ -0,0 +1 @@
#include "../../../apps/fixedpoint.c"

View file

@ -0,0 +1,49 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2006 Jens Arnold
*
* Fixed point library for plugins
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
/** CODECS - FIXED POINT MATH ROUTINES - USAGE
*
* - x and y arguments are fixed point integers
* - fracbits is the number of fractional bits in the argument(s)
* - functions return long fixed point integers with the specified number
* of fractional bits unless otherwise specified
*
* Calculate sin and cos of an angle:
* fp_sincos(phase, *cos)
* where phase is a 32 bit unsigned integer with 0 representing 0
* and 0xFFFFFFFF representing 2*pi, and *cos is the address to
* a long signed integer. Value returned is a long signed integer
* from -0x80000000 to 0x7fffffff, representing -1 to 1 respectively.
* That is, value is a fixed point integer with 31 fractional bits.
*
* Take square root of a fixed point number:
* fp_sqrt(x, fracbits)
*
*/
#ifndef _FIXEDPOINT_H_CODECS
#define _FIXEDPOINT_H_CODECS
/* Sine/cosine of phase, where the full unsigned 32-bit range of phase maps
   onto 0..2*pi. The cosine is stored through *cos; the return value is
   presumably the sine. Both use 31 fractional bits, per the usage notes
   above. */
long fp_sincos(unsigned long phase, long *cos);
/* Square root of the fixed-point value a, which has fracbits fractional
   bits. */
long fp_sqrt(long a, unsigned int fracbits);
#endif

View file

@ -0,0 +1,37 @@
# __________ __ ___.
# Open \______ \ ____ ____ | | _\_ |__ _______ ___
# Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
# Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
# Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
# \/ \/ \/ \/ \/
# $Id$
#
# Static library built from the files listed in codecs/lib/SOURCES (run
# through the 'preprocess' helper) and converted to object paths by 'c2obj'.
CODECLIB := $(CODECDIR)/libcodec.a
CODECLIB_SRC := $(call preprocess, $(RBCODECLIB_DIR)/codecs/lib/SOURCES)
CODECLIB_OBJ := $(call c2obj, $(CODECLIB_SRC))
OTHER_SRC += $(CODECLIB_SRC)
# The old archive is removed before 'ar rcs' recreates it from all objects,
# so members that are no longer in CODECLIB_OBJ cannot linger in it.
$(CODECLIB): $(CODECLIB_OBJ)
$(SILENT)$(shell rm -f $@)
$(call PRINTS,AR $(@F))$(AR) rcs $@ $^ >/dev/null
# Start from CODECFLAGS with any -O<level> option stripped, then pick the
# optimisation level for this library: -Os when MEMORYSIZE is 2, -O2 on
# m68k, -O1 everywhere else.
CODECLIBFLAGS = $(filter-out -O%,$(CODECFLAGS))
ifeq ($(MEMORYSIZE),2)
CODECLIBFLAGS += -Os
else ifeq ($(ARCH),arch_m68k)
CODECLIBFLAGS += -O2
else
CODECLIBFLAGS += -O1
endif
# Do not use '-ffunction-sections' when compiling sdl-sim
ifneq ($(findstring sdl-sim, $(APP_TYPE)), sdl-sim)
CODECLIBFLAGS += -ffunction-sections
endif
# Compile one library source into the build tree with CODECLIBFLAGS; the
# source file's own directory is added to the include path.
$(CODECDIR)/lib/%.o: $(RBCODECLIB_DIR)/codecs/lib/%.c
$(SILENT)mkdir -p $(dir $@)
$(call PRINTS,CC $(subst $(ROOTDIR)/,,$<))$(CC) \
-I$(dir $<) $(CODECLIBFLAGS) -c $< -o $@

View file

@ -0,0 +1,644 @@
/*
* Fixed Point IMDCT
* Copyright (c) 2002 The FFmpeg Project.
* Copyright (c) 2010 Dave Hooper, Mohamed Tarek, Michael Giacomelli
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "codeclib.h"
#include "mdct.h"
#include "codeclib_misc.h"
#include "mdct_lookup.h"
#ifndef ICODE_ATTR_TREMOR_MDCT
#define ICODE_ATTR_TREMOR_MDCT ICODE_ATTR
#endif
/**
* Compute the middle half of the inverse MDCT of size N = 2^nbits
* thus excluding the parts that can be derived by symmetry
* @param nbits log2 of the transform size N
* @param output N/2 samples
* @param input N/2 samples
*
* NOTE - CANNOT CURRENTLY OPERATE IN PLACE (input and output must
* not overlap or intersect at all)
*
* The function works in three stages:
* 1. pre-rotation: input samples are taken pairwise from both ends of the
* input, rotated by twiddles from sincos_lookup0 and scattered into
* output at the positions given by revtab;
* 2. an FFT of size 2^(nbits-2) is run in the output buffer through
* ff_fft_calc_c();
* 3. post-rotation: the FFT result is rotated and reordered in place,
* working inwards from both ends of the output buffer.
*/
void ff_imdct_half(unsigned int nbits, fixed32 *output, const fixed32 *input) ICODE_ATTR_TREMOR_MDCT;
void ff_imdct_half(unsigned int nbits, fixed32 *output, const fixed32 *input)
{
int n8, n4, n2, n, j;
const fixed32 *in1, *in2;
/* j is only used by the C loops; this silences the unused-variable warning
in the ColdFire build */
(void)j;
n = 1 << nbits;
n2 = n >> 1;
n4 = n >> 2;
n8 = n >> 3;
/* the output buffer is treated as an array of n4 complex values */
FFTComplex *z = (FFTComplex *)output;
/* pre rotation */
/* in1 walks forward from the first input sample, in2 backward from the last */
in1 = input;
in2 = input + n2 - 1;
/* revtab comes from the fft; revtab table is sized for N=4096 size fft = 2^12.
The fft is size N/4 so s->nbits-2, so our shift needs to be (12-(nbits-2)) */
const int revtab_shift = (14- nbits);
/* bitreverse reorder the input and rotate; result here is in OUTPUT ... */
/* (note that when using the current split radix, the bitreverse ordering is
complex, meaning that this reordering cannot easily be done in-place) */
/* Using the following pdf, you can see that it is possible to rearrange
the 'classic' pre/post rotate with an alternative one that enables
us to use fewer distinct twiddle factors.
http://www.eurasip.org/Proceedings/Eusipco/Eusipco2006/papers/1568980508.pdf
For prerotation, the factors are just sin,cos(2PI*i/N)
For postrotation, the factors are sin,cos(2PI*(i+1/4)/N)
Therefore, prerotation can immediately reuse the same twiddles as fft
(for postrotation it's still a bit complex, we reuse the fft trig tables
where we can, or a special table for N=2048, or interpolate between
trig tables for N>2048)
*/
const int32_t *T = sincos_lookup0;
/* step is the stride through sincos_lookup0, counted in int32_t entries,
per processed revtab entry.
NOTE(review): nbits is unsigned, so for nbits == 13 (which the switch
below has a case for) 12-nbits wraps around and the shift count is out
of range - confirm whether nbits == 13 can actually reach this
function and what step is intended there. */
const int step = 2<<(12-nbits);
const uint16_t * p_revtab=revtab;
/* First half of the pre-rotation: n8 revtab entries, T walking forward
through the table. Each loop iteration handles two revtab entries. */
{
const uint16_t * const p_revtab_end = p_revtab + n8;
#ifdef CPU_COLDFIRE
/* hand-written ColdFire counterpart of the C loop in the #else branch;
it reads two 16 bit revtab entries per iteration with one long load */
asm volatile ("move.l (%[in2]), %%d0\n\t"
"move.l (%[in1]), %%d1\n\t"
"bra.s 1f\n\t"
"0:\n\t"
"movem.l (%[T]), %%d2-%%d3\n\t"
"addq.l #8, %[in1]\n\t"
"subq.l #8, %[in2]\n\t"
"lea (%[step]*4, %[T]), %[T]\n\t"
"mac.l %%d0, %%d3, (%[T]), %%d4, %%acc0;"
"msac.l %%d1, %%d2, (4, %[T]), %%d5, %%acc0;"
"mac.l %%d1, %%d3, (%[in1]), %%d1, %%acc1;"
"mac.l %%d0, %%d2, (%[in2]), %%d0, %%acc1;"
"addq.l #8, %[in1]\n\t"
"subq.l #8, %[in2]\n\t"
"mac.l %%d0, %%d5, %%acc2;"
"msac.l %%d1, %%d4, (%[p_revtab])+, %%d2, %%acc2;"
"mac.l %%d1, %%d5, (%[in1]), %%d1, %%acc3;"
"mac.l %%d0, %%d4, (%[in2]), %%d0, %%acc3;"
"clr.l %%d3\n\t"
"move.w %%d2, %%d3\n\t"
"eor.l %%d3, %%d2\n\t"
"swap %%d2\n\t"
"lsr.l %[revtab_shift], %%d2\n\t"
"movclr.l %%acc0, %%d4;"
"movclr.l %%acc1, %%d5;"
"lsl.l #3, %%d2\n\t"
"lea (%%d2, %[z]), %%a1\n\t"
"movem.l %%d4-%%d5, (%%a1)\n\t"
"lsr.l %[revtab_shift], %%d3\n\t"
"movclr.l %%acc2, %%d4;"
"movclr.l %%acc3, %%d5;"
"lsl.l #3, %%d3\n\t"
"lea (%%d3, %[z]), %%a1\n\t"
"movem.l %%d4-%%d5, (%%a1)\n\t"
"lea (%[step]*4, %[T]), %[T]\n\t"
"1:\n\t"
"cmp.l %[p_revtab_end], %[p_revtab]\n\t"
"bcs.s 0b\n\t"
: [in1] "+a" (in1), [in2] "+a" (in2), [T] "+a" (T),
[p_revtab] "+a" (p_revtab)
: [z] "a" (z), [step] "d" (step), [revtab_shift] "d" (revtab_shift),
[p_revtab_end] "r" (p_revtab_end)
: "d0", "d1", "d2", "d3", "d4", "d5", "a1", "cc", "memory");
#else
while(LIKELY(p_revtab < p_revtab_end))
{
/* destination index in z: the revtab entry scaled down to this fft size */
j = (*p_revtab)>>revtab_shift;
XNPROD31(*in2, *in1, T[1], T[0], &z[j].re, &z[j].im );
T += step;
in1 += 2;
in2 -= 2;
p_revtab++;
j = (*p_revtab)>>revtab_shift;
XNPROD31(*in2, *in1, T[1], T[0], &z[j].re, &z[j].im );
T += step;
in1 += 2;
in2 -= 2;
p_revtab++;
}
#endif
}
/* Second half of the pre-rotation: the next n8 revtab entries. T now walks
backward through the table (negated step) and the two table values are
passed to the rotation in swapped order. */
{
const uint16_t * const p_revtab_end = p_revtab + n8;
#ifdef CPU_COLDFIRE
/* hand-written ColdFire counterpart of the C loop in the #else branch */
asm volatile ("move.l (%[in2]), %%d0\n\t"
"move.l (%[in1]), %%d1\n\t"
"bra.s 1f\n\t"
"0:\n\t"
"movem.l (%[T]), %%d2-%%d3\n\t"
"addq.l #8, %[in1]\n\t"
"subq.l #8, %[in2]\n\t"
"lea (%[step]*4, %[T]), %[T]\n\t"
"mac.l %%d0, %%d2, (%[T]), %%d4, %%acc0;"
"msac.l %%d1, %%d3, (4, %[T]), %%d5, %%acc0;"
"mac.l %%d1, %%d2, (%[in1]), %%d1, %%acc1;"
"mac.l %%d0, %%d3, (%[in2]), %%d0, %%acc1;"
"addq.l #8, %[in1]\n\t"
"subq.l #8, %[in2]\n\t"
"mac.l %%d0, %%d4, %%acc2;"
"msac.l %%d1, %%d5, (%[p_revtab])+, %%d2, %%acc2;"
"mac.l %%d1, %%d4, (%[in1]), %%d1, %%acc3;"
"mac.l %%d0, %%d5, (%[in2]), %%d0, %%acc3;"
"clr.l %%d3\n\t"
"move.w %%d2, %%d3\n\t"
"eor.l %%d3, %%d2\n\t"
"swap %%d2\n\t"
"lsr.l %[revtab_shift], %%d2\n\t"
"movclr.l %%acc0, %%d4;"
"movclr.l %%acc1, %%d5;"
"lsl.l #3, %%d2\n\t"
"lea (%%d2, %[z]), %%a1\n\t"
"movem.l %%d4-%%d5, (%%a1)\n\t"
"lsr.l %[revtab_shift], %%d3\n\t"
"movclr.l %%acc2, %%d4;"
"movclr.l %%acc3, %%d5;"
"lsl.l #3, %%d3\n\t"
"lea (%%d3, %[z]), %%a1\n\t"
"movem.l %%d4-%%d5, (%%a1)\n\t"
"lea (%[step]*4, %[T]), %[T]\n\t"
"1:\n\t"
"cmp.l %[p_revtab_end], %[p_revtab]\n\t"
"bcs.s 0b\n\t"
: [in1] "+a" (in1), [in2] "+a" (in2), [T] "+a" (T),
[p_revtab] "+a" (p_revtab)
: [z] "a" (z), [step] "d" (-step), [revtab_shift] "d" (revtab_shift),
[p_revtab_end] "r" (p_revtab_end)
: "d0", "d1", "d2", "d3", "d4", "d5", "a1", "cc", "memory");
#else
while(LIKELY(p_revtab < p_revtab_end))
{
j = (*p_revtab)>>revtab_shift;
XNPROD31(*in2, *in1, T[0], T[1], &z[j].re, &z[j].im);
T -= step;
in1 += 2;
in2 -= 2;
p_revtab++;
j = (*p_revtab)>>revtab_shift;
XNPROD31(*in2, *in1, T[0], T[1], &z[j].re, &z[j].im);
T -= step;
in1 += 2;
in2 -= 2;
p_revtab++;
}
#endif
}
/* ... and so fft runs in OUTPUT buffer */
ff_fft_calc_c(nbits-2, z);
/* post rotation + reordering. now keeps the result within the OUTPUT buffer */
/* The twiddle source depends on the transform size: the default branch reads
a table directly, while the n=4096 and n=8192 cases build their twiddles by
interpolating between sincos_lookup0 and sincos_lookup1. */
switch( nbits )
{
default:
{
fixed32 * z1 = (fixed32 *)(&z[0]);
int magic_step = step>>2;
int newstep;
if(n<=1024)
{
/* sincos_lookup0, starting magic_step entries in, stride step/2 */
T = sincos_lookup0 + magic_step;
newstep = step>>1;
}
else
{
/* larger sizes reaching this branch use sincos_lookup1, one pair per step */
T = sincos_lookup1;
newstep = 2;
}
#ifdef CPU_COLDFIRE
fixed32 * z2 = (fixed32 *)(&z[n4]);
int c = n4;
/* NOTE(review): both asm loops below are entered through "bra.s 1f", which
lands directly on "bhi.s 0b". The instructions executed before that point
inside the asm (movem, lea, addq on address registers) do not appear to
set the condition codes, so whether the loop is entered seems to depend on
flags left behind by the preceding compiler-generated code - confirm. */
if (newstep == 2)
{
/* stride of one sin/cos pair: T is advanced with post-increment loads */
asm volatile ("movem.l (%[z1]), %%d0-%%d1\n\t"
"addq.l #8, %[z1]\n\t"
"movem.l (%[T]), %%d2-%%d3\n\t"
"addq.l #8, %[T]\n\t"
"bra.s 1f\n\t"
"0:\n\t"
"msac.l %%d1, %%d2, (%[T])+, %%a3, %%acc0\n\t"
"mac.l %%d0, %%d3, (%[T])+, %%a4, %%acc0\n\t"
"msac.l %%d1, %%d3, -(%[z2]), %%d1, %%acc1\n\t"
"msac.l %%d0, %%d2, -(%[z2]), %%d0, %%acc1\n\t"
"msac.l %%d1, %%a4, (%[T])+, %%d2, %%acc2\n\t"
"mac.l %%d0, %%a3, (%[T])+, %%d3, %%acc2\n\t"
"msac.l %%d0, %%a4, (%[z1])+, %%d0, %%acc3\n\t"
"msac.l %%d1, %%a3, (%[z1])+, %%d1, %%acc3\n\t"
"movclr.l %%acc0, %%a3\n\t"
"movclr.l %%acc3, %%a4\n\t"
"movem.l %%a3-%%a4, (-16, %[z1])\n\t"
"movclr.l %%acc1, %%a4\n\t"
"movclr.l %%acc2, %%a3\n\t"
"movem.l %%a3-%%a4, (%[z2])\n\t"
"subq.l #2, %[n]\n\t"
"1:\n\t"
"bhi.s 0b\n\t"
: [z1] "+a" (z1), [z2] "+a" (z2), [T] "+a" (T), [n] "+d" (c)
:
: "d0", "d1", "d2", "d3", "a3", "a4", "cc", "memory");
}
else
{
/* general stride: T is advanced by newstep entries with lea */
asm volatile ("movem.l (%[z1]), %%d0-%%d1\n\t"
"addq.l #8, %[z1]\n\t"
"movem.l (%[T]), %%d2-%%d3\n\t"
"lea (%[newstep]*4, %[T]), %[T]\n\t"
"bra.s 1f\n\t"
"0:\n\t"
"msac.l %%d1, %%d2, (%[T]), %%a3, %%acc0\n\t"
"mac.l %%d0, %%d3, (4, %[T]), %%a4, %%acc0\n\t"
"msac.l %%d1, %%d3, -(%[z2]), %%d1, %%acc1\n\t"
"msac.l %%d0, %%d2, -(%[z2]), %%d0, %%acc1\n\t"
"lea (%[newstep]*4, %[T]), %[T]\n\t"
"msac.l %%d1, %%a4, (%[T]), %%d2, %%acc2\n\t"
"mac.l %%d0, %%a3, (4, %[T]), %%d3, %%acc2\n\t"
"msac.l %%d0, %%a4, (%[z1])+, %%d0, %%acc3\n\t"
"msac.l %%d1, %%a3, (%[z1])+, %%d1, %%acc3\n\t"
"lea (%[newstep]*4, %[T]), %[T]\n\t"
"movclr.l %%acc0, %%a3\n\t"
"movclr.l %%acc3, %%a4\n\t"
"movem.l %%a3-%%a4, (-16, %[z1])\n\t"
"movclr.l %%acc1, %%a4\n\t"
"movclr.l %%acc2, %%a3\n\t"
"movem.l %%a3-%%a4, (%[z2])\n\t"
"subq.l #2, %[n]\n\t"
"1:\n\t"
"bhi.s 0b\n\t"
: [z1] "+a" (z1), [z2] "+a" (z2), [T] "+a" (T), [n] "+d" (c)
: [newstep] "d" (newstep)
: "d0", "d1", "d2", "d3", "a3", "a4", "cc", "memory");
}
#else
/* z1 starts at the first complex value, z2 at the last; each iteration
rotates one value from each end and stores the negated results with the
imaginary parts exchanged between the two positions */
fixed32 * z2 = (fixed32 *)(&z[n4-1]);
while(z1<z2)
{
fixed32 r0,i0,r1,i1;
XNPROD31_R(z1[1], z1[0], T[0], T[1], r0, i1 ); T+=newstep;
XNPROD31_R(z2[1], z2[0], T[1], T[0], r1, i0 ); T+=newstep;
z1[0] = -r0;
z1[1] = -i0;
z2[0] = -r1;
z2[1] = -i1;
z1+=2;
z2-=2;
}
#endif
break;
}
case 12: /* n=4096 */
{
/* linear interpolation (50:50) between sincos_lookup0 and sincos_lookup1 */
const int32_t * V = sincos_lookup1;
T = sincos_lookup0;
int32_t t0,t1,v0,v1;
fixed32 * z1 = (fixed32 *)(&z[0]);
fixed32 * z2 = (fixed32 *)(&z[n4-1]);
/* the halved table values are carried from one twiddle to the next, so every
table entry is read and shifted only once */
t0 = T[0]>>1; t1=T[1]>>1;
while(z1<z2)
{
fixed32 r0,i0,r1,i1;
/* twiddle for z1: half of the current T pair plus half of the current V pair */
t0 += (v0 = (V[0]>>1));
t1 += (v1 = (V[1]>>1));
XNPROD31_R(z1[1], z1[0], t0, t1, r0, i1 );
T+=2;
/* twiddle for z2: half of the current V pair plus half of the next T pair */
v0 += (t0 = (T[0]>>1));
v1 += (t1 = (T[1]>>1));
XNPROD31_R(z2[1], z2[0], v1, v0, r1, i0 );
z1[0] = -r0;
z1[1] = -i0;
z2[0] = -r1;
z2[1] = -i1;
z1+=2;
z2-=2;
V+=2;
}
break;
}
case 13: /* n = 8192 */
{
/* weight linear interpolation between sincos_lookup0 and sincos_lookup1
specifically: 25:75 for first twiddle and 75:25 for second twiddle */
const int32_t * V = sincos_lookup1;
T = sincos_lookup0;
int32_t t0,t1,v0,v1,q0,q1;
fixed32 * z1 = (fixed32 *)(&z[0]);
fixed32 * z2 = (fixed32 *)(&z[n4-1]);
t0 = T[0]; t1=T[1];
/* each loop iteration handles two values from each end of the buffer */
while(z1<z2)
{
fixed32 r0,i0,r1,i1;
/* first pair of twiddles: interpolate from the current T pair towards the
current V pair; q0/q1 hold half of the difference */
v0 = V[0]; v1 = V[1];
t0 += (q0 = (v0-t0)>>1);
t1 += (q1 = (v1-t1)>>1);
XNPROD31_R(z1[1], z1[0], t0, t1, r0, i1 );
t0 = v0-q0;
t1 = v1-q1;
XNPROD31_R(z2[1], z2[0], t1, t0, r1, i0 );
z1[0] = -r0;
z1[1] = -i0;
z2[0] = -r1;
z2[1] = -i1;
z1+=2;
z2-=2;
T+=2;
/* second pair of twiddles: interpolate from the current V pair towards the
next T pair */
t0 = T[0]; t1 = T[1];
v0 += (q0 = (t0-v0)>>1);
v1 += (q1 = (t1-v1)>>1);
XNPROD31_R(z1[1], z1[0], v0, v1, r0, i1 );
v0 = t0-q0;
v1 = t1-q1;
XNPROD31_R(z2[1], z2[0], v1, v0, r1, i0 );
z1[0] = -r0;
z1[1] = -i0;
z2[0] = -r1;
z2[1] = -i1;
z1+=2;
z2-=2;
V+=2;
}
break;
}
}
}
/**
* Compute inverse MDCT of size N = 2^nbits
* @param output N samples
* @param input N/2 samples
* "In-place" processing can be achieved provided that:
* [0 .. N/2-1 | N/2 .. N-1 ]
* <----input---->
* <-----------output----------->
*
* The result of ff_imdct_half is to put the 'half' imdct here
*
* N/2 N-1
* <--half imdct-->
*
* We want it here for the full imdct:
* N/4 3N/4-1
* <-------------->
*
* In addition we need to apply two symmetries to get the full imdct:
*
* <AAAAAA> <DDDDDD>
* <BBBBBB><CCCCCC>
*
* D is a reflection of C
* A is a reflection of B (but with sign flipped)
*
* We process the symmetries at the same time as we 'move' the half imdct
* from [N/2,N-1] to [N/4,3N/4-1]
*
* TODO: find a way to make ff_imdct_half put the result in [N/4..3N/4-1]
* This would require being able to use revtab 'inplace' (since the input
* and output of imdct_half would then overlap somewhat)
*/
void ff_imdct_calc(unsigned int nbits, fixed32 *output, const fixed32 *input) ICODE_ATTR_TREMOR_MDCT;
#ifndef CPU_ARM
/* Canonical (non-ARM) full IMDCT: runs ff_imdct_half() into the upper half of
output, then builds the full N-sample result from it with two copy/reflect
passes. The ColdFire build uses inline assembly for both passes, every other
target the plain C in the #else branches. */
void ff_imdct_calc(unsigned int nbits, fixed32 *output, const fixed32 *input)
{
const int n = (1<<nbits);
const int n2 = (n>>1);
const int n4 = (n>>2);
/* ff_imdct_half writes its N/2 output samples starting at output+n2,
i.e. into [N/2..N-1] */
ff_imdct_half(nbits,output+n2,input);
fixed32 * in_r, * in_r2, * out_r, * out_r2;
/* Copy BBBB to AAAA, reflected and sign-flipped.
Also copy BBBB to its correct destination (from [N/2..3N/4-1] to [N/4..N/2-1]) */
/* out_r fills AAAA upwards from output[0]; out_r2 fills the relocated BBBB
downwards from the top of [N/4..N/2-1]; in_r reads the original BBBB
downwards. Eight words are handled per iteration. */
out_r = output;
out_r2 = output+n2-8;
in_r = output+n2+n4-8;
while(out_r<out_r2)
{
#if defined CPU_COLDFIRE
/* load eight words, store them unchanged at out_r2, then store their
negations in reverse order at out_r; the pointers move by eight words */
asm volatile(
"movem.l (%[in_r]), %%d0-%%d7\n\t"
"movem.l %%d0-%%d7, (%[out_r2])\n\t"
"neg.l %%d7\n\t"
"move.l %%d7, (%[out_r])+\n\t"
"neg.l %%d6\n\t"
"move.l %%d6, (%[out_r])+\n\t"
"neg.l %%d5\n\t"
"move.l %%d5, (%[out_r])+\n\t"
"neg.l %%d4\n\t"
"move.l %%d4, (%[out_r])+\n\t"
"neg.l %%d3\n\t"
"move.l %%d3, (%[out_r])+\n\t"
"neg.l %%d2\n\t"
"move.l %%d2, (%[out_r])+\n\t"
"lea.l (-8*4, %[in_r]), %[in_r]\n\t"
"neg.l %%d1\n\t"
"move.l %%d1, (%[out_r])+\n\t"
"lea.l (-8*4, %[out_r2]), %[out_r2]\n\t"
"neg.l %%d0\n\t"
"move.l %%d0, (%[out_r])+\n\t"
: [in_r] "+a" (in_r), [out_r] "+a" (out_r), [out_r2] "+a" (out_r2)
:
: "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "cc", "memory" );
#else
/* each statement copies one word to out_r2 and writes its negation to the
mirrored slot of out_r */
out_r[0] = -(out_r2[7] = in_r[7]);
out_r[1] = -(out_r2[6] = in_r[6]);
out_r[2] = -(out_r2[5] = in_r[5]);
out_r[3] = -(out_r2[4] = in_r[4]);
out_r[4] = -(out_r2[3] = in_r[3]);
out_r[5] = -(out_r2[2] = in_r[2]);
out_r[6] = -(out_r2[1] = in_r[1]);
out_r[7] = -(out_r2[0] = in_r[0]);
in_r -= 8;
out_r += 8;
out_r2 -= 8;
#endif
}
/* Second pass: in_r/in_r2 walk [3N/4..N-1] inwards from both ends, while
out_r/out_r2 walk [N/2..3N/4-1] inwards from both ends. Four words are
handled at each end per iteration. */
in_r = output + n2+n4;
in_r2 = output + n-4;
out_r = output + n2;
out_r2 = output + n2 + n4 - 4;
while(in_r<in_r2)
{
#if defined CPU_COLDFIRE
/* same copy-and-swap as the C code in the #else branch, using movem for
the four-word loads and stores */
asm volatile(
"movem.l (%[in_r]), %%d0-%%d3\n\t"
"movem.l %%d0-%%d3, (%[out_r])\n\t"
"movem.l (%[in_r2]), %%d4-%%d7\n\t"
"movem.l %%d4-%%d7, (%[out_r2])\n\t"
"move.l %%d0, %%a3\n\t"
"move.l %%d3, %%d0\n\t"
"move.l %%d1, %%d3\n\t"
"movem.l %%d0/%%d2-%%d3/%%a3, (%[in_r2])\n\t"
"move.l %%d7, %%d1\n\t"
"move.l %%d6, %%d2\n\t"
"move.l %%d5, %%d3\n\t"
"movem.l %%d1-%%d4, (%[in_r])\n\t"
"lea.l (4*4, %[in_r]), %[in_r]\n\t"
"lea.l (-4*4, %[in_r2]), %[in_r2]\n\t"
"lea.l (4*4, %[out_r]), %[out_r]\n\t"
"lea.l (-4*4, %[out_r2]), %[out_r2]\n\t"
: [in_r] "+a" (in_r), [in_r2] "+a" (in_r2),
[out_r] "+a" (out_r), [out_r2] "+a" (out_r2)
:
: "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "a3", "memory", "cc" );
#else
register fixed32 t0,t1,t2,t3;
register fixed32 s0,s1,s2,s3;
/* Copy and reflect CCCC to DDDD. Because CCCC is already where
we actually want to put DDDD this is a bit complicated.
* So simultaneously do the following things:
* 1. copy range from [n2+n4 .. n-1] to range[n2 .. n2+n4-1]
* 2. reflect range from [n2+n4 .. n-1] inplace
*
* [ | ]
* ^a -> <- ^b ^c -> <- ^d
*
* #1: copy from ^c to ^a
* #2: copy from ^d to ^b
* #3: swap ^c and ^d in place
*/
/* #1 pt1 : load 4 words from ^c. */
t0=in_r[0]; t1=in_r[1]; t2=in_r[2]; t3=in_r[3];
/* #1 pt2 : write to ^a */
out_r[0]=t0;out_r[1]=t1;out_r[2]=t2;out_r[3]=t3;
/* #2 pt1 : load 4 words from ^d */
s0=in_r2[0];s1=in_r2[1];s2=in_r2[2];s3=in_r2[3];
/* #2 pt2 : write to ^b */
out_r2[0]=s0;out_r2[1]=s1;out_r2[2]=s2;out_r2[3]=s3;
/* #3 pt1 : write words from #2 to ^c */
in_r[0]=s3;in_r[1]=s2;in_r[2]=s1;in_r[3]=s0;
/* #3 pt2 : write words from #1 to ^d */
in_r2[0]=t3;in_r2[1]=t2;in_r2[2]=t1;in_r2[3]=t0;
in_r += 4;
in_r2 -= 4;
out_r += 4;
out_r2 -= 4;
#endif
}
}
#else
/* ARM version of ff_imdct_calc.
Follows the same structure as the canonical version above: run
ff_imdct_half() into output+n2, then do the two copy/reflect passes, here
with load/store-multiple instructions. Because those instructions
pre-decrement, the downward-moving pointers start one block higher than in
the canonical version. */
void ff_imdct_calc(unsigned int nbits, fixed32 *output, const fixed32 *input)
{
const int n = (1<<nbits);
const int n2 = (n>>1);
const int n4 = (n>>2);
ff_imdct_half(nbits,output+n2,input);
fixed32 * in_r, * in_r2, * out_r, * out_r2;
/* first pass: in_r and out_r2 move downwards, out_r moves upwards,
eight words per iteration */
out_r = output;
out_r2 = output+n2;
in_r = output+n2+n4;
while(out_r<out_r2)
{
/* load eight words below in_r, store them unchanged below out_r2, then
negate them into reversed register order (r8 takes the place of r4) and
store the result at out_r */
asm volatile(
"ldmdb %[in_r]!, {r0-r7}\n\t"
"stmdb %[out_r2]!, {r0-r7}\n\t"
"rsb r8,r0,#0\n\t"
"rsb r0,r7,#0\n\t"
"rsb r7,r1,#0\n\t"
"rsb r1,r6,#0\n\t"
"rsb r6,r2,#0\n\t"
"rsb r2,r5,#0\n\t"
"rsb r5,r3,#0\n\t"
"rsb r3,r4,#0\n\t"
"stmia %[out_r]!, {r0-r3,r5-r8}\n\t"
: [in_r] "+r" (in_r), [out_r] "+r" (out_r), [out_r2] "+r" (out_r2)
:
: "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "memory" );
}
/* second pass: in_r/out_r move upwards, in_r2/out_r2 move downwards,
four words at each end per iteration */
in_r = output + n2+n4;
in_r2 = output + n;
out_r = output + n2;
out_r2 = output + n2 + n4;
while(in_r<in_r2)
{
/* copy four words from in_r to out_r and four words from below in_r2 to
below out_r2, then write each group back reversed at the other group's
location */
asm volatile(
"ldmia %[in_r], {r0-r3}\n\t"
"stmia %[out_r]!, {r0-r3}\n\t"
"ldmdb %[in_r2], {r5-r8}\n\t"
"stmdb %[out_r2]!, {r5-r8}\n\t"
"mov r4,r0\n\t"
"mov r0,r3\n\t"
"mov r3,r1\n\t"
"stmdb %[in_r2]!, {r0,r2,r3,r4}\n\t"
"mov r4,r8\n\t"
"mov r8,r5\n\t"
"mov r5,r7\n\t"
"stmia %[in_r]!, {r4,r5,r6,r8}\n\t"
:
[in_r] "+r" (in_r), [in_r2] "+r" (in_r2), [out_r] "+r" (out_r), [out_r2] "+r" (out_r2)
:
: "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "memory" );
}
}
#endif

View file

@ -0,0 +1,139 @@
/*
* WMA compatible decoder
* Copyright (c) 2002 The FFmpeg Project.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef CODECLIB_MDCT_H_INCLUDED
#define CODECLIB_MDCT_H_INCLUDED
//#include "types.h"
#include "fft.h"
void ff_imdct_calc(unsigned int nbits, fixed32 *output, const fixed32 *input);
void ff_imdct_half(unsigned int nbits, fixed32 *output, const fixed32 *input);
#ifdef CPU_ARM
/* ARM fixed point multiply. SMULL forms the 64 bit product of x and y; the
   macro evaluates to the high word of that product shifted left by one, so
   bit 0 of the result is always zero.
   NOTE(review): this comment used to read "Sign-15.16 format", but the
   portable fixmul32b() later in this header shifts the product right by 31,
   which is what the arithmetic here approximates - confirm which fixed point
   format callers rely on. */
#define fixmul32b(x, y) \
({ int32_t __hi; \
uint32_t __lo; \
int32_t __result; \
asm ("smull %0, %1, %3, %4\n\t" \
"mov %2, %1, lsl #1" \
: "=&r" (__lo), "=&r" (__hi), "=r" (__result) \
: "%r" (x), "r" (y) \
: "cc" ); \
__result; \
})
#elif defined(CPU_COLDFIRE)
/* ColdFire fixed point multiply: multiplies x and y into accumulator acc0,
   then moves the accumulator into x (clearing it) and returns that value.
   NOTE(review): the scaling of the value read back from acc0 depends on how
   the multiply-accumulate unit has been configured elsewhere - confirm it
   matches the >>31 of the portable version. */
static inline int32_t fixmul32b(int32_t x, int32_t y)
{
asm (
"mac.l %[x], %[y], %%acc0 \n" /* multiply */
"movclr.l %%acc0, %[x] \n" /* get higher half */
: [x] "+d" (x)
: [y] "d" (y)
);
return x;
}
#else
/* Portable fixed point multiply: widen to 64 bits, multiply, and drop the
 * low 31 bits of the product before narrowing back to fixed32. */
static inline fixed32 fixmul32b(fixed32 x, fixed32 y)
{
    const fixed64 product = (fixed64)x * y;

    return (fixed32)(product >> 31);
}
#endif
#ifdef CPU_ARM
/* ARM complex multiply: (*x + i * *y) = (a + i*b) * (t + i*v).
   Each sum of two products is accumulated in 64 bits with smull/smlal; the
   stored results are the high words of those sums shifted left by one. */
static inline
void CMUL(fixed32 *x, fixed32 *y,
fixed32 a, fixed32 b,
fixed32 t, fixed32 v)
{
/* This version loses one bit of precision. Could be solved at the cost
* of 2 extra cycles if it becomes an issue. */
int x1, y1, l;
/* y1 = high word of (b*t + a*v); b is then negated in place so that
   x1 = high word of (a*t - b*v) */
asm(
"smull %[l], %[y1], %[b], %[t] \n"
"smlal %[l], %[y1], %[a], %[v] \n"
"rsb %[b], %[b], #0 \n"
"smull %[l], %[x1], %[a], %[t] \n"
"smlal %[l], %[x1], %[b], %[v] \n"
: [l] "=&r" (l), [x1]"=&r" (x1), [y1]"=&r" (y1), [b] "+r" (b)
: [a] "r" (a), [t] "r" (t), [v] "r" (v)
: "cc"
);
*x = x1 << 1;
*y = y1 << 1;
}
#elif defined CPU_COLDFIRE
/* ColdFire complex multiply: (*x + i * *y) = (a + i*b) * (t + i*v).
   acc0 accumulates a*t - b*v and acc1 accumulates b*t + a*v; each accumulator
   is then read back (and cleared) through the register holding a and stored
   to *x and *y respectively. */
static inline
void CMUL(fixed32 *x, fixed32 *y,
fixed32 a, fixed32 b,
fixed32 t, fixed32 v)
{
asm volatile ("mac.l %[a], %[t], %%acc0;"
"msac.l %[b], %[v], %%acc0;"
"mac.l %[b], %[t], %%acc1;"
"mac.l %[a], %[v], %%acc1;"
"movclr.l %%acc0, %[a];"
"move.l %[a], (%[x]);"
"movclr.l %%acc1, %[a];"
"move.l %[a], (%[y]);"
: [a] "+&r" (a)
: [x] "a" (x), [y] "a" (y),
[b] "r" (b), [t] "r" (t), [v] "r" (v)
: "cc", "memory");
}
#else
/* Portable complex multiply:
 *   (*pre + i * *pim) = (are + i*aim) * (bre + i*bim)
 * Every partial product goes through fixmul32b(). */
static inline
void CMUL(fixed32 *pre,
          fixed32 *pim,
          fixed32 are,
          fixed32 aim,
          fixed32 bre,
          fixed32 bim)
{
    const fixed32 re_re = fixmul32b(bre, are);
    const fixed32 im_im = fixmul32b(bim, aim);
    const fixed32 re_im = fixmul32b(bre, aim);
    const fixed32 im_re = fixmul32b(bim, are);

    *pre = re_re - im_im;
    *pim = re_im + im_re;
}
#endif
#endif // CODECLIB_MDCT_H_INCLUDED

View file

@ -0,0 +1,872 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2009 Michael Giacomelli
*
*
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#ifdef ROCKBOX
#include <codecs/lib/codeclib.h>
#else
#include <stdlib.h>
#include <stdint.h>
#endif /* ROCKBOX */
/* {sin(2*i*PI/4096), cos(2*i*PI/4096)} pairs scaled by 2^31, with i = 0 to 512 */
const int32_t sincos_lookup0[1026] ICONST_ATTR = {
0x00000000, 0x7fffffff, 0x003243f5, 0x7ffff621,
0x006487e3, 0x7fffd886, 0x0096cbc1, 0x7fffa72c,
0x00c90f88, 0x7fff6216, 0x00fb5330, 0x7fff0943,
0x012d96b1, 0x7ffe9cb2, 0x015fda03, 0x7ffe1c65,
0x01921d20, 0x7ffd885a, 0x01c45ffe, 0x7ffce093,
0x01f6a297, 0x7ffc250f, 0x0228e4e2, 0x7ffb55ce,
0x025b26d7, 0x7ffa72d1, 0x028d6870, 0x7ff97c18,
0x02bfa9a4, 0x7ff871a2, 0x02f1ea6c, 0x7ff75370,
0x03242abf, 0x7ff62182, 0x03566a96, 0x7ff4dbd9,
0x0388a9ea, 0x7ff38274, 0x03bae8b2, 0x7ff21553,
0x03ed26e6, 0x7ff09478, 0x041f6480, 0x7feeffe1,
0x0451a177, 0x7fed5791, 0x0483ddc3, 0x7feb9b85,
0x04b6195d, 0x7fe9cbc0, 0x04e8543e, 0x7fe7e841,
0x051a8e5c, 0x7fe5f108, 0x054cc7b1, 0x7fe3e616,
0x057f0035, 0x7fe1c76b, 0x05b137df, 0x7fdf9508,
0x05e36ea9, 0x7fdd4eec, 0x0615a48b, 0x7fdaf519,
0x0647d97c, 0x7fd8878e, 0x067a0d76, 0x7fd6064c,
0x06ac406f, 0x7fd37153, 0x06de7262, 0x7fd0c8a3,
0x0710a345, 0x7fce0c3e, 0x0742d311, 0x7fcb3c23,
0x077501be, 0x7fc85854, 0x07a72f45, 0x7fc560cf,
0x07d95b9e, 0x7fc25596, 0x080b86c2, 0x7fbf36aa,
0x083db0a7, 0x7fbc040a, 0x086fd947, 0x7fb8bdb8,
0x08a2009a, 0x7fb563b3, 0x08d42699, 0x7fb1f5fc,
0x09064b3a, 0x7fae7495, 0x09386e78, 0x7faadf7c,
0x096a9049, 0x7fa736b4, 0x099cb0a7, 0x7fa37a3c,
0x09cecf89, 0x7f9faa15, 0x0a00ece8, 0x7f9bc640,
0x0a3308bd, 0x7f97cebd, 0x0a6522fe, 0x7f93c38c,
0x0a973ba5, 0x7f8fa4b0, 0x0ac952aa, 0x7f8b7227,
0x0afb6805, 0x7f872bf3, 0x0b2d7baf, 0x7f82d214,
0x0b5f8d9f, 0x7f7e648c, 0x0b919dcf, 0x7f79e35a,
0x0bc3ac35, 0x7f754e80, 0x0bf5b8cb, 0x7f70a5fe,
0x0c27c389, 0x7f6be9d4, 0x0c59cc68, 0x7f671a05,
0x0c8bd35e, 0x7f62368f, 0x0cbdd865, 0x7f5d3f75,
0x0cefdb76, 0x7f5834b7, 0x0d21dc87, 0x7f531655,
0x0d53db92, 0x7f4de451, 0x0d85d88f, 0x7f489eaa,
0x0db7d376, 0x7f434563, 0x0de9cc40, 0x7f3dd87c,
0x0e1bc2e4, 0x7f3857f6, 0x0e4db75b, 0x7f32c3d1,
0x0e7fa99e, 0x7f2d1c0e, 0x0eb199a4, 0x7f2760af,
0x0ee38766, 0x7f2191b4, 0x0f1572dc, 0x7f1baf1e,
0x0f475bff, 0x7f15b8ee, 0x0f7942c7, 0x7f0faf25,
0x0fab272b, 0x7f0991c4, 0x0fdd0926, 0x7f0360cb,
0x100ee8ad, 0x7efd1c3c, 0x1040c5bb, 0x7ef6c418,
0x1072a048, 0x7ef05860, 0x10a4784b, 0x7ee9d914,
0x10d64dbd, 0x7ee34636, 0x11082096, 0x7edc9fc6,
0x1139f0cf, 0x7ed5e5c6, 0x116bbe60, 0x7ecf1837,
0x119d8941, 0x7ec8371a, 0x11cf516a, 0x7ec14270,
0x120116d5, 0x7eba3a39, 0x1232d979, 0x7eb31e78,
0x1264994e, 0x7eabef2c, 0x1296564d, 0x7ea4ac58,
0x12c8106f, 0x7e9d55fc, 0x12f9c7aa, 0x7e95ec1a,
0x132b7bf9, 0x7e8e6eb2, 0x135d2d53, 0x7e86ddc6,
0x138edbb1, 0x7e7f3957, 0x13c0870a, 0x7e778166,
0x13f22f58, 0x7e6fb5f4, 0x1423d492, 0x7e67d703,
0x145576b1, 0x7e5fe493, 0x148715ae, 0x7e57dea7,
0x14b8b17f, 0x7e4fc53e, 0x14ea4a1f, 0x7e47985b,
0x151bdf86, 0x7e3f57ff, 0x154d71aa, 0x7e37042a,
0x157f0086, 0x7e2e9cdf, 0x15b08c12, 0x7e26221f,
0x15e21445, 0x7e1d93ea, 0x16139918, 0x7e14f242,
0x16451a83, 0x7e0c3d29, 0x1676987f, 0x7e0374a0,
0x16a81305, 0x7dfa98a8, 0x16d98a0c, 0x7df1a942,
0x170afd8d, 0x7de8a670, 0x173c6d80, 0x7ddf9034,
0x176dd9de, 0x7dd6668f, 0x179f429f, 0x7dcd2981,
0x17d0a7bc, 0x7dc3d90d, 0x1802092c, 0x7dba7534,
0x183366e9, 0x7db0fdf8, 0x1864c0ea, 0x7da77359,
0x18961728, 0x7d9dd55a, 0x18c7699b, 0x7d9423fc,
0x18f8b83c, 0x7d8a5f40, 0x192a0304, 0x7d808728,
0x195b49ea, 0x7d769bb5, 0x198c8ce7, 0x7d6c9ce9,
0x19bdcbf3, 0x7d628ac6, 0x19ef0707, 0x7d58654d,
0x1a203e1b, 0x7d4e2c7f, 0x1a517128, 0x7d43e05e,
0x1a82a026, 0x7d3980ec, 0x1ab3cb0d, 0x7d2f0e2b,
0x1ae4f1d6, 0x7d24881b, 0x1b161479, 0x7d19eebf,
0x1b4732ef, 0x7d0f4218, 0x1b784d30, 0x7d048228,
0x1ba96335, 0x7cf9aef0, 0x1bda74f6, 0x7ceec873,
0x1c0b826a, 0x7ce3ceb2, 0x1c3c8b8c, 0x7cd8c1ae,
0x1c6d9053, 0x7ccda169, 0x1c9e90b8, 0x7cc26de5,
0x1ccf8cb3, 0x7cb72724, 0x1d00843d, 0x7cabcd28,
0x1d31774d, 0x7ca05ff1, 0x1d6265dd, 0x7c94df83,
0x1d934fe5, 0x7c894bde, 0x1dc4355e, 0x7c7da505,
0x1df5163f, 0x7c71eaf9, 0x1e25f282, 0x7c661dbc,
0x1e56ca1e, 0x7c5a3d50, 0x1e879d0d, 0x7c4e49b7,
0x1eb86b46, 0x7c4242f2, 0x1ee934c3, 0x7c362904,
0x1f19f97b, 0x7c29fbee, 0x1f4ab968, 0x7c1dbbb3,
0x1f7b7481, 0x7c116853, 0x1fac2abf, 0x7c0501d2,
0x1fdcdc1b, 0x7bf88830, 0x200d888d, 0x7bebfb70,
0x203e300d, 0x7bdf5b94, 0x206ed295, 0x7bd2a89e,
0x209f701c, 0x7bc5e290, 0x20d0089c, 0x7bb9096b,
0x21009c0c, 0x7bac1d31, 0x21312a65, 0x7b9f1de6,
0x2161b3a0, 0x7b920b89, 0x219237b5, 0x7b84e61f,
0x21c2b69c, 0x7b77ada8, 0x21f3304f, 0x7b6a6227,
0x2223a4c5, 0x7b5d039e, 0x225413f8, 0x7b4f920e,
0x22847de0, 0x7b420d7a, 0x22b4e274, 0x7b3475e5,
0x22e541af, 0x7b26cb4f, 0x23159b88, 0x7b190dbc,
0x2345eff8, 0x7b0b3d2c, 0x23763ef7, 0x7afd59a4,
0x23a6887f, 0x7aef6323, 0x23d6cc87, 0x7ae159ae,
0x24070b08, 0x7ad33d45, 0x243743fa, 0x7ac50dec,
0x24677758, 0x7ab6cba4, 0x2497a517, 0x7aa8766f,
0x24c7cd33, 0x7a9a0e50, 0x24f7efa2, 0x7a8b9348,
0x25280c5e, 0x7a7d055b, 0x2558235f, 0x7a6e648a,
0x2588349d, 0x7a5fb0d8, 0x25b84012, 0x7a50ea47,
0x25e845b6, 0x7a4210d8, 0x26184581, 0x7a332490,
0x26483f6c, 0x7a24256f, 0x26783370, 0x7a151378,
0x26a82186, 0x7a05eead, 0x26d809a5, 0x79f6b711,
0x2707ebc7, 0x79e76ca7, 0x2737c7e3, 0x79d80f6f,
0x27679df4, 0x79c89f6e, 0x27976df1, 0x79b91ca4,
0x27c737d3, 0x79a98715, 0x27f6fb92, 0x7999dec4,
0x2826b928, 0x798a23b1, 0x2856708d, 0x797a55e0,
0x288621b9, 0x796a7554, 0x28b5cca5, 0x795a820e,
0x28e5714b, 0x794a7c12, 0x29150fa1, 0x793a6361,
0x2944a7a2, 0x792a37fe, 0x29743946, 0x7919f9ec,
0x29a3c485, 0x7909a92d, 0x29d34958, 0x78f945c3,
0x2a02c7b8, 0x78e8cfb2, 0x2a323f9e, 0x78d846fb,
0x2a61b101, 0x78c7aba2, 0x2a911bdc, 0x78b6fda8,
0x2ac08026, 0x78a63d11, 0x2aefddd8, 0x789569df,
0x2b1f34eb, 0x78848414, 0x2b4e8558, 0x78738bb3,
0x2b7dcf17, 0x786280bf, 0x2bad1221, 0x7851633b,
0x2bdc4e6f, 0x78403329, 0x2c0b83fa, 0x782ef08b,
0x2c3ab2b9, 0x781d9b65, 0x2c69daa6, 0x780c33b8,
0x2c98fbba, 0x77fab989, 0x2cc815ee, 0x77e92cd9,
0x2cf72939, 0x77d78daa, 0x2d263596, 0x77c5dc01,
0x2d553afc, 0x77b417df, 0x2d843964, 0x77a24148,
0x2db330c7, 0x7790583e, 0x2de2211e, 0x777e5cc3,
0x2e110a62, 0x776c4edb, 0x2e3fec8b, 0x775a2e89,
0x2e6ec792, 0x7747fbce, 0x2e9d9b70, 0x7735b6af,
0x2ecc681e, 0x77235f2d, 0x2efb2d95, 0x7710f54c,
0x2f29ebcc, 0x76fe790e, 0x2f58a2be, 0x76ebea77,
0x2f875262, 0x76d94989, 0x2fb5fab2, 0x76c69647,
0x2fe49ba7, 0x76b3d0b4, 0x30133539, 0x76a0f8d2,
0x3041c761, 0x768e0ea6, 0x30705217, 0x767b1231,
0x309ed556, 0x76680376, 0x30cd5115, 0x7654e279,
0x30fbc54d, 0x7641af3d, 0x312a31f8, 0x762e69c4,
0x3158970e, 0x761b1211, 0x3186f487, 0x7607a828,
0x31b54a5e, 0x75f42c0b, 0x31e39889, 0x75e09dbd,
0x3211df04, 0x75ccfd42, 0x32401dc6, 0x75b94a9c,
0x326e54c7, 0x75a585cf, 0x329c8402, 0x7591aedd,
0x32caab6f, 0x757dc5ca, 0x32f8cb07, 0x7569ca99,
0x3326e2c3, 0x7555bd4c, 0x3354f29b, 0x75419de7,
0x3382fa88, 0x752d6c6c, 0x33b0fa84, 0x751928e0,
0x33def287, 0x7504d345, 0x340ce28b, 0x74f06b9e,
0x343aca87, 0x74dbf1ef, 0x3468aa76, 0x74c7663a,
0x34968250, 0x74b2c884, 0x34c4520d, 0x749e18cd,
0x34f219a8, 0x7489571c, 0x351fd918, 0x74748371,
0x354d9057, 0x745f9dd1, 0x357b3f5d, 0x744aa63f,
0x35a8e625, 0x74359cbd, 0x35d684a6, 0x74208150,
0x36041ad9, 0x740b53fb, 0x3631a8b8, 0x73f614c0,
0x365f2e3b, 0x73e0c3a3, 0x368cab5c, 0x73cb60a8,
0x36ba2014, 0x73b5ebd1, 0x36e78c5b, 0x73a06522,
0x3714f02a, 0x738acc9e, 0x37424b7b, 0x73752249,
0x376f9e46, 0x735f6626, 0x379ce885, 0x73499838,
0x37ca2a30, 0x7333b883, 0x37f76341, 0x731dc70a,
0x382493b0, 0x7307c3d0, 0x3851bb77, 0x72f1aed9,
0x387eda8e, 0x72db8828, 0x38abf0ef, 0x72c54fc1,
0x38d8fe93, 0x72af05a7, 0x39060373, 0x7298a9dd,
0x3932ff87, 0x72823c67, 0x395ff2c9, 0x726bbd48,
0x398cdd32, 0x72552c85, 0x39b9bebc, 0x723e8a20,
0x39e6975e, 0x7227d61c, 0x3a136712, 0x7211107e,
0x3a402dd2, 0x71fa3949, 0x3a6ceb96, 0x71e35080,
0x3a99a057, 0x71cc5626, 0x3ac64c0f, 0x71b54a41,
0x3af2eeb7, 0x719e2cd2, 0x3b1f8848, 0x7186fdde,
0x3b4c18ba, 0x716fbd68, 0x3b78a007, 0x71586b74,
0x3ba51e29, 0x71410805, 0x3bd19318, 0x7129931f,
0x3bfdfecd, 0x71120cc5, 0x3c2a6142, 0x70fa74fc,
0x3c56ba70, 0x70e2cbc6, 0x3c830a50, 0x70cb1128,
0x3caf50da, 0x70b34525, 0x3cdb8e09, 0x709b67c0,
0x3d07c1d6, 0x708378ff, 0x3d33ec39, 0x706b78e3,
0x3d600d2c, 0x70536771, 0x3d8c24a8, 0x703b44ad,
0x3db832a6, 0x7023109a, 0x3de4371f, 0x700acb3c,
0x3e10320d, 0x6ff27497, 0x3e3c2369, 0x6fda0cae,
0x3e680b2c, 0x6fc19385, 0x3e93e950, 0x6fa90921,
0x3ebfbdcd, 0x6f906d84, 0x3eeb889c, 0x6f77c0b3,
0x3f1749b8, 0x6f5f02b2, 0x3f430119, 0x6f463383,
0x3f6eaeb8, 0x6f2d532c, 0x3f9a5290, 0x6f1461b0,
0x3fc5ec98, 0x6efb5f12, 0x3ff17cca, 0x6ee24b57,
0x401d0321, 0x6ec92683, 0x40487f94, 0x6eaff099,
0x4073f21d, 0x6e96a99d, 0x409f5ab6, 0x6e7d5193,
0x40cab958, 0x6e63e87f, 0x40f60dfb, 0x6e4a6e66,
0x4121589b, 0x6e30e34a, 0x414c992f, 0x6e174730,
0x4177cfb1, 0x6dfd9a1c, 0x41a2fc1a, 0x6de3dc11,
0x41ce1e65, 0x6dca0d14, 0x41f93689, 0x6db02d29,
0x42244481, 0x6d963c54, 0x424f4845, 0x6d7c3a98,
0x427a41d0, 0x6d6227fa, 0x42a5311b, 0x6d48047e,
0x42d0161e, 0x6d2dd027, 0x42faf0d4, 0x6d138afb,
0x4325c135, 0x6cf934fc, 0x4350873c, 0x6cdece2f,
0x437b42e1, 0x6cc45698, 0x43a5f41e, 0x6ca9ce3b,
0x43d09aed, 0x6c8f351c, 0x43fb3746, 0x6c748b3f,
0x4425c923, 0x6c59d0a9, 0x4450507e, 0x6c3f055d,
0x447acd50, 0x6c242960, 0x44a53f93, 0x6c093cb6,
0x44cfa740, 0x6bee3f62, 0x44fa0450, 0x6bd3316a,
0x452456bd, 0x6bb812d1, 0x454e9e80, 0x6b9ce39b,
0x4578db93, 0x6b81a3cd, 0x45a30df0, 0x6b66536b,
0x45cd358f, 0x6b4af279, 0x45f7526b, 0x6b2f80fb,
0x4621647d, 0x6b13fef5, 0x464b6bbe, 0x6af86c6c,
0x46756828, 0x6adcc964, 0x469f59b4, 0x6ac115e2,
0x46c9405c, 0x6aa551e9, 0x46f31c1a, 0x6a897d7d,
0x471cece7, 0x6a6d98a4, 0x4746b2bc, 0x6a51a361,
0x47706d93, 0x6a359db9, 0x479a1d67, 0x6a1987b0,
0x47c3c22f, 0x69fd614a, 0x47ed5be6, 0x69e12a8c,
0x4816ea86, 0x69c4e37a, 0x48406e08, 0x69a88c19,
0x4869e665, 0x698c246c, 0x48935397, 0x696fac78,
0x48bcb599, 0x69532442, 0x48e60c62, 0x69368bce,
0x490f57ee, 0x6919e320, 0x49389836, 0x68fd2a3d,
0x4961cd33, 0x68e06129, 0x498af6df, 0x68c387e9,
0x49b41533, 0x68a69e81, 0x49dd282a, 0x6889a4f6,
0x4a062fbd, 0x686c9b4b, 0x4a2f2be6, 0x684f8186,
0x4a581c9e, 0x683257ab, 0x4a8101de, 0x68151dbe,
0x4aa9dba2, 0x67f7d3c5, 0x4ad2a9e2, 0x67da79c3,
0x4afb6c98, 0x67bd0fbd, 0x4b2423be, 0x679f95b7,
0x4b4ccf4d, 0x67820bb7, 0x4b756f40, 0x676471c0,
0x4b9e0390, 0x6746c7d8, 0x4bc68c36, 0x67290e02,
0x4bef092d, 0x670b4444, 0x4c177a6e, 0x66ed6aa1,
0x4c3fdff4, 0x66cf8120, 0x4c6839b7, 0x66b187c3,
0x4c9087b1, 0x66937e91, 0x4cb8c9dd, 0x6675658c,
0x4ce10034, 0x66573cbb, 0x4d092ab0, 0x66390422,
0x4d31494b, 0x661abbc5, 0x4d595bfe, 0x65fc63a9,
0x4d8162c4, 0x65ddfbd3, 0x4da95d96, 0x65bf8447,
0x4dd14c6e, 0x65a0fd0b, 0x4df92f46, 0x65826622,
0x4e210617, 0x6563bf92, 0x4e48d0dd, 0x6545095f,
0x4e708f8f, 0x6526438f, 0x4e984229, 0x65076e25,
0x4ebfe8a5, 0x64e88926, 0x4ee782fb, 0x64c99498,
0x4f0f1126, 0x64aa907f, 0x4f369320, 0x648b7ce0,
0x4f5e08e3, 0x646c59bf, 0x4f857269, 0x644d2722,
0x4faccfab, 0x642de50d, 0x4fd420a4, 0x640e9386,
0x4ffb654d, 0x63ef3290, 0x50229da1, 0x63cfc231,
0x5049c999, 0x63b0426d, 0x5070e92f, 0x6390b34a,
0x5097fc5e, 0x637114cc, 0x50bf031f, 0x635166f9,
0x50e5fd6d, 0x6331a9d4, 0x510ceb40, 0x6311dd64,
0x5133cc94, 0x62f201ac, 0x515aa162, 0x62d216b3,
0x518169a5, 0x62b21c7b, 0x51a82555, 0x6292130c,
0x51ced46e, 0x6271fa69, 0x51f576ea, 0x6251d298,
0x521c0cc2, 0x62319b9d, 0x524295f0, 0x6211557e,
0x5269126e, 0x61f1003f, 0x528f8238, 0x61d09be5,
0x52b5e546, 0x61b02876, 0x52dc3b92, 0x618fa5f7,
0x53028518, 0x616f146c, 0x5328c1d0, 0x614e73da,
0x534ef1b5, 0x612dc447, 0x537514c2, 0x610d05b7,
0x539b2af0, 0x60ec3830, 0x53c13439, 0x60cb5bb7,
0x53e73097, 0x60aa7050, 0x540d2005, 0x60897601,
0x5433027d, 0x60686ccf, 0x5458d7f9, 0x604754bf,
0x547ea073, 0x60262dd6, 0x54a45be6, 0x6004f819,
0x54ca0a4b, 0x5fe3b38d, 0x54efab9c, 0x5fc26038,
0x55153fd4, 0x5fa0fe1f, 0x553ac6ee, 0x5f7f8d46,
0x556040e2, 0x5f5e0db3, 0x5585adad, 0x5f3c7f6b,
0x55ab0d46, 0x5f1ae274, 0x55d05faa, 0x5ef936d1,
0x55f5a4d2, 0x5ed77c8a, 0x561adcb9, 0x5eb5b3a2,
0x56400758, 0x5e93dc1f, 0x566524aa, 0x5e71f606,
0x568a34a9, 0x5e50015d, 0x56af3750, 0x5e2dfe29,
0x56d42c99, 0x5e0bec6e, 0x56f9147e, 0x5de9cc33,
0x571deefa, 0x5dc79d7c, 0x5742bc06, 0x5da5604f,
0x57677b9d, 0x5d8314b1, 0x578c2dba, 0x5d60baa7,
0x57b0d256, 0x5d3e5237, 0x57d5696d, 0x5d1bdb65,
0x57f9f2f8, 0x5cf95638, 0x581e6ef1, 0x5cd6c2b5,
0x5842dd54, 0x5cb420e0, 0x58673e1b, 0x5c9170bf,
0x588b9140, 0x5c6eb258, 0x58afd6bd, 0x5c4be5b0,
0x58d40e8c, 0x5c290acc, 0x58f838a9, 0x5c0621b2,
0x591c550e, 0x5be32a67, 0x594063b5, 0x5bc024f0,
0x59646498, 0x5b9d1154, 0x598857b2, 0x5b79ef96,
0x59ac3cfd, 0x5b56bfbd, 0x59d01475, 0x5b3381ce,
0x59f3de12, 0x5b1035cf, 0x5a1799d1, 0x5aecdbc5,
0x5a3b47ab, 0x5ac973b5, 0x5a5ee79a, 0x5aa5fda5,
0x5a82799a, 0x5a82799a
};
/* {sin((2*i+1)*PI/4096), cos((2*i+1)*PI/4096)} pairs scaled by 2^31, with i = 0 to 511 */
/* Layout: 512 interleaved (sin, cos) pairs -- entry 2*i holds the sine and
 * entry 2*i+1 the cosine -- for a total of 1024 int32_t values.  The values
 * are fixed-point with 31 fractional bits: the first cosine, 0x7ffffd88, is
 * just below 1.0 * 2^31, and the last pair sits either side of 0x5a82799a
 * (sqrt(0.5) * 2^31).
 * NOTE(review): ICONST_ATTR is a project macro defined outside this view;
 * presumably a section/placement attribute -- confirm. */
const int32_t sincos_lookup1[1024] ICONST_ATTR = {
0x001921fb, 0x7ffffd88, 0x004b65ee, 0x7fffe9cb,
0x007da9d4, 0x7fffc251, 0x00afeda8, 0x7fff8719,
0x00e23160, 0x7fff3824, 0x011474f6, 0x7ffed572,
0x0146b860, 0x7ffe5f03, 0x0178fb99, 0x7ffdd4d7,
0x01ab3e97, 0x7ffd36ee, 0x01dd8154, 0x7ffc8549,
0x020fc3c6, 0x7ffbbfe6, 0x024205e8, 0x7ffae6c7,
0x027447b0, 0x7ff9f9ec, 0x02a68917, 0x7ff8f954,
0x02d8ca16, 0x7ff7e500, 0x030b0aa4, 0x7ff6bcf0,
0x033d4abb, 0x7ff58125, 0x036f8a51, 0x7ff4319d,
0x03a1c960, 0x7ff2ce5b, 0x03d407df, 0x7ff1575d,
0x040645c7, 0x7fefcca4, 0x04388310, 0x7fee2e30,
0x046abfb3, 0x7fec7c02, 0x049cfba7, 0x7feab61a,
0x04cf36e5, 0x7fe8dc78, 0x05017165, 0x7fe6ef1c,
0x0533ab20, 0x7fe4ee06, 0x0565e40d, 0x7fe2d938,
0x05981c26, 0x7fe0b0b1, 0x05ca5361, 0x7fde7471,
0x05fc89b8, 0x7fdc247a, 0x062ebf22, 0x7fd9c0ca,
0x0660f398, 0x7fd74964, 0x06932713, 0x7fd4be46,
0x06c5598a, 0x7fd21f72, 0x06f78af6, 0x7fcf6ce8,
0x0729bb4e, 0x7fcca6a7, 0x075bea8c, 0x7fc9ccb2,
0x078e18a7, 0x7fc6df08, 0x07c04598, 0x7fc3dda9,
0x07f27157, 0x7fc0c896, 0x08249bdd, 0x7fbd9fd0,
0x0856c520, 0x7fba6357, 0x0888ed1b, 0x7fb7132b,
0x08bb13c5, 0x7fb3af4e, 0x08ed3916, 0x7fb037bf,
0x091f5d06, 0x7facac7f, 0x09517f8f, 0x7fa90d8e,
0x0983a0a7, 0x7fa55aee, 0x09b5c048, 0x7fa1949e,
0x09e7de6a, 0x7f9dbaa0, 0x0a19fb04, 0x7f99ccf4,
0x0a4c1610, 0x7f95cb9a, 0x0a7e2f85, 0x7f91b694,
0x0ab0475c, 0x7f8d8de1, 0x0ae25d8d, 0x7f895182,
0x0b147211, 0x7f850179, 0x0b4684df, 0x7f809dc5,
0x0b7895f0, 0x7f7c2668, 0x0baaa53b, 0x7f779b62,
0x0bdcb2bb, 0x7f72fcb4, 0x0c0ebe66, 0x7f6e4a5e,
0x0c40c835, 0x7f698461, 0x0c72d020, 0x7f64aabf,
0x0ca4d620, 0x7f5fbd77, 0x0cd6da2d, 0x7f5abc8a,
0x0d08dc3f, 0x7f55a7fa, 0x0d3adc4e, 0x7f507fc7,
0x0d6cda53, 0x7f4b43f2, 0x0d9ed646, 0x7f45f47b,
0x0dd0d01f, 0x7f409164, 0x0e02c7d7, 0x7f3b1aad,
0x0e34bd66, 0x7f359057, 0x0e66b0c3, 0x7f2ff263,
0x0e98a1e9, 0x7f2a40d2, 0x0eca90ce, 0x7f247ba5,
0x0efc7d6b, 0x7f1ea2dc, 0x0f2e67b8, 0x7f18b679,
0x0f604faf, 0x7f12b67c, 0x0f923546, 0x7f0ca2e7,
0x0fc41876, 0x7f067bba, 0x0ff5f938, 0x7f0040f6,
0x1027d784, 0x7ef9f29d, 0x1059b352, 0x7ef390ae,
0x108b8c9b, 0x7eed1b2c, 0x10bd6356, 0x7ee69217,
0x10ef377d, 0x7edff570, 0x11210907, 0x7ed94538,
0x1152d7ed, 0x7ed28171, 0x1184a427, 0x7ecbaa1a,
0x11b66dad, 0x7ec4bf36, 0x11e83478, 0x7ebdc0c6,
0x1219f880, 0x7eb6aeca, 0x124bb9be, 0x7eaf8943,
0x127d7829, 0x7ea85033, 0x12af33ba, 0x7ea1039b,
0x12e0ec6a, 0x7e99a37c, 0x1312a230, 0x7e922fd6,
0x13445505, 0x7e8aa8ac, 0x137604e2, 0x7e830dff,
0x13a7b1bf, 0x7e7b5fce, 0x13d95b93, 0x7e739e1d,
0x140b0258, 0x7e6bc8eb, 0x143ca605, 0x7e63e03b,
0x146e4694, 0x7e5be40c, 0x149fe3fc, 0x7e53d462,
0x14d17e36, 0x7e4bb13c, 0x1503153a, 0x7e437a9c,
0x1534a901, 0x7e3b3083, 0x15663982, 0x7e32d2f4,
0x1597c6b7, 0x7e2a61ed, 0x15c95097, 0x7e21dd73,
0x15fad71b, 0x7e194584, 0x162c5a3b, 0x7e109a24,
0x165dd9f0, 0x7e07db52, 0x168f5632, 0x7dff0911,
0x16c0cef9, 0x7df62362, 0x16f2443e, 0x7ded2a47,
0x1723b5f9, 0x7de41dc0, 0x17552422, 0x7ddafdce,
0x17868eb3, 0x7dd1ca75, 0x17b7f5a3, 0x7dc883b4,
0x17e958ea, 0x7dbf298d, 0x181ab881, 0x7db5bc02,
0x184c1461, 0x7dac3b15, 0x187d6c82, 0x7da2a6c6,
0x18aec0db, 0x7d98ff17, 0x18e01167, 0x7d8f4409,
0x19115e1c, 0x7d85759f, 0x1942a6f3, 0x7d7b93da,
0x1973ebe6, 0x7d719eba, 0x19a52ceb, 0x7d679642,
0x19d669fc, 0x7d5d7a74, 0x1a07a311, 0x7d534b50,
0x1a38d823, 0x7d4908d9, 0x1a6a0929, 0x7d3eb30f,
0x1a9b361d, 0x7d3449f5, 0x1acc5ef6, 0x7d29cd8c,
0x1afd83ad, 0x7d1f3dd6, 0x1b2ea43a, 0x7d149ad5,
0x1b5fc097, 0x7d09e489, 0x1b90d8bb, 0x7cff1af5,
0x1bc1ec9e, 0x7cf43e1a, 0x1bf2fc3a, 0x7ce94dfb,
0x1c240786, 0x7cde4a98, 0x1c550e7c, 0x7cd333f3,
0x1c861113, 0x7cc80a0f, 0x1cb70f43, 0x7cbcccec,
0x1ce80906, 0x7cb17c8d, 0x1d18fe54, 0x7ca618f3,
0x1d49ef26, 0x7c9aa221, 0x1d7adb73, 0x7c8f1817,
0x1dabc334, 0x7c837ad8, 0x1ddca662, 0x7c77ca65,
0x1e0d84f5, 0x7c6c06c0, 0x1e3e5ee5, 0x7c602fec,
0x1e6f342c, 0x7c5445e9, 0x1ea004c1, 0x7c4848ba,
0x1ed0d09d, 0x7c3c3860, 0x1f0197b8, 0x7c3014de,
0x1f325a0b, 0x7c23de35, 0x1f63178f, 0x7c179467,
0x1f93d03c, 0x7c0b3777, 0x1fc4840a, 0x7bfec765,
0x1ff532f2, 0x7bf24434, 0x2025dcec, 0x7be5ade6,
0x205681f1, 0x7bd9047c, 0x208721f9, 0x7bcc47fa,
0x20b7bcfe, 0x7bbf7860, 0x20e852f6, 0x7bb295b0,
0x2118e3dc, 0x7ba59fee, 0x21496fa7, 0x7b989719,
0x2179f64f, 0x7b8b7b36, 0x21aa77cf, 0x7b7e4c45,
0x21daf41d, 0x7b710a49, 0x220b6b32, 0x7b63b543,
0x223bdd08, 0x7b564d36, 0x226c4996, 0x7b48d225,
0x229cb0d5, 0x7b3b4410, 0x22cd12bd, 0x7b2da2fa,
0x22fd6f48, 0x7b1feee5, 0x232dc66d, 0x7b1227d3,
0x235e1826, 0x7b044dc7, 0x238e646a, 0x7af660c2,
0x23beab33, 0x7ae860c7, 0x23eeec78, 0x7ada4dd8,
0x241f2833, 0x7acc27f7, 0x244f5e5c, 0x7abdef25,
0x247f8eec, 0x7aafa367, 0x24afb9da, 0x7aa144bc,
0x24dfdf20, 0x7a92d329, 0x250ffeb7, 0x7a844eae,
0x25401896, 0x7a75b74f, 0x25702cb7, 0x7a670d0d,
0x25a03b11, 0x7a584feb, 0x25d0439f, 0x7a497feb,
0x26004657, 0x7a3a9d0f, 0x26304333, 0x7a2ba75a,
0x26603a2c, 0x7a1c9ece, 0x26902b39, 0x7a0d836d,
0x26c01655, 0x79fe5539, 0x26effb76, 0x79ef1436,
0x271fda96, 0x79dfc064, 0x274fb3ae, 0x79d059c8,
0x277f86b5, 0x79c0e062, 0x27af53a6, 0x79b15435,
0x27df1a77, 0x79a1b545, 0x280edb23, 0x79920392,
0x283e95a1, 0x79823f20, 0x286e49ea, 0x797267f2,
0x289df7f8, 0x79627e08, 0x28cd9fc1, 0x79528167,
0x28fd4140, 0x79427210, 0x292cdc6d, 0x79325006,
0x295c7140, 0x79221b4b, 0x298bffb2, 0x7911d3e2,
0x29bb87bc, 0x790179cd, 0x29eb0957, 0x78f10d0f,
0x2a1a847b, 0x78e08dab, 0x2a49f920, 0x78cffba3,
0x2a796740, 0x78bf56f9, 0x2aa8ced3, 0x78ae9fb0,
0x2ad82fd2, 0x789dd5cb, 0x2b078a36, 0x788cf94c,
0x2b36ddf7, 0x787c0a36, 0x2b662b0e, 0x786b088c,
0x2b957173, 0x7859f44f, 0x2bc4b120, 0x7848cd83,
0x2bf3ea0d, 0x7837942b, 0x2c231c33, 0x78264849,
0x2c52478a, 0x7814e9df, 0x2c816c0c, 0x780378f1,
0x2cb089b1, 0x77f1f581, 0x2cdfa071, 0x77e05f91,
0x2d0eb046, 0x77ceb725, 0x2d3db928, 0x77bcfc3f,
0x2d6cbb10, 0x77ab2ee2, 0x2d9bb5f6, 0x77994f11,
0x2dcaa9d5, 0x77875cce, 0x2df996a3, 0x7775581d,
0x2e287c5a, 0x776340ff, 0x2e575af3, 0x77511778,
0x2e863267, 0x773edb8b, 0x2eb502ae, 0x772c8d3a,
0x2ee3cbc1, 0x771a2c88, 0x2f128d99, 0x7707b979,
0x2f41482e, 0x76f5340e, 0x2f6ffb7a, 0x76e29c4b,
0x2f9ea775, 0x76cff232, 0x2fcd4c19, 0x76bd35c7,
0x2ffbe95d, 0x76aa670d, 0x302a7f3a, 0x76978605,
0x30590dab, 0x768492b4, 0x308794a6, 0x76718d1c,
0x30b61426, 0x765e7540, 0x30e48c22, 0x764b4b23,
0x3112fc95, 0x76380ec8, 0x31416576, 0x7624c031,
0x316fc6be, 0x76115f63, 0x319e2067, 0x75fdec60,
0x31cc7269, 0x75ea672a, 0x31fabcbd, 0x75d6cfc5,
0x3228ff5c, 0x75c32634, 0x32573a3f, 0x75af6a7b,
0x32856d5e, 0x759b9c9b, 0x32b398b3, 0x7587bc98,
0x32e1bc36, 0x7573ca75, 0x330fd7e1, 0x755fc635,
0x333debab, 0x754bafdc, 0x336bf78f, 0x7537876c,
0x3399fb85, 0x75234ce8, 0x33c7f785, 0x750f0054,
0x33f5eb89, 0x74faa1b3, 0x3423d78a, 0x74e63108,
0x3451bb81, 0x74d1ae55, 0x347f9766, 0x74bd199f,
0x34ad6b32, 0x74a872e8, 0x34db36df, 0x7493ba34,
0x3508fa66, 0x747eef85, 0x3536b5be, 0x746a12df,
0x356468e2, 0x74552446, 0x359213c9, 0x744023bc,
0x35bfb66e, 0x742b1144, 0x35ed50c9, 0x7415ece2,
0x361ae2d3, 0x7400b69a, 0x36486c86, 0x73eb6e6e,
0x3675edd9, 0x73d61461, 0x36a366c6, 0x73c0a878,
0x36d0d746, 0x73ab2ab4, 0x36fe3f52, 0x73959b1b,
0x372b9ee3, 0x737ff9ae, 0x3758f5f2, 0x736a4671,
0x37864477, 0x73548168, 0x37b38a6d, 0x733eaa96,
0x37e0c7cc, 0x7328c1ff, 0x380dfc8d, 0x7312c7a5,
0x383b28a9, 0x72fcbb8c, 0x38684c19, 0x72e69db7,
0x389566d6, 0x72d06e2b, 0x38c278d9, 0x72ba2cea,
0x38ef821c, 0x72a3d9f7, 0x391c8297, 0x728d7557,
0x39497a43, 0x7276ff0d, 0x39766919, 0x7260771b,
0x39a34f13, 0x7249dd86, 0x39d02c2a, 0x72333251,
0x39fd0056, 0x721c7580, 0x3a29cb91, 0x7205a716,
0x3a568dd4, 0x71eec716, 0x3a834717, 0x71d7d585,
0x3aaff755, 0x71c0d265, 0x3adc9e86, 0x71a9bdba,
0x3b093ca3, 0x71929789, 0x3b35d1a5, 0x717b5fd3,
0x3b625d86, 0x7164169d, 0x3b8ee03e, 0x714cbbeb,
0x3bbb59c7, 0x71354fc0, 0x3be7ca1a, 0x711dd220,
0x3c143130, 0x7106430e, 0x3c408f03, 0x70eea28e,
0x3c6ce38a, 0x70d6f0a4, 0x3c992ec0, 0x70bf2d53,
0x3cc5709e, 0x70a7589f, 0x3cf1a91c, 0x708f728b,
0x3d1dd835, 0x70777b1c, 0x3d49fde1, 0x705f7255,
0x3d761a19, 0x70475839, 0x3da22cd7, 0x702f2ccd,
0x3dce3614, 0x7016f014, 0x3dfa35c8, 0x6ffea212,
0x3e262bee, 0x6fe642ca, 0x3e52187f, 0x6fcdd241,
0x3e7dfb73, 0x6fb5507a, 0x3ea9d4c3, 0x6f9cbd79,
0x3ed5a46b, 0x6f841942, 0x3f016a61, 0x6f6b63d8,
0x3f2d26a0, 0x6f529d40, 0x3f58d921, 0x6f39c57d,
0x3f8481dd, 0x6f20dc92, 0x3fb020ce, 0x6f07e285,
0x3fdbb5ec, 0x6eeed758, 0x40074132, 0x6ed5bb10,
0x4032c297, 0x6ebc8db0, 0x405e3a16, 0x6ea34f3d,
0x4089a7a8, 0x6e89ffb9, 0x40b50b46, 0x6e709f2a,
0x40e064ea, 0x6e572d93, 0x410bb48c, 0x6e3daaf8,
0x4136fa27, 0x6e24175c, 0x416235b2, 0x6e0a72c5,
0x418d6729, 0x6df0bd35, 0x41b88e84, 0x6dd6f6b1,
0x41e3abbc, 0x6dbd1f3c, 0x420ebecb, 0x6da336dc,
0x4239c7aa, 0x6d893d93, 0x4264c653, 0x6d6f3365,
0x428fbabe, 0x6d551858, 0x42baa4e6, 0x6d3aec6e,
0x42e584c3, 0x6d20afac, 0x43105a50, 0x6d066215,
0x433b2585, 0x6cec03af, 0x4365e65b, 0x6cd1947c,
0x43909ccd, 0x6cb71482, 0x43bb48d4, 0x6c9c83c3,
0x43e5ea68, 0x6c81e245, 0x44108184, 0x6c67300b,
0x443b0e21, 0x6c4c6d1a, 0x44659039, 0x6c319975,
0x449007c4, 0x6c16b521, 0x44ba74bd, 0x6bfbc021,
0x44e4d71c, 0x6be0ba7b, 0x450f2edb, 0x6bc5a431,
0x45397bf4, 0x6baa7d49, 0x4563be60, 0x6b8f45c7,
0x458df619, 0x6b73fdae, 0x45b82318, 0x6b58a503,
0x45e24556, 0x6b3d3bcb, 0x460c5cce, 0x6b21c208,
0x46366978, 0x6b0637c1, 0x46606b4e, 0x6aea9cf8,
0x468a624a, 0x6acef1b2, 0x46b44e65, 0x6ab335f4,
0x46de2f99, 0x6a9769c1, 0x470805df, 0x6a7b8d1e,
0x4731d131, 0x6a5fa010, 0x475b9188, 0x6a43a29a,
0x478546de, 0x6a2794c1, 0x47aef12c, 0x6a0b7689,
0x47d8906d, 0x69ef47f6, 0x48022499, 0x69d3090e,
0x482badab, 0x69b6b9d3, 0x48552b9b, 0x699a5a4c,
0x487e9e64, 0x697dea7b, 0x48a805ff, 0x69616a65,
0x48d16265, 0x6944da10, 0x48fab391, 0x6928397e,
0x4923f97b, 0x690b88b5, 0x494d341e, 0x68eec7b9,
0x49766373, 0x68d1f68f, 0x499f8774, 0x68b5153a,
0x49c8a01b, 0x689823bf, 0x49f1ad61, 0x687b2224,
0x4a1aaf3f, 0x685e106c, 0x4a43a5b0, 0x6840ee9b,
0x4a6c90ad, 0x6823bcb7, 0x4a957030, 0x68067ac3,
0x4abe4433, 0x67e928c5, 0x4ae70caf, 0x67cbc6c0,
0x4b0fc99d, 0x67ae54ba, 0x4b387af9, 0x6790d2b6,
0x4b6120bb, 0x677340ba, 0x4b89badd, 0x67559eca,
0x4bb24958, 0x6737ecea, 0x4bdacc28, 0x671a2b20,
0x4c034345, 0x66fc596f, 0x4c2baea9, 0x66de77dc,
0x4c540e4e, 0x66c0866d, 0x4c7c622d, 0x66a28524,
0x4ca4aa41, 0x66847408, 0x4ccce684, 0x6666531d,
0x4cf516ee, 0x66482267, 0x4d1d3b7a, 0x6629e1ec,
0x4d455422, 0x660b91af, 0x4d6d60df, 0x65ed31b5,
0x4d9561ac, 0x65cec204, 0x4dbd5682, 0x65b0429f,
0x4de53f5a, 0x6591b38c, 0x4e0d1c30, 0x657314cf,
0x4e34ecfc, 0x6554666d, 0x4e5cb1b9, 0x6535a86b,
0x4e846a60, 0x6516dacd, 0x4eac16eb, 0x64f7fd98,
0x4ed3b755, 0x64d910d1, 0x4efb4b96, 0x64ba147d,
0x4f22d3aa, 0x649b08a0, 0x4f4a4f89, 0x647bed3f,
0x4f71bf2e, 0x645cc260, 0x4f992293, 0x643d8806,
0x4fc079b1, 0x641e3e38, 0x4fe7c483, 0x63fee4f8,
0x500f0302, 0x63df7c4d, 0x50363529, 0x63c0043b,
0x505d5af1, 0x63a07cc7, 0x50847454, 0x6380e5f6,
0x50ab814d, 0x63613fcd, 0x50d281d5, 0x63418a50,
0x50f975e6, 0x6321c585, 0x51205d7b, 0x6301f171,
0x5147388c, 0x62e20e17, 0x516e0715, 0x62c21b7e,
0x5194c910, 0x62a219aa, 0x51bb7e75, 0x628208a1,
0x51e22740, 0x6261e866, 0x5208c36a, 0x6241b8ff,
0x522f52ee, 0x62217a72, 0x5255d5c5, 0x62012cc2,
0x527c4bea, 0x61e0cff5, 0x52a2b556, 0x61c06410,
0x52c91204, 0x619fe918, 0x52ef61ee, 0x617f5f12,
0x5315a50e, 0x615ec603, 0x533bdb5d, 0x613e1df0,
0x536204d7, 0x611d66de, 0x53882175, 0x60fca0d2,
0x53ae3131, 0x60dbcbd1, 0x53d43406, 0x60bae7e1,
0x53fa29ed, 0x6099f505, 0x542012e1, 0x6078f344,
0x5445eedb, 0x6057e2a2, 0x546bbdd7, 0x6036c325,
0x54917fce, 0x601594d1, 0x54b734ba, 0x5ff457ad,
0x54dcdc96, 0x5fd30bbc, 0x5502775c, 0x5fb1b104,
0x55280505, 0x5f90478a, 0x554d858d, 0x5f6ecf53,
0x5572f8ed, 0x5f4d4865, 0x55985f20, 0x5f2bb2c5,
0x55bdb81f, 0x5f0a0e77, 0x55e303e6, 0x5ee85b82,
0x5608426e, 0x5ec699e9, 0x562d73b2, 0x5ea4c9b3,
0x565297ab, 0x5e82eae5, 0x5677ae54, 0x5e60fd84,
0x569cb7a8, 0x5e3f0194, 0x56c1b3a1, 0x5e1cf71c,
0x56e6a239, 0x5dfade20, 0x570b8369, 0x5dd8b6a7,
0x5730572e, 0x5db680b4, 0x57551d80, 0x5d943c4e,
0x5779d65b, 0x5d71e979, 0x579e81b8, 0x5d4f883b,
0x57c31f92, 0x5d2d189a, 0x57e7afe4, 0x5d0a9a9a,
0x580c32a7, 0x5ce80e41, 0x5830a7d6, 0x5cc57394,
0x58550f6c, 0x5ca2ca99, 0x58796962, 0x5c801354,
0x589db5b3, 0x5c5d4dcc, 0x58c1f45b, 0x5c3a7a05,
0x58e62552, 0x5c179806, 0x590a4893, 0x5bf4a7d2,
0x592e5e19, 0x5bd1a971, 0x595265df, 0x5bae9ce7,
0x59765fde, 0x5b8b8239, 0x599a4c12, 0x5b68596d,
0x59be2a74, 0x5b452288, 0x59e1faff, 0x5b21dd90,
0x5a05bdae, 0x5afe8a8b, 0x5a29727b, 0x5adb297d,
0x5a4d1960, 0x5ab7ba6c, 0x5a70b258, 0x5a943d5e,
};
/* split-radix bit-reverse table, 1<<12 entries (sized for FFTs of up to 4096 points) */
/* Holds 1<<12 = 4096 entries stored as uint16_t; the entries appear to be
 * indices in the range 0..4095, so each fits in 12 bits.
 * NOTE(review): declared without ICONST_ATTR, unlike sincos_lookup1 in this
 * file -- confirm that is intentional. */
const uint16_t revtab[1<<12] = {
0, 3072, 1536, 2816, 768, 3840, 1408, 2432, 384, 3456, 1920, 2752, 704,
3776, 1216, 2240, 192, 3264, 1728, 3008, 960, 4032, 1376, 2400, 352, 3424,
1888, 2656, 608, 3680, 1120, 2144, 96, 3168, 1632, 2912, 864, 3936, 1504,
2528, 480, 3552, 2016, 2736, 688, 3760, 1200, 2224, 176, 3248, 1712, 2992,
944, 4016, 1328, 2352, 304, 3376, 1840, 2608, 560, 3632, 1072, 2096, 48,
3120, 1584, 2864, 816, 3888, 1456, 2480, 432, 3504, 1968, 2800, 752, 3824,
1264, 2288, 240, 3312, 1776, 3056, 1008, 4080, 1368, 2392, 344, 3416, 1880,
2648, 600, 3672, 1112, 2136, 88, 3160, 1624, 2904, 856, 3928, 1496, 2520,
472, 3544, 2008, 2712, 664, 3736, 1176, 2200, 152, 3224, 1688, 2968, 920,
3992, 1304, 2328, 280, 3352, 1816, 2584, 536, 3608, 1048, 2072, 24, 3096,
1560, 2840, 792, 3864, 1432, 2456, 408, 3480, 1944, 2776, 728, 3800, 1240,
2264, 216, 3288, 1752, 3032, 984, 4056, 1400, 2424, 376, 3448, 1912, 2680,
632, 3704, 1144, 2168, 120, 3192, 1656, 2936, 888, 3960, 1528, 2552, 504,
3576, 2040, 2732, 684, 3756, 1196, 2220, 172, 3244, 1708, 2988, 940, 4012,
1324, 2348, 300, 3372, 1836, 2604, 556, 3628, 1068, 2092, 44, 3116, 1580,
2860, 812, 3884, 1452, 2476, 428, 3500, 1964, 2796, 748, 3820, 1260, 2284,
236, 3308, 1772, 3052, 1004, 4076, 1356, 2380, 332, 3404, 1868, 2636, 588,
3660, 1100, 2124, 76, 3148, 1612, 2892, 844, 3916, 1484, 2508, 460, 3532,
1996, 2700, 652, 3724, 1164, 2188, 140, 3212, 1676, 2956, 908, 3980, 1292,
2316, 268, 3340, 1804, 2572, 524, 3596, 1036, 2060, 12, 3084, 1548, 2828,
780, 3852, 1420, 2444, 396, 3468, 1932, 2764, 716, 3788, 1228, 2252, 204,
3276, 1740, 3020, 972, 4044, 1388, 2412, 364, 3436, 1900, 2668, 620, 3692,
1132, 2156, 108, 3180, 1644, 2924, 876, 3948, 1516, 2540, 492, 3564, 2028,
2748, 700, 3772, 1212, 2236, 188, 3260, 1724, 3004, 956, 4028, 1340, 2364,
316, 3388, 1852, 2620, 572, 3644, 1084, 2108, 60, 3132, 1596, 2876, 828,
3900, 1468, 2492, 444, 3516, 1980, 2812, 764, 3836, 1276, 2300, 252, 3324,
1788, 3068, 1020, 4092, 1366, 2390, 342, 3414, 1878, 2646, 598, 3670, 1110,
2134, 86, 3158, 1622, 2902, 854, 3926, 1494, 2518, 470, 3542, 2006, 2710,
662, 3734, 1174, 2198, 150, 3222, 1686, 2966, 918, 3990, 1302, 2326, 278,
3350, 1814, 2582, 534, 3606, 1046, 2070, 22, 3094, 1558, 2838, 790, 3862,
1430, 2454, 406, 3478, 1942, 2774, 726, 3798, 1238, 2262, 214, 3286, 1750,
3030, 982, 4054, 1398, 2422, 374, 3446, 1910, 2678, 630, 3702, 1142, 2166,
118, 3190, 1654, 2934, 886, 3958, 1526, 2550, 502, 3574, 2038, 2726, 678,
3750, 1190, 2214, 166, 3238, 1702, 2982, 934, 4006, 1318, 2342, 294, 3366,
1830, 2598, 550, 3622, 1062, 2086, 38, 3110, 1574, 2854, 806, 3878, 1446,
2470, 422, 3494, 1958, 2790, 742, 3814, 1254, 2278, 230, 3302, 1766, 3046,
998, 4070, 1350, 2374, 326, 3398, 1862, 2630, 582, 3654, 1094, 2118, 70,
3142, 1606, 2886, 838, 3910, 1478, 2502, 454, 3526, 1990, 2694, 646, 3718,
1158, 2182, 134, 3206, 1670, 2950, 902, 3974, 1286, 2310, 262, 3334, 1798,
2566, 518, 3590, 1030, 2054, 6, 3078, 1542, 2822, 774, 3846, 1414, 2438,
390, 3462, 1926, 2758, 710, 3782, 1222, 2246, 198, 3270, 1734, 3014, 966,
4038, 1382, 2406, 358, 3430, 1894, 2662, 614, 3686, 1126, 2150, 102, 3174,
1638, 2918, 870, 3942, 1510, 2534, 486, 3558, 2022, 2742, 694, 3766, 1206,
2230, 182, 3254, 1718, 2998, 950, 4022, 1334, 2358, 310, 3382, 1846, 2614,
566, 3638, 1078, 2102, 54, 3126, 1590, 2870, 822, 3894, 1462, 2486, 438,
3510, 1974, 2806, 758, 3830, 1270, 2294, 246, 3318, 1782, 3062, 1014, 4086,
1374, 2398, 350, 3422, 1886, 2654, 606, 3678, 1118, 2142, 94, 3166, 1630,
2910, 862, 3934, 1502, 2526, 478, 3550, 2014, 2718, 670, 3742, 1182, 2206,
158, 3230, 1694, 2974, 926, 3998, 1310, 2334, 286, 3358, 1822, 2590, 542,
3614, 1054, 2078, 30, 3102, 1566, 2846, 798, 3870, 1438, 2462, 414, 3486,
1950, 2782, 734, 3806, 1246, 2270, 222, 3294, 1758, 3038, 990, 4062, 1406,
2430, 382, 3454, 1918, 2686, 638, 3710, 1150, 2174, 126, 3198, 1662, 2942,
894, 3966, 1534, 2558, 510, 3582, 2046, 2731, 683, 3755, 1195, 2219, 171,
3243, 1707, 2987, 939, 4011, 1323, 2347, 299, 3371, 1835, 2603, 555, 3627,
1067, 2091, 43, 3115, 1579, 2859, 811, 3883, 1451, 2475, 427, 3499, 1963,
2795, 747, 3819, 1259, 2283, 235, 3307, 1771, 3051, 1003, 4075, 1355, 2379,
331, 3403, 1867, 2635, 587, 3659, 1099, 2123, 75, 3147, 1611, 2891, 843,
3915, 1483, 2507, 459, 3531, 1995, 2699, 651, 3723, 1163, 2187, 139, 3211,
1675, 2955, 907, 3979, 1291, 2315, 267, 3339, 1803, 2571, 523, 3595, 1035,
2059, 11, 3083, 1547, 2827, 779, 3851, 1419, 2443, 395, 3467, 1931, 2763,
715, 3787, 1227, 2251, 203, 3275, 1739, 3019, 971, 4043, 1387, 2411, 363,
3435, 1899, 2667, 619, 3691, 1131, 2155, 107, 3179, 1643, 2923, 875, 3947,
1515, 2539, 491, 3563, 2027, 2747, 699, 3771, 1211, 2235, 187, 3259, 1723,
3003, 955, 4027, 1339, 2363, 315, 3387, 1851, 2619, 571, 3643, 1083, 2107,
59, 3131, 1595, 2875, 827, 3899, 1467, 2491, 443, 3515, 1979, 2811, 763,
3835, 1275, 2299, 251, 3323, 1787, 3067, 1019, 4091, 1363, 2387, 339, 3411,
1875, 2643, 595, 3667, 1107, 2131, 83, 3155, 1619, 2899, 851, 3923, 1491,
2515, 467, 3539, 2003, 2707, 659, 3731, 1171, 2195, 147, 3219, 1683, 2963,
915, 3987, 1299, 2323, 275, 3347, 1811, 2579, 531, 3603, 1043, 2067, 19,
3091, 1555, 2835, 787, 3859, 1427, 2451, 403, 3475, 1939, 2771, 723, 3795,
1235, 2259, 211, 3283, 1747, 3027, 979, 4051, 1395, 2419, 371, 3443, 1907,
2675, 627, 3699, 1139, 2163, 115, 3187, 1651, 2931, 883, 3955, 1523, 2547,
499, 3571, 2035, 2723, 675, 3747, 1187, 2211, 163, 3235, 1699, 2979, 931,
4003, 1315, 2339, 291, 3363, 1827, 2595, 547, 3619, 1059, 2083, 35, 3107,
1571, 2851, 803, 3875, 1443, 2467, 419, 3491, 1955, 2787, 739, 3811, 1251,
2275, 227, 3299, 1763, 3043, 995, 4067, 1347, 2371, 323, 3395, 1859, 2627,
579, 3651, 1091, 2115, 67, 3139, 1603, 2883, 835, 3907, 1475, 2499, 451,
3523, 1987, 2691, 643, 3715, 1155, 2179, 131, 3203, 1667, 2947, 899, 3971,
1283, 2307, 259, 3331, 1795, 2563, 515, 3587, 1027, 2051, 3, 3075, 1539,
2819, 771, 3843, 1411, 2435, 387, 3459, 1923, 2755, 707, 3779, 1219, 2243,
195, 3267, 1731, 3011, 963, 4035, 1379, 2403, 355, 3427, 1891, 2659, 611,
3683, 1123, 2147, 99, 3171, 1635, 2915, 867, 3939, 1507, 2531, 483, 3555,
2019, 2739, 691, 3763, 1203, 2227, 179, 3251, 1715, 2995, 947, 4019, 1331,
2355, 307, 3379, 1843, 2611, 563, 3635, 1075, 2099, 51, 3123, 1587, 2867,
819, 3891, 1459, 2483, 435, 3507, 1971, 2803, 755, 3827, 1267, 2291, 243,
3315, 1779, 3059, 1011, 4083, 1371, 2395, 347, 3419, 1883, 2651, 603, 3675,
1115, 2139, 91, 3163, 1627, 2907, 859, 3931, 1499, 2523, 475, 3547, 2011,
2715, 667, 3739, 1179, 2203, 155, 3227, 1691, 2971, 923, 3995, 1307, 2331,
283, 3355, 1819, 2587, 539, 3611, 1051, 2075, 27, 3099, 1563, 2843, 795,
3867, 1435, 2459, 411, 3483, 1947, 2779, 731, 3803, 1243, 2267, 219, 3291,
1755, 3035, 987, 4059, 1403, 2427, 379, 3451, 1915, 2683, 635, 3707, 1147,
2171, 123, 3195, 1659, 2939, 891, 3963, 1531, 2555, 507, 3579, 2043, 2735,
687, 3759, 1199, 2223, 175, 3247, 1711, 2991, 943, 4015, 1327, 2351, 303,
3375, 1839, 2607, 559, 3631, 1071, 2095, 47, 3119, 1583, 2863, 815, 3887,
1455, 2479, 431, 3503, 1967, 2799, 751, 3823, 1263, 2287, 239, 3311, 1775,
3055, 1007, 4079, 1359, 2383, 335, 3407, 1871, 2639, 591, 3663, 1103, 2127,
79, 3151, 1615, 2895, 847, 3919, 1487, 2511, 463, 3535, 1999, 2703, 655,
3727, 1167, 2191, 143, 3215, 1679, 2959, 911, 3983, 1295, 2319, 271, 3343,
1807, 2575, 527, 3599, 1039, 2063, 15, 3087, 1551, 2831, 783, 3855, 1423,
2447, 399, 3471, 1935, 2767, 719, 3791, 1231, 2255, 207, 3279, 1743, 3023,
975, 4047, 1391, 2415, 367, 3439, 1903, 2671, 623, 3695, 1135, 2159, 111,
3183, 1647, 2927, 879, 3951, 1519, 2543, 495, 3567, 2031, 2751, 703, 3775,
1215, 2239, 191, 3263, 1727, 3007, 959, 4031, 1343, 2367, 319, 3391, 1855,
2623, 575, 3647, 1087, 2111, 63, 3135, 1599, 2879, 831, 3903, 1471, 2495,
447, 3519, 1983, 2815, 767, 3839, 1279, 2303, 255, 3327, 1791, 3071, 1023,
4095, 1365, 2389, 341, 3413, 1877, 2645, 597, 3669, 1109, 2133, 85, 3157,
1621, 2901, 853, 3925, 1493, 2517, 469, 3541, 2005, 2709, 661, 3733, 1173,
2197, 149, 3221, 1685, 2965, 917, 3989, 1301, 2325, 277, 3349, 1813, 2581,
533, 3605, 1045, 2069, 21, 3093, 1557, 2837, 789, 3861, 1429, 2453, 405,
3477, 1941, 2773, 725, 3797, 1237, 2261, 213, 3285, 1749, 3029, 981, 4053,
1397, 2421, 373, 3445, 1909, 2677, 629, 3701, 1141, 2165, 117, 3189, 1653,
2933, 885, 3957, 1525, 2549, 501, 3573, 2037, 2725, 677, 3749, 1189, 2213,
165, 3237, 1701, 2981, 933, 4005, 1317, 2341, 293, 3365, 1829, 2597, 549,
3621, 1061, 2085, 37, 3109, 1573, 2853, 805, 3877, 1445, 2469, 421, 3493,
1957, 2789, 741, 3813, 1253, 2277, 229, 3301, 1765, 3045, 997, 4069, 1349,
2373, 325, 3397, 1861, 2629, 581, 3653, 1093, 2117, 69, 3141, 1605, 2885,
837, 3909, 1477, 2501, 453, 3525, 1989, 2693, 645, 3717, 1157, 2181, 133,
3205, 1669, 2949, 901, 3973, 1285, 2309, 261, 3333, 1797, 2565, 517, 3589,
1029, 2053, 5, 3077, 1541, 2821, 773, 3845, 1413, 2437, 389, 3461, 1925,
2757, 709, 3781, 1221, 2245, 197, 3269, 1733, 3013, 965, 4037, 1381, 2405,
357, 3429, 1893, 2661, 613, 3685, 1125, 2149, 101, 3173, 1637, 2917, 869,
3941, 1509, 2533, 485, 3557, 2021, 2741, 693, 3765, 1205, 2229, 181, 3253,
1717, 2997, 949, 4021, 1333, 2357, 309, 3381, 1845, 2613, 565, 3637, 1077,
2101, 53, 3125, 1589, 2869, 821, 3893, 1461, 2485, 437, 3509, 1973, 2805,
757, 3829, 1269, 2293, 245, 3317, 1781, 3061, 1013, 4085, 1373, 2397, 349,
3421, 1885, 2653, 605, 3677, 1117, 2141, 93, 3165, 1629, 2909, 861, 3933,
1501, 2525, 477, 3549, 2013, 2717, 669, 3741, 1181, 2205, 157, 3229, 1693,
2973, 925, 3997, 1309, 2333, 285, 3357, 1821, 2589, 541, 3613, 1053, 2077,
29, 3101, 1565, 2845, 797, 3869, 1437, 2461, 413, 3485, 1949, 2781, 733,
3805, 1245, 2269, 221, 3293, 1757, 3037, 989, 4061, 1405, 2429, 381, 3453,
1917, 2685, 637, 3709, 1149, 2173, 125, 3197, 1661, 2941, 893, 3965, 1533,
2557, 509, 3581, 2045, 2729, 681, 3753, 1193, 2217, 169, 3241, 1705, 2985,
937, 4009, 1321, 2345, 297, 3369, 1833, 2601, 553, 3625, 1065, 2089, 41,
3113, 1577, 2857, 809, 3881, 1449, 2473, 425, 3497, 1961, 2793, 745, 3817,
1257, 2281, 233, 3305, 1769, 3049, 1001, 4073, 1353, 2377, 329, 3401, 1865,
2633, 585, 3657, 1097, 2121, 73, 3145, 1609, 2889, 841, 3913, 1481, 2505,
457, 3529, 1993, 2697, 649, 3721, 1161, 2185, 137, 3209, 1673, 2953, 905,
3977, 1289, 2313, 265, 3337, 1801, 2569, 521, 3593, 1033, 2057, 9, 3081,
1545, 2825, 777, 3849, 1417, 2441, 393, 3465, 1929, 2761, 713, 3785, 1225,
2249, 201, 3273, 1737, 3017, 969, 4041, 1385, 2409, 361, 3433, 1897, 2665,
617, 3689, 1129, 2153, 105, 3177, 1641, 2921, 873, 3945, 1513, 2537, 489,
3561, 2025, 2745, 697, 3769, 1209, 2233, 185, 3257, 1721, 3001, 953, 4025,
1337, 2361, 313, 3385, 1849, 2617, 569, 3641, 1081, 2105, 57, 3129, 1593,
2873, 825, 3897, 1465, 2489, 441, 3513, 1977, 2809, 761, 3833, 1273, 2297,
249, 3321, 1785, 3065, 1017, 4089, 1361, 2385, 337, 3409, 1873, 2641, 593,
3665, 1105, 2129, 81, 3153, 1617, 2897, 849, 3921, 1489, 2513, 465, 3537,
2001, 2705, 657, 3729, 1169, 2193, 145, 3217, 1681, 2961, 913, 3985, 1297,
2321, 273, 3345, 1809, 2577, 529, 3601, 1041, 2065, 17, 3089, 1553, 2833,
785, 3857, 1425, 2449, 401, 3473, 1937, 2769, 721, 3793, 1233, 2257, 209,
3281, 1745, 3025, 977, 4049, 1393, 2417, 369, 3441, 1905, 2673, 625, 3697,
1137, 2161, 113, 3185, 1649, 2929, 881, 3953, 1521, 2545, 497, 3569, 2033,
2721, 673, 3745, 1185, 2209, 161, 3233, 1697, 2977, 929, 4001, 1313, 2337,
289, 3361, 1825, 2593, 545, 3617, 1057, 2081, 33, 3105, 1569, 2849, 801,
3873, 1441, 2465, 417, 3489, 1953, 2785, 737, 3809, 1249, 2273, 225, 3297,
1761, 3041, 993, 4065, 1345, 2369, 321, 3393, 1857, 2625, 577, 3649, 1089,
2113, 65, 3137, 1601, 2881, 833, 3905, 1473, 2497, 449, 3521, 1985, 2689,
641, 3713, 1153, 2177, 129, 3201, 1665, 2945, 897, 3969, 1281, 2305, 257,
3329, 1793, 2561, 513, 3585, 1025, 2049, 1, 3073, 1537, 2817, 769, 3841,
1409, 2433, 385, 3457, 1921, 2753, 705, 3777, 1217, 2241, 193, 3265, 1729,
3009, 961, 4033, 1377, 2401, 353, 3425, 1889, 2657, 609, 3681, 1121, 2145,
97, 3169, 1633, 2913, 865, 3937, 1505, 2529, 481, 3553, 2017, 2737, 689,
3761, 1201, 2225, 177, 3249, 1713, 2993, 945, 4017, 1329, 2353, 305, 3377,
1841, 2609, 561, 3633, 1073, 2097, 49, 3121, 1585, 2865, 817, 3889, 1457,
2481, 433, 3505, 1969, 2801, 753, 3825, 1265, 2289, 241, 3313, 1777, 3057,
1009, 4081, 1369, 2393, 345, 3417, 1881, 2649, 601, 3673, 1113, 2137, 89,
3161, 1625, 2905, 857, 3929, 1497, 2521, 473, 3545, 2009, 2713, 665, 3737,
1177, 2201, 153, 3225, 1689, 2969, 921, 3993, 1305, 2329, 281, 3353, 1817,
2585, 537, 3609, 1049, 2073, 25, 3097, 1561, 2841, 793, 3865, 1433, 2457,
409, 3481, 1945, 2777, 729, 3801, 1241, 2265, 217, 3289, 1753, 3033, 985,
4057, 1401, 2425, 377, 3449, 1913, 2681, 633, 3705, 1145, 2169, 121, 3193,
1657, 2937, 889, 3961, 1529, 2553, 505, 3577, 2041, 2733, 685, 3757, 1197,
2221, 173, 3245, 1709, 2989, 941, 4013, 1325, 2349, 301, 3373, 1837, 2605,
557, 3629, 1069, 2093, 45, 3117, 1581, 2861, 813, 3885, 1453, 2477, 429,
3501, 1965, 2797, 749, 3821, 1261, 2285, 237, 3309, 1773, 3053, 1005, 4077,
1357, 2381, 333, 3405, 1869, 2637, 589, 3661, 1101, 2125, 77, 3149, 1613,
2893, 845, 3917, 1485, 2509, 461, 3533, 1997, 2701, 653, 3725, 1165, 2189,
141, 3213, 1677, 2957, 909, 3981, 1293, 2317, 269, 3341, 1805, 2573, 525,
3597, 1037, 2061, 13, 3085, 1549, 2829, 781, 3853, 1421, 2445, 397, 3469,
1933, 2765, 717, 3789, 1229, 2253, 205, 3277, 1741, 3021, 973, 4045, 1389,
2413, 365, 3437, 1901, 2669, 621, 3693, 1133, 2157, 109, 3181, 1645, 2925,
877, 3949, 1517, 2541, 493, 3565, 2029, 2749, 701, 3773, 1213, 2237, 189,
3261, 1725, 3005, 957, 4029, 1341, 2365, 317, 3389, 1853, 2621, 573, 3645,
1085, 2109, 61, 3133, 1597, 2877, 829, 3901, 1469, 2493, 445, 3517, 1981,
2813, 765, 3837, 1277, 2301, 253, 3325, 1789, 3069, 1021, 4093, 1367, 2391,
343, 3415, 1879, 2647, 599, 3671, 1111, 2135, 87, 3159, 1623, 2903, 855,
3927, 1495, 2519, 471, 3543, 2007, 2711, 663, 3735, 1175, 2199, 151, 3223,
1687, 2967, 919, 3991, 1303, 2327, 279, 3351, 1815, 2583, 535, 3607, 1047,
2071, 23, 3095, 1559, 2839, 791, 3863, 1431, 2455, 407, 3479, 1943, 2775,
727, 3799, 1239, 2263, 215, 3287, 1751, 3031, 983, 4055, 1399, 2423, 375,
3447, 1911, 2679, 631, 3703, 1143, 2167, 119, 3191, 1655, 2935, 887, 3959,
1527, 2551, 503, 3575, 2039, 2727, 679, 3751, 1191, 2215, 167, 3239, 1703,
2983, 935, 4007, 1319, 2343, 295, 3367, 1831, 2599, 551, 3623, 1063, 2087,
39, 3111, 1575, 2855, 807, 3879, 1447, 2471, 423, 3495, 1959, 2791, 743,
3815, 1255, 2279, 231, 3303, 1767, 3047, 999, 4071, 1351, 2375, 327, 3399,
1863, 2631, 583, 3655, 1095, 2119, 71, 3143, 1607, 2887, 839, 3911, 1479,
2503, 455, 3527, 1991, 2695, 647, 3719, 1159, 2183, 135, 3207, 1671, 2951,
903, 3975, 1287, 2311, 263, 3335, 1799, 2567, 519, 3591, 1031, 2055, 7,
3079, 1543, 2823, 775, 3847, 1415, 2439, 391, 3463, 1927, 2759, 711, 3783,
1223, 2247, 199, 3271, 1735, 3015, 967, 4039, 1383, 2407, 359, 3431, 1895,
2663, 615, 3687, 1127, 2151, 103, 3175, 1639, 2919, 871, 3943, 1511, 2535,
487, 3559, 2023, 2743, 695, 3767, 1207, 2231, 183, 3255, 1719, 2999, 951,
4023, 1335, 2359, 311, 3383, 1847, 2615, 567, 3639, 1079, 2103, 55, 3127,
1591, 2871, 823, 3895, 1463, 2487, 439, 3511, 1975, 2807, 759, 3831, 1271,
2295, 247, 3319, 1783, 3063, 1015, 4087, 1375, 2399, 351, 3423, 1887, 2655,
607, 3679, 1119, 2143, 95, 3167, 1631, 2911, 863, 3935, 1503, 2527, 479,
3551, 2015, 2719, 671, 3743, 1183, 2207, 159, 3231, 1695, 2975, 927, 3999,
1311, 2335, 287, 3359, 1823, 2591, 543, 3615, 1055, 2079, 31, 3103, 1567,
2847, 799, 3871, 1439, 2463, 415, 3487, 1951, 2783, 735, 3807, 1247, 2271,
223, 3295, 1759, 3039, 991, 4063, 1407, 2431, 383, 3455, 1919, 2687, 639,
3711, 1151, 2175, 127, 3199, 1663, 2943, 895, 3967, 1535, 2559, 511, 3583,
2047, 2730, 682, 3754, 1194, 2218, 170, 3242, 1706, 2986, 938, 4010, 1322,
2346, 298, 3370, 1834, 2602, 554, 3626, 1066, 2090, 42, 3114, 1578, 2858,
810, 3882, 1450, 2474, 426, 3498, 1962, 2794, 746, 3818, 1258, 2282, 234,
3306, 1770, 3050, 1002, 4074, 1354, 2378, 330, 3402, 1866, 2634, 586, 3658,
1098, 2122, 74, 3146, 1610, 2890, 842, 3914, 1482, 2506, 458, 3530, 1994,
2698, 650, 3722, 1162, 2186, 138, 3210, 1674, 2954, 906, 3978, 1290, 2314,
266, 3338, 1802, 2570, 522, 3594, 1034, 2058, 10, 3082, 1546, 2826, 778,
3850, 1418, 2442, 394, 3466, 1930, 2762, 714, 3786, 1226, 2250, 202, 3274,
1738, 3018, 970, 4042, 1386, 2410, 362, 3434, 1898, 2666, 618, 3690, 1130,
2154, 106, 3178, 1642, 2922, 874, 3946, 1514, 2538, 490, 3562, 2026, 2746,
698, 3770, 1210, 2234, 186, 3258, 1722, 3002, 954, 4026, 1338, 2362, 314,
3386, 1850, 2618, 570, 3642, 1082, 2106, 58, 3130, 1594, 2874, 826, 3898,
1466, 2490, 442, 3514, 1978, 2810, 762, 3834, 1274, 2298, 250, 3322, 1786,
3066, 1018, 4090, 1362, 2386, 338, 3410, 1874, 2642, 594, 3666, 1106, 2130,
82, 3154, 1618, 2898, 850, 3922, 1490, 2514, 466, 3538, 2002, 2706, 658,
3730, 1170, 2194, 146, 3218, 1682, 2962, 914, 3986, 1298, 2322, 274, 3346,
1810, 2578, 530, 3602, 1042, 2066, 18, 3090, 1554, 2834, 786, 3858, 1426,
2450, 402, 3474, 1938, 2770, 722, 3794, 1234, 2258, 210, 3282, 1746, 3026,
978, 4050, 1394, 2418, 370, 3442, 1906, 2674, 626, 3698, 1138, 2162, 114,
3186, 1650, 2930, 882, 3954, 1522, 2546, 498, 3570, 2034, 2722, 674, 3746,
1186, 2210, 162, 3234, 1698, 2978, 930, 4002, 1314, 2338, 290, 3362, 1826,
2594, 546, 3618, 1058, 2082, 34, 3106, 1570, 2850, 802, 3874, 1442, 2466,
418, 3490, 1954, 2786, 738, 3810, 1250, 2274, 226, 3298, 1762, 3042, 994,
4066, 1346, 2370, 322, 3394, 1858, 2626, 578, 3650, 1090, 2114, 66, 3138,
1602, 2882, 834, 3906, 1474, 2498, 450, 3522, 1986, 2690, 642, 3714, 1154,
2178, 130, 3202, 1666, 2946, 898, 3970, 1282, 2306, 258, 3330, 1794, 2562,
514, 3586, 1026, 2050, 2, 3074, 1538, 2818, 770, 3842, 1410, 2434, 386,
3458, 1922, 2754, 706, 3778, 1218, 2242, 194, 3266, 1730, 3010, 962, 4034,
1378, 2402, 354, 3426, 1890, 2658, 610, 3682, 1122, 2146, 98, 3170, 1634,
2914, 866, 3938, 1506, 2530, 482, 3554, 2018, 2738, 690, 3762, 1202, 2226,
178, 3250, 1714, 2994, 946, 4018, 1330, 2354, 306, 3378, 1842, 2610, 562,
3634, 1074, 2098, 50, 3122, 1586, 2866, 818, 3890, 1458, 2482, 434, 3506,
1970, 2802, 754, 3826, 1266, 2290, 242, 3314, 1778, 3058, 1010, 4082, 1370,
2394, 346, 3418, 1882, 2650, 602, 3674, 1114, 2138, 90, 3162, 1626, 2906,
858, 3930, 1498, 2522, 474, 3546, 2010, 2714, 666, 3738, 1178, 2202, 154,
3226, 1690, 2970, 922, 3994, 1306, 2330, 282, 3354, 1818, 2586, 538, 3610,
1050, 2074, 26, 3098, 1562, 2842, 794, 3866, 1434, 2458, 410, 3482, 1946,
2778, 730, 3802, 1242, 2266, 218, 3290, 1754, 3034, 986, 4058, 1402, 2426,
378, 3450, 1914, 2682, 634, 3706, 1146, 2170, 122, 3194, 1658, 2938, 890,
3962, 1530, 2554, 506, 3578, 2042, 2734, 686, 3758, 1198, 2222, 174, 3246,
1710, 2990, 942, 4014, 1326, 2350, 302, 3374, 1838, 2606, 558, 3630, 1070,
2094, 46, 3118, 1582, 2862, 814, 3886, 1454, 2478, 430, 3502, 1966, 2798,
750, 3822, 1262, 2286, 238, 3310, 1774, 3054, 1006, 4078, 1358, 2382, 334,
3406, 1870, 2638, 590, 3662, 1102, 2126, 78, 3150, 1614, 2894, 846, 3918,
1486, 2510, 462, 3534, 1998, 2702, 654, 3726, 1166, 2190, 142, 3214, 1678,
2958, 910, 3982, 1294, 2318, 270, 3342, 1806, 2574, 526, 3598, 1038, 2062,
14, 3086, 1550, 2830, 782, 3854, 1422, 2446, 398, 3470, 1934, 2766, 718,
3790, 1230, 2254, 206, 3278, 1742, 3022, 974, 4046, 1390, 2414, 366, 3438,
1902, 2670, 622, 3694, 1134, 2158, 110, 3182, 1646, 2926, 878, 3950, 1518,
2542, 494, 3566, 2030, 2750, 702, 3774, 1214, 2238, 190, 3262, 1726, 3006,
958, 4030, 1342, 2366, 318, 3390, 1854, 2622, 574, 3646, 1086, 2110, 62,
3134, 1598, 2878, 830, 3902, 1470, 2494, 446, 3518, 1982, 2814, 766, 3838,
1278, 2302, 254, 3326, 1790, 3070, 1022, 4094, 1364, 2388, 340, 3412, 1876,
2644, 596, 3668, 1108, 2132, 84, 3156, 1620, 2900, 852, 3924, 1492, 2516,
468, 3540, 2004, 2708, 660, 3732, 1172, 2196, 148, 3220, 1684, 2964, 916,
3988, 1300, 2324, 276, 3348, 1812, 2580, 532, 3604, 1044, 2068, 20, 3092,
1556, 2836, 788, 3860, 1428, 2452, 404, 3476, 1940, 2772, 724, 3796, 1236,
2260, 212, 3284, 1748, 3028, 980, 4052, 1396, 2420, 372, 3444, 1908, 2676,
628, 3700, 1140, 2164, 116, 3188, 1652, 2932, 884, 3956, 1524, 2548, 500,
3572, 2036, 2724, 676, 3748, 1188, 2212, 164, 3236, 1700, 2980, 932, 4004,
1316, 2340, 292, 3364, 1828, 2596, 548, 3620, 1060, 2084, 36, 3108, 1572,
2852, 804, 3876, 1444, 2468, 420, 3492, 1956, 2788, 740, 3812, 1252, 2276,
228, 3300, 1764, 3044, 996, 4068, 1348, 2372, 324, 3396, 1860, 2628, 580,
3652, 1092, 2116, 68, 3140, 1604, 2884, 836, 3908, 1476, 2500, 452, 3524,
1988, 2692, 644, 3716, 1156, 2180, 132, 3204, 1668, 2948, 900, 3972, 1284,
2308, 260, 3332, 1796, 2564, 516, 3588, 1028, 2052, 4, 3076, 1540, 2820,
772, 3844, 1412, 2436, 388, 3460, 1924, 2756, 708, 3780, 1220, 2244, 196,
3268, 1732, 3012, 964, 4036, 1380, 2404, 356, 3428, 1892, 2660, 612, 3684,
1124, 2148, 100, 3172, 1636, 2916, 868, 3940, 1508, 2532, 484, 3556, 2020,
2740, 692, 3764, 1204, 2228, 180, 3252, 1716, 2996, 948, 4020, 1332, 2356,
308, 3380, 1844, 2612, 564, 3636, 1076, 2100, 52, 3124, 1588, 2868, 820,
3892, 1460, 2484, 436, 3508, 1972, 2804, 756, 3828, 1268, 2292, 244, 3316,
1780, 3060, 1012, 4084, 1372, 2396, 348, 3420, 1884, 2652, 604, 3676, 1116,
2140, 92, 3164, 1628, 2908, 860, 3932, 1500, 2524, 476, 3548, 2012, 2716,
668, 3740, 1180, 2204, 156, 3228, 1692, 2972, 924, 3996, 1308, 2332, 284,
3356, 1820, 2588, 540, 3612, 1052, 2076, 28, 3100, 1564, 2844, 796, 3868,
1436, 2460, 412, 3484, 1948, 2780, 732, 3804, 1244, 2268, 220, 3292, 1756,
3036, 988, 4060, 1404, 2428, 380, 3452, 1916, 2684, 636, 3708, 1148, 2172,
124, 3196, 1660, 2940, 892, 3964, 1532, 2556, 508, 3580, 2044, 2728, 680,
3752, 1192, 2216, 168, 3240, 1704, 2984, 936, 4008, 1320, 2344, 296, 3368,
1832, 2600, 552, 3624, 1064, 2088, 40, 3112, 1576, 2856, 808, 3880, 1448,
2472, 424, 3496, 1960, 2792, 744, 3816, 1256, 2280, 232, 3304, 1768, 3048,
1000, 4072, 1352, 2376, 328, 3400, 1864, 2632, 584, 3656, 1096, 2120, 72,
3144, 1608, 2888, 840, 3912, 1480, 2504, 456, 3528, 1992, 2696, 648, 3720,
1160, 2184, 136, 3208, 1672, 2952, 904, 3976, 1288, 2312, 264, 3336, 1800,
2568, 520, 3592, 1032, 2056, 8, 3080, 1544, 2824, 776, 3848, 1416, 2440,
392, 3464, 1928, 2760, 712, 3784, 1224, 2248, 200, 3272, 1736, 3016, 968,
4040, 1384, 2408, 360, 3432, 1896, 2664, 616, 3688, 1128, 2152, 104, 3176,
1640, 2920, 872, 3944, 1512, 2536, 488, 3560, 2024, 2744, 696, 3768, 1208,
2232, 184, 3256, 1720, 3000, 952, 4024, 1336, 2360, 312, 3384, 1848, 2616,
568, 3640, 1080, 2104, 56, 3128, 1592, 2872, 824, 3896, 1464, 2488, 440,
3512, 1976, 2808, 760, 3832, 1272, 2296, 248, 3320, 1784, 3064, 1016, 4088,
1360, 2384, 336, 3408, 1872, 2640, 592, 3664, 1104, 2128, 80, 3152, 1616,
2896, 848, 3920, 1488, 2512, 464, 3536, 2000, 2704, 656, 3728, 1168, 2192,
144, 3216, 1680, 2960, 912, 3984, 1296, 2320, 272, 3344, 1808, 2576, 528,
3600, 1040, 2064, 16, 3088, 1552, 2832, 784, 3856, 1424, 2448, 400, 3472,
1936, 2768, 720, 3792, 1232, 2256, 208, 3280, 1744, 3024, 976, 4048, 1392,
2416, 368, 3440, 1904, 2672, 624, 3696, 1136, 2160, 112, 3184, 1648, 2928,
880, 3952, 1520, 2544, 496, 3568, 2032, 2720, 672, 3744, 1184, 2208, 160,
3232, 1696, 2976, 928, 4000, 1312, 2336, 288, 3360, 1824, 2592, 544, 3616,
1056, 2080, 32, 3104, 1568, 2848, 800, 3872, 1440, 2464, 416, 3488, 1952,
2784, 736, 3808, 1248, 2272, 224, 3296, 1760, 3040, 992, 4064, 1344, 2368,
320, 3392, 1856, 2624, 576, 3648, 1088, 2112, 64, 3136, 1600, 2880, 832,
3904, 1472, 2496, 448, 3520, 1984, 2688, 640, 3712, 1152, 2176, 128, 3200,
1664, 2944, 896, 3968, 1280, 2304, 256, 3328, 1792, 2560, 512, 3584, 1024,
2048};

View file

@ -0,0 +1,24 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE. *
* *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002 *
* BY THE Xiph.Org FOUNDATION http://www.xiph.org/ *
* *
********************************************************************
function: sin,cos lookup tables
********************************************************************/
/* Read-only lookup tables declared here and defined elsewhere.
 * Per this header's banner, the sincos_* arrays are sin/cos lookup tables;
 * their element layout and scaling are not visible from this header. */
extern const int32_t sincos_lookup0[1026];
extern const int32_t sincos_lookup1[1024];
/* Table of 4096 (1<<12) 16-bit entries.
 * NOTE(review): presumably an index-reordering (bit-reversal style) table
 * for the transform code -- confirm against its definition and users. */
extern const uint16_t revtab[1<<12];

View file

View file

@ -0,0 +1,23 @@
Aaron Holtzman <aholtzma@ess.engr.uvic.ca> started the project and
made the initial working implementation.
Michel Lespinasse <walken@zoy.org> did major changes for speed and
conformance and is the current maintainer.
Other contributors include:
Gildas Bazin <gbazin@netcourrier.com> - mingw32 port
Billy Biggs <vektor@div8.net> - most of liba52.txt
Jeroen Dobbelaere <jeroen.dobbelaere@acunia.com> - fixed point version
Eduard Hasenleithner <eduardh@aon.at> - gcc 3.0 fixes
Håkan Hjort <d95hjort@dtek.chalmers.se> - Solaris output, mlib code
Charles M. Hannum <root@ihack.net> - fixes
Chris Hodges <hodges@stradis.com> - made the library reentrant
Michael Holzt <kju@flummi.de> - OSS output.c and misc errata
Angelos Keromytis <angelos@dsl.cis.upenn.edu> - OpenBSD fixes
David I. Lehn <dlehn@vt.edu> - API cleanup suggestion
Don Mahurin <dmahurin@dma.org> - stdin support for extract_a52
Jim Miller <jmiller@heli.engr.sgi.com> - IRIX output.c
Takefumi Sayo <stake@niagara.shiojiri.ne.jp> - FreeBSD tweak
Shoji Tokunaga <toku@mac.com> - aif file output
(let me know if I forgot anyone)

View file

@ -0,0 +1,340 @@
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.
59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Library General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) 19yy <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) 19yy name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
<signature of Ty Coon>, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Library General
Public License instead of this License.

View file

@ -0,0 +1,97 @@
a52dec-0.7.4 Sat Jul 27 20:44:00 PDT 2002
-The library is now fully reentrant.
-Added win32 output module, al file output, gain control.
-A few additional portability enhancements.
a52dec-0.7.3 Wed Feb 20 23:38:22 PST 2002
-rewrite of the imdct code, making a52dec 40% to 80% faster than version 0.7.2
-fixed one memory corruption problem in parse.c
-small liba52 portability fixes
-byte order and CRLF bugfixes in wav file output
-aif file output
-IRIX al sound output (untested, tell us if it works)
a52dec-0.7.2 Sun Dec 16 14:39:56 PST 2001
-demuxer improvements, with support for TS streams.
-smaller demux buffer, making it easier to use a52dec as a pipe
-wav output
-avoid -fPIC when possible (-prefer-non-pic)
-support for vc++ and TenDRA
-portability fixes
a52dec-0.7.1b Fri Aug 31 02:37:23 PDT 2001
-removed an #include <stdint.h> that was only breaking libc5 builds
a52dec-0.7.1 Thu Aug 30 02:13:23 PDT 2001
-gcc 3.0 fixes
-mlib fixes (now passes the test suite)
-in a52dec.c, made sample_data and flags static
-removed a few statics in liba52/parse.c (still not reentrant, but closer)
a52dec-0.7.0 Thu Aug 23 23:18:00 PDT 2001
-Downmix to arbitrary speaker configurations
-Dynamic range compression
-Major speedups: 2x for 2.0 streams with 2.0 output,
3x for 5.1 streams with 2.0 output.
-New library interface
-Rematrixing bugfix
-Higher precision
ac3dec-0.6.1 Mon Mar 27 20:27:06 EST 2000
-Fix another 2.0 problem (rematrix was wrong).
-Fix the never resync on a bad crc bug.
ac3dec-0.6.0 Sat Mar 18 19:43:25 EST 2000
-New library interface
-Fix bug wrt coupling channels that was causing sound quality problems.
-Fix 2.0 mode problems (aka the I forgot to implement the phase flags bug).
-All around speed improvements (almost twice as fast)
-Improved robustness when fed bad data. The entire frame is checksummed before playback.
ac3dec-0.5.6 Tue Nov 16 00:37:34 EST 1999
-Irix support
-Alpha fixes
-Minor performance enhancements to downmix and imdct
-OpenBSD fixes
-extract_ac3 can now read from stdin
-Change output_linux to block on write instead of using the
ring buffer. Let me know if this causes/fixes any problems
ac3dec-0.5.5 Wed Aug 25 15:36:44 EDT 1999
-Fixed a cut and paste bug (argh!) in parse.c which potentially
screwed up delta bit allocation info.
-Martin Mueller <mamueller@topmail.de> informed me that I was missing
some corrections from the AC-3 errata document. It turns out that
I used an earlier version of the errata when I initially wrote ac3dec.
Fortunately the errata fix the outstanding bugs that I was pulling
my hair out on for a long time. Woohoo! Thanks Martin. Kudos to Dolby
Labs for keeping their documentation up to date as well.
-stereo downmixing (downmix.c) is now in. Matrix encoded surround
(Dolby Prologic Surround) should work too.
-clipping due to high level signals has been fixed. We normalize a
block of samples by its maximum absolute value if the max exceeds
the 100% digital level. This shouldn't be a problem, but for some
reason some channels have a dynamic range that exceeds [-1.0,1.0].
I blame the encoder :)
-Multiple track support in extract_ac3. Simply just give it the track
number you want [1,8] after the filename.
ac3dec-0.5.4 Thu Jul 29 16:55:10 PDT 1999
-Fixed a stupid bug with the coupling channel that was causing
high frequencies to be attenuated.
-Re-wrote the extract_ac3 tool.
-Added a tool to verify the checksums on a given AC3 stream.
(tools/verify_ac3)
ac3dec-0.5.3 Mon Jul 12 10:45:56 PDT 1999
-Fixed problems related to streams with coupling channel enabled.
-Minor performance enhancements
ac3dec-0.5.2 Sun Jul 4 12:00:25 PDT 1999
-output_linux.c patch provided by Michael Holzt <kju@flummi.de>
ac3dec-0.5.1 Wed Jun 30 17:48:52 PDT 1999
-Compiles and dies gracefully under Linux now.
ac3dec-0.5.0 Wed Jun 23 11:06:06 EDT 1999
-First public release of ac3dec.

View file

@ -0,0 +1,28 @@
changes that affected the PCM output:
2001/05/14 05:48:59 - aaron's 0.6.1 was cutting before the end
2001/06/04 01:42:47 - slightly adjusted some values (volume, downmix adjustment, clev/slev tables)
2001/06/04 05:48:31 - added adjust_level to the downmix
2001/06/12
2001/06/13 - changed the order of the dither() calls
also introduced bug with thx_2_0 stream
2001/06/22 08:23:37 - fixed bug with thx_2_0 stream (see parse.c)
2001/07/02 08:44:55 - changed float-to-int conversion (+ added saturation)
2001/07/06 06:53:01 - more precise imdct init, more precise q_* coefficients
2001/07/06 08:52:30 - rematrixing fixes
2001/07/26 21:31:39 - do 3dB dither reduction in coeff_get() not dither_gen()
2001/08/09 08:11:31 - implemented dynamic range compression
2002/02/24 08:54:49 - window function computed at runtime, with more precision
2003/01/28 06:57:37 - switched to integer q_* coefficients
2003/01/28 07:39:35 - use level of 0.75 for dithering instead of 0.707

View file

@ -0,0 +1,58 @@
Unix build instructions
-----------------------
./configure
make
make install
If you install from CVS you'll have to run ./bootstrap first
Building for win32
------------------
There are at least three ways to do it:
- natively on Windows using Microsoft VC++ and the vc++ project
included in this distribution.
- natively on Windows using MSYS + MINGW (www.mingw.org) (MSYS is a
minimal build environment to compile unixish projects under
windows. It provides all the common unix tools like sh, gmake...)
- or on Linux, using the mingw32 cross-compiler
Building using MSYS + MINGW on windows
--------------------------------------
First you will need to download and install the latest MSYS (version
1.0.7 as of now) and MINGW. The installation is really easy. Begin
with the MSYS auto-installer and once this is done, extract MINGW into
c:\msys\1.0\mingw. You also have to remember to remove the make
utility included with MINGW as it conflicts with the one from MSYS
(just rename or remove c:\msys\1.0\mingw\bin\make.exe).
http://prdownloads.sourceforge.net/mingw/MSYS-1.0.7-i686-2002.04.24-1.exe
http://prdownloads.sourceforge.net/mingw/MinGW-1.1.tar.gz
Then you can build the package using:
# ./configure
# make
Building using the mingw32 cross-compiler
-----------------------------------------
You need to install mingw32 first. For Debian GNU/Linux users, there
is a mingw32 package. Otherwise you might get it from the mingw site
at http://www.mingw.org/download.shtml.
The videolan project also keeps precompiled mingw32 binaries at
http://www.videolan.org/vlc/windows.html . If you install these,
you'll have to set your PATH accordingly to include
/usr/local/cross-tools/bin too.
The build should then proceed using something like:
# CC=i586-mingw32msvc-gcc ./configure --host=i586-mingw32msvc
# make

View file

@ -0,0 +1,46 @@
a52dec-0.7.4 Sat Jul 27 20:44:00 PDT 2002
The library is now fully reentrant.
Added win32 output module, al file output, gain control.
A few additional portability enhancements.
a52dec-0.7.3 Wed Feb 20 23:38:22 PST 2002
Performance enhancements, from 40% to 80% depending on streams.
Fixed a few embarrassing bugs in liba52: one memory corruption issue
and a few minor portability problems.
Several new output modules, and fixes in the existing .wav file output.
a52dec-0.7.2 Sun Dec 16 14:39:56 PST 2001
Minor bugfixes, performance and portability enhancements.
Also added wav format output, and reduced the demux buffer size which
makes it easier to use a52dec as a pipe.
a52dec-0.7.1 Thu Aug 30 02:13:23 PDT 2001
Minor release for bugfixes. Looks like 0.7.0 was a bit rushed out.
Now compiles with gcc 3.0, made sure mlib implementation works, and
fixed a small bug in the a52dec test program.
a52dec-0.7.0 Thu Aug 23 23:18:00 PDT 2001
First release since more than one year !
The most user-noticeable additions are the downmix to arbitrary
speaker configurations, and the implementation of dynamic range
compression.
The speed has been improved by a factor of 2 to 3, the conformance and
precision should be higher, and we also fixed a small bug when playing
stereo rematrixed streams.

View file

@ -0,0 +1,180 @@
ABOUT LIBA52
liba52 is a free library for decoding ATSC A/52 streams. It is
released under the terms of the GPL license. The A/52 standard is used
in a variety of applications, including digital television and DVD. It
is also known as AC-3.
The main goals in liba52 development are:
* Portability - Currently all of the code is written in C, and
when we write platform-specific optimizations we will always
keep a generic C routine to fall back on.
* Reusability - we do not want liba52 to include any
project-specific code, but it should still include enough
features to be used by very diverse projects.
* Precision - We are trying to implement all of the A/52
standard, and to have a very precise output by doing all the
calculations in floating point. We have a test suite that
detects any deviation in the output when compared to previous
versions. We do not have access to official A/52 test vectors
though, so we have to use our judgement to ensure that such
deviations are only introduced when we fix bugs !
* Speed - liba52 is really fast, on any modern PC it should take
only a few percent of CPU time.
The project homepage is at http://liba52.sourceforge.net/
A52DEC
a52dec is a test program for liba52. It decodes ATSC A/52 streams, and
also includes a demultiplexer for mpeg-1 and mpeg-2 program streams.
The liba52 source code is always distributed in the a52dec package, to
make it easier for people to test it.
The basic usage is to just type "a52dec file" where file is an ATSC
A/52 file.
The "-s" option must be used for multiplexed (audio and video) mpeg-2
files. These files are usually found on the internet or on unencrypted
DVDs.
The "-o" option is used to select a given output layer. By default
a52dec does a stereo downmix and outputs to your speakers, but you can
try other choices using this option. This is also used for performance
testing and conformance testing.
The "-c" option is used to disable all optimizations (currently only djbfft).
The "-r" option is used to disable the dynamic range compression.
OTHER PROJECTS USING LIBA52
liba52 (and its ancestor libac3) is being used by various other
projects, including:
* xine (http://xine.sourceforge.net/) - started as a simple
mpeg-2 audio and video decoder, but it since became a
full-featured DVD and video media player.
* VideoLAN (http://www.videolan.org/) - video streaming over an
ethernet network, can also be used as a standalone player.
* MPlayer (http://www.MPlayerHQ.hu) - another good player, it is
also very robust against damaged streams.
* movietime (http://movietime.sourceforge.net/) - still quite
young, but it looks very promising !
* ffmpeg (http://ffmpeg.sourceforge.net/) - a nice audio/video
encoder and transcoder, uses liba52 for decoding A/52 streams.
* Ogle (http://www.dtek.chalmers.se/groups/dvd/) - a good DVD
player with menu support
* a52decX (http://homepage1.nifty.com/~toku/software_en.html) -
a graphical interface for a52dec in macintosh osX.
* TCVP (http://tcvp.sf.net) - video and music player for unix.
* bd4go (http://denisx.dyndns.org/bd4go/) - another graphical
interface for macintosh osX.
* drip (http://drip.sourceforge.net/) - a DVD to DIVX transcoder.
* OMS (http://www.linuxvideo.org/oms/)
* XMPS (http://xmps.sourceforge.net/)
* GStreamer (http://www.gstreamer.net/) - a framework for
streaming media; it has an A/52 decoding plugin based on liba52.
* mpeglib (http://mpeglib.sourceforge.net/) - a video decoding
library that uses liba52 when decoding A/52 streams.
If you use liba52 in another project, let us know !
TASKS
There are several places where we could easily use some help:
* Web design: This site sucks ! at the very least, we'd like to
come up with a nicer background picture and a logo.
* Testing: If you find any stream that does not decode right
with liba52, let us know ! The best thing would be to mail to
the liba52-devel mailing list. Also if you have access to
encoders, we'd love to get test streams that would be free of
rights - so that we can put them on this server.
* Coding: you can have a look in the TODO file first ! The most
important item is probably to make the code fully reentrant.
* Porting: If you're porting to a new architecture, you might
want to experiment with the compile flags defined in
configure.in . When you figure out what's fastest on your
platform, send us a patch !
REFERENCES
The A/52 standard, as published by the ATSC, is available at
http://www.atsc.org/standards/a_52a.pdf
CVS SNAPSHOTS
A daily snapshot is created using "make distcheck" every night and
uploaded to http://liba52.sourceforge.net/files/a52dec-snapshot.tar.gz .
It is easier to use than the CVS repository, because you do not need
to have the right versions of automake, autoconf and libtool
installed. It might be convenient when working on a liba52 port for
example.
CVS REPOSITORY
The latest liba52 and a52dec source code can always be found by
anonymous CVS:
# export CVSROOT=:pserver:anonymous@cvs.liba52.sourceforge.net:/cvsroot/liba52
# cvs login (Just press Return when prompted for a password)
# cvs checkout a52dec
You can also browse the latest changes online at
http://cvs.sourceforge.net/cgi-bin/viewcvs.cgi/liba52/a52dec/
The other CVS modules are ac3dec-livid for the CVS history of the
project while it was still hosted on the linuxvideo.org servers, and
ac3dec for the CVS history of the project while the linuxvideo.org
servers were down and before the library switched its name to liba52.
MAILING LISTS
See the subscription information at http://liba52.sourceforge.net/lists.html
liba52-devel
This is the main mailing list for technical discussion about
liba52. Anyone wanting to work on liba52, or maybe just stay informed
about the development process, should probably subscribe to this list.
liba52-checkins
All liba52 checkins are announced there. This is a good way to keep
track of what goes into CVS.
liba52-announce
This is a very low traffic mailing list, only for announcements of new
versions of liba52. Only project administrators can post there.

View file

@ -0,0 +1,27 @@
Library: liba52-0.7.5 (CVS version 2005-02-16)
Imported: 2005-02-16 by Dave Chapman
This directory contains a local version of liba52 for decoding ATSC
A/52 (aka AC-3) audio streams. A/52 is commonly used in digital TV and
on DVDs.
LICENSING INFORMATION
liba52 is released under the GNU General Public License as described
in the COPYING file in this directory.
IMPORT DETAILS
The base version first imported into Rockbox was the CVS version of
liba52-0.7.5 (0.7.4 was at the time the latest official) which was
checked out of sourceforge on 2005-02-16.
The .[ch] files from a52dec/liba52/ and a52dec/include/ as well as the
documentation files in a52dec/ were imported into Rockbox. The other
files in the archive relate to the test player (a52dec) and were not
imported.
A simple config file (config-a52.h) was added to enable liba52's fixed-point
integer-only mode and to specify the endianness of the target CPU.

View file

@ -0,0 +1,5 @@
bit_allocate.c
bitstream.c
downmix.c
imdct.c
parse.c

View file

@ -0,0 +1,17 @@
* look at possible overflow/precision issues in integer port
* redo all bit allocation if previous frame had zero_snr_offsets
* make dynrng work in dual-channel streams
* implement A/52a downmix extensions
* reduce size of delay buffer by 50%
* include float->s16 conversion in liba52 API ?
* include up/downsampling 44100<->48000 in liba52 API ?
* include audio dithering in liba52 API ?
* API extensions might be at a different level (base vs. extended)
* use restrict pointers where appropriate
* avoid overflows, including reading the a52 stream !!!
* faster bitstream parsing ?
* make dither code faster (generate dither table in advance ?)
* SIMD optimizations

View file

@ -0,0 +1,67 @@
/*
* a52.h
* Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
* Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
*
* This file is part of a52dec, a free ATSC A-52 stream decoder.
* See http://liba52.sourceforge.net/ for updates.
*
* a52dec is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* a52dec is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef A52_H
#define A52_H
/*
 * Sample and level representation: 32-bit integers when LIBA52_FIXED is
 * defined, doubles when LIBA52_DOUBLE is defined, single-precision floats
 * otherwise.
 */
#if defined(LIBA52_FIXED)
typedef int32_t sample_t;
typedef int32_t level_t;
#elif defined(LIBA52_DOUBLE)
typedef double sample_t;
typedef double level_t;
#else
typedef float sample_t;
typedef float level_t;
#endif
/* Opaque decoder state; struct a52_state_s is laid out in a52_internal.h. */
typedef struct a52_state_s a52_state_t;
/*
 * Channel configurations. They occupy the low four bits of a flags word
 * and are extracted with A52_CHANNEL_MASK.
 */
#define A52_CHANNEL 0
#define A52_MONO 1
#define A52_STEREO 2
#define A52_3F 3
#define A52_2F1R 4
#define A52_3F1R 5
#define A52_2F2R 6
#define A52_3F2R 7
#define A52_CHANNEL1 8
#define A52_CHANNEL2 9
#define A52_DOLBY 10
#define A52_CHANNEL_MASK 15
/*
 * Flag bits that sit above the channel configuration.
 * A52_ADJUST_LEVEL makes a52_downmix_init() rescale the caller's level.
 * NOTE(review): A52_LFE presumably flags the low-frequency-effects channel;
 * its use is not visible in this part of the tree.
 */
#define A52_LFE 16
#define A52_ADJUST_LEVEL 32
/*
 * Public decoder entry points.
 * NOTE(review): their implementations are not part of this header; see
 * the liba52 documentation for the exact calling sequence.
 */
a52_state_t * a52_init (uint32_t mm_accel);
sample_t * a52_samples (a52_state_t * state);
int a52_syncinfo (uint8_t * buf, int * flags,
int * sample_rate, int * bit_rate);
int a52_frame (a52_state_t * state, uint8_t * buf, int * flags,
level_t * level, sample_t bias);
void a52_dynrng (a52_state_t * state,
level_t (* call) (level_t, void *), void * data);
int a52_block (a52_state_t * state);
void a52_free (a52_state_t * state);
#endif /* A52_H */

View file

@ -0,0 +1,215 @@
/*
* a52_internal.h
* Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
* Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
*
* This file is part of a52dec, a free ATSC A-52 stream decoder.
* See http://liba52.sourceforge.net/ for updates.
*
* a52dec is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* a52dec is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
 * Bit allocation parameters for one channel, consumed by
 * a52_bit_allocate(): bits 0-2 of bai give the fast gain code and the
 * remaining bits the fine SNR offset.
 */
typedef struct {
uint8_t bai; /* fine SNR offset, fast gain */
uint8_t deltbae; /* delta bit allocation exists */
int8_t deltba[50]; /* per-band delta bit allocation */
} ba_t;
/*
 * Per-channel exponents and the bit allocation pointers that
 * a52_bit_allocate() derives from them, one entry per frequency bin.
 */
typedef struct {
uint8_t exp[256]; /* decoded channel exponents */
int8_t bap[256]; /* derived channel bit allocation */
} expbap_t;
/*
 * Complete decoder state (the a52_state_t of a52.h): stream parameters,
 * downmix settings, coupling and bit allocation data, the bitstream
 * reader position and the sample buffer.
 */
struct a52_state_s {
uint8_t fscod; /* sample rate */
uint8_t halfrate; /* halfrate factor */
uint8_t acmod; /* coded channels */
uint8_t lfeon; /* coded lfe channel */
level_t clev; /* centre channel mix level */
level_t slev; /* surround channels mix level */
int output; /* type of output */
level_t level; /* output level */
sample_t bias; /* output bias */
int dynrnge; /* apply dynamic range */
level_t dynrng; /* dynamic range */
void * dynrngdata; /* dynamic range callback function and data */
level_t (* dynrngcall) (level_t range, void * dynrngdata);
uint8_t chincpl; /* channel coupled */
uint8_t phsflginu; /* phase flags in use (stereo only) */
uint8_t cplstrtmant; /* coupling channel start mantissa */
uint8_t cplendmant; /* coupling channel end mantissa */
uint32_t cplbndstrc; /* coupling band structure */
level_t cplco[5][18]; /* coupling coordinates */
/* derived information */
uint8_t cplstrtbnd; /* coupling start band (for bit allocation) */
uint8_t ncplbnd; /* number of coupling bands */
uint8_t rematflg; /* stereo rematrixing */
uint8_t endmant[5]; /* channel end mantissa */
uint16_t bai; /* bit allocation information */
/* bitstream reader state, maintained by bitstream.c / bitstream.h */
uint32_t * buffer_start;
uint16_t lfsr_state; /* dither state */
uint32_t bits_left;
uint32_t current_word;
uint8_t csnroffst; /* coarse SNR offset */
ba_t cplba; /* coupling bit allocation parameters */
ba_t ba[5]; /* channel bit allocation parameters */
ba_t lfeba; /* lfe bit allocation parameters */
uint8_t cplfleak; /* coupling fast leak init */
uint8_t cplsleak; /* coupling slow leak init */
expbap_t cpl_expbap;
expbap_t fbw_expbap[5];
expbap_t lfe_expbap;
sample_t * samples;
int downmixed;
};
/*
 * Linear gain factors: 2, sqrt(2), 1/sqrt(2), 2^-0.75 and 1/2.
 * They are floating-point constants; wrap them in LEVEL() (or use MUL_C)
 * to obtain a level_t.
 */
#define LEVEL_PLUS6DB 2.0
#define LEVEL_PLUS3DB 1.4142135623730951
#define LEVEL_3DB 0.7071067811865476
#define LEVEL_45DB 0.5946035575013605
#define LEVEL_6DB 0.5
/* Exponent strategy codes. NOTE(review): users are outside this header. */
#define EXP_REUSE (0)
#define EXP_D15 (1)
#define EXP_D25 (2)
#define EXP_D45 (3)
/*
 * Values of ba_t.deltbae. a52_bit_allocate() treats DELTA_BIT_NONE as
 * "all deltas are zero".
 */
#define DELTA_BIT_REUSE (0)
#define DELTA_BIT_NEW (1)
#define DELTA_BIT_NONE (2)
#define DELTA_BIT_RESERVED (3)
/* bit_allocate.c: fill expbap->bap from expbap->exp for one channel */
void a52_bit_allocate (a52_state_t * state, ba_t * ba, int bndstart,
int start, int end, int fastleak, int slowleak,
expbap_t * expbap);
/* downmix.c: choose the output configuration and optionally adjust *level */
int a52_downmix_init (int input, int flags, level_t * level,
level_t clev, level_t slev);
/* downmix.c: compute per-channel downmix coefficients */
int a52_downmix_coeff (level_t * coeff, int acmod, int output, level_t level,
level_t clev, level_t slev);
/* NOTE(review): the remaining functions are defined outside the code visible
 * here (downmix.c tail and imdct.c, presumably). */
void a52_downmix (sample_t * samples, int acmod, int output,
level_t clev, level_t slev);
void a52_upmix (sample_t * samples, int acmod, int output);
void a52_imdct_init (uint32_t mm_accel);
void a52_imdct_256 (sample_t * data, sample_t * delay);
void a52_imdct_512 (sample_t * data, sample_t * delay);
/* Round a floating-point value to the nearest int, halves away from zero. */
#define ROUND(x) ((int)((x) + ((x) > 0 ? 0.5 : -0.5)))
/*
 * Arithmetic helpers shared by the decoder.
 *   SAMPLE(x) / LEVEL(x)  convert a constant to sample_t / level_t
 *   MUL(a,b)              product scaled like a sample
 *   MUL_L(a,b)            product scaled like a level
 *   MUL_C(a,b)            multiply by a floating-point constant b
 *   DIV(a,b)              constant a divided by level b
 *   BIAS(x)               add the output bias (floating point only)
 * Without LIBA52_FIXED these are plain floating-point operations.
 */
#ifndef LIBA52_FIXED
typedef sample_t quantizer_t;
#define SAMPLE(x) (x)
#define LEVEL(x) (x)
#define MUL(a,b) ((a) * (b))
#define MUL_L(a,b) ((a) * (b))
#define MUL_C(a,b) ((a) * (b))
#define DIV(a,b) ((a) / (b))
#define BIAS(x) ((x) + bias)
#else /* LIBA52_FIXED */
/* Fixed point: SAMPLE() scales by 2^30 and LEVEL() by 2^26. */
typedef int16_t quantizer_t;
#define SAMPLE(x) (sample_t)((x) * (1 << 30))
#define LEVEL(x) (level_t)((x) * (1 << 26))
#if 0
/* Reference versions (disabled): rounded 64-bit product shifted right by
 * 30 (MUL) or 26 (MUL_L). */
#define MUL(a,b) ((int)(((int64_t)(a) * (b) + (1 << 29)) >> 30))
#define MUL_L(a,b) ((int)(((int64_t)(a) * (b) + (1 << 25)) >> 26))
#elif defined(CPU_COLDFIRE)
/* Coldfire: multiply-accumulate into acc0, read it back with movclr and
 * shift left to restore the scale.
 * NOTE(review): presumably relies on the EMAC being in fractional mode. */
/* loses 1 bit of accuracy */
#define MUL(a, b) \
({ \
int32_t t; \
asm volatile ( \
"mac.l %[A], %[B], %%acc0\n\t" \
"movclr.l %%acc0, %[t]\n\t" \
"asl.l #1, %[t]" \
: [t] "=d" (t) \
: [A] "r" ((a)), [B] "r" ((b))); \
t; \
})
/* loses 5 bits of accuracy */
#define MUL_L(a, b) \
({ \
int32_t t; \
asm volatile ( \
"mac.l %[A], %[B], %%acc0\n\t" \
"movclr.l %%acc0, %[t]\n\t" \
"asl.l #5, %[t]" \
: [t] "=d" (t) \
: [A] "r" ((a)), [B] "r" ((b))); \
t; \
})
#elif defined(CPU_ARM)
/* ARM: smull forms the 64-bit product; MUL returns its high word shifted
 * left by 2, i.e. the product >> 30 with the low two bits dropped. */
#define MUL(x, y) \
({ int32_t __hi; \
uint32_t __lo; \
int32_t __result; \
asm ("smull %0, %1, %3, %4\n\t" \
"movs %2, %1, lsl #2" \
: "=&r" (__lo), "=&r" (__hi), "=r" (__result) \
: "%r" (x), "r" (y) \
: "cc"); \
__result; \
})
/* ARM: MUL_L combines (lo >> 26) with (hi << 6); the adc also adds the last
 * bit shifted out of lo, which rounds the result. */
#define MUL_L(x, y) \
({ int32_t __hi; \
uint32_t __lo; \
int32_t __result; \
asm ("smull %0, %1, %3, %4\n\t" \
"movs %0, %0, lsr %5\n\t" \
"adc %2, %0, %1, lsl %6" \
: "=&r" (__lo), "=&r" (__hi), "=r" (__result) \
: "%r" (x), "r" (y), \
"M" (26), "M" (32 - 26) \
: "cc"); \
__result; \
})
#elif 1
/* Generic C: split both operands into 16-bit halves and sum the
 * high*high and cross terms; the low*low term is ignored. */
#define MUL(a,b) \
({ int32_t _ta=(a), _tb=(b), _tc; \
_tc=(_ta & 0xffff)*(_tb >> 16)+(_ta >> 16)*(_tb & 0xffff); (int32_t)(((_tc >> 14))+ (((_ta >> 16)*(_tb >> 16)) << 2 )); })
#define MUL_L(a,b) \
({ int32_t _ta=(a), _tb=(b), _tc; \
_tc=(_ta & 0xffff)*(_tb >> 16)+(_ta >> 16)*(_tb & 0xffff); (int32_t)((_tc >> 10) + (((_ta >> 16)*(_tb >> 16)) << 6)); })
#else
/* Coarse fallback (never selected because of the "#elif 1" above):
 * pre-shift both operands, then multiply. */
#define MUL(a,b) (((a) >> 15) * ((b) >> 15))
#define MUL_L(a,b) (((a) >> 13) * ((b) >> 13))
#endif
/* b is a floating-point constant, a is a fixed-point value */
#define MUL_C(a,b) MUL_L (a, LEVEL (b))
/* a is a floating-point constant, b a level_t; the quotient is 64 bits wide
 * and scaled by 2^26 */
#define DIV(a,b) ((((int64_t)LEVEL (a)) << 26) / (b))
/* no bias is applied in fixed-point mode */
#define BIAS(x) ((x))
#endif

View file

@ -0,0 +1,37 @@
/*
* attributes.h
* Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
* Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
*
* This file is part of a52dec, a free ATSC A-52 stream decoder.
* See http://liba52.sourceforge.net/ for updates.
*
* a52dec is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* a52dec is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
 * Use gcc attributes to align critical data structures.
 *
 * ATTR_ALIGN(n) requests an alignment of n bytes, capped at
 * ATTRIBUTE_ALIGNED_MAX; it expands to nothing when that macro is not
 * defined. The argument is parenthesized so that an expression such as
 * ATTR_ALIGN(a ? 8 : 16) is compared and selected as a whole.
 */
#ifdef ATTRIBUTE_ALIGNED_MAX
#define ATTR_ALIGN(align) __attribute__ ((__aligned__ ((ATTRIBUTE_ALIGNED_MAX < (align)) ? ATTRIBUTE_ALIGNED_MAX : (align))))
#else
#define ATTR_ALIGN(align)
#endif
/*
 * Branch-prediction hints. Both variants evaluate to the truth value of x
 * (0 or 1), so code behaves the same whether or not the compiler provides
 * __builtin_expect.
 */
#ifdef HAVE_BUILTIN_EXPECT
#define likely(x) __builtin_expect ((x) != 0, 1)
#define unlikely(x) __builtin_expect ((x) != 0, 0)
#else
#define likely(x) ((x) != 0)
#define unlikely(x) ((x) != 0)
#endif

View file

@ -0,0 +1,265 @@
/*
* bit_allocate.c
* Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
* Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
*
* This file is part of a52dec, a free ATSC A-52 stream decoder.
* See http://liba52.sourceforge.net/ for updates.
*
* a52dec is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* a52dec is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "config-a52.h"
#include <inttypes.h>
#include "a52.h"
#include "a52_internal.h"
/*
 * Upper limit applied to the masking curve in COMPUTE_MASK(). The row is
 * selected by state->fscod and the column by (band >> halfrate).
 */
static int hthtab[3][50] IDATA_ATTR = {
{0x730, 0x730, 0x7c0, 0x800, 0x820, 0x840, 0x850, 0x850, 0x860, 0x860,
0x860, 0x860, 0x860, 0x870, 0x870, 0x870, 0x880, 0x880, 0x890, 0x890,
0x8a0, 0x8a0, 0x8b0, 0x8b0, 0x8c0, 0x8c0, 0x8d0, 0x8e0, 0x8f0, 0x900,
0x910, 0x910, 0x910, 0x910, 0x900, 0x8f0, 0x8c0, 0x870, 0x820, 0x7e0,
0x7a0, 0x770, 0x760, 0x7a0, 0x7c0, 0x7c0, 0x6e0, 0x400, 0x3c0, 0x3c0},
{0x710, 0x710, 0x7a0, 0x7f0, 0x820, 0x830, 0x840, 0x850, 0x850, 0x860,
0x860, 0x860, 0x860, 0x860, 0x870, 0x870, 0x870, 0x880, 0x880, 0x880,
0x890, 0x890, 0x8a0, 0x8a0, 0x8b0, 0x8b0, 0x8c0, 0x8c0, 0x8e0, 0x8f0,
0x900, 0x910, 0x910, 0x910, 0x910, 0x900, 0x8e0, 0x8b0, 0x870, 0x820,
0x7e0, 0x7b0, 0x760, 0x770, 0x7a0, 0x7c0, 0x780, 0x5d0, 0x3c0, 0x3c0},
{0x680, 0x680, 0x750, 0x7b0, 0x7e0, 0x810, 0x820, 0x830, 0x840, 0x850,
0x850, 0x850, 0x860, 0x860, 0x860, 0x860, 0x860, 0x860, 0x860, 0x860,
0x870, 0x870, 0x870, 0x870, 0x880, 0x880, 0x880, 0x890, 0x8a0, 0x8b0,
0x8c0, 0x8d0, 0x8e0, 0x8f0, 0x900, 0x910, 0x910, 0x910, 0x900, 0x8f0,
0x8d0, 0x8b0, 0x840, 0x7f0, 0x790, 0x760, 0x7a0, 0x7c0, 0x7b0, 0x720}
};
/*
 * Maps (mask + 4 * exponent) to a bit allocation pointer. It is always
 * accessed as (baptab+156)[index], so entry 156 corresponds to index 0 and
 * the padding on both sides covers the index range -156..148.
 * The zero run starting at baptab+156 also serves as an all-zero deltba
 * array when a channel has no delta bit allocation.
 */
static int8_t baptab[305] IDATA_ATTR = {
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, /* 93 padding elems */
16, 16, 16, 16, 16, 16, 16, 16, 16, 14, 14, 14, 14, 14, 14, 14,
14, 12, 12, 12, 12, 11, 11, 11, 11, 10, 10, 10, 10, 9, 9, 9,
9, 8, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5,
5, 4, 4, -3, -3, 3, 3, 3, -2, -2, -1, -1, -1, -1, -1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0 /* 148 padding elems */
};
/*
 * End bin (exclusive) of each band from band 20 upwards; indexed with
 * (band - 20) in a52_bit_allocate(). Below bin 20 every band is one bin.
 */
static int bndtab[30] IDATA_ATTR = {21, 22, 23, 24, 25, 26, 27, 28, 31, 34,
37, 40, 43, 46, 49, 55, 61, 67, 73, 79,
85, 97, 109, 121, 133, 157, 181, 205, 229, 253};
/*
 * Correction term used when the psd values of the bins in one band are
 * merged in a52_bit_allocate(); indexed by half the absolute difference
 * between the two values.
 */
static int8_t latab[256] IDATA_ATTR = {
-64, -63, -62, -61, -60, -59, -58, -57, -56, -55, -54, -53,
-52, -52, -51, -50, -49, -48, -47, -47, -46, -45, -44, -44,
-43, -42, -41, -41, -40, -39, -38, -38, -37, -36, -36, -35,
-35, -34, -33, -33, -32, -32, -31, -30, -30, -29, -29, -28,
-28, -27, -27, -26, -26, -25, -25, -24, -24, -23, -23, -22,
-22, -21, -21, -21, -20, -20, -19, -19, -19, -18, -18, -18,
-17, -17, -17, -16, -16, -16, -15, -15, -15, -14, -14, -14,
-13, -13, -13, -13, -12, -12, -12, -12, -11, -11, -11, -11,
-10, -10, -10, -10, -10, -9, -9, -9, -9, -9, -8, -8,
-8, -8, -8, -8, -7, -7, -7, -7, -7, -7, -6, -6,
-6, -6, -6, -6, -6, -6, -5, -5, -5, -5, -5, -5,
-5, -5, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
-4, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3,
-3, -3, -3, -2, -2, -2, -2, -2, -2, -2, -2, -2,
-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0
};
/*
 * Advance the fast and slow leak terms by their decay and clamp each one
 * to psd plus the matching gain. Uses the locals fastleak, slowleak,
 * fdecay, sdecay, fgain, sgain and psd of a52_bit_allocate().
 */
#define UPDATE_LEAK() \
do { \
fastleak += fdecay; \
if (fastleak > psd + fgain) \
fastleak = psd + fgain; \
slowleak += sdecay; \
if (slowleak > psd + sgain) \
slowleak = psd + sgain; \
} while (0)
/*
 * Turn the excitation value in `mask` into the offset that is added to
 * 4 * exponent to index baptab: apply the dbknee correction, clamp to the
 * hth limit for band i, subtract the SNR offset and the band's delta, then
 * negate, scale down by 32 and subtract floor. Uses the locals psd, dbknee,
 * hth, halfrate, snroffset, deltba, floor and the band index i of
 * a52_bit_allocate().
 */
#define COMPUTE_MASK() \
do { \
if (psd > dbknee) \
mask -= (psd - dbknee) >> 2; \
if (mask > hth [i >> halfrate]) \
mask = hth [i >> halfrate]; \
mask -= snroffset + 128 * deltba[i]; \
mask = (mask > 0) ? 0 : ((-mask) >> 5); \
mask -= floor; \
} while (0)
/*
 * Compute the bit allocation pointers of one channel.
 *
 * Reads the exponents in expbap->exp and writes expbap->bap for the bins up
 * to (but not including) `end`.
 *
 * state     supplies halfrate, fscod, csnroffst and the packed bai field
 * ba        per-channel parameters (fast gain, fine SNR offset, deltas)
 * bndstart  band index at which processing starts
 * start     first bin; 0 selects the non-coupling path, which handles the
 *           low bins one per band and derives its own leak values
 * end       one past the last bin; 7 means the lfe channel
 * fastleak, slowleak
 *           initial leak values, only used as passed in when start != 0
 */
void a52_bit_allocate (a52_state_t * state, ba_t * ba, int bndstart,
int start, int end, int fastleak, int slowleak,
expbap_t * expbap)
{
static int slowgain[4] = {0x540, 0x4d8, 0x478, 0x410};
static int dbpbtab[4] = {0xc00, 0x500, 0x300, 0x100};
static int floortab[8] = {0x910, 0x950, 0x990, 0x9d0,
0xa10, 0xa90, 0xb10, 0x1400};
int i, j;
uint8_t * exp;
int8_t * bap;
int fdecay, fgain, sdecay, sgain, dbknee, floor, snroffset;
int psd, mask;
int8_t * deltba;
int * hth;
int halfrate;
halfrate = state->halfrate;
/* unpack the bit allocation parameters from state->bai and ba->bai */
fdecay = (63 + 20 * ((state->bai >> 7) & 3)) >> halfrate; /* fdcycod */
fgain = 128 + 128 * (ba->bai & 7); /* fgaincod */
sdecay = (15 + 2 * (state->bai >> 9)) >> halfrate; /* sdcycod */
sgain = slowgain[(state->bai >> 5) & 3]; /* sgaincod */
dbknee = dbpbtab[(state->bai >> 3) & 3]; /* dbpbcod */
hth = hthtab[state->fscod];
/*
* if there is no delta bit allocation, make deltba point to an area
* known to contain zeroes. baptab+156 here.
*/
deltba = (ba->deltbae == DELTA_BIT_NONE) ? baptab + 156 : ba->deltba;
floor = floortab[state->bai & 7]; /* floorcod */
snroffset = 960 - 64 * state->csnroffst - 4 * (ba->bai >> 3) + floor;
floor >>= 5;
exp = expbap->exp;
bap = expbap->bap;
/* i counts bands, j counts bins */
i = bndstart;
j = start;
if (start == 0) { /* not the coupling channel */
int lowcomp;
lowcomp = 0;
/* j is the last valid bin here, so exp[i+1] is only read when i < j */
j = end - 1;
/* first bins: no leak terms yet, mask comes straight from psd */
do {
if (i < j) {
if (exp[i+1] == exp[i] - 2)
lowcomp = 384;
else if (lowcomp && (exp[i+1] > exp[i]))
lowcomp -= 64;
}
psd = 128 * exp[i];
mask = psd + fgain + lowcomp;
COMPUTE_MASK ();
bap[i] = (baptab+156)[mask + 4 * exp[i]];
i++;
} while ((i < 3) || ((i < 7) && (exp[i] > exp[i-1])));
/* seed the leak terms from the last psd of the loop above */
fastleak = psd + fgain;
slowleak = psd + sgain;
/* remaining bins below 7, now with leak terms */
while (i < 7) {
if (i < j) {
if (exp[i+1] == exp[i] - 2)
lowcomp = 384;
else if (lowcomp && (exp[i+1] > exp[i]))
lowcomp -= 64;
}
psd = 128 * exp[i];
UPDATE_LEAK ();
mask = ((fastleak + lowcomp < slowleak) ?
fastleak + lowcomp : slowleak);
COMPUTE_MASK ();
bap[i] = (baptab+156)[mask + 4 * exp[i]];
i++;
}
if (end == 7) /* lfe channel */
return;
/* bins 7..19: same scheme with a smaller lowcomp reset value */
do {
if (exp[i+1] == exp[i] - 2)
lowcomp = 320;
else if (lowcomp && (exp[i+1] > exp[i]))
lowcomp -= 64;
psd = 128 * exp[i];
UPDATE_LEAK ();
mask = ((fastleak + lowcomp < slowleak) ?
fastleak + lowcomp : slowleak);
COMPUTE_MASK ();
bap[i] = (baptab+156)[mask + 4 * exp[i]];
i++;
} while (i < 20);
/* let lowcomp decay over the next bins */
while (lowcomp > 128) { /* two iterations maximum */
lowcomp -= 128;
psd = 128 * exp[i];
UPDATE_LEAK ();
mask = ((fastleak + lowcomp < slowleak) ?
fastleak + lowcomp : slowleak);
COMPUTE_MASK ();
bap[i] = (baptab+156)[mask + 4 * exp[i]];
i++;
}
/* bands and bins are still one-to-one at this point */
j = i;
}
/* remaining bands, which may span several bins each (see bndtab) */
do {
int startband, endband;
startband = j;
endband = (bndtab[i-20] < end) ? bndtab[i-20] : end;
/* merge the psd values of all bins in the band into one value */
psd = 128 * exp[j++];
while (j < endband) {
int next, delta;
next = 128 * exp[j++];
delta = next - psd;
/* delta >> 9 other than the listed cases leaves psd unchanged */
switch (delta >> 9) {
case -6: case -5: case -4: case -3: case -2:
psd = next;
break;
case -1:
psd = next + latab[(-delta) >> 1];
break;
case 0:
psd += latab[delta >> 1];
break;
}
}
/* minpsd = -289 */
UPDATE_LEAK ();
mask = (fastleak < slowleak) ? fastleak : slowleak;
COMPUTE_MASK ();
i++;
/* one mask per band, applied to every bin of the band */
j = startband;
do {
/* max(mask+4*exp)=147=-(minpsd+fgain-deltba-snroffset)>>5+4*exp */
/* min(mask+4*exp)=-156=-(sgain-deltba-snroffset)>>5 */
bap[j] = (baptab+156)[mask + 4 * exp[j]];
} while (++j < endband);
} while (j < end);
}

View file

@ -0,0 +1,97 @@
/*
* bitstream.c
* Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
* Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
*
* This file is part of a52dec, a free ATSC A-52 stream decoder.
* See http://liba52.sourceforge.net/ for updates.
*
* a52dec is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* a52dec is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "config-a52.h"
#include <inttypes.h>
#include "a52.h"
#include "a52_internal.h"
#include "bitstream.h"
/* NOTE(review): not referenced by any code in this file - confirm whether
 * it is still needed. */
#define BUFFER_SIZE 4096
/*
 * Point the bitstream reader at buf.
 *
 * Words are fetched 32 bits at a time, so reading starts at the aligned
 * word that contains buf and the bytes in front of buf are skipped.
 */
void a52_bitstream_set_ptr (a52_state_t * state, uint8_t * buf)
{
    int align;

    /* only the low two address bits matter; uintptr_t avoids truncating
     * the pointer on platforms where long is narrower than a pointer */
    align = (int) ((uintptr_t) buf & 3);
    state->buffer_start = (uint32_t *) (buf - align);
    state->bits_left = 0;
    state->current_word = 0;

    /* loads the first word and discards the leading align bytes */
    bitstream_get (state, align * 8);

    /* Reference bitstream_get_2 so it counts as used. The former zero-bit
     * read did not change the reader state, but shifted a 32-bit value by
     * 32, which is undefined. */
    (void) bitstream_get_2;
}
/*
 * Fetch the next 32-bit word from the input buffer, convert it with
 * swab32() and make it the current word.
 */
static inline void bitstream_fill_current (a52_state_t * state)
{
    /* read into a local first so the swab32() argument has no side effect */
    uint32_t raw = *state->buffer_start;

    state->buffer_start++;
    state->current_word = swab32 (raw);
}
/*
* The fast paths for _get is in the
* bitstream.h header file so it can be inlined.
*
* The "bottom half" of this routine is suffixed _bh
*
* -ah
*/
/*
 * Slow path of bitstream_get(): the request needs at least as many bits as
 * remain in the current word. The remaining bits are taken, the next word
 * is loaded and the rest of the request is read from its top.
 * Returns the requested bits as an unsigned value.
 */
uint32_t a52_bitstream_get_bh (a52_state_t * state, uint32_t num_bits)
{
    uint32_t result;

    /* bits still missing after the current word is used up */
    num_bits -= state->bits_left;

    /* With no bits left (the state right after a52_bitstream_set_ptr)
     * the shift count would be 32, which is undefined for a 32-bit
     * operand; nothing is contributed in that case. */
    if (state->bits_left != 0)
        result = ((state->current_word << (32 - state->bits_left)) >>
                  (32 - state->bits_left));
    else
        result = 0;

    bitstream_fill_current (state);

    if (num_bits != 0)
        result = (result << num_bits) | (state->current_word >> (32 - num_bits));

    state->bits_left = 32 - num_bits;
    return result;
}
/*
 * Slow path of bitstream_get_2(): like a52_bitstream_get_bh(), but the
 * bits taken from the current word are sign-extended, so the result is a
 * signed value.
 */
int32_t a52_bitstream_get_bh_2 (a52_state_t * state, uint32_t num_bits)
{
    int32_t result;

    /* bits still missing after the current word is used up */
    num_bits -= state->bits_left;

    /* Left shifts are done on unsigned values: shifting a negative signed
     * value left is undefined. The right shift of the signed value is what
     * sign-extends. A shift count of 32 (no bits left) is avoided as well. */
    if (state->bits_left != 0)
        result = ((int32_t) (state->current_word << (32 - state->bits_left))) >>
                 (32 - state->bits_left);
    else
        result = 0;

    bitstream_fill_current (state);

    if (num_bits != 0)
        result = (int32_t) (((uint32_t) result << num_bits) |
                            (state->current_word >> (32 - num_bits)));

    state->bits_left = 32 - num_bits;
    return result;
}

View file

@ -0,0 +1,54 @@
/*
* bitstream.h
* Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
* Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
*
* This file is part of a52dec, a free ATSC A-52 stream decoder.
* See http://liba52.sourceforge.net/ for updates.
*
* a52dec is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* a52dec is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/* Conversion applied to every 32-bit word read from the stream
 * (see bitstream_fill_current() in bitstream.c). */
#define swab32(x) (betoh32(x))
/* Start reading at buf. */
void a52_bitstream_set_ptr (a52_state_t * state, uint8_t * buf);
/* Out-of-line slow paths of bitstream_get() and bitstream_get_2(), used
 * when the current word does not hold more bits than requested. */
uint32_t a52_bitstream_get_bh (a52_state_t * state, uint32_t num_bits);
int32_t a52_bitstream_get_bh_2 (a52_state_t * state, uint32_t num_bits);
/*
 * Read num_bits bits from the stream as an unsigned value.
 * The inline part handles requests that leave at least one bit in the
 * current word; everything else goes to a52_bitstream_get_bh().
 */
static inline uint32_t bitstream_get (a52_state_t * state, uint32_t num_bits)
{
    uint32_t aligned;

    /* not enough bits buffered: the helper refills the current word */
    if (num_bits >= state->bits_left)
        return a52_bitstream_get_bh (state, num_bits);

    /* move the unread bits to the top, then keep the leading num_bits */
    aligned = state->current_word << (32 - state->bits_left);
    state->bits_left -= num_bits;
    return aligned >> (32 - num_bits);
}
/*
 * Read num_bits bits from the stream as a signed (sign-extended) value.
 * The inline part handles requests that leave at least one bit in the
 * current word; everything else goes to a52_bitstream_get_bh_2().
 */
static inline int32_t bitstream_get_2 (a52_state_t * state, uint32_t num_bits)
{
    int32_t result;

    if (num_bits < state->bits_left) {
        /* The left shift is done on the unsigned word, because shifting a
         * negative signed value left is undefined; the right shift of the
         * signed value then sign-extends. */
        result = ((int32_t) (state->current_word << (32 - state->bits_left))) >>
                 (32 - num_bits);
        state->bits_left -= num_bits;
        return result;
    }

    return a52_bitstream_get_bh_2 (state, num_bits);
}

View file

@ -0,0 +1,26 @@
/* Build configuration for the local copy of liba52. */
#include "codeclib.h"
/* a52dec profiling */
/* #undef A52DEC_GPROF */
/* Define to 1 if you have the `memalign' function. */
/* #undef HAVE_MEMALIGN 1 */
/* liba52 djbfft support */
/* #undef LIBA52_DJBFFT */
/* a52 sample precision */
/* #undef LIBA52_DOUBLE */
/* use fixed-point arithmetic: selects the int32_t sample_t/level_t in a52.h
   and the integer MUL/DIV macros in a52_internal.h */
#define LIBA52_FIXED
/* Define to 1 if your processor stores words with the most significant byte
first (like Motorola and SPARC, unlike Intel and VAX). */
/* NOTE(review): originally used in bitstream.h; the bitstream.h in this
   directory defines swab32() through betoh32() and does not test
   WORDS_BIGENDIAN - confirm whether anything still depends on it. */
#ifdef ROCKBOX_BIG_ENDIAN
#define WORDS_BIGENDIAN 1
#endif

View file

@ -0,0 +1,688 @@
/*
* downmix.c
* Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
* Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
*
* This file is part of a52dec, a free ATSC A-52 stream decoder.
* See http://liba52.sourceforge.net/ for updates.
*
* a52dec is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* a52dec is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "config-a52.h"
#include <string.h>
#include <inttypes.h>
#include "a52.h"
#include "a52_internal.h"
/* Pack an (input acmod, output configuration) pair into one integer so the
 * pair can be used as a switch case label; acmod occupies the low 3 bits. */
#define CONVERT(acmod,output) (((output) << 3) + (acmod))
/*
 * Choose the output channel configuration for a stream.
 *
 * input   channel configuration of the stream (A52_* value)
 * flags   requested output: configuration in the A52_CHANNEL_MASK bits,
 *         optionally combined with A52_ADJUST_LEVEL
 * level   in/out: multiplied by an adjustment factor when A52_ADJUST_LEVEL
 *         is set and the input/output pair has one; untouched otherwise
 * clev    centre mix level
 * slev    surround mix level
 *
 * Returns -1 if the requested configuration is greater than A52_DOLBY,
 * otherwise the selected output configuration (without any flag bits).
 */
int a52_downmix_init (int input, int flags, level_t * level,
level_t clev, level_t slev)
{
/* table[requested output][input & 7] -> output actually produced */
static uint8_t table[11][8] = {
{A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO,
A52_STEREO, A52_STEREO, A52_STEREO, A52_STEREO},
{A52_MONO, A52_MONO, A52_MONO, A52_MONO,
A52_MONO, A52_MONO, A52_MONO, A52_MONO},
{A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO,
A52_STEREO, A52_STEREO, A52_STEREO, A52_STEREO},
{A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_3F,
A52_STEREO, A52_3F, A52_STEREO, A52_3F},
{A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO,
A52_2F1R, A52_2F1R, A52_2F1R, A52_2F1R},
{A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO,
A52_2F1R, A52_3F1R, A52_2F1R, A52_3F1R},
{A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_3F,
A52_2F2R, A52_2F2R, A52_2F2R, A52_2F2R},
{A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_3F,
A52_2F2R, A52_3F2R, A52_2F2R, A52_3F2R},
{A52_CHANNEL1, A52_MONO, A52_MONO, A52_MONO,
A52_MONO, A52_MONO, A52_MONO, A52_MONO},
{A52_CHANNEL2, A52_MONO, A52_MONO, A52_MONO,
A52_MONO, A52_MONO, A52_MONO, A52_MONO},
{A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_DOLBY,
A52_DOLBY, A52_DOLBY, A52_DOLBY, A52_DOLBY}
};
int output;
output = flags & A52_CHANNEL_MASK;
if (output > A52_DOLBY)
return -1;
output = table[output][input & 7];
/* a stereo result is reported as A52_DOLBY when the input is A52_DOLBY,
 * or is A52_3F with a centre level of exactly LEVEL_3DB */
if (output == A52_STEREO &&
(input == A52_DOLBY || (input == A52_3F && clev == LEVEL (LEVEL_3DB))))
output = A52_DOLBY;
if (flags & A52_ADJUST_LEVEL) {
level_t adjust;
/* pick the level correction for this input/output pair */
switch (CONVERT (input & 7, output)) {
case CONVERT (A52_3F, A52_MONO):
adjust = DIV (LEVEL_3DB, LEVEL (1) + clev);
break;
case CONVERT (A52_STEREO, A52_MONO):
case CONVERT (A52_2F2R, A52_2F1R):
case CONVERT (A52_3F2R, A52_3F1R):
level_3db:
adjust = LEVEL (LEVEL_3DB);
break;
case CONVERT (A52_3F2R, A52_2F1R):
if (clev < LEVEL (LEVEL_PLUS3DB - 1))
goto level_3db;
/* fall through */
case CONVERT (A52_3F, A52_STEREO):
case CONVERT (A52_3F1R, A52_2F1R):
case CONVERT (A52_3F1R, A52_2F2R):
case CONVERT (A52_3F2R, A52_2F2R):
adjust = DIV (1, LEVEL (1) + clev);
break;
case CONVERT (A52_2F1R, A52_MONO):
adjust = DIV (LEVEL_PLUS3DB, LEVEL (2) + slev);
break;
case CONVERT (A52_2F1R, A52_STEREO):
case CONVERT (A52_3F1R, A52_3F):
adjust = DIV (1, LEVEL (1) + MUL_C (slev, LEVEL_3DB));
break;
case CONVERT (A52_3F1R, A52_MONO):
adjust = DIV (LEVEL_3DB, LEVEL (1) + clev + MUL_C (slev, 0.5));
break;
case CONVERT (A52_3F1R, A52_STEREO):
adjust = DIV (1, LEVEL (1) + clev + MUL_C (slev, LEVEL_3DB));
break;
case CONVERT (A52_2F2R, A52_MONO):
adjust = DIV (LEVEL_3DB, LEVEL (1) + slev);
break;
case CONVERT (A52_2F2R, A52_STEREO):
case CONVERT (A52_3F2R, A52_3F):
adjust = DIV (1, LEVEL (1) + slev);
break;
case CONVERT (A52_3F2R, A52_MONO):
adjust = DIV (LEVEL_3DB, LEVEL (1) + clev + slev);
break;
case CONVERT (A52_3F2R, A52_STEREO):
adjust = DIV (1, LEVEL (1) + clev + slev);
break;
case CONVERT (A52_MONO, A52_DOLBY):
adjust = LEVEL (LEVEL_PLUS3DB);
break;
case CONVERT (A52_3F, A52_DOLBY):
case CONVERT (A52_2F1R, A52_DOLBY):
adjust = LEVEL (1 / (1 + LEVEL_3DB));
break;
case CONVERT (A52_3F1R, A52_DOLBY):
case CONVERT (A52_2F2R, A52_DOLBY):
adjust = LEVEL (1 / (1 + 2 * LEVEL_3DB));
break;
case CONVERT (A52_3F2R, A52_DOLBY):
adjust = LEVEL (1 / (1 + 3 * LEVEL_3DB));
break;
default:
/* no correction for this pair: leave *level as it is */
return output;
}
*level = MUL_L (*level, adjust);
}
return output;
}
/*
 * Fill coeff[] with the gains to apply to each input block when converting
 * the input layout `acmod` to the layout requested in `output` (only the
 * bits selected by A52_CHANNEL_MASK are considered).
 *
 *   coeff  - receives up to five gains, indexed by input block
 *   level  - overall gain every coefficient is derived from
 *   clev   - extra gain folded into the coeff[1] entry of 3F* inputs
 *            (presumably the centre mix level - confirm against caller)
 *   slev   - extra gain folded into the trailing entries of *1R / *2R inputs
 *            (presumably the surround mix level - confirm against caller)
 *
 * Only the entries named in a case are written; the others are left as
 * they were.
 *
 * Returns a small non-negative integer that depends on the conversion, or
 * -1 when the (acmod, output) pair is not handled.
 * NOTE(review): the non-negative values look like a bit mask over input
 * blocks, but its consumer is outside this block - confirm before relying
 * on that reading.
 */
int a52_downmix_coeff (level_t * coeff, int acmod, int output, level_t level,
                       level_t clev, level_t slev)
{
    /* `level` scaled by LEVEL_3DB; reused by most cases below. */
    level_t lev_m3db = MUL_C (level, LEVEL_3DB);

    switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {

    /* ---- layouts passed through unchanged -------------------------- */
    case CONVERT (A52_CHANNEL, A52_CHANNEL):
    case CONVERT (A52_MONO, A52_MONO):
    case CONVERT (A52_STEREO, A52_STEREO):
    case CONVERT (A52_3F, A52_3F):
    case CONVERT (A52_2F1R, A52_2F1R):
    case CONVERT (A52_3F1R, A52_3F1R):
    case CONVERT (A52_2F2R, A52_2F2R):
    case CONVERT (A52_3F2R, A52_3F2R):
    case CONVERT (A52_STEREO, A52_DOLBY):
        coeff[0] = coeff[1] = coeff[2] = coeff[3] = coeff[4] = level;
        return 0;

    /* ---- everything down to A52_MONO ------------------------------- */
    case CONVERT (A52_CHANNEL, A52_MONO):
        coeff[0] = coeff[1] = MUL_C (level, LEVEL_6DB);
        return 3;

    case CONVERT (A52_STEREO, A52_MONO):
        coeff[0] = coeff[1] = lev_m3db;
        return 3;

    case CONVERT (A52_3F, A52_MONO):
        coeff[0] = coeff[2] = lev_m3db;
        coeff[1] = MUL_C (MUL_L (lev_m3db, clev), LEVEL_PLUS6DB);
        return 7;

    case CONVERT (A52_2F1R, A52_MONO):
        coeff[0] = coeff[1] = lev_m3db;
        coeff[2] = MUL_L (lev_m3db, slev);
        return 7;

    case CONVERT (A52_2F2R, A52_MONO):
        coeff[0] = coeff[1] = lev_m3db;
        coeff[2] = coeff[3] = MUL_L (lev_m3db, slev);
        return 15;

    case CONVERT (A52_3F1R, A52_MONO):
        coeff[0] = coeff[2] = lev_m3db;
        coeff[1] = MUL_C (MUL_L (lev_m3db, clev), LEVEL_PLUS6DB);
        coeff[3] = MUL_L (lev_m3db, slev);
        return 15;

    case CONVERT (A52_3F2R, A52_MONO):
        coeff[0] = coeff[2] = lev_m3db;
        coeff[1] = MUL_C (MUL_L (lev_m3db, clev), LEVEL_PLUS6DB);
        coeff[3] = coeff[4] = MUL_L (lev_m3db, slev);
        return 31;

    /* ---- A52_STEREO / A52_DOLBY and reduced multichannel outputs ---- */
    case CONVERT (A52_MONO, A52_DOLBY):
        coeff[0] = lev_m3db;
        return 0;

    case CONVERT (A52_3F, A52_DOLBY):
        coeff[0] = coeff[2] = coeff[3] = coeff[4] = level;
        coeff[1] = lev_m3db;
        return 7;

    case CONVERT (A52_3F, A52_STEREO):
    case CONVERT (A52_3F1R, A52_2F1R):
    case CONVERT (A52_3F2R, A52_2F2R):
        coeff[0] = coeff[2] = coeff[3] = coeff[4] = level;
        coeff[1] = MUL_L (level, clev);
        return 7;

    case CONVERT (A52_2F1R, A52_DOLBY):
        coeff[0] = coeff[1] = level;
        coeff[2] = lev_m3db;
        return 7;

    case CONVERT (A52_2F1R, A52_STEREO):
        coeff[0] = coeff[1] = level;
        coeff[2] = MUL_L (lev_m3db, slev);
        return 7;

    case CONVERT (A52_3F1R, A52_DOLBY):
        coeff[0] = coeff[2] = level;
        coeff[1] = coeff[3] = lev_m3db;
        return 15;

    case CONVERT (A52_3F1R, A52_STEREO):
        coeff[0] = coeff[2] = level;
        coeff[1] = MUL_L (level, clev);
        coeff[3] = MUL_L (lev_m3db, slev);
        return 15;

    case CONVERT (A52_2F2R, A52_DOLBY):
        coeff[0] = coeff[1] = level;
        coeff[2] = coeff[3] = lev_m3db;
        return 15;

    case CONVERT (A52_2F2R, A52_STEREO):
        coeff[0] = coeff[1] = level;
        coeff[2] = coeff[3] = MUL_L (level, slev);
        return 15;

    case CONVERT (A52_3F2R, A52_DOLBY):
        coeff[0] = coeff[2] = level;
        coeff[1] = coeff[3] = coeff[4] = lev_m3db;
        return 31;

    case CONVERT (A52_3F2R, A52_2F1R):
        coeff[0] = coeff[2] = level;
        coeff[1] = MUL_L (level, clev);
        coeff[3] = coeff[4] = lev_m3db;
        return 31;

    case CONVERT (A52_3F2R, A52_STEREO):
        coeff[0] = coeff[2] = level;
        coeff[1] = MUL_L (level, clev);
        coeff[3] = coeff[4] = MUL_L (level, slev);
        return 31;

    case CONVERT (A52_3F1R, A52_3F):
        coeff[0] = coeff[1] = coeff[2] = level;
        coeff[3] = MUL_L (lev_m3db, slev);
        return 13;

    case CONVERT (A52_3F2R, A52_3F):
        coeff[0] = coeff[1] = coeff[2] = level;
        coeff[3] = coeff[4] = MUL_L (level, slev);
        return 29;

    case CONVERT (A52_2F2R, A52_2F1R):
        coeff[0] = coeff[1] = level;
        coeff[2] = coeff[3] = lev_m3db;
        return 12;

    case CONVERT (A52_3F2R, A52_3F1R):
        coeff[0] = coeff[1] = coeff[2] = level;
        coeff[3] = coeff[4] = lev_m3db;
        return 24;

    /* ---- outputs with more rear blocks than the input -------------- */
    case CONVERT (A52_2F1R, A52_2F2R):
        coeff[0] = coeff[1] = level;
        coeff[2] = lev_m3db;
        return 0;

    case CONVERT (A52_3F1R, A52_2F2R):
        coeff[0] = coeff[2] = level;
        coeff[1] = MUL_L (level, clev);
        coeff[3] = lev_m3db;
        return 7;

    case CONVERT (A52_3F1R, A52_3F2R):
        coeff[0] = coeff[1] = coeff[2] = level;
        coeff[3] = lev_m3db;
        return 0;

    /* ---- A52_CHANNEL input: keep only one of its two blocks --------- */
    case CONVERT (A52_CHANNEL, A52_CHANNEL1):
        coeff[0] = level;
        coeff[1] = 0;
        return 0;

    case CONVERT (A52_CHANNEL, A52_CHANNEL2):
        coeff[0] = 0;
        coeff[1] = level;
        return 0;

    default:
        break;
    }

    return -1;  /* NOTREACHED */
}
/* Accumulate one 256-sample block into another: dest[k] += BIAS (src[k]). */
static void mix2to1 (sample_t * dest, sample_t * src)
{
    int k = 0;

    while (k < 256) {
        dest[k] += BIAS (src[k]);
        k++;
    }
}
/*
 * Fold the second and third 256-sample blocks of `samples` into the first:
 * block0[k] += BIAS (block1[k] + block2[k]).
 */
static void mix3to1 (sample_t * samples)
{
    sample_t *blk1 = samples + 256;
    sample_t *blk2 = samples + 512;
    int k;

    for (k = 0; k < 256; k++) {
        samples[k] += BIAS (blk1[k] + blk2[k]);
    }
}
/*
 * Fold blocks 1..3 (256 samples each) of `samples` into block 0:
 * block0[k] += BIAS (block1[k] + block2[k] + block3[k]).
 */
static void mix4to1 (sample_t * samples)
{
    sample_t *blk1 = samples + 256;
    sample_t *blk2 = samples + 512;
    sample_t *blk3 = samples + 768;
    int k;

    for (k = 0; k < 256; k++) {
        samples[k] += BIAS (blk1[k] + blk2[k] + blk3[k]);
    }
}
/*
 * Fold blocks 1..4 (256 samples each) of `samples` into block 0:
 * block0[k] += BIAS (block1[k] + block2[k] + block3[k] + block4[k]).
 */
static void mix5to1 (sample_t * samples)
{
    sample_t *blk1 = samples + 256;
    sample_t *blk2 = samples + 512;
    sample_t *blk3 = samples + 768;
    sample_t *blk4 = samples + 1024;
    int k;

    for (k = 0; k < 256; k++) {
        samples[k] += BIAS (blk1[k] + blk2[k] + blk3[k] + blk4[k]);
    }
}
/*
 * Reduce three 256-sample blocks to two, in place.  The biased block 1 is
 * added to block 0, and block 1 is then overwritten with block 2 plus that
 * same biased value.
 * (Presumably block 1 is the centre channel shared between left and
 * right - confirm against the channel layout.)
 */
static void mix3to2 (sample_t * samples)
{
    sample_t *blk1 = samples + 256;
    sample_t *blk2 = samples + 512;
    int k;

    for (k = 0; k < 256; k++) {
        /* Read block 1 before it is overwritten below. */
        sample_t shared = BIAS (blk1[k]);

        samples[k] += shared;
        blk1[k] = blk2[k] + shared;
    }
}
/*
 * Add the 256-sample block located 256 samples after `right` (biased) to
 * both `left` and `right`.
 */
static void mix21to2 (sample_t * left, sample_t * right)
{
    sample_t *extra = right + 256;
    int k;

    for (k = 0; k < 256; k++) {
        sample_t shared = BIAS (extra[k]);

        left[k] += shared;
        right[k] += shared;
    }
}
/*
 * Mix block 2 into blocks 0 and 1 with opposite signs:
 * block0[k] += BIAS (-block2[k]); block1[k] += BIAS (block2[k]).
 */
static void mix21toS (sample_t * samples)
{
    sample_t *blk1 = samples + 256;
    sample_t *blk2 = samples + 512;
    int k;

    for (k = 0; k < 256; k++) {
        sample_t surround = blk2[k];

        samples[k] += BIAS (-surround);
        blk1[k] += BIAS (surround);
    }
}
/*
 * Reduce four 256-sample blocks to two, in place.  The biased sum of
 * blocks 1 and 3 is added to block 0, and block 1 is overwritten with
 * block 2 plus that same value.
 */
static void mix31to2 (sample_t * samples)
{
    sample_t *blk1 = samples + 256;
    sample_t *blk2 = samples + 512;
    sample_t *blk3 = samples + 768;
    int k;

    for (k = 0; k < 256; k++) {
        /* Read block 1 before it is overwritten below. */
        sample_t shared = BIAS (blk1[k] + blk3[k]);

        samples[k] += shared;
        blk1[k] = blk2[k] + shared;
    }
}
/*
 * Reduce four 256-sample blocks to two, in place, mixing block 3 with
 * opposite signs.  With c = BIAS (block1[k]) and s = block3[k]:
 *   block0[k] += c - s;
 *   block1[k]  = block2[k] + c + s;
 */
static void mix31toS (sample_t * samples)
{
    sample_t *blk1 = samples + 256;
    sample_t *blk2 = samples + 512;
    sample_t *blk3 = samples + 768;
    int k;

    for (k = 0; k < 256; k++) {
        /* Read block 1 before it is overwritten below. */
        sample_t shared = BIAS (blk1[k]);
        sample_t rear = blk3[k];

        samples[k] += shared - rear;
        blk1[k] = blk2[k] + shared + rear;
    }
}
/*
 * Sum blocks 2 and 3 and mix the result into blocks 0 and 1 with opposite
 * signs: block0[k] += BIAS (-s); block1[k] += BIAS (s),
 * where s = block2[k] + block3[k].
 */
static void mix22toS (sample_t * samples)
{
    sample_t *blk1 = samples + 256;
    sample_t *blk2 = samples + 512;
    sample_t *blk3 = samples + 768;
    int k;

    for (k = 0; k < 256; k++) {
        sample_t rear = blk2[k] + blk3[k];

        samples[k] += BIAS (-rear);
        blk1[k] += BIAS (rear);
    }
}
/*
 * Reduce five 256-sample blocks to two, in place.  With c = BIAS (block1[k]):
 *   block0[k] += c + block3[k];
 *   block1[k]  = c + block2[k] + block4[k];
 */
static void mix32to2 (sample_t * samples)
{
    sample_t *blk1 = samples + 256;
    sample_t *blk2 = samples + 512;
    sample_t *blk3 = samples + 768;
    sample_t *blk4 = samples + 1024;
    int k;

    for (k = 0; k < 256; k++) {
        /* Read block 1 before it is overwritten below. */
        sample_t shared = BIAS (blk1[k]);

        samples[k] += shared + blk3[k];
        blk1[k] = shared + blk2[k] + blk4[k];
    }
}
/*
 * Reduce five 256-sample blocks to two, in place, mixing the sum of
 * blocks 3 and 4 with opposite signs.  With c = BIAS (block1[k]) and
 * s = block3[k] + block4[k]:
 *   block0[k] += c - s;
 *   block1[k]  = block2[k] + c + s;
 */
static void mix32toS (sample_t * samples)
{
    sample_t *blk1 = samples + 256;
    sample_t *blk2 = samples + 512;
    sample_t *blk3 = samples + 768;
    sample_t *blk4 = samples + 1024;
    int k;

    for (k = 0; k < 256; k++) {
        /* Read block 1 before it is overwritten below. */
        sample_t shared = BIAS (blk1[k]);
        sample_t rear = blk3[k] + blk4[k];

        samples[k] += shared - rear;
        blk1[k] = blk2[k] + shared + rear;
    }
}
/*
 * Write the biased sum of two adjacent 256-sample blocks starting at `src`
 * into `dest` (overwriting it): dest[k] = BIAS (src[k] + src[k + 256]).
 * Note the argument order: source first, destination second.
 */
static void move2to1 (sample_t * src, sample_t * dest)
{
    sample_t *src_next = src + 256;
    int k = 0;

    while (k < 256) {
        dest[k] = BIAS (src[k] + src_next[k]);
        k++;
    }
}
/* Set one 256-sample block starting at `samples` to 0. */
static void zero (sample_t * samples)
{
    sample_t *stop = samples + 256;

    while (samples < stop) {
        *samples = 0;
        samples++;
    }
}
/*
 * Convert, in place, a buffer of consecutive 256-sample blocks from the
 * input layout `acmod` to the layout requested in `output` (only the bits
 * selected by A52_CHANNEL_MASK are considered).
 *
 *   samples - buffer of 256-sample blocks; the cases below touch offsets
 *             up to samples + 1024 + 255
 *   clev    - unused here (explicitly voided below)
 *   slev    - when it is 0, several cases skip mixing the trailing blocks
 *             (presumably the surround level - confirm against caller)
 *
 * (acmod, output) pairs that are not listed leave the buffer untouched.
 * Several cases deliberately fall through or jump to a label inside
 * another case; the order of the cases therefore matters.
 */
void a52_downmix (sample_t * samples, int acmod, int output,
level_t clev, level_t slev)
{
/* avoid compiler warning */
(void)clev;
switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
/* Keep only the second block: copy block 1 over block 0. */
case CONVERT (A52_CHANNEL, A52_CHANNEL2):
memcpy (samples, samples + 256, 256 * sizeof (sample_t));
break;
/* ---- conversions to A52_MONO ---- */
case CONVERT (A52_CHANNEL, A52_MONO):
case CONVERT (A52_STEREO, A52_MONO):
mix_2to1:
mix2to1 (samples, samples + 256);
break;
case CONVERT (A52_2F1R, A52_MONO):
if (slev == 0)
goto mix_2to1;
/* slev != 0: fall through and mix all three blocks */
case CONVERT (A52_3F, A52_MONO):
mix_3to1:
mix3to1 (samples);
break;
case CONVERT (A52_3F1R, A52_MONO):
if (slev == 0)
goto mix_3to1;
/* slev != 0: fall through; the check below is then false, so mix4to1 runs */
case CONVERT (A52_2F2R, A52_MONO):
if (slev == 0)
goto mix_2to1;
mix4to1 (samples);
break;
case CONVERT (A52_3F2R, A52_MONO):
if (slev == 0)
goto mix_3to1;
mix5to1 (samples);
break;
/* ---- conversions to A52_STEREO / A52_DOLBY ---- */
/* Duplicate block 0 into block 1. */
case CONVERT (A52_MONO, A52_DOLBY):
memcpy (samples + 256, samples, 256 * sizeof (sample_t));
break;
case CONVERT (A52_3F, A52_STEREO):
case CONVERT (A52_3F, A52_DOLBY):
mix_3to2:
mix3to2 (samples);
break;
case CONVERT (A52_2F1R, A52_STEREO):
if (slev == 0)
break;
mix21to2 (samples, samples + 256);
break;
case CONVERT (A52_2F1R, A52_DOLBY):
mix21toS (samples);
break;
case CONVERT (A52_3F1R, A52_STEREO):
if (slev == 0)
goto mix_3to2;
mix31to2 (samples);
break;
case CONVERT (A52_3F1R, A52_DOLBY):
mix31toS (samples);
break;
case CONVERT (A52_2F2R, A52_STEREO):
if (slev == 0)
break;
/* block 2 into block 0, block 3 into block 1 */
mix2to1 (samples, samples + 512);
mix2to1 (samples + 256, samples + 768);
break;
case CONVERT (A52_2F2R, A52_DOLBY):
mix22toS (samples);
break;
case CONVERT (A52_3F2R, A52_STEREO):
if (slev == 0)
goto mix_3to2;
mix32to2 (samples);
break;
case CONVERT (A52_3F2R, A52_DOLBY):
mix32toS (samples);
break;
/* ---- conversions to A52_3F ---- */
case CONVERT (A52_3F1R, A52_3F):
if (slev == 0)
break;
/* block 3 is added to both block 0 and block 2 */
mix21to2 (samples, samples + 512);
break;
case CONVERT (A52_3F2R, A52_3F):
if (slev == 0)
break;
/* block 3 into block 0, block 4 into block 2 */
mix2to1 (samples, samples + 768);
mix2to1 (samples + 512, samples + 1024);
break;
/* ---- conversions to A52_2F1R / A52_3F1R ---- */
case CONVERT (A52_3F1R, A52_2F1R):
mix3to2 (samples);
/* move block 3 down into the slot freed at block 2 */
memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
break;
case CONVERT (A52_2F2R, A52_2F1R):
mix2to1 (samples + 512, samples + 768);
break;
case CONVERT (A52_3F2R, A52_2F1R):
mix3to2 (samples);
/* block 2 = BIAS (block 3 + block 4) */
move2to1 (samples + 768, samples + 512);
break;
case CONVERT (A52_3F2R, A52_3F1R):
mix2to1 (samples + 768, samples + 1024);
break;
/* ---- conversions to A52_2F2R / A52_3F2R ---- */
/* Duplicate block 2 into block 3. */
case CONVERT (A52_2F1R, A52_2F2R):
memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
break;
case CONVERT (A52_3F1R, A52_2F2R):
mix3to2 (samples);
/* block 3 is copied into block 2 and also left in place at block 3 */
memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
break;
case CONVERT (A52_3F2R, A52_2F2R):
mix3to2 (samples);
/* shift blocks 3 and 4 down by one block each */
memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t));
break;
/* Duplicate block 3 into block 4. */
case CONVERT (A52_3F1R, A52_3F2R):
memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
break;
}
}
/*
 * Rearrange, in place, a buffer of consecutive 256-sample blocks for the
 * given (acmod, output) pair by zero-filling and moving whole blocks; no
 * samples are mixed here.  Only the bits of `output` selected by
 * A52_CHANNEL_MASK are considered.
 * NOTE(review): this looks like the inverse of the block placement done by
 * a52_downmix (spreading `output`-layout data back to `acmod` block
 * positions) - confirm against the caller.
 *
 * (acmod, output) pairs that are not listed leave the buffer untouched.
 * The cases rely heavily on falling through into the next case and on
 * gotos to labels inside other cases; the order of the cases matters.
 */
void a52_upmix (sample_t * samples, int acmod, int output)
{
switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
/* Duplicate block 0 into block 1. */
case CONVERT (A52_CHANNEL, A52_CHANNEL2):
memcpy (samples + 256, samples, 256 * sizeof (sample_t));
break;
/* A52_MONO output: clear every block above block 0, highest first. */
case CONVERT (A52_3F2R, A52_MONO):
zero (samples + 1024);
/* fall through */
case CONVERT (A52_3F1R, A52_MONO):
case CONVERT (A52_2F2R, A52_MONO):
zero (samples + 768);
/* fall through */
case CONVERT (A52_3F, A52_MONO):
case CONVERT (A52_2F1R, A52_MONO):
zero (samples + 512);
/* fall through */
case CONVERT (A52_CHANNEL, A52_MONO):
case CONVERT (A52_STEREO, A52_MONO):
zero (samples + 256);
break;
/* 3F* inputs with a two-block output: clear the trailing blocks, then
   move block 1 up to block 2 and clear block 1. */
case CONVERT (A52_3F2R, A52_STEREO):
case CONVERT (A52_3F2R, A52_DOLBY):
zero (samples + 1024);
/* fall through */
case CONVERT (A52_3F1R, A52_STEREO):
case CONVERT (A52_3F1R, A52_DOLBY):
zero (samples + 768);
/* fall through */
case CONVERT (A52_3F, A52_STEREO):
case CONVERT (A52_3F, A52_DOLBY):
mix_3to2:
memcpy (samples + 512, samples + 256, 256 * sizeof (sample_t));
zero (samples + 256);
break;
/* 2F* inputs with a two-block output: clear the trailing blocks only. */
case CONVERT (A52_2F2R, A52_STEREO):
case CONVERT (A52_2F2R, A52_DOLBY):
zero (samples + 768);
/* fall through */
case CONVERT (A52_2F1R, A52_STEREO):
case CONVERT (A52_2F1R, A52_DOLBY):
zero (samples + 512);
break;
/* Three-block outputs: clear blocks 4 and/or 3. */
case CONVERT (A52_3F2R, A52_3F):
zero (samples + 1024);
/* fall through */
case CONVERT (A52_3F1R, A52_3F):
case CONVERT (A52_2F2R, A52_2F1R):
zero (samples + 768);
break;
case CONVERT (A52_3F2R, A52_3F1R):
zero (samples + 1024);
break;
/* 3F* inputs with rear blocks kept: shift the rear block(s) up by one
   block, then reuse mix_3to2 to move block 1 up and clear it. */
case CONVERT (A52_3F2R, A52_2F1R):
zero (samples + 1024);
/* fall through */
case CONVERT (A52_3F1R, A52_2F1R):
mix_31to21:
memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
goto mix_3to2;
case CONVERT (A52_3F2R, A52_2F2R):
/* move block 3 up to block 4 first, then continue as the 3F1R case */
memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
goto mix_31to21;
}
}

Some files were not shown because too many files have changed in this diff Show more