Build librbcodec with DSP and metadata.

All associated files are moved to /lib/rbcodec.

Change-Id: I572ddd2b8a996aae1e98c081d06b1ed356dce222
This commit is contained in:
Sean Bartell 2011-06-24 01:25:21 -04:00 committed by Nils Wallménius
parent 24bd9d5393
commit b5716df4cb
80 changed files with 97 additions and 112 deletions

View file

@ -0,0 +1,363 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2009 Jeffrey Goode
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include "config.h"
#include "fixedpoint.h"
#include "fracmul.h"
#include "settings.h"
#include "dsp.h"
#include "compressor.h"
/* Define LOGF_ENABLE to enable logf output in this file */
/*#define LOGF_ENABLE*/
#include "logf.h"
/* Compressor state. compressor_update() fills these in whenever the settings
   change; the per-sample code further down in this file reads them. */
/* amount added to release_gain on every sample while releasing */
static int32_t comp_rel_slope IBSS_ATTR; /* S7.24 format */
/* constant gain applied on top of the release gain (UNITY when disabled) */
static int32_t comp_makeup_gain IBSS_ATTR; /* S7.24 format */
/* gain lookup table: entries 0..63 are indexed by the 6 MSB of a non-clipped
   sample, entry 64 is the maximum non-clipped level and entry 65 the maximum
   clipped level */
static int32_t comp_curve[66] IBSS_ATTR; /* S7.24 format */
/* compression gain currently in effect; slopes back up towards UNITY */
static int32_t release_gain IBSS_ATTR; /* S7.24 format */
#define UNITY (1L << 24) /* unity gain in S7.24 format */
/** COMPRESSOR UPDATE
 *  Called via the menu system to configure the compressor process.
 *
 *  Reads the five compressor settings (threshold, makeup gain, ratio, knee,
 *  release time) from global_settings and remembers them in a static copy.
 *  When at least one of them changed and the compressor is active
 *  (threshold below 0 dB), comp_curve[], comp_makeup_gain and comp_rel_slope
 *  are recomputed and release_gain is reset to UNITY.
 *
 *  Returns true when the compressor is active (threshold < 0), else false.
 */
bool compressor_update(void)
{
    static int curr_set[5];
    int new_set[5] = {
        global_settings.compressor_threshold,
        global_settings.compressor_makeup_gain,
        global_settings.compressor_ratio,
        global_settings.compressor_knee,
        global_settings.compressor_release_time};

    /* 1.0 dB in S15.16 format. Decibel values are scaled by multiplying with
       this constant rather than by "<< 16" because the values are negative
       and left-shifting a negative value is undefined behaviour in C. */
    const int32_t one_db = (int32_t)1 << 16;

    /* make menu values useful */
    int threshold = new_set[0];
    bool auto_gain = (new_set[1] == 1);
    static const int comp_ratios[] = {2, 4, 6, 10, 0};
    const int last_ratio =
        (int)(sizeof(comp_ratios) / sizeof(comp_ratios[0])) - 1;
    /* never index the ratio table with an out-of-range setting value */
    int ratio_idx = new_set[2];
    if (ratio_idx < 0)
        ratio_idx = 0;
    else if (ratio_idx > last_ratio)
        ratio_idx = last_ratio;
    int ratio = comp_ratios[ratio_idx]; /* 0 means hard limit */
    bool soft_knee = (new_set[3] == 1);
    /* release time converted from milliseconds to samples */
    int release = new_set[4] * NATIVE_FREQUENCY / 1000;

    bool changed = false;
    bool active = (threshold < 0);

    for (int i = 0; i < 5; i++)
    {
        if (curr_set[i] != new_set[i])
        {
            changed = true;
            curr_set[i] = new_set[i];

#if defined(ROCKBOX_HAS_LOGF) && defined(LOGF_ENABLE)
            switch (i)
            {
                case 0:
                    logf(" Compressor Threshold: %d dB\tEnabled: %s",
                        threshold, active ? "Yes" : "No");
                    break;
                case 1:
                    logf(" Compressor Makeup Gain: %s",
                        auto_gain ? "Auto" : "Off");
                    break;
                case 2:
                    if (ratio)
                        { logf(" Compressor Ratio: %d:1", ratio); }
                    else
                        { logf(" Compressor Ratio: Limit"); }
                    break;
                case 3:
                    logf(" Compressor Knee: %s", soft_knee?"Soft":"Hard");
                    break;
                case 4:
                    logf(" Compressor Release: %d", release);
                    break;
                default:
                    break;
            }
#endif
        }
    }

    if (changed && active)
    {
        /* configure variables for compressor operation */
        static const int32_t db[] = {
            /* positive db equivalents in S15.16 format */
            0x000000, 0x241FA4, 0x1E1A5E, 0x1A94C8,
            0x181518, 0x1624EA, 0x148F82, 0x1338BD,
            0x120FD2, 0x1109EB, 0x101FA4, 0x0F4BB6,
            0x0E8A3C, 0x0DD840, 0x0D3377, 0x0C9A0E,
            0x0C0A8C, 0x0B83BE, 0x0B04A5, 0x0A8C6C,
            0x0A1A5E, 0x09ADE1, 0x094670, 0x08E398,
            0x0884F6, 0x082A30, 0x07D2FA, 0x077F0F,
            0x072E31, 0x06E02A, 0x0694C8, 0x064BDF,
            0x060546, 0x05C0DA, 0x057E78, 0x053E03,
            0x04FF5F, 0x04C273, 0x048726, 0x044D64,
            0x041518, 0x03DE30, 0x03A89B, 0x037448,
            0x03412A, 0x030F32, 0x02DE52, 0x02AE80,
            0x027FB0, 0x0251D6, 0x0224EA, 0x01F8E2,
            0x01CDB4, 0x01A359, 0x0179C9, 0x0150FC,
            0x0128EB, 0x010190, 0x00DAE4, 0x00B4E1,
            0x008F82, 0x006AC1, 0x004699, 0x002305};

        struct curve_point
        {
            int32_t db;     /* S15.16 format */
            int32_t offset; /* S15.16 format */
        } db_curve[5];

        /* threshold and knee half-width as S15.16 decibel values */
        const int32_t threshold_db = threshold * one_db;
        const int32_t knee_db = 3 * one_db;

        /** Set up the shape of the compression curve first as decibel
            values */
        /* db_curve[0] = bottom of knee
                   [1] = threshold
                   [2] = top of knee
                   [3] = 0 db input
                   [4] = ~+12db input (2 bits clipping overhead) */

        db_curve[1].db = threshold_db;
        if (soft_knee)
        {
            /* bottom of knee is 3dB below the threshold for soft knee*/
            db_curve[0].db = threshold_db - knee_db;
            /* top of knee is 3dB above the threshold for soft knee */
            db_curve[2].db = threshold_db + knee_db;
            if (ratio)
                /* offset = -3db * (ratio - 1) / ratio */
                db_curve[2].offset = (int32_t)((long long)(-knee_db)
                    * (ratio - 1) / ratio);
            else
                /* offset = -3db for hard limit */
                db_curve[2].offset = -knee_db;
        }
        else
        {
            /* bottom of knee is at the threshold for hard knee */
            db_curve[0].db = threshold_db;
            /* top of knee is at the threshold for hard knee */
            db_curve[2].db = threshold_db;
            db_curve[2].offset = 0;
        }

        /* Calculate 0db and ~+12db offsets */
        db_curve[4].db = 0xC0A8C; /* db of 2 bits clipping */
        if (ratio)
        {
            /* offset = threshold * (ratio - 1) / ratio */
            db_curve[3].offset = (int32_t)((long long)threshold_db
                * (ratio - 1) / ratio);
            db_curve[4].offset = (int32_t)((long long)-db_curve[4].db
                * (ratio - 1) / ratio) + db_curve[3].offset;
        }
        else
        {
            /* offset = threshold for hard limit */
            db_curve[3].offset = threshold_db;
            db_curve[4].offset = -db_curve[4].db + db_curve[3].offset;
        }

        /** Now set up the comp_curve table with compression offsets in the
            form of gain factors in S7.24 format */
        /* comp_curve[0] is 0 (-infinity db) input */
        comp_curve[0] = UNITY;
        /* comp_curve[1 to 63] are intermediate compression values
           corresponding to the 6 MSB of the input values of a non-clipped
           signal */
        for (int i = 1; i < 64; i++)
        {
            /* db constants are stored as positive numbers;
               make them negative here */
            int32_t this_db = -db[i];

            /* no compression below the knee */
            if (this_db <= db_curve[0].db)
                comp_curve[i] = UNITY;

            /* if soft knee and below top of knee,
               interpolate along soft knee slope */
            else if (soft_knee && (this_db <= db_curve[2].db))
                comp_curve[i] = fp_factor(fp_mul(
                    ((this_db - db_curve[0].db) / 6),
                    db_curve[2].offset, 16), 16) << 8;

            /* interpolate along ratio slope above the knee */
            else
                comp_curve[i] = fp_factor(fp_mul(
                    fp_div((db_curve[1].db - this_db), db_curve[1].db, 16),
                    db_curve[3].offset, 16), 16) << 8;
        }
        /* comp_curve[64] is the compression level of a maximum level,
           non-clipped signal */
        comp_curve[64] = fp_factor(db_curve[3].offset, 16) << 8;

        /* comp_curve[65] is the compression level of a maximum level,
           clipped signal */
        comp_curve[65] = fp_factor(db_curve[4].offset, 16) << 8;

#if defined(ROCKBOX_HAS_LOGF) && defined(LOGF_ENABLE)
        logf("\n *** Compression Offsets ***");
        /* some settings for display only, not used in calculations */
        db_curve[0].offset = 0;
        db_curve[1].offset = 0;
        db_curve[3].db = 0;

        for (int i = 0; i <= 4; i++)
        {
            logf("Curve[%d]: db: % 6.2f\toffset: % 6.2f", i,
                (float)db_curve[i].db / (1 << 16),
                (float)db_curve[i].offset / (1 << 16));
        }

        logf("\nGain factors:");
        for (int i = 1; i <= 65; i++)
        {
            debugf("%02d: %.6f ", i, (float)comp_curve[i] / UNITY);
            if (i % 4 == 0) debugf("\n");
        }
        debugf("\n");
#endif

        /* if using auto peak, then makeup gain is max offset -
           .1dB headroom */
        comp_makeup_gain = auto_gain ?
            fp_factor(-(db_curve[3].offset) - 0x199A, 16) << 8 : UNITY;
        logf("Makeup gain:\t%.6f", (float)comp_makeup_gain / UNITY);

        /* calculate per-sample gain change a rate of 10db over release time;
           a release time that rounds down to zero samples is treated as one
           sample so the division can never be by zero */
        comp_rel_slope = 0xAF0BB2 / (release > 0 ? release : 1);
        logf("Release slope:\t%.6f", (float)comp_rel_slope / UNITY);

        release_gain = UNITY;
    }

    return active;
}
/** GET COMPRESSION GAIN
 *  Looks up, with linear interpolation between table entries, the S7.24 gain
 *  factor that comp_curve[] prescribes for the magnitude of one sample.
 *  The result is 1 (UNITY) or less; -1 is returned as an "invalid" marker
 *  when the sample exceeds the two bits of clipping headroom the table
 *  covers.
 */
static inline int32_t get_compression_gain(struct dsp_data *data,
                                           int32_t sample)
{
    /* number of bits the sample must move to end up with 15 frac bits */
    const int shift = data->frac_bits - 15;

    /* work on the magnitude only */
    int32_t magnitude = (sample < 0) ? -(sample + 1) : sample;

    /* bring the magnitude into the 15 frac bit range */
    if (shift > 0)
        magnitude >>= shift;
    else if (shift < 0)
        magnitude <<= -shift;

    /* beyond the clipping headroom: report an invalid value */
    if (magnitude >= (1 << 17))
        return -1;

    /* somewhat clipped, within 2 bits of overhead:
       straight interpolation between the last two table entries,
       higher gain - ((clipped portion of sample * 4/3
       / (1 << 31)) * (higher gain - lower gain)) */
    if (magnitude >= (1 << 15))
    {
        const int32_t clipped = ((magnitude - (1 << 15)) / 3) << 16;
        return comp_curve[64] - (FRACMUL(clipped,
            (comp_curve[64] - comp_curve[65])));
    }

    /* normal case, not clipped: the 6 MSB select the table slot and the
       9 LSB give the position between that slot and the next one */
    const int slot = magnitude >> 9;
    const int32_t fraction = (magnitude & 0x1FF) << 22;

    /* higher gain - ((fraction / (1 << 31)) * (higher gain - lower gain)) */
    return comp_curve[slot] - (FRACMUL(fraction,
        (comp_curve[slot] - comp_curve[slot + 1])));
}
/** COMPRESSOR PROCESS
 *  Scales `count` samples of every channel in buf[] in place according to
 *  the compressor curve, the release slope and the makeup gain.
 */
void compressor_process(int count, struct dsp_data *data, int32_t *buf[])
{
    const int num_chan = data->num_channels;
    int32_t *const chan_buf[2] = {buf[0], buf[1]};

    for (int pos = 0; pos < count; pos++)
    {
        int ch;

        /* the lowest (most compressed) gain factor found among the channels
           is used for the whole sample pair (a mono sample simply has a
           single candidate) */
        int32_t sample_gain = UNITY;
        for (ch = 0; ch < num_chan; ch++)
        {
            const int32_t candidate =
                get_compression_gain(data, chan_buf[ch][pos]);
            if (candidate < sample_gain)
                sample_gain = candidate;
        }

        /* release handling; nothing to do when neither compression nor a
           release slope is in effect */
        if ((sample_gain != UNITY) || (release_gain != UNITY))
        {
            if ((sample_gain > 0) && (sample_gain <= release_gain))
            {
                /* valid gain that compresses at least as much as the current
                   slope: start a new release slope from here */
                release_gain = sample_gain;
            }
            else
            {
                /* otherwise (this includes the invalid value) keep sloping
                   towards unity gain, never going past it */
                release_gain += comp_rel_slope;
                if (release_gain > UNITY)
                {
                    release_gain = UNITY;
                }
            }
        }

        /* total gain factor is release gain times makeup gain; the multiply
           is skipped whenever one of the two factors is unity */
        int32_t total_gain;
        if (release_gain == UNITY)
            total_gain = comp_makeup_gain;
        else if (comp_makeup_gain == UNITY)
            total_gain = release_gain;
        else
            total_gain = FRACMUL_SHL(release_gain, comp_makeup_gain, 7);

        /* apply the total gain factor (if any) to the sample pair/mono
           sample */
        if (total_gain != UNITY)
        {
            for (ch = 0; ch < num_chan; ch++)
            {
                chan_buf[ch][pos] =
                    FRACMUL_SHL(total_gain, chan_buf[ch][pos], 7);
            }
        }
    }
}
/** COMPRESSOR RESET
 *  Puts the release gain back at unity, abandoning any release slope that
 *  is currently in progress.
 */
void compressor_reset(void)
{
    release_gain = UNITY;
}

View file

@ -0,0 +1,29 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2009 Jeffrey Goode
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#ifndef COMPRESSOR_H
#define COMPRESSOR_H

#include <stdbool.h>
#include <stdint.h>

/* Only a pointer to struct dsp_data is needed by the prototypes below, so a
   forward declaration is enough. Without it the struct tag would be declared
   inside the parameter list and be a distinct type with prototype scope. */
struct dsp_data;

/* Apply the compressor in place to `count` samples of each channel in buf[];
   `data` supplies the channel count and sample format. */
void compressor_process(int count, struct dsp_data *data, int32_t *buf[]);

/* Re-read the compressor settings and rebuild the internal tables if they
   changed. Returns true when the compressor is active. */
bool compressor_update(void);

/* Reset the release gain to unity. */
void compressor_reset(void);

#endif /* COMPRESSOR_H */

1573
lib/rbcodec/dsp/dsp.c Normal file

File diff suppressed because it is too large Load diff

125
lib/rbcodec/dsp/dsp.h Normal file
View file

@ -0,0 +1,125 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2005 Miika Pekkarinen
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#ifndef _DSP_H
#define _DSP_H

#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h> /* uint32_t, int32_t and intptr_t are used below */

#define NATIVE_FREQUENCY 44100

/* Stereo modes */
enum
{
    STEREO_INTERLEAVED = 0,
    STEREO_NONINTERLEAVED,
    STEREO_MONO,
    STEREO_NUM_MODES,
};

/* Codec indices */
enum
{
    CODEC_IDX_AUDIO = 0,
    CODEC_IDX_VOICE,
};

/* Setting identifiers; see the `setting` parameter of dsp_configure() */
enum
{
    DSP_MYDSP = 1,
    DSP_SET_FREQUENCY,
    DSP_SWITCH_FREQUENCY,
    DSP_SET_SAMPLE_DEPTH,
    DSP_SET_STEREO_MODE,
    DSP_RESET,
    DSP_FLUSH,
    DSP_SET_TRACK_GAIN,
    DSP_SET_ALBUM_GAIN,
    DSP_SET_TRACK_PEAK,
    DSP_SET_ALBUM_PEAK,
    DSP_CROSSFEED
};

/****************************************************************************
 * NOTE: Any assembly routines that use these structures must be updated
 * if current data members are moved or changed.
 */
struct resample_data
{
    uint32_t delta;                     /* 00h */
    uint32_t phase;                     /* 04h */
    int32_t last_sample[2];             /* 08h */
                                        /* 10h */
};

/* This is for passing needed data to external dsp routines. If another
 * dsp parameter needs to be passed, add to the end of the structure
 * and remove from dsp_config.
 * If another function type becomes assembly/external and requires dsp
 * config info, add a pointer parameter of type "struct dsp_data *".
 * If removing something from other than the end, reserve the spot or
 * else update every implementation for every target.
 * Be sure to add the offset of the new member for easy viewing as well. :)
 * It is the first member of dsp_config and all members can be accessed
 * through the main aggregate but this is intended to make a safe haven
 * for these items whereas the c part can be rearranged at will. dsp_data
 * could even be moved within dsp_config without disturbing the order.
 */
struct dsp_data
{
    int output_scale;                   /* 00h */
    int num_channels;                   /* 04h */
    struct resample_data resample_data; /* 08h */
    int32_t clip_min;                   /* 18h */
    int32_t clip_max;                   /* 1ch */
    int32_t gain;                       /* 20h - Note that this is in S8.23 format. */
    int frac_bits;                      /* 24h */
                                        /* 28h */
};

struct dsp_config;

int dsp_process(struct dsp_config *dsp, char *dest,
                const char *src[], int count);
int dsp_input_count(struct dsp_config *dsp, int count);
int dsp_output_count(struct dsp_config *dsp, int count);
intptr_t dsp_configure(struct dsp_config *dsp, int setting,
                       intptr_t value);
int get_replaygain_mode(bool have_track_gain, bool have_album_gain);
void dsp_set_replaygain(void);
void dsp_set_crossfeed(bool enable);
void dsp_set_crossfeed_direct_gain(int gain);
void dsp_set_crossfeed_cross_params(long lf_gain, long hf_gain,
                                    long cutoff);
void dsp_set_eq(bool enable);
void dsp_set_eq_precut(int precut);
void dsp_set_eq_coefs(int band);
void dsp_dither_enable(bool enable);
void dsp_timestretch_enable(bool enable);
bool dsp_timestretch_available(void);
void sound_set_pitch(int32_t r);
int32_t sound_get_pitch(void);
void dsp_set_timestretch(int32_t percent);
int32_t dsp_get_timestretch(void);
int dsp_callback(int msg, intptr_t param);
void dsp_set_compressor(void);

#endif

561
lib/rbcodec/dsp/dsp_arm.S Normal file
View file

@ -0,0 +1,561 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2006-2007 Thom Johansen
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include "config.h"
/****************************************************************************
* void channels_process_sound_chan_mono(int count, int32_t *buf[])
*
* Replaces every sample of both channels with L / 2 + R / 2, in place.
* The main loop handles two samples per iteration; a trailing odd sample is
* handled separately.
* NOTE(review): ldmpc is a macro defined outside this file; it is used here
* as the function return (restoring the registers pushed on entry) - confirm.
*/
#include "config.h"
.section .icode, "ax", %progbits
.align 2
.global channels_process_sound_chan_mono
.type channels_process_sound_chan_mono, %function
channels_process_sound_chan_mono:
@ input: r0 = count, r1 = buf
stmfd sp!, { r4, lr } @
@
ldmia r1, { r1, r2 } @ r1 = buf[0], r2 = buf[1]
subs r0, r0, #1 @ odd: end at 0; even: end at -1
beq .mono_singlesample @ Zero? Only one sample!
@
.monoloop: @
ldmia r1, { r3, r4 } @ r3, r4 = Li0, Li1
ldmia r2, { r12, r14 } @ r12, r14 = Ri0, Ri1
mov r3, r3, asr #1 @ Mo0 = Li0 / 2 + Ri0 / 2
mov r4, r4, asr #1 @ Mo1 = Li1 / 2 + Ri1 / 2
add r12, r3, r12, asr #1 @
add r14, r4, r14, asr #1 @
subs r0, r0, #2 @ two samples done; flags used by bgt/lt below
stmia r1!, { r12, r14 } @ store Mo0, Mo1
stmia r2!, { r12, r14 } @ store Mo0, Mo1
bgt .monoloop @
@
ldmpc cond=lt, regs=r4 @ if count was even, we're done
@
.mono_singlesample: @
ldr r3, [r1] @ r3 = Ls
ldr r12, [r2] @ r12 = Rs
mov r3, r3, asr #1 @ Mo = Ls / 2 + Rs / 2
add r12, r3, r12, asr #1 @
str r12, [r1] @ store Mo
str r12, [r2] @ store Mo
@
ldmpc regs=r4 @
.size channels_process_sound_chan_mono, \
.-channels_process_sound_chan_mono
/****************************************************************************
* void channels_process_sound_chan_custom(int count, int32_t *buf[])
*
* Mixes the two channels in place:
* Lc = (Li*gain + Ri*cross) >> 31, Rc = (Ri*gain + Li*cross) >> 31
* where gain and cross are the words loaded from the external symbols
* dsp_sw_gain and dsp_sw_cross. Two samples are handled per loop iteration;
* a trailing odd sample is handled separately.
*/
.section .icode, "ax", %progbits
.align 2
.global channels_process_sound_chan_custom
.type channels_process_sound_chan_custom, %function
channels_process_sound_chan_custom:
@ input: r0 = count, r1 = buf
stmfd sp!, { r4-r10, lr }
ldr r3, =dsp_sw_gain
ldr r4, =dsp_sw_cross
ldmia r1, { r1, r2 } @ r1 = buf[0], r2 = buf[1]
ldr r3, [r3] @ r3 = dsp_sw_gain
ldr r4, [r4] @ r4 = dsp_sw_cross
subs r0, r0, #1
beq .custom_single_sample @ Zero? Only one sample!
.custom_loop:
ldmia r1, { r5, r6 } @ r5 = Li0, r6 = Li1
ldmia r2, { r7, r8 } @ r7 = Ri0, r8 = Ri1
subs r0, r0, #2
smull r9, r10, r5, r3 @ Lc0 = Li0*gain
smull r12, r14, r7, r3 @ Rc0 = Ri0*gain
smlal r9, r10, r7, r4 @ Lc0 += Ri0*cross
smlal r12, r14, r5, r4 @ Rc0 += Li0*cross
mov r9, r9, lsr #31 @ Convert to s0.31
mov r12, r12, lsr #31
orr r5, r9, r10, asl #1
orr r7, r12, r14, asl #1
smull r9, r10, r6, r3 @ Lc1 = Li1*gain
smull r12, r14, r8, r3 @ Rc1 = Ri1*gain
smlal r9, r10, r8, r4 @ Lc1 += Ri1*cross
smlal r12, r14, r6, r4 @ Rc1 += Li1*cross
mov r9, r9, lsr #31 @ Convert to s0.31
mov r12, r12, lsr #31
orr r6, r9, r10, asl #1
orr r8, r12, r14, asl #1
stmia r1!, { r5, r6 } @ Store Lc0, Lc1
stmia r2!, { r7, r8 } @ Store Rc0, Rc1
bgt .custom_loop
ldmpc cond=lt, regs=r4-r10 @ < 0? even count
.custom_single_sample:
ldr r5, [r1] @ handle odd sample
ldr r7, [r2]
smull r9, r10, r5, r3 @ Lc0 = Li0*gain
smull r12, r14, r7, r3 @ Rc0 = Ri0*gain
smlal r9, r10, r7, r4 @ Lc0 += Ri0*cross
smlal r12, r14, r5, r4 @ Rc0 += Li0*cross
mov r9, r9, lsr #31 @ Convert to s0.31
mov r12, r12, lsr #31
orr r5, r9, r10, asl #1
orr r7, r12, r14, asl #1
str r5, [r1] @ Store Lc0
str r7, [r2] @ Store Rc0
ldmpc regs=r4-r10
.size channels_process_sound_chan_custom, \
.-channels_process_sound_chan_custom
/****************************************************************************
* void channels_process_sound_chan_karaoke(int count, int32_t *buf[])
*
* Replaces, in place, the left channel with Li / 2 - Ri / 2 and the right
* channel with the negation of that value. Two samples are handled per loop
* iteration; a trailing odd sample is handled separately.
*/
.section .icode, "ax", %progbits
.align 2
.global channels_process_sound_chan_karaoke
.type channels_process_sound_chan_karaoke, %function
channels_process_sound_chan_karaoke:
@ input: r0 = count, r1 = buf
stmfd sp!, { r4, lr } @
@
ldmia r1, { r1, r2 } @ r1 = buf[0], r2 = buf[1]
subs r0, r0, #1 @ odd: end at 0; even: end at -1
beq .karaoke_singlesample @ Zero? Only one sample!
@
.karaokeloop: @
ldmia r1, { r3, r4 } @ r3, r4 = Li0, Li1
ldmia r2, { r12, r14 } @ r12, r14 = Ri0, Ri1
mov r3, r3, asr #1 @ Lo0 = Li0 / 2 - Ri0 / 2
mov r4, r4, asr #1 @ Lo1 = Li1 / 2 - Ri1 / 2
sub r3, r3, r12, asr #1 @
sub r4, r4, r14, asr #1 @
rsb r12, r3, #0 @ Ro0 = -Lo0 = Ri0 / 2 - Li0 / 2
rsb r14, r4, #0 @ Ro1 = -Lo1 = Ri1 / 2 - Li1 / 2
subs r0, r0, #2 @
stmia r1!, { r3, r4 } @ store Lo0, Lo1
stmia r2!, { r12, r14 } @ store Ro0, Ro1
bgt .karaokeloop @
@
ldmpc cond=lt, regs=r4 @ if count was even, we're done
@
.karaoke_singlesample: @
ldr r3, [r1] @ r3 = Li
ldr r12, [r2] @ r12 = Ri
mov r3, r3, asr #1 @ Lo = Li / 2 - Ri / 2
sub r3, r3, r12, asr #1 @
rsb r12, r3, #0 @ Ro = -Lo = Ri / 2 - Li / 2
str r3, [r1] @ store Lo
str r12, [r2] @ store Ro
@
ldmpc regs=r4 @
.size channels_process_sound_chan_karaoke, \
.-channels_process_sound_chan_karaoke
#if ARM_ARCH < 6
/****************************************************************************
* void sample_output_mono(int count, struct dsp_data *data,
* const int32_t *src[], int16_t *dst)
*
* Rounds and scales each src[0] sample down by data->output_scale bits,
* clips it to -32768...+32767 and writes it to dst as one 32-bit word holding
* the same 16-bit value in both halfwords. Two samples are handled per loop
* iteration; a trailing odd sample is handled separately.
*/
.section .icode, "ax", %progbits
.align 2
.global sample_output_mono
.type sample_output_mono, %function
sample_output_mono:
@ input: r0 = count, r1 = data, r2 = src, r3 = dst
stmfd sp!, { r4-r6, lr }
ldr r1, [r1] @ r1 = data->output_scale
ldr r2, [r2] @ r2 = src[0]
mov r4, #1
mov r4, r4, lsl r1 @ r4 = 1 << (scale-1)
mov r4, r4, lsr #1
mvn r14, #0x8000 @ r14 = 0xffff7fff, needed for
@ clipping and masking
subs r0, r0, #1 @
beq .som_singlesample @ Zero? Only one sample!
.somloop:
ldmia r2!, { r5, r6 }
add r5, r5, r4 @ r5 = (r5 + 1<<(scale-1)) >> scale
mov r5, r5, asr r1
mov r12, r5, asr #15
teq r12, r12, asr #31
eorne r5, r14, r5, asr #31 @ Clip (-32768...+32767)
add r6, r6, r4
mov r6, r6, asr r1 @ r6 = (r6 + 1<<(scale-1)) >> scale
mov r12, r6, asr #15
teq r12, r12, asr #31
eorne r6, r14, r6, asr #31 @ Clip (-32768...+32767)
and r5, r5, r14, lsr #16
and r6, r6, r14, lsr #16
orr r5, r5, r5, lsl #16 @ pack first 2 halfwords into 1 word
orr r6, r6, r6, lsl #16 @ pack last 2 halfwords into 1 word
stmia r3!, { r5, r6 }
subs r0, r0, #2
bgt .somloop
ldmpc cond=lt, regs=r4-r6 @ even 'count'? return
.som_singlesample:
ldr r5, [r2] @ do odd sample
add r5, r5, r4
mov r5, r5, asr r1
mov r12, r5, asr #15
teq r12, r12, asr #31
eorne r5, r14, r5, asr #31
and r5, r5, r14, lsr #16 @ pack 2 halfwords into 1 word
orr r5, r5, r5, lsl #16
str r5, [r3]
ldmpc regs=r4-r6
.size sample_output_mono, .-sample_output_mono
/****************************************************************************
* void sample_output_stereo(int count, struct dsp_data *data,
* const int32_t *src[], int16_t *dst)
*
* Rounds and scales each src[0]/src[1] sample down by data->output_scale
* bits, clips it to -32768...+32767 and writes each left/right pair to dst as
* one 32-bit word (left in the low halfword, right in the high halfword).
* Two sample pairs are handled per loop iteration; a trailing odd pair is
* handled separately.
*/
.section .icode, "ax", %progbits
.align 2
.global sample_output_stereo
.type sample_output_stereo, %function
sample_output_stereo:
@ input: r0 = count, r1 = data, r2 = src, r3 = dst
stmfd sp!, { r4-r9, lr }
ldr r1, [r1] @ r1 = data->output_scale
ldmia r2, { r2, r5 } @ r2 = src[0], r5 = src[1]
mov r4, #1
mov r4, r4, lsl r1 @ r4 = 1 << (scale-1)
mov r4, r4, lsr #1 @
mvn r14, #0x8000 @ r14 = 0xffff7fff, needed for
@ clipping and masking
subs r0, r0, #1 @
beq .sos_singlesample @ Zero? Only one sample!
.sosloop:
ldmia r2!, { r6, r7 } @ 2 left
ldmia r5!, { r8, r9 } @ 2 right
add r6, r6, r4 @ r6 = (r6 + 1<<(scale-1)) >> scale
mov r6, r6, asr r1
mov r12, r6, asr #15
teq r12, r12, asr #31
eorne r6, r14, r6, asr #31 @ Clip (-32768...+32767)
add r7, r7, r4
mov r7, r7, asr r1 @ r7 = (r7 + 1<<(scale-1)) >> scale
mov r12, r7, asr #15
teq r12, r12, asr #31
eorne r7, r14, r7, asr #31 @ Clip (-32768...+32767)
add r8, r8, r4 @ r8 = (r8 + 1<<(scale-1)) >> scale
mov r8, r8, asr r1
mov r12, r8, asr #15
teq r12, r12, asr #31
eorne r8, r14, r8, asr #31 @ Clip (-32768...+32767)
add r9, r9, r4 @ r9 = (r9 + 1<<(scale-1)) >> scale
mov r9, r9, asr r1
mov r12, r9, asr #15
teq r12, r12, asr #31
eorne r9, r14, r9, asr #31 @ Clip (-32768...+32767)
and r6, r6, r14, lsr #16 @ pack first 2 halfwords into 1 word
orr r8, r6, r8, asl #16
and r7, r7, r14, lsr #16 @ pack last 2 halfwords into 1 word
orr r9, r7, r9, asl #16
stmia r3!, { r8, r9 }
subs r0, r0, #2
bgt .sosloop
ldmpc cond=lt, regs=r4-r9 @ even 'count'? return
.sos_singlesample:
ldr r6, [r2] @ left odd sample
ldr r8, [r5] @ right odd sample
add r6, r6, r4 @ r6 = (r6 + 1<<(scale-1)) >> scale
mov r6, r6, asr r1
mov r12, r6, asr #15
teq r12, r12, asr #31
eorne r6, r14, r6, asr #31 @ Clip (-32768...+32767)
add r8, r8, r4 @ r8 = (r8 + 1<<(scale-1)) >> scale
mov r8, r8, asr r1
mov r12, r8, asr #15
teq r12, r12, asr #31
eorne r8, r14, r8, asr #31 @ Clip (-32768...+32767)
and r6, r6, r14, lsr #16 @ pack 2 halfwords into 1 word
orr r8, r6, r8, asl #16
str r8, [r3]
ldmpc regs=r4-r9
.size sample_output_stereo, .-sample_output_stereo
#endif /* ARM_ARCH < 6 */
/****************************************************************************
* void apply_crossfeed(int count, int32_t* src[])
*
* Processes count samples of src[0]/src[1] in place, one sample pair per loop
* iteration.
* Layout of the external crossfeed_data object as this routine accesses it:
* 8 words (direct gain, b0, b1, a1 and four filter history words), followed
* by a delay line of 13*4*2 bytes, followed by one word holding the current
* delay line address. The filter history and the delay line address are
* written back before returning.
* NOTE(review): there is no .type %function directive for this symbol, unlike
* the .icode routines earlier in this file - confirm whether that is
* intended.
*/
.section .text
.global apply_crossfeed
apply_crossfeed:
@ unfortunately, we ended up in a bit of a register squeeze here, and need
@ to keep the count on the stack :/
stmdb sp!, { r4-r11, lr } @ stack modified regs
ldmia r1, { r2-r3 } @ r2 = src[0], r3 = src[1]
ldr r1, =crossfeed_data
ldmia r1!, { r4-r11 } @ load direct gain and filter data
mov r12, r0 @ better to ldm delay + count later
add r0, r1, #13*4*2 @ calculate end of delay
stmdb sp!, { r0, r12 } @ stack end of delay adr and count
ldr r0, [r1, #13*4*2] @ fetch current delay line address
/* Register usage in loop:
* r0 = &delay[index][0], r1 = accumulator high, r2 = src[0], r3 = src[1],
* r4 = direct gain, r5-r7 = b0, b1, a1 (filter coefs),
* r8-r11 = filter history, r12 = temp, r14 = accumulator low
*/
.cfloop:
smull r14, r1, r6, r8 @ acc = b1*dr[n - 1]
smlal r14, r1, r7, r9 @ acc += a1*y_l[n - 1]
ldr r8, [r0, #4] @ r8 = dr[n]
smlal r14, r1, r5, r8 @ acc += b0*dr[n]
mov r9, r1, lsl #1 @ fix format for filter history
ldr r12, [r2] @ load left input
smlal r14, r1, r4, r12 @ acc += gain*x_l[n]
mov r1, r1, lsl #1 @ fix format
str r1, [r2], #4 @ save result
smull r14, r1, r6, r10 @ acc = b1*dl[n - 1]
smlal r14, r1, r7, r11 @ acc += a1*y_r[n - 1]
ldr r10, [r0] @ r10 = dl[n]
str r12, [r0], #4 @ save left input to delay line
smlal r14, r1, r5, r10 @ acc += b0*dl[n]
mov r11, r1, lsl #1 @ fix format for filter history
ldr r12, [r3] @ load right input
smlal r14, r1, r4, r12 @ acc += gain*x_r[n]
str r12, [r0], #4 @ save right input to delay line
mov r1, r1, lsl #1 @ fix format
ldmia sp, { r12, r14 } @ fetch delay line end addr and count from stack
str r1, [r3], #4 @ save result
cmp r0, r12 @ need to wrap to start of delay?
subeq r0, r0, #13*4*2 @ wrap back delay line ptr to start
subs r14, r14, #1 @ are we finished?
strne r14, [sp, #4] @ nope, save count back to stack
bne .cfloop
@ save data back to struct
ldr r12, =crossfeed_data + 4*4
stmia r12, { r8-r11 } @ save filter history
str r0, [r12, #30*4] @ save delay line index
add sp, sp, #8 @ remove temp variables from stack
ldmpc regs=r4-r11
.size apply_crossfeed, .-apply_crossfeed
/****************************************************************************
* int dsp_downsample(int count, struct dsp_data *data,
* int32_t *src[], int32_t *dst[])
*
* Linear-interpolating resampler. For each channel (processed from the last
* channel down to channel 0) it walks a 16.16 fixed-point phase through src
* in steps of resample_data.delta and writes
* s[pos - 1] + frac * (s[pos] - s[pos - 1]) to dst, using
* resample_data.last_sample[ch] as s[-1]. The last input sample of each
* channel is saved as last_sample for the next call, and the phase, reduced
* by count << 16, is stored back.
* Returns the number of samples written per channel (computed from the
* channel 0 destination pointer).
* The struct offsets used (+4, +8, +12, +16) depend on the layout of
* struct dsp_data / struct resample_data.
*/
.section .text
.global dsp_downsample
dsp_downsample:
@ input: r0 = count, r1 = data, r2 = src, r3 = dst
stmdb sp!, { r4-r11, lr } @ stack modified regs
ldmib r1, { r5-r6 } @ r5 = num_channels,r6 = resample_data.delta
sub r5, r5, #1 @ pre-decrement num_channels for use
add r4, r1, #12 @ r4 = &resample_data.phase
mov r12, #0xff
orr r12, r12, #0xff00 @ r12 = 0xffff
.dschannel_loop:
ldr r1, [r4] @ r1 = resample_data.phase
ldr r7, [r2, r5, lsl #2] @ r7 = s = src[ch - 1]
ldr r8, [r3, r5, lsl #2] @ r8 = d = dst[ch - 1]
add r9, r4, #4 @ r9 = &last_sample[0]
ldr r10, [r9, r5, lsl #2] @ r10 = last_sample[ch - 1]
sub r11, r0, #1
ldr r14, [r7, r11, lsl #2] @ load last sample in s[] ...
str r14, [r9, r5, lsl #2] @ and write as next frame's last_sample
movs r9, r1, lsr #16 @ r9 = pos = phase >> 16
ldreq r11, [r7] @ if pos = 0, load src[0] and jump into loop
beq .dsuse_last_start
cmp r9, r0 @ if pos >= count, we're already done
bge .dsloop_skip
@ Register usage in loop:
@ r0 = count, r1 = phase, r4 = &resample_data.phase, r5 = cur_channel,
@ r6 = delta, r7 = s, r8 = d, r9 = pos, r10 = s[pos - 1], r11 = s[pos]
.dsloop:
add r9, r7, r9, lsl #2 @ r9 = &s[pos]
ldmda r9, { r10, r11 } @ r10 = s[pos - 1], r11 = s[pos]
.dsuse_last_start:
sub r11, r11, r10 @ r11 = diff = s[pos] - s[pos - 1]
@ keep frac in lower bits to take advantage of multiplier early termination
and r9, r1, r12 @ frac = phase & 0xffff
smull r9, r14, r11, r9
add r1, r1, r6 @ phase += delta
add r10, r10, r9, lsr #16 @ r10 = out = s[pos - 1] + frac*diff
add r10, r10, r14, lsl #16
str r10, [r8], #4 @ *d++ = out
mov r9, r1, lsr #16 @ pos = phase >> 16
cmp r9, r0 @ pos < count?
blt .dsloop @ yup, do more samples
.dsloop_skip:
subs r5, r5, #1
bpl .dschannel_loop @ if (--ch) >= 0, do another channel
sub r1, r1, r0, lsl #16 @ wrap phase back to start
str r1, [r4] @ store back
ldr r1, [r3] @ r1 = dst[0] (start of channel 0 output)
sub r8, r8, r1 @ bytes written = d - dst[0]
mov r0, r8, lsr #2 @ convert bytes->samples
ldmpc regs=r4-r11 @ ... and we're out
.size dsp_downsample, .-dsp_downsample
/****************************************************************************
* int dsp_upsample(int count, struct dsp_data *dsp,
* int32_t *src[], int32_t *dst[])
*
* Linear-interpolating resampler for the case where several output samples
* may be produced per input sample. The fractional phase and delta are kept
* in the upper 16 bits of a register so that the carry flag of the phase
* addition signals the step to the next input sample. Channels are processed
* from the last one down to channel 0; resample_data.last_sample[ch] supplies
* s[-1] and is updated with the last input sample of the frame.
* Returns the number of samples written per channel (computed from the
* channel 0 destination pointer).
* The struct offsets used (+4, +8, +12, +16) depend on the layout of
* struct dsp_data / struct resample_data.
*/
.section .text
.global dsp_upsample
dsp_upsample:
@ input: r0 = count, r1 = dsp, r2 = src, r3 = dst
stmfd sp!, { r4-r11, lr } @ stack modified regs
ldmib r1, { r5-r6 } @ r5 = num_channels,r6 = resample_data.delta
sub r5, r5, #1 @ pre-decrement num_channels for use
add r4, r1, #12 @ r4 = &resample_data.phase
mov r6, r6, lsl #16 @ we'll use carry to detect pos increments
stmfd sp!, { r0, r4 } @ stack count and &resample_data.phase
.uschannel_loop:
ldr r12, [r4] @ r12 = resample_data.phase
ldr r7, [r2, r5, lsl #2] @ r7 = s = src[ch - 1]
ldr r8, [r3, r5, lsl #2] @ r8 = d = dst[ch - 1]
add r9, r4, #4 @ r9 = &last_sample[0]
mov r1, r12, lsl #16 @ we'll use carry to detect pos increments
sub r11, r0, #1
ldr r14, [r7, r11, lsl #2] @ load last sample in s[] ...
ldr r10, [r9, r5, lsl #2] @ r10 = last_sample[ch - 1]
str r14, [r9, r5, lsl #2] @ and write as next frame's last_sample
movs r14, r12, lsr #16 @ pos = resample_data.phase >> 16
beq .usstart_0 @ pos = 0
cmp r14, r0 @ if pos >= count, we're already done
bge .usloop_skip
add r7, r7, r14, lsl #2 @ r7 = &s[pos]
ldr r10, [r7, #-4] @ r10 = s[pos - 1]
b .usstart_0
@ Register usage in loop:
@ r0 = count, r1 = phase, r4 = &resample_data.phase, r5 = cur_channel,
@ r6 = delta, r7 = s, r8 = d, r9 = diff, r10 = s[pos - 1], r11 = s[pos]
@ (inside the loop r4 is reused for frac and r0 is counted down; both are
@ reloaded from the stack at .usloop_skip)
.usloop_1:
mov r10, r11 @ r10 = previous sample
.usstart_0:
ldr r11, [r7], #4 @ r11 = next sample
mov r4, r1, lsr #16 @ r4 = frac = phase >> 16
sub r9, r11, r10 @ r9 = diff = s[pos] - s[pos - 1]
.usloop_0:
smull r12, r14, r4, r9
adds r1, r1, r6 @ phase += delta << 16
mov r4, r1, lsr #16 @ r4 = frac = phase >> 16
add r14, r10, r14, lsl #16
add r14, r14, r12, lsr #16 @ r14 = out = s[pos - 1] + frac*diff
str r14, [r8], #4 @ *d++ = out
bcc .usloop_0 @ if carry is set, pos is incremented
subs r0, r0, #1 @ if count > 0, do another sample
bgt .usloop_1
.usloop_skip:
subs r5, r5, #1
ldmfd sp, { r0, r4 } @ reload count and &resample_data.phase
bpl .uschannel_loop @ if (--ch) >= 0, do another channel
mov r1, r1, lsr #16 @ wrap phase back to start of next frame
ldr r2, [r3] @ r2 = dst[0] (start of channel 0 output)
str r1, [r4] @ store phase
sub r8, r8, r2 @ bytes written = d - dst[0]
mov r0, r8, lsr #2 @ convert bytes->samples
add sp, sp, #8 @ adjust stack for temp variables
ldmpc regs=r4-r11 @ ... and we're out
.size dsp_upsample, .-dsp_upsample
/****************************************************************************
* void dsp_apply_gain(int count, struct dsp_data *data, int32_t *buf[])
*
* Multiplies, in place, count samples of each of data->num_channels channel
* buffers by data->gain; each 64-bit product is shifted right by 23 bits.
* Two samples are handled per inner-loop iteration; a trailing odd sample is
* handled separately.
* The offsets #4 and #32 depend on the layout of struct dsp_data.
*/
.section .icode, "ax", %progbits
.align 2
.global dsp_apply_gain
.type dsp_apply_gain, %function
dsp_apply_gain:
@ input: r0 = count, r1 = data, r2 = buf[]
stmfd sp!, { r4-r8, lr }
ldr r3, [r1, #4] @ r3 = data->num_channels
ldr r4, [r1, #32] @ r4 = data->gain
.dag_outerloop:
ldr r1, [r2], #4 @ r1 = buf[0] and increment index of buf[]
subs r12, r0, #1 @ r12 = count - 1
beq .dag_singlesample @ Zero? Only one sample!
.dag_innerloop:
ldmia r1, { r5, r6 } @ load r5, r6 from r1
smull r7, r8, r5, r4 @ r7 = FRACMUL_SHL(r5, r4, 8)
smull r14, r5, r6, r4 @ r14 = FRACMUL_SHL(r6, r4, 8)
subs r12, r12, #2
mov r7, r7, lsr #23
mov r14, r14, lsr #23
orr r7, r7, r8, asl #9
orr r14, r14, r5, asl #9
stmia r1!, { r7, r14 } @ save r7, r14 to [r1] and increment r1
bgt .dag_innerloop @ end of inner loop
blt .dag_evencount @ < 0? even count
.dag_singlesample:
ldr r5, [r1] @ handle odd sample
smull r7, r8, r5, r4 @ r7 = FRACMUL_SHL(r5, r4, 8)
mov r7, r7, lsr #23
orr r7, r7, r8, asl #9
str r7, [r1]
.dag_evencount:
subs r3, r3, #1
bgt .dag_outerloop @ end of outer loop
ldmpc regs=r4-r8
.size dsp_apply_gain, .-dsp_apply_gain

View file

@ -0,0 +1,127 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2010 Michael Sevakis
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
/****************************************************************************
* void sample_output_mono(int count, struct dsp_data *data,
* const int32_t *src[], int16_t *dst)
*
* Converts count 32-bit samples from src[0] to 16-bit output. Each sample is
* rounded by a saturating add of 1 << (scale - 1), shifted right
* arithmetically by scale, saturated to signed 16 bits and written to dst
* duplicated into both halfwords of one 32-bit word. scale is the word at
* offset 0 of *data (data->output_scale).
*
* Two samples are converted per loop pass; a trailing odd sample is handled
* after the loop. dst is written with 32-bit stores.
*
* NOTE(review): count == 0 is not special-cased (count - 1 is then negative
* and the pair loop runs once) - presumably callers pass count >= 1; confirm.
*/
.section .text, "ax", %progbits
.align 2
.global sample_output_mono
.type sample_output_mono, %function
sample_output_mono:
@ input: r0 = count, r1 = data, r2 = src, r3 = dst
stmfd sp!, { r4, lr } @
@
ldr r1, [r1] @ r1 = data->output_scale
ldr r2, [r2] @ r2 = src[0]
@
mov r4, #1 @ r4 = 1 << (scale - 1)
mov r4, r4, lsl r1 @
subs r0, r0, #1 @ odd: end at 0; even: end at -1
mov r4, r4, lsr #1 @
beq 2f @ Zero? Only one sample!
@
1: @
ldmia r2!, { r12, r14 } @ load Mi0, Mi1
qadd r12, r12, r4 @ round, scale, saturate and
qadd r14, r14, r4 @ pack Mi0 to So0, Mi1 to So1
mov r12, r12, asr r1 @
mov r14, r14, asr r1 @
ssat r12, #16, r12 @
ssat r14, #16, r14 @
pkhbt r12, r12, r12, asl #16 @ same value in low and high halfword
pkhbt r14, r14, r14, asl #16 @
subs r0, r0, #2 @
stmia r3!, { r12, r14 } @ store So0, So1
bgt 1b @
@
ldmltfd sp!, { r4, pc } @ if count was even, we're done
@
2: @
ldr r12, [r2] @ round, scale, saturate
qadd r12, r12, r4 @ and pack Mi to So
mov r12, r12, asr r1 @
ssat r12, #16, r12 @
pkhbt r12, r12, r12, asl #16 @
str r12, [r3] @ store So
@
ldmfd sp!, { r4, pc } @
.size sample_output_mono, .-sample_output_mono
/****************************************************************************
* void sample_output_stereo(int count, struct dsp_data *data,
* const int32_t *src[], int16_t *dst)
*
* Converts count 32-bit sample pairs from src[0] (left) and src[1] (right) to
* interleaved 16-bit output. Each sample is rounded by a saturating add of
* 1 << (scale - 1), shifted right arithmetically by scale and saturated to
* signed 16 bits; left goes to the low halfword and right to the high
* halfword of one 32-bit word written to dst. scale is the word at offset 0
* of *data (data->output_scale).
*
* Two stereo frames are converted per loop pass; a trailing odd frame is
* handled after the loop. dst is written with 32-bit stores.
*
* NOTE(review): count == 0 is not special-cased (count - 1 is then negative
* and the pair loop runs once) - presumably callers pass count >= 1; confirm.
*/
.section .text, "ax", %progbits
.align 2
.global sample_output_stereo
.type sample_output_stereo, %function
sample_output_stereo:
@ input: r0 = count, r1 = data, r2 = src, r3 = dst
stmfd sp!, { r4-r7, lr } @
@
ldr r1, [r1] @ r1 = data->output_scale
ldmia r2, { r2, r4 } @ r2 = src[0], r4 = src[1]
@
mov r5, #1 @ r5 = 1 << (scale - 1)
mov r5, r5, lsl r1 @
subs r0, r0, #1 @ odd: end at 0; even: end at -1
mov r5, r5, lsr #1 @
beq 2f @ Zero? Only one sample!
@
1: @
ldmia r2!, { r6, r7 } @ r6, r7 = Li0, Li1
ldmia r4!, { r12, r14 } @ r12, r14 = Ri0, Ri1
qadd r6, r6, r5 @ round, scale, saturate and pack
qadd r7, r7, r5 @ Li0+Ri0 to So0, Li1+Ri1 to So1
qadd r12, r12, r5 @
qadd r14, r14, r5 @
mov r6, r6, asr r1 @
mov r7, r7, asr r1 @
mov r12, r12, asr r1 @
mov r14, r14, asr r1 @
ssat r6, #16, r6 @
ssat r12, #16, r12 @
ssat r7, #16, r7 @
ssat r14, #16, r14 @
pkhbt r6, r6, r12, asl #16 @ low halfword = L, high halfword = R
pkhbt r7, r7, r14, asl #16 @
subs r0, r0, #2 @
stmia r3!, { r6, r7 } @ store So0, So1
bgt 1b @
@
ldmltfd sp!, { r4-r7, pc } @ if count was even, we're done
@
2: @
ldr r6, [r2] @ r6 = Li
ldr r12, [r4] @ r12 = Ri
qadd r6, r6, r5 @ round, scale, saturate
qadd r12, r12, r5 @ and pack Li+Ri to So
mov r6, r6, asr r1 @
mov r12, r12, asr r1 @
ssat r6, #16, r6 @
ssat r12, #16, r12 @
pkhbt r6, r6, r12, asl #16 @
str r6, [r3] @ store So
@
ldmfd sp!, { r4-r7, pc } @
.size sample_output_stereo, .-sample_output_stereo

86
lib/rbcodec/dsp/dsp_asm.h Normal file
View file

@ -0,0 +1,86 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2006 Thom Johansen
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include <config.h>
#include <inttypes.h>
#ifndef _DSP_ASM_H
#define _DSP_ASM_H

/* Feature macros and prototypes for the DSP routines that have hand-written
 * assembly implementations. Each DSP_HAVE_ASM_* macro announces that the
 * corresponding routine below is provided by an assembly file for the
 * current CPU. */

/* Set the appropriate #defines based on CPU or whatever matters */
#if defined(CPU_ARM)
#define DSP_HAVE_ASM_APPLY_GAIN
#define DSP_HAVE_ASM_RESAMPLING
#define DSP_HAVE_ASM_CROSSFEED
#define DSP_HAVE_ASM_SOUND_CHAN_MONO
#define DSP_HAVE_ASM_SOUND_CHAN_CUSTOM
#define DSP_HAVE_ASM_SOUND_CHAN_KARAOKE
#define DSP_HAVE_ASM_SAMPLE_OUTPUT_MONO
#define DSP_HAVE_ASM_SAMPLE_OUTPUT_STEREO
#elif defined (CPU_COLDFIRE)
#define DSP_HAVE_ASM_APPLY_GAIN
#define DSP_HAVE_ASM_RESAMPLING
#define DSP_HAVE_ASM_CROSSFEED
#define DSP_HAVE_ASM_SOUND_CHAN_MONO
#define DSP_HAVE_ASM_SOUND_CHAN_CUSTOM
#define DSP_HAVE_ASM_SOUND_CHAN_KARAOKE
#define DSP_HAVE_ASM_SAMPLE_OUTPUT_MONO
#define DSP_HAVE_ASM_SAMPLE_OUTPUT_STEREO
#endif /* CPU_ARM / CPU_COLDFIRE */

/* Forward declaration: without it, a "struct dsp_data" first seen inside a
 * parameter list only has prototype scope, so the prototypes below would not
 * refer to the real structure when this header is included before its
 * definition. */
struct dsp_data;

/* Declare prototypes based upon what's #defined above */

/* Apply the crossfeed effect in place to the two channels in buf[] */
#ifdef DSP_HAVE_ASM_CROSSFEED
void apply_crossfeed(int count, int32_t *buf[]);
#endif /* DSP_HAVE_ASM_CROSSFEED */

/* Scale count samples of every channel in buf[] in place by data's gain */
#ifdef DSP_HAVE_ASM_APPLY_GAIN
void dsp_apply_gain(int count, struct dsp_data *data, int32_t *buf[]);
#endif /* DSP_HAVE_ASM_APPLY_GAIN */

/* Resample count input samples per channel from src[] into dst[];
 * the return value is the number of samples written per channel */
#ifdef DSP_HAVE_ASM_RESAMPLING
int dsp_upsample(int count, struct dsp_data *data,
                 const int32_t *src[], int32_t *dst[]);
int dsp_downsample(int count, struct dsp_data *data,
                   const int32_t *src[], int32_t *dst[]);
#endif /* DSP_HAVE_ASM_RESAMPLING */

/* In-place channel mixing modes operating on buf[0] and buf[1] */
#ifdef DSP_HAVE_ASM_SOUND_CHAN_MONO
void channels_process_sound_chan_mono(int count, int32_t *buf[]);
#endif /* DSP_HAVE_ASM_SOUND_CHAN_MONO */
#ifdef DSP_HAVE_ASM_SOUND_CHAN_CUSTOM
void channels_process_sound_chan_custom(int count, int32_t *buf[]);
#endif /* DSP_HAVE_ASM_SOUND_CHAN_CUSTOM */
#ifdef DSP_HAVE_ASM_SOUND_CHAN_KARAOKE
void channels_process_sound_chan_karaoke(int count, int32_t *buf[]);
#endif /* DSP_HAVE_ASM_SOUND_CHAN_KARAOKE */

/* Convert count 32-bit samples per channel from src[] to 16-bit output
 * frames at dst */
#ifdef DSP_HAVE_ASM_SAMPLE_OUTPUT_STEREO
void sample_output_stereo(int count, struct dsp_data *data,
                          const int32_t *src[], int16_t *dst);
#endif /* DSP_HAVE_ASM_SAMPLE_OUTPUT_STEREO */
#ifdef DSP_HAVE_ASM_SAMPLE_OUTPUT_MONO
void sample_output_mono(int count, struct dsp_data *data,
                        const int32_t *src[], int16_t *dst);
#endif /* DSP_HAVE_ASM_SAMPLE_OUTPUT_MONO */

#endif /* _DSP_ASM_H */

611
lib/rbcodec/dsp/dsp_cf.S Normal file
View file

@ -0,0 +1,611 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2006 Thom Johansen
* Portions Copyright (C) 2007 Michael Sevakis
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
/****************************************************************************
* void dsp_apply_gain(int count, struct dsp_data *data, int32_t *buf[])
*
* Scales every sample of every channel in place by data->gain using the
* EMAC unit. num_channels is read from offset 4 of *data and gain from
* offset 32. Channels are processed from the last one down to the first.
*
* The loop is software pipelined: the multiply for sample n is issued (with
* a parallel load of sample n+1) before the result for sample n-1 is stored.
*
* NOTE(review): because of the parallel loads, one longword past the last
* sample of each channel is read (never written), and one sample is always
* written even if count <= 0 - presumably callers pass count >= 1 and the
* buffers tolerate the extra read; confirm.
*/
.section .text
.align 2
.global dsp_apply_gain
dsp_apply_gain:
lea.l -20(%sp), %sp | save registers
movem.l %d2-%d4/%a2-%a3, (%sp) |
movem.l 28(%sp), %a0-%a1 | %a0 = data,
| %a1 = buf
move.l 4(%a0), %d1 | %d1 = data->num_channels
move.l 32(%a0), %a0 | %a0 = data->gain (in s8.23)
10: | channel loop |
move.l 24(%sp), %d0 | %d0 = count
move.l -4(%a1, %d1.l*4), %a2 | %a2 = s = buf[ch-1]
move.l %a2, %a3 | %a3 = d = s
move.l (%a2)+, %d2 | %d2 = *s++,
mac.l %a0, %d2, (%a2)+, %d2, %acc0 | %acc0 = S(n)*gain, load S(n+1)
subq.l #1, %d0 | --count > 0 ? : effectively n++
ble.b 30f | loop done | no? finish up
20: | loop |
move.l %accext01, %d4 | fetch S(n-1)[7:0]
movclr.l %acc0, %d3 | fetch S(n-1)[40:8] in %d3[31:0]
asl.l #8, %d3 | *d++ = (S(n-1)[40:8] << 8) | S(n-1)[7:0]
mac.l %a0, %d2, (%a2)+, %d2, %acc0 | %acc0 = S(n)*gain, load S(n+1)
move.b %d4, %d3 |
move.l %d3, (%a3)+ |
subq.l #1, %d0 | --count > 0 ? : effectively n++
bgt.b 20b | loop | yes? do more samples
30: | loop done |
move.l %accext01, %d4 | fetch S(n-1)[7:0]
movclr.l %acc0, %d3 | fetch S(n-1)[40:8] in %d3[31:0]
asl.l #8, %d3 | *d = (S(n-1)[40:8] << 8) | S(n-1)[7:0]
move.b %d4, %d3 |
move.l %d3, (%a3) |
subq.l #1, %d1 | next channel
bgt.b 10b | channel loop |
movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers
lea.l 20(%sp), %sp | cleanup stack
rts |
.size dsp_apply_gain,.-dsp_apply_gain
/****************************************************************************
* void apply_crossfeed(int count, int32_t *buf[])
*
* Applies the crossfeed effect in place to buf[0] and buf[1] using the
* global crossfeed_data. As accessed by this routine, crossfeed_data holds:
* offset 0 direct gain
* offset 4 three filter coefficients (b0, b1, a1)
* offset 16 four filter history longwords
* offset 32 delay line, 104 bytes of interleaved left/right samples,
* ending at offset 136
* offset 136 current delay line pointer
* History and delay pointer are written back before returning.
*
* NOTE(review): the last pair of outputs is written unconditionally after
* the loop, so count is presumably always >= 1; confirm against callers.
*/
.section .text
.align 2
.global apply_crossfeed
apply_crossfeed:
lea.l -44(%sp), %sp |
movem.l %d2-%d7/%a2-%a6, (%sp) | save all regs
movem.l 48(%sp), %d7/%a4 | %d7 = count, %a4 = src
movem.l (%a4), %a4-%a5 | %a4 = src[0], %a5 = src[1]
lea.l crossfeed_data, %a1 | %a1 = &crossfeed_data
move.l (%a1)+, %d6 | %d6 = direct gain
movem.l 12(%a1), %d0-%d3 | fetch filter history samples
move.l 132(%a1), %a0 | fetch delay line address
movem.l (%a1), %a1-%a3 | load filter coefs
lea.l crossfeed_data+136, %a6 | %a6 = delay line wrap limit
bra.b 20f | loop start | go to loop start point
/* Register usage in loop:
* %a0 = delay_p, %a1..%a3 = b0, b1, a1 (filter coefs),
* %a4 = buf[0], %a5 = buf[1],
* %a6 = delay line pointer wrap limit,
* %d0..%d3 = history
* %d4..%d5 = temp.
* %d6 = direct gain,
* %d7 = count
*/
10: | loop |
movclr.l %acc0, %d4 | write outputs
move.l %d4, (%a4)+ | .
movclr.l %acc1, %d5 | .
move.l %d5, (%a5)+ | .
20: | loop start |
mac.l %a2, %d0, (%a0)+, %d0, %acc0 | %acc0 = b1*dl[n - 1], %d0 = dl[n]
mac.l %a1, %d0 , %acc0 | %acc0 += b0*dl[n]
mac.l %a3, %d1, (%a5), %d5, %acc0 | %acc0 += a1*y_r[n - 1], load R
mac.l %a2, %d2, (%a0)+, %d2, %acc1 | %acc1 = b1*dr[n - 1], %d2 = dr[n]
mac.l %a1, %d2 , %acc1 | %acc1 += b0*dr[n]
mac.l %a3, %d3, (%a4), %d4, %acc1 | %acc1 += a1*y_l[n - 1], load L
movem.l %d4-%d5, -8(%a0) | save left & right inputs to delay line
move.l %acc0, %d3 | get filtered delayed left sample (y_l[n])
move.l %acc1, %d1 | get filtered delayed right sample (y_r[n])
mac.l %d6, %d4, %acc0 | %acc0 += gain*x_l[n]
mac.l %d6, %d5, %acc1 | %acc1 += gain*x_r[n]
cmp.l %a6, %a0 | wrap %a0 if passed end
bhs.b 30f | wrap buffer |
/* Not at the wrap limit: the tpf.l opcode below takes the following longword
* (the lea at label 30) as its operand, so the wrap is skipped unless the
* branch above jumped straight to it. */
.word 0x51fb | tpf.l | trap the buffer wrap
30: | wrap buffer | ...fwd taken branches more costly
lea.l -104(%a0), %a0 | wrap it up
subq.l #1, %d7 | --count > 0 ?
bgt.b 10b | loop | yes? do more
movclr.l %acc0, %d4 | write last outputs
move.l %d4, (%a4) | .
movclr.l %acc1, %d5 | .
move.l %d5, (%a5) | .
lea.l crossfeed_data+16, %a1 | save data back to struct
movem.l %d0-%d3, (%a1) | ...history
move.l %a0, 120(%a1) | ...delay_p
movem.l (%sp), %d2-%d7/%a2-%a6 | restore all regs
lea.l 44(%sp), %sp |
rts |
.size apply_crossfeed,.-apply_crossfeed
/****************************************************************************
* int dsp_downsample(int count, struct dsp_data *data,
* const int32_t *src[], int32_t *dst[])
*
* Linear-interpolating resampler for the case where the read position can
* advance by more than one input sample per output sample. For every
* channel, outputs s[pos-1] + frac * (s[pos] - s[pos-1]) while pos < count,
* where pos is the upper 16 bits of the phase and frac its lower 16 bits.
*
* As accessed here, *data holds num_channels at offset 4, the resampling
* delta at offset 8, the phase at offset 12 and one last_sample longword per
* channel from offset 16. The last input sample of each channel is saved so
* the next call can interpolate from it when pos is 0, and the phase is
* stored back reduced by count << 16.
*
* Returns the number of samples written per channel (computed from the
* final write pointer of channel 0).
*/
.section .text
.align 2
.global dsp_downsample
dsp_downsample:
lea.l -40(%sp), %sp | save non-clobberables
movem.l %d2-%d7/%a2-%a5, (%sp) |
movem.l 44(%sp), %d2/%a0-%a2 | %d2 = count
| %a0 = data
| %a1 = src
| %a2 = dst
movem.l 4(%a0), %d3-%d4 | %d3 = ch = data->num_channels
| %d4 = delta = data->resample_data.delta
moveq.l #16, %d7 | %d7 = shift
10: | channel loop |
move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase
move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1]
move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1]
lea.l 12(%a0, %d3.l*4), %a5 | %a5 = &data->resample_data.last_sample[ch-1]
move.l (%a5), %d0 | %d0 = last = data->resample_data.last_sample[ch-1]
move.l -4(%a3, %d2.l*4), (%a5) | data->resample_data.last_sample[ch-1] = s[count-1]
move.l %d5, %d6 | %d6 = pos = phase >> 16
lsr.l %d7, %d6 |
cmp.l %d2, %d6 | past end of samples?
bge.b 40f | skip resample loop| yes? skip loop
tst.l %d6 | need last sample of prev. frame?
bne.b 20f | resample loop | no? start main loop
move.l (%a3, %d6.l*4), %d1 | %d1 = s[pos]
bra.b 30f | resample start last | start with last (last in %d0)
20: | resample loop |
lea.l -4(%a3, %d6.l*4), %a5 | load s[pos-1] and s[pos]
movem.l (%a5), %d0-%d1 |
30: | resample start last |
sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1]
move.l %d0, %acc0 | %acc0 = previous sample
move.l %d5, %d0 | frac = (phase << 16) >> 1
lsl.l %d7, %d0 |
lsr.l #1, %d0 |
mac.l %d0, %d1, %acc0 | %acc0 += frac * diff
add.l %d4, %d5 | phase += delta
move.l %d5, %d6 | pos = phase >> 16
lsr.l %d7, %d6 |
movclr.l %acc0, %d0 |
move.l %d0, (%a4)+ | *d++ = %d0
cmp.l %d2, %d6 | pos < count?
blt.b 20b | resample loop | yes? continue resampling
40: | skip resample loop |
subq.l #1, %d3 | ch > 0?
bgt.b 10b | channel loop | yes? process next channel
lsl.l %d7, %d2 | wrap phase to start of next frame
sub.l %d2, %d5 | data->resample_data.phase =
move.l %d5, 12(%a0) | ... phase - (count << 16)
move.l %a4, %d0 | return d - d[0]
sub.l (%a2), %d0 |
asr.l #2, %d0 | convert bytes->samples
movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables
lea.l 40(%sp), %sp | cleanup stack
rts | buh-bye
.size dsp_downsample,.-dsp_downsample
/****************************************************************************
* int dsp_upsample(int count, struct dsp_data *data,
* const int32_t *src[], int32_t *dst[])
*
* Linear-interpolating resampler for the case where several output samples
* may be produced per input sample. For every channel, outputs
* s[pos-1] + frac * (s[pos] - s[pos-1]), advancing the phase by delta per
* output sample until the source position passes the last input sample.
*
* Phase and delta are kept swapped (fraction in the upper word) so that the
* carry out of "phase += delta" signals that the position moved on to the
* next input sample.
*
* As accessed here, *data holds num_channels at offset 4, the resampling
* delta at offset 8, the phase at offset 12 and one last_sample longword per
* channel from offset 16. The last input sample of each channel is saved for
* the next call and the updated phase is stored back.
*
* Returns the number of samples written per channel (computed from the
* final write pointer of channel 0).
*/
.section .text
.align 2
.global dsp_upsample
dsp_upsample:
lea.l -40(%sp), %sp | save non-clobberables
movem.l %d2-%d7/%a2-%a5, (%sp) |
movem.l 44(%sp), %d2/%a0-%a2 | %d2 = count
| %a0 = data
| %a1 = src
| %a2 = dst
movem.l 4(%a0), %d3-%d4 | %d3 = ch = channels
| %d4 = delta = data->resample_data.delta
swap %d4 | swap delta to high word to use...
| ...carries to increment position
10: | channel loop |
move.l 12(%a0), %d5 | %d5 = phase = data->resample_data.phase
move.l -4(%a1, %d3.l*4), %a3 | %a3 = s = src[ch-1]
lea.l 12(%a0, %d3.l*4), %a4 | %a4 = &data->resample_data.last_sample[ch-1]
lea.l -4(%a3, %d2.l*4), %a5 | %a5 = src_end = &s[count-1]
move.l (%a4), %d0 | %d0 = last = data->resample_data.last_sample[ch-1]
move.l (%a5), (%a4) | data->resample_data.last_sample[ch-1] = s[count-1]
move.l -4(%a2, %d3.l*4), %a4 | %a4 = d = dst[ch-1]
move.l (%a3)+, %d1 | fetch first sample - might throw this...
| ...away later but we'll be preincremented
move.l %d1, %d6 | save sample value
sub.l %d0, %d1 | %d1 = diff = s[0] - last
swap %d5 | swap phase to high word to use
| carries to increment position
move.l %d5, %d7 | %d7 = pos = phase >> 16
clr.w %d5 |
eor.l %d5, %d7 | pos == 0?
beq.b 40f | loop start | yes? start loop
cmp.l %d2, %d7 | past end of samples?
bge.b 50f | skip resample loop| yes? go to next channel and collect info
lea.l (%a3, %d7.l*4), %a3 | %a3 = s = &s[pos+1]
movem.l -8(%a3), %d0-%d1 | %d0 = s[pos-1], %d1 = s[pos]
move.l %d1, %d6 | save sample value
sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1]
bra.b 40f | loop start |
20: | next sample loop |
move.l %d6, %d0 | move previous sample to %d0
move.l (%a3)+, %d1 | fetch next sample
move.l %d1, %d6 | save sample value
sub.l %d0, %d1 | %d1 = diff = s[pos] - s[pos-1]
30: | same sample loop |
movclr.l %acc0, %d7 | %d7 = result
move.l %d7, (%a4)+ | *d++ = %d7
40: | loop start |
lsr.l #1, %d5 | make phase into frac
move.l %d0, %acc0 | %acc0 = s[pos-1]
mac.l %d1, %d5, %acc0 | %acc0 += diff * frac
lsl.l #1, %d5 | restore frac to phase
add.l %d4, %d5 | phase += delta
bcc.b 30b | same sample loop | load next values?
cmp.l %a5, %a3 | src <= src_end?
bls.b 20b | next sample loop | yes? continue resampling
movclr.l %acc0, %d7 | %d7 = result
move.l %d7, (%a4)+ | *d++ = %d7
50: | skip resample loop |
subq.l #1, %d3 | ch > 0?
bgt.b 10b | channel loop | yes? process next channel
swap %d5 | wrap phase to start of next frame
move.l %d5, 12(%a0) | ...and save in data->resample_data.phase
move.l %a4, %d0 | return d - d[0]
sub.l (%a2), %d0 |
movem.l (%sp), %d2-%d7/%a2-%a5 | restore non-clobberables
asr.l #2, %d0 | convert bytes->samples
lea.l 40(%sp), %sp | cleanup stack
rts | buh-bye
.size dsp_upsample,.-dsp_upsample
/****************************************************************************
* void channels_process_sound_chan_mono(int count, int32_t *buf[])
*
* Mix left and right channels 50/50 into a center channel.
*
* Works in place: the same mixed value is written back to both buf[0] and
* buf[1]. The loop is software pipelined, so the reads run one sample ahead
* of the writes.
*
* NOTE(review): the parallel loads read one longword past the last sample of
* each buffer (never written), and one output is always written even if
* count <= 0 - presumably callers pass count >= 1; confirm.
*/
.section .text
.align 2
.global channels_process_sound_chan_mono
channels_process_sound_chan_mono:
movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf
lea.l -20(%sp), %sp | save registers
movem.l %d2-%d4/%a2-%a3, (%sp) |
movem.l (%a0), %a0-%a1 | get channel pointers
move.l %a0, %a2 | use separate dst pointers since read
move.l %a1, %a3 | pointers run one ahead of write
move.l #0x40000000, %d3 | %d3 = 0.5
move.l (%a0)+, %d1 | prime the input registers
move.l (%a1)+, %d2 |
mac.l %d1, %d3, (%a0)+, %d1, %acc0 | %acc0 = l/2, load next l
mac.l %d2, %d3, (%a1)+, %d2, %acc0 | %acc0 += r/2, load next r
subq.l #1, %d0 |
ble.s 20f | loop done |
10: | loop |
movclr.l %acc0, %d4 | L = R = l/2 + r/2
mac.l %d1, %d3, (%a0)+, %d1, %acc0 |
mac.l %d2, %d3, (%a1)+, %d2, %acc0 |
move.l %d4, (%a2)+ | output to original buffer
move.l %d4, (%a3)+ |
subq.l #1, %d0 |
bgt.s 10b | loop |
20: | loop done |
movclr.l %acc0, %d4 | output last sample
move.l %d4, (%a2) |
move.l %d4, (%a3) |
movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers
lea.l 20(%sp), %sp | cleanup
rts |
.size channels_process_sound_chan_mono, \
.-channels_process_sound_chan_mono
/****************************************************************************
* void channels_process_sound_chan_custom(int count, int32_t *buf[])
*
* Apply stereo width (narrowing/expanding) effect.
*
* Works in place on buf[0] (left) and buf[1] (right) using the globals
* dsp_sw_gain and dsp_sw_cross:
* L = l*dsp_sw_gain + r*dsp_sw_cross
* R = r*dsp_sw_gain + l*dsp_sw_cross
* The loop is software pipelined, so the reads run one sample ahead of the
* writes.
*
* NOTE(review): the parallel loads read one longword past the last sample of
* each buffer (never written), and one output pair is always written even if
* count <= 0 - presumably callers pass count >= 1; confirm.
*/
.section .text
.align 2
.global channels_process_sound_chan_custom
channels_process_sound_chan_custom:
movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf
lea.l -28(%sp), %sp | save registers
movem.l %d2-%d6/%a2-%a3, (%sp) |
movem.l (%a0), %a0-%a1 | get channel pointers
move.l %a0, %a2 | use separate dst pointers since read
move.l %a1, %a3 | pointers run one ahead of write
move.l dsp_sw_gain, %d3 | load straight (mid) gain
move.l dsp_sw_cross, %d4 | load cross (side) gain
move.l (%a0)+, %d1 | prime the input registers
move.l (%a1)+, %d2 |
mac.l %d1, %d3 , %acc0 | L = l*gain + r*cross
mac.l %d1, %d4, (%a0)+, %d1, %acc1 | R = r*gain + l*cross
mac.l %d2, %d4 , %acc0 |
mac.l %d2, %d3, (%a1)+, %d2, %acc1 |
subq.l #1, %d0 |
ble.b 20f | loop done |
10: | loop |
movclr.l %acc0, %d5 | fetch finished L
movclr.l %acc1, %d6 | fetch finished R
mac.l %d1, %d3 , %acc0 | L = l*gain + r*cross
mac.l %d1, %d4, (%a0)+, %d1, %acc1 | R = r*gain + l*cross
mac.l %d2, %d4 , %acc0 |
mac.l %d2, %d3, (%a1)+, %d2, %acc1 |
move.l %d5, (%a2)+ |
move.l %d6, (%a3)+ |
subq.l #1, %d0 |
bgt.s 10b | loop |
20: | loop done |
movclr.l %acc0, %d5 | output last sample
movclr.l %acc1, %d6 |
move.l %d5, (%a2) |
move.l %d6, (%a3) |
movem.l (%sp), %d2-%d6/%a2-%a3 | restore registers
lea.l 28(%sp), %sp | cleanup
rts |
.size channels_process_sound_chan_custom, \
.-channels_process_sound_chan_custom
/****************************************************************************
* void channels_process_sound_chan_karaoke(int count, int32_t *buf[])
*
* Separate channels into side channels.
*
* Works in place: buf[0] receives l/2 - r/2 and buf[1] receives the negated
* value. The loop is software pipelined, so the reads run one sample ahead
* of the writes.
*
* NOTE(review): the parallel loads read one longword past the last sample of
* each buffer (never written), and one output pair is always written even if
* count <= 0 - presumably callers pass count >= 1; confirm.
*/
.section .text
.align 2
.global channels_process_sound_chan_karaoke
channels_process_sound_chan_karaoke:
movem.l 4(%sp), %d0/%a0 | %d0 = count, %a0 = buf
lea.l -20(%sp), %sp | save registers
movem.l %d2-%d4/%a2-%a3, (%sp) |
movem.l (%a0), %a0-%a1 | get channel src pointers
move.l %a0, %a2 | use separate dst pointers since read
move.l %a1, %a3 | pointers run one ahead of write
move.l #0x40000000, %d3 | %d3 = 0.5
move.l (%a0)+, %d1 | prime the input registers
move.l (%a1)+, %d2 |
mac.l %d1, %d3, (%a0)+, %d1, %acc0 | L = l/2 - r/2
msac.l %d2, %d3, (%a1)+, %d2, %acc0 |
subq.l #1, %d0 |
ble.b 20f | loop done |
10: | loop |
movclr.l %acc0, %d4 | fetch finished L
mac.l %d1, %d3, (%a0)+, %d1, %acc0 | L = l/2 - r/2
msac.l %d2, %d3, (%a1)+, %d2, %acc0 |
move.l %d4, (%a2)+ |
neg.l %d4 | R = -L = -(l/2 - r/2) = r/2 - l/2
move.l %d4, (%a3)+ |
subq.l #1, %d0 |
bgt.s 10b | loop |
20: | loop done |
movclr.l %acc0, %d4 | output last sample
move.l %d4, (%a2) |
neg.l %d4 | R = -L = -(l/2 - r/2) = r/2 - l/2
move.l %d4, (%a3) |
movem.l (%sp), %d2-%d4/%a2-%a3 | restore registers
lea.l 20(%sp), %sp | cleanup
rts |
.size channels_process_sound_chan_karaoke, \
.-channels_process_sound_chan_karaoke
/****************************************************************************
* void sample_output_stereo(int count, struct dsp_data *data,
* const int32_t *src[], int16_t *dst)
*
* Converts count sample pairs from src[0] (left) and src[1] (right) into
* 32-bit output frames at dst, left in the upper and right in the lower
* 16 bits. Each sample is multiplied by 1 << (16 - scale), where scale is
* the word at offset 0 of *data, with 0x8000 preloaded into the accumulator
* as rounding term; the upper 16 bits of the result are kept.
*
* The output is written as leading single frames up to a 16-byte boundary,
* then four frames at a time, then trailing single frames. %macsr is saved,
* set to 0x80 for the duration and restored on exit.
*
* Framework based on the ubiquitous Rockbox line transfer logic for
* Coldfire CPUs.
*
* Does emac clamping and scaling (which proved faster than the usual
* checks and branches - even single test clamping) and writes using
* line burst transfers. Also better than writing a single L-R pair per
* loop but a good deal more code.
*
* Attempting bursting during reads is rather futile since the source and
* destination alignments rarely agree and too much complication will
* slow us up. The parallel loads seem to do a bit better at least until
* a pcm buffer can always give line aligned chunk and then aligning the
* dest can then imply the source is aligned if the source buffers are.
* For now longword alignment is assumed of both the source and dest.
*
*/
.section .text
.align 2
.global sample_output_stereo
sample_output_stereo:
lea.l -48(%sp), %sp | save registers
move.l %macsr, %d1 | do it now as at many lines will
movem.l %d1-%d7/%a2-%a6, (%sp) | be the far more common condition
move.l #0x80, %macsr | put emac unit in signed int mode
movem.l 52(%sp), %a0-%a2/%a4 | %a0 = count, %a1 = data, %a2 = src, %a4 = dst
lea.l (%a4, %a0.l*4), %a0 | %a0 = end address
move.l (%a1), %d1 | %a1 = multiplier: (1 << (16 - scale))
sub.l #16, %d1 |
neg.l %d1 |
moveq.l #1, %d0 |
asl.l %d1, %d0 |
move.l %d0, %a1 |
move.l #0x8000, %a6 | %a6 = rounding term
movem.l (%a2), %a2-%a3 | get L/R channel pointers
moveq.l #28, %d0 | %d0 = second line bound
add.l %a4, %d0 |
and.l #0xfffffff0, %d0 |
cmp.l %a0, %d0 | at least a full line?
bhi.w 40f | long loop 1 start | no? do as trailing longwords
sub.l #16, %d0 | %d0 = first line bound
cmp.l %a4, %d0 | any leading longwords?
bls.b 20f | line loop start | no? start line loop
10: | long loop 0 |
move.l (%a2)+, %d1 | read longword from L and R
move.l %a6, %acc0 |
move.l %acc0, %acc1 |
mac.l %d1, %a1, (%a3)+, %d2, %acc0 | shift L to high word
mac.l %d2, %a1, %acc1 | shift R to high word
movclr.l %acc0, %d1 | get possibly saturated results
movclr.l %acc1, %d2 |
swap %d2 | move R to low word
move.w %d2, %d1 | interleave MS 16 bits of each
move.l %d1, (%a4)+ | ...and write both
cmp.l %a4, %d0 |
bhi.b 10b | long loop 0 |
20: | line loop start |
lea.l -12(%a0), %a5 | %a5 = at or just before last line bound
30: | line loop |
move.l (%a3)+, %d4 | get next 4 R samples and scale
move.l %a6, %acc0 |
move.l %acc0, %acc1 |
move.l %acc1, %acc2 |
move.l %acc2, %acc3 |
mac.l %d4, %a1, (%a3)+, %d5, %acc0 | with saturation
mac.l %d5, %a1, (%a3)+, %d6, %acc1 |
mac.l %d6, %a1, (%a3)+, %d7, %acc2 |
mac.l %d7, %a1, (%a2)+, %d0, %acc3 |
lea.l 16(%a4), %a4 | increment dest here, mitigate stalls
movclr.l %acc0, %d4 | obtain R results
movclr.l %acc1, %d5 |
movclr.l %acc2, %d6 |
movclr.l %acc3, %d7 |
move.l %a6, %acc0 |
move.l %acc0, %acc1 |
move.l %acc1, %acc2 |
move.l %acc2, %acc3 |
mac.l %d0, %a1, (%a2)+, %d1, %acc0 | get next 4 L samples and scale
mac.l %d1, %a1, (%a2)+, %d2, %acc1 | with saturation
mac.l %d2, %a1, (%a2)+, %d3, %acc2 |
mac.l %d3, %a1 , %acc3 |
swap %d4 | a) interleave most significant...
swap %d5 |
swap %d6 |
swap %d7 |
movclr.l %acc0, %d0 | obtain L results
movclr.l %acc1, %d1 |
movclr.l %acc2, %d2 |
movclr.l %acc3, %d3 |
move.w %d4, %d0 | a) ... 16 bits of L and R
move.w %d5, %d1 |
move.w %d6, %d2 |
move.w %d7, %d3 |
movem.l %d0-%d3, -16(%a4) | write four stereo samples
cmp.l %a4, %a5 |
bhi.b 30b | line loop |
40: | long loop 1 start |
cmp.l %a4, %a0 | any longwords left?
bls.b 60f | output end | no? stop
50: | long loop 1 |
move.l (%a2)+, %d1 | handle trailing longwords
move.l %a6, %acc0 |
move.l %acc0, %acc1 |
mac.l %d1, %a1, (%a3)+, %d2, %acc0 | the same way as leading ones
mac.l %d2, %a1, %acc1 |
movclr.l %acc0, %d1 |
movclr.l %acc1, %d2 |
swap %d2 |
move.w %d2, %d1 |
move.l %d1, (%a4)+ |
cmp.l %a4, %a0 |
bhi.b 50b | long loop 1
60: | output end |
movem.l (%sp), %d1-%d7/%a2-%a6 | restore registers
move.l %d1, %macsr |
lea.l 48(%sp), %sp | cleanup
rts |
.size sample_output_stereo, .-sample_output_stereo
/****************************************************************************
* void sample_output_mono(int count, struct dsp_data *data,
* const int32_t *src[], int16_t *dst)
*
* Same treatment as sample_output_stereo but for one channel.
*
* Converts count samples from src[0] into 32-bit output frames at dst with
* the same 16-bit value in the upper and lower half. Each sample is
* multiplied by 1 << (16 - scale), where scale is the word at offset 0 of
* *data, with 0x8000 preloaded into the accumulator as rounding term; the
* upper 16 bits of the result are kept.
*
* The output is written as leading single frames up to a 16-byte boundary,
* then four frames at a time, then trailing single frames. %macsr is saved,
* set to 0x80 for the duration and restored on exit.
*/
.section .text
.align 2
.global sample_output_mono
sample_output_mono:
lea.l -32(%sp), %sp | save registers
move.l %macsr, %d1 | do it now as at many lines will
movem.l %d1-%d5/%a2-%a4, (%sp) | be the far more common condition
move.l #0x80, %macsr | put emac unit in signed int mode
movem.l 36(%sp), %a0-%a3 | %a0 = count, %a1 = data, %a2 = src, %a3 = dst
lea.l (%a3, %a0.l*4), %a0 | %a0 = end address
move.l (%a1), %d1 | %d5 = multiplier: (1 << (16 - scale))
sub.l #16, %d1 |
neg.l %d1 |
moveq.l #1, %d5 |
asl.l %d1, %d5 |
move.l #0x8000, %a4 | %a4 = rounding term
movem.l (%a2), %a2 | get source channel pointer
moveq.l #28, %d0 | %d0 = second line bound
add.l %a3, %d0 |
and.l #0xfffffff0, %d0 |
cmp.l %a0, %d0 | at least a full line?
bhi.w 40f | long loop 1 start | no? do as trailing longwords
sub.l #16, %d0 | %d0 = first line bound
cmp.l %a3, %d0 | any leading longwords?
bls.b 20f | line loop start | no? start line loop
10: | long loop 0 |
move.l (%a2)+, %d1 | read one source sample
move.l %a4, %acc0 |
mac.l %d1, %d5, %acc0 | shift sample to high word
movclr.l %acc0, %d1 | get possibly saturated results
move.l %d1, %d2 |
swap %d2 | copy the high word down to the low word
move.w %d2, %d1 | duplicate single channel into
move.l %d1, (%a3)+ | L and R
cmp.l %a3, %d0 |
bhi.b 10b | long loop 0 |
20: | line loop start |
lea.l -12(%a0), %a1 | %a1 = at or just before last line bound
30: | line loop |
move.l (%a2)+, %d0 | get next 4 L samples and scale
move.l %a4, %acc0 |
move.l %acc0, %acc1 |
move.l %acc1, %acc2 |
move.l %acc2, %acc3 |
mac.l %d0, %d5, (%a2)+, %d1, %acc0 | with saturation
mac.l %d1, %d5, (%a2)+, %d2, %acc1 |
mac.l %d2, %d5, (%a2)+, %d3, %acc2 |
mac.l %d3, %d5 , %acc3 |
lea.l 16(%a3), %a3 | increment dest here, mitigate stalls
movclr.l %acc0, %d0 | obtain results
movclr.l %acc1, %d1 |
movclr.l %acc2, %d2 |
movclr.l %acc3, %d3 |
move.l %d0, %d4 | duplicate single channel
swap %d4 | into L and R
move.w %d4, %d0 |
move.l %d1, %d4 |
swap %d4 |
move.w %d4, %d1 |
move.l %d2, %d4 |
swap %d4 |
move.w %d4, %d2 |
move.l %d3, %d4 |
swap %d4 |
move.w %d4, %d3 |
movem.l %d0-%d3, -16(%a3) | write four stereo samples
cmp.l %a3, %a1 |
bhi.b 30b | line loop |
40: | long loop 1 start |
cmp.l %a3, %a0 | any longwords left?
bls.b 60f | output end | no? stop
50: | long loop 1 |
move.l (%a2)+, %d1 | handle trailing longwords
move.l %a4, %acc0 |
mac.l %d1, %d5, %acc0 | the same way as leading ones
movclr.l %acc0, %d1 |
move.l %d1, %d2 |
swap %d2 |
move.w %d2, %d1 |
move.l %d1, (%a3)+ |
cmp.l %a3, %a0 |
bhi.b 50b | long loop 1 |
60: | output end |
movem.l (%sp), %d1-%d5/%a2-%a4 | restore registers
move.l %d1, %macsr |
lea.l 32(%sp), %sp | cleanup
rts |
.size sample_output_mono, .-sample_output_mono

268
lib/rbcodec/dsp/eq.c Normal file
View file

@ -0,0 +1,268 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2006-2007 Thom Johansen
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include <inttypes.h>
#include "config.h"
#include "fixedpoint.h"
#include "fracmul.h"
#include "eq.h"
#include "replaygain.h"
/**
 * Compute the coefficients of a first order shelving filter. The result is
 * not directly usable by the eq_filter() function.
 * @param cutoff shelf midpoint frequency. See eq_pk_coefs for format.
 * @param A decibel value multiplied by ten, describing gain/attenuation of
 * shelf. Max value is 24 dB.
 * @param low true for low-shelf filter, false for high-shelf filter.
 * @param c pointer to coefficient storage; three values are written: b0 and
 * b1 divided by a0, then the negated a1 divided by a0. Coefficients are
 * s4.27 format.
 */
void filter_shelf_coefs(unsigned long cutoff, long A, bool low, int32_t *c)
{
    long sn, cs;
    const long gain = get_replaygain_int(A*5) << 4; /* 10^(db/40), s3.28 */

    sn = fp_sincos(cutoff/2, &cs);

    /* One trig term is multiplied/divided by the gain while the other one is
     * only shifted down: a low shelf scales sin, a high shelf scales cos. */
    const long scaled = low ? sn : cs;
    const int32_t over_gain = fp_div(scaled, gain, 25);
    const int32_t times_gain = FRACMUL(scaled, gain);
    const long plain = (low ? cs : sn) >> 3;

    int32_t num0, num1, den0, den1; /* s3.28 */

    num0 = times_gain + plain;      /* 0.25 .. 4.10 */
    den0 = over_gain + plain;       /* 0.25 .. 4.10 */
    if (low) {
        num1 = times_gain - plain;  /* -1 .. 3.98 */
        den1 = over_gain - plain;   /* -1 .. 3.98 */
    } else {
        num1 = plain - times_gain;  /* -3.98 .. 1 */
        den1 = plain - over_gain;   /* -3.98 .. 1 */
    }

    /* Normalise by the leading denominator coefficient */
    const int32_t inv_den0 = fp_div(1, den0, 57); /* 0.24 .. 3.98, s2.29 */

    c[0] = FRACMUL_SHL(num0, inv_den0, 1);  /* 0.063 .. 15.85 */
    c[1] = FRACMUL_SHL(num1, inv_den0, 1);  /* -15.85 .. 15.85 */
    c[2] = -FRACMUL_SHL(den1, inv_den0, 1); /* -1 .. 1 */
}
#ifdef HAVE_SW_TONE_CONTROLS
/**
 * Compute a second order section made of one low-shelf and one high-shelf
 * first order section in cascade.
 * @param cutoff_low low-shelf midpoint frequency. See eq_pk_coefs for format.
 * @param cutoff_high high-shelf midpoint frequency.
 * @param A_low decibel value multiplied by ten, describing gain/attenuation of
 * low-shelf part. Max value is 24 dB.
 * @param A_high decibel value multiplied by ten, describing gain/attenuation of
 * high-shelf part. Max value is 24 dB.
 * @param A decibel value multiplied by ten, describing additional overall gain.
 * @param c pointer to coefficient storage; five values are written (three
 * numerator terms followed by two denominator terms). Coefficients are s4.27
 * format.
 */
void filter_bishelf_coefs(unsigned long cutoff_low, unsigned long cutoff_high,
                          long A_low, long A_high, long A, int32_t *c)
{
    const long overall = get_replaygain_int(A*10) << 7; /* 10^(db/20), s0.31 */
    int32_t low_shelf[3], high_shelf[3];

    filter_shelf_coefs(cutoff_low, A_low, true, low_shelf);
    filter_shelf_coefs(cutoff_high, A_high, false, high_shelf);

    /* The overall gain is folded into the low-shelf numerator */
    const int32_t ls_b0 = FRACMUL(overall, low_shelf[0]);
    const int32_t ls_b1 = FRACMUL(overall, low_shelf[1]);
    const int32_t ls_a1 = low_shelf[2];
    const int32_t hs_b0 = high_shelf[0];
    const int32_t hs_b1 = high_shelf[1];
    const int32_t hs_a1 = high_shelf[2];

    /* Cascade the two first order filters into one second order filter
     * which can be used by eq_filter(). The resulting coefficients have a
     * really wide numerical range, so the fixed point format used here will
     * work for the selected cutoff frequencies (in dsp.c) only.
     */
    c[0] = FRACMUL_SHL(ls_b0, hs_b0, 4);
    c[1] = FRACMUL_SHL(ls_b0, hs_b1, 4) + FRACMUL_SHL(ls_b1, hs_b0, 4);
    c[2] = FRACMUL_SHL(ls_b1, hs_b1, 4);
    c[3] = ls_a1 + hs_a1;
    c[4] = -FRACMUL_SHL(ls_a1, hs_a1, 4);
}
#endif
/* Coef calculation taken from Audio-EQ-Cookbook.txt by Robert Bristow-Johnson.
* Slightly faster calculation can be done by deriving forms which use tan()
* instead of cos() and sin(), but the latter are far easier to use when doing
* fixed point math, and performance is not a big point in the calculation part.
* All the 'a' filter coefficients are negated so we can use only additions
* in the filtering equation.
*/
/**
 * Compute the coefficients of a second order peaking filter section.
 * @param cutoff centre frequency; 0 represents 0 Hz and 0x80000000
 *        represents the Nyquist frequency (samplerate/2).
 * @param Q Q factor multiplied by ten. It is used as a divisor here and is
 *        not range-checked by this function, so it must be non-zero (the
 *        documented lower bound is 0.5).
 * @param db gain/attenuation at the peak frequency in tenths of a decibel.
 *        Max value is 24 dB.
 * @param c pointer to storage for five coefficients (b0, b1, b2, a1, a2).
 *        Coefficients are s3.28 format.
 */
void eq_pk_coefs(unsigned long cutoff, unsigned long Q, long db, int32_t *c)
{
    const long unity = 1 << 28;                              /* 1.0, s3.28 */
    const long A = get_replaygain_int(db*5) << 5;  /* 10^(db/40), s2.29 */
    long cs;
    const long alpha = fp_sincos(cutoff, &cs)/(2*Q)*10 >> 1; /* s1.30 */
    const long alpha_over_A = fp_div(alpha, A, 27);
    const long alpha_times_A = FRACMUL(alpha, A);

    /* All raw coefficients are s3.28; possible ranges are noted per line */
    const int32_t b0 = unity + alpha_times_A;   /* [1 .. 5] */
    const int32_t a1 = -2*(cs >> 3);            /* [-2 .. 2] */
    const int32_t b1 = a1;                      /* same value as a1 */
    const int32_t b2 = unity - alpha_times_A;   /* [-3 .. 1] */
    const int32_t a0 = unity + alpha_over_A;    /* [1 .. 5] */
    const int32_t a2 = unity - alpha_over_A;    /* [-3 .. 1] */

    /* range of this is roughly [0.2 .. 1], but we'll never hit 1 completely */
    const long rcp_a0 = fp_div(1, a0, 59);      /* s0.31 */

    /* Normalise by a0; the 'a' coefficients are stored negated */
    c[0] = FRACMUL(b0, rcp_a0);                 /* [0.25 .. 4] */
    c[1] = FRACMUL(b1, rcp_a0);                 /* [-2 .. 2] */
    c[2] = FRACMUL(b2, rcp_a0);                 /* [-2.4 .. 1] */
    c[3] = FRACMUL(-a1, rcp_a0);                /* [-2 .. 2] */
    c[4] = FRACMUL(-a2, rcp_a0);                /* [-0.6 .. 1] */
}
/**
 * Compute the coefficients of a second order low-shelf filter section.
 * Parameters are as for eq_pk_coefs, but the five coefficients written to c
 * (b0, b1, b2, a1, a2) are in s5.26 fixed point format.
 */
void eq_ls_coefs(unsigned long cutoff, unsigned long Q, long db, int32_t *c)
{
    const long unity = 1 << 25;                              /* 1.0, s6.25 */
    const long sqrtA = get_replaygain_int(db*5/2) << 2; /* 10^(db/80), s5.26 */
    const long A = FRACMUL_SHL(sqrtA, sqrtA, 8);             /* s2.29 */
    long cs;
    const long alpha = fp_sincos(cutoff, &cs)/(2*Q)*10 >> 1; /* s1.30 */
    const long a_plus_1 = (A >> 4) + unity;
    const long a_minus_1 = (A >> 4) - unity;
    const long ap1_cs = FRACMUL(a_plus_1, cs);
    const long am1_cs = FRACMUL(a_minus_1, cs);
    const long beta = 2*FRACMUL(sqrtA, alpha);
    /* Terms shared by b0/b2 and by a0/a2 respectively */
    const long num_base = a_plus_1 - am1_cs;
    const long den_base = a_plus_1 + am1_cs;

    /* Raw coefficients, all in s6.25 format */
    const int32_t b0 = FRACMUL_SHL(A, num_base + beta, 2);    /* [0.1 .. 40] */
    const int32_t b1 = FRACMUL_SHL(A, a_minus_1 - ap1_cs, 3); /* [-16 .. 63.4] */
    const int32_t b2 = FRACMUL_SHL(A, num_base - beta, 2);    /* [0 .. 31.7] */
    const int32_t a0 = den_base + beta;                       /* [0.5 .. 10] */
    const int32_t a1 = -2*(a_minus_1 + ap1_cs);               /* [-16 .. 4] */
    const int32_t a2 = den_base - beta;                       /* [0 .. 8] */

    /* [0.1 .. 1.99] */
    const long rcp_a0 = fp_div(1, a0, 55);                    /* s1.30 */

    /* Normalise by a0; the 'a' coefficients are stored negated */
    c[0] = FRACMUL_SHL(b0, rcp_a0, 2);                        /* [0.06 .. 15.9] */
    c[1] = FRACMUL_SHL(b1, rcp_a0, 2);                        /* [-2 .. 31.7] */
    c[2] = FRACMUL_SHL(b2, rcp_a0, 2);                        /* [0 .. 15.9] */
    c[3] = FRACMUL_SHL(-a1, rcp_a0, 2);                       /* [-2 .. 2] */
    c[4] = FRACMUL_SHL(-a2, rcp_a0, 2);                       /* [0 .. 1] */
}
/**
 * Compute the coefficients of a second order high-shelf filter section.
 * Parameters are as for eq_pk_coefs, but the five coefficients written to c
 * (b0, b1, b2, a1, a2) are in s5.26 fixed point format.
 */
void eq_hs_coefs(unsigned long cutoff, unsigned long Q, long db, int32_t *c)
{
    const long unity = 1 << 25;                              /* 1.0, s6.25 */
    const long sqrtA = get_replaygain_int(db*5/2) << 2; /* 10^(db/80), s5.26 */
    const long A = FRACMUL_SHL(sqrtA, sqrtA, 8);             /* s2.29 */
    long cs;
    const long alpha = fp_sincos(cutoff, &cs)/(2*Q)*10 >> 1; /* s1.30 */
    const long a_plus_1 = (A >> 4) + unity;
    const long a_minus_1 = (A >> 4) - unity;
    const long ap1_cs = FRACMUL(a_plus_1, cs);
    const long am1_cs = FRACMUL(a_minus_1, cs);
    const long beta = 2*FRACMUL(sqrtA, alpha);
    /* Terms shared by b0/b2 and by a0/a2 respectively */
    const long num_base = a_plus_1 + am1_cs;
    const long den_base = a_plus_1 - am1_cs;

    /* Raw coefficients, all in s6.25 format */
    const int32_t b0 = FRACMUL_SHL(A, num_base + beta, 2);     /* [0.1 .. 40] */
    const int32_t b1 = -FRACMUL_SHL(A, a_minus_1 + ap1_cs, 3); /* [-63.5 .. 16] */
    const int32_t b2 = FRACMUL_SHL(A, num_base - beta, 2);     /* [0 .. 32] */
    const int32_t a0 = den_base + beta;                        /* [0.5 .. 10] */
    const int32_t a1 = 2*(a_minus_1 - ap1_cs);                 /* [-4 .. 16] */
    const int32_t a2 = den_base - beta;                        /* [0 .. 8] */

    /* [0.1 .. 1.99] */
    const long rcp_a0 = fp_div(1, a0, 55);                     /* s1.30 */

    /* Normalise by a0; the 'a' coefficients are stored negated */
    c[0] = FRACMUL_SHL(b0, rcp_a0, 2);                         /* [0 .. 16] */
    c[1] = FRACMUL_SHL(b1, rcp_a0, 2);                         /* [-31.7 .. 2] */
    c[2] = FRACMUL_SHL(b2, rcp_a0, 2);                         /* [0 .. 16] */
    c[3] = FRACMUL_SHL(-a1, rcp_a0, 2);                        /* [-2 .. 2] */
    c[4] = FRACMUL_SHL(-a2, rcp_a0, 2);                        /* [0 .. 1] */
}
/* We realise the filters as a second order direct form 1 structure. Direct
* form 1 was chosen because of better numerical properties for fixed point
* implementations.
*/
#if (!defined(CPU_COLDFIRE) && !defined(CPU_ARM))
/**
 * Run one second order section over the sample buffers, in place.
 * @param x array of per-channel sample buffers; each buffer is filtered in
 *        place.
 * @param f filter coefficients and per-channel history. The history array
 *        holds two channels, so channels must not exceed 2.
 * @param num number of samples per channel.
 * @param channels number of channels to process.
 * @param shift left shift applied to the 64-bit accumulator before its upper
 *        32 bits are taken as the output sample. It depends on the fixed
 *        point format of the coefficients and must be less than 64.
 */
void eq_filter(int32_t **x, struct eqfilter *f, unsigned num,
               unsigned channels, unsigned shift)
{
    /* Direct form 1 filtering code.
       y[n] = b0*x[i] + b1*x[i - 1] + b2*x[i - 2] + a1*y[i - 1] + a2*y[i - 2],
       where y[] is output and x[] is input.
     */
    const int32_t *coef = f->coefs; /* b0, b1, b2, a1, a2 */

    for (unsigned c = 0; c < channels; c++) {
        int32_t *samples = x[c];
        /* hist[0..1] = x[i - 1], x[i - 2]; hist[2..3] = y[i - 1], y[i - 2] */
        int32_t *hist = f->history[c];

        for (unsigned i = 0; i < num; i++) {
            const int32_t in = samples[i];
            long long acc;

            acc  = (long long) in * coef[0];
            acc += (long long) hist[0] * coef[1];
            acc += (long long) hist[1] * coef[2];
            acc += (long long) hist[2] * coef[3];
            acc += (long long) hist[3] * coef[4];

            /* Left-shifting a negative signed value is undefined behaviour,
             * so the shift is done on the unsigned representation. The bits
             * selected are the same as for (acc << shift) >> 32.
             */
            const int32_t out =
                (int32_t) (uint32_t) (((uint64_t) acc << shift) >> 32);

            hist[1] = hist[0];
            hist[0] = in;
            hist[3] = hist[2];
            hist[2] = out;
            samples[i] = out;
        }
    }
}
#endif

50
lib/rbcodec/dsp/eq.h Normal file
View file

@ -0,0 +1,50 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2006-2007 Thom Johansen
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#ifndef _EQ_H
#define _EQ_H
#include <inttypes.h>
#include <stdbool.h>
/* These depend on the fixed point formats used by the different filter types
and need to be changed when they change.
*/
#define FILTER_BISHELF_SHIFT 5
#define EQ_PEAK_SHIFT 4
#define EQ_SHELF_SHIFT 6
/* Coefficients and running state of one second order filter section, as
   consumed by eq_filter(). */
struct eqfilter {
int32_t coefs[5]; /* Order is b0, b1, b2, a1, a2 */
/* Per-channel history; the first index is the channel, so two channels are
   supported. The C implementation of eq_filter() keeps x[i-1], x[i-2],
   y[i-1], y[i-2] in slots 0..3. */
int32_t history[2][4];
};
void filter_shelf_coefs(unsigned long cutoff, long A, bool low, int32_t *c);
void filter_bishelf_coefs(unsigned long cutoff_low, unsigned long cutoff_high,
long A_low, long A_high, long A, int32_t *c);
void eq_pk_coefs(unsigned long cutoff, unsigned long Q, long db, int32_t *c);
void eq_ls_coefs(unsigned long cutoff, unsigned long Q, long db, int32_t *c);
void eq_hs_coefs(unsigned long cutoff, unsigned long Q, long db, int32_t *c);
void eq_filter(int32_t **x, struct eqfilter *f, unsigned num,
unsigned channels, unsigned shift);
#endif

89
lib/rbcodec/dsp/eq_arm.S Normal file
View file

@ -0,0 +1,89 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2006-2007 Thom Johansen
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include "config.h"
/* uncomment this to make filtering calculate lower bits after shifting.
* without this, "shift" of the lower bits will be lost here.
*/
/* #define HIGH_PRECISION */
/*
* void eq_filter(int32_t **x, struct eqfilter *f, unsigned num,
* unsigned channels, unsigned shift)
*/
#if CONFIG_CPU == PP5002
.section .icode,"ax",%progbits
#else
.text
#endif
.global eq_filter
/* ARM implementation of eq_filter(). The first four arguments arrive in
 * r0-r3 and are pushed to the stack below, so that inside the routine
 * [sp] = x, [sp, #4] = f (later reused as a scratch slot), [sp, #8] = num
 * and [sp, #12] = channels. The fifth argument (shift) is read from the
 * caller's stack before the push.
 */
eq_filter:
ldr r12, [sp] @ get shift parameter
stmdb sp!, { r0-r11, lr } @ save all params and clobbered regs
ldmia r1!, { r4-r8 } @ load coefs
mov r10, r1 @ loop prelude expects filter struct addr in r10
.filterloop:
ldr r9, [sp] @ get pointer to this channels data
add r0, r9, #4
str r0, [sp] @ save back pointer to next channels data
ldr r9, [r9] @ r9 = x[]
ldr r14, [sp, #8] @ r14 = numsamples
ldmia r10, { r0-r3 } @ load history, r10 should be filter struct addr
@ r10 is reused as the low accumulator word inside .loop, so the history
@ pointer is parked on the stack until the channel is finished
str r10, [sp, #4] @ save it for loop end
/* r0-r3 = history, r4-r8 = coefs, r9 = x[], r10..r11 = accumulator,
* r12 = shift amount, r14 = number of samples.
*/
.loop:
/* Direct form 1 filtering code.
* y[n] = b0*x[i] + b1*x[i - 1] + b2*x[i - 2] + a1*y[i - 1] + a2*y[i - 2],
* where y[] is output and x[] is input. This is performed out of order to
* reuse registers, we're pretty short on regs.
*/
smull r10, r11, r6, r1 @ acc = b2*x[i - 2]
mov r1, r0 @ fix input history
smlal r10, r11, r5, r0 @ acc += b1*x[i - 1]
ldr r0, [r9] @ load input and fix history in same operation
smlal r10, r11, r7, r2 @ acc += a1*y[i - 1]
smlal r10, r11, r8, r3 @ acc += a2*y[i - 2]
smlal r10, r11, r4, r0 @ acc += b0*x[i] /* avoid stall on arm9*/
mov r3, r2 @ fix output history
mov r2, r11, asl r12 @ get upper part of result and shift left
#ifdef HIGH_PRECISION
rsb r11, r12, #32 @ get shift amount for lower part
orr r2, r2, r10, lsr r11 @ then mix in correctly shifted lower part
#endif
str r2, [r9], #4 @ save result
subs r14, r14, #1 @ are we done with this channel?
bne .loop
ldr r10, [sp, #4] @ load filter struct pointer
@ the writeback leaves r10 pointing at the next channel's history, which is
@ where .filterloop expects it
stmia r10!, { r0-r3 } @ save back history
ldr r11, [sp, #12] @ load number of channels
subs r11, r11, #1 @ all channels processed?
strne r11, [sp, #12]
bne .filterloop
add sp, sp, #16 @ compensate for temp storage
@ NOTE(review): ldmpc is a macro defined outside this file; presumably it
@ pops r4-r11 plus the saved lr into pc to return - confirm
ldmpc regs=r4-r11

91
lib/rbcodec/dsp/eq_cf.S Normal file
View file

@ -0,0 +1,91 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2006-2007 Thom Johansen
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
/* uncomment this to make filtering calculate lower bits after shifting.
* without this, "shift" - 1 of the lower bits will be lost here.
*/
/* #define HIGH_PRECISION */
/*
* void eq_filter(int32_t **x, struct eqfilter *f, unsigned num,
* unsigned channels, unsigned shift)
*/
.text
.global eq_filter
/* Coldfire implementation of eq_filter(). Eleven registers are saved on
 * entry, so the arguments are found at (11*4+4, %sp) = x, +8 = f,
 * +12 = num, +16 = channels and +20 = shift. The x pointer and the channel
 * count are modified in place on the stack as the channels are processed.
 */
eq_filter:
lea.l (-11*4, %sp), %sp
movem.l %d2-%d7/%a2-%a6, (%sp) | save clobbered regs
move.l (11*4+8, %sp), %a5 | fetch filter structure address
move.l (11*4+20, %sp), %d7 | load shift count
subq.l #1, %d7 | EMAC gives us one free shift
#ifdef HIGH_PRECISION
moveq.l #8, %d6
sub.l %d7, %d6 | shift for lower part of accumulator
#endif
movem.l (%a5), %a0-%a4 | load coefs
lea.l (5*4, %a5), %a5 | point to filter history
.filterloop:
move.l (11*4+4, %sp), %a6 | load input channel pointer
addq.l #4, (11*4+4, %sp) | point x to next channel
move.l (%a6), %a6
move.l (11*4+12, %sp), %d5 | number of samples
movem.l (%a5), %d0-%d3 | load filter history
/* d0-d3 = history, d4 = temp, d5 = sample count, d6 = lower shift amount,
* d7 = upper shift amount, a0-a4 = coefs, a5 = history pointer, a6 = x[]
*/
.loop:
/* Direct form 1 filtering code. We assume DSP has put EMAC in frac mode.
* y[n] = b0*x[i] + b1*x[i - 1] + b2*x[i - 2] + a1*y[i - 1] + a2*y[i - 2],
* where y[] is output and x[] is input. This is performed out of order
* to do parallel load of input value.
*/
mac.l %a2, %d1, %acc0 | acc = b2*x[i - 2]
move.l %d0, %d1 | fix input history
mac.l %a1, %d0, (%a6), %d0, %acc0 | acc += b1*x[i - 1], x[i] -> d0
mac.l %a0, %d0, %acc0 | acc += b0*x[i]
mac.l %a3, %d2, %acc0 | acc += a1*y[i - 1]
mac.l %a4, %d3, %acc0 | acc += a2*y[i - 2]
move.l %d2, %d3 | fix output history
#ifdef HIGH_PRECISION
/* NOTE(review): this disabled path looks inconsistent and should be checked
* before HIGH_PRECISION is enabled: move.b appears to copy only the low byte
* into d4 rather than clearing its upper bytes, and the or.l below leaves the
* combined value in d4 while d2 is what gets stored and kept as history.
*/
move.l %accext01, %d2 | fetch lower part of accumulator
move.b %d2, %d4 | clear upper three bytes
lsr.l %d6, %d4 | shift lower bits
#endif
movclr.l %acc0, %d2 | fetch upper part of result
asl.l %d7, %d2 | restore fixed point format
#ifdef HIGH_PRECISION
or.l %d2, %d4 | combine lower and upper parts
#endif
move.l %d2, (%a6)+ | save result
subq.l #1, %d5 | are we done with this channel?
jne .loop
movem.l %d0-%d3, (%a5) | save history back to struct
lea.l (4*4, %a5), %a5 | point to next channel's history
subq.l #1, (11*4+16, %sp) | have we processed both channels?
jne .filterloop
movem.l (%sp), %d2-%d7/%a2-%a6
lea.l (11*4, %sp), %sp
rts

View file

@ -0,0 +1,17 @@
eq enabled: on
eq precut: 45
eq band 0 cutoff: 60
eq band 0 q: 7
eq band 0 gain: 45
eq band 1 cutoff: 200
eq band 1 q: 10
eq band 1 gain: 10
eq band 2 cutoff: 800
eq band 2 q: 10
eq band 2 gain: 15
eq band 3 cutoff: 4000
eq band 3 q: 10
eq band 3 gain: 30
eq band 4 cutoff: 12000
eq band 4 q: 7
eq band 4 gain: 20

View file

@ -0,0 +1,17 @@
eq enabled: on
eq precut: 50
eq band 0 cutoff: 60
eq band 0 q: 7
eq band 0 gain: 50
eq band 1 cutoff: 200
eq band 1 q: 10
eq band 1 gain: 35
eq band 2 cutoff: 800
eq band 2 q: 10
eq band 2 gain: 15
eq band 3 cutoff: 4000
eq band 3 q: 10
eq band 3 gain: 5
eq band 4 cutoff: 12000
eq band 4 q: 7
eq band 4 gain: -5

View file

@ -0,0 +1,17 @@
eq enabled: on
eq precut: 50
eq band 0 cutoff: 60
eq band 0 q: 7
eq band 0 gain: 50
eq band 1 cutoff: 200
eq band 1 q: 10
eq band 1 gain: 40
eq band 2 cutoff: 800
eq band 2 q: 10
eq band 2 gain: -20
eq band 3 cutoff: 4000
eq band 3 q: 10
eq band 3 gain: 10
eq band 4 cutoff: 12000
eq band 4 q: 7
eq band 4 gain: 20

View file

@ -0,0 +1,17 @@
eq enabled: off
eq precut: 0
eq band 0 cutoff: 60
eq band 0 q: 7
eq band 0 gain: 0
eq band 1 cutoff: 200
eq band 1 q: 10
eq band 1 gain: 0
eq band 2 cutoff: 800
eq band 2 q: 10
eq band 2 gain: 0
eq band 3 cutoff: 4000
eq band 3 q: 10
eq band 3 gain: 0
eq band 4 cutoff: 12000
eq band 4 q: 7
eq band 4 gain: 0

View file

@ -0,0 +1,17 @@
eq enabled: on
eq precut: 45
eq band 0 cutoff: 60
eq band 0 q: 7
eq band 0 gain: 30
eq band 1 cutoff: 200
eq band 1 q: 10
eq band 1 gain: 10
eq band 2 cutoff: 800
eq band 2 q: 10
eq band 2 gain: 45
eq band 3 cutoff: 4000
eq band 3 q: 10
eq band 3 gain: 25
eq band 4 cutoff: 12000
eq band 4 q: 7
eq band 4 gain: 10

View file

@ -0,0 +1,17 @@
eq enabled: on
eq precut: 55
eq band 0 cutoff: 60
eq band 0 q: 7
eq band 0 gain: 45
eq band 1 cutoff: 200
eq band 1 q: 10
eq band 1 gain: 5
eq band 2 cutoff: 800
eq band 2 q: 10
eq band 2 gain: 25
eq band 3 cutoff: 4000
eq band 3 q: 10
eq band 3 gain: 15
eq band 4 cutoff: 12000
eq band 4 q: 7
eq band 4 gain: 55

View file

@ -0,0 +1,17 @@
eq enabled: on
eq precut: 65
eq band 0 cutoff: 60
eq band 0 q: 7
eq band 0 gain: 65
eq band 1 cutoff: 200
eq band 1 q: 10
eq band 1 gain: 25
eq band 2 cutoff: 800
eq band 2 q: 10
eq band 2 gain: -10
eq band 3 cutoff: 4000
eq band 3 q: 10
eq band 3 gain: 15
eq band 4 cutoff: 12000
eq band 4 q: 7
eq band 4 gain: 35

View file

@ -0,0 +1,17 @@
eq enabled: on
eq precut: 60
eq band 0 cutoff: 60
eq band 0 q: 7
eq band 0 gain: 40
eq band 1 cutoff: 200
eq band 1 q: 10
eq band 1 gain: 15
eq band 2 cutoff: 800
eq band 2 q: 10
eq band 2 gain: -25
eq band 3 cutoff: 4000
eq band 3 q: 10
eq band 3 gain: 5
eq band 4 cutoff: 12000
eq band 4 q: 7
eq band 4 gain: 60

View file

@ -0,0 +1,17 @@
eq enabled: on
eq precut: 20
eq band 0 cutoff: 60
eq band 0 q: 7
eq band 0 gain: -25
eq band 1 cutoff: 200
eq band 1 q: 10
eq band 1 gain: 5
eq band 2 cutoff: 800
eq band 2 q: 10
eq band 2 gain: 20
eq band 3 cutoff: 4000
eq band 3 q: 10
eq band 3 gain: -15
eq band 4 cutoff: 12000
eq band 4 q: 7
eq band 4 gain: 15

View file

@ -0,0 +1,17 @@
eq enabled: on
eq precut: 50
eq band 0 cutoff: 60
eq band 0 q: 7
eq band 0 gain: -10
eq band 1 cutoff: 200
eq band 1 q: 10
eq band 1 gain: 5
eq band 2 cutoff: 800
eq band 2 q: 10
eq band 2 gain: 50
eq band 3 cutoff: 4000
eq band 3 q: 10
eq band 3 gain: 15
eq band 4 cutoff: 12000
eq band 4 q: 7
eq band 4 gain: -10

View file

@ -0,0 +1,17 @@
eq enabled: on
eq precut: 45
eq band 0 cutoff: 60
eq band 0 q: 7
eq band 0 gain: 35
eq band 1 cutoff: 200
eq band 1 q: 10
eq band 1 gain: 45
eq band 2 cutoff: 800
eq band 2 q: 10
eq band 2 gain: 5
eq band 3 cutoff: 4000
eq band 3 q: 10
eq band 3 gain: 25
eq band 4 cutoff: 12000
eq band 4 q: 7
eq band 4 gain: 30

View file

@ -0,0 +1,17 @@
eq enabled: on
eq precut: 45
eq band 0 cutoff: 60
eq band 0 q: 7
eq band 0 gain: 25
eq band 1 cutoff: 200
eq band 1 q: 10
eq band 1 gain: 10
eq band 2 cutoff: 800
eq band 2 q: 10
eq band 2 gain: 0
eq band 3 cutoff: 4000
eq band 3 q: 10
eq band 3 gain: 20
eq band 4 cutoff: 12000
eq band 4 q: 7
eq band 4 gain: 45

View file

@ -0,0 +1,17 @@
eq enabled: on
eq precut: 45
eq band 0 cutoff: 60
eq band 0 q: 7
eq band 0 gain: -45
eq band 1 cutoff: 200
eq band 1 q: 10
eq band 1 gain: 5
eq band 2 cutoff: 800
eq band 2 q: 10
eq band 2 gain: 45
eq band 3 cutoff: 4000
eq band 3 q: 10
eq band 3 gain: 20
eq band 4 cutoff: 12000
eq band 4 q: 7
eq band 4 gain: 0

450
lib/rbcodec/dsp/tdspeed.c Normal file
View file

@ -0,0 +1,450 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2006 by Nicolas Pitre <nico@cam.org>
* Copyright (C) 2006-2007 by Stéphane Doyon <s.doyon@videotron.ca>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include <inttypes.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include "sound.h"
#include "core_alloc.h"
#include "system.h"
#include "tdspeed.h"
#include "settings.h"
#define assert(cond)
#define MIN_RATE 8000
#define MAX_RATE 48000 /* double buffer for double rate */
#define MINFREQ 100
#define FIXED_BUFSIZE 3072 /* 48KHz factor 3.0 */
static int32_t** dsp_src;
static int handles[4];
static int32_t *overlap_buffer[2] = { NULL, NULL };
static int32_t *outbuf[2] = { NULL, NULL };
/* Buflib move callback for the two output buffers.
 *
 * The buffer is identified by comparing its old address with outbuf[0];
 * anything else is taken to be the second channel. The cached pointer in
 * outbuf[] is always refreshed, so it stays valid even when the buffer is
 * moved before tdspeed_doit() has run for the first time. The pointer array
 * remembered from the last tdspeed_doit() call, if any, is updated too.
 */
static int move_callback(int handle, void* current, void* new)
{
    /* TODO */
    (void)handle;

    const int ch = (current == outbuf[0]) ? 0 : 1;

    outbuf[ch] = new;
    if (dsp_src)
        dsp_src[ch] = new;

    return BUFLIB_CB_OK;
}

static struct buflib_callbacks ops = {
    .move_callback = move_callback,
    .shrink_callback = NULL,
};
/* Buflib move callback for the two overlap buffers.
 *
 * The buffer is identified by comparing its old address with
 * overlap_buffer[0]; anything else is taken to be the second channel. The
 * cached pointer is refreshed unconditionally: it does not depend on
 * whether tdspeed_doit() has been called yet.
 *
 * NOTE(review): tdspeed_config() copies these pointers into
 * tdspeed_state.ovl_buff, which is not refreshed here - confirm whether a
 * move after configuration can leave that copy stale.
 */
static int ovl_move_callback(int handle, void* current, void* new)
{
    /* TODO */
    (void)handle;

    const int ch = (current == overlap_buffer[0]) ? 0 : 1;

    overlap_buffer[ch] = new;

    return BUFLIB_CB_OK;
}

static struct buflib_callbacks ovl_ops = {
    .move_callback = ovl_move_callback,
    .shrink_callback = NULL,
};
/* Time-stretch state shared by tdspeed_config(), which computes the frame
   geometry and resets the overlap bookkeeping, and tdspeed_apply(), which
   updates the overlap fields while processing. There is a single instance. */
static struct tdspeed_state_s
{
bool stereo;
int32_t shift_max; /* maximum displacement on a frame */
int32_t src_step; /* source window pace */
int32_t dst_step; /* destination window pace */
int32_t dst_order; /* power of two for dst_step */
int32_t ovl_shift; /* overlap buffer frame shift */
int32_t ovl_size; /* overlap buffer used size */
int32_t ovl_space; /* overlap buffer size */
int32_t *ovl_buff[2]; /* overlap buffer */
} tdspeed_state;
/* Allocate the overlap and output buffers if time-stretching is enabled in
 * the settings and they do not exist yet.
 *
 * A buffer pointer is only set when its allocation succeeded (handle > 0,
 * the same test tdspeed_finish() uses before freeing). On failure the
 * pointer stays NULL, which tdspeed_config() detects and reports by
 * returning false.
 */
void tdspeed_init(void)
{
    if (!global_settings.timestretch_enabled)
        return;

    /* Allocate buffers */
    if (overlap_buffer[0] == NULL)
    {
        handles[0] = core_alloc_ex("tdspeed ovl left",
                                   FIXED_BUFSIZE * sizeof(int32_t), &ovl_ops);
        if (handles[0] > 0)
            overlap_buffer[0] = core_get_data(handles[0]);
    }

    if (overlap_buffer[1] == NULL)
    {
        handles[1] = core_alloc_ex("tdspeed ovl right",
                                   FIXED_BUFSIZE * sizeof(int32_t), &ovl_ops);
        if (handles[1] > 0)
            overlap_buffer[1] = core_get_data(handles[1]);
    }

    if (outbuf[0] == NULL)
    {
        handles[2] = core_alloc_ex("tdspeed left",
                                   TDSPEED_OUTBUFSIZE * sizeof(int32_t), &ops);
        if (handles[2] > 0)
            outbuf[0] = core_get_data(handles[2]);
    }

    if (outbuf[1] == NULL)
    {
        handles[3] = core_alloc_ex("tdspeed right",
                                   TDSPEED_OUTBUFSIZE * sizeof(int32_t), &ops);
        if (handles[3] > 0)
            outbuf[1] = core_get_data(handles[3]);
    }
}
/* Release every buffer that was allocated by tdspeed_init() and forget the
 * cached pointers, so that a later tdspeed_init() allocates them afresh.
 */
void tdspeed_finish(void)
{
    unsigned idx;

    for (idx = 0; idx < ARRAYLEN(handles); idx++)
    {
        /* only handles greater than zero refer to a live allocation */
        if (handles[idx] <= 0)
            continue;

        core_free(handles[idx]);
        handles[idx] = 0;
    }

    overlap_buffer[1] = NULL;
    overlap_buffer[0] = NULL;
    outbuf[1] = NULL;
    outbuf[0] = NULL;
}
/* Configure the time-stretcher.
 *
 * samplerate: must lie within MIN_RATE .. MAX_RATE.
 * stereo:     false makes both overlap buffer slots refer to the same
 *             buffer.
 * factor:     stretch factor, STRETCH_MIN .. STRETCH_MAX; PITCH_SPEED_100
 *             (no stretching) is rejected.
 *
 * Returns false, leaving the state untouched, when a buffer is missing or a
 * parameter is rejected. Otherwise the frame geometry is computed, the
 * overlap state is reset and true is returned.
 */
bool tdspeed_config(int samplerate, bool stereo, int32_t factor)
{
    /* Check buffers were allocated ok */
    const bool have_ovl = overlap_buffer[0] != NULL &&
                          overlap_buffer[1] != NULL;
    const bool have_out = outbuf[0] != NULL && outbuf[1] != NULL;

    if (!have_ovl || !have_out)
        return false;

    /* Check parameters */
    if (factor == PITCH_SPEED_100 ||
        samplerate < MIN_RATE || samplerate > MAX_RATE ||
        factor < STRETCH_MIN || factor > STRETCH_MAX)
        return false;

    /* Destination step: one period of MINFREQ, shortened when speeding up */
    int32_t dst = samplerate / MINFREQ;

    if (factor > PITCH_SPEED_100)
        dst = dst * PITCH_SPEED_100 / factor;

    /* Replace it by the next power of two strictly above it */
    int32_t order = 1;

    for (dst >>= 1; dst != 0; dst >>= 1)
        order++;

    dst = (1 << order);

    const int32_t src = dst * factor / PITCH_SPEED_100;
    const int32_t max_shift = (dst > src) ? dst : src;

    /* Source samples needed to produce one output frame */
    int frame_sz = max_shift + dst;

    if (dst > src)
        frame_sz += dst - src;

    /* Overlap buffer space requirement, capped to what was allocated */
    int32_t space = ((frame_sz - 2) / src) * src + frame_sz;

    if (src > dst)
        space += 2*src - dst;

    if (space > FIXED_BUFSIZE)
        space = FIXED_BUFSIZE;

    struct tdspeed_state_s *st = &tdspeed_state;

    st->stereo    = stereo;
    st->dst_order = order;
    st->dst_step  = dst;
    st->src_step  = src;
    st->shift_max = max_shift;
    st->ovl_space = space;
    st->ovl_size  = 0;
    st->ovl_shift = 0;

    st->ovl_buff[0] = overlap_buffer[0];
    st->ovl_buff[1] = stereo ? overlap_buffer[1] : st->ovl_buff[0];

    return true;
}
/* Core time-stretch routine. Output frames of dst_step samples are produced
 * by cross-fading the end of the previously emitted data with an input
 * window that advances by src_step samples per frame. Input that cannot be
 * processed yet is kept in the overlap buffer for the next call.
 *
 * buf_out:  per-channel output buffers.
 * buf_in:   per-channel input buffers; when both entries are the same
 *           pointer the input is treated as mono and only channel 0 is
 *           processed.
 * data_len: number of input samples per channel.
 * last:     0  = more input will follow, leftover data is saved in the
 *                overlap buffer;
 *           -1 = internal recursive call on the overlap buffer, only the
 *                frame shift is remembered;
 *           any other value = final call, all remaining data is copied to
 *                the output.
 * out_size: capacity of each output buffer in samples. It is only referenced
 *           by the assert() checks, and assert() is defined empty in this
 *           file, so nothing is actually bounds-checked at run time.
 *
 * Returns the number of samples written to output channel 0.
 */
static int tdspeed_apply(int32_t *buf_out[2], int32_t *buf_in[2],
int data_len, int last, int out_size)
/* data_len in samples */
{
struct tdspeed_state_s *st = &tdspeed_state;
int32_t *dest[2];
int32_t next_frame, prev_frame, src_frame_sz;
bool stereo = buf_in[0] != buf_in[1];
assert(stereo == st->stereo);
/* same source frame size computation as in tdspeed_config() */
src_frame_sz = st->shift_max + st->dst_step;
if (st->dst_step > st->src_step)
src_frame_sz += st->dst_step - st->src_step;
/* deal with overlap data first, if any */
if (st->ovl_size)
{
int32_t have, copy, steps;
have = st->ovl_size;
if (st->ovl_shift > 0)
have -= st->ovl_shift;
/* append just enough data to have all of the overlap buffer consumed */
steps = (have - 1) / st->src_step;
copy = steps * st->src_step + src_frame_sz - have;
if (copy < src_frame_sz - st->dst_step)
copy += st->src_step; /* one more step to allow for pregap data */
if (copy > data_len)
copy = data_len;
assert(st->ovl_size + copy <= FIXED_BUFSIZE);
memcpy(st->ovl_buff[0] + st->ovl_size, buf_in[0],
copy * sizeof(int32_t));
if (stereo)
memcpy(st->ovl_buff[1] + st->ovl_size, buf_in[1],
copy * sizeof(int32_t));
if (!last && have + copy < src_frame_sz)
{
/* still not enough to process at least one frame */
st->ovl_size += copy;
return 0;
}
/* recursively call ourselves to process the overlap buffer */
have = st->ovl_size;
/* cleared first so that the recursive call takes the no-overlap path */
st->ovl_size = 0;
if (copy == data_len)
{
/* all of the new input fitted into the overlap buffer, so the
   recursive call handles everything, including the 'last' flag */
assert(have + copy <= FIXED_BUFSIZE);
return tdspeed_apply(buf_out, st->ovl_buff, have+copy, last,
out_size);
}
assert(have + copy <= FIXED_BUFSIZE);
int i = tdspeed_apply(buf_out, st->ovl_buff, have+copy, -1, out_size);
/* continue writing after what the recursive call produced */
dest[0] = buf_out[0] + i;
dest[1] = buf_out[1] + i;
/* readjust pointers to account for data already consumed */
next_frame = copy - src_frame_sz + st->src_step;
prev_frame = next_frame - st->ovl_shift;
}
else
{
dest[0] = buf_out[0];
dest[1] = buf_out[1];
next_frame = prev_frame = 0;
/* a positive shift delays the next frame, a negative one the previous */
if (st->ovl_shift > 0)
next_frame += st->ovl_shift;
else
prev_frame += -st->ovl_shift;
}
st->ovl_shift = 0;
/* process all complete frames */
while (data_len - next_frame >= src_frame_sz)
{
/* find frame overlap: pick the shift with the smallest sum of absolute
   differences between the candidate window and the previous frame */
int const INC1 = 8;
int const INC2 = 32;
int64_t min_delta = INT64_MAX; /* most positive */
int shift = 0;
/* Power of 2 of a 28bit number requires 56bits, can accumulate
256times in a 64bit variable. */
assert(st->dst_step / INC2 <= 256);
assert(next_frame + st->shift_max - 1 + st->dst_step - 1 < data_len);
assert(prev_frame + st->dst_step - 1 < data_len);
/* candidate shifts are tried every INC1 samples, and only every INC2-th
   sample of a frame is compared */
for (int i = 0; i < st->shift_max; i += INC1)
{
int64_t delta = 0;
int32_t *curr = buf_in[0] + next_frame + i;
int32_t *prev = buf_in[0] + prev_frame;
for (int j = 0; j < st->dst_step; j += INC2, curr += INC2, prev += INC2)
{
int32_t diff = *curr - *prev;
delta += abs(diff);
/* give up on this candidate once it cannot beat the best so far */
if (delta >= min_delta)
goto skip;
}
if (stereo)
{
curr = buf_in[1] + next_frame + i;
prev = buf_in[1] + prev_frame;
for (int j = 0; j < st->dst_step; j += INC2, curr += INC2, prev += INC2)
{
int32_t diff = *curr - *prev;
delta += abs(diff);
if (delta >= min_delta)
goto skip;
}
}
min_delta = delta;
shift = i;
skip:;
}
/* overlap fading-out previous frame with fading-in current frame */
int32_t *curr = buf_in[0] + next_frame + shift;
int32_t *prev = buf_in[0] + prev_frame;
int32_t *d = dest[0];
assert(next_frame + shift + st->dst_step - 1 < data_len);
assert(prev_frame + st->dst_step - 1 < data_len);
assert(dest[0] - buf_out[0] + st->dst_step - 1 < out_size);
/* linear cross-fade: the weights i and j always sum to dst_step, which
   is 1 << dst_order, hence the final shift */
for (int i = 0, j = st->dst_step; j; i++, j--)
{
*d++ = (*curr++ * (int64_t)i +
*prev++ * (int64_t)j) >> st->dst_order;
}
dest[0] = d;
if (stereo)
{
curr = buf_in[1] + next_frame + shift;
prev = buf_in[1] + prev_frame;
d = dest[1];
for (int i = 0, j = st->dst_step; j; i++, j--)
{
assert(d < buf_out[1] + out_size);
*d++ = (*curr++ * (int64_t)i +
*prev++ * (int64_t)j) >> st->dst_order;
}
dest[1] = d;
}
/* adjust pointers for next frame */
prev_frame = next_frame + shift + st->dst_step;
next_frame += st->src_step;
/* here next_frame - prev_frame = src_step - dst_step - shift */
assert(next_frame - prev_frame == st->src_step - st->dst_step - shift);
}
/* now deal with remaining partial frames */
if (last == -1)
{
/* special overlap buffer processing: remember frame shift only */
st->ovl_shift = next_frame - prev_frame;
}
else if (last != 0)
{
/* last call: purge all remaining data to output buffer */
int i = data_len - prev_frame;
assert(dest[0] + i <= buf_out[0] + out_size);
memcpy(dest[0], buf_in[0] + prev_frame, i * sizeof(int32_t));
dest[0] += i;
if (stereo)
{
assert(dest[1] + i <= buf_out[1] + out_size);
memcpy(dest[1], buf_in[1] + prev_frame, i * sizeof(int32_t));
dest[1] += i;
}
}
else
{
/* preserve remaining data + needed overlap data for next call */
st->ovl_shift = next_frame - prev_frame;
/* keep everything from whichever of the two positions comes first */
int i = (st->ovl_shift < 0) ? next_frame : prev_frame;
st->ovl_size = data_len - i;
assert(st->ovl_size <= FIXED_BUFSIZE);
memcpy(st->ovl_buff[0], buf_in[0] + i, st->ovl_size * sizeof(int32_t));
if (stereo)
memcpy(st->ovl_buff[1], buf_in[1] + i, st->ovl_size * sizeof(int32_t));
}
return dest[0] - buf_out[0];
}
/* Report the output size estimate, which is always the full output buffer
 * size in samples. The parameter list is spelled (void) to match the
 * prototype in tdspeed.h.
 */
long tdspeed_est_output_size(void)
{
    return TDSPEED_OUTBUFSIZE;
}
long tdspeed_est_input_size(long size)
{
struct tdspeed_state_s *st = &tdspeed_state;
size = (size - st->ovl_size) * st->src_step / st->dst_step;
if (size < 0)
size = 0;
return size;
}
/* Time-stretch 'count' samples from src[] into the internal output buffers.
 * On return src[0] and src[1] point at those output buffers, and the number
 * of samples produced is returned. The src array is also remembered so that
 * the buffer move callback can update it.
 */
int tdspeed_doit(int32_t *src[], int count)
{
    int32_t *out[2] = { outbuf[0], outbuf[1] };
    int produced;

    dsp_src = src;
    produced = tdspeed_apply(out, src, count, 0, TDSPEED_OUTBUFSIZE);

    src[0] = outbuf[0];
    src[1] = outbuf[1];

    return produced;
}

49
lib/rbcodec/dsp/tdspeed.h Normal file
View file

@ -0,0 +1,49 @@
/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2006 by Nicolas Pitre <nico@cam.org>
* Copyright (C) 2006-2007 by Stéphane Doyon <s.doyon@videotron.ca>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#ifndef _TDSPEED_H
#define _TDSPEED_H
#include "dsp.h"
#define TDSPEED_OUTBUFSIZE 4096
/* Helper macros that derive one of pitch, stretch and speed from the other
 * two. Parameters are in alphabetical order. Results are rounded to nearest
 * by adding half of the divisor before dividing. Every argument is
 * parenthesized so that expressions can be passed safely; note that the
 * divisor argument of GET_PITCH and GET_STRETCH is evaluated twice. */
#define GET_SPEED(pitch, stretch) \
    (((pitch) * (stretch) + PITCH_SPEED_100 / 2L) / PITCH_SPEED_100)
#define GET_PITCH(speed, stretch) \
    (((speed) * PITCH_SPEED_100 + (stretch) / 2L) / (stretch))
#define GET_STRETCH(pitch, speed) \
    (((speed) * PITCH_SPEED_100 + (pitch) / 2L) / (pitch))
void tdspeed_init(void);
void tdspeed_finish(void);
bool tdspeed_config(int samplerate, bool stereo, int32_t factor);
long tdspeed_est_output_size(void);
long tdspeed_est_input_size(long size);
int tdspeed_doit(int32_t *src[], int count);
#define STRETCH_MAX (250L * PITCH_SPEED_PRECISION) /* 250% */
#define STRETCH_MIN (35L * PITCH_SPEED_PRECISION) /* 35% */
#endif