1
0
Fork 0
forked from len0rd/rockbox

Patch #1426489 - Shorten codec optimisations from Mark Arigo

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@8615 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Dave Chapman 2006-02-07 22:16:35 +00:00
parent 6099dc8b77
commit 8e46ab85a9
3 changed files with 212 additions and 114 deletions

View file

@ -28,12 +28,6 @@
#include "golomb.h"
#include "shndec.h"
/* These seem reasonable from my test files.
Does MAX_HEADER_SIZE really need to be 16384? */
#define MAX_PRED_ORDER 16
#define MAX_HEADER_SIZE DEFAULT_BLOCK_SIZE*4
//#define MAX_HEADER_SIZE 16384
#define ULONGSIZE 2
#define WAVE_FORMAT_PCM 0x0001
@ -54,16 +48,6 @@
#define V2LPCQOFFSET (1 << LPCQUANT)
#define FNSIZE 2
#define FN_DIFF0 0
#define FN_DIFF1 1
#define FN_DIFF2 2
#define FN_DIFF3 3
#define FN_QUIT 4
#define FN_BLOCKSIZE 5
#define FN_BITSHIFT 6
#define FN_QLPC 7
#define FN_ZERO 8
#define FN_VERBATIM 9
#define VERBATIM_CKSIZE_SIZE 5
#define VERBATIM_BYTE_SIZE 8
@ -76,22 +60,21 @@
#define get_le16(gb) bswap_16(get_bits_long(gb, 16))
#define get_le32(gb) bswap_32(get_bits_long(gb, 32))
static inline uint32_t bswap_32(uint32_t x){
/* Reverse the byte order of a 32-bit word: exchange the two 16-bit halves
   first, then swap the two bytes inside each half. */
static uint32_t bswap_32(uint32_t x){
    const uint32_t halves = (x << 16) | (x >> 16);
    return ((halves & 0x00FF00FF) << 8) | ((halves & 0xFF00FF00) >> 8);
}
static inline uint16_t bswap_16(uint16_t x){
/* Exchange the high and low bytes of a 16-bit value. */
static uint16_t bswap_16(uint16_t x){
    const uint16_t low_to_high = (uint16_t)(x << 8);
    const uint16_t high_to_low = (uint16_t)(x >> 8);
    return (uint16_t)(low_to_high | high_to_low);
}
/* converts fourcc string to int */
static inline int ff_get_fourcc(const char *s){
/* Packs the first four bytes at s into an int, with s[0] in the least
 * significant byte and s[3] in the most significant byte.
 *
 * s must point to at least four readable bytes (it is not required to be
 * NUL-terminated; only s[0]..s[3] are read).
 *
 * The bytes are read as unsigned values so that a byte >= 0x80 is not
 * sign-extended on targets where plain char is signed. */
static int ff_get_fourcc(const char *s){
    const unsigned char *b = (const unsigned char *)s;
    const uint32_t packed = (uint32_t)b[0]
                          | ((uint32_t)b[1] << 8)
                          | ((uint32_t)b[2] << 16)
                          | ((uint32_t)b[3] << 24);
    return (int)packed;
}
static unsigned int get_uint(ShortenContext *s, int k) ICODE_ATTR;
static unsigned int get_uint(ShortenContext *s, int k)
{
if (s->version != 0)
@ -99,10 +82,77 @@ static unsigned int get_uint(ShortenContext *s, int k)
return get_ur_golomb_shorten(&s->gb, k);
}
static void decode_subframe_lpc(ShortenContext *s, int32_t *decoded,
int residual_size, int pred_order) ICODE_ATTR;
static void decode_subframe_lpc(ShortenContext *s, int32_t *decoded,
int residual_size, int pred_order)
#if defined(CPU_COLDFIRE) && !defined(SIMULATOR)
static void coldfire_lshift_samples(int n, int shift, int32_t *samples) ICODE_ATTR;
/* Left-shifts n consecutive 32-bit samples in place by `shift` bits.
 *
 * n       number of samples to process; n == 0 falls straight through to
 *         the exit label without touching memory
 * shift   shift count applied to every sample
 * samples first sample to shift
 *
 * Equivalent C:
 *     for (i = 0; i < n; i++)
 *         samples[i] <<= shift;
 *
 * The asm processes n/4 groups of four samples with movem.l, then the
 * remaining n%4 samples one at a time. */
static void coldfire_lshift_samples(int n, int shift, int32_t *samples)
{
    asm volatile (
        "move.l %[n], %%d0 \n" /* d0 = loop counter */
        "asr.l #2, %%d0 \n"    /* d0 = n / 4; flags feed the beq below */
        "beq l1_shift \n"
        "l2_shift:" /* main loop (unroll by 4) */
        "movem.l (%[x]), %%d4-%%d7 \n"
        "asl.l %[s], %%d4 \n"
        "asl.l %[s], %%d5 \n"
        "asl.l %[s], %%d6 \n"
        "asl.l %[s], %%d7 \n"
        "movem.l %%d4-%%d7, (%[x]) \n"
        "add.l #16, %[x] \n"
        "subq.l #1, %%d0 \n"
        "bne l2_shift \n"
        "l1_shift:" /* any loops left? */
        "and.l #3, %[n] \n"
        "beq l4_shift \n"
        "l3_shift:" /* remaining loops */
        "move.l (%[x]), %%d4 \n"
        "asl.l %[s], %%d4 \n"
        "move.l %%d4, (%[x])+ \n"
        "subq.l #1, %[n] \n"
        "bne l3_shift \n"
        "l4_shift:" /* exit */
        : [n] "+d" (n),      /* any data register; reduced to n & 3, then counted down */
          [s] "+d" (shift),  /* any data register; only read by the asm */
          [x] "+a" (samples) /* any address register; advanced past each sample */
        :
        /* "memory": the asm reads and writes the sample buffer through
           %[x], which the operand list alone does not tell the compiler. */
        : "%d0", "%d4", "%d5", "%d6", "%d7", "memory"
    );
}
#endif
/* Applies the stream's bitshift to freshly decoded samples.
 *
 * s        decoder state; reads bitshift, blocksize and nwrap, and
 *          reads/updates last_bitshift
 * samples  start of the current block; the nwrap entries before it are
 *          accessed as well
 */
static inline void fix_bitshift(ShortenContext *s, int32_t *samples)
{
    int k;

    /* Step 1: shift blocksize samples, beginning nwrap entries before
       `samples`.  The last nwrap samples of the current block are therefore
       not covered by this pass and are left for a later call. */
    if (s->bitshift != 0) {
#if defined(CPU_COLDFIRE) && !defined(SIMULATOR)
        coldfire_lshift_samples(s->blocksize, s->bitshift, samples - s->nwrap);
#else
        for (k = 0; k < s->blocksize; k++)
            samples[k - s->nwrap] <<= s->bitshift;
#endif
    }

    /* Step 2: nothing more to do unless the bitshift value has changed. */
    if (s->bitshift == s->last_bitshift)
        return;

    /* On a change, the nwrap samples preceding `samples` are additionally
       shifted by the previous bitshift (when it was non-zero), and the new
       value is remembered for the next call. */
    if (s->last_bitshift != 0) {
        for (k = s->nwrap; k > 0; k--)
            samples[-k] <<= s->last_bitshift;
    }
    s->last_bitshift = s->bitshift;
}
static inline void decode_subframe_lpc(ShortenContext *s, int32_t *decoded,
int residual_size, int pred_order)
{
int sum, i, j;
int coeffs[MAX_PRED_ORDER];
@ -121,18 +171,12 @@ static void decode_subframe_lpc(ShortenContext *s, int32_t *decoded,
}
}
int shorten_decode_frame(ShortenContext *s,
int32_t *decoded,
int32_t *offset,
uint8_t *buf,
int buf_size)
static inline int shorten_decode_frame(ShortenContext *s, int32_t *decoded,
int32_t *offset)
{
int i;
int32_t sum;
init_get_bits(&s->gb, buf, buf_size*8);
get_bits(&s->gb, s->bitindex);
int cmd = get_ur_golomb_shorten(&s->gb, FNSIZE);
switch (cmd) {
case FN_ZERO:
@ -201,10 +245,6 @@ int shorten_decode_frame(ShortenContext *s,
case FN_QLPC:
{
int pred_order = get_ur_golomb_shorten(&s->gb, LPCQSIZE);
if (pred_order > MAX_PRED_ORDER) {
return -2;
}
for (i=0; i<pred_order; i++)
decoded[i - pred_order] -= coffset;
decode_subframe_lpc(s, decoded, residual_size, pred_order);
@ -231,12 +271,7 @@ int shorten_decode_frame(ShortenContext *s,
}
}
for (i=-s->nwrap; i<0; i++)
decoded[i] = decoded[i + s->blocksize];
int scale = s->bitshift + SHN_OUTPUT_DEPTH - s->bits_per_sample;
for (i = 0; i < s->blocksize; i++)
decoded[i] <<= scale;
fix_bitshift(s, decoded);
break;
}
@ -244,29 +279,88 @@ int shorten_decode_frame(ShortenContext *s,
i = get_ur_golomb_shorten(&s->gb, VERBATIM_CKSIZE_SIZE);
while (i--)
get_ur_golomb_shorten(&s->gb, VERBATIM_BYTE_SIZE);
return 4;
break;
case FN_BITSHIFT:
s->bitshift = get_ur_golomb_shorten(&s->gb, BITSHIFTSIZE);
return 3;
break;
case FN_BLOCKSIZE:
s->blocksize = get_uint(s, av_log2(s->blocksize));
return 2;
break;
case FN_QUIT:
return 1;
break;
default:
return -1;
return FN_ERROR;
break;
}
return 0;
return cmd;
}
/* Decodes up to NUM_DEC_LOOPS blocks from buf, alternating between the two
 * channel buffers, then wraps and scales the decoded samples.
 *
 * s                  decoder state; s->bitindex bits are skipped at the
 *                    start of buf before decoding begins
 * nsamples           out: reset to 0, then increased by s->blocksize each
 *                    time a block for the second channel has been decoded
 * decoded0/decoded1  per-channel sample buffers; decoded output starts
 *                    s->nwrap entries in
 * offset0/offset1    per-channel offset buffers handed to
 *                    shorten_decode_frame()
 * buf, buf_size      input bitstream and its length in bytes
 * yield              called once before every block is decoded
 *
 * Returns the command code from the last shorten_decode_frame() call.
 */
int shorten_decode_frames(ShortenContext *s, int *nsamples,
                          int32_t *decoded0, int32_t *decoded1,
                          int32_t *offset0, int32_t *offset1,
                          uint8_t *buf, int buf_size,
                          void (*yield)(void))
{
    int cmd;
    int loops = 0;

    *nsamples = 0;

    init_get_bits(&s->gb, buf, buf_size*8);
    get_bits(&s->gb, s->bitindex);

    while (loops < NUM_DEC_LOOPS) {
        /* Even iterations decode into channel 0, odd ones into channel 1. */
        const int second_chan = loops % 2;
        int32_t *dst = (second_chan ? decoded1 : decoded0)
                       + s->nwrap + *nsamples;
        int32_t *off = second_chan ? offset1 : offset0;

        yield();
        cmd = shorten_decode_frame(s, dst, off);

        /* End of stream or a decode error stops the loop. */
        if (cmd == FN_QUIT || cmd == FN_ERROR)
            break;

        /* These commands produce no audio block: retry the same slot. */
        if (cmd == FN_VERBATIM || cmd == FN_BITSHIFT || cmd == FN_BLOCKSIZE)
            continue;

        /* Only count the block once both channels have been decoded. */
        if (second_chan)
            *nsamples += s->blocksize;
        loops++;
    }

    if (*nsamples == 0)
        return cmd;

    /* Wrap the samples for the next loop.  This has to happen before the
       scaling below so the wrapped copies stay unscaled. */
    int w;
    for (w = 0; w < s->nwrap; w++) {
        decoded0[w] = decoded0[*nsamples + w];
        decoded1[w] = decoded1[*nsamples + w];
    }

    /* Scale the samples for the pcmbuf */
    const int scale = SHN_OUTPUT_DEPTH - s->bits_per_sample;
#if defined(CPU_COLDFIRE) && !defined(SIMULATOR)
    coldfire_lshift_samples(*nsamples, scale, decoded0 + s->nwrap);
    coldfire_lshift_samples(*nsamples, scale, decoded1 + s->nwrap);
#else
    int32_t *out0 = decoded0 + s->nwrap;
    int32_t *out1 = decoded1 + s->nwrap;
    for (w = 0; w < *nsamples; w++) {
        out0[w] <<= scale;
        out1[w] <<= scale;
    }
#endif

    return cmd;
}
static int decode_wave_header(ShortenContext *s,

View file

@ -1,11 +1,31 @@
#include "bitstream.h"
#define SHN_OUTPUT_DEPTH 28
#define MAX_CHANNELS 2
#define MAX_PRED_ORDER 16
#define MAX_NWRAP MAX_PRED_ORDER
#define MAX_NMEAN 4
/* NUM_DEC_LOOPS should be an even number */
#define NUM_DEC_LOOPS 26
#define DEFAULT_BLOCK_SIZE 256
#define MAX_FRAMESIZE 1024
#define MAX_CHANNELS 2
#define MAX_NWRAP 3
#define MAX_NMEAN 4
/* Buffer sizes derived from the block size and decode-loop count.
   Each expansion is fully parenthesised so the macros evaluate as a single
   value when used inside a larger expression (e.g. x / MAX_HEADER_SIZE). */
#define MAX_HEADER_SIZE (DEFAULT_BLOCK_SIZE*4)
#define MAX_BUFFER_SIZE (2*DEFAULT_BLOCK_SIZE*NUM_DEC_LOOPS)
#define MAX_DECODE_SIZE ((DEFAULT_BLOCK_SIZE*NUM_DEC_LOOPS/2) + MAX_NWRAP)
#define MAX_OFFSET_SIZE MAX_NMEAN
#define FN_DIFF0 0
#define FN_DIFF1 1
#define FN_DIFF2 2
#define FN_DIFF3 3
#define FN_QUIT 4
#define FN_BLOCKSIZE 5
#define FN_BITSHIFT 6
#define FN_QLPC 7
#define FN_ZERO 8
#define FN_VERBATIM 9
#define FN_ERROR 10
typedef struct ShortenContext {
GetBitContext gb;
@ -17,20 +37,16 @@ typedef struct ShortenContext {
int bits_per_sample;
int version;
int bitshift;
int last_bitshift;
int nmean;
int nwrap;
int blocksize;
int bitindex;
/* Not needed...
int bit_rate;
int block_align;
int chunk_size;
*/
} ShortenContext;
int shorten_init(ShortenContext* s, uint8_t *buf, int buf_size);
int shorten_decode_frame(ShortenContext *s,
int32_t *decoded,
int32_t *offset,
uint8_t *buf,
int buf_size) ICODE_ATTR;
int shorten_decode_frames(ShortenContext *s, int *nsamples,
int32_t *decoded0, int32_t *decoded1,
int32_t *offset0, int32_t *offset1,
uint8_t *buf, int buf_size,
void (*yield)(void)) ICODE_ATTR;

View file

@ -33,13 +33,13 @@ extern char iend[];
struct codec_api* rb;
struct codec_api* ci;
#define MAX_DECODED (DEFAULT_BLOCK_SIZE + MAX_NWRAP)
int32_t decoded0[MAX_DECODED] IBSS_ATTR;
int32_t decoded1[MAX_DECODED] IBSS_ATTR;
int32_t decoded0[MAX_DECODE_SIZE] IBSS_ATTR;
int32_t decoded1[MAX_DECODE_SIZE] IBSS_ATTR;
#define MAX_OFFSETS MAX_NMEAN
int32_t offset0[MAX_OFFSETS] IBSS_ATTR;
int32_t offset1[MAX_OFFSETS] IBSS_ATTR;
int32_t offset0[MAX_OFFSET_SIZE] IBSS_ATTR;
int32_t offset1[MAX_OFFSET_SIZE] IBSS_ATTR;
int8_t ibuf[MAX_BUFFER_SIZE] IBSS_ATTR;
/* this is the codec entry point */
enum codec_status codec_start(struct codec_api* api)
@ -48,9 +48,8 @@ enum codec_status codec_start(struct codec_api* api)
uint32_t samplesdone;
uint32_t elapsedtime;
int8_t *buf;
int cur_chan, consumed, res;
int consumed, res, nsamples;
long bytesleft;
int retval;
/* Generic codec initialisation */
rb = api;
@ -72,9 +71,8 @@ enum codec_status codec_start(struct codec_api* api)
next_track:
/* Codec initialization */
if (codec_init(api)) {
LOGF("Shorten: Error initialising codec\n");
retval = CODEC_ERROR;
goto exit;
LOGF("Shorten: codec_init error\n");
return CODEC_ERROR;
}
while (!*ci->taginfo_ready)
@ -90,12 +88,11 @@ next_track:
}
/* Read the shorten & wave headers */
buf = ci->request_buffer(&bytesleft, MAX_FRAMESIZE);
buf = ci->request_buffer(&bytesleft, MAX_HEADER_SIZE);
res = shorten_init(&sc, (unsigned char *)buf, bytesleft);
if (res < 0) {
LOGF("shorten_init error: %d\n", res);
retval = CODEC_ERROR;
goto exit;
LOGF("Shorten: shorten_init error: %d\n", res);
return CODEC_ERROR;
}
ci->id3->frequency = sc.sample_rate;
@ -117,14 +114,13 @@ next_track:
seek_start:
/* The main decoding loop */
ci->memset(&decoded0, 0, sizeof(int32_t)*MAX_DECODED);
ci->memset(&decoded1, 0, sizeof(int32_t)*MAX_DECODED);
ci->memset(&offset0, 0, sizeof(int32_t)*MAX_OFFSETS);
ci->memset(&offset1, 0, sizeof(int32_t)*MAX_OFFSETS);
ci->memset(&decoded0, 0, sizeof(int32_t)*MAX_DECODE_SIZE);
ci->memset(&decoded1, 0, sizeof(int32_t)*MAX_DECODE_SIZE);
ci->memset(&offset0, 0, sizeof(int32_t)*MAX_OFFSET_SIZE);
ci->memset(&offset1, 0, sizeof(int32_t)*MAX_OFFSET_SIZE);
cur_chan = 0;
samplesdone = 0;
buf = ci->request_buffer(&bytesleft, MAX_FRAMESIZE);
buf = ci->request_buffer(&bytesleft, MAX_BUFFER_SIZE);
while (bytesleft) {
ci->yield();
if (ci->stop_codec || ci->reload_codec) {
@ -143,51 +139,43 @@ seek_start:
}
/* Decode a frame */
ci->yield();
if (cur_chan == 0) {
res = shorten_decode_frame(&sc, decoded0 + sc.nwrap, offset0,
(unsigned char *)buf, bytesleft);
ci->memcpy(ibuf, buf, bytesleft); /* copy buf to iram */
res = shorten_decode_frames(&sc, &nsamples, decoded0, decoded1,
offset0, offset1, (unsigned char *)ibuf,
bytesleft, ci->yield);
if (res == FN_ERROR) {
LOGF("Shorten: shorten_decode_frames error (%d)\n", samplesdone);
return CODEC_ERROR;
} else {
res = shorten_decode_frame(&sc, decoded1 + sc.nwrap, offset1,
(unsigned char *)buf, bytesleft);
}
cur_chan++;
if (res == 0 && cur_chan == sc.channels) {
cur_chan = 0;
/* Insert decoded samples in pcmbuf */
ci->yield();
while (!ci->pcmbuf_insert_split((char*)(decoded0 + sc.nwrap),
(char*)(decoded1 + sc.nwrap), sc.blocksize*4)) {
if (nsamples) {
ci->yield();
while (!ci->pcmbuf_insert_split((char*)(decoded0 + sc.nwrap),
(char*)(decoded1 + sc.nwrap),
4*nsamples)) {
ci->yield();
}
/* Update the elapsed-time indicator */
samplesdone += nsamples;
elapsedtime = (samplesdone*10) / (sc.sample_rate/100);
ci->set_elapsed(elapsedtime);
}
/* Update the elapsed-time indicator */
samplesdone += sc.blocksize;
elapsedtime = (samplesdone*10) / (sc.sample_rate/100);
ci->set_elapsed(elapsedtime);
} else if (res == 1) {
/* End of shorten stream...go to next track */
break;
} else if (res < 0) {
LOGF("shorten_decode_frame error: \n", res);
retval = CODEC_ERROR;
goto exit;
if (res == FN_QUIT)
break;
}
consumed = sc.gb.index/8;
ci->advance_buffer(consumed);
buf = ci->request_buffer(&bytesleft, MAX_BUFFER_SIZE);
sc.bitindex = sc.gb.index - 8*consumed;
buf = ci->request_buffer(&bytesleft, MAX_FRAMESIZE);
}
LOGF("Shorten: Decoded %d samples\n", samplesdone);
if (ci->request_next_track())
goto next_track;
retval = CODEC_OK;
exit:
return retval;
return CODEC_OK;
}