forked from len0rd/rockbox
Patch #1426489 - Shorten codec optimisations from Mark Arigo
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@8615 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
6099dc8b77
commit
8e46ab85a9
3 changed files with 212 additions and 114 deletions
|
|
@ -28,12 +28,6 @@
|
|||
#include "golomb.h"
|
||||
#include "shndec.h"
|
||||
|
||||
/* These seem reasonable from my test files.
|
||||
Does MAX_HEADER_SIZE really need to be 16384? */
|
||||
#define MAX_PRED_ORDER 16
|
||||
#define MAX_HEADER_SIZE DEFAULT_BLOCK_SIZE*4
|
||||
//#define MAX_HEADER_SIZE 16384
|
||||
|
||||
#define ULONGSIZE 2
|
||||
|
||||
#define WAVE_FORMAT_PCM 0x0001
|
||||
|
|
@ -54,16 +48,6 @@
|
|||
#define V2LPCQOFFSET (1 << LPCQUANT)
|
||||
|
||||
#define FNSIZE 2
|
||||
#define FN_DIFF0 0
|
||||
#define FN_DIFF1 1
|
||||
#define FN_DIFF2 2
|
||||
#define FN_DIFF3 3
|
||||
#define FN_QUIT 4
|
||||
#define FN_BLOCKSIZE 5
|
||||
#define FN_BITSHIFT 6
|
||||
#define FN_QLPC 7
|
||||
#define FN_ZERO 8
|
||||
#define FN_VERBATIM 9
|
||||
|
||||
#define VERBATIM_CKSIZE_SIZE 5
|
||||
#define VERBATIM_BYTE_SIZE 8
|
||||
|
|
@ -76,22 +60,21 @@
|
|||
#define get_le16(gb) bswap_16(get_bits_long(gb, 16))
|
||||
#define get_le32(gb) bswap_32(get_bits_long(gb, 32))
|
||||
|
||||
/* Reverse the byte order of a 32-bit value (0xAABBCCDD -> 0xDDCCBBAA). */
static uint32_t bswap_32(uint32_t x){
    /* First swap the two bytes inside each 16-bit half... */
    x = ((x << 8) & 0xFF00FF00) | ((x >> 8) & 0x00FF00FF);
    /* ...then swap the two halves. */
    return (x >> 16) | (x << 16);
}
|
||||
|
||||
/* Swap the two bytes of a 16-bit value (0xAABB -> 0xBBAA). */
static uint16_t bswap_16(uint16_t x){
    /* x is promoted to int for the shifts; mask and cast so the narrowing
       back to 16 bits is explicit rather than an implicit truncation. */
    return (uint16_t)(((x & 0x00FFu) << 8) | ((x & 0xFF00u) >> 8));
}
|
||||
|
||||
/* Converts a fourcc string to int.
 *
 * The first four bytes of s are packed little-endian: s[0] ends up in the
 * least significant byte of the result, s[3] in the most significant one.
 * s must point to at least four readable bytes.
 */
static int ff_get_fourcc(const char *s){
    /* Read the bytes as unsigned: plain char may be signed, in which case a
       byte >= 0x80 would be sign-extended and corrupt the other fields, and
       shifting it left by 24 would shift into the sign bit. */
    const unsigned char *b = (const unsigned char *)s;
    uint32_t code = (uint32_t)b[0]
                  | ((uint32_t)b[1] << 8)
                  | ((uint32_t)b[2] << 16)
                  | ((uint32_t)b[3] << 24);

    return (int)code;
}
|
||||
|
||||
static unsigned int get_uint(ShortenContext *s, int k) ICODE_ATTR;
|
||||
static unsigned int get_uint(ShortenContext *s, int k)
|
||||
{
|
||||
if (s->version != 0)
|
||||
|
|
@ -99,10 +82,77 @@ static unsigned int get_uint(ShortenContext *s, int k)
|
|||
return get_ur_golomb_shorten(&s->gb, k);
|
||||
}
|
||||
|
||||
static void decode_subframe_lpc(ShortenContext *s, int32_t *decoded,
|
||||
int residual_size, int pred_order) ICODE_ATTR;
|
||||
static void decode_subframe_lpc(ShortenContext *s, int32_t *decoded,
|
||||
int residual_size, int pred_order)
|
||||
#if defined(CPU_COLDFIRE) && !defined(SIMULATOR)
/*
 * Left-shift n 32-bit samples in place by `shift` bits (ColdFire builds only).
 *
 * Equivalent C:
 *
 *     for (i = 0; i < n; i++)
 *         samples[i] <<= shift;
 *
 * The main loop handles four samples per iteration with movem.l; the
 * remaining (n & 3) samples are handled one at a time.
 * NOTE(review): the loop counters are only tested for zero, so callers are
 * presumably expected to pass n >= 0 -- confirm.
 */
static void coldfire_lshift_samples(int n, int shift, int32_t *samples) ICODE_ATTR;
static void coldfire_lshift_samples(int n, int shift, int32_t *samples)
{
    /* Numeric local labels are used so the assembler does not see duplicate
       symbols if the compiler emits this asm block more than once. */
    asm volatile (
        "move.l  %[n], %%d0          \n"  /* d0 = number of 4-sample groups */
        "asr.l   #2, %%d0            \n"
        "beq     2f                  \n"
    "1:                              \n"  /* main loop (unroll by 4) */
        "movem.l (%[x]), %%d4-%%d7   \n"
        "asl.l   %[s], %%d4          \n"
        "asl.l   %[s], %%d5          \n"
        "asl.l   %[s], %%d6          \n"
        "asl.l   %[s], %%d7          \n"
        "movem.l %%d4-%%d7, (%[x])   \n"
        "add.l   #16, %[x]           \n"

        "subq.l  #1, %%d0            \n"
        "bne     1b                  \n"
    "2:                              \n"  /* any loops left? */
        "and.l   #3, %[n]            \n"
        "beq     4f                  \n"
    "3:                              \n"  /* remaining loops */
        "move.l  (%[x]), %%d4        \n"
        "asl.l   %[s], %%d4          \n"
        "move.l  %%d4, (%[x])+       \n"

        "subq.l  #1, %[n]            \n"
        "bne     3b                  \n"
    "4:                              \n"  /* exit */
        : [n] "+d" (n),          /* sample count, consumed by the tail loop */
          [s] "+d" (shift),      /* shift amount */
          [x] "+a" (samples)     /* running pointer into the sample buffer */
        :
        /* The asm rewrites the sample buffer and changes the condition
           codes, so tell the compiler about both. */
        : "%d0", "%d4", "%d5", "%d6", "%d7", "cc", "memory"
    );
}
#endif
|
||||
|
||||
/*
 * Apply the stream's bitshift to a freshly decoded block.
 *
 * samples points at the first sample of the current block; the s->nwrap
 * samples before it (negative indices) are the history carried over from
 * the previous block.  Updates s->last_bitshift when the bitshift changed.
 */
static inline void fix_bitshift(ShortenContext *s, int32_t *samples)
{
    int i;

    /* Wrapped samples don't get bitshifted, so we'll do them during
       the next iteration: the range shifted here starts at the carried-over
       history (-nwrap) and stops nwrap samples short of the block's end. */
    if (s->bitshift != 0) {
#if defined(CPU_COLDFIRE) && !defined(SIMULATOR)
        coldfire_lshift_samples(s->blocksize, s->bitshift, samples - s->nwrap);
#else
        /* Shift through uint32_t: left-shifting a negative signed value is
           undefined behaviour in C. */
        for (i = -s->nwrap; i < (s->blocksize - s->nwrap); i++)
            samples[i] = (int32_t)((uint32_t)samples[i] << s->bitshift);
#endif
    }

    /* Also, we have to remember to fix the wrapped samples when the
       bitshift changes.
       NOTE(review): when both the old and the new bitshift are non-zero the
       history samples are shifted by both amounts -- confirm this is
       intended. */
    if (s->bitshift != s->last_bitshift) {
        if (s->last_bitshift != 0)
            for (i = -s->nwrap; i < 0; i++)
                samples[i] = (int32_t)((uint32_t)samples[i] << s->last_bitshift);

        s->last_bitshift = s->bitshift;
    }
}
|
||||
|
||||
static inline void decode_subframe_lpc(ShortenContext *s, int32_t *decoded,
|
||||
int residual_size, int pred_order)
|
||||
{
|
||||
int sum, i, j;
|
||||
int coeffs[MAX_PRED_ORDER];
|
||||
|
|
@ -121,18 +171,12 @@ static void decode_subframe_lpc(ShortenContext *s, int32_t *decoded,
|
|||
}
|
||||
}
|
||||
|
||||
int shorten_decode_frame(ShortenContext *s,
|
||||
int32_t *decoded,
|
||||
int32_t *offset,
|
||||
uint8_t *buf,
|
||||
int buf_size)
|
||||
static inline int shorten_decode_frame(ShortenContext *s, int32_t *decoded,
|
||||
int32_t *offset)
|
||||
{
|
||||
int i;
|
||||
int32_t sum;
|
||||
|
||||
init_get_bits(&s->gb, buf, buf_size*8);
|
||||
get_bits(&s->gb, s->bitindex);
|
||||
|
||||
int cmd = get_ur_golomb_shorten(&s->gb, FNSIZE);
|
||||
switch (cmd) {
|
||||
case FN_ZERO:
|
||||
|
|
@ -201,10 +245,6 @@ int shorten_decode_frame(ShortenContext *s,
|
|||
case FN_QLPC:
|
||||
{
|
||||
int pred_order = get_ur_golomb_shorten(&s->gb, LPCQSIZE);
|
||||
if (pred_order > MAX_PRED_ORDER) {
|
||||
return -2;
|
||||
}
|
||||
|
||||
for (i=0; i<pred_order; i++)
|
||||
decoded[i - pred_order] -= coffset;
|
||||
decode_subframe_lpc(s, decoded, residual_size, pred_order);
|
||||
|
|
@ -231,12 +271,7 @@ int shorten_decode_frame(ShortenContext *s,
|
|||
}
|
||||
}
|
||||
|
||||
for (i=-s->nwrap; i<0; i++)
|
||||
decoded[i] = decoded[i + s->blocksize];
|
||||
|
||||
int scale = s->bitshift + SHN_OUTPUT_DEPTH - s->bits_per_sample;
|
||||
for (i = 0; i < s->blocksize; i++)
|
||||
decoded[i] <<= scale;
|
||||
fix_bitshift(s, decoded);
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
@ -244,29 +279,88 @@ int shorten_decode_frame(ShortenContext *s,
|
|||
i = get_ur_golomb_shorten(&s->gb, VERBATIM_CKSIZE_SIZE);
|
||||
while (i--)
|
||||
get_ur_golomb_shorten(&s->gb, VERBATIM_BYTE_SIZE);
|
||||
return 4;
|
||||
break;
|
||||
|
||||
case FN_BITSHIFT:
|
||||
s->bitshift = get_ur_golomb_shorten(&s->gb, BITSHIFTSIZE);
|
||||
return 3;
|
||||
break;
|
||||
|
||||
case FN_BLOCKSIZE:
|
||||
s->blocksize = get_uint(s, av_log2(s->blocksize));
|
||||
return 2;
|
||||
break;
|
||||
|
||||
case FN_QUIT:
|
||||
return 1;
|
||||
break;
|
||||
|
||||
default:
|
||||
return -1;
|
||||
return FN_ERROR;
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
return cmd;
|
||||
}
|
||||
|
||||
/*
 * Decode a batch of up to NUM_DEC_LOOPS blocks from buf, alternating between
 * the two channel buffers (even loop count -> decoded0/offset0, odd ->
 * decoded1/offset1).
 *
 * s          decoder state; s->bitindex gives the number of bits to skip at
 *            the start of buf.
 * nsamples   out: samples per channel decoded in this call (advanced by
 *            s->blocksize each time a second-channel block completes).
 * decoded0/1 per-channel sample buffers; the first s->nwrap entries hold the
 *            history carried over between calls, decoded audio follows.
 * offset0/1  per-channel offset buffers passed through to
 *            shorten_decode_frame().
 * buf        input bitstream of buf_size bytes.
 * yield      called once before every command is decoded.
 *
 * Returns the code of the last command decoded (an FN_* value); FN_QUIT and
 * FN_ERROR end the batch early.
 */
int shorten_decode_frames(ShortenContext *s, int *nsamples,
                          int32_t *decoded0, int32_t *decoded1,
                          int32_t *offset0, int32_t *offset1,
                          uint8_t *buf, int buf_size,
                          void (*yield)(void))
{
    int32_t *decoded, *offset;
    int cmd = FN_ERROR; /* defined result even if the loop never runs */
    int n = 0;

    *nsamples = 0;

    init_get_bits(&s->gb, buf, buf_size*8);
    get_bits(&s->gb, s->bitindex); /* skip the bits already consumed */

    while (n < NUM_DEC_LOOPS) {
        int chan = n%2;
        if (chan == 0) {
            decoded = decoded0 + s->nwrap + *nsamples;
            offset = offset0;
        } else {
            decoded = decoded1 + s->nwrap + *nsamples;
            offset = offset1;
        }

        yield();

        cmd = shorten_decode_frame(s, decoded, offset);

        if (cmd == FN_VERBATIM || cmd == FN_BITSHIFT || cmd == FN_BLOCKSIZE) {
            /* No audio produced: decode the next command for the same
               channel without counting this one as a block. */
            continue;
        } else if (cmd == FN_QUIT || cmd == FN_ERROR) {
            break;
        }

        /* Only advance once the second channel's block is done. */
        *nsamples += chan * s->blocksize;
        n++;
    }

    if (*nsamples) {
        int i;
        int scale;

        /* Wrap the samples for the next loop.  This must happen before the
           scaling below, which rewrites the region being copied from. */
        for (i = 0; i < s->nwrap; i++) {
            decoded0[i] = decoded0[*nsamples + i];
            decoded1[i] = decoded1[*nsamples + i];
        }

        /* Scale the samples for the pcmbuf */
        scale = SHN_OUTPUT_DEPTH - s->bits_per_sample;
#if defined(CPU_COLDFIRE) && !defined(SIMULATOR)
        coldfire_lshift_samples(*nsamples, scale, decoded0 + s->nwrap);
        coldfire_lshift_samples(*nsamples, scale, decoded1 + s->nwrap);
#else
        /* Shift through uint32_t: left-shifting a negative signed value is
           undefined behaviour in C. */
        for (i = 0; i < *nsamples; i++) {
            decoded0[i + s->nwrap] =
                (int32_t)((uint32_t)decoded0[i + s->nwrap] << scale);
            decoded1[i + s->nwrap] =
                (int32_t)((uint32_t)decoded1[i + s->nwrap] << scale);
        }
#endif
    }

    return cmd;
}
|
||||
|
||||
static int decode_wave_header(ShortenContext *s,
|
||||
|
|
|
|||
|
|
@ -1,11 +1,31 @@
|
|||
#include "bitstream.h"
|
||||
|
||||
#define SHN_OUTPUT_DEPTH 28

#define MAX_CHANNELS 2
#define MAX_PRED_ORDER 16
#define MAX_NWRAP MAX_PRED_ORDER
#define MAX_NMEAN 4

/* NUM_DEC_LOOPS should be an even number */
#define NUM_DEC_LOOPS 26
#define DEFAULT_BLOCK_SIZE 256
/* Kept for the pre-patch request_buffer() calls that still name it. */
#define MAX_FRAMESIZE 1024
/* Expression macros are fully parenthesised so they expand safely inside
   larger expressions (e.g. x / MAX_HEADER_SIZE). */
#define MAX_HEADER_SIZE (DEFAULT_BLOCK_SIZE*4)
#define MAX_BUFFER_SIZE (2*DEFAULT_BLOCK_SIZE*NUM_DEC_LOOPS)
#define MAX_DECODE_SIZE ((DEFAULT_BLOCK_SIZE*NUM_DEC_LOOPS/2) + MAX_NWRAP)
#define MAX_OFFSET_SIZE MAX_NMEAN

/* Command codes read from the bitstream; FN_ERROR is the decoder's own
   marker for an unrecognised command. */
#define FN_DIFF0 0
#define FN_DIFF1 1
#define FN_DIFF2 2
#define FN_DIFF3 3
#define FN_QUIT 4
#define FN_BLOCKSIZE 5
#define FN_BITSHIFT 6
#define FN_QLPC 7
#define FN_ZERO 8
#define FN_VERBATIM 9
#define FN_ERROR 10
|
||||
|
||||
typedef struct ShortenContext {
|
||||
GetBitContext gb;
|
||||
|
|
@ -17,20 +37,16 @@ typedef struct ShortenContext {
|
|||
int bits_per_sample;
|
||||
int version;
|
||||
int bitshift;
|
||||
int last_bitshift;
|
||||
int nmean;
|
||||
int nwrap;
|
||||
int blocksize;
|
||||
int bitindex;
|
||||
/* Not needed...
|
||||
int bit_rate;
|
||||
int block_align;
|
||||
int chunk_size;
|
||||
*/
|
||||
} ShortenContext;
|
||||
|
||||
int shorten_init(ShortenContext* s, uint8_t *buf, int buf_size);
|
||||
int shorten_decode_frame(ShortenContext *s,
|
||||
int32_t *decoded,
|
||||
int32_t *offset,
|
||||
uint8_t *buf,
|
||||
int buf_size) ICODE_ATTR;
|
||||
int shorten_decode_frames(ShortenContext *s, int *nsamples,
|
||||
int32_t *decoded0, int32_t *decoded1,
|
||||
int32_t *offset0, int32_t *offset1,
|
||||
uint8_t *buf, int buf_size,
|
||||
void (*yield)(void)) ICODE_ATTR;
|
||||
|
|
|
|||
|
|
@ -33,13 +33,13 @@ extern char iend[];
|
|||
struct codec_api* rb;
|
||||
struct codec_api* ci;
|
||||
|
||||
#define MAX_DECODED (DEFAULT_BLOCK_SIZE + MAX_NWRAP)
|
||||
int32_t decoded0[MAX_DECODED] IBSS_ATTR;
|
||||
int32_t decoded1[MAX_DECODED] IBSS_ATTR;
|
||||
int32_t decoded0[MAX_DECODE_SIZE] IBSS_ATTR;
|
||||
int32_t decoded1[MAX_DECODE_SIZE] IBSS_ATTR;
|
||||
|
||||
#define MAX_OFFSETS MAX_NMEAN
|
||||
int32_t offset0[MAX_OFFSETS] IBSS_ATTR;
|
||||
int32_t offset1[MAX_OFFSETS] IBSS_ATTR;
|
||||
int32_t offset0[MAX_OFFSET_SIZE] IBSS_ATTR;
|
||||
int32_t offset1[MAX_OFFSET_SIZE] IBSS_ATTR;
|
||||
|
||||
int8_t ibuf[MAX_BUFFER_SIZE] IBSS_ATTR;
|
||||
|
||||
/* this is the codec entry point */
|
||||
enum codec_status codec_start(struct codec_api* api)
|
||||
|
|
@ -48,9 +48,8 @@ enum codec_status codec_start(struct codec_api* api)
|
|||
uint32_t samplesdone;
|
||||
uint32_t elapsedtime;
|
||||
int8_t *buf;
|
||||
int cur_chan, consumed, res;
|
||||
int consumed, res, nsamples;
|
||||
long bytesleft;
|
||||
int retval;
|
||||
|
||||
/* Generic codec initialisation */
|
||||
rb = api;
|
||||
|
|
@ -72,9 +71,8 @@ enum codec_status codec_start(struct codec_api* api)
|
|||
next_track:
|
||||
/* Codec initialization */
|
||||
if (codec_init(api)) {
|
||||
LOGF("Shorten: Error initialising codec\n");
|
||||
retval = CODEC_ERROR;
|
||||
goto exit;
|
||||
LOGF("Shorten: codec_init error\n");
|
||||
return CODEC_ERROR;
|
||||
}
|
||||
|
||||
while (!*ci->taginfo_ready)
|
||||
|
|
@ -90,12 +88,11 @@ next_track:
|
|||
}
|
||||
|
||||
/* Read the shorten & wave headers */
|
||||
buf = ci->request_buffer(&bytesleft, MAX_FRAMESIZE);
|
||||
buf = ci->request_buffer(&bytesleft, MAX_HEADER_SIZE);
|
||||
res = shorten_init(&sc, (unsigned char *)buf, bytesleft);
|
||||
if (res < 0) {
|
||||
LOGF("shorten_init error: %d\n", res);
|
||||
retval = CODEC_ERROR;
|
||||
goto exit;
|
||||
LOGF("Shorten: shorten_init error: %d\n", res);
|
||||
return CODEC_ERROR;
|
||||
}
|
||||
|
||||
ci->id3->frequency = sc.sample_rate;
|
||||
|
|
@ -117,14 +114,13 @@ next_track:
|
|||
|
||||
seek_start:
|
||||
/* The main decoding loop */
|
||||
ci->memset(&decoded0, 0, sizeof(int32_t)*MAX_DECODED);
|
||||
ci->memset(&decoded1, 0, sizeof(int32_t)*MAX_DECODED);
|
||||
ci->memset(&offset0, 0, sizeof(int32_t)*MAX_OFFSETS);
|
||||
ci->memset(&offset1, 0, sizeof(int32_t)*MAX_OFFSETS);
|
||||
ci->memset(&decoded0, 0, sizeof(int32_t)*MAX_DECODE_SIZE);
|
||||
ci->memset(&decoded1, 0, sizeof(int32_t)*MAX_DECODE_SIZE);
|
||||
ci->memset(&offset0, 0, sizeof(int32_t)*MAX_OFFSET_SIZE);
|
||||
ci->memset(&offset1, 0, sizeof(int32_t)*MAX_OFFSET_SIZE);
|
||||
|
||||
cur_chan = 0;
|
||||
samplesdone = 0;
|
||||
buf = ci->request_buffer(&bytesleft, MAX_FRAMESIZE);
|
||||
buf = ci->request_buffer(&bytesleft, MAX_BUFFER_SIZE);
|
||||
while (bytesleft) {
|
||||
ci->yield();
|
||||
if (ci->stop_codec || ci->reload_codec) {
|
||||
|
|
@ -143,51 +139,43 @@ seek_start:
|
|||
}
|
||||
|
||||
/* Decode a frame */
|
||||
ci->yield();
|
||||
if (cur_chan == 0) {
|
||||
res = shorten_decode_frame(&sc, decoded0 + sc.nwrap, offset0,
|
||||
(unsigned char *)buf, bytesleft);
|
||||
ci->memcpy(ibuf, buf, bytesleft); /* copy buf to iram */
|
||||
res = shorten_decode_frames(&sc, &nsamples, decoded0, decoded1,
|
||||
offset0, offset1, (unsigned char *)ibuf,
|
||||
bytesleft, ci->yield);
|
||||
|
||||
if (res == FN_ERROR) {
|
||||
LOGF("Shorten: shorten_decode_frames error (%d)\n", samplesdone);
|
||||
return CODEC_ERROR;
|
||||
} else {
|
||||
res = shorten_decode_frame(&sc, decoded1 + sc.nwrap, offset1,
|
||||
(unsigned char *)buf, bytesleft);
|
||||
}
|
||||
cur_chan++;
|
||||
|
||||
if (res == 0 && cur_chan == sc.channels) {
|
||||
cur_chan = 0;
|
||||
|
||||
/* Insert decoded samples in pcmbuf */
|
||||
ci->yield();
|
||||
while (!ci->pcmbuf_insert_split((char*)(decoded0 + sc.nwrap),
|
||||
(char*)(decoded1 + sc.nwrap), sc.blocksize*4)) {
|
||||
if (nsamples) {
|
||||
ci->yield();
|
||||
while (!ci->pcmbuf_insert_split((char*)(decoded0 + sc.nwrap),
|
||||
(char*)(decoded1 + sc.nwrap),
|
||||
4*nsamples)) {
|
||||
ci->yield();
|
||||
}
|
||||
|
||||
/* Update the elapsed-time indicator */
|
||||
samplesdone += nsamples;
|
||||
elapsedtime = (samplesdone*10) / (sc.sample_rate/100);
|
||||
ci->set_elapsed(elapsedtime);
|
||||
}
|
||||
|
||||
/* Update the elapsed-time indicator */
|
||||
samplesdone += sc.blocksize;
|
||||
elapsedtime = (samplesdone*10) / (sc.sample_rate/100);
|
||||
ci->set_elapsed(elapsedtime);
|
||||
} else if (res == 1) {
|
||||
/* End of shorten stream...go to next track */
|
||||
break;
|
||||
} else if (res < 0) {
|
||||
LOGF("shorten_decode_frame error: \n", res);
|
||||
retval = CODEC_ERROR;
|
||||
goto exit;
|
||||
if (res == FN_QUIT)
|
||||
break;
|
||||
}
|
||||
|
||||
consumed = sc.gb.index/8;
|
||||
ci->advance_buffer(consumed);
|
||||
buf = ci->request_buffer(&bytesleft, MAX_BUFFER_SIZE);
|
||||
sc.bitindex = sc.gb.index - 8*consumed;
|
||||
buf = ci->request_buffer(&bytesleft, MAX_FRAMESIZE);
|
||||
}
|
||||
|
||||
LOGF("Shorten: Decoded %d samples\n", samplesdone);
|
||||
|
||||
if (ci->request_next_track())
|
||||
goto next_track;
|
||||
|
||||
retval = CODEC_OK;
|
||||
exit:
|
||||
return retval;
|
||||
return CODEC_OK;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue