From 4e36a2b991d58a40d7ea12c9bf41e93736b8b024 Mon Sep 17 00:00:00 2001 From: Michael Giacomelli Date: Sat, 20 Sep 2008 22:06:12 +0000 Subject: [PATCH] Commit FS#9318 - MP3 synthesis filter on COP. Loads the MP3 synth filter onto the CoProcessor on all PortalPlayer devices, resulting in an ~90% speedup according to test_codec on the Sansa. Real-world improvement is somewhat less, but still considerable. Allows MP3 decoding at 30MHz without boosting, or use of more DSP/EQ with less boosting/skipping, thus improving battery life. Minor changes to mpegplayer to retain compatibility with libmad changes. Should be no significant changes for other targets or codecs. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@18557 a1c6a512-1295-4272-9138-f99709370657 --- apps/codecs/libmad/frame.c | 9 +- apps/codecs/libmad/frame.h | 13 ++- apps/codecs/libmad/layer12.c | 16 +-- apps/codecs/libmad/layer3.c | 2 +- apps/codecs/libmad/mad.h | 7 +- apps/codecs/libmad/synth.c | 14 +-- apps/codecs/mpa.c | 142 ++++++++++++++++++++++++- apps/plugins/mpegplayer/Makefile | 2 +- apps/plugins/mpegplayer/audio_thread.c | 7 ++ 9 files changed, 180 insertions(+), 32 deletions(-) diff --git a/apps/codecs/libmad/frame.c b/apps/codecs/libmad/frame.c index 91cf2f9a15..2c7fdca199 100644 --- a/apps/codecs/libmad/frame.c +++ b/apps/codecs/libmad/frame.c @@ -31,6 +31,7 @@ # include "timer.h" # include "layer12.h" # include "layer3.h" +# include "../lib/codeclib.h" static unsigned long const bitrate_table[5][15] = { @@ -467,7 +468,9 @@ int mad_frame_decode(struct mad_frame *frame, struct mad_stream *stream) mad_bit_finish(&next_frame); } - + + + return 0; fail: @@ -485,8 +488,8 @@ void mad_frame_mute(struct mad_frame *frame) for (s = 0; s < 36; ++s) { for (sb = 0; sb < 32; ++sb) { - frame->sbsample[0][s][sb] = - frame->sbsample[1][s][sb] = 0; + (*frame->sbsample)[0][s][sb] = + (*frame->sbsample)[1][s][sb] = 0; } } diff --git a/apps/codecs/libmad/frame.h b/apps/codecs/libmad/frame.h index 
dce573d021..d2d6dca3ef 100644 --- a/apps/codecs/libmad/frame.h +++ b/apps/codecs/libmad/frame.h @@ -65,12 +65,15 @@ struct mad_header { }; struct mad_frame { - struct mad_header header; /* MPEG audio header */ + struct mad_header header; /* MPEG audio header */ + int options; /* decoding options (from stream) */ - int options; /* decoding options (from stream) */ - - mad_fixed_t sbsample[2][36][32]; /* synthesis subband filter samples */ - mad_fixed_t (*overlap)[2][32][18]; /* Layer III block overlap data */ + mad_fixed_t (*sbsample)[2][36][32]; /* synthesis subband filter samples */ + mad_fixed_t (*sbsample_prev)[2][36][32]; /* synthesis subband filter samples + from previous frame only needed + when synthesis is on cop */ + + mad_fixed_t (*overlap)[2][32][18]; /* Layer III block overlap data */ }; # define MAD_NCHANNELS(header) ((header)->mode ? 2 : 1) diff --git a/apps/codecs/libmad/layer12.c b/apps/codecs/libmad/layer12.c index c71c005cf5..ccac392b42 100644 --- a/apps/codecs/libmad/layer12.c +++ b/apps/codecs/libmad/layer12.c @@ -185,7 +185,7 @@ int mad_layer_I(struct mad_stream *stream, struct mad_frame *frame) for (sb = 0; sb < bound; ++sb) { for (ch = 0; ch < nch; ++ch) { nb = allocation[ch][sb]; - frame->sbsample[ch][s][sb] = nb ? + (*frame->sbsample)[ch][s][sb] = nb ? 
mad_f_mul(I_sample(&stream->ptr, nb), sf_table[scalefactor[ch][sb]]) : 0; } @@ -198,13 +198,13 @@ int mad_layer_I(struct mad_stream *stream, struct mad_frame *frame) sample = I_sample(&stream->ptr, nb); for (ch = 0; ch < nch; ++ch) { - frame->sbsample[ch][s][sb] = + (*frame->sbsample)[ch][s][sb] = mad_f_mul(sample, sf_table[scalefactor[ch][sb]]); } } else { for (ch = 0; ch < nch; ++ch) - frame->sbsample[ch][s][sb] = 0; + (*frame->sbsample)[ch][s][sb] = 0; } } } @@ -492,13 +492,13 @@ int mad_layer_II(struct mad_stream *stream, struct mad_frame *frame) II_samples(&stream->ptr, &qc_table[index], samples); for (s = 0; s < 3; ++s) { - frame->sbsample[ch][3 * gr + s][sb] = + (*frame->sbsample)[ch][3 * gr + s][sb] = mad_f_mul(samples[s], sf_table[scalefactor[ch][sb][gr / 4]]); } } else { for (s = 0; s < 3; ++s) - frame->sbsample[ch][3 * gr + s][sb] = 0; + (*frame->sbsample)[ch][3 * gr + s][sb] = 0; } } } @@ -512,7 +512,7 @@ int mad_layer_II(struct mad_stream *stream, struct mad_frame *frame) for (ch = 0; ch < nch; ++ch) { for (s = 0; s < 3; ++s) { - frame->sbsample[ch][3 * gr + s][sb] = + (*frame->sbsample)[ch][3 * gr + s][sb] = mad_f_mul(samples[s], sf_table[scalefactor[ch][sb][gr / 4]]); } } @@ -520,7 +520,7 @@ int mad_layer_II(struct mad_stream *stream, struct mad_frame *frame) else { for (ch = 0; ch < nch; ++ch) { for (s = 0; s < 3; ++s) - frame->sbsample[ch][3 * gr + s][sb] = 0; + (*frame->sbsample)[ch][3 * gr + s][sb] = 0; } } } @@ -528,7 +528,7 @@ int mad_layer_II(struct mad_stream *stream, struct mad_frame *frame) for (ch = 0; ch < nch; ++ch) { for (s = 0; s < 3; ++s) { for (sb = sblimit; sb < 32; ++sb) - frame->sbsample[ch][3 * gr + s][sb] = 0; + (*frame->sbsample)[ch][3 * gr + s][sb] = 0; } } } diff --git a/apps/codecs/libmad/layer3.c b/apps/codecs/libmad/layer3.c index a95927e10f..0a53086d78 100644 --- a/apps/codecs/libmad/layer3.c +++ b/apps/codecs/libmad/layer3.c @@ -3112,7 +3112,7 @@ enum mad_error III_decode(struct mad_bitptr *ptr, struct mad_frame *frame, 
for (ch = 0; ch < nch; ++ch) { struct channel const *channel = &granule->ch[ch]; - mad_fixed_t (*sample)[32] = &frame->sbsample[ch][18 * gr]; + mad_fixed_t (*sample)[32] = &((*frame->sbsample)[ch][18 * gr]); unsigned int sb, l, i, sblimit; mad_fixed_t output[36]; diff --git a/apps/codecs/libmad/mad.h b/apps/codecs/libmad/mad.h index f5d8f1dbcc..52a74d122c 100644 --- a/apps/codecs/libmad/mad.h +++ b/apps/codecs/libmad/mad.h @@ -777,10 +777,13 @@ struct mad_header { struct mad_frame { struct mad_header header; /* MPEG audio header */ - int options; /* decoding options (from stream) */ - mad_fixed_t sbsample[2][36][32]; /* synthesis subband filter samples */ + mad_fixed_t (*sbsample)[2][36][32]; /* synthesis subband filter samples */ + mad_fixed_t (*sbsample_prev)[2][36][32]; /* synthesis subband filter samples + from previous frame only needed + when synthesis is on cop */ + mad_fixed_t (*overlap)[2][32][18]; /* Layer III block overlap data */ }; diff --git a/apps/codecs/libmad/synth.c b/apps/codecs/libmad/synth.c index c023f01ae5..b1a8491a69 100644 --- a/apps/codecs/libmad/synth.c +++ b/apps/codecs/libmad/synth.c @@ -592,14 +592,14 @@ void synth_full(struct mad_synth *synth, struct mad_frame const *frame, int sb; unsigned int phase, ch, s, p; mad_fixed_t *pcm, (*filter)[2][2][16][8]; - mad_fixed_t const (*sbsample)[36][32]; + mad_fixed_t (*sbsample)[36][32]; mad_fixed_t (*fe)[8], (*fx)[8], (*fo)[8]; mad_fixed_t const (*D0ptr)[32]; mad_fixed_t const (*D1ptr)[32]; mad_fixed64hi_t hi0, hi1; for (ch = 0; ch < nch; ++ch) { - sbsample = &frame->sbsample[ch]; + sbsample = &(*frame->sbsample_prev)[ch]; /* dereference the array pointer first, then index by channel */ filter = &synth->filter[ch]; phase = synth->phase; pcm = synth->pcm.samples[ch]; @@ -1053,7 +1053,7 @@ void synth_full(struct mad_synth *synth, struct mad_frame const *frame, int p; unsigned int phase, ch, s; mad_fixed_t *pcm, (*filter)[2][2][16][8]; - mad_fixed_t const (*sbsample)[36][32]; + mad_fixed_t (*sbsample)[36][32]; mad_fixed_t (*fe)[8], (*fx)[8], (*fo)[8]; mad_fixed_t 
const (*D0ptr)[32], *ptr; mad_fixed_t const (*D1ptr)[32]; @@ -1061,7 +1061,7 @@ void synth_full(struct mad_synth *synth, struct mad_frame const *frame, mad_fixed64lo_t lo; for (ch = 0; ch < nch; ++ch) { - sbsample = &frame->sbsample[ch]; + sbsample = &(*frame->sbsample_prev)[ch]; filter = &synth->filter[ch]; phase = synth->phase; pcm = synth->pcm.samples[ch]; @@ -1202,7 +1202,7 @@ void synth_full(struct mad_synth *synth, struct mad_frame const *frame, mad_fixed64lo_t lo; for (ch = 0; ch < nch; ++ch) { - sbsample = &frame->sbsample[ch]; + sbsample = &(*frame->sbsample_prev)[ch]; /* dereference the array pointer first, then index by channel */ filter = &synth->filter[ch]; phase = synth->phase; pcm = synth->pcm.samples[ch]; @@ -1403,14 +1403,14 @@ void synth_half(struct mad_synth *synth, struct mad_frame const *frame, { unsigned int phase, ch, s, sb, pe, po; mad_fixed_t *pcm1, *pcm2, (*filter)[2][2][16][8]; - mad_fixed_t const (*sbsample)[36][32]; + mad_fixed_t (*sbsample)[36][32]; register mad_fixed_t (*fe)[8], (*fx)[8], (*fo)[8]; register mad_fixed_t const (*Dptr)[32], *ptr; register mad_fixed64hi_t hi; register mad_fixed64lo_t lo; for (ch = 0; ch < nch; ++ch) { - sbsample = &frame->sbsample[ch]; + sbsample = &(*frame->sbsample_prev)[ch]; filter = &synth->filter[ch]; phase = synth->phase; pcm1 = synth->pcm.samples[ch]; diff --git a/apps/codecs/mpa.c b/apps/codecs/mpa.c index 7ac96fb8b2..c916aefcaa 100644 --- a/apps/codecs/mpa.c +++ b/apps/codecs/mpa.c @@ -25,13 +25,34 @@ CODEC_HEADER +#if (CONFIG_CPU == PP5024 || CONFIG_CPU == PP5022 || CONFIG_CPU == PP5020 \ + || CONFIG_CPU == PP5002) && !defined(MPEGPLAYER) +#define MPA_SYNTH_ON_COP +#endif + struct mad_stream stream IBSS_ATTR; struct mad_frame frame IBSS_ATTR; struct mad_synth synth IBSS_ATTR; +#ifdef MPA_SYNTH_ON_COP +volatile short synth_running IBSS_ATTR; /*synthesis is running*/ +volatile short die IBSS_ATTR; /*thread should die*/ + +#if (CONFIG_CPU == PP5024) || (CONFIG_CPU == PP5022) +mad_fixed_t sbsample_prev[2][36][32] IBSS_ATTR; +#else +mad_fixed_t 
sbsample_prev[2][36][32] SHAREDBSS_ATTR; +#endif + +struct semaphore synth_done_sem IBSS_ATTR; +struct semaphore synth_pending_sem IBSS_ATTR; +#endif + #define INPUT_CHUNK_SIZE 8192 mad_fixed_t mad_frame_overlap[2][32][18] IBSS_ATTR; +mad_fixed_t sbsample[2][36][32] IBSS_ATTR; + unsigned char mad_main_data[MAD_BUFFER_MDLEN] IBSS_ATTR; /* TODO: what latency does layer 1 have? */ int mpeg_latency[3] = { 0, 481, 529 }; @@ -43,6 +64,14 @@ void init_mad(void) ci->memset(&frame, 0, sizeof(struct mad_frame)); ci->memset(&synth, 0, sizeof(struct mad_synth)); +#ifdef MPA_SYNTH_ON_COP + frame.sbsample_prev = &sbsample_prev; +#else + frame.sbsample_prev = &sbsample; +#endif + + frame.sbsample=&sbsample; + mad_stream_init(&stream); mad_frame_init(&frame); mad_synth_init(&synth); @@ -51,6 +80,8 @@ void init_mad(void) ci->memset(mad_frame_overlap, 0, sizeof(mad_frame_overlap)); frame.overlap = &mad_frame_overlap; stream.main_data = &mad_main_data; + + } int get_file_pos(int newtime) @@ -159,6 +190,90 @@ static void set_elapsed(struct mp3entry* id3) } } +#ifdef MPA_SYNTH_ON_COP + +/* + * Run the synthesis filter on the COProcessor + */ + +static int mad_synth_thread_stack[DEFAULT_STACK_SIZE/sizeof(int)/2] IBSS_ATTR; + +static const unsigned char * const mad_synth_thread_name = "mp3dec"; +static struct thread_entry *mad_synth_thread_p; + + +static void mad_synth_thread(void){ + + while(1){ + ci->semaphore_wait(&synth_pending_sem); + + if(die){ + die=0; + invalidate_icache(); + return ; + } + synth_running = 1; + mad_synth_frame(&synth, &frame); + synth_running = 0; + ci->semaphore_release(&synth_done_sem); + } +} + + +static int mad_synth_thread_wait_pcm(void){ + ci->semaphore_wait(&synth_done_sem); + return 0; +} + +static void mad_synth_thread_ready(void){ + mad_fixed_t (*temp)[2][36][32]; + while(1){ + /*check if synth is currently running before changing its inputs! 
*/ + if(!synth_running){ + /*circular buffer that holds 2 frames' samples*/ + temp=frame.sbsample; + frame.sbsample = frame.sbsample_prev; + frame.sbsample_prev=temp; + + ci->semaphore_release(&synth_pending_sem); + return ; + } + ci->yield(); /*synth thread currently running, wait for it*/ + } +} + +static int mad_synth_thread_create(void){ + synth_running=0; + die=0; + + ci->semaphore_init(&synth_done_sem, 1, 0); + ci->semaphore_init(&synth_pending_sem, 1, 0); + + mad_synth_thread_p = ci->create_thread(mad_synth_thread, + mad_synth_thread_stack, + sizeof(mad_synth_thread_stack), 0, + mad_synth_thread_name + IF_PRIO(, PRIORITY_PLAYBACK), COP); + + if (mad_synth_thread_p == NULL) + return false; + + return true; + +} +#else +static void mad_synth_thread_ready(void){ + mad_synth_frame(&synth, &frame); +} +static int mad_synth_thread_create(void){ + return 0; +} +static int mad_synth_thread_wait_pcm(void){ + return 0; +} + +#endif + /* this is the codec entry point */ enum codec_status codec_main(void) { @@ -180,8 +295,12 @@ enum codec_status codec_main(void) /* Create a decoder instance */ ci->configure(DSP_SET_SAMPLE_DEPTH, MAD_F_FRACBITS); - + next_track: + + /*does nothing on 1 processor systems*/ + mad_synth_thread_create(); + status = CODEC_OK; /* Reinitializing seems to be necessary to avoid playback quircks when seeking. */ @@ -300,6 +419,9 @@ next_track: data (not the one just decoded above). When we exit the decoding loop we will need to process the final frame that was decoded. */ if (framelength > 0) { + + mad_synth_thread_wait_pcm(); + /* In case of a mono file, the second array will be ignored. */ ci->pcmbuf_insert(&synth.pcm.samples[0][samples_to_skip], &synth.pcm.samples[1][samples_to_skip], @@ -308,8 +430,9 @@ next_track: /* Only skip samples for the first frame added. 
*/ samples_to_skip = 0; } - - mad_synth_frame(&synth, &frame); + + mad_synth_thread_ready(); + /* the synthesis filter itself is now run via mad_synth_thread_ready() */ /* Check if sample rate and stereo settings changed in this frame. */ if (frame.header.samplerate != current_frequency) { @@ -345,11 +468,20 @@ next_track: /* Finish the remaining decoded frame. Cut the required samples from the end. */ - if (framelength > stop_skip) + if (framelength > stop_skip){ + mad_synth_thread_wait_pcm(); ci->pcmbuf_insert(synth.pcm.samples[0], synth.pcm.samples[1], framelength - stop_skip); - +} +#ifdef MPA_SYNTH_ON_COP + /*mop up COP thread*/ + die=1; + ci->semaphore_release(&synth_pending_sem); + ci->thread_wait(mad_synth_thread_p); + invalidate_icache(); +#endif /* MPA_SYNTH_ON_COP - keep the error reset below unconditional for all targets */ stream.error = 0; + if (ci->request_next_track()) goto next_track; diff --git a/apps/plugins/mpegplayer/Makefile b/apps/plugins/mpegplayer/Makefile index 66a6142397..7f90cbcc8d 100644 --- a/apps/plugins/mpegplayer/Makefile +++ b/apps/plugins/mpegplayer/Makefile @@ -10,7 +10,7 @@ INCLUDES = -I$(APPSDIR) -I.. -I. 
$(TARGET_INC) -I$(FIRMDIR)/include -I$(FIRMDIR)/export \ -I$(FIRMDIR)/common -I$(FIRMDIR)/drivers -I$(APPSDIR)/plugins/lib -I$(OUTDIR) -I$(BUILDDIR) CFLAGS = $(INCLUDES) $(GCCOPTS) -O2 $(TARGET) $(EXTRA_DEFINES) \ - -DTARGET_ID=$(TARGET_ID) -DMEM=${MEMORYSIZE} -DPLUGIN + -DTARGET_ID=$(TARGET_ID) -DMEM=${MEMORYSIZE} -DPLUGIN -DMPEGPLAYER ifdef APPEXTRA INCLUDES += $(patsubst %,-I$(APPSDIR)/%,$(subst :, ,$(APPEXTRA))) diff --git a/apps/plugins/mpegplayer/audio_thread.c b/apps/plugins/mpegplayer/audio_thread.c index 838dcad699..351581548f 100644 --- a/apps/plugins/mpegplayer/audio_thread.c +++ b/apps/plugins/mpegplayer/audio_thread.c @@ -55,6 +55,9 @@ static struct mad_stream stream IBSS_ATTR; static struct mad_frame frame IBSS_ATTR; static struct mad_synth synth IBSS_ATTR; +/*sbsample buffer for mad_frame*/ +mad_fixed_t sbsample[2][36][32]; + /* 2567 bytes */ static unsigned char mad_main_data[MAD_BUFFER_MDLEN]; @@ -229,6 +232,10 @@ static int audio_buffer(struct stream *str, enum stream_parse_mode type) /* Initialise libmad */ static void init_mad(void) { + /*init the sbsample buffer*/ + frame.sbsample = &sbsample; + frame.sbsample_prev = &sbsample; + mad_stream_init(&stream); mad_frame_init(&frame); mad_synth_init(&synth);