1
0
Fork 0
forked from len0rd/rockbox

Sync to upstream libopus

Sync to commit bb4b6885a139644cf3ac14e7deda9f633ec2d93c

This brings in a bunch of optimizations to decode speed
and memory usage. Allocations are switched from using
the pseudostack to using the real stack. Enabled hacks
to reduce stack usage.

This should fix crashes on sansa clip, although some
files will not play due to failing allocations in the
codec buffer.

Speeds up decoding of the following test files:

                 H300 (cf)   C200 (arm7tdmi)  ipod classic (arm9e)
16 kbps (silk)   14.28 MHz   4.00 MHz         2.61 MHz
64 kbps (celt)   4.09 MHz    8.08 MHz         6.24 MHz
128 kbps (celt)  1.93 MHz    8.83 MHz         6.53 MHz

Change-Id: I851733a8a5824b61feb363a173091bc7e6629b58
This commit is contained in:
Nils Wallménius 2014-01-19 16:31:59 +01:00
parent e557951c94
commit 9b7ec42403
46 changed files with 1608 additions and 1051 deletions

View file

@ -31,6 +31,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "API.h"
#include "main.h"
#include "stack_alloc.h"
#include "os_support.h"
/************************/
/* Decoder Super Struct */
@ -90,7 +91,8 @@ opus_int silk_Decode( /* O Returns error co
opus_int i, n, decode_only_middle = 0, ret = SILK_NO_ERROR;
opus_int32 nSamplesOutDec, LBRR_symbol;
opus_int16 *samplesOut1_tmp[ 2 ];
VARDECL( opus_int16, samplesOut1_tmp_storage );
VARDECL( opus_int16, samplesOut1_tmp_storage1 );
VARDECL( opus_int16, samplesOut1_tmp_storage2 );
VARDECL( opus_int16, samplesOut2_tmp );
opus_int32 MS_pred_Q13[ 2 ] = { 0 };
opus_int16 *resample_out_ptr;
@ -98,6 +100,7 @@ opus_int silk_Decode( /* O Returns error co
silk_decoder_state *channel_state = psDec->channel_state;
opus_int has_side;
opus_int stereo_to_mono;
int delay_stack_alloc;
SAVE_STACK;
silk_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 );
@ -196,7 +199,7 @@ opus_int silk_Decode( /* O Returns error co
for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) {
for( n = 0; n < decControl->nChannelsInternal; n++ ) {
if( channel_state[ n ].LBRR_flags[ i ] ) {
opus_int pulses[ MAX_FRAME_LENGTH ];
opus_int16 pulses[ MAX_FRAME_LENGTH ];
opus_int condCoding;
if( decControl->nChannelsInternal == 2 && n == 0 ) {
@ -251,13 +254,22 @@ opus_int silk_Decode( /* O Returns error co
psDec->channel_state[ 1 ].first_frame_after_reset = 1;
}
ALLOC( samplesOut1_tmp_storage,
decControl->nChannelsInternal*(
channel_state[ 0 ].frame_length + 2 ),
/* Check if the temp buffer fits into the output PCM buffer. If it fits,
we can delay allocating the temp buffer until after the SILK peak stack
usage. We need to use a < and not a <= because of the two extra samples. */
delay_stack_alloc = decControl->internalSampleRate*decControl->nChannelsInternal
< decControl->API_sampleRate*decControl->nChannelsAPI;
ALLOC( samplesOut1_tmp_storage1, delay_stack_alloc ? ALLOC_NONE
: decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ),
opus_int16 );
samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage;
samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage
+ channel_state[ 0 ].frame_length + 2;
if ( delay_stack_alloc )
{
samplesOut1_tmp[ 0 ] = samplesOut;
samplesOut1_tmp[ 1 ] = samplesOut + channel_state[ 0 ].frame_length + 2;
} else {
samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage1;
samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage1 + channel_state[ 0 ].frame_length + 2;
}
if( lostFlag == FLAG_DECODE_NORMAL ) {
has_side = !decode_only_middle;
@ -312,6 +324,15 @@ opus_int silk_Decode( /* O Returns error co
resample_out_ptr = samplesOut;
}
ALLOC( samplesOut1_tmp_storage2, delay_stack_alloc
? decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 )
: ALLOC_NONE,
opus_int16 );
if ( delay_stack_alloc ) {
OPUS_COPY(samplesOut1_tmp_storage2, samplesOut, decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2));
samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage2;
samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage2 + channel_state[ 0 ].frame_length + 2;
}
for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) {
/* Resample decoded signal to API_sampleRate */