forked from len0rd/rockbox
ASM optimisation by David Bryant.
Placed various important arrays in IRAM. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@6540 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
ff40e4cc6a
commit
9985caf3f9
6 changed files with 566 additions and 23 deletions
|
|
@ -15,7 +15,7 @@ INCLUDES += -I$(APPSDIR)/$(APPEXTRA)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
CFLAGS = $(GCCOPTS) \
|
CFLAGS = $(GCCOPTS) \
|
||||||
$(INCLUDES) $(TARGET) $(EXTRA_DEFINES) -DMEM=${MEMORYSIZE}
|
$(INCLUDES) $(TARGET) $(EXTRA_DEFINES) -DMEM=${MEMORYSIZE} -O2 \
|
||||||
|
|
||||||
# This sets up 'SRC' based on the files mentioned in SOURCES
|
# This sets up 'SRC' based on the files mentioned in SOURCES
|
||||||
include $(TOOLSDIR)/makesrc.inc
|
include $(TOOLSDIR)/makesrc.inc
|
||||||
|
|
|
||||||
|
|
@ -4,4 +4,7 @@ metadata.c
|
||||||
unpack.c
|
unpack.c
|
||||||
words.c
|
words.c
|
||||||
wputils.c
|
wputils.c
|
||||||
|
#if CONFIG_CPU==MCF5249 && !defined(SIMULATOR)
|
||||||
|
coldfire.S
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
|
||||||
535
apps/codecs/libwavpack/coldfire.S
Normal file
535
apps/codecs/libwavpack/coldfire.S
Normal file
|
|
@ -0,0 +1,535 @@
|
||||||
|
/***************************************************************************
|
||||||
|
* __________ __ ___.
|
||||||
|
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
|
||||||
|
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
|
||||||
|
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
|
||||||
|
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
|
||||||
|
* \/ \/ \/ \/ \/
|
||||||
|
* $Id$
|
||||||
|
*
|
||||||
|
* Copyright (C) 2005 by David Bryant
|
||||||
|
*
|
||||||
|
* All files in this archive are subject to the GNU General Public License.
|
||||||
|
* See the file COPYING in the source tree root for full license agreement.
|
||||||
|
*
|
||||||
|
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
|
||||||
|
* KIND, either express or implied.
|
||||||
|
*
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
/* This is an assembly optimized version of the following WavPack function:
|
||||||
|
*
|
||||||
|
* void decorr_stereo_pass_cont_mcf5249 (struct decorr_pass *dpp,
|
||||||
|
* long *buffer, long sample_count);
|
||||||
|
*
|
||||||
|
* It performs a single pass of stereo decorrelation on the provided buffer.
|
||||||
|
* Note that this version of the function requires that the 8 previous stereo
|
||||||
|
* samples are visible and correct. In other words, it ignores the "samples_*"
|
||||||
|
* fields in the decorr_pass structure and gets the history data directly
|
||||||
|
* from the buffer. It does, however, return the appropriate history samples
|
||||||
|
* to the decorr_pass structure before returning.
|
||||||
|
*
|
||||||
|
* This is written to work on a MCF5249 processor, or any processor based on
|
||||||
|
* the ColdFire V2 core with an EMAC unit. The EMAC is perfectly suited for
|
||||||
|
* the "apply_weight" function of WavPack decorrelation because it provides
|
||||||
|
* the requires 40-bit product. The fractional rounding mode of the EMAC is not
|
||||||
|
* configurable and uses "round to even" while WavPack uses "round to larger",
|
||||||
|
* so the rounding has to be done manually.
|
||||||
|
*/
|
||||||
|
|
||||||
|
.text
|
||||||
|
.align 2
|
||||||
|
.global decorr_stereo_pass_cont_mcf5249
|
||||||
|
|
||||||
|
decorr_stereo_pass_cont_mcf5249:
|
||||||
|
|
||||||
|
lea (-44, %sp), %sp
|
||||||
|
movem.l %d2-%d7/%a2-%a6, (%sp)
|
||||||
|
move.l 44+4(%sp), %a2 | a2 = dpp->
|
||||||
|
move.l 44+8(%sp), %a1 | a1 = bptr
|
||||||
|
move.w 2(%a2), %a3 | a3 = dpp->delta
|
||||||
|
move.w 4(%a2), %d3 | d3 = dpp->weight_A (sign extended)
|
||||||
|
ext.l %d3
|
||||||
|
move.w 6(%a2), %d4 | d4 = dpp->weight_B (sign extended)
|
||||||
|
ext.l %d4
|
||||||
|
move.l 44+12(%sp), %d0 | d0 = sample_count
|
||||||
|
jbeq return_only | if zero, nothing to do
|
||||||
|
|
||||||
|
lsl.l #3, %d0 | d5 = bptr + (sample_count * 8)
|
||||||
|
move.l %d0, %d5
|
||||||
|
add.l %a1, %d5
|
||||||
|
|
||||||
|
moveq.l #17, %d0 | left shift weights & delta 17 places
|
||||||
|
asl.l %d0, %d3
|
||||||
|
asl.l %d0, %d4
|
||||||
|
move.l %a3, %d1
|
||||||
|
asl.l %d0, %d1
|
||||||
|
move.l %d1, %a3
|
||||||
|
|
||||||
|
move.l #0x20, %macsr | set fractional mode for MAC
|
||||||
|
move.l #0, %acc1 | acc1 = 0x00 0000 80 (for rounding)
|
||||||
|
move.l #0x800000, %accext01
|
||||||
|
|
||||||
|
move.l #1024<<17, %d6 | d6 & d7 are weight clipping limits
|
||||||
|
move.l #-1024<<17, %d7 | (only used by negative terms)
|
||||||
|
|
||||||
|
move.w (%a2), %d0 | d0 = term
|
||||||
|
ext.l %d0
|
||||||
|
cmp.l #17, %d0
|
||||||
|
jbeq term_17 | term = 17
|
||||||
|
cmp.l #18, %d0
|
||||||
|
jbeq term_18 | term = 18
|
||||||
|
addq.l #1, %d0
|
||||||
|
jbeq term_minus_1 | term = -1
|
||||||
|
addq.l #1, %d0
|
||||||
|
jbeq term_minus_2 | term = -2
|
||||||
|
addq.l #1, %d0
|
||||||
|
jbeq term_minus_3 | term = -3
|
||||||
|
jbra term_default | default term = 1 - 8
|
||||||
|
|
||||||
|
|------------------------------------------------------------------------------
|
||||||
|
| Loop to handle term = 17 condition
|
||||||
|
|
|
||||||
|
| a0 = d0 = (2 * bptr [-1]) - bptr [-2]
|
||||||
|
| a1 = bptr d1 = initial bptr [0]
|
||||||
|
| a2 = dpp-> d2 = updated bptr [0]
|
||||||
|
| a3 = dpp->delta << 17 d3 = dpp->weight_A << 17
|
||||||
|
| a4 = d4 = dpp->weight_B << 17
|
||||||
|
| a5 = d5 = eptr
|
||||||
|
| macsr = 0x20 acc1 = 0x00 0000 80
|
||||||
|
|------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
term_17:
|
||||||
|
move.l -8(%a1), %d0 | d0 = 2 * bptr [-1] - bptr [-2]
|
||||||
|
add.l %d0, %d0
|
||||||
|
sub.l -16(%a1), %d0
|
||||||
|
beq .L251 | if zero, skip calculation
|
||||||
|
move.l %acc1, %acc0
|
||||||
|
asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_A
|
||||||
|
mac.l %d0, %d3, %acc0
|
||||||
|
move.l (%a1), %d1
|
||||||
|
beq .L255
|
||||||
|
eor.l %d1, %d0 | else compare signs
|
||||||
|
bge .L256 | if same, add delta to weight
|
||||||
|
sub.l %a3, %d3 | else subtract delta from weight
|
||||||
|
sub.l %a3, %d3 | subtract again instead of branch
|
||||||
|
.L256: add.l %a3, %d3 | add delta to weight
|
||||||
|
|
||||||
|
.L255: move.l %acc0, %d2 | d2 = rounded product
|
||||||
|
add.l %d1, %d2 | update bptr [0] and store
|
||||||
|
move.l %d2, (%a1)+
|
||||||
|
|
||||||
|
.L253: move.l -8(%a1), %d0 | d0 = 2 * bptr [-1] - bptr [-2]
|
||||||
|
add.l %d0, %d0
|
||||||
|
sub.l -16(%a1), %d0
|
||||||
|
beq .L257 | if zero, skip calculations
|
||||||
|
move.l %acc1, %acc0
|
||||||
|
asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_B
|
||||||
|
mac.l %d0, %d4, %acc0
|
||||||
|
move.l (%a1), %d1
|
||||||
|
beq .L254
|
||||||
|
eor.l %d1, %d0 | else compare signs
|
||||||
|
bge .L259 | if same, add delta to weight
|
||||||
|
sub.l %a3, %d4 | else subtract delta from weight
|
||||||
|
sub.l %a3, %d4 | subtract again instead of branch
|
||||||
|
.L259: add.l %a3, %d4 | add delta to weight
|
||||||
|
|
||||||
|
.L254: move.l %acc0, %d2 | d2 = rounded product
|
||||||
|
add.l %d1, %d2 | update bptr [0] and store
|
||||||
|
move.l %d2, (%a1)+
|
||||||
|
|
||||||
|
.L252: cmp.l %a1, %d5 | loop if bptr < eptr
|
||||||
|
jbhi term_17
|
||||||
|
bra term_17_18_finish | exit through common path
|
||||||
|
|
||||||
|
.L251: addq.l #4, %a1 | update point and jump back into loop
|
||||||
|
bra .L253
|
||||||
|
|
||||||
|
.L257: addq.l #4, %a1 | update point and jump back into loop
|
||||||
|
bra .L252
|
||||||
|
|
||||||
|
|------------------------------------------------------------------------------
|
||||||
|
| Loop to handle term = 18 condition
|
||||||
|
|
|
||||||
|
| a0 = d0 = ((3 * bptr [-1]) - bptr [-2]) >> 1
|
||||||
|
| a1 = bptr d1 = initial bptr [0]
|
||||||
|
| a2 = dpp-> d2 = updated bptr [0]
|
||||||
|
| a3 = dpp->delta << 17 d3 = dpp->weight_A << 17
|
||||||
|
| a4 = d4 = dpp->weight_B << 17
|
||||||
|
| a5 = d5 = eptr
|
||||||
|
| macsr = 0x20 acc1 = 0x00 0000 80
|
||||||
|
|------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
term_18:
|
||||||
|
move.l -8(%a1), %a0 | d0 = (3 * bptr [-1] - bptr [-2]) >> 1
|
||||||
|
lea (%a0,%a0.l*2), %a0
|
||||||
|
move.l %a0, %d0
|
||||||
|
sub.l -16(%a1), %d0
|
||||||
|
asr.l #1, %d0
|
||||||
|
beq .L260
|
||||||
|
move.l %acc1, %acc0
|
||||||
|
asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_A
|
||||||
|
mac.l %d0, %d3, %acc0
|
||||||
|
move.l (%a1), %d1
|
||||||
|
beq .L266
|
||||||
|
eor.l %d1, %d0 | else compare signs
|
||||||
|
bge .L267 | if same, add delta to weight
|
||||||
|
sub.l %a3, %d3 | else subtract delta from weight
|
||||||
|
sub.l %a3, %d3 | subtract again instead of branch
|
||||||
|
.L267: add.l %a3, %d3 | add delta to weight
|
||||||
|
|
||||||
|
.L266: move.l %acc0, %d2 | d2 = rounded product
|
||||||
|
add.l %d1, %d2 | add applied weight to bptr [0], store
|
||||||
|
move.l %d2, (%a1)+
|
||||||
|
|
||||||
|
.L268: move.l -8(%a1), %a0 | d0 = (3 * bptr [-1] - bptr [-2]) >> 1
|
||||||
|
lea (%a0,%a0.l*2), %a0
|
||||||
|
move.l %a0, %d0
|
||||||
|
sub.l -16(%a1), %d0
|
||||||
|
asr.l #1, %d0
|
||||||
|
beq .L261
|
||||||
|
move.l %acc1, %acc0
|
||||||
|
asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_B
|
||||||
|
mac.l %d0, %d4, %acc0
|
||||||
|
move.l (%a1), %d1
|
||||||
|
beq .L265
|
||||||
|
eor.l %d1, %d0 | else compare signs
|
||||||
|
bge .L270 | if same, add delta to weight
|
||||||
|
sub.l %a3, %d4 | else subtract delta from weight
|
||||||
|
sub.l %a3, %d4 | subtract again instead of branch
|
||||||
|
.L270: add.l %a3, %d4 | add delta to weight
|
||||||
|
|
||||||
|
.L265: move.l %acc0, %d2 | d2 = rounded product
|
||||||
|
add.l %d1, %d2 | add applied weight to bptr [0], store
|
||||||
|
move.l %d2, (%a1)+
|
||||||
|
|
||||||
|
.L269: cmp.l %a1, %d5 | loop if bptr < eptr
|
||||||
|
jbhi term_18
|
||||||
|
bra term_17_18_finish | exit through common path
|
||||||
|
|
||||||
|
.L260: addq.l #4, %a1 | bump pointer and jump back into loop
|
||||||
|
bra .L268
|
||||||
|
|
||||||
|
.L261: addq.l #4, %a1 | bump pointer and jump back into loop
|
||||||
|
bra .L269
|
||||||
|
|
||||||
|
term_17_18_finish:
|
||||||
|
move.l -4(%a1), 40(%a2) | restore dpp->samples_A [0-1], B [0-1]
|
||||||
|
move.l -8(%a1), 8(%a2)
|
||||||
|
move.l -12(%a1), 44(%a2)
|
||||||
|
move.l -16(%a1), 12(%a2)
|
||||||
|
jbra finish_up
|
||||||
|
|
||||||
|
|------------------------------------------------------------------------------
|
||||||
|
| Loop to handle default terms (i.e. 1 - 8)
|
||||||
|
|
|
||||||
|
| a0 = tptr d0 = tptr [0]
|
||||||
|
| a1 = bptr d1 = initial bptr [0]
|
||||||
|
| a2 = dpp-> d2 = updated bptr [0]
|
||||||
|
| a3 = dpp->delta << 17 d3 = dpp->weight_A << 17
|
||||||
|
| a4 = d4 = dpp->weight_B << 17
|
||||||
|
| a5 = d5 = eptr
|
||||||
|
| macsr = 0x20 acc1 = 0x00 0000 80
|
||||||
|
|------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
term_default:
|
||||||
|
move.w (%a2), %d0 | a0 = a1 - (dpp->term * 8)
|
||||||
|
ext.l %d0
|
||||||
|
lsl.l #3, %d0
|
||||||
|
move.l %a1, %a0
|
||||||
|
sub.l %d0, %a0
|
||||||
|
|
||||||
|
term_default_loop:
|
||||||
|
move.l (%a0)+, %d0 | d0 = tptr [0], skip ahead if zero
|
||||||
|
beq .L271
|
||||||
|
move.l %acc1, %acc0
|
||||||
|
asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_A
|
||||||
|
mac.l %d0, %d3, %acc0
|
||||||
|
move.l (%a1), %d1
|
||||||
|
beq .L277
|
||||||
|
eor.l %d1, %d0 | else compare signs
|
||||||
|
bge .L278 | if same, add delta to weight
|
||||||
|
sub.l %a3, %d3 | else subtract delta from weight
|
||||||
|
sub.l %a3, %d3 | subtract again instead of branch
|
||||||
|
.L278: add.l %a3, %d3 | add delta to weight
|
||||||
|
|
||||||
|
.L277: move.l %acc0, %d2 | d2 = rounded product
|
||||||
|
add.l %d1, %d2 | add applied weight to bptr [0], store
|
||||||
|
move.l %d2, (%a1)+
|
||||||
|
|
||||||
|
.L275: move.l (%a0)+, %d0 | d0 = tptr [0], skip ahead if zero
|
||||||
|
beq .L272
|
||||||
|
move.l %acc1, %acc0
|
||||||
|
asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_B
|
||||||
|
mac.l %d0, %d4, %acc0
|
||||||
|
move.l (%a1), %d1
|
||||||
|
beq .L276
|
||||||
|
eor.l %d1, %d0 | else compare signs
|
||||||
|
bge .L281 | if same, add delta to weight
|
||||||
|
sub.l %a3, %d4 | else subtract delta from weight
|
||||||
|
sub.l %a3, %d4 | subtract again instead of branch
|
||||||
|
.L281: add.l %a3, %d4 | add delta to weight
|
||||||
|
|
||||||
|
.L276: move.l %acc0, %d2 | d2 = rounded product
|
||||||
|
add.l %d1, %d2 | add applied weight to bptr [0], store
|
||||||
|
move.l %d2, (%a1)+
|
||||||
|
|
||||||
|
.L274: cmp.l %a1, %d5 | loop back if bptr < eptr
|
||||||
|
jbhi term_default_loop
|
||||||
|
move.w (%a2), %d0 | d0 = term - 1
|
||||||
|
moveq.l #8, %d1 | d1 = loop counter
|
||||||
|
|
||||||
|
.L323: subq.l #1, %d0 | back up & mask index
|
||||||
|
and.l #7, %d0
|
||||||
|
move.l -(%a1), 40(%a2,%d0.l*4) | store dpp->samples_B [d0]
|
||||||
|
move.l -(%a1), 8(%a2,%d0.l*4) | store dpp->samples_A [d0]
|
||||||
|
subq.l #1, %d1 | loop on count
|
||||||
|
jbne .L323
|
||||||
|
jbra finish_up
|
||||||
|
|
||||||
|
.L271: addq.l #4, %a1 | bump pointer and jump back into loop
|
||||||
|
bra .L275
|
||||||
|
|
||||||
|
.L272: addq.l #4, %a1 | bump pointer and jump back into loop
|
||||||
|
bra .L274
|
||||||
|
|
||||||
|
|
||||||
|
|------------------------------------------------------------------------------
|
||||||
|
| Loop to handle term = -1 condition
|
||||||
|
|
|
||||||
|
| a0 = d0 = decorrelation sample
|
||||||
|
| a1 = bptr d1 = initial bptr [0]
|
||||||
|
| a2 = dpp-> d2 = updated bptr [0]
|
||||||
|
| a3 = dpp->delta << 17 d3 = dpp->weight_A << 17
|
||||||
|
| a4 = d4 = dpp->weight_B << 17
|
||||||
|
| a5 = d5 = eptr
|
||||||
|
| a6 = d6 = 1024 << 17
|
||||||
|
| a7 = d7 = -1024 << 17
|
||||||
|
| macsr = 0x20 acc1 = 0x00 0000 80
|
||||||
|
|------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
term_minus_1:
|
||||||
|
move.l -4(%a1), %d0 | d0 = bptr [-1]
|
||||||
|
beq .L402
|
||||||
|
move.l %acc1, %acc0
|
||||||
|
asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_A)
|
||||||
|
mac.l %d0, %d3, %acc0
|
||||||
|
move.l (%a1), %d1
|
||||||
|
beq .L405
|
||||||
|
eor.l %d1, %d0 | else compare signs
|
||||||
|
bge .L404 | if same, add delta to weight
|
||||||
|
sub.l %a3, %d3 | else subtract delta from weight
|
||||||
|
cmp.l %d7, %d3 | check for negative clip limit
|
||||||
|
bge .L405
|
||||||
|
move.l %d7, %d3
|
||||||
|
bra .L405
|
||||||
|
|
||||||
|
.L404: add.l %a3, %d3 | add delta to weight
|
||||||
|
cmp.l %d6, %d3 | check for positive clip limit
|
||||||
|
ble .L405
|
||||||
|
move.l %d6, %d3
|
||||||
|
|
||||||
|
.L405: move.l %acc0, %d0 | d2 = rounded product
|
||||||
|
add.l %d1, %d0 | add applied weight to bptr [0], store
|
||||||
|
move.l %d0, (%a1)+
|
||||||
|
beq .L401
|
||||||
|
|
||||||
|
.L410: move.l %acc1, %acc0
|
||||||
|
asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_B)
|
||||||
|
mac.l %d0, %d4, %acc0
|
||||||
|
move.l (%a1), %d1
|
||||||
|
beq .L403
|
||||||
|
eor.l %d1, %d0 | else compare signs
|
||||||
|
bge .L407 | if same, add delta to weight
|
||||||
|
sub.l %a3, %d4 | else subtract delta from weight
|
||||||
|
cmp.l %d7, %d4 | check for negative clip limit
|
||||||
|
bge .L403
|
||||||
|
move.l %d7, %d4
|
||||||
|
bra .L403
|
||||||
|
|
||||||
|
.L407: add.l %a3, %d4 | add delta to weight
|
||||||
|
cmp.l %d6, %d4 | check for positive clip limit
|
||||||
|
ble .L403
|
||||||
|
move.l %d6, %d4
|
||||||
|
|
||||||
|
.L403: move.l %acc0, %d2 | d2 = rounded product
|
||||||
|
add.l %d1, %d2 | add applied weight to bptr [1], store
|
||||||
|
move.l %d2, (%a1)+
|
||||||
|
|
||||||
|
.L411: cmp.l %a1, %d5 | loop back if bptr < eptr
|
||||||
|
jbhi term_minus_1
|
||||||
|
move.l -4(%a1), 8(%a2) | dpp->samples_A [0] = bptr [-1]
|
||||||
|
jbra finish_up
|
||||||
|
|
||||||
|
.L402: move.l (%a1)+, %d0
|
||||||
|
bne .L410
|
||||||
|
|
||||||
|
.L401: addq.l #4, %a1
|
||||||
|
bra .L411
|
||||||
|
|
||||||
|
|
||||||
|
|------------------------------------------------------------------------------
|
||||||
|
| Loop to handle term = -2 condition
|
||||||
|
|
|
||||||
|
| a0 = d0 = decorrelation sample
|
||||||
|
| a1 = bptr d1 = initial bptr [0]
|
||||||
|
| a2 = dpp-> d2 = updated bptr [0]
|
||||||
|
| a3 = dpp->delta << 17 d3 = dpp->weight_A << 17
|
||||||
|
| a4 = d4 = dpp->weight_B << 17
|
||||||
|
| a5 = d5 = eptr
|
||||||
|
| a6 = d6 = 1024 << 17
|
||||||
|
| a7 = d7 = -1024 << 17
|
||||||
|
| macsr = 0x20 acc1 = 0x00 0000 80
|
||||||
|
|------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
term_minus_2:
|
||||||
|
move.l -8(%a1), %d0 | d0 = bptr [-2]
|
||||||
|
beq .L511
|
||||||
|
move.l %acc1, %acc0
|
||||||
|
asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_B)
|
||||||
|
mac.l %d0, %d4, %acc0
|
||||||
|
move.l 4(%a1), %d1
|
||||||
|
beq .L505
|
||||||
|
eor.l %d1, %d0 | else compare signs
|
||||||
|
bge .L504 | if same, add delta to weight
|
||||||
|
sub.l %a3, %d4 | else subtract delta from weight
|
||||||
|
cmp.l %d7, %d4 | ckeck for negative clip limit
|
||||||
|
bge .L505
|
||||||
|
move.l %d7, %d4
|
||||||
|
bra .L505
|
||||||
|
|
||||||
|
.L504: add.l %a3, %d4 | add delta to weight
|
||||||
|
cmp.l %d6, %d4 | check for positive clip limit
|
||||||
|
ble .L505
|
||||||
|
move.l %d6, %d4
|
||||||
|
|
||||||
|
.L505: move.l %acc0, %d0 | d2 = rounded product
|
||||||
|
add.l %d1, %d0 | add applied weight to bptr [0], store
|
||||||
|
move.l %d0, 4(%a1)
|
||||||
|
beq .L512
|
||||||
|
|
||||||
|
.L510: move.l %acc1, %acc0
|
||||||
|
asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_A)
|
||||||
|
mac.l %d0, %d3, %acc0
|
||||||
|
move.l (%a1), %d1
|
||||||
|
beq .L503
|
||||||
|
eor.l %d1, %d0 | else compare signs
|
||||||
|
bge .L507 | if same, add delta to weight
|
||||||
|
sub.l %a3, %d3 | else subtract delta from weight
|
||||||
|
cmp.l %d7, %d3 | check for negative clip limit
|
||||||
|
bge .L503
|
||||||
|
move.l %d7, %d3
|
||||||
|
bra .L503
|
||||||
|
|
||||||
|
.L507: add.l %a3, %d3 | add delta to weight
|
||||||
|
cmp.l %d6, %d3 | check for negative clip limit
|
||||||
|
ble .L503
|
||||||
|
move.l %d6, %d3
|
||||||
|
|
||||||
|
.L503: move.l %acc0, %d2 | d2 = rounded product
|
||||||
|
add.l %d1, %d2 | add applied weight to bptr [1], store
|
||||||
|
move.l %d2, (%a1)
|
||||||
|
|
||||||
|
.L512: addq.l #8, %a1
|
||||||
|
cmp.l %a1, %d5 | loop if bptr < eptr
|
||||||
|
jbhi term_minus_2
|
||||||
|
move.l -8(%a1), 40(%a2) | dpp->samples_B [0] = bptr [-4]
|
||||||
|
jbra finish_up
|
||||||
|
|
||||||
|
.L511: move.l 4(%a1), %d0
|
||||||
|
beq .L512
|
||||||
|
bra .L510
|
||||||
|
|
||||||
|
|
||||||
|
|------------------------------------------------------------------------------
|
||||||
|
| Loop to handle term = -3 condition
|
||||||
|
|
|
||||||
|
| a0 = d0 = decorrelation sample
|
||||||
|
| a1 = bptr d1 = initial bptr [0]
|
||||||
|
| a2 = dpp-> d2 = updated bptr [0]
|
||||||
|
| a3 = dpp->delta << 17 d3 = dpp->weight_A << 17
|
||||||
|
| a4 = d4 = dpp->weight_B << 17
|
||||||
|
| a5 = d5 = eptr
|
||||||
|
| a6 = d6 = 1024 << 17
|
||||||
|
| a7 = d7 = -1024 << 17
|
||||||
|
| macsr = 0x20 acc1 = 0x00 0000 80
|
||||||
|
|------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
term_minus_3:
|
||||||
|
move.l -4(%a1), %d0 | d0 = bptr [-1]
|
||||||
|
beq .L301
|
||||||
|
move.l %acc1, %acc0
|
||||||
|
asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_A)
|
||||||
|
mac.l %d0, %d3, %acc0
|
||||||
|
move.l (%a1), %d1
|
||||||
|
beq .L320
|
||||||
|
eor.l %d1, %d0 | else compare signs
|
||||||
|
bge .L319 | if same, add delta to weight
|
||||||
|
sub.l %a3, %d3 | else subtract delta from weight
|
||||||
|
cmp.l %d7, %d3 | check for negative clip limit
|
||||||
|
bge .L320
|
||||||
|
move.l %d7, %d3
|
||||||
|
bra .L320
|
||||||
|
|
||||||
|
.L319: add.l %a3, %d3 | add delta to weight
|
||||||
|
cmp.l %d6, %d3 | check for positive clip limit
|
||||||
|
ble .L320
|
||||||
|
move.l %d6, %d3
|
||||||
|
|
||||||
|
.L320: move.l %acc0, %d2 | d2 = rounded product
|
||||||
|
add.l %d1, %d2 | add applied weight to bptr [0], store
|
||||||
|
move.l %d2, (%a1)+
|
||||||
|
|
||||||
|
.L330: move.l -12(%a1), %d0 | d0 = bptr [-2]
|
||||||
|
beq .L302
|
||||||
|
move.l %acc1, %acc0
|
||||||
|
asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_B)
|
||||||
|
mac.l %d0, %d4, %acc0
|
||||||
|
move.l (%a1), %d1
|
||||||
|
beq .L318
|
||||||
|
eor.l %d1, %d0 | else compare signs
|
||||||
|
bge .L322 | if same, add delta to weight
|
||||||
|
sub.l %a3, %d4 | else subtract delta from weight
|
||||||
|
cmp.l %d7, %d4 | check for negative clip limit
|
||||||
|
bge .L318
|
||||||
|
move.l %d7, %d4
|
||||||
|
bra .L318
|
||||||
|
|
||||||
|
.L322: add.l %a3, %d4 | add delta to weight
|
||||||
|
cmp.l %d6, %d4 | check for positive clip limit
|
||||||
|
ble .L318
|
||||||
|
move.l %d6, %d4
|
||||||
|
|
||||||
|
.L318: move.l %acc0, %d2 | d2 = rounded product
|
||||||
|
add.l %d1, %d2 | add applied weight to bptr [1], store
|
||||||
|
move.l %d2, (%a1)+
|
||||||
|
|
||||||
|
.L331: cmp.l %a1, %d5 | bptr, eptr
|
||||||
|
jbhi term_minus_3
|
||||||
|
move.l -4(%a1), 8(%a2) | dpp->samples_A [0] = bptr [-1]
|
||||||
|
move.l -8(%a1), 40(%a2) | dpp->samples_B [0] = bptr [-2]
|
||||||
|
jbra finish_up
|
||||||
|
|
||||||
|
.L301: addq.l #4, %a1
|
||||||
|
bra .L330
|
||||||
|
|
||||||
|
.L302: addq.l #4, %a1
|
||||||
|
bra .L331
|
||||||
|
|
||||||
|
| finish and return
|
||||||
|
|
||||||
|
finish_up:
|
||||||
|
moveq.l #17, %d0
|
||||||
|
asr.l %d0, %d3
|
||||||
|
asr.l %d0, %d4
|
||||||
|
move.w %d3, 4(%a2) | weight_A, dpp->weight_A
|
||||||
|
move.w %d4, 6(%a2) | weight_B, dpp->weight_B
|
||||||
|
|
||||||
|
clr.l %d0 | clear up EMAC
|
||||||
|
move.l %d0, %acc0
|
||||||
|
move.l %d0, %acc1
|
||||||
|
|
||||||
|
return_only:
|
||||||
|
movem.l (%sp), %d2-%d7/%a2-%a6
|
||||||
|
lea (44,%sp), %sp
|
||||||
|
rts
|
||||||
|
|
@ -27,7 +27,11 @@ static void strcpy_loc (char *dst, char *src) { while (*src) *dst++ = *src++; *d
|
||||||
// these macros implement the weight application and update operations
|
// these macros implement the weight application and update operations
|
||||||
// that are at the heart of the decorrelation loops
|
// that are at the heart of the decorrelation loops
|
||||||
|
|
||||||
|
#if 0 // PERFCOND
|
||||||
#define apply_weight_i(weight, sample) ((weight * sample + 512) >> 10)
|
#define apply_weight_i(weight, sample) ((weight * sample + 512) >> 10)
|
||||||
|
#else
|
||||||
|
#define apply_weight_i(weight, sample) ((((weight * sample) >> 8) + 2) >> 2)
|
||||||
|
#endif
|
||||||
|
|
||||||
#define apply_weight_f(weight, sample) (((((sample & 0xffff) * weight) >> 9) + \
|
#define apply_weight_f(weight, sample) (((((sample & 0xffff) * weight) >> 9) + \
|
||||||
(((sample & ~0xffff) >> 9) * weight) + 1) >> 1)
|
(((sample & ~0xffff) >> 9) * weight) + 1) >> 1)
|
||||||
|
|
@ -39,7 +43,7 @@ static void strcpy_loc (char *dst, char *src) { while (*src) *dst++ = *src++; *d
|
||||||
#define apply_weight(weight, sample) ((int32_t)((weight * (int64_t) sample + 512) >> 10))
|
#define apply_weight(weight, sample) ((int32_t)((weight * (int64_t) sample + 512) >> 10))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if 1 // PERFCOND
|
#if 0 // PERFCOND
|
||||||
#define update_weight(weight, delta, source, result) \
|
#define update_weight(weight, delta, source, result) \
|
||||||
if (source && result) weight -= ((((source ^ result) >> 30) & 2) - 1) * delta;
|
if (source && result) weight -= ((((source ^ result) >> 30) & 2) - 1) * delta;
|
||||||
#else
|
#else
|
||||||
|
|
@ -315,9 +319,14 @@ int read_config_info (WavpackContext *wpc, WavpackMetadata *wpmd)
|
||||||
// samples unpacked, which can be less than the number requested if an error
|
// samples unpacked, which can be less than the number requested if an error
|
||||||
// occurs or the end of the block is reached.
|
// occurs or the end of the block is reached.
|
||||||
|
|
||||||
|
#if CONFIG_CPU==MCF5249 && !defined(SIMULATOR)
|
||||||
|
extern void decorr_stereo_pass_cont_mcf5249 (struct decorr_pass *dpp, long *buffer, long sample_count);
|
||||||
|
#else
|
||||||
|
static void decorr_stereo_pass_cont (struct decorr_pass *dpp, long *buffer, long sample_count);
|
||||||
|
#endif
|
||||||
|
|
||||||
static void decorr_mono_pass (struct decorr_pass *dpp, long *buffer, long sample_count);
|
static void decorr_mono_pass (struct decorr_pass *dpp, long *buffer, long sample_count);
|
||||||
static void decorr_stereo_pass (struct decorr_pass *dpp, long *buffer, long sample_count);
|
static void decorr_stereo_pass (struct decorr_pass *dpp, long *buffer, long sample_count);
|
||||||
static void decorr_stereo_pass_cont (struct decorr_pass *dpp, long *buffer, long sample_count);
|
|
||||||
static void fixup_samples (WavpackStream *wps, long *buffer, ulong sample_count);
|
static void fixup_samples (WavpackStream *wps, long *buffer, ulong sample_count);
|
||||||
|
|
||||||
long unpack_samples (WavpackContext *wpc, long *buffer, ulong sample_count)
|
long unpack_samples (WavpackContext *wpc, long *buffer, ulong sample_count)
|
||||||
|
|
@ -372,7 +381,11 @@ long unpack_samples (WavpackContext *wpc, long *buffer, ulong sample_count)
|
||||||
else
|
else
|
||||||
for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) {
|
for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) {
|
||||||
decorr_stereo_pass (dpp, buffer, 8);
|
decorr_stereo_pass (dpp, buffer, 8);
|
||||||
|
#if CONFIG_CPU==MCF5249 && !defined(SIMULATOR)
|
||||||
|
decorr_stereo_pass_cont_mcf5249 (dpp, buffer + 16, sample_count - 8);
|
||||||
|
#else
|
||||||
decorr_stereo_pass_cont (dpp, buffer + 16, sample_count - 8);
|
decorr_stereo_pass_cont (dpp, buffer + 16, sample_count - 8);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
if (flags & JOINT_STEREO)
|
if (flags & JOINT_STEREO)
|
||||||
|
|
@ -530,11 +543,13 @@ static void decorr_stereo_pass (struct decorr_pass *dpp, long *buffer, long samp
|
||||||
dpp->weight_B = weight_B;
|
dpp->weight_B = weight_B;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if CONFIG_CPU != MCF5249 || defined(SIMULATOR)
|
||||||
|
|
||||||
static void decorr_stereo_pass_cont (struct decorr_pass *dpp, long *buffer, long sample_count)
|
static void decorr_stereo_pass_cont (struct decorr_pass *dpp, long *buffer, long sample_count)
|
||||||
{
|
{
|
||||||
long delta = dpp->delta, weight_A = dpp->weight_A, weight_B = dpp->weight_B;
|
long delta = dpp->delta, weight_A = dpp->weight_A, weight_B = dpp->weight_B;
|
||||||
long *bptr, *tptr, *eptr = buffer + (sample_count * 2), sam_A, sam_B;
|
long *bptr, *tptr, *eptr = buffer + (sample_count * 2), sam_A, sam_B;
|
||||||
int k;
|
int k, i;
|
||||||
|
|
||||||
switch (dpp->term) {
|
switch (dpp->term) {
|
||||||
|
|
||||||
|
|
@ -581,23 +596,11 @@ static void decorr_stereo_pass_cont (struct decorr_pass *dpp, long *buffer, long
|
||||||
update_weight (weight_B, delta, tptr [1], sam_A);
|
update_weight (weight_B, delta, tptr [1], sam_A);
|
||||||
}
|
}
|
||||||
|
|
||||||
k = dpp->term;
|
for (k = dpp->term - 1, i = 8; i--; k--) {
|
||||||
dpp->samples_B [--k & (MAX_TERM - 1)] = bptr [-1];
|
dpp->samples_B [k & (MAX_TERM - 1)] = *--bptr;
|
||||||
dpp->samples_A [ k & (MAX_TERM - 1)] = bptr [-2];
|
dpp->samples_A [k & (MAX_TERM - 1)] = *--bptr;
|
||||||
dpp->samples_B [--k & (MAX_TERM - 1)] = bptr [-3];
|
}
|
||||||
dpp->samples_A [ k & (MAX_TERM - 1)] = bptr [-4];
|
|
||||||
dpp->samples_B [--k & (MAX_TERM - 1)] = bptr [-5];
|
|
||||||
dpp->samples_A [ k & (MAX_TERM - 1)] = bptr [-6];
|
|
||||||
dpp->samples_B [--k & (MAX_TERM - 1)] = bptr [-7];
|
|
||||||
dpp->samples_A [ k & (MAX_TERM - 1)] = bptr [-8];
|
|
||||||
dpp->samples_B [--k & (MAX_TERM - 1)] = bptr [-9];
|
|
||||||
dpp->samples_A [ k & (MAX_TERM - 1)] = bptr [-10];
|
|
||||||
dpp->samples_B [--k & (MAX_TERM - 1)] = bptr [-11];
|
|
||||||
dpp->samples_A [ k & (MAX_TERM - 1)] = bptr [-12];
|
|
||||||
dpp->samples_B [--k & (MAX_TERM - 1)] = bptr [-13];
|
|
||||||
dpp->samples_A [ k & (MAX_TERM - 1)] = bptr [-14];
|
|
||||||
dpp->samples_B [--k & (MAX_TERM - 1)] = bptr [-15];
|
|
||||||
dpp->samples_A [ k & (MAX_TERM - 1)] = bptr [-16];
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case -1:
|
case -1:
|
||||||
|
|
@ -639,6 +642,8 @@ static void decorr_stereo_pass_cont (struct decorr_pass *dpp, long *buffer, long
|
||||||
dpp->weight_B = weight_B;
|
dpp->weight_B = weight_B;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
static void decorr_mono_pass (struct decorr_pass *dpp, long *buffer, long sample_count)
|
static void decorr_mono_pass (struct decorr_pass *dpp, long *buffer, long sample_count)
|
||||||
{
|
{
|
||||||
long delta = dpp->delta, weight_A = dpp->weight_A;
|
long delta = dpp->delta, weight_A = dpp->weight_A;
|
||||||
|
|
|
||||||
|
|
@ -45,7 +45,7 @@ static ulong read_next_header (read_stream infile, WavpackHeader *wphdr);
|
||||||
// large integer or floating point files (but always provides at least 24 bits
|
// large integer or floating point files (but always provides at least 24 bits
|
||||||
// of resolution).
|
// of resolution).
|
||||||
|
|
||||||
static WavpackContext wpc;
|
static WavpackContext wpc IDATA_ATTR;
|
||||||
|
|
||||||
WavpackContext *WavpackOpenFileInput (read_stream infile, char *error)
|
WavpackContext *WavpackOpenFileInput (read_stream infile, char *error)
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -29,7 +29,7 @@
|
||||||
|
|
||||||
static struct plugin_api* rb;
|
static struct plugin_api* rb;
|
||||||
static file_info_struct file_info;
|
static file_info_struct file_info;
|
||||||
static long temp_buffer [BUFFER_SIZE];
|
static long temp_buffer [BUFFER_SIZE] IDATA_ATTR;
|
||||||
|
|
||||||
/* Reformat samples from longs in processor's native endian mode to
|
/* Reformat samples from longs in processor's native endian mode to
|
||||||
little-endian data with 2 bytes / sample. */
|
little-endian data with 2 bytes / sample. */
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue