forked from len0rd/rockbox
* ARM asm DSP and codec/plugin functions: Use r12 scratch register properly
* Fix saving another unused reg in the DSP code
* Use fewer regs in the pure DC case of the generic ARM mpegplayer adding IDCT
* Fix the ARMv6 mpegplayer adding IDCT using an unsaved register in the pure DC case

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@21803 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
parent
e12c1c0a62
commit
02c031709c
6 changed files with 125 additions and 127 deletions
|
@ -119,7 +119,7 @@ mdct_butterfly_16:
|
||||||
ldr pc, [sp], #4
|
ldr pc, [sp], #4
|
||||||
|
|
||||||
mdct_butterfly_32:
|
mdct_butterfly_32:
|
||||||
stmdb sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
|
stmdb sp!, {r4-r11, lr}
|
||||||
|
|
||||||
add r1, r0, #16*4
|
add r1, r0, #16*4
|
||||||
|
|
||||||
|
@ -247,13 +247,13 @@ mdct_butterfly_32:
|
||||||
add r0, r0, #16*4
|
add r0, r0, #16*4
|
||||||
bl mdct_butterfly_16
|
bl mdct_butterfly_16
|
||||||
|
|
||||||
ldmia sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc}
|
ldmia sp!, {r4-r11, pc}
|
||||||
|
|
||||||
@ mdct_butterfly_generic_loop(x1, x2, T0, step, Ttop)
|
@ mdct_butterfly_generic_loop(x1, x2, T0, step, Ttop)
|
||||||
mdct_butterfly_generic_loop:
|
mdct_butterfly_generic_loop:
|
||||||
stmdb sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
|
stmdb sp!, {r4-r11, lr}
|
||||||
str r2, [sp, #-4]
|
str r2, [sp, #-4]
|
||||||
ldr r4, [sp, #40]
|
ldr r4, [sp, #36]
|
||||||
1:
|
1:
|
||||||
ldmdb r0, {r6, r7, r8, r9}
|
ldmdb r0, {r6, r7, r8, r9}
|
||||||
ldmdb r1, {r10, r11, r12, r14}
|
ldmdb r1, {r10, r11, r12, r14}
|
||||||
|
@ -339,7 +339,7 @@ mdct_butterfly_generic_loop:
|
||||||
cmp r2, r4
|
cmp r2, r4
|
||||||
bhi 1b
|
bhi 1b
|
||||||
|
|
||||||
ldr r4, [sp, #40]
|
ldr r4, [sp, #36]
|
||||||
1:
|
1:
|
||||||
ldmdb r0, {r6, r7, r8, r9}
|
ldmdb r0, {r6, r7, r8, r9}
|
||||||
ldmdb r1, {r10, r11, r12, r14}
|
ldmdb r1, {r10, r11, r12, r14}
|
||||||
|
@ -425,5 +425,5 @@ mdct_butterfly_generic_loop:
|
||||||
cmp r2, r4
|
cmp r2, r4
|
||||||
bhi 1b
|
bhi 1b
|
||||||
|
|
||||||
ldmia sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc}
|
ldmia sp!, {r4-r11, pc}
|
||||||
|
|
||||||
|
|
|
@ -32,8 +32,8 @@
|
||||||
;; r3 = D0ptr
|
;; r3 = D0ptr
|
||||||
;; r4 = D1ptr
|
;; r4 = D1ptr
|
||||||
synth_full1:
|
synth_full1:
|
||||||
stmdb sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
|
stmdb sp!, {r4-r11, lr}
|
||||||
ldr r4, [sp, #40]
|
ldr r4, [sp, #36]
|
||||||
ldr r5, =synth_full_sp
|
ldr r5, =synth_full_sp
|
||||||
str sp, [r5]
|
str sp, [r5]
|
||||||
mov r5, #15
|
mov r5, #15
|
||||||
|
@ -135,11 +135,11 @@ synth_full1:
|
||||||
|
|
||||||
ldr r5, =synth_full_sp
|
ldr r5, =synth_full_sp
|
||||||
ldr sp, [r5]
|
ldr sp, [r5]
|
||||||
ldmia sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc}
|
ldmia sp!, {r4-r11, pc}
|
||||||
|
|
||||||
synth_full2:
|
synth_full2:
|
||||||
stmdb sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
|
stmdb sp!, {r4-r11, lr}
|
||||||
ldr r4, [sp, #40]
|
ldr r4, [sp, #36]
|
||||||
ldr r5, =synth_full_sp
|
ldr r5, =synth_full_sp
|
||||||
str sp, [r5]
|
str sp, [r5]
|
||||||
mov r5, #15
|
mov r5, #15
|
||||||
|
@ -241,12 +241,12 @@ synth_full2:
|
||||||
|
|
||||||
ldr r5, =synth_full_sp
|
ldr r5, =synth_full_sp
|
||||||
ldr sp, [r5]
|
ldr sp, [r5]
|
||||||
ldmia sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc}
|
ldmia sp!, {r4-r11, pc}
|
||||||
|
|
||||||
.global III_aliasreduce
|
.global III_aliasreduce
|
||||||
|
|
||||||
III_aliasreduce:
|
III_aliasreduce:
|
||||||
stmdb sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
|
stmdb sp!, {r4-r11, lr}
|
||||||
add r1, r0, r1, lsl #2
|
add r1, r0, r1, lsl #2
|
||||||
add r0, r0, #72
|
add r0, r0, #72
|
||||||
.arl1:
|
.arl1:
|
||||||
|
@ -289,7 +289,7 @@ III_aliasreduce:
|
||||||
add r0, r0, #72
|
add r0, r0, #72
|
||||||
cmp r0, r1
|
cmp r0, r1
|
||||||
blo .arl1
|
blo .arl1
|
||||||
ldmia sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc}
|
ldmia sp!, {r4-r11, pc}
|
||||||
|
|
||||||
csa:
|
csa:
|
||||||
.word +0x0db84a81
|
.word +0x0db84a81
|
||||||
|
@ -311,14 +311,14 @@ csa:
|
||||||
|
|
||||||
.global III_overlap
|
.global III_overlap
|
||||||
III_overlap:
|
III_overlap:
|
||||||
stmdb sp!, {r4, r5, r6, r7, r8, lr}
|
stmdb sp!, {r4-r7, lr}
|
||||||
add r2, r2, r3, lsl #2
|
add r2, r2, r3, lsl #2
|
||||||
mov r3, #6
|
mov r3, #6
|
||||||
.ol:
|
.ol:
|
||||||
ldmia r0!, {r4, r5, r6}
|
ldmia r0!, {r4, r5, r6}
|
||||||
ldmia r1!, {r7, r8, lr}
|
ldmia r1!, {r7, r12, lr}
|
||||||
add r4, r4, r7
|
add r4, r4, r7
|
||||||
add r5, r5, r8
|
add r5, r5, r12
|
||||||
add r6, r6, lr
|
add r6, r6, lr
|
||||||
str r4, [r2], #128
|
str r4, [r2], #128
|
||||||
str r5, [r2], #128
|
str r5, [r2], #128
|
||||||
|
@ -326,13 +326,13 @@ III_overlap:
|
||||||
subs r3, r3, #1
|
subs r3, r3, #1
|
||||||
bne .ol
|
bne .ol
|
||||||
sub r1, r1, #72
|
sub r1, r1, #72
|
||||||
ldmia r0!, {r4, r5, r6, r7, r8, lr}
|
ldmia r0!, {r4, r5, r6, r7, r12, lr}
|
||||||
stmia r1!, {r4, r5, r6, r7, r8, lr}
|
stmia r1!, {r4, r5, r6, r7, r12, lr}
|
||||||
ldmia r0!, {r4, r5, r6, r7, r8, lr}
|
ldmia r0!, {r4, r5, r6, r7, r12, lr}
|
||||||
stmia r1!, {r4, r5, r6, r7, r8, lr}
|
stmia r1!, {r4, r5, r6, r7, r12, lr}
|
||||||
ldmia r0!, {r4, r5, r6, r7, r8, lr}
|
ldmia r0!, {r4, r5, r6, r7, r12, lr}
|
||||||
stmia r1!, {r4, r5, r6, r7, r8, lr}
|
stmia r1!, {r4, r5, r6, r7, r12, lr}
|
||||||
ldmia sp!, {r4, r5, r6, r7, r8, pc}
|
ldmia sp!, {r4-r7, pc}
|
||||||
|
|
||||||
.section IBSS_SECTION_MPA_ARM,"aw",%nobits
|
.section IBSS_SECTION_MPA_ARM,"aw",%nobits
|
||||||
synth_full_sp:
|
synth_full_sp:
|
||||||
|
|
|
@ -9,10 +9,10 @@
|
||||||
*
|
*
|
||||||
* Copyright (C) 2008 by Andree Buschmann
|
* Copyright (C) 2008 by Andree Buschmann
|
||||||
*
|
*
|
||||||
* This program is free software; you can redistribute it and/or
|
* This program is free software; you can redistribute it and/or
|
||||||
* modify it under the terms of the GNU General Public License
|
* modify it under the terms of the GNU General Public License
|
||||||
* as published by the Free Software Foundation; either version 2
|
* as published by the Free Software Foundation; either version 2
|
||||||
* of the License, or (at your option) any later version.
|
* of the License, or (at your option) any later version.
|
||||||
*
|
*
|
||||||
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
|
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
|
||||||
* KIND, either express or implied.
|
* KIND, either express or implied.
|
||||||
|
@ -41,7 +41,7 @@ mpc_decoder_windowing_D:
|
||||||
/* r2 = D[] */
|
/* r2 = D[] */
|
||||||
/* lr = counter */
|
/* lr = counter */
|
||||||
|
|
||||||
stmfd sp!, {r4-r12, lr}
|
stmfd sp!, {r4-r11, lr}
|
||||||
|
|
||||||
mov lr, #32
|
mov lr, #32
|
||||||
.loop32:
|
.loop32:
|
||||||
|
@ -86,7 +86,7 @@ mpc_decoder_windowing_D:
|
||||||
subs lr, lr, #1
|
subs lr, lr, #1
|
||||||
bgt .loop32
|
bgt .loop32
|
||||||
|
|
||||||
ldmfd sp!, {r4-r12, pc}
|
ldmfd sp!, {r4-r11, pc}
|
||||||
.mpc_dewindowing_end:
|
.mpc_dewindowing_end:
|
||||||
.size mpc_decoder_windowing_D,.mpc_dewindowing_end-mpc_decoder_windowing_D
|
.size mpc_decoder_windowing_D,.mpc_dewindowing_end-mpc_decoder_windowing_D
|
||||||
#else
|
#else
|
||||||
|
@ -110,55 +110,55 @@ mpc_decoder_windowing_D:
|
||||||
/************************************************************************
|
/************************************************************************
|
||||||
* Reference implementation.
|
* Reference implementation.
|
||||||
***********************************************************************/
|
***********************************************************************/
|
||||||
stmfd sp!, {r4-r9, lr}
|
stmfd sp!, {r4-r8, lr}
|
||||||
|
|
||||||
mov lr, #32
|
mov lr, #32
|
||||||
.loop32:
|
.loop32:
|
||||||
ldmia r2!, { r3-r6 } /* load D[00..03] */
|
ldmia r2!, { r3-r6 } /* load D[00..03] */
|
||||||
ldr r7, [r1] /* 0 */
|
ldr r7, [r1] /* 0 */
|
||||||
smull r8, r9, r7, r3
|
smull r8, r12, r7, r3
|
||||||
ldr r7, [r1, #96*4] /* 1 */
|
ldr r7, [r1, #96*4] /* 1 */
|
||||||
smlal r8, r9, r7, r4
|
smlal r8, r12, r7, r4
|
||||||
ldr r7, [r1, #128*4] /* 2 */
|
ldr r7, [r1, #128*4] /* 2 */
|
||||||
smlal r8, r9, r7, r5
|
smlal r8, r12, r7, r5
|
||||||
ldr r7, [r1, #224*4] /* 3 */
|
ldr r7, [r1, #224*4] /* 3 */
|
||||||
smlal r8, r9, r7, r6
|
smlal r8, r12, r7, r6
|
||||||
ldmia r2!, { r3-r6 } /* load D[04..07] */
|
ldmia r2!, { r3-r6 } /* load D[04..07] */
|
||||||
ldr r7, [r1, #256*4] /* 4 */
|
ldr r7, [r1, #256*4] /* 4 */
|
||||||
smlal r8, r9, r7, r3
|
smlal r8, r12, r7, r3
|
||||||
ldr r7, [r1, #352*4] /* 5 */
|
ldr r7, [r1, #352*4] /* 5 */
|
||||||
smlal r8, r9, r7, r4
|
smlal r8, r12, r7, r4
|
||||||
ldr r7, [r1, #384*4] /* 6 */
|
ldr r7, [r1, #384*4] /* 6 */
|
||||||
smlal r8, r9, r7, r5
|
smlal r8, r12, r7, r5
|
||||||
ldr r7, [r1, #480*4] /* 7 */
|
ldr r7, [r1, #480*4] /* 7 */
|
||||||
smlal r8, r9, r7, r6
|
smlal r8, r12, r7, r6
|
||||||
ldmia r2!, { r3-r6 } /* load D[08..11] */
|
ldmia r2!, { r3-r6 } /* load D[08..11] */
|
||||||
ldr r7, [r1, #512*4] /* 8 */
|
ldr r7, [r1, #512*4] /* 8 */
|
||||||
smlal r8, r9, r7, r3
|
smlal r8, r12, r7, r3
|
||||||
ldr r7, [r1, #608*4] /* 9 */
|
ldr r7, [r1, #608*4] /* 9 */
|
||||||
smlal r8, r9, r7, r4
|
smlal r8, r12, r7, r4
|
||||||
ldr r7, [r1, #640*4] /* 10 */
|
ldr r7, [r1, #640*4] /* 10 */
|
||||||
smlal r8, r9, r7, r5
|
smlal r8, r12, r7, r5
|
||||||
ldr r7, [r1, #736*4] /* 11 */
|
ldr r7, [r1, #736*4] /* 11 */
|
||||||
smlal r8, r9, r7, r6
|
smlal r8, r12, r7, r6
|
||||||
ldmia r2!, { r3-r6 } /* load D[12..15] */
|
ldmia r2!, { r3-r6 } /* load D[12..15] */
|
||||||
ldr r7, [r1, #768*4] /* 12 */
|
ldr r7, [r1, #768*4] /* 12 */
|
||||||
smlal r8, r9, r7, r3
|
smlal r8, r12, r7, r3
|
||||||
ldr r7, [r1, #864*4] /* 13 */
|
ldr r7, [r1, #864*4] /* 13 */
|
||||||
smlal r8, r9, r7, r4
|
smlal r8, r12, r7, r4
|
||||||
ldr r7, [r1, #896*4] /* 14 */
|
ldr r7, [r1, #896*4] /* 14 */
|
||||||
smlal r8, r9, r7, r5
|
smlal r8, r12, r7, r5
|
||||||
ldr r7, [r1, #992*4] /* 15 */
|
ldr r7, [r1, #992*4] /* 15 */
|
||||||
smlal r8, r9, r7, r6
|
smlal r8, r12, r7, r6
|
||||||
mov r8, r8, lsr #16
|
mov r8, r8, lsr #16
|
||||||
orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */
|
orr r8, r8, r12, lsl #16 /* (lo>>16) || (hi<<16) */
|
||||||
str r8, [r0], #4 /* store Data */
|
str r8, [r0], #4 /* store Data */
|
||||||
add r1, r1, #4 /* V++ */
|
add r1, r1, #4 /* V++ */
|
||||||
|
|
||||||
subs lr, lr, #1
|
subs lr, lr, #1
|
||||||
bgt .loop32
|
bgt .loop32
|
||||||
|
|
||||||
ldmfd sp!, {r4-r9, pc}
|
ldmfd sp!, {r4-r8, pc}
|
||||||
#else
|
#else
|
||||||
mpc_decoder_windowing_D:
|
mpc_decoder_windowing_D:
|
||||||
/* r0 = Data[] */
|
/* r0 = Data[] */
|
||||||
|
@ -174,7 +174,7 @@ mpc_decoder_windowing_D:
|
||||||
* The row V[16] can be extracted as it has symmetries within this single
|
* The row V[16] can be extracted as it has symmetries within this single
|
||||||
* row. 8 smull/mlal and 8 ldr's can be saved.
|
* row. 8 smull/mlal and 8 ldr's can be saved.
|
||||||
***********************************************************************/
|
***********************************************************************/
|
||||||
stmfd sp!, {r4-r12, lr}
|
stmfd sp!, {r4-r11, lr}
|
||||||
|
|
||||||
/******************************************
|
/******************************************
|
||||||
* row 0 with internal symmetry
|
* row 0 with internal symmetry
|
||||||
|
@ -356,7 +356,7 @@ mpc_decoder_windowing_D:
|
||||||
str r8, [r0], #4 /* store Data */
|
str r8, [r0], #4 /* store Data */
|
||||||
add r1, r1, #4 /* V++ */
|
add r1, r1, #4 /* V++ */
|
||||||
|
|
||||||
ldmfd sp!, {r4-r12, pc}
|
ldmfd sp!, {r4-r11, pc}
|
||||||
#endif
|
#endif
|
||||||
.mpc_dewindowing_end:
|
.mpc_dewindowing_end:
|
||||||
.size mpc_decoder_windowing_D,.mpc_dewindowing_end-mpc_decoder_windowing_D
|
.size mpc_decoder_windowing_D,.mpc_dewindowing_end-mpc_decoder_windowing_D
|
||||||
|
|
|
@ -32,14 +32,14 @@
|
||||||
.type channels_process_sound_chan_mono, %function
|
.type channels_process_sound_chan_mono, %function
|
||||||
channels_process_sound_chan_mono:
|
channels_process_sound_chan_mono:
|
||||||
@ input: r0 = count, r1 = buf
|
@ input: r0 = count, r1 = buf
|
||||||
stmfd sp!, {r4-r6, lr}
|
stmfd sp!, {r4-r5, lr}
|
||||||
ldmia r1, {r2-r3} @ r4 = buf[0], r5 = buf[1]
|
ldmia r1, {r2-r3} @ r4 = buf[0], r5 = buf[1]
|
||||||
|
|
||||||
.monoloop:
|
.monoloop:
|
||||||
ldmia r2, {r4-r5}
|
ldmia r2, {r4-r5}
|
||||||
ldmia r3, {r6,lr}
|
ldmia r3, {r12,lr}
|
||||||
mov r4, r4, asr #1 @ r4 = r4/2
|
mov r4, r4, asr #1 @ r4 = r4/2
|
||||||
add r4, r4, r6, asr #1 @ r4 = r4 + r6/2 = (buf[0]+buf[1])/2
|
add r4, r4, r12, asr #1 @ r4 = r4 + r12/2 = (buf[0]+buf[1])/2
|
||||||
mov r5, r5, asr #1 @ r5 = r5/2
|
mov r5, r5, asr #1 @ r5 = r5/2
|
||||||
add r5, r5, lr, asr #1 @ r5 = r5 + lr/2 = (buf[0]+buf[1])/2
|
add r5, r5, lr, asr #1 @ r5 = r5 + lr/2 = (buf[0]+buf[1])/2
|
||||||
stmia r2!, {r4-r5}
|
stmia r2!, {r4-r5}
|
||||||
|
@ -47,7 +47,7 @@ channels_process_sound_chan_mono:
|
||||||
subs r0, r0, #2
|
subs r0, r0, #2
|
||||||
bgt .monoloop
|
bgt .monoloop
|
||||||
|
|
||||||
ldmfd sp!, {r4-r6, pc}
|
ldmfd sp!, {r4-r5, pc}
|
||||||
.monoend:
|
.monoend:
|
||||||
.size channels_process_sound_chan_mono,.monoend-channels_process_sound_chan_mono
|
.size channels_process_sound_chan_mono,.monoend-channels_process_sound_chan_mono
|
||||||
|
|
||||||
|
@ -63,24 +63,24 @@ channels_process_sound_chan_mono:
|
||||||
.type channels_process_sound_chan_karaoke, %function
|
.type channels_process_sound_chan_karaoke, %function
|
||||||
channels_process_sound_chan_karaoke:
|
channels_process_sound_chan_karaoke:
|
||||||
@ input: r0 = count, r1 = buf
|
@ input: r0 = count, r1 = buf
|
||||||
stmfd sp!, {r4-r6, lr}
|
stmfd sp!, {r4-r5, lr}
|
||||||
ldmia r1, {r2-r3} @ r4 = buf[0], r5 = buf[1]
|
ldmia r1, {r2-r3} @ r4 = buf[0], r5 = buf[1]
|
||||||
|
|
||||||
.karaokeloop:
|
.karaokeloop:
|
||||||
ldmia r2, {r4-r5}
|
ldmia r2, {r4-r5}
|
||||||
ldmia r3, {r6,lr}
|
ldmia r3, {r12,lr}
|
||||||
mov r6, r6, asr #1 @ r6 = r6/2
|
mov r12, r12, asr #1 @ r12 = r12/2
|
||||||
rsb r4, r6, r4, asr #1 @ r4 = -r6 + r4/2 = (buf[0]-buf[1])/2
|
rsb r4, r12, r4, asr #1 @ r4 = -r12 + r4/2 = (buf[0]-buf[1])/2
|
||||||
rsb r6, r4, #0 @ r6 = -r4
|
rsb r12, r4, #0 @ r12 = -r4
|
||||||
mov lr, lr, asr #1 @ lr = lr/2
|
mov lr, lr, asr #1 @ lr = lr/2
|
||||||
rsb r5, lr, r5, asr #1 @ r5 = -lr + r5/2 = (buf[0]-buf[1])/2
|
rsb r5, lr, r5, asr #1 @ r5 = -lr + r5/2 = (buf[0]-buf[1])/2
|
||||||
rsb lr, r5, #0 @ lr = -r5
|
rsb lr, r5, #0 @ lr = -r5
|
||||||
stmia r2!, {r4-r5}
|
stmia r2!, {r4-r5}
|
||||||
stmia r3!, {r6,lr}
|
stmia r3!, {r12,lr}
|
||||||
subs r0, r0, #2
|
subs r0, r0, #2
|
||||||
bgt .karaokeloop
|
bgt .karaokeloop
|
||||||
|
|
||||||
ldmfd sp!, {r4-r6, pc}
|
ldmfd sp!, {r4-r5, pc}
|
||||||
.karaokeend:
|
.karaokeend:
|
||||||
.size channels_process_sound_chan_karaoke,.karaokeend-channels_process_sound_chan_karaoke
|
.size channels_process_sound_chan_karaoke,.karaokeend-channels_process_sound_chan_karaoke
|
||||||
|
|
||||||
|
@ -97,7 +97,7 @@ channels_process_sound_chan_karaoke:
|
||||||
.type sample_output_mono, %function
|
.type sample_output_mono, %function
|
||||||
sample_output_mono:
|
sample_output_mono:
|
||||||
@ input: r0 = count, r1 = data, r2 = src, r3 = dst
|
@ input: r0 = count, r1 = data, r2 = src, r3 = dst
|
||||||
stmfd sp!, {r4-r9, lr}
|
stmfd sp!, {r4-r7, lr}
|
||||||
|
|
||||||
ldr r4, [r2] @ r4 = src[0]
|
ldr r4, [r2] @ r4 = src[0]
|
||||||
ldr r5, [r1] @ lr = data->output_scale
|
ldr r5, [r1] @ lr = data->output_scale
|
||||||
|
@ -105,8 +105,8 @@ sample_output_mono:
|
||||||
mov r2, #1
|
mov r2, #1
|
||||||
mov r2, r2, asl r1 @ r2 = 1<<r1 = 1 << (scale-1)
|
mov r2, r2, asl r1 @ r2 = 1<<r1 = 1 << (scale-1)
|
||||||
mvn r1, #0x8000 @ r1 needed for clipping
|
mvn r1, #0x8000 @ r1 needed for clipping
|
||||||
mov r8, #0xff00
|
mov r12, #0xff00
|
||||||
orr r8, r8, #0xff @ r8 needed for masking
|
orr r12, r12, #0xff @ r12 needed for masking
|
||||||
|
|
||||||
.somloop:
|
.somloop:
|
||||||
ldmia r4!, {r6-r7}
|
ldmia r4!, {r6-r7}
|
||||||
|
@ -121,16 +121,16 @@ sample_output_mono:
|
||||||
teq lr, lr, asr #31
|
teq lr, lr, asr #31
|
||||||
eorne r7, r1, lr, asr #31 @ Clip (-32768...+32767)
|
eorne r7, r1, lr, asr #31 @ Clip (-32768...+32767)
|
||||||
|
|
||||||
and r6, r6, r8
|
and r6, r6, r12
|
||||||
orr r6, r6, r6, asl #16 @ pack first 2 halfwords into 1 word
|
orr r6, r6, r6, asl #16 @ pack first 2 halfwords into 1 word
|
||||||
and r7, r7, r8
|
and r7, r7, r12
|
||||||
orr r7, r7, r7, asl #16 @ pack last 2 halfwords into 1 word
|
orr r7, r7, r7, asl #16 @ pack last 2 halfwords into 1 word
|
||||||
stmia r3!, {r6-r7}
|
stmia r3!, {r6-r7}
|
||||||
|
|
||||||
subs r0, r0, #2
|
subs r0, r0, #2
|
||||||
bgt .somloop
|
bgt .somloop
|
||||||
|
|
||||||
ldmfd sp!, {r4-r9, pc}
|
ldmfd sp!, {r4-r7, pc}
|
||||||
.somend:
|
.somend:
|
||||||
.size sample_output_mono,.somend-sample_output_mono
|
.size sample_output_mono,.somend-sample_output_mono
|
||||||
|
|
||||||
|
@ -147,7 +147,7 @@ sample_output_mono:
|
||||||
.type sample_output_stereo, %function
|
.type sample_output_stereo, %function
|
||||||
sample_output_stereo:
|
sample_output_stereo:
|
||||||
@ input: r0 = count, r1 = data, r2 = src, r3 = dst
|
@ input: r0 = count, r1 = data, r2 = src, r3 = dst
|
||||||
stmfd sp!, {r4-r11, lr}
|
stmfd sp!, {r4-r10, lr}
|
||||||
|
|
||||||
ldmia r2, {r4-r5} @ r4 = src[0], r5 = src[1]
|
ldmia r2, {r4-r5} @ r4 = src[0], r5 = src[1]
|
||||||
ldr r6, [r1] @ r6 = data->output_scale
|
ldr r6, [r1] @ r6 = data->output_scale
|
||||||
|
@ -155,8 +155,8 @@ sample_output_stereo:
|
||||||
mov r2, #1
|
mov r2, #1
|
||||||
mov r2, r2, asl r1 @ r2 = 1<<r1 = 1 << (scale-1)
|
mov r2, r2, asl r1 @ r2 = 1<<r1 = 1 << (scale-1)
|
||||||
mvn r1, #0x8000 @ r1 needed for clipping
|
mvn r1, #0x8000 @ r1 needed for clipping
|
||||||
mov r11, #0xff00
|
mov r12, #0xff00
|
||||||
orr r11, r11, #0xff @ r11 needed for masking
|
orr r12, r12, #0xff @ r12 needed for masking
|
||||||
|
|
||||||
.sosloop:
|
.sosloop:
|
||||||
ldmia r4!, {r7-r8}
|
ldmia r4!, {r7-r8}
|
||||||
|
@ -183,16 +183,16 @@ sample_output_stereo:
|
||||||
teq lr, lr, asr #31
|
teq lr, lr, asr #31
|
||||||
eorne r10, r1, lr, asr #31 @ Clip (-32768...+32767)
|
eorne r10, r1, lr, asr #31 @ Clip (-32768...+32767)
|
||||||
|
|
||||||
and r7, r7, r11
|
and r7, r7, r12
|
||||||
orr r9, r7, r9, asl #16 @ pack first 2 halfwords into 1 word
|
orr r9, r7, r9, asl #16 @ pack first 2 halfwords into 1 word
|
||||||
and r8, r8, r11
|
and r8, r8, r12
|
||||||
orr r10, r8, r10, asl #16 @ pack last 2 halfwords into 1 word
|
orr r10, r8, r10, asl #16 @ pack last 2 halfwords into 1 word
|
||||||
stmia r3!, {r9-r10}
|
stmia r3!, {r9-r10}
|
||||||
|
|
||||||
subs r0, r0, #2
|
subs r0, r0, #2
|
||||||
bgt .sosloop
|
bgt .sosloop
|
||||||
|
|
||||||
ldmfd sp!, {r4-r11, pc}
|
ldmfd sp!, {r4-r10, pc}
|
||||||
.sosend:
|
.sosend:
|
||||||
.size sample_output_stereo,.sosend-sample_output_stereo
|
.size sample_output_stereo,.sosend-sample_output_stereo
|
||||||
|
|
||||||
|
|
|
@ -268,7 +268,7 @@
|
||||||
bx lr
|
bx lr
|
||||||
|
|
||||||
mpeg2_idct_copy:
|
mpeg2_idct_copy:
|
||||||
stmfd sp!, { r1-r2, r4-r12, lr }
|
stmfd sp!, { r1-r2, r4-r11, lr }
|
||||||
bl .idct
|
bl .idct
|
||||||
ldmfd sp!, { r1-r2 }
|
ldmfd sp!, { r1-r2 }
|
||||||
mov r11, #0
|
mov r11, #0
|
||||||
|
@ -313,7 +313,7 @@ mpeg2_idct_copy:
|
||||||
add r1, r1, r2
|
add r1, r1, r2
|
||||||
cmp r0, r12
|
cmp r0, r12
|
||||||
blo 1b
|
blo 1b
|
||||||
ldmfd sp!, { r4-r12, pc }
|
ldmfd sp!, { r4-r11, pc }
|
||||||
|
|
||||||
mpeg2_idct_add:
|
mpeg2_idct_add:
|
||||||
cmp r0, #129
|
cmp r0, #129
|
||||||
|
@ -324,7 +324,7 @@ mpeg2_idct_add:
|
||||||
cmp r1, #0x40
|
cmp r1, #0x40
|
||||||
bne 3f
|
bne 3f
|
||||||
1:
|
1:
|
||||||
stmfd sp!, { r2-r12, lr }
|
stmfd sp!, { r2-r11, lr }
|
||||||
bl .idct
|
bl .idct
|
||||||
ldmfd sp!, { r1-r2 }
|
ldmfd sp!, { r1-r2 }
|
||||||
mov r11, #0
|
mov r11, #0
|
||||||
|
@ -385,24 +385,20 @@ mpeg2_idct_add:
|
||||||
add r1, r1, r2
|
add r1, r1, r2
|
||||||
cmp r0, r12
|
cmp r0, r12
|
||||||
blo 2b
|
blo 2b
|
||||||
ldmfd sp!, { r4-r12, pc }
|
ldmfd sp!, { r4-r11, pc }
|
||||||
3:
|
3:
|
||||||
stmfd sp!, { r4-r11 }
|
stmfd sp!, { r4-r6, lr }
|
||||||
ldrsh r1, [r0, #0] /* r1 = block[0] */
|
ldrsh r1, [r0, #0] /* r1 = block[0] */
|
||||||
mov r11, #0
|
mov r4, #0
|
||||||
strh r11, [r0, #0] /* block[0] = 0 */
|
strh r4, [r0, #0] /* block[0] = 0 */
|
||||||
strh r11, [r0, #126] /* block[63] = 0 */
|
strh r4, [r0, #126] /* block[63] = 0 */
|
||||||
add r1, r1, #64 /* r1 = DC << 7 */
|
add r1, r1, #64 /* r1 = DC << 7 */
|
||||||
add r0, r2, r3, asl #3
|
add r0, r2, r3, asl #3
|
||||||
4:
|
4:
|
||||||
ldrb r4, [r2, #0]
|
ldrb r4, [r2, #0]
|
||||||
ldrb r5, [r2, #1]
|
ldrb r5, [r2, #1]
|
||||||
ldrb r6, [r2, #2]
|
ldrb r6, [r2, #2]
|
||||||
ldrb r7, [r2, #3]
|
ldrb lr, [r2, #3]
|
||||||
ldrb r8, [r2, #4]
|
|
||||||
ldrb r9, [r2, #5]
|
|
||||||
ldrb r10, [r2, #6]
|
|
||||||
ldrb r11, [r2, #7]
|
|
||||||
add r4, r4, r1, asr #7
|
add r4, r4, r1, asr #7
|
||||||
cmp r4, #255
|
cmp r4, #255
|
||||||
mvnhi r4, r4, asr #31
|
mvnhi r4, r4, asr #31
|
||||||
|
@ -415,28 +411,31 @@ mpeg2_idct_add:
|
||||||
cmp r6, #255
|
cmp r6, #255
|
||||||
mvnhi r6, r6, asr #31
|
mvnhi r6, r6, asr #31
|
||||||
strb r6, [r2, #2]
|
strb r6, [r2, #2]
|
||||||
add r7, r7, r1, asr #7
|
add lr, lr, r1, asr #7
|
||||||
cmp r7, #255
|
cmp lr, #255
|
||||||
mvnhi r7, r7, asr #31
|
mvnhi lr, lr, asr #31
|
||||||
strb r7, [r2, #3]
|
strb lr, [r2, #3]
|
||||||
add r8, r8, r1, asr #7
|
ldrb r4, [r2, #4]
|
||||||
cmp r8, #255
|
ldrb r5, [r2, #5]
|
||||||
mvnhi r8, r8, asr #31
|
ldrb r6, [r2, #6]
|
||||||
strb r8, [r2, #4]
|
ldrb lr, [r2, #7]
|
||||||
add r9, r9, r1, asr #7
|
add r4, r4, r1, asr #7
|
||||||
cmp r9, #255
|
cmp r4, #255
|
||||||
mvnhi r9, r9, asr #31
|
mvnhi r4, r4, asr #31
|
||||||
strb r9, [r2, #5]
|
strb r4, [r2, #4]
|
||||||
add r10, r10, r1, asr #7
|
add r5, r5, r1, asr #7
|
||||||
cmp r10, #255
|
cmp r5, #255
|
||||||
mvnhi r10, r10, asr #31
|
mvnhi r5, r5, asr #31
|
||||||
strb r10, [r2, #6]
|
strb r5, [r2, #5]
|
||||||
add r11, r11, r1, asr #7
|
add r6, r6, r1, asr #7
|
||||||
cmp r11, #255
|
cmp r6, #255
|
||||||
mvnhi r11, r11, asr #31
|
mvnhi r6, r6, asr #31
|
||||||
strb r11, [r2, #7]
|
strb r6, [r2, #6]
|
||||||
|
add lr, lr, r1, asr #7
|
||||||
|
cmp lr, #255
|
||||||
|
mvnhi lr, lr, asr #31
|
||||||
|
strb lr, [r2, #7]
|
||||||
add r2, r2, r3
|
add r2, r2, r3
|
||||||
cmp r2, r0
|
cmp r2, r0
|
||||||
blo 4b
|
blo 4b
|
||||||
ldmfd sp!, { r4-r11 }
|
ldmfd sp!, { r4-r6, pc }
|
||||||
bx lr
|
|
||||||
|
|
|
@ -196,7 +196,7 @@ L_W0246:
|
||||||
|
|
||||||
|
|
||||||
mpeg2_idct_copy:
|
mpeg2_idct_copy:
|
||||||
stmfd sp!, {r1-r2, r4-r12, lr}
|
stmfd sp!, {r1-r2, r4-r11, lr}
|
||||||
bl .idct
|
bl .idct
|
||||||
ldmfd sp!, {r1-r2}
|
ldmfd sp!, {r1-r2}
|
||||||
|
|
||||||
|
@ -230,7 +230,7 @@ mpeg2_idct_copy:
|
||||||
cmp r0, r12
|
cmp r0, r12
|
||||||
blo 1b
|
blo 1b
|
||||||
|
|
||||||
ldmfd sp!, {r4-r12, pc}
|
ldmfd sp!, {r4-r11, pc}
|
||||||
|
|
||||||
mpeg2_idct_add:
|
mpeg2_idct_add:
|
||||||
cmp r0, #129
|
cmp r0, #129
|
||||||
|
@ -241,7 +241,7 @@ mpeg2_idct_add:
|
||||||
cmp r1, #0x40
|
cmp r1, #0x40
|
||||||
bne 3f
|
bne 3f
|
||||||
1:
|
1:
|
||||||
stmfd sp!, {r2-r12, lr}
|
stmfd sp!, {r2-r11, lr}
|
||||||
bl .idct
|
bl .idct
|
||||||
ldmfd sp!, {r1-r2}
|
ldmfd sp!, {r1-r2}
|
||||||
mov r11, #0
|
mov r11, #0
|
||||||
|
@ -287,21 +287,21 @@ mpeg2_idct_add:
|
||||||
add r1, r1, r2
|
add r1, r1, r2
|
||||||
cmp r0, r12
|
cmp r0, r12
|
||||||
blo 2b
|
blo 2b
|
||||||
ldmfd sp!, {r4-r12, pc}
|
ldmfd sp!, {r4-r11, pc}
|
||||||
|
|
||||||
3:
|
3:
|
||||||
stmfd sp!, {r4-r7}
|
stmfd sp!, {r4-r6, lr}
|
||||||
ldrsh r1, [r0, #0] /* r1 = block[0] */
|
ldrsh r1, [r0, #0] /* r1 = block[0] */
|
||||||
mov r11, #0
|
mov r4, #0
|
||||||
strh r11, [r0, #0] /* block[0] = 0 */
|
strh r4, [r0, #0] /* block[0] = 0 */
|
||||||
strh r11, [r0, #126] /* block[63] = 0 */
|
strh r4, [r0, #126] /* block[63] = 0 */
|
||||||
add r1, r1, #64 /* r1 = DC << 7 */
|
add r1, r1, #64 /* r1 = DC << 7 */
|
||||||
add r0, r2, r3, asl #3
|
add r0, r2, r3, asl #3
|
||||||
4:
|
4:
|
||||||
ldrb r4, [r2, #0]
|
ldrb r4, [r2, #0]
|
||||||
ldrb r5, [r2, #1]
|
ldrb r5, [r2, #1]
|
||||||
ldrb r6, [r2, #2]
|
ldrb r6, [r2, #2]
|
||||||
ldrb r7, [r2, #3]
|
ldrb lr, [r2, #3]
|
||||||
add r4, r4, r1, asr #7
|
add r4, r4, r1, asr #7
|
||||||
usat r4, #8, r4
|
usat r4, #8, r4
|
||||||
strb r4, [r2, #0]
|
strb r4, [r2, #0]
|
||||||
|
@ -311,13 +311,13 @@ mpeg2_idct_add:
|
||||||
add r6, r6, r1, asr #7
|
add r6, r6, r1, asr #7
|
||||||
usat r6, #8, r6
|
usat r6, #8, r6
|
||||||
strb r6, [r2, #2]
|
strb r6, [r2, #2]
|
||||||
add r7, r7, r1, asr #7
|
add lr, lr, r1, asr #7
|
||||||
usat r7, #8, r7
|
usat lr, #8, lr
|
||||||
strb r7, [r2, #3]
|
strb lr, [r2, #3]
|
||||||
ldrb r4, [r2, #4]
|
ldrb r4, [r2, #4]
|
||||||
ldrb r5, [r2, #5]
|
ldrb r5, [r2, #5]
|
||||||
ldrb r6, [r2, #6]
|
ldrb r6, [r2, #6]
|
||||||
ldrb r7, [r2, #7]
|
ldrb lr, [r2, #7]
|
||||||
add r4, r4, r1, asr #7
|
add r4, r4, r1, asr #7
|
||||||
usat r4, #8, r4
|
usat r4, #8, r4
|
||||||
strb r4, [r2, #4]
|
strb r4, [r2, #4]
|
||||||
|
@ -327,11 +327,10 @@ mpeg2_idct_add:
|
||||||
add r6, r6, r1, asr #7
|
add r6, r6, r1, asr #7
|
||||||
usat r6, #8, r6
|
usat r6, #8, r6
|
||||||
strb r6, [r2, #6]
|
strb r6, [r2, #6]
|
||||||
add r7, r7, r1, asr #7
|
add lr, lr, r1, asr #7
|
||||||
usat r7, #8, r7
|
usat lr, #8, lr
|
||||||
strb r7, [r2, #7]
|
strb lr, [r2, #7]
|
||||||
add r2, r2, r3
|
add r2, r2, r3
|
||||||
cmp r2, r0
|
cmp r2, r0
|
||||||
blo 4b
|
blo 4b
|
||||||
ldmfd sp!, {r4-r7}
|
ldmfd sp!, {r4-r6, pc}
|
||||||
bx lr
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue