1
0
Fork 0
forked from len0rd/rockbox

* ARM asm DSP and codec/plugin functions: Use r12 scratch register properly

* Fix saving another unused reg in dsp code
* Use less regs in the generic ARM mpegplayer adding idct pure DC case
* Fix ARMv6 mpegplayer adding idct using an unsaved register in pure DC case


git-svn-id: svn://svn.rockbox.org/rockbox/trunk@21803 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Jens Arnold 2009-07-12 13:14:35 +00:00
parent e12c1c0a62
commit 02c031709c
6 changed files with 125 additions and 127 deletions

View file

@ -119,7 +119,7 @@ mdct_butterfly_16:
ldr pc, [sp], #4 ldr pc, [sp], #4
mdct_butterfly_32: mdct_butterfly_32:
stmdb sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr} stmdb sp!, {r4-r11, lr}
add r1, r0, #16*4 add r1, r0, #16*4
@ -247,13 +247,13 @@ mdct_butterfly_32:
add r0, r0, #16*4 add r0, r0, #16*4
bl mdct_butterfly_16 bl mdct_butterfly_16
ldmia sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc} ldmia sp!, {r4-r11, pc}
@ mdct_butterfly_generic_loop(x1, x2, T0, step, Ttop) @ mdct_butterfly_generic_loop(x1, x2, T0, step, Ttop)
mdct_butterfly_generic_loop: mdct_butterfly_generic_loop:
stmdb sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr} stmdb sp!, {r4-r11, lr}
str r2, [sp, #-4] str r2, [sp, #-4]
ldr r4, [sp, #40] ldr r4, [sp, #36]
1: 1:
ldmdb r0, {r6, r7, r8, r9} ldmdb r0, {r6, r7, r8, r9}
ldmdb r1, {r10, r11, r12, r14} ldmdb r1, {r10, r11, r12, r14}
@ -339,7 +339,7 @@ mdct_butterfly_generic_loop:
cmp r2, r4 cmp r2, r4
bhi 1b bhi 1b
ldr r4, [sp, #40] ldr r4, [sp, #36]
1: 1:
ldmdb r0, {r6, r7, r8, r9} ldmdb r0, {r6, r7, r8, r9}
ldmdb r1, {r10, r11, r12, r14} ldmdb r1, {r10, r11, r12, r14}
@ -425,5 +425,5 @@ mdct_butterfly_generic_loop:
cmp r2, r4 cmp r2, r4
bhi 1b bhi 1b
ldmia sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc} ldmia sp!, {r4-r11, pc}

View file

@ -32,8 +32,8 @@
;; r3 = D0ptr ;; r3 = D0ptr
;; r4 = D1ptr ;; r4 = D1ptr
synth_full1: synth_full1:
stmdb sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr} stmdb sp!, {r4-r11, lr}
ldr r4, [sp, #40] ldr r4, [sp, #36]
ldr r5, =synth_full_sp ldr r5, =synth_full_sp
str sp, [r5] str sp, [r5]
mov r5, #15 mov r5, #15
@ -135,11 +135,11 @@ synth_full1:
ldr r5, =synth_full_sp ldr r5, =synth_full_sp
ldr sp, [r5] ldr sp, [r5]
ldmia sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc} ldmia sp!, {r4-r11, pc}
synth_full2: synth_full2:
stmdb sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr} stmdb sp!, {r4-r11, lr}
ldr r4, [sp, #40] ldr r4, [sp, #36]
ldr r5, =synth_full_sp ldr r5, =synth_full_sp
str sp, [r5] str sp, [r5]
mov r5, #15 mov r5, #15
@ -241,12 +241,12 @@ synth_full2:
ldr r5, =synth_full_sp ldr r5, =synth_full_sp
ldr sp, [r5] ldr sp, [r5]
ldmia sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc} ldmia sp!, {r4-r11, pc}
.global III_aliasreduce .global III_aliasreduce
III_aliasreduce: III_aliasreduce:
stmdb sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr} stmdb sp!, {r4-r11, lr}
add r1, r0, r1, lsl #2 add r1, r0, r1, lsl #2
add r0, r0, #72 add r0, r0, #72
.arl1: .arl1:
@ -289,7 +289,7 @@ III_aliasreduce:
add r0, r0, #72 add r0, r0, #72
cmp r0, r1 cmp r0, r1
blo .arl1 blo .arl1
ldmia sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12, pc} ldmia sp!, {r4-r11, pc}
csa: csa:
.word +0x0db84a81 .word +0x0db84a81
@ -311,14 +311,14 @@ csa:
.global III_overlap .global III_overlap
III_overlap: III_overlap:
stmdb sp!, {r4, r5, r6, r7, r8, lr} stmdb sp!, {r4-r7, lr}
add r2, r2, r3, lsl #2 add r2, r2, r3, lsl #2
mov r3, #6 mov r3, #6
.ol: .ol:
ldmia r0!, {r4, r5, r6} ldmia r0!, {r4, r5, r6}
ldmia r1!, {r7, r8, lr} ldmia r1!, {r7, r12, lr}
add r4, r4, r7 add r4, r4, r7
add r5, r5, r8 add r5, r5, r12
add r6, r6, lr add r6, r6, lr
str r4, [r2], #128 str r4, [r2], #128
str r5, [r2], #128 str r5, [r2], #128
@ -326,13 +326,13 @@ III_overlap:
subs r3, r3, #1 subs r3, r3, #1
bne .ol bne .ol
sub r1, r1, #72 sub r1, r1, #72
ldmia r0!, {r4, r5, r6, r7, r8, lr} ldmia r0!, {r4, r5, r6, r7, r12, lr}
stmia r1!, {r4, r5, r6, r7, r8, lr} stmia r1!, {r4, r5, r6, r7, r12, lr}
ldmia r0!, {r4, r5, r6, r7, r8, lr} ldmia r0!, {r4, r5, r6, r7, r12, lr}
stmia r1!, {r4, r5, r6, r7, r8, lr} stmia r1!, {r4, r5, r6, r7, r12, lr}
ldmia r0!, {r4, r5, r6, r7, r8, lr} ldmia r0!, {r4, r5, r6, r7, r12, lr}
stmia r1!, {r4, r5, r6, r7, r8, lr} stmia r1!, {r4, r5, r6, r7, r12, lr}
ldmia sp!, {r4, r5, r6, r7, r8, pc} ldmia sp!, {r4-r7, pc}
.section IBSS_SECTION_MPA_ARM,"aw",%nobits .section IBSS_SECTION_MPA_ARM,"aw",%nobits
synth_full_sp: synth_full_sp:

View file

@ -41,7 +41,7 @@ mpc_decoder_windowing_D:
/* r2 = D[] */ /* r2 = D[] */
/* lr = counter */ /* lr = counter */
stmfd sp!, {r4-r12, lr} stmfd sp!, {r4-r11, lr}
mov lr, #32 mov lr, #32
.loop32: .loop32:
@ -86,7 +86,7 @@ mpc_decoder_windowing_D:
subs lr, lr, #1 subs lr, lr, #1
bgt .loop32 bgt .loop32
ldmfd sp!, {r4-r12, pc} ldmfd sp!, {r4-r11, pc}
.mpc_dewindowing_end: .mpc_dewindowing_end:
.size mpc_decoder_windowing_D,.mpc_dewindowing_end-mpc_decoder_windowing_D .size mpc_decoder_windowing_D,.mpc_dewindowing_end-mpc_decoder_windowing_D
#else #else
@ -110,55 +110,55 @@ mpc_decoder_windowing_D:
/************************************************************************ /************************************************************************
* Reference implementation. * Reference implementation.
***********************************************************************/ ***********************************************************************/
stmfd sp!, {r4-r9, lr} stmfd sp!, {r4-r8, lr}
mov lr, #32 mov lr, #32
.loop32: .loop32:
ldmia r2!, { r3-r6 } /* load D[00..03] */ ldmia r2!, { r3-r6 } /* load D[00..03] */
ldr r7, [r1] /* 0 */ ldr r7, [r1] /* 0 */
smull r8, r9, r7, r3 smull r8, r12, r7, r3
ldr r7, [r1, #96*4] /* 1 */ ldr r7, [r1, #96*4] /* 1 */
smlal r8, r9, r7, r4 smlal r8, r12, r7, r4
ldr r7, [r1, #128*4] /* 2 */ ldr r7, [r1, #128*4] /* 2 */
smlal r8, r9, r7, r5 smlal r8, r12, r7, r5
ldr r7, [r1, #224*4] /* 3 */ ldr r7, [r1, #224*4] /* 3 */
smlal r8, r9, r7, r6 smlal r8, r12, r7, r6
ldmia r2!, { r3-r6 } /* load D[04..07] */ ldmia r2!, { r3-r6 } /* load D[04..07] */
ldr r7, [r1, #256*4] /* 4 */ ldr r7, [r1, #256*4] /* 4 */
smlal r8, r9, r7, r3 smlal r8, r12, r7, r3
ldr r7, [r1, #352*4] /* 5 */ ldr r7, [r1, #352*4] /* 5 */
smlal r8, r9, r7, r4 smlal r8, r12, r7, r4
ldr r7, [r1, #384*4] /* 6 */ ldr r7, [r1, #384*4] /* 6 */
smlal r8, r9, r7, r5 smlal r8, r12, r7, r5
ldr r7, [r1, #480*4] /* 7 */ ldr r7, [r1, #480*4] /* 7 */
smlal r8, r9, r7, r6 smlal r8, r12, r7, r6
ldmia r2!, { r3-r6 } /* load D[08..11] */ ldmia r2!, { r3-r6 } /* load D[08..11] */
ldr r7, [r1, #512*4] /* 8 */ ldr r7, [r1, #512*4] /* 8 */
smlal r8, r9, r7, r3 smlal r8, r12, r7, r3
ldr r7, [r1, #608*4] /* 9 */ ldr r7, [r1, #608*4] /* 9 */
smlal r8, r9, r7, r4 smlal r8, r12, r7, r4
ldr r7, [r1, #640*4] /* 10 */ ldr r7, [r1, #640*4] /* 10 */
smlal r8, r9, r7, r5 smlal r8, r12, r7, r5
ldr r7, [r1, #736*4] /* 11 */ ldr r7, [r1, #736*4] /* 11 */
smlal r8, r9, r7, r6 smlal r8, r12, r7, r6
ldmia r2!, { r3-r6 } /* load D[12..15] */ ldmia r2!, { r3-r6 } /* load D[12..15] */
ldr r7, [r1, #768*4] /* 12 */ ldr r7, [r1, #768*4] /* 12 */
smlal r8, r9, r7, r3 smlal r8, r12, r7, r3
ldr r7, [r1, #864*4] /* 13 */ ldr r7, [r1, #864*4] /* 13 */
smlal r8, r9, r7, r4 smlal r8, r12, r7, r4
ldr r7, [r1, #896*4] /* 14 */ ldr r7, [r1, #896*4] /* 14 */
smlal r8, r9, r7, r5 smlal r8, r12, r7, r5
ldr r7, [r1, #992*4] /* 15 */ ldr r7, [r1, #992*4] /* 15 */
smlal r8, r9, r7, r6 smlal r8, r12, r7, r6
mov r8, r8, lsr #16 mov r8, r8, lsr #16
orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */ orr r8, r8, r12, lsl #16 /* (lo>>16) || (hi<<16) */
str r8, [r0], #4 /* store Data */ str r8, [r0], #4 /* store Data */
add r1, r1, #4 /* V++ */ add r1, r1, #4 /* V++ */
subs lr, lr, #1 subs lr, lr, #1
bgt .loop32 bgt .loop32
ldmfd sp!, {r4-r9, pc} ldmfd sp!, {r4-r8, pc}
#else #else
mpc_decoder_windowing_D: mpc_decoder_windowing_D:
/* r0 = Data[] */ /* r0 = Data[] */
@ -174,7 +174,7 @@ mpc_decoder_windowing_D:
* The row V[16] can be extracted as it has symmetries within this single * The row V[16] can be extracted as it has symmetries within this single
* row. 8 smull/mlal and 8 ldr's can be saved. * row. 8 smull/mlal and 8 ldr's can be saved.
***********************************************************************/ ***********************************************************************/
stmfd sp!, {r4-r12, lr} stmfd sp!, {r4-r11, lr}
/****************************************** /******************************************
* row 0 with internal symmetry * row 0 with internal symmetry
@ -356,7 +356,7 @@ mpc_decoder_windowing_D:
str r8, [r0], #4 /* store Data */ str r8, [r0], #4 /* store Data */
add r1, r1, #4 /* V++ */ add r1, r1, #4 /* V++ */
ldmfd sp!, {r4-r12, pc} ldmfd sp!, {r4-r11, pc}
#endif #endif
.mpc_dewindowing_end: .mpc_dewindowing_end:
.size mpc_decoder_windowing_D,.mpc_dewindowing_end-mpc_decoder_windowing_D .size mpc_decoder_windowing_D,.mpc_dewindowing_end-mpc_decoder_windowing_D

View file

@ -32,14 +32,14 @@
.type channels_process_sound_chan_mono, %function .type channels_process_sound_chan_mono, %function
channels_process_sound_chan_mono: channels_process_sound_chan_mono:
@ input: r0 = count, r1 = buf @ input: r0 = count, r1 = buf
stmfd sp!, {r4-r6, lr} stmfd sp!, {r4-r5, lr}
ldmia r1, {r2-r3} @ r2 = buf[0], r3 = buf[1] ldmia r1, {r2-r3} @ r2 = buf[0], r3 = buf[1]
.monoloop: .monoloop:
ldmia r2, {r4-r5} ldmia r2, {r4-r5}
ldmia r3, {r6,lr} ldmia r3, {r12,lr}
mov r4, r4, asr #1 @ r4 = r4/2 mov r4, r4, asr #1 @ r4 = r4/2
add r4, r4, r6, asr #1 @ r4 = r4 + r6/2 = (buf[0]+buf[1])/2 add r4, r4, r12, asr #1 @ r4 = r4 + r12/2 = (buf[0]+buf[1])/2
mov r5, r5, asr #1 @ r5 = r5/2 mov r5, r5, asr #1 @ r5 = r5/2
add r5, r5, lr, asr #1 @ r5 = r5 + lr/2 = (buf[0]+buf[1])/2 add r5, r5, lr, asr #1 @ r5 = r5 + lr/2 = (buf[0]+buf[1])/2
stmia r2!, {r4-r5} stmia r2!, {r4-r5}
@ -47,7 +47,7 @@ channels_process_sound_chan_mono:
subs r0, r0, #2 subs r0, r0, #2
bgt .monoloop bgt .monoloop
ldmfd sp!, {r4-r6, pc} ldmfd sp!, {r4-r5, pc}
.monoend: .monoend:
.size channels_process_sound_chan_mono,.monoend-channels_process_sound_chan_mono .size channels_process_sound_chan_mono,.monoend-channels_process_sound_chan_mono
@ -63,24 +63,24 @@ channels_process_sound_chan_mono:
.type channels_process_sound_chan_karaoke, %function .type channels_process_sound_chan_karaoke, %function
channels_process_sound_chan_karaoke: channels_process_sound_chan_karaoke:
@ input: r0 = count, r1 = buf @ input: r0 = count, r1 = buf
stmfd sp!, {r4-r6, lr} stmfd sp!, {r4-r5, lr}
ldmia r1, {r2-r3} @ r2 = buf[0], r3 = buf[1] ldmia r1, {r2-r3} @ r2 = buf[0], r3 = buf[1]
.karaokeloop: .karaokeloop:
ldmia r2, {r4-r5} ldmia r2, {r4-r5}
ldmia r3, {r6,lr} ldmia r3, {r12,lr}
mov r6, r6, asr #1 @ r6 = r6/2 mov r12, r12, asr #1 @ r12 = r12/2
rsb r4, r6, r4, asr #1 @ r4 = -r6 + r4/2 = (buf[0]-buf[1])/2 rsb r4, r12, r4, asr #1 @ r4 = -r12 + r4/2 = (buf[0]-buf[1])/2
rsb r6, r4, #0 @ r6 = -r4 rsb r12, r4, #0 @ r12 = -r4
mov lr, lr, asr #1 @ lr = lr/2 mov lr, lr, asr #1 @ lr = lr/2
rsb r5, lr, r5, asr #1 @ r5 = -lr + r5/2 = (buf[0]-buf[1])/2 rsb r5, lr, r5, asr #1 @ r5 = -lr + r5/2 = (buf[0]-buf[1])/2
rsb lr, r5, #0 @ lr = -r5 rsb lr, r5, #0 @ lr = -r5
stmia r2!, {r4-r5} stmia r2!, {r4-r5}
stmia r3!, {r6,lr} stmia r3!, {r12,lr}
subs r0, r0, #2 subs r0, r0, #2
bgt .karaokeloop bgt .karaokeloop
ldmfd sp!, {r4-r6, pc} ldmfd sp!, {r4-r5, pc}
.karaokeend: .karaokeend:
.size channels_process_sound_chan_karaoke,.karaokeend-channels_process_sound_chan_karaoke .size channels_process_sound_chan_karaoke,.karaokeend-channels_process_sound_chan_karaoke
@ -97,7 +97,7 @@ channels_process_sound_chan_karaoke:
.type sample_output_mono, %function .type sample_output_mono, %function
sample_output_mono: sample_output_mono:
@ input: r0 = count, r1 = data, r2 = src, r3 = dst @ input: r0 = count, r1 = data, r2 = src, r3 = dst
stmfd sp!, {r4-r9, lr} stmfd sp!, {r4-r7, lr}
ldr r4, [r2] @ r4 = src[0] ldr r4, [r2] @ r4 = src[0]
ldr r5, [r1] @ r5 = data->output_scale ldr r5, [r1] @ r5 = data->output_scale
@ -105,8 +105,8 @@ sample_output_mono:
mov r2, #1 mov r2, #1
mov r2, r2, asl r1 @ r2 = 1<<r1 = 1 << (scale-1) mov r2, r2, asl r1 @ r2 = 1<<r1 = 1 << (scale-1)
mvn r1, #0x8000 @ r1 needed for clipping mvn r1, #0x8000 @ r1 needed for clipping
mov r8, #0xff00 mov r12, #0xff00
orr r8, r8, #0xff @ r8 needed for masking orr r12, r12, #0xff @ r12 needed for masking
.somloop: .somloop:
ldmia r4!, {r6-r7} ldmia r4!, {r6-r7}
@ -121,16 +121,16 @@ sample_output_mono:
teq lr, lr, asr #31 teq lr, lr, asr #31
eorne r7, r1, lr, asr #31 @ Clip (-32768...+32767) eorne r7, r1, lr, asr #31 @ Clip (-32768...+32767)
and r6, r6, r8 and r6, r6, r12
orr r6, r6, r6, asl #16 @ pack first 2 halfwords into 1 word orr r6, r6, r6, asl #16 @ pack first 2 halfwords into 1 word
and r7, r7, r8 and r7, r7, r12
orr r7, r7, r7, asl #16 @ pack last 2 halfwords into 1 word orr r7, r7, r7, asl #16 @ pack last 2 halfwords into 1 word
stmia r3!, {r6-r7} stmia r3!, {r6-r7}
subs r0, r0, #2 subs r0, r0, #2
bgt .somloop bgt .somloop
ldmfd sp!, {r4-r9, pc} ldmfd sp!, {r4-r7, pc}
.somend: .somend:
.size sample_output_mono,.somend-sample_output_mono .size sample_output_mono,.somend-sample_output_mono
@ -147,7 +147,7 @@ sample_output_mono:
.type sample_output_stereo, %function .type sample_output_stereo, %function
sample_output_stereo: sample_output_stereo:
@ input: r0 = count, r1 = data, r2 = src, r3 = dst @ input: r0 = count, r1 = data, r2 = src, r3 = dst
stmfd sp!, {r4-r11, lr} stmfd sp!, {r4-r10, lr}
ldmia r2, {r4-r5} @ r4 = src[0], r5 = src[1] ldmia r2, {r4-r5} @ r4 = src[0], r5 = src[1]
ldr r6, [r1] @ r6 = data->output_scale ldr r6, [r1] @ r6 = data->output_scale
@ -155,8 +155,8 @@ sample_output_stereo:
mov r2, #1 mov r2, #1
mov r2, r2, asl r1 @ r2 = 1<<r1 = 1 << (scale-1) mov r2, r2, asl r1 @ r2 = 1<<r1 = 1 << (scale-1)
mvn r1, #0x8000 @ r1 needed for clipping mvn r1, #0x8000 @ r1 needed for clipping
mov r11, #0xff00 mov r12, #0xff00
orr r11, r11, #0xff @ r11 needed for masking orr r12, r12, #0xff @ r12 needed for masking
.sosloop: .sosloop:
ldmia r4!, {r7-r8} ldmia r4!, {r7-r8}
@ -183,16 +183,16 @@ sample_output_stereo:
teq lr, lr, asr #31 teq lr, lr, asr #31
eorne r10, r1, lr, asr #31 @ Clip (-32768...+32767) eorne r10, r1, lr, asr #31 @ Clip (-32768...+32767)
and r7, r7, r11 and r7, r7, r12
orr r9, r7, r9, asl #16 @ pack first 2 halfwords into 1 word orr r9, r7, r9, asl #16 @ pack first 2 halfwords into 1 word
and r8, r8, r11 and r8, r8, r12
orr r10, r8, r10, asl #16 @ pack last 2 halfwords into 1 word orr r10, r8, r10, asl #16 @ pack last 2 halfwords into 1 word
stmia r3!, {r9-r10} stmia r3!, {r9-r10}
subs r0, r0, #2 subs r0, r0, #2
bgt .sosloop bgt .sosloop
ldmfd sp!, {r4-r11, pc} ldmfd sp!, {r4-r10, pc}
.sosend: .sosend:
.size sample_output_stereo,.sosend-sample_output_stereo .size sample_output_stereo,.sosend-sample_output_stereo

View file

@ -268,7 +268,7 @@
bx lr bx lr
mpeg2_idct_copy: mpeg2_idct_copy:
stmfd sp!, { r1-r2, r4-r12, lr } stmfd sp!, { r1-r2, r4-r11, lr }
bl .idct bl .idct
ldmfd sp!, { r1-r2 } ldmfd sp!, { r1-r2 }
mov r11, #0 mov r11, #0
@ -313,7 +313,7 @@ mpeg2_idct_copy:
add r1, r1, r2 add r1, r1, r2
cmp r0, r12 cmp r0, r12
blo 1b blo 1b
ldmfd sp!, { r4-r12, pc } ldmfd sp!, { r4-r11, pc }
mpeg2_idct_add: mpeg2_idct_add:
cmp r0, #129 cmp r0, #129
@ -324,7 +324,7 @@ mpeg2_idct_add:
cmp r1, #0x40 cmp r1, #0x40
bne 3f bne 3f
1: 1:
stmfd sp!, { r2-r12, lr } stmfd sp!, { r2-r11, lr }
bl .idct bl .idct
ldmfd sp!, { r1-r2 } ldmfd sp!, { r1-r2 }
mov r11, #0 mov r11, #0
@ -385,24 +385,20 @@ mpeg2_idct_add:
add r1, r1, r2 add r1, r1, r2
cmp r0, r12 cmp r0, r12
blo 2b blo 2b
ldmfd sp!, { r4-r12, pc } ldmfd sp!, { r4-r11, pc }
3: 3:
stmfd sp!, { r4-r11 } stmfd sp!, { r4-r6, lr }
ldrsh r1, [r0, #0] /* r1 = block[0] */ ldrsh r1, [r0, #0] /* r1 = block[0] */
mov r11, #0 mov r4, #0
strh r11, [r0, #0] /* block[0] = 0 */ strh r4, [r0, #0] /* block[0] = 0 */
strh r11, [r0, #126] /* block[63] = 0 */ strh r4, [r0, #126] /* block[63] = 0 */
add r1, r1, #64 /* r1 = DC << 7 */ add r1, r1, #64 /* r1 = DC << 7 */
add r0, r2, r3, asl #3 add r0, r2, r3, asl #3
4: 4:
ldrb r4, [r2, #0] ldrb r4, [r2, #0]
ldrb r5, [r2, #1] ldrb r5, [r2, #1]
ldrb r6, [r2, #2] ldrb r6, [r2, #2]
ldrb r7, [r2, #3] ldrb lr, [r2, #3]
ldrb r8, [r2, #4]
ldrb r9, [r2, #5]
ldrb r10, [r2, #6]
ldrb r11, [r2, #7]
add r4, r4, r1, asr #7 add r4, r4, r1, asr #7
cmp r4, #255 cmp r4, #255
mvnhi r4, r4, asr #31 mvnhi r4, r4, asr #31
@ -415,28 +411,31 @@ mpeg2_idct_add:
cmp r6, #255 cmp r6, #255
mvnhi r6, r6, asr #31 mvnhi r6, r6, asr #31
strb r6, [r2, #2] strb r6, [r2, #2]
add r7, r7, r1, asr #7 add lr, lr, r1, asr #7
cmp r7, #255 cmp lr, #255
mvnhi r7, r7, asr #31 mvnhi lr, lr, asr #31
strb r7, [r2, #3] strb lr, [r2, #3]
add r8, r8, r1, asr #7 ldrb r4, [r2, #4]
cmp r8, #255 ldrb r5, [r2, #5]
mvnhi r8, r8, asr #31 ldrb r6, [r2, #6]
strb r8, [r2, #4] ldrb lr, [r2, #7]
add r9, r9, r1, asr #7 add r4, r4, r1, asr #7
cmp r9, #255 cmp r4, #255
mvnhi r9, r9, asr #31 mvnhi r4, r4, asr #31
strb r9, [r2, #5] strb r4, [r2, #4]
add r10, r10, r1, asr #7 add r5, r5, r1, asr #7
cmp r10, #255 cmp r5, #255
mvnhi r10, r10, asr #31 mvnhi r5, r5, asr #31
strb r10, [r2, #6] strb r5, [r2, #5]
add r11, r11, r1, asr #7 add r6, r6, r1, asr #7
cmp r11, #255 cmp r6, #255
mvnhi r11, r11, asr #31 mvnhi r6, r6, asr #31
strb r11, [r2, #7] strb r6, [r2, #6]
add lr, lr, r1, asr #7
cmp lr, #255
mvnhi lr, lr, asr #31
strb lr, [r2, #7]
add r2, r2, r3 add r2, r2, r3
cmp r2, r0 cmp r2, r0
blo 4b blo 4b
ldmfd sp!, { r4-r11 } ldmfd sp!, { r4-r6, pc }
bx lr

View file

@ -196,7 +196,7 @@ L_W0246:
mpeg2_idct_copy: mpeg2_idct_copy:
stmfd sp!, {r1-r2, r4-r12, lr} stmfd sp!, {r1-r2, r4-r11, lr}
bl .idct bl .idct
ldmfd sp!, {r1-r2} ldmfd sp!, {r1-r2}
@ -230,7 +230,7 @@ mpeg2_idct_copy:
cmp r0, r12 cmp r0, r12
blo 1b blo 1b
ldmfd sp!, {r4-r12, pc} ldmfd sp!, {r4-r11, pc}
mpeg2_idct_add: mpeg2_idct_add:
cmp r0, #129 cmp r0, #129
@ -241,7 +241,7 @@ mpeg2_idct_add:
cmp r1, #0x40 cmp r1, #0x40
bne 3f bne 3f
1: 1:
stmfd sp!, {r2-r12, lr} stmfd sp!, {r2-r11, lr}
bl .idct bl .idct
ldmfd sp!, {r1-r2} ldmfd sp!, {r1-r2}
mov r11, #0 mov r11, #0
@ -287,21 +287,21 @@ mpeg2_idct_add:
add r1, r1, r2 add r1, r1, r2
cmp r0, r12 cmp r0, r12
blo 2b blo 2b
ldmfd sp!, {r4-r12, pc} ldmfd sp!, {r4-r11, pc}
3: 3:
stmfd sp!, {r4-r7} stmfd sp!, {r4-r6, lr}
ldrsh r1, [r0, #0] /* r1 = block[0] */ ldrsh r1, [r0, #0] /* r1 = block[0] */
mov r11, #0 mov r4, #0
strh r11, [r0, #0] /* block[0] = 0 */ strh r4, [r0, #0] /* block[0] = 0 */
strh r11, [r0, #126] /* block[63] = 0 */ strh r4, [r0, #126] /* block[63] = 0 */
add r1, r1, #64 /* r1 = DC << 7 */ add r1, r1, #64 /* r1 = DC << 7 */
add r0, r2, r3, asl #3 add r0, r2, r3, asl #3
4: 4:
ldrb r4, [r2, #0] ldrb r4, [r2, #0]
ldrb r5, [r2, #1] ldrb r5, [r2, #1]
ldrb r6, [r2, #2] ldrb r6, [r2, #2]
ldrb r7, [r2, #3] ldrb lr, [r2, #3]
add r4, r4, r1, asr #7 add r4, r4, r1, asr #7
usat r4, #8, r4 usat r4, #8, r4
strb r4, [r2, #0] strb r4, [r2, #0]
@ -311,13 +311,13 @@ mpeg2_idct_add:
add r6, r6, r1, asr #7 add r6, r6, r1, asr #7
usat r6, #8, r6 usat r6, #8, r6
strb r6, [r2, #2] strb r6, [r2, #2]
add r7, r7, r1, asr #7 add lr, lr, r1, asr #7
usat r7, #8, r7 usat lr, #8, lr
strb r7, [r2, #3] strb lr, [r2, #3]
ldrb r4, [r2, #4] ldrb r4, [r2, #4]
ldrb r5, [r2, #5] ldrb r5, [r2, #5]
ldrb r6, [r2, #6] ldrb r6, [r2, #6]
ldrb r7, [r2, #7] ldrb lr, [r2, #7]
add r4, r4, r1, asr #7 add r4, r4, r1, asr #7
usat r4, #8, r4 usat r4, #8, r4
strb r4, [r2, #4] strb r4, [r2, #4]
@ -327,11 +327,10 @@ mpeg2_idct_add:
add r6, r6, r1, asr #7 add r6, r6, r1, asr #7
usat r6, #8, r6 usat r6, #8, r6
strb r6, [r2, #6] strb r6, [r2, #6]
add r7, r7, r1, asr #7 add lr, lr, r1, asr #7
usat r7, #8, r7 usat lr, #8, lr
strb r7, [r2, #7] strb lr, [r2, #7]
add r2, r2, r3 add r2, r2, r3
cmp r2, r0 cmp r2, r0
blo 4b blo 4b
ldmfd sp!, {r4-r7} ldmfd sp!, {r4-r6, pc}
bx lr