1
0
Fork 0
forked from len0rd/rockbox

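Less stack hogging in the coldfire IDCT for mpegplayer: .idct no longer saves all 15 registers itself; its callers (mpeg2_idct_copy_coldfire and mpeg2_idct_add_coldfire) now save %d2-%d7/%a2-%a6 once instead. Speedup might or might not be measurable.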

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@15175 a1c6a512-1295-4272-9138-f99709370657
This commit is contained in:
Jens Arnold 2007-10-18 00:14:22 +00:00
parent 7bcfa84c4a
commit 1cca3ceeef

View file

@ -25,11 +25,9 @@
/* The IDCT itself. /* The IDCT itself.
* Input: %a0: block pointer * Input: %a0: block pointer
* All registers are preserved. */ * Caller must save all registers. */
.align 2 .align 2
.idct: .idct:
lea.l (-15*4,%sp), %sp
movem.l %d0-%d7/%a0-%a6, (%sp) | save all registers
move.l %a0, %a6 move.l %a0, %a6
move.l #0, %macsr | signed integer mode move.l #0, %macsr | signed integer mode
@ -238,20 +236,19 @@
subq.l #1, %d3 | loop 8 times subq.l #1, %d3 | loop 8 times
bne.w .col_loop bne.w .col_loop
movem.l (%sp), %d0-%d7/%a0-%a6 | restore all registers
lea.l (15*4,%sp), %sp
rts rts
.align 2 .align 2
mpeg2_idct_copy_coldfire: mpeg2_idct_copy_coldfire:
lea.l (-4*4,%sp), %sp lea.l (-11*4,%sp), %sp
movem.l %d2-%d4/%a2, (%sp) | save some registers movem.l %d2-%d7/%a2-%a6, (%sp) | save some registers
movem.l (4*4+4,%sp), %a0-%a2| %a0 - block pointer move.l (11*4+4,%sp), %a0 | %a0 - block pointer for idct
| %a1 - destination pointer
| %a2 - stride
bsr.w .idct | apply idct to block bsr.w .idct | apply idct to block
movem.l (11*4+4,%sp), %a0-%a2 | %a0 - block pointer
| %a1 - destination pointer
| %a2 - stride
move.l #255, %d1 | preload constant for clipping move.l #255, %d1 | preload constant for clipping
moveq.l #8, %d4 | loop counter moveq.l #8, %d4 | loop counter
@ -336,19 +333,20 @@ mpeg2_idct_copy_coldfire:
subq.l #1, %d4 | loop 8 times subq.l #1, %d4 | loop 8 times
bne.w .copy_clip_loop bne.w .copy_clip_loop
movem.l (%sp), %d2-%d4/%a2 | restore registers movem.l (%sp), %d2-%d7/%a2-%a6
lea.l (4*4,%sp), %sp lea.l (11*4,%sp), %sp
rts rts
.align 2 .align 2
mpeg2_idct_add_coldfire: mpeg2_idct_add_coldfire:
lea.l (-7*4,%sp), %sp lea.l (-11*4,%sp), %sp
movem.l %d2-%d7/%a2, (%sp) | save some registers movem.l %d2-%d7/%a2-%a6, (%sp)
movem.l (7*4+4,%sp), %d0/%a0-%a2| %d0 - last value movem.l (11*4+4,%sp), %d0/%a0-%a2 | %d0 - last value
| %a0 - block pointer | %a0 - block pointer
| %a1 - destination pointer | %a1 - destination pointer
| %a2 - stride | %a2 - stride
cmp.l #129, %d0 | last == 129 ? cmp.l #129, %d0 | last == 129 ?
bne.b .idct_add | no: perform idct + addition bne.b .idct_add | no: perform idct + addition
move.w (%a0), %d0 move.w (%a0), %d0
@ -360,6 +358,7 @@ mpeg2_idct_add_coldfire:
.idct_add: .idct_add:
bsr.w .idct | apply idct bsr.w .idct | apply idct
movem.l (11*4+8,%sp), %a0-%a2 | reload arguments %a0..%a2
move.l #255, %d2 | preload constant for clipping move.l #255, %d2 | preload constant for clipping
clr.l %d3 | used for splitting input words into bytes clr.l %d3 | used for splitting input words into bytes
@ -569,6 +568,6 @@ mpeg2_idct_add_coldfire:
bne.w .dc_clip_loop bne.w .dc_clip_loop
.idct_add_end: .idct_add_end:
movem.l (%sp), %d2-%d7/%a2 | restore registers movem.l (%sp), %d2-%d7/%a2-%a6
lea.l (7*4,%sp), %sp lea.l (11*4,%sp), %sp
rts rts