forked from len0rd/rockbox
		
	We can't pop into pc on ARMv4t when using thumb: the T bit won't be modified if we are returning to a thumb function. Code running on ARMv4t should use the new ldrpc / ldmpc macros instead of ldr pc, [sp], #4 and ldm(cond) sp!, {regs, pc}. No modification on pure ARM builds and ARMv5+. Note: USE_THUMB is currently never defined; no targets can currently be built with -mthumb, see FS#6734. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@26756 a1c6a512-1295-4272-9138-f99709370657
		
			
				
	
	
		
			437 lines
		
	
	
	
		
			12 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			437 lines
		
	
	
	
		
			12 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
| /***************************************************************************
 | |
|  *             __________               __   ___.
 | |
|  *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 | |
|  *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 | |
|  *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 | |
|  *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 | |
|  *                     \/            \/     \/    \/            \/
 | |
|  * $Id$
 | |
|  *
 | |
|  * Copyright (C) 2007 by Tomasz Malesinski
 | |
|  *
 | |
|  * This program is free software; you can redistribute it and/or
 | |
|  * modify it under the terms of the GNU General Public License
 | |
|  * as published by the Free Software Foundation; either version 2
 | |
|  * of the License, or (at your option) any later version.
 | |
|  *
 | |
|  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 | |
|  * KIND, either express or implied.
 | |
|  *
 | |
|  ****************************************************************************/
 | |
|  
 | |
| #include "config.h" 
 | |
| /* Codecs should not normally do this, but we need to check a macro, and
 | |
|  * codecs.h would confuse the assembler. */
 | |
| 
 | |
| #define cPI3_8 (0x30fbc54d) /* ~= cos(3*pi/8) * 2^31 */
 | |
| #define cPI2_8 (0x5a82799a) /* ~= cos(2*pi/8) * 2^31 */
 | |
| #define cPI1_8 (0x7641af3d) /* ~= cos(1*pi/8) * 2^31 */
 | |
| 
 | |
| #ifdef USE_IRAM
 | |
|     .section    .icode,"ax",%progbits   @ USE_IRAM builds: code goes to the .icode section
 | |
| #else
 | |
|     .text
 | |
| #endif
 | |
|     .align
 | |
| 
 | |
|     .global mdct_butterfly_32           @ exported; mdct_butterfly_8/16 get no .global here
 | |
|     .global mdct_butterfly_generic_loop @ exported
 | |
| 
 | |
| mdct_butterfly_8:
 | |
| @ inputs: r0 = output pointer, r1-r4 = x0..x3, r5,r6,r10,r11 = x4..x7, lr = return address
 | |
| @ uses: r7,r8,r9,r12 (scratch, clobbered)
 | |
| @ modifies: r0,r1,r2,r3,r4,r5,r6,r10,r11 (y0..y7 on return).  increments r0 by #8*4
 | |
|     add     r9,  r5,  r1                @ x4 + x0
 | |
|     sub     r5,  r5,  r1                @ x4 - x0
 | |
|     add     r7,  r6,  r2                @ x5 + x1
 | |
|     sub     r6,  r6,  r2                @ x5 - x1
 | |
|     add     r8,  r10, r3                @ x6 + x2
 | |
|     sub     r10, r10, r3                @ x6 - x2
 | |
|     add     r12, r11, r4                @ x7 + x3
 | |
|     sub     r11, r11, r4                @ x7 - x3
 | |
| 
 | |
|     add     r1,  r10, r6                @ y0 = (x6 - x2) + (x5 - x1)
 | |
|     sub     r2,  r11, r5                @ y1 = (x7 - x3) - (x4 - x0)
 | |
|     sub     r3,  r10, r6                @ y2 = (x6 - x2) - (x5 - x1)
 | |
|     add     r4,  r11, r5                @ y3 = (x7 - x3) + (x4 - x0)
 | |
|     sub     r5,  r8,  r9                @ y4 = (x6 + x2) - (x4 + x0)
 | |
|     sub     r6,  r12, r7                @ y5 = (x7 + x3) - (x5 + x1)
 | |
|     add     r10, r8,  r9                @ y6 = (x6 + x2) + (x4 + x0)
 | |
|     add     r11, r12, r7                @ y7 = (x7 + x3) + (x5 + x1)
 | |
|     stmia   r0!, {r1, r2, r3, r4, r5, r6, r10, r11}    @ store y0..y7 at r0, then r0 += 8*4
 | |
| 
 | |
|     bx      lr                          @ return; this routine never touches the stack
 | |
| 
 | |
| mdct_butterfly_16:
 | |
| @ inputs: r0 = pointer to 16 words x0..x15 (updated in place), lr = return address
 | |
| @ uses: r1 (overwritten on entry, not read as an input),r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12
 | |
| @ modifies: r0.  increments r0 by #16*4
 | |
| @ calls mdct_butterfly_8 via bl so need to stack lr for return address
 | |
|     str     lr, [sp, #-4]!              @ push return address (popped by ldrpc below)
 | |
|     add     r1, r0, #8*4                @ r1 = &x8
 | |
| 
 | |
|     ldmia   r0, {r2, r3, r4, r5}        @ x0..x3
 | |
|     ldmia   r1, {r6, r7, r8, r9}        @ x8..x11
 | |
|     add     r6, r6, r2                  @ y8 = x8 + x0
 | |
|     rsb     r2, r6, r2, asl #1          @ x0 - x8
 | |
|     add     r7, r7, r3                  @ y9 = x9 + x1
 | |
|     rsb     r3, r7, r3, asl #1          @ x1 - x9
 | |
|     add     r8, r8, r4                  @ y10 = x10 + x2
 | |
|     sub     r11, r8, r4, asl #1         @ x10 - x2
 | |
|     add     r9, r9, r5                  @ y11 = x11 + x3
 | |
|     rsb     r10, r9, r5, asl #1         @ x3 - x11
 | |
| 
 | |
|     stmia   r1!, {r6, r7, r8, r9}       @ store y8..y11, r1 = &x12
 | |
|     
 | |
|     add     r2, r2, r3                  @ (x0 - x8) + (x1 - x9)
 | |
|     rsb     r3, r2, r3, asl #1          @ (x1 - x9) - (x0 - x8)
 | |
| 
 | |
|     ldr     r12, =cPI2_8                @ r12 keeps cPI2_8 for both halves
 | |
|     smull   r8, r5, r12, r2             @ r5 = high word of cPI2_8 * r2 (low word in r8 is discarded)
 | |
|     smull   r8, r6, r12, r3             @ r6 = high word of cPI2_8 * r3
 | |
|     mov     r5, r5, asl #1              @ high word << 1
 | |
|     mov     r6, r6, asl #1              @ high word << 1
 | |
| 
 | |
|     stmia   r0!, {r5, r6, r10, r11}     @ store into words 0..3, r0 = &x4
 | |
| 
 | |
|     ldmia   r0, {r2, r3, r4, r5}        @ x4..x7
 | |
|     ldmia   r1, {r6, r7, r8, r9}        @ x12..x15
 | |
|     add     r6, r6, r2                  @ y12 = x12 + x4
 | |
|     sub     r2, r6, r2, asl #1          @ x12 - x4
 | |
|     add     r7, r7, r3                  @ y13 = x13 + x5
 | |
|     sub     r3, r7, r3, asl #1          @ x13 - x5
 | |
|     add     r8, r8, r4                  @ y14 = x14 + x6
 | |
|     sub     r10, r8, r4, asl #1         @ x14 - x6
 | |
|     add     r9, r9, r5                  @ y15 = x15 + x7
 | |
|     sub     r11, r9, r5, asl #1         @ x15 - x7
 | |
| 
 | |
|     stmia   r1, {r6, r7, r8, r9}        @ store y12..y15 (no writeback, r1 is not needed again)
 | |
|     
 | |
|     sub     r2, r2, r3                  @ (x12 - x4) - (x13 - x5)
 | |
|     add     r3, r2, r3, asl #1          @ (x12 - x4) + (x13 - x5)
 | |
| 
 | |
|     smull   r8, r5, r12, r2             @ r5 = high word of cPI2_8 * r2
 | |
|     smull   r8, r6, r12, r3             @ r6 = high word of cPI2_8 * r3
 | |
|     mov     r5, r5, asl #1              @ high word << 1
 | |
|     mov     r6, r6, asl #1              @ high word << 1
 | |
|     @ no stmia here, r5, r6, r10, r11 are passed to mdct_butterfly_8
 | |
| 
 | |
|     sub     r0, r0, #4*4                @ r0 = &x0 again
 | |
|     ldmia   r0, {r1, r2, r3, r4}        @ reload words 0..3 stored above as the other four inputs
 | |
|     bl      mdct_butterfly_8            @ writes words 0..7
 | |
| 
 | |
|     @ mdct_butterfly_8 will have incremented r0 by #8*4 already
 | |
|     ldmia   r0, {r1, r2, r3, r4, r5, r6, r10, r11}    @ words 8..15 (y8..y15 stored above)
 | |
| 
 | |
|     bl      mdct_butterfly_8            @ writes words 8..15
 | |
|     @ mdct_butterfly_8 increments r0 by another #8*4 here
 | |
|     @ at end, r0 has been incremented by #16*4
 | |
| 
 | |
|     ldrpc                               @ pop the saved return address and return
 | |
| 
 | |
| mdct_butterfly_32:                      @ in: r0 = pointer to 32 words x0..x31, updated in place
 | |
|     stmdb   sp!, {r4-r11, lr}           @ save r4-r11 and lr; lr is reused as a constant register below
 | |
| 
 | |
|     add     r1, r0, #16*4               @ r1 = &x16
 | |
| 
 | |
|     ldmia   r0, {r2, r3, r4, r5}        @ x0..x3
 | |
|     ldmia   r1, {r6, r7, r8, r9}        @ x16..x19
 | |
|     add     r6, r6, r2                  @ y16 = x16 + x0
 | |
|     rsb     r2, r6, r2, asl #1          @ x0 - x16
 | |
|     add     r7, r7, r3                  @ y17 = x17 + x1
 | |
|     rsb     r3, r7, r3, asl #1          @ x1 - x17
 | |
|     add     r8, r8, r4                  @ y18 = x18 + x2
 | |
|     rsb     r4, r8, r4, asl #1          @ x2 - x18
 | |
|     add     r9, r9, r5                  @ y19 = x19 + x3
 | |
|     rsb     r5, r9, r5, asl #1          @ x3 - x19
 | |
| 
 | |
|     stmia   r1!, {r6, r7, r8, r9}       @ store y16..y19, r1 = &x20
 | |
| 
 | |
|     ldr     r12, =cPI1_8                @ r12 = cPI1_8 for the rest of the routine
 | |
|     ldr     lr, =cPI3_8                 @ lr = cPI3_8 until the first bl below
 | |
|     smull   r10, r6, r12, r2            @ r6:r10 = cPI1_8 * (x0 - x16)
 | |
|     rsb     r2, r2, #0                  @ r2 = -(x0 - x16)
 | |
|     smlal   r10, r6, lr, r3             @ r6:r10 += cPI3_8 * (x1 - x17)
 | |
|     smull   r10, r7, r12, r3            @ r7:r10 = cPI1_8 * (x1 - x17)
 | |
|     smlal   r10, r7, lr, r2             @ r7:r10 -= cPI3_8 * (x0 - x16)
 | |
|     mov     r6, r6, asl #1              @ keep high word << 1
 | |
|     mov     r7, r7, asl #1              @ keep high word << 1
 | |
| 
 | |
|     add     r4, r4, r5                  @ (x3 - x19) + (x2 - x18) 
 | |
|     rsb     r5, r4, r5, asl #1          @ (x3 - x19) - (x2 - x18)
 | |
| 
 | |
|     ldr     r11, =cPI2_8
 | |
|     smull   r10, r8, r4, r11            @ r8 = high word of cPI2_8 * r4 (low word in r10 is discarded)
 | |
|     smull   r10, r9, r5, r11            @ r9 = high word of cPI2_8 * r5
 | |
|     mov     r8, r8, asl #1              @ high word << 1
 | |
|     mov     r9, r9, asl #1              @ high word << 1
 | |
| 
 | |
|     stmia   r0!, {r6, r7, r8, r9}       @ store into words 0..3, r0 = &x4
 | |
|     
 | |
|     ldmia   r0, {r2, r3, r4, r5}        @ x4..x7
 | |
|     ldmia   r1, {r6, r7, r8, r9}        @ x20..x23
 | |
|     add     r6, r6, r2                  @ y20 = x20 + x4
 | |
|     rsb     r2, r6, r2, asl #1          @ x4 - x20
 | |
|     add     r7, r7, r3                  @ y21 = x21 + x5
 | |
|     rsb     r3, r7, r3, asl #1          @ x5 - x21
 | |
|     add     r8, r8, r4                  @ y22 = x22 + x6
 | |
|     sub     r11, r8, r4, asl #1         @ x22 - x6
 | |
|     add     r9, r9, r5                  @ y23 = x23 + x7
 | |
|     rsb     r10, r9, r5, asl #1         @ x7 - x23
 | |
|     stmia   r1!, {r6, r7, r8, r9}       @ store y20..y23, r1 = &x24
 | |
| 
 | |
|     @r4,r5,r6,r7,r8,r9 now free
 | |
|     @ we don't use r5, r8, r9 below
 | |
| 
 | |
|     smull   r4, r6, lr, r2              @ r6:r4 = cPI3_8 * (x4 - x20)
 | |
|     rsb     r2, r2, #0                  @ r2 = -(x4 - x20)
 | |
|     smlal   r4, r6, r12, r3             @ r6:r4 += cPI1_8 * (x5 - x21)
 | |
|     smull   r4, r7, lr, r3              @ r7:r4 = cPI3_8 * (x5 - x21)
 | |
|     smlal   r4, r7, r12, r2             @ r7:r4 -= cPI1_8 * (x4 - x20)
 | |
|     mov     r6, r6, asl #1              @ keep high word << 1
 | |
|     mov     r7, r7, asl #1              @ keep high word << 1
 | |
| 
 | |
|     stmia   r0!, {r6, r7, r10, r11}     @ store into words 4..7, r0 = &x8
 | |
| 
 | |
|     ldmia   r0, {r2, r3, r4, r5}        @ x8..x11
 | |
|     ldmia   r1, {r6, r7, r8, r9}        @ x24..x27
 | |
|     add     r6, r6, r2                  @ y24 = x24 + x8
 | |
|     sub     r2, r6, r2, asl #1          @ x24 - x8
 | |
|     add     r7, r7, r3                  @ y25 = x25 + x9
 | |
|     sub     r3, r7, r3, asl #1          @ x25 - x9
 | |
|     add     r8, r8, r4                  @ y26 = x26 + x10
 | |
|     sub     r4, r8, r4, asl #1          @ x26 - x10
 | |
|     add     r9, r9, r5                  @ y27 = x27 + x11
 | |
|     sub     r5, r9, r5, asl #1          @ x27 - x11
 | |
| 
 | |
|     stmia   r1!, {r6, r7, r8, r9}       @ store y24..y27, r1 = &x28
 | |
| 
 | |
|     smull   r10, r7, lr, r3             @ r7:r10 = cPI3_8 * (x25 - x9)
 | |
|     rsb     r3, r3, #0                  @ r3 = -(x25 - x9)
 | |
|     smlal   r10, r7, r12, r2            @ r7:r10 += cPI1_8 * (x24 - x8)
 | |
|     smull   r10, r6, r12, r3            @ r6:r10 = -cPI1_8 * (x25 - x9)
 | |
|     smlal   r10, r6, lr, r2             @ r6:r10 += cPI3_8 * (x24 - x8)
 | |
|     mov     r6, r6, asl #1              @ keep high word << 1
 | |
|     mov     r7, r7, asl #1              @ keep high word << 1
 | |
| 
 | |
|     sub     r4, r4, r5                  @ (x26 - x10) - (x27 - x11) 
 | |
|     add     r5, r4, r5, asl #1          @ (x26 - x10) + (x27 - x11)
 | |
| 
 | |
|     ldr     r11, =cPI2_8                @ reload: r11 was overwritten with x22 - x6 above
 | |
|     smull   r10, r8, r11, r4            @ r8 = high word of cPI2_8 * r4
 | |
|     smull   r10, r9, r11, r5            @ r9 = high word of cPI2_8 * r5
 | |
|     mov     r8, r8, asl #1              @ high word << 1
 | |
|     mov     r9, r9, asl #1              @ high word << 1
 | |
| 
 | |
|     stmia   r0!, {r6, r7, r8, r9}       @ store into words 8..11, r0 = &x12
 | |
| 
 | |
|     ldmia   r0, {r2, r3, r4, r5}        @ x12..x15
 | |
|     ldmia   r1, {r6, r7, r8, r9}        @ x28..x31
 | |
|     add     r6, r6, r2                  @ y28 = x28 + x12
 | |
|     sub     r2, r6, r2, asl #1          @ x28 - x12
 | |
|     add     r7, r7, r3                  @ y29 = x29 + x13
 | |
|     sub     r3, r7, r3, asl #1          @ x29 - x13
 | |
|     add     r8, r8, r4                  @ y30 = x30 + x14
 | |
|     sub     r10, r8, r4, asl #1         @ x30 - x14
 | |
|     add     r9, r9, r5                  @ y31 = x31 + x15
 | |
|     sub     r11, r9, r5, asl #1         @ x31 - x15
 | |
|     stmia   r1, {r6, r7, r8, r9}        @ store y28..y31 (no writeback)
 | |
| 
 | |
|     @ r4,r5,r6,r7,r8,r9 now free
 | |
|     @ we don't use r5,r8,r9 below
 | |
| 
 | |
|     smull   r4, r7, r12, r3             @ r7:r4 = cPI1_8 * (x29 - x13)
 | |
|     rsb     r3, r3, #0                  @ r3 = -(x29 - x13)
 | |
|     smlal   r4, r7, lr, r2              @ r7:r4 += cPI3_8 * (x28 - x12)
 | |
|     smull   r4, r6, lr, r3              @ r6:r4 = -cPI3_8 * (x29 - x13)
 | |
|     smlal   r4, r6, r12, r2             @ r6:r4 += cPI1_8 * (x28 - x12)
 | |
|     mov     r6, r6, asl #1              @ keep high word << 1
 | |
|     mov     r7, r7, asl #1              @ keep high word << 1
 | |
| 
 | |
|     stmia   r0, {r6, r7, r10, r11}      @ store into words 12..15 (no writeback, r0 stays at &x12)
 | |
| 
 | |
|     sub     r0, r0, #12*4               @ r0 = &x0 again
 | |
|     bl      mdct_butterfly_16           @ words 0..15; overwrites lr (cPI3_8 is no longer needed)
 | |
| 
 | |
|     @ we know mdct_butterfly_16 increments r0 by #16*4
 | |
|     @ and we wanted to advance by #16*4 anyway, so just call again
 | |
|     bl      mdct_butterfly_16           @ words 16..31
 | |
| 
 | |
|     ldmpc   regs=r4-r11                 @ restore r4-r11 and return via the saved lr
 | |
| 
 | |
|     @ mdct_butterfly_generic_loop(x1, x2, T0, step, Ttop)
 | |
|     @ in:  r0 = x1, r1 = x2, r2 = T0, r3 = step (in 4-byte words),
 | |
|     @      first stack argument = Ttop
 | |
|     @ Runs four do-while passes (each body executes at least once).  One
 | |
|     @ iteration reads the 4 words just below r0 and the 4 words just below r1,
 | |
|     @ writes the sums back below r0 and the twiddled differences below r1
 | |
|     @ (both pointers move down by 4 words), and consumes two coefficient
 | |
|     @ pairs from r2, which moves by 2*step words per iteration:
 | |
|     @   pass 1: r2 walks up   while r2 < Ttop (unsigned)
 | |
|     @   pass 2: r2 walks down while r2 > T0   (unsigned)
 | |
|     @   pass 3: r2 walks up   while r2 < Ttop
 | |
|     @   pass 4: r2 walks down while r2 > T0
 | |
|     @ clobbers r0-r3, r12; r4-r11 are preserved; lr (r14) is used as scratch
 | |
|     @ and the return address is restored from the stack.
 | |
| mdct_butterfly_generic_loop:
 | |
|     stmdb   sp!, {r4-r11, lr}           @ 9 words saved
 | |
|     str     r2, [sp, #-4]!              @ push T0: keep it above sp, never below it
 | |
|     ldr     r4, [sp, #40]               @ r4 = Ttop (10 pushed words above sp)
 | |
| 1:
 | |
|     ldmdb   r0, {r6, r7, r8, r9}        @ a0..a3 = 4 words below r0
 | |
|     ldmdb   r1, {r10, r11, r12, r14}    @ b0..b3 = 4 words below r1
 | |
| 
 | |
|     add     r6, r6, r10                 @ a0 + b0
 | |
|     sub     r10, r6, r10, asl #1        @ a0 - b0
 | |
|     add     r7, r7, r11                 @ a1 + b1
 | |
|     rsb     r11, r7, r11, asl #1        @ b1 - a1
 | |
|     add     r8, r8, r12                 @ a2 + b2
 | |
|     sub     r12, r8, r12, asl #1        @ a2 - b2
 | |
|     add     r9, r9, r14                 @ a3 + b3
 | |
|     rsb     r14, r9, r14, asl #1        @ b3 - a3
 | |
| 
 | |
|     stmdb   r0!, {r6, r7, r8, r9}       @ sums replace a0..a3, r0 -= 4*4
 | |
| 
 | |
|     ldmia   r2, {r6, r7}                @ coefficient pair at r2
 | |
|     smull   r5, r8, r6, r14
 | |
|     rsb     r14, r14, #0
 | |
|     smlal   r5, r8, r7, r12             @ r8 = high word; low word in r5 is discarded
 | |
|     smull   r5, r9, r6, r12
 | |
|     smlal   r5, r9, r7, r14
 | |
| 
 | |
|     mov     r8, r8, asl #1              @ high word << 1
 | |
|     mov     r9, r9, asl #1
 | |
|     add     r2, r2, r3, asl #2          @ r2 += step words
 | |
| 
 | |
|     ldmia   r2, {r12, r14}              @ next coefficient pair
 | |
|     smull   r5, r6, r12, r11
 | |
|     rsb     r11, r11, #0
 | |
|     smlal   r5, r6, r14, r10
 | |
|     smull   r5, r7, r12, r10
 | |
|     smlal   r5, r7, r14, r11
 | |
| 
 | |
|     mov     r6, r6, asl #1
 | |
|     mov     r7, r7, asl #1
 | |
|     stmdb   r1!, {r6, r7, r8, r9}       @ results replace b0..b3, r1 -= 4*4
 | |
|     add     r2, r2, r3, asl #2          @ r2 += step words
 | |
| 
 | |
|     cmp     r2, r4
 | |
|     blo     1b                          @ pass 1 continues while r2 < Ttop
 | |
| 
 | |
|     ldr     r4, [sp]                    @ r4 = T0 (pushed in the prologue)
 | |
| 1:
 | |
|     ldmdb   r0, {r6, r7, r8, r9}        @ a0..a3
 | |
|     ldmdb   r1, {r10, r11, r12, r14}    @ b0..b3
 | |
| 
 | |
|     add     r6, r6, r10                 @ a0 + b0
 | |
|     sub     r10, r6, r10, asl #1        @ a0 - b0
 | |
|     add     r7, r7, r11                 @ a1 + b1
 | |
|     sub     r11, r7, r11, asl #1        @ a1 - b1
 | |
|     add     r8, r8, r12                 @ a2 + b2
 | |
|     sub     r12, r8, r12, asl #1        @ a2 - b2
 | |
|     add     r9, r9, r14                 @ a3 + b3
 | |
|     sub     r14, r9, r14, asl #1        @ a3 - b3
 | |
| 
 | |
|     stmdb   r0!, {r6, r7, r8, r9}       @ sums replace a0..a3, r0 -= 4*4
 | |
| 
 | |
|     ldmia   r2, {r6, r7}                @ coefficient pair at r2
 | |
|     smull   r5, r9, r6, r14
 | |
|     rsb     r14, r14, #0
 | |
|     smlal   r5, r9, r7, r12
 | |
|     smull   r5, r8, r6, r12
 | |
|     smlal   r5, r8, r7, r14
 | |
| 
 | |
|     mov     r8, r8, asl #1
 | |
|     mov     r9, r9, asl #1
 | |
| 
 | |
|     sub     r2, r2, r3, asl #2          @ r2 -= step words
 | |
| 
 | |
|     ldmia   r2, {r12, r14}              @ next coefficient pair
 | |
|     smull   r5, r7, r12, r11
 | |
|     rsb     r11, r11, #0
 | |
|     smlal   r5, r7, r14, r10
 | |
|     smull   r5, r6, r12, r10
 | |
|     smlal   r5, r6, r14, r11
 | |
| 
 | |
|     mov     r6, r6, asl #1
 | |
|     mov     r7, r7, asl #1
 | |
|     stmdb   r1!, {r6, r7, r8, r9}       @ results replace b0..b3, r1 -= 4*4
 | |
|     sub     r2, r2, r3, asl #2          @ r2 -= step words
 | |
| 
 | |
|     cmp     r2, r4
 | |
|     bhi     1b                          @ pass 2 continues while r2 > T0
 | |
| 
 | |
|     ldr     r4, [sp, #40]               @ r4 = Ttop again
 | |
| 1:
 | |
|     ldmdb   r0, {r6, r7, r8, r9}        @ a0..a3
 | |
|     ldmdb   r1, {r10, r11, r12, r14}    @ b0..b3
 | |
| 
 | |
|     add     r6, r6, r10                 @ a0 + b0
 | |
|     rsb     r10, r6, r10, asl #1        @ b0 - a0
 | |
|     add     r7, r7, r11                 @ a1 + b1
 | |
|     rsb     r11, r7, r11, asl #1        @ b1 - a1
 | |
|     add     r8, r8, r12                 @ a2 + b2
 | |
|     rsb     r12, r8, r12, asl #1        @ b2 - a2
 | |
|     add     r9, r9, r14                 @ a3 + b3
 | |
|     rsb     r14, r9, r14, asl #1        @ b3 - a3
 | |
| 
 | |
|     stmdb   r0!, {r6, r7, r8, r9}       @ sums replace a0..a3, r0 -= 4*4
 | |
| 
 | |
|     ldmia   r2, {r6, r7}                @ coefficient pair at r2
 | |
|     smull   r5, r8, r6, r12
 | |
|     rsb     r12, r12, #0
 | |
|     smlal   r5, r8, r7, r14
 | |
|     smull   r5, r9, r6, r14
 | |
|     smlal   r5, r9, r7, r12
 | |
| 
 | |
|     mov     r8, r8, asl #1
 | |
|     mov     r9, r9, asl #1
 | |
| 
 | |
|     add     r2, r2, r3, asl #2          @ r2 += step words
 | |
| 
 | |
|     ldmia   r2, {r12, r14}              @ next coefficient pair
 | |
|     smull   r5, r6, r12, r10
 | |
|     rsb     r10, r10, #0
 | |
|     smlal   r5, r6, r14, r11
 | |
|     smull   r5, r7, r12, r11
 | |
|     smlal   r5, r7, r14, r10
 | |
| 
 | |
|     mov     r6, r6, asl #1
 | |
|     mov     r7, r7, asl #1
 | |
|     stmdb   r1!, {r6, r7, r8, r9}       @ results replace b0..b3, r1 -= 4*4
 | |
|     add     r2, r2, r3, asl #2          @ r2 += step words
 | |
| 
 | |
|     cmp     r2, r4
 | |
|     blo     1b                          @ pass 3 continues while r2 < Ttop
 | |
| 
 | |
|     ldr     r4, [sp]                    @ r4 = T0 again
 | |
| 1:
 | |
|     ldmdb   r0, {r6, r7, r8, r9}        @ a0..a3
 | |
|     ldmdb   r1, {r10, r11, r12, r14}    @ b0..b3
 | |
| 
 | |
|     add     r6, r6, r10                 @ a0 + b0
 | |
|     sub     r10, r6, r10, asl #1        @ a0 - b0
 | |
|     add     r7, r7, r11                 @ a1 + b1
 | |
|     rsb     r11, r7, r11, asl #1        @ b1 - a1
 | |
|     add     r8, r8, r12                 @ a2 + b2
 | |
|     sub     r12, r8, r12, asl #1        @ a2 - b2
 | |
|     add     r9, r9, r14                 @ a3 + b3
 | |
|     rsb     r14, r9, r14, asl #1        @ b3 - a3
 | |
| 
 | |
|     stmdb   r0!, {r6, r7, r8, r9}       @ sums replace a0..a3, r0 -= 4*4
 | |
| 
 | |
|     ldmia   r2, {r6, r7}                @ coefficient pair at r2
 | |
|     smull   r5, r9, r6, r12
 | |
|     smlal   r5, r9, r7, r14
 | |
|     rsb     r12, r12, #0
 | |
|     smull   r5, r8, r6, r14
 | |
|     smlal   r5, r8, r7, r12
 | |
| 
 | |
|     mov     r8, r8, asl #1
 | |
|     mov     r9, r9, asl #1
 | |
|     sub     r2, r2, r3, asl #2          @ r2 -= step words
 | |
| 
 | |
|     ldmia   r2, {r12, r14}              @ next coefficient pair
 | |
|     smull   r5, r7, r12, r10
 | |
|     rsb     r10, r10, #0
 | |
|     smlal   r5, r7, r14, r11
 | |
|     smull   r5, r6, r12, r11
 | |
|     smlal   r5, r6, r14, r10
 | |
| 
 | |
|     mov     r6, r6, asl #1
 | |
|     mov     r7, r7, asl #1
 | |
|     stmdb   r1!, {r6, r7, r8, r9}       @ results replace b0..b3, r1 -= 4*4
 | |
|     sub     r2, r2, r3, asl #2          @ r2 -= step words
 | |
| 
 | |
|     cmp     r2, r4
 | |
|     bhi     1b                          @ pass 4 continues while r2 > T0
 | |
| 
 | |
|     add     sp, sp, #4                  @ drop the saved T0 slot
 | |
|     ldmpc   regs=r4-r11                 @ restore r4-r11 and return via the saved lr
 | |
| 
 |