forked from len0rd/rockbox
		
	later. We still need to hunt down snippets used that are not. 1324 modified files... http://www.rockbox.org/mail/archive/rockbox-dev-archive-2008-06/0060.shtml git-svn-id: svn://svn.rockbox.org/rockbox/trunk@17847 a1c6a512-1295-4272-9138-f99709370657
		
			
				
	
	
		
			504 lines
		
	
	
	
		
			18 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			504 lines
		
	
	
	
		
			18 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
| /***************************************************************************
 | |
|  *             __________               __   ___.
 | |
|  *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 | |
|  *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 | |
|  *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 | |
|  *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 | |
|  *                     \/            \/     \/    \/            \/
 | |
|  * $Id$
 | |
|  *
 | |
|  * Copyright (C) 2006 by David Bryant
 | |
|  *
 | |
|  * This program is free software; you can redistribute it and/or
 | |
|  * modify it under the terms of the GNU General Public License
 | |
|  * as published by the Free Software Foundation; either version 2
 | |
|  * of the License, or (at your option) any later version.
 | |
|  *
 | |
|  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 | |
|  * KIND, either express or implied.
 | |
|  *
 | |
|  ****************************************************************************/
 | |
| 
 | |
| /* This is an assembly optimized version of the following WavPack function:
 | |
|  *
 | |
|  * void decorr_stereo_pass_cont_arml (struct decorr_pass *dpp,
 | |
|  *                                    long *buffer, long sample_count);
 | |
|  *
 | |
|  * It performs a single pass of stereo decorrelation on the provided buffer.
 | |
|  * Note that this version of the function requires that the 8 previous stereo
 | |
|  * samples are visible and correct. In other words, it ignores the "samples_*"
 | |
|  * fields in the decorr_pass structure and gets the history data directly
 | |
|  * from the buffer. It does, however, return the appropriate history samples
 | |
|  * to the decorr_pass structure before returning.
 | |
|  *
 | |
|  * This is written to work on an ARM7TDMI processor. This version uses the
 | |
|  * 64-bit multiply-accumulate instruction and so can be used with all
 | |
|  * WavPack files. However, for optimum performance with 16-bit WavPack
 | |
|  * files, there is a faster version that only uses the 32-bit MLA
 | |
|  * instruction.
 | |
|  */
 | |
| 
 | |
|         .text
 | |
|         .align
 | |
|         .global         decorr_stereo_pass_cont_arml
 | |
| 
 | |
| /*
 | |
|  * on entry:
 | |
|  *
 | |
|  * r0 = struct decorr_pass *dpp
 | |
|  * r1 = long *buffer
 | |
|  * r2 = long sample_count
 | |
|  */
 | |
| 
 | |
| decorr_stereo_pass_cont_arml:
 | |
| 
 | |
|         stmfd   sp!, {r4 - r8, r10, r11, lr}
 | |
|         mov     r5, r0                  @ r5 = dpp
 | |
|         mov     r11, #512               @ r11 = 512 for rounding
 | |
|         ldrsh   r6, [r0, #2]            @ r6 = dpp->delta
 | |
|         ldrsh   r4, [r0, #4]            @ r4 = dpp->weight_A
 | |
|         ldrsh   r0, [r0, #6]            @ r0 = dpp->weight_B
 | |
|         cmp     r2, #0                  @ exit if no samples to process
 | |
|         beq     common_exit
 | |
| 
 | |
|         mov     r0, r0, asl #18         @ for 64-bit math we use weights << 18
 | |
|         mov     r4, r4, asl #18
 | |
|         mov     r6, r6, asl #18
 | |
|         add     r7, r1, r2, asl #3      @ r7 = buffer ending position
 | |
|         ldrsh   r2, [r5, #0]            @ r2 = dpp->term
 | |
|         cmp     r2, #0
 | |
|         blt     minus_term
 | |
| 
 | |
|         ldr     lr, [r1, #-16]          @ load 2 sample history from buffer
 | |
|         ldr     r10, [r1, #-12]         @  for terms 2, 17, and 18
 | |
|         ldr     r8, [r1, #-8]
 | |
|         ldr     r3, [r1, #-4]
 | |
| 
 | |
|         cmp     r2, #18
 | |
|         beq     term_18_loop
 | |
|         mov     lr, lr, asl #4
 | |
|         mov     r10, r10, asl #4
 | |
|         cmp     r2, #2
 | |
|         beq     term_2_loop
 | |
|         cmp     r2, #17
 | |
|         beq     term_17_loop
 | |
|         b       term_default_loop
 | |
| 
 | |
| minus_term:
 | |
|         mov     r10, #(1024 << 18)      @ r10 = -1024 << 18 for weight clipping
 | |
|         rsb     r10, r10, #0            @  (only used for negative terms)
 | |
|         cmn     r2, #1
 | |
|         beq     term_minus_1
 | |
|         cmn     r2, #2
 | |
|         beq     term_minus_2
 | |
|         cmn     r2, #3
 | |
|         beq     term_minus_3
 | |
|         b       common_exit
 | |
| 
 | |
/*
 ******************************************************************************
 * Term 17: decorrelation value = (2 * previous) - second previous
 *
 * r0 = weight_B << 18          r8 = previous left sample
 * r1 = bptr                    r9 = (not used)
 * r2 = current sample          r10 = second previous right sample << 4
 * r3 = previous right sample   r11 = lo accumulator (for rounding)
 * r4 = weight_A << 18          ip = current decorrelation value << 4
 * r5 = dpp                     sp =
 * r6 = delta << 18             lr = second previous left sample << 4
 * r7 = eptr                    pc =
 *
 * The 64-bit accumulator hi:lo starts as sample:0x80000000. Adding
 * (weight << 18) * (value << 4) leaves sample + ((weight * value + 512) >> 10)
 * in the high word. The multiply, the store and the weight update are all
 * skipped when the decorrelation value is zero; the weight update is also
 * skipped when the incoming sample is zero.
 *******************************************************************************
 */

term_17_loop:
        ldr     r2, [r1], #4            @ r2 = incoming left sample, bptr++
        mov     r11, #0x80000000        @ preload rounding constant
        rsbs    ip, lr, r8, lsl #5      @ ip = ((2 * prev) - 2nd prev) << 4
        mov     lr, r8, lsl #4          @ previous becomes 2nd previous
        mov     r8, r2                  @ hi accumulator starts as the sample
        smlalne r11, r8, r4, ip         @ r8 += rounded weighted value
        strne   r8, [r1, #-4]           @ write back only if it could change
        cmpne   r2, #0                  @ Z set if either operand was zero
        beq     .Lt17_right
        teq     ip, r2                  @ N set when the signs differ
        addpl   r4, r4, r6              @ same sign: weight_A += delta
        submi   r4, r4, r6              @ opposite sign: weight_A -= delta

.Lt17_right:
        ldr     r2, [r1], #4            @ r2 = incoming right sample, bptr++
        mov     r11, #0x80000000
        rsbs    ip, r10, r3, lsl #5     @ same computation for right channel
        mov     r10, r3, lsl #4
        mov     r3, r2
        smlalne r11, r3, r0, ip
        strne   r3, [r1, #-4]
        cmpne   r2, #0
        beq     .Lt17_next
        teq     ip, r2
        addpl   r0, r0, r6              @ weight_B += delta
        submi   r0, r0, r6              @ weight_B -= delta

.Lt17_next:
        cmp     r7, r1                  @ more samples before eptr?
        bhi     term_17_loop

        mov     lr, lr, asr #4          @ undo the << 4 so store_1718 sees
        mov     r10, r10, asr #4        @  plain sample values
        b       store_1718              @ common exit for terms 17 & 18
 | |
| 
 | |
/*
 ******************************************************************************
 * Term 18: decorrelation value = ((3 * previous) - second previous) >> 1,
 * computed as previous + ((previous - second previous) >> 1)
 *
 * r0 = weight_B << 18          r8 = previous left sample
 * r1 = bptr                    r9 = (not used)
 * r2 = current sample          r10 = second previous right sample
 * r3 = previous right sample   r11 = lo accumulator (for rounding)
 * r4 = weight_A << 18          ip = decorrelation value << 4
 * r5 = dpp                     sp =
 * r6 = delta << 18             lr = second previous left sample
 * r7 = eptr                    pc =
 *
 * Unlike term 17, the second-previous samples are kept unshifted here.
 *******************************************************************************
 */

term_18_loop:
        ldr     r2, [r1], #4            @ r2 = incoming left sample, bptr++
        rsb     ip, lr, r8              @ ip = prev - 2nd prev
        add     ip, r8, ip, asr #1      @ ip = prev + ((prev - 2nd prev) >> 1)
        mov     lr, r8                  @ previous becomes 2nd previous
        movs    ip, ip, lsl #4          @ scale for the multiply, test for zero
        mov     r11, #0x80000000        @ preload rounding constant
        mov     r8, r2                  @ hi accumulator starts as the sample
        smlalne r11, r8, r4, ip
        strne   r8, [r1, #-4]           @ write back only if it could change
        cmpne   r2, #0                  @ Z set if either operand was zero
        beq     .Lt18_right
        teq     ip, r2                  @ N set when the signs differ
        addpl   r4, r4, r6              @ same sign: weight_A += delta
        submi   r4, r4, r6              @ opposite sign: weight_A -= delta

.Lt18_right:
        ldr     r2, [r1], #4            @ r2 = incoming right sample, bptr++
        rsb     ip, r10, r3             @ same computation for right channel
        add     ip, r3, ip, asr #1
        mov     r10, r3
        movs    ip, ip, lsl #4
        mov     r11, #0x80000000
        mov     r3, r2
        smlalne r11, r3, r0, ip
        strne   r3, [r1, #-4]
        cmpne   r2, #0
        beq     .Lt18_next
        teq     ip, r2
        addpl   r0, r0, r6              @ weight_B += delta
        submi   r0, r0, r6              @ weight_B -= delta

.Lt18_next:
        cmp     r7, r1                  @ more samples before eptr?
        bhi     term_18_loop

/*
 * Common exit for terms 17 & 18 (term 18 falls through, term 17 branches
 * here): save the two most recent samples of each channel into the struct.
 * Left-channel values go to dpp+8 / dpp+12, right-channel values to
 * dpp+40 / dpp+44.
 */

store_1718:
        str     r8, [r5, #8]            @ previous left
        str     lr, [r5, #12]           @ second previous left
        str     r3, [r5, #40]           @ previous right
        str     r10, [r5, #44]          @ second previous right
        b       common_exit             @ and return
 | |
| 
 | |
/*
 ******************************************************************************
 * Term 2: decorrelation value = second previous sample of the same channel
 * (the default handler for terms 1-8 could do this too, but keeping the
 * history in registers avoids reading each sample from memory twice)
 *
 * r0 = weight_B << 18          r8 = previous left sample
 * r1 = bptr                    r9 = (not used)
 * r2 = current sample          r10 = second previous right sample << 4
 * r3 = previous right sample   r11 = lo accumulator (for rounding)
 * r4 = weight_A << 18          ip = decorrelation value << 4
 * r5 = dpp                     sp =
 * r6 = delta << 18             lr = second previous left sample << 4
 * r7 = eptr                    pc =
 *******************************************************************************
 */

term_2_loop:
        ldr     r2, [r1], #4            @ r2 = incoming left sample, bptr++
        movs    ip, lr                  @ ip = 2nd previous left << 4, test it
        mov     lr, r8, lsl #4          @ previous becomes 2nd previous
        mov     r11, #0x80000000        @ preload rounding constant
        mov     r8, r2                  @ hi accumulator starts as the sample
        smlalne r11, r8, r4, ip
        strne   r8, [r1, #-4]           @ write back only if it could change
        cmpne   r2, #0                  @ Z set if either operand was zero
        beq     .Lt2_right
        teq     ip, r2                  @ N set when the signs differ
        addpl   r4, r4, r6              @ same sign: weight_A += delta
        submi   r4, r4, r6              @ opposite sign: weight_A -= delta

.Lt2_right:
        ldr     r2, [r1], #4            @ r2 = incoming right sample, bptr++
        movs    ip, r10                 @ same computation for right channel
        mov     r10, r3, lsl #4
        mov     r11, #0x80000000
        mov     r3, r2
        smlalne r11, r3, r0, ip
        strne   r3, [r1, #-4]
        cmpne   r2, #0
        beq     .Lt2_next
        teq     ip, r2
        addpl   r0, r0, r6              @ weight_B += delta
        submi   r0, r0, r6              @ weight_B -= delta

.Lt2_next:
        cmp     r7, r1                  @ more samples before eptr?
        bhi     term_2_loop

        b       default_term_exit       @ this exit updates all dpp->samples
 | |
| 
 | |
/*
 ******************************************************************************
 * Generic handler for the remaining positive terms: the decorrelation value
 * is the sample of the same channel located `term` stereo pairs earlier
 * (term * 8 bytes back), fetched from the buffer on every iteration.
 *
 * r0 = weight_B << 18          r8 = result accumulator
 * r1 = bptr                    r9 = (not used)
 * r2 = dpp->term               r10 =
 * r3 = decorrelation value     r11 = lo accumulator (for rounding)
 * r4 = weight_A << 18          ip = current sample
 * r5 = dpp                     sp =
 * r6 = delta << 18             lr =
 * r7 = eptr                    pc =
 *
 * Falls through into default_term_exit when the buffer is exhausted.
 *******************************************************************************
 */

term_default_loop:
        ldr     r3, [r1, -r2, lsl #3]   @ r3 = left sample `term` pairs back
        ldr     ip, [r1], #4            @ ip = incoming left sample, bptr++
        mov     r11, #0x80000000        @ preload rounding constant
        movs    r3, r3, lsl #4          @ scale for the multiply, test for zero
        mov     r8, ip                  @ hi accumulator starts as the sample
        smlalne r11, r8, r4, r3
        strne   r8, [r1, #-4]           @ write back only if it could change
        cmpne   ip, #0                  @ Z set if either operand was zero
        beq     .Ltdef_right
        teq     ip, r3                  @ N set when the signs differ
        addpl   r4, r4, r6              @ same sign: weight_A += delta
        submi   r4, r4, r6              @ opposite sign: weight_A -= delta

.Ltdef_right:
        ldr     r3, [r1, -r2, lsl #3]   @ same computation for right channel
        ldr     ip, [r1], #4
        mov     r11, #0x80000000
        movs    r3, r3, lsl #4
        mov     r8, ip
        smlalne r11, r8, r0, r3
        strne   r8, [r1, #-4]
        cmpne   ip, #0
        beq     .Ltdef_next
        teq     ip, r3
        addpl   r0, r0, r6              @ weight_B += delta
        submi   r0, r0, r6              @ weight_B -= delta

.Ltdef_next:
        cmp     r7, r1                  @ more samples before eptr?
        bhi     term_default_loop
 | |
| 
 | |
/*
 * Exit path shared by the generic term handler and the term 2 loop: copy the
 * last 8 stereo pairs before bptr into the decorr structure (even if the
 * given term does not use all of them).
 *
 * Walking backwards from the newest pair, slot index (term - 1) & 7 receives
 * the newest pair and the index decreases (mod 8) for each older pair. Left
 * samples go to the word array at dpp+8, right samples to the one at dpp+40.
 *
 * r1 = bptr (moved back 8 bytes per iteration)   ip = running slot index
 * r2 = sample being copied                       lr = iterations remaining
 * r3 = address of the current slot pair
 */

default_term_exit:
        ldrsh   r3, [r5, #0]            @ r3 = dpp->term
        mov     lr, #7                  @ counts 7..0, i.e. 8 iterations
        sub     ip, r3, #1              @ slot for the most recent pair

.Lsave_history:
        and     r3, ip, #7              @ wrap slot index to 0..7
        add     r3, r5, r3, lsl #2      @ r3 = dpp + slot * 4
        ldr     r2, [r1, #-4]           @ right sample of this pair
        str     r2, [r3, #40]
        ldr     r2, [r1, #-8]!          @ left sample; step bptr back one pair
        str     r2, [r3, #8]
        sub     ip, ip, #1              @ next older slot
        subs    lr, lr, #1              @ goes negative after the 8th pair
        bpl     .Lsave_history
        b       common_exit
 | |
| 
 | |
/*
 ******************************************************************************
 * Term -1: the left channel is predicted from the previous right sample and
 * the right channel from the just-updated left sample. Both weights are
 * clipped to +/-(1024 << 18) after every update.
 *
 * r0 = weight_B << 18          r8 =
 * r1 = bptr                    r9 = (not used)
 * r2 = intermediate result     r10 = -1024 << 18 (for clipping)
 * r3 = previous right sample   r11 = lo accumulator (for rounding)
 * r4 = weight_A << 18          ip = current sample
 * r5 = dpp                     sp =
 * r6 = delta << 18             lr = updated left sample
 * r7 = eptr                    pc =
 *******************************************************************************
 */

term_minus_1:
        ldr     r3, [r1, #-4]           @ r3 = right sample preceding the buffer

term_minus_1_loop:
        mov     r11, #0x80000000        @ preload rounding constant
        ldr     ip, [r1], #8            @ ip = incoming left; bptr skips the pair
        movs    r3, r3, lsl #4          @ decorrelation value = previous right
        mov     lr, ip                  @ hi accumulator starts as the sample
        smlalne r11, lr, r4, r3
        strne   lr, [r1, #-8]           @ write back only if it could change
        cmpne   ip, #0                  @ Z set if either operand was zero
        beq     .Ltm1_right
        teq     ip, r3                  @ N set when the signs differ
        addpl   r4, r4, r6              @ same sign: weight_A += delta
        submi   r4, r4, r6              @ opposite sign: weight_A -= delta
        cmp     r4, r10                 @ clip weight_A to +/-1024 (scaled)
        movlt   r4, r10
        cmp     r4, #(1024 << 18)
        movgt   r4, #(1024 << 18)

.Ltm1_right:
        ldr     r2, [r1, #-4]           @ r2 = incoming right sample
        movs    lr, lr, lsl #4          @ decorrelation value = updated left
        mov     r11, #0x80000000
        mov     r3, r2
        smlalne r11, r3, r0, lr         @ r3 = updated right (next "previous")
        strne   r3, [r1, #-4]
        cmpne   r2, #0
        beq     .Ltm1_next
        teq     r2, lr
        addpl   r0, r0, r6              @ weight_B += delta
        submi   r0, r0, r6              @ weight_B -= delta
        cmp     r0, r10                 @ clip weight_B to +/-1024 (scaled)
        movlt   r0, r10
        cmp     r0, #(1024 << 18)
        movgt   r0, #(1024 << 18)

.Ltm1_next:
        cmp     r7, r1                  @ more samples before eptr?
        bhi     term_minus_1_loop

        str     r3, [r5, #8]            @ save last right sample at dpp+8
        b       common_exit
 | |
| 
 | |
/*
 ******************************************************************************
 * Term -2: mirror image of term -1. The right channel is processed first,
 * predicted from the previous left sample; the left channel is then
 * predicted from the just-updated right sample. Both weights are clipped to
 * +/-(1024 << 18) after every update.
 *
 * r0 = weight_B << 18          r8 =
 * r1 = bptr                    r9 = (not used)
 * r2 = intermediate result     r10 = -1024 << 18 (for clipping)
 * r3 = previous left sample    r11 = lo accumulator (for rounding)
 * r4 = weight_A << 18          ip = current sample
 * r5 = dpp                     sp =
 * r6 = delta << 18             lr = updated right sample
 * r7 = eptr                    pc =
 *******************************************************************************
 */

term_minus_2:
        ldr     r3, [r1, #-8]           @ r3 = left sample preceding the buffer

term_minus_2_loop:
        mov     r11, #0x80000000        @ preload rounding constant
        ldr     ip, [r1, #4]            @ ip = incoming right sample
        movs    r3, r3, lsl #4          @ decorrelation value = previous left
        mov     lr, ip                  @ hi accumulator starts as the sample
        smlalne r11, lr, r0, r3
        strne   lr, [r1, #4]            @ write back only if it could change
        cmpne   ip, #0                  @ Z set if either operand was zero
        beq     .Ltm2_left
        teq     ip, r3                  @ N set when the signs differ
        addpl   r0, r0, r6              @ same sign: weight_B += delta
        submi   r0, r0, r6              @ opposite sign: weight_B -= delta
        cmp     r0, r10                 @ clip weight_B to +/-1024 (scaled)
        movlt   r0, r10
        cmp     r0, #(1024 << 18)
        movgt   r0, #(1024 << 18)

.Ltm2_left:
        ldr     r2, [r1], #8            @ r2 = incoming left; bptr skips the pair
        movs    lr, lr, lsl #4          @ decorrelation value = updated right
        mov     r11, #0x80000000
        mov     r3, r2
        smlalne r11, r3, r4, lr         @ r3 = updated left (next "previous")
        strne   r3, [r1, #-8]
        cmpne   r2, #0
        beq     .Ltm2_next
        teq     r2, lr
        addpl   r4, r4, r6              @ weight_A += delta
        submi   r4, r4, r6              @ weight_A -= delta
        cmp     r4, r10                 @ clip weight_A to +/-1024 (scaled)
        movlt   r4, r10
        cmp     r4, #(1024 << 18)
        movgt   r4, #(1024 << 18)

.Ltm2_next:
        cmp     r7, r1                  @ more samples before eptr?
        bhi     term_minus_2_loop

        str     r3, [r5, #40]           @ save last left sample at dpp+40
        b       common_exit
 | |
| 
 | |
/*
 ******************************************************************************
 * Term -3: each channel is predicted from the previous sample of the other
 * channel (left from previous right, right from previous left). Both weights
 * are clipped to +/-(1024 << 18) after every update.
 *
 * r0 = weight_B << 18          r8 = previous left sample
 * r1 = bptr                    r9 = (not used)
 * r2 = current left sample     r10 = -1024 << 18 (for clipping)
 * r3 = previous right sample   r11 = lo accumulator (for rounding)
 * r4 = weight_A << 18          ip = intermediate result
 * r5 = dpp                     sp =
 * r6 = delta << 18             lr =
 * r7 = eptr                    pc =
 *
 * There is no branch at the end: execution falls through into common_exit.
 *******************************************************************************
 */

term_minus_3:
        ldr     r8, [r1, #-8]           @ r8 = left sample preceding the buffer
        ldr     r3, [r1, #-4]           @ r3 = right sample preceding the buffer

term_minus_3_loop:
        mov     r11, #0x80000000        @ preload rounding constant
        ldr     ip, [r1], #4            @ ip = incoming left sample, bptr++
        movs    r3, r3, lsl #4          @ decorrelation value = previous right
        mov     r2, ip                  @ hi accumulator starts as the sample
        smlalne r11, r2, r4, r3
        strne   r2, [r1, #-4]           @ write back only if it could change
        cmpne   ip, #0                  @ Z set if either operand was zero
        beq     .Ltm3_right
        teq     ip, r3                  @ N set when the signs differ
        addpl   r4, r4, r6              @ same sign: weight_A += delta
        submi   r4, r4, r6              @ opposite sign: weight_A -= delta
        cmp     r4, r10                 @ clip weight_A to +/-1024 (scaled)
        movlt   r4, r10
        cmp     r4, #(1024 << 18)
        movgt   r4, #(1024 << 18)

.Ltm3_right:
        movs    ip, r8, lsl #4          @ ip = previous left we use now
        mov     r8, r2                  @ r8 = current left we use next time
        ldr     r2, [r1], #4            @ r2 = incoming right sample, bptr++
        mov     r11, #0x80000000
        mov     r3, r2
        smlalne r11, r3, r0, ip         @ r3 = updated right (next "previous")
        strne   r3, [r1, #-4]
        cmpne   r2, #0
        beq     .Ltm3_next
        teq     ip, r2
        addpl   r0, r0, r6              @ weight_B += delta
        submi   r0, r0, r6              @ weight_B -= delta
        cmp     r0, r10                 @ clip weight_B to +/-1024 (scaled)
        movlt   r0, r10
        cmp     r0, #(1024 << 18)
        movgt   r0, #(1024 << 18)

.Ltm3_next:
        cmp     r7, r1                  @ more samples before eptr?
        bhi     term_minus_3_loop

        str     r8, [r5, #40]           @ save last left sample at dpp+40
        str     r3, [r5, #8]            @ save last right sample at dpp+8
 | |
| 
 | |
/*
 * Final exit: the loops keep both weights scaled up by 18 bits, so bring them
 * back to their real magnitude, store them into the struct as halfwords
 * (weight_A at dpp+4, weight_B at dpp+6) and return to the caller by
 * restoring the saved registers with the saved lr loaded straight into pc.
 *
 * Expects r4 = weight_A << 18, r0 = weight_B << 18 and r5 = dpp.
 */

common_exit:
        mov     r4, r4, asr #18         @ weight_A back to real magnitude
        strh    r4, [r5, #4]
        mov     r0, r0, asr #18         @ weight_B back to real magnitude
        strh    r0, [r5, #6]
        ldmfd   sp!, {r4 - r8, r10, r11, pc}
 | |
| 
 |