forked from len0rd/rockbox
		
	git-svn-id: svn://svn.rockbox.org/rockbox/trunk@5347 a1c6a512-1295-4272-9138-f99709370657
		
			
				
	
	
		
			132 lines
		
	
	
	
		
			3.9 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			132 lines
		
	
	
	
		
			3.9 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
| /***************************************************************************
 | |
|  *             __________               __   ___.
 | |
|  *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 | |
|  *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 | |
|  *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 | |
|  *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 | |
|  *                     \/            \/     \/    \/            \/
 | |
|  * $Id$
 | |
|  *
 | |
|  * Copyright (C) 2004 by Jens Arnold
 | |
|  *
 | |
|  * All files in this archive are subject to the GNU General Public License.
 | |
|  * See the file COPYING in the source tree root for full license agreement.
 | |
|  *
 | |
|  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 | |
|  * KIND, either express or implied.
 | |
|  *
 | |
|  ****************************************************************************/
 | |
| #include "config.h"
 | |
| 
 | |
|     .section    .icode,"ax",@progbits
 | |
| 
 | |
|     .align      2
 | |
| #if CONFIG_CPU == SH7034        
 | |
|     .global     _memset
 | |
|     .type       _memset,@function
 | |
| 
 | |
| /* Fills a memory region with specified byte value
 | |
|  * This version is optimized for speed
 | |
|  *
 | |
|  * arguments:
 | |
|  *  r4 - start address
 | |
|  *  r5 - data
 | |
|  *  r6 - length
 | |
|  *
 | |
|  * return value:
 | |
|  *  r0 - start address (like ANSI version)
 | |
|  *
 | |
|  * register usage:
 | |
|  *  r0 - temporary
 | |
|  *  r1 - bit mask for rounding to long bounds
 | |
|  *  r2 - last / first long bound (only if >= 12 bytes)
 | |
|  *  r4 - start address
 | |
|  *  r5 - data (spread to all 4 bytes if >= 12 bytes)
 | |
|  *  r6 - current address (runs down from end to start)
 | |
|  *
 | |
|  * The instruction order below is devised in a way to utilize the pipelining
 | |
|  * of the SH1 to the max. The routine fills memory from end to start in
 | |
|  * order to utilize the auto-decrementing store instructions.
 | |
|  */
 | |
| 
 | |
| _memset:
 | |
|     add     r4,r6       /* r6 = end_address */
 | |
| 
 | |
|     mov     r6,r0
 | |
|     add     #-12,r0     /* r0 = r6 - 12; don't go below 12 here! */
 | |
|     cmp/hs  r4,r0       /* >= 12 bytes to fill? */
 | |
|     bf      .start_b2   /* no, jump directly to byte loop */
 | |
| 
 | |
|     extu.b  r5,r5       /* start: spread data to all 4 bytes */
 | |
|     swap.b  r5,r0
 | |
|     or      r0,r5       /* data now in 2 lower bytes of r5 */
 | |
|     swap.w  r5,r0
 | |
|     or      r0,r5       /* data now in all 4 bytes of r5 */
 | |
| 
 | |
|     mov     #-4,r1      /* r1 = 0xFFFFFFFC */
 | |
| 
 | |
|     mov     r6,r2
 | |
|     bra     .start_b1
 | |
|     and     r1,r2       /* r2 = last long bound */
 | |
| 
 | |
|     /* leading byte loop: sets 0..3 bytes */
 | |
| .loop_b1:
 | |
|     mov.b   r5,@-r6     /* store byte */
 | |
| .start_b1:
 | |
|     cmp/hi  r2,r6       /* runs r6 down to last long bound */
 | |
|     bt      .loop_b1
 | |
| 
 | |
|     mov     r4,r2
 | |
|     add     #11,r2      /* combined for rounding and offset */
 | |
|     and     r1,r2       /* r2 = first long bound + 8 */
 | |
| 
 | |
|     /* main loop: set 2 longs per pass */
 | |
| .loop2_l:
 | |
|     mov.l   r5,@-r6     /* store first long */
 | |
|     cmp/hi  r2,r6       /* runs r6 down to first or second long bound */
 | |
|     mov.l   r5,@-r6     /* store second long */
 | |
|     bt      .loop2_l
 | |
| 
 | |
|     add     #-8,r2      /* correct offset */
 | |
|     cmp/hi  r2,r6       /* 1 long left? */
 | |
|     bf      .start_b2   /* no, jump to trailing byte loop */
 | |
| 
 | |
|     bra     .start_b2   /* jump to trailing byte loop */
 | |
|     mov.l   r5,@-r6     /* store last long */
 | |
| 
 | |
|     /* trailing byte loop */
 | |
|     .align  2
 | |
| .loop_b2:
 | |
|     mov.b   r5,@-r6     /* store byte */
 | |
| .start_b2:
 | |
|     cmp/hi  r4,r6       /* runs r6 down to the start address */
 | |
|     bt      .loop_b2
 | |
| 
 | |
|     rts
 | |
|     mov     r4,r0       /* return start address */
 | |
| 
 | |
| .end:
 | |
|     .size   _memset,.end-_memset
 | |
| #elif CONFIG_CPU == MCF5249
 | |
|     .global     memset
 | |
|     .type       memset,@function
 | |
| 
 | |
| /* Fills a memory region with specified byte value
 | |
|  * This version is not optimized at all
 | |
|  */
 | |
| memset:
 | |
|         move.l  (4,%sp),%a0     /* Start address */
 | |
|         move.l  (8,%sp),%d0    /* Value */
 | |
|         move.l  (12,%sp),%d1    /* Length */
 | |
|         lea.l   (%d1,%a0),%a1 /* a1 = a0+d1 */
 | |
| 
 | |
|         bra.b   .byteloopend
 | |
|         
 | |
| .byteloop:
 | |
|         move.b  %d0,(%a0)+
 | |
| .byteloopend:
 | |
|         cmp.l   %a0,%a1
 | |
|         bne.b   .byteloop
 | |
|         
 | |
|         rts
 | |
| #endif
 |