Move optimized memcpy and friends and strlen to firmware/asm,

using the new automatic-asm-picking infrastructure.
2025-12-08 20:55:17 -05:00 · 2012-01-07 19:56:09 +01:00 · 2012-01-07 19:56:09 +01:00 · a035261089
commit a035261089
parent 8e8e978de6
22 changed files with 7 additions and 34 deletions
--- a/firmware/target/arm/memcpy-arm.S
+++ b/firmware/target/arm/memcpy-arm.S
@ -1,176 +0,0 @@
-/***************************************************************************
- *             __________               __   ___.
- *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
- *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
- *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
- *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
- *                     \/            \/     \/    \/            \/
- * $Id$
- *
- * Copyright (C) 2006 Free Software Foundation, Inc.
- * This file was originally part of the GNU C Library
- * Contributed to glibc by MontaVista Software, Inc. (written by Nicolas Pitre)
- * Adapted for Rockbox by Daniel Ankers
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
- * KIND, either express or implied.
- *
- ****************************************************************************/
-
-#include "config.h"
-
-/*
- * Endian independent macros for shifting bytes within registers.
- */
-#ifndef __ARMEB__
-#define pull            lsr
-#define push            lsl
-#else
-#define pull            lsl
-#define push            lsr
-#endif
-
-/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
-
-    .section    .icode,"ax",%progbits
-
-    .align      2
-    .global     memcpy
-    .type       memcpy,%function
-
-memcpy:                                 @ forward copy: r0 = dest, r1 = src, r2 = byte count
-        stmfd   sp!, {r0, r4, lr}       @ push dest, r4 and lr
-
-        subs    r2, r2, #4              @ r2 = count - 4
-        blt 8f                          @ fewer than 4 bytes: byte tail only
-        ands    ip, r0, #3              @ ip = dest misalignment within its word
-        bne 9f                          @ dest unaligned: align it first
-        ands    ip, r1, #3              @ ip = src misalignment within its word
-        bne 10f                         @ dest aligned but src not: shifted copy
-
-1:      subs    r2, r2, #(28)           @ both pointers word aligned; r2 = bytes left - 32
-        stmfd   sp!, {r5 - r8}          @ free more scratch registers for the block copy
-        blt 5f                          @ fewer than 32 bytes left: skip the block loop
-
-2:
-3:
-4:      ldmia   r1!, {r3, r4, r5, r6, r7, r8, ip, lr}   @ load 8 words (32 bytes)
-        subs    r2, r2, #32
-        stmia   r0!, {r3, r4, r5, r6, r7, r8, ip, lr}   @ store them; stmia leaves the flags alone
-        bge 3b                          @ loop while another 32 bytes remain
-
-5:      ands    ip, r2, #28             @ ip = bytes left that form whole words (0..28)
-        rsb ip, ip, #32                 @ ip = 32 - that; rsb keeps the flags from ands
-        addne   pc, pc, ip      @ C is always clear here
-        b   7f                          @ taken when no whole words are left
-6:      nop                             @ pad slot: pc reads as the addne address + 8, so ip = 4 lands on the first ldr
-        ldr r3, [r1], #4                @ load ladder; the jump skips one ldr per word not needed
-        ldr r4, [r1], #4
-        ldr r5, [r1], #4
-        ldr r6, [r1], #4
-        ldr r7, [r1], #4
-        ldr r8, [r1], #4
-        ldr lr, [r1], #4
-
-        add pc, pc, ip                  @ same offset again, this time into the str ladder
-        nop                             @ two pad slots for the pc + 8 read
-        nop
-        str r3, [r0], #4                @ store ladder, entered at the matching position
-        str r4, [r0], #4
-        str r5, [r0], #4
-        str r6, [r0], #4
-        str r7, [r0], #4
-        str r8, [r0], #4
-        str lr, [r0], #4
-
-7:      ldmfd   sp!, {r5 - r8}          @ restore the block-copy scratch registers
-
-8:      movs    r2, r2, lsl #31         @ tail of 0..3 bytes: count bit 0 gives NE, count bit 1 gives C
-        ldrneb  r3, [r1], #1            @ one byte when bit 0 is set
-        ldrcsb  r4, [r1], #1            @ two more bytes when bit 1 is set
-        ldrcsb  ip, [r1]
-        strneb  r3, [r0], #1
-        strcsb  r4, [r0], #1
-        strcsb  ip, [r0]
-
-        ldmpc   regs="r0, r4"           @ ldmpc is defined outside this file; presumably pops r0 (dest), r4 and pc - confirm
-
-9:      rsb ip, ip, #4                  @ dest unaligned: ip = bytes up to the next word boundary (1..3)
-        cmp ip, #2
-        ldrgtb  r3, [r1], #1            @ only when 3 bytes are needed
-        ldrgeb  r4, [r1], #1            @ when 2 or 3 bytes are needed
-        ldrb    lr, [r1], #1            @ always
-        strgtb  r3, [r0], #1
-        strgeb  r4, [r0], #1
-        subs    r2, r2, ip              @ account for the aligning bytes
-        strb    lr, [r0], #1            @ strb keeps the flags from subs
-        blt 8b                          @ fewer than 4 bytes left: byte tail
-        ands    ip, r1, #3              @ is src word aligned as well now?
-        beq 1b                          @ yes: aligned block copy; otherwise fall into 10
-
-10:     bic r1, r1, #3                  @ src is off by ip (1..3) bytes: round it down to a word
-        cmp ip, #2
-        ldr lr, [r1], #4                @ preload the word that holds the first source bytes
-        beq 17f                         @ src offset 2
-        bgt 18f                         @ src offset 3; offset 1 falls through to the first expansion
-
-
-        .macro  forward_copy_shift pull push   @ word copy for misaligned src; lr carries the last loaded word
-
-        subs    r2, r2, #28             @ r2 = bytes left - 32
-        blt 14f                         @ fewer than 32 bytes: single-word loop only
-
-11:     stmfd   sp!, {r5 - r9}          @ scratch registers for the 8-word loop
-
-12:
-13:     ldmia   r1!, {r4, r5, r6, r7}   @ next 4 source words
-        mov r3, lr, pull #\pull         @ bytes still unused from the previous word
-        subs    r2, r2, #32
-        ldmia   r1!, {r8, r9, ip, lr}   @ 4 more words; lr is carried into the next round
-        orr r3, r3, r4, push #\push     @ complete the word with bytes from the following one
-        mov r4, r4, pull #\pull         @ the same two steps repeat for each output word
-        orr r4, r4, r5, push #\push
-        mov r5, r5, pull #\pull
-        orr r5, r5, r6, push #\push
-        mov r6, r6, pull #\pull
-        orr r6, r6, r7, push #\push
-        mov r7, r7, pull #\pull
-        orr r7, r7, r8, push #\push
-        mov r8, r8, pull #\pull
-        orr r8, r8, r9, push #\push
-        mov r9, r9, pull #\pull
-        orr r9, r9, ip, push #\push
-        mov ip, ip, pull #\pull
-        orr ip, ip, lr, push #\push
-        stmia   r0!, {r3, r4, r5, r6, r7, r8, r9, ip}   @ store 8 realigned words
-        bge 12b                         @ flags are still those of the subs above
-
-        ldmfd   sp!, {r5 - r9}
-
-14:     ands    ip, r2, #28             @ ip = bytes left that form whole words
-        beq 16f
-
-15:     mov r3, lr, pull #\pull         @ one realigned word per iteration
-        ldr lr, [r1], #4
-        subs    ip, ip, #4
-        orr r3, r3, lr, push #\push
-        str r3, [r0], #4
-        bgt 15b
-
-16:     sub r1, r1, #(\push / 8)        @ step src back onto the first byte not yet copied
-        b   8b                          @ finish with the byte tail
-
-        .endm
-
-
-        forward_copy_shift  pull=8  push=24     @ src offset 1 (fall-through from 10)
-
-17:     forward_copy_shift  pull=16 push=16     @ src offset 2
-
-18:     forward_copy_shift  pull=24 push=8      @ src offset 3
-
--- a/firmware/target/arm/memmove-arm.S
+++ b/firmware/target/arm/memmove-arm.S
@ -1,190 +0,0 @@
-/***************************************************************************
- *             __________               __   ___.
- *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
- *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
- *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
- *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
- *                     \/            \/     \/    \/            \/
- * $Id$
- *
- * Copyright (C) 2006 Free Software Foundation, Inc.
- * This file was originally part of the GNU C Library
- * Contributed to glibc by MontaVista Software, Inc. (written by Nicolas Pitre)
- * Adapted for Rockbox by Daniel Ankers
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
- * KIND, either express or implied.
- *
- ****************************************************************************/
-
-#include "config.h"
-
-/*
- * Endian independent macros for shifting bytes within registers.
- */
-#ifndef __ARMEB__
-#define pull            lsr
-#define push            lsl
-#else
-#define pull            lsl
-#define push            lsr
-#endif
-
-        .text
-
-/*
- * Prototype: void *memmove(void *dest, const void *src, size_t n);
- *
- * Note:
- *
- * If the memory regions don't overlap, we simply branch to memcpy which is
- * normally a bit faster. Otherwise the copy is done going downwards.
- */
-
-    .section    .icode,"ax",%progbits
-
-    .align      2
-    .global     memmove
-    .type       memmove,%function
-
-memmove:                                @ overlap-safe copy: r0 = dest, r1 = src, r2 = byte count
-
-        subs    ip, r0, r1              @ ip = dest - src
-        cmphi   r2, ip                  @ only when dest is above src: compare count with that distance
-        bls memcpy                      @ dest <= src, or count <= distance: use the forward memcpy
-
-        stmfd   sp!, {r0, r4, lr}       @ push dest, r4 and lr
-        add r1, r1, r2                  @ move both pointers to the end of their buffers;
-        add r0, r0, r2                  @ everything below copies downwards
-        subs    r2, r2, #4              @ r2 = count - 4
-        blt 8f                          @ fewer than 4 bytes: byte tail only
-        ands    ip, r0, #3              @ ip = bytes of dest end above the last word boundary
-        bne 9f                          @ dest end unaligned: align it first
-        ands    ip, r1, #3              @ ip = same for the src end
-        bne 10f                         @ dest aligned but src not: shifted copy
-
-1:      subs    r2, r2, #(28)           @ both pointers word aligned; r2 = bytes left - 32
-        stmfd   sp!, {r5 - r8}          @ free more scratch registers for the block copy
-        blt 5f                          @ fewer than 32 bytes left: skip the block loop
-
-2:
-3:
-4:      ldmdb   r1!, {r3, r4, r5, r6, r7, r8, ip, lr}   @ load the 8 words below src
-        subs    r2, r2, #32
-        stmdb   r0!, {r3, r4, r5, r6, r7, r8, ip, lr}   @ store them below dest
-        bge 3b                          @ loop while another 32 bytes remain
-
-5:      ands    ip, r2, #28             @ ip = bytes left that form whole words (0..28)
-        rsb ip, ip, #32                 @ ip = 32 - that; rsb keeps the flags from ands
-        addne   pc, pc, ip      @ C is always clear here
-        b   7f                          @ taken when no whole words are left
-6:      nop                             @ pad slot: pc reads as the addne address + 8, so ip = 4 lands on the first ldr
-        ldr r3, [r1, #-4]!              @ load ladder; the jump skips one ldr per word not needed
-        ldr r4, [r1, #-4]!
-        ldr r5, [r1, #-4]!
-        ldr r6, [r1, #-4]!
-        ldr r7, [r1, #-4]!
-        ldr r8, [r1, #-4]!
-        ldr lr, [r1, #-4]!
-
-        add pc, pc, ip                  @ same offset again, this time into the str ladder
-        nop                             @ two pad slots for the pc + 8 read
-        nop
-        str r3, [r0, #-4]!              @ store ladder, entered at the matching position
-        str r4, [r0, #-4]!
-        str r5, [r0, #-4]!
-        str r6, [r0, #-4]!
-        str r7, [r0, #-4]!
-        str r8, [r0, #-4]!
-        str lr, [r0, #-4]!
-
-7:      ldmfd   sp!, {r5 - r8}          @ restore the block-copy scratch registers
-
-8:      movs    r2, r2, lsl #31         @ tail of 0..3 bytes: count bit 0 gives NE, count bit 1 gives C
-        ldrneb  r3, [r1, #-1]!          @ one byte when bit 0 is set
-        ldrcsb  r4, [r1, #-1]!          @ two more bytes when bit 1 is set
-        ldrcsb  ip, [r1, #-1]
-        strneb  r3, [r0, #-1]!
-        strcsb  r4, [r0, #-1]!
-        strcsb  ip, [r0, #-1]
-        ldmpc   regs="r0, r4"           @ ldmpc is defined outside this file; presumably pops r0 (dest), r4 and pc - confirm
-
-9:      cmp ip, #2                      @ dest end unaligned: ip = bytes down to the word boundary (1..3)
-        ldrgtb  r3, [r1, #-1]!          @ only when 3 bytes are needed
-        ldrgeb  r4, [r1, #-1]!          @ when 2 or 3 bytes are needed
-        ldrb    lr, [r1, #-1]!          @ always
-        strgtb  r3, [r0, #-1]!
-        strgeb  r4, [r0, #-1]!
-        subs    r2, r2, ip              @ account for the aligning bytes
-        strb    lr, [r0, #-1]!          @ strb keeps the flags from subs
-        blt 8b                          @ fewer than 4 bytes left: byte tail
-        ands    ip, r1, #3              @ is the src end word aligned as well now?
-        beq 1b                          @ yes: aligned block copy; otherwise fall into 10
-
-10:     bic r1, r1, #3                  @ src end is off by ip (1..3) bytes: round it down to a word
-        cmp ip, #2
-        ldr r3, [r1, #0]                @ preload the word that holds the topmost source bytes
-        beq 17f                         @ src offset 2
-        blt 18f                         @ src offset 1; offset 3 falls through to the first expansion
-
-
-        .macro  backward_copy_shift push pull  @ descending word copy for misaligned src; r3 carries the last loaded word
-
-        subs    r2, r2, #28             @ r2 = bytes left - 32
-        blt 14f                         @ fewer than 32 bytes: single-word loop only
-
-11:     stmfd   sp!, {r5 - r9}          @ scratch registers for the 8-word loop
-
-12:
-13:     ldmdb   r1!, {r7, r8, r9, ip}   @ the 4 source words below the carried one
-        mov     lr, r3, push #\push     @ bytes still unused from the carried word
-        subs    r2, r2, #32
-        ldmdb   r1!, {r3, r4, r5, r6}   @ 4 more words; r3 is carried into the next round
-        orr     lr, lr, ip, pull #\pull @ complete the word with bytes from the one below
-        mov     ip, ip, push #\push     @ the same two steps repeat for each output word
-        orr     ip, ip, r9, pull #\pull
-        mov     r9, r9, push #\push
-        orr     r9, r9, r8, pull #\pull
-        mov     r8, r8, push #\push
-        orr     r8, r8, r7, pull #\pull
-        mov     r7, r7, push #\push
-        orr     r7, r7, r6, pull #\pull
-        mov     r6, r6, push #\push
-        orr     r6, r6, r5, pull #\pull
-        mov     r5, r5, push #\push
-        orr     r5, r5, r4, pull #\pull
-        mov     r4, r4, push #\push
-        orr     r4, r4, r3, pull #\pull
-        stmdb   r0!, {r4 - r9, ip, lr}  @ store 8 realigned words below dest
-        bge 12b                         @ flags are still those of the subs above
-
-        ldmfd   sp!, {r5 - r9}
-
-14:     ands    ip, r2, #28             @ ip = bytes left that form whole words
-        beq 16f
-
-15:     mov     lr, r3, push #\push     @ one realigned word per iteration
-        ldr r3, [r1, #-4]!
-        subs    ip, ip, #4
-        orr lr, lr, r3, pull #\pull
-        str lr, [r0, #-4]!
-        bgt 15b
-
-16:     add r1, r1, #(\pull / 8)        @ step src up to just above the next byte to copy
-        b   8b                          @ finish with the byte tail
-
-        .endm
-
-
-        backward_copy_shift push=8  pull=24     @ src offset 3 (fall-through from 10)
-
-17:     backward_copy_shift push=16 pull=16     @ src offset 2
-
-18:     backward_copy_shift push=24 pull=8      @ src offset 1
-
-
--- a/firmware/target/arm/memset-arm.S
+++ b/firmware/target/arm/memset-arm.S
@ -1,98 +0,0 @@
-/***************************************************************************
- *             __________               __   ___.
- *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
- *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
- *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
- *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
- *                     \/            \/     \/    \/            \/
- * $Id$
- *
- * Copyright (C) 2006 by Thom Johansen
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
- * KIND, either express or implied.
- *
- ****************************************************************************/
-#include "config.h"
-
-    .section    .icode,"ax",%progbits
-
-    .align      2
-
-/*  The following code is based on code found in Linux kernel version 2.6.15.3
- *  linux/arch/arm/lib/memset.S
- *
- *  Copyright (C) 1995-2000 Russell King
- */
-
-/* This code will align a pointer for memset, if needed */
-1:      cmp     r2, #4                  @ 1 do we have enough
-        blt     5f                      @ 1 bytes to align with?
-        cmp     r3, #2                  @ 1 r3 = bytes above the word boundary (1..3)
-        strgtb  r1, [r0, #-1]!          @ 1 stored only when r3 is 3
-        strgeb  r1, [r0, #-1]!          @ 1 stored when r3 is 2 or 3
-        strb    r1, [r0, #-1]!          @ 1 always stored
-        sub     r2, r2, r3              @ 1 r2 = r2 - r3
-        b 2f                            @ the end pointer is word aligned now
-
-        .global     memset
-        .type       memset,%function
-memset:                                 @ fill r2 bytes at r0 with the byte in r1, working from the end downwards
-        add     r0, r0, r2              @ we'll write backwards in memory
-        ands    r3, r0, #3              @ 1 unaligned?
-        bne     1b                      @ 1
-2:
-/*
- * we know that the pointer in r0 is aligned to a word boundary.
- */
-        orr     r1, r1, r1, lsl #8      @ replicate the fill byte into all four bytes of r1
-        orr     r1, r1, r1, lsl #16     @ NOTE(review): bits 8..31 of r1 are not masked beforehand
-        mov     r3, r1                  @ second pattern register
-        cmp     r2, #16
-        blt     5f                      @ fewer than 16 bytes: short tail only
-/*
- * We need an extra register for this loop - save the return address and
- * use the LR
- */
-        str     lr, [sp, #-4]!          @ push lr
-        mov     ip, r1
-        mov     lr, r1                  @ r1, r3, ip and lr all hold the pattern now
-
-3:      subs    r2, r2, #64             @ try to take 64 bytes off the count
-        stmgedb r0!, {r1, r3, ip, lr}   @ 64 bytes at a time.
-        stmgedb r0!, {r1, r3, ip, lr}
-        stmgedb r0!, {r1, r3, ip, lr}
-        stmgedb r0!, {r1, r3, ip, lr}
-        bgt     3b                      @ more bytes remain after this round
-        ldrpc   cond=eq                 @ Now <64 bytes to go. (ldrpc is defined elsewhere; presumably pops pc when nothing is left - confirm)
-/*
- * No need to correct the count; we're only testing bits from now on
- */
-        tst     r2, #32                 @ 32 bytes
-        stmnedb r0!, {r1, r3, ip, lr}
-        stmnedb r0!, {r1, r3, ip, lr}
-        tst     r2, #16                 @ 16 bytes
-        stmnedb r0!, {r1, r3, ip, lr}
-        ldr     lr, [sp], #4            @ pop the return address back into lr
-
-5:      tst     r2, #8                  @ 8 bytes
-        stmnedb r0!, {r1, r3}
-        tst     r2, #4                  @ 4 bytes
-        strne   r1, [r0, #-4]!
-/*
- * When we get here, we've got less than 4 bytes to zero.  We
- * may have an unaligned pointer as well.
- */
-6:      tst     r2, #2                  @ 2 bytes, stored one at a time
-        strneb  r1, [r0, #-1]!
-        strneb  r1, [r0, #-1]!
-        tst     r2, #1                  @ last single byte
-        strneb  r1, [r0, #-1]!
-        bx      lr                      @ every store pre-decremented r0, walking it back from dest + count
-.end:
-        .size   memset,.end-memset
--- a/firmware/target/arm/memset16-arm.S
+++ b/firmware/target/arm/memset16-arm.S
@ -1,82 +0,0 @@
-/***************************************************************************
- *             __________               __   ___.
- *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
- *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
- *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
- *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
- *                     \/            \/     \/    \/            \/
- * $Id$
- *
- * Copyright (C) 2006 by Thom Johansen
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
- * KIND, either express or implied.
- *
- ****************************************************************************/
-#include "config.h"
-
-    .section    .icode,"ax",%progbits
-
-    .align      2
-
-/*  The following code is based on code from the Linux kernel version 2.6.15.3,
- *  linux/arch/arm/lib/memset.S
- *
- *  Copyright (C) 1995-2000 Russell King
- */
-
-        .global     memset16
-        .type       memset16,%function
-memset16:                               @ fill r2 halfwords at r0 with the 16-bit value in r1, ascending
-        tst     r0, #2                  @ unaligned?
-        cmpne   r2, #0                  @ and is there anything to store at all?
-        strneh  r1, [r0], #2            @ store one halfword to align
-        subne   r2, r2, #1              @ one halfword less to go
-
-/*
- * we know that the pointer in r0 is aligned to a word boundary.
- */
-        orr     r1, r1, r1, lsl #16     @ same halfword in both halves; NOTE(review): the top half of r1 is not cleared beforehand
-        mov     r3, r1                  @ second pattern register
-        cmp     r2, #8
-        blt     4f                      @ fewer than 8 halfwords (16 bytes): short tail only
-/*
- * We need an extra register for this loop - save the return address and
- * use the LR
- */
-        str     lr, [sp, #-4]!          @ push lr
-        mov     ip, r1
-        mov     lr, r1                  @ r1, r3, ip and lr all hold the pattern now
-
-2:      subs    r2, r2, #32             @ try to take 32 halfwords off the count
-        stmgeia r0!, {r1, r3, ip, lr}   @ 64 bytes at a time.
-        stmgeia r0!, {r1, r3, ip, lr}
-        stmgeia r0!, {r1, r3, ip, lr}
-        stmgeia r0!, {r1, r3, ip, lr}
-        bgt     2b                      @ more halfwords remain after this round
-        ldrpc   cond=eq                 @ Now <64 bytes to go. (ldrpc is defined elsewhere; presumably pops pc when nothing is left - confirm)
-/*
- * No need to correct the count; we're only testing bits from now on
- */
-        tst     r2, #16                 @ 16 halfwords = 32 bytes
-        stmneia r0!, {r1, r3, ip, lr}
-        stmneia r0!, {r1, r3, ip, lr}
-        tst     r2, #8                  @ 8 halfwords = 16 bytes
-        stmneia r0!, {r1, r3, ip, lr}
-        ldr     lr, [sp], #4            @ pop the return address back into lr
-
-4:      tst     r2, #4                  @ 4 halfwords = 8 bytes
-        stmneia r0!, {r1, r3}
-        tst     r2, #2                  @ 2 halfwords = one word
-        strne   r1, [r0], #4
-
-        tst     r2, #1                  @ last odd halfword
-        strneh  r1, [r0], #2
-        bx      lr                      @ return to the caller
-.end:
-        .size   memset16,.end-memset16