1
0
Fork 0
forked from len0rd/rockbox
foxbox/apps/codecs/lib/udiv32_armv4.S
2010-01-03 15:57:03 +00:00

134 lines
5.1 KiB
ArmAsm

/***************************************************************************
* __________ __ ___.
* Open \______ \ ____ ____ | | _\_ |__ _______ ___
* Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
* Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
* Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
* \/ \/ \/ \/ \/
* $Id$
*
* Copyright (C) 2008 by Jens Arnold
* Copyright (C) 2009 by Andrew Mahone
*
* Optimised unsigned integer division for ARMv4
*
* Based on: libgcc routines for ARM cpu.
* Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
* Free Software Foundation, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
****************************************************************************/
#include "config.h"
/* Codecs should not normally do this, but we need to check a macro, and
* codecs.h would confuse the assembler. */
/* Adapted from an algorithm given in ARM System Developer's Guide (7.3.1.2)
for dividing a 30-bit value by a 15-bit value, with two operations per
iteration by storing quotient and remainder together and adding the previous
quotient bit during trial subtraction. Modified to work with any dividend
and divisor both less than 1 << 30, and skipping trials by calculating bits
in output. */
/* ARM_DIV_31_BODY: straight-line (branch-free apart from one computed jump)
 * unsigned division core.
 *
 * Register operands:
 *   dividend  - in:  numerator.
 *   divisor   - in:  divisor. The only user in this file, udiv32_arm, passes
 *                    it already negated, so the adds/adcs below act as trial
 *                    subtractions. Overwritten (shifted left) by the macro.
 *   result    - scratch: remainder and quotient packed into one word.
 *   bits      - scratch: number of quotient bits produced; also the shift
 *                    count used to unpack result at the end.
 *   curbit    - scratch: number of unrolled steps to skip.
 *   quotient  - out: quotient.
 *   remainder - out: remainder.
 *
 * quotient and remainder are written only by the last two instructions, so
 * they may name the same registers as dividend and divisor (udiv32_arm does
 * exactly that). The condition flags are overwritten. */
.macro ARM_DIV_31_BODY dividend, divisor, result, bits, curbit, quotient, remainder
/* At least one quotient bit is always produced. */
mov \bits, #1
/* Shift the divisor left until it aligns with the numerator. If it already
has the high bit set, this is fine, everything inside .rept will be
skipped, and the add before and adcs after will set the one-bit result
to zero. */
/* Each cmn sets carry when divisor + (dividend >> k) wraps past 32 bits.
With a negated divisor that means (dividend >> k) is at least the original
divisor, so the divisor can move up k places and k more quotient bits are
needed. The steps form a binary search over k = 16, 8, 4, 2, 1. */
cmn \divisor, \dividend, lsr #16
movcs \divisor, \divisor, lsl #16
addcs \bits, \bits, #16
cmn \divisor, \dividend, lsr #8
movcs \divisor, \divisor, lsl #8
addcs \bits, \bits, #8
cmn \divisor, \dividend, lsr #4
movcs \divisor, \divisor, lsl #4
addcs \bits, \bits, #4
cmn \divisor, \dividend, lsr #2
movcs \divisor, \divisor, lsl #2
addcs \bits, \bits, #2
cmn \divisor, \dividend, lsr #1
movcs \divisor, \divisor, lsl #1
addcs \bits, \bits, #1
/* First trial: add the aligned (negated) divisor to the dividend. The carry
out is the top quotient bit; without a carry the trial failed, so undo it. */
adds \result, \dividend, \divisor
subcc \result, \result, \divisor
/* The first trial plus the 30 unrolled steps give at most 31 quotient bits;
skip the (31 - bits) steps that are not needed. Each step is two 4-byte
instructions, hence lsl #3. Reading pc yields the address of the add plus 8,
so the nop pads the skip-nothing case to land on the first unrolled step.
Neither rsb nor add touches the flags, so the carry from the first trial
survives into the first executed adcs (or the final adc). */
rsb \curbit, \bits, #31
add pc, pc, \curbit, lsl #3
nop
.rept 30
/* result = 2 * result + divisor + carry: shifts the packed word up one place,
drops the previous quotient bit into bit 0 and performs the next trial
subtraction. The new carry is the next quotient bit. */
adcs \result, \divisor, \result, lsl #1
/* Fix the remainder portion of the result. This must be done because the
handler for 32-bit numerators needs the remainder. */
subcc \result, \result, \divisor
.endr
/* Shift remainder/quotient left one, add final quotient bit */
adc \result, \result, \result
/* Unpack: the remainder sits above the low `bits` bits of result ... */
mov \remainder, \result, lsr \bits
/* ... and xor-ing the remainder back out of result leaves only the quotient. */
eor \quotient, \result, \remainder, lsl \bits
.endm
#ifdef USE_IRAM
    .section    .icode,"ax",%progbits
#else
    .text
#endif
    .align
    .global     udiv32_arm
    .type       udiv32_arm,%function

/* udiv32_arm: unsigned 32-bit division.
 *
 *   In:       r0 = numerator, r1 = divisor
 *   Out:      r0 = quotient
 *             r1 = remainder, but only when the numerator has bit 31 clear;
 *                  on the .L_udiv32 path r1 is left holding the result of the
 *                  last trial subtraction and is not corrected.
 *   Clobbers: r2, r3, ip and the condition flags.
 *   Stack:    the .L_udiv32 path parks three words just below sp without
 *             moving sp; the .L_div0 path pushes lr before calling __div0.
 */
udiv32_arm:
    /* Invert divisor. ARM_DIV_31_BODY uses adc to both subtract the divisor
       and add the next bit of the result. The correction code at .L_udiv32
       does not need the divisor inverted, but can be modified to work with
       it, and this allows the zero divisor test to be done early and without
       an explicit comparison. */
    rsbs    r1, r1, #0
    /* Divisor is zero: report it from here, while lr still holds the
       caller's return address. */
    beq     .L_div0
    tst     r0, r0
    /* High bit must be unset, otherwise shift numerator right, calculate,
       and correct results. As this case is very uncommon we want to avoid
       any other delays on the main path in handling it, so the long divide
       calls the short divide as a function. */
    bmi     .L_udiv32
.L_udiv31:
    /* r0 = numerator (bit 31 clear), r1 = negated divisor.
       Leaves r0 = quotient, r1 = remainder; r2, r3 and ip are scratch. */
    ARM_DIV_31_BODY r0, r1, r2, r3, ip, r0, r1
    bx      lr
.L_udiv32:
    /* Store original numerator and (negated) divisor, we'll need them to
       correct the result, together with lr which the nested call overwrites.
       sp itself is not moved; the nested call uses no stack.
       NOTE(review): this relies on nothing else writing below sp in the
       meantime (e.g. an exception handler sharing this stack) - confirm for
       the targets this is built for. */
    stmdb   sp, { r0, r1, lr }
    /* Divide numerator / 2 using the 31-bit routine. */
    mov     r0, r0, lsr #1
    bl      .L_udiv31
    /* r2 = original numerator, r3 = negated divisor, lr = our return address */
    ldmdb   sp, { r2, r3, lr }
    /* Move the low bit of the original numerator to the carry bit */
    movs    r2, r2, lsr #1
    /* Shift the remainder left one and add in the carry bit */
    adc     r1, r1, r1
    /* Subtract the original divisor from the remainder (r3 holds it negated,
       so this is an add), setting carry if the result is non-negative */
    adds    r1, r1, r3
    /* Shift quotient left one and add carry bit */
    adc     r0, r0, r0
    bx      lr
.L_div0:
    /* __div0 expects the calling address on the top of the stack */
    stmdb   sp!, { lr }
    /* NOTE(review): nothing follows the call, so __div0 is presumably not
       expected to return here - confirm against its implementation. */
#if defined(__ARM_EABI__) || !defined(USE_IRAM)
    bl      __div0
#else
    /* Call through a register, with the target address taken from the
       literal pool. r3 must be loaded first: it is a scratch register that
       nothing on the path from the entry point to here has written. */
    ldr     r3, =__div0
    mov     lr, pc
    bx      r3
#endif
    .size   udiv32_arm, . - udiv32_arm