Using ARM Unified Assembler Language

Change-Id: Iae32a8ba8eff6087330e458fafc912a12fee4509
This commit is contained in:
Chris Chua 2023-03-19 06:22:08 +11:00 committed by Aidan MacDonald
parent a64cad847e
commit 86429dbf1e
23 changed files with 139 additions and 127 deletions

View file

@ -43,8 +43,8 @@
ldrsh r7, [r0, #12] /* d2 */
ldrsh r8, [r0, #14] /* d3 */
orrs r9, r2, r3
orreqs r9, r4, r5
orreqs r9, r6, r7
orrseq r9, r4, r5
orrseq r9, r6, r7
cmpeq r8, #0
bne 2f
mov r1, r1, asl #15
@ -320,7 +320,7 @@ mpeg2_idct_copy:
mpeg2_idct_add:
cmp r0, #129
mov r0, r1
ldreqsh r1, [r0, #0]
ldrsheq r1, [r0, #0]
bne 1f
and r1, r1, #0x70
cmp r1, #0x40

View file

@ -19,6 +19,7 @@
*
****************************************************************************/
#include "config.h"
.global mpeg2_idct_copy
.type mpeg2_idct_copy, %function
@ -228,7 +229,7 @@ mpeg2_idct_copy:
mpeg2_idct_add:
cmp r0, #129
mov r0, r1
ldreqsh r1, [r0, #0]
ldrsheq r1, [r0, #0]
bne 1f
and r1, r1, #0x70
cmp r1, #0x40
@ -260,7 +261,7 @@ mpeg2_idct_add:
strd r4, [r1] @ r4, r5
add r1, r1, r2
cmp r0, r3
ldrlod r8, [r1] @ r8, r9
ldrdlo r8, [r1] @ r8, r9
blo 2b
ldmfd sp!, {r4-r11, pc}
@ -291,7 +292,7 @@ mpeg2_idct_add:
strd r0, [r2] @ r0, r1
add r2, r2, r3
cmp r2, r12
ldrlod r0, [r2] @ r0, r1
ldrdlo r0, [r2] @ r0, r1
blo 4b
ldmfd sp!, {r4, pc}

View file

@ -53,12 +53,13 @@ enum state_enum
#define CMP_3_CONST(_a, _b) \
({ int _x; \
asm volatile ( \
".syntax unified \n" \
"ldrb %[x], [%[a], #0] \n" \
"eors %[x], %[x], %[b0] \n" \
"ldreqb %[x], [%[a], #1] \n" \
"eoreqs %[x], %[x], %[b1] \n" \
"ldreqb %[x], [%[a], #2] \n" \
"eoreqs %[x], %[x], %[b2] \n" \
"ldrbeq %[x], [%[a], #1] \n" \
"eorseq %[x], %[x], %[b1] \n" \
"ldrbeq %[x], [%[a], #2] \n" \
"eorseq %[x], %[x], %[b2] \n" \
: [x]"=&r"(_x) \
: [a]"r"(_a), \
[b0]"i"(((_b) >> 24) & 0xff), \
@ -70,14 +71,15 @@ enum state_enum
#define CMP_4_CONST(_a, _b) \
({ int _x; \
asm volatile ( \
".syntax unified \n" \
"ldrb %[x], [%[a], #0] \n" \
"eors %[x], %[x], %[b0] \n" \
"ldreqb %[x], [%[a], #1] \n" \
"eoreqs %[x], %[x], %[b1] \n" \
"ldreqb %[x], [%[a], #2] \n" \
"eoreqs %[x], %[x], %[b2] \n" \
"ldreqb %[x], [%[a], #3] \n" \
"eoreqs %[x], %[x], %[b3] \n" \
"ldrbeq %[x], [%[a], #1] \n" \
"eorseq %[x], %[x], %[b1] \n" \
"ldrbeq %[x], [%[a], #2] \n" \
"eorseq %[x], %[x], %[b2] \n" \
"ldrbeq %[x], [%[a], #3] \n" \
"eorseq %[x], %[x], %[b3] \n" \
: [x]"=&r"(_x) \
: [a]"r"(_a), \
[b0]"i"(((_b) >> 24) & 0xff), \

View file

@ -410,7 +410,7 @@ jpeg_idct8v:
#if ARM_ARCH < 5
mov r8, r4, lsl #16
orrs r9, r6, r7
orreqs r9, r5, r4, lsr #16
orrseq r9, r5, r4, lsr #16
bne 2f
mov r8, r8, asr #14
strh r8, [r2]
@ -505,7 +505,7 @@ jpeg_idct8v:
#else /* ARMv5+ */
mov r12, r4, lsl #16
orrs r9, r6, r7
orreqs r9, r5, r4, lsr #16
orrseq r9, r5, r4, lsr #16
bne 2f
mov r12, r12, asr #14
strh r12, [r2]
@ -615,7 +615,7 @@ jpeg_idct8h:
#if ARM_ARCH < 5
add r8, r14, r4, lsl #16
orrs r9, r6, r7
orreqs r9, r5, r4, lsr #16
orrseq r9, r5, r4, lsr #16
bne 2f
mov r8, r8, asr #21
cmp r8, #255
@ -727,7 +727,7 @@ jpeg_idct8h:
#else /* ARMv5+ */
add r12, r14, r4, lsl #16
orrs r9, r6, r7
orreqs r9, r5, r4, lsr #16
orrseq r9, r5, r4, lsr #16
bne 2f
mov r12, r12, asr #21
cmp r12, #255
@ -835,7 +835,7 @@ jpeg_idct8v:
1:
ldmia r0!, { r4-r7 }
orrs r9, r6, r7
orreqs r9, r5, r4, lsr #16
orrseq r9, r5, r4, lsr #16
bne 2f
mov r4, r4, lsl #2
strh r4, [r2]
@ -939,7 +939,7 @@ jpeg_idct8h:
ldmia r0!, { r4-r7 }
sadd16 r4, r4, r14
orrs r9, r6, r7
orreqs r9, r5, r4, lsr #16
orrseq r9, r5, r4, lsr #16
bne 2f
sxth r4, r4
usat r4, #8, r4, asr #5

View file

@ -61,6 +61,7 @@ int corelock_try_lock(struct corelock *cl)
/* Relies on the fact that core IDs are complementary bitmasks (0x55,0xaa) */
asm volatile (
".syntax unified \n"
"mov r1, %[id] \n" /* r1 = PROCESSOR_ID */
"ldrb r1, [r1] \n"
"strb r1, [%[cl], r1, lsr #7] \n" /* cl->myl[core] = core */
@ -71,7 +72,7 @@ int corelock_try_lock(struct corelock *cl)
"bne 1f \n" /* yes? lock acquired */
"ldrb %[rv], [%[cl], #2] \n" /* || cl->turn == core? */
"ands %[rv], %[rv], r1 \n"
"streqb %[rv], [%[cl], r1, lsr #7] \n" /* if not, cl->myl[core] = 0 */
"strbeq %[rv], [%[cl], r1, lsr #7] \n" /* if not, cl->myl[core] = 0 */
"1: \n" /* Done */
: [rv] "=r"(rval)
: [id] "i" (&PROCESSOR_ID), [cl] "r" (cl)

View file

@ -91,9 +91,9 @@ lcd_copy_buffer_rect: @
stmia r0!, { r6-r12, r14 } @
bgt 30b @ octword loop @
40: @ finish line @
ldreqh r6, [r1], #2 @ finish last halfword if eq ...
ldrheq r6, [r1], #2 @ finish last halfword if eq ...
add r1, r1, r4, lsl #1 @
streqh r6, [r0], #2 @ ...
strheq r6, [r0], #2 @ ...
add r0, r0, r4, lsl #1 @
subs r3, r3, #1 @ next line
bgt 10b @ copy line @

View file

@ -99,22 +99,22 @@ memcpy:
7: ldmfd sp!, {r5 - r8}
8: movs r2, r2, lsl #31
ldrneb r3, [r1], #1
ldrcsb r4, [r1], #1
ldrcsb ip, [r1]
strneb r3, [r0], #1
strcsb r4, [r0], #1
strcsb ip, [r0]
ldrbne r3, [r1], #1
ldrbcs r4, [r1], #1
ldrbcs ip, [r1]
strbne r3, [r0], #1
strbcs r4, [r0], #1
strbcs ip, [r0]
ldmpc regs="r0, r4"
9: rsb ip, ip, #4
cmp ip, #2
ldrgtb r3, [r1], #1
ldrgeb r4, [r1], #1
ldrbgt r3, [r1], #1
ldrbge r4, [r1], #1
ldrb lr, [r1], #1
strgtb r3, [r0], #1
strgeb r4, [r0], #1
strbgt r3, [r0], #1
strbge r4, [r0], #1
subs r2, r2, ip
strb lr, [r0], #1
blt 8b

View file

@ -106,20 +106,20 @@ memmove:
7: ldmfd sp!, {r5 - r8}
8: movs r2, r2, lsl #31
ldrneb r3, [r1, #-1]!
ldrcsb r4, [r1, #-1]!
ldrcsb ip, [r1, #-1]
strneb r3, [r0, #-1]!
strcsb r4, [r0, #-1]!
strcsb ip, [r0, #-1]
ldrbne r3, [r1, #-1]!
ldrbcs r4, [r1, #-1]!
ldrbcs ip, [r1, #-1]
strbne r3, [r0, #-1]!
strbcs r4, [r0, #-1]!
strbcs ip, [r0, #-1]
ldmpc regs="r0, r4"
9: cmp ip, #2
ldrgtb r3, [r1, #-1]!
ldrgeb r4, [r1, #-1]!
ldrbgt r3, [r1, #-1]!
ldrbge r4, [r1, #-1]!
ldrb lr, [r1, #-1]!
strgtb r3, [r0, #-1]!
strgeb r4, [r0, #-1]!
strbgt r3, [r0, #-1]!
strbge r4, [r0, #-1]!
subs r2, r2, ip
strb lr, [r0, #-1]!
blt 8b

View file

@ -34,8 +34,8 @@
1: cmp r2, #4 @ 1 do we have enough
blt 5f @ 1 bytes to align with?
cmp r3, #2 @ 1
strgtb r1, [r0, #-1]! @ 1
strgeb r1, [r0, #-1]! @ 1
strbgt r1, [r0, #-1]! @ 1
strbge r1, [r0, #-1]! @ 1
strb r1, [r0, #-1]! @ 1
sub r2, r2, r3 @ 1 r2 = r2 - r3
b 2f
@ -65,24 +65,24 @@ memset:
mov lr, r1
3: subs r2, r2, #64
stmgedb r0!, {r1, r3, ip, lr} @ 64 bytes at a time.
stmgedb r0!, {r1, r3, ip, lr}
stmgedb r0!, {r1, r3, ip, lr}
stmgedb r0!, {r1, r3, ip, lr}
stmdbge r0!, {r1, r3, ip, lr} @ 64 bytes at a time.
stmdbge r0!, {r1, r3, ip, lr}
stmdbge r0!, {r1, r3, ip, lr}
stmdbge r0!, {r1, r3, ip, lr}
bgt 3b
ldrpc cond=eq @ Now <64 bytes to go.
/*
* No need to correct the count; we're only testing bits from now on
*/
tst r2, #32
stmnedb r0!, {r1, r3, ip, lr}
stmnedb r0!, {r1, r3, ip, lr}
stmdbne r0!, {r1, r3, ip, lr}
stmdbne r0!, {r1, r3, ip, lr}
tst r2, #16
stmnedb r0!, {r1, r3, ip, lr}
stmdbne r0!, {r1, r3, ip, lr}
ldr lr, [sp], #4
5: tst r2, #8
stmnedb r0!, {r1, r3}
stmdbne r0!, {r1, r3}
tst r2, #4
strne r1, [r0, #-4]!
/*
@ -90,10 +90,10 @@ memset:
* may have an unaligned pointer as well.
*/
6: tst r2, #2
strneb r1, [r0, #-1]!
strneb r1, [r0, #-1]!
strbne r1, [r0, #-1]!
strbne r1, [r0, #-1]!
tst r2, #1
strneb r1, [r0, #-1]!
strbne r1, [r0, #-1]!
bx lr
.end:
.size memset,.end-memset

View file

@ -35,7 +35,7 @@
memset16:
tst r0, #2 @ unaligned?
cmpne r2, #0
strneh r1, [r0], #2 @ store one halfword to align
strhne r1, [r0], #2 @ store one halfword to align
subne r2, r2, #1
/*
@ -54,29 +54,29 @@ memset16:
mov lr, r1
2: subs r2, r2, #32
stmgeia r0!, {r1, r3, ip, lr} @ 64 bytes at a time.
stmgeia r0!, {r1, r3, ip, lr}
stmgeia r0!, {r1, r3, ip, lr}
stmgeia r0!, {r1, r3, ip, lr}
stmiage r0!, {r1, r3, ip, lr} @ 64 bytes at a time.
stmiage r0!, {r1, r3, ip, lr}
stmiage r0!, {r1, r3, ip, lr}
stmiage r0!, {r1, r3, ip, lr}
bgt 2b
ldrpc cond=eq @ Now <64 bytes to go.
/*
* No need to correct the count; we're only testing bits from now on
*/
tst r2, #16
stmneia r0!, {r1, r3, ip, lr}
stmneia r0!, {r1, r3, ip, lr}
stmiane r0!, {r1, r3, ip, lr}
stmiane r0!, {r1, r3, ip, lr}
tst r2, #8
stmneia r0!, {r1, r3, ip, lr}
stmiane r0!, {r1, r3, ip, lr}
ldr lr, [sp], #4
4: tst r2, #4
stmneia r0!, {r1, r3}
stmiane r0!, {r1, r3}
tst r2, #2
strne r1, [r0], #4
tst r2, #1
strneh r1, [r0], #2
strhne r1, [r0], #2
bx lr
.end:
.size memset16,.end-memset16

View file

@ -73,15 +73,16 @@ static inline void store_context(void* addr)
static inline void load_context(const void* addr)
{
asm volatile(
".syntax unified \n"
"ldr r0, [%0, #40] \n" /* Load start pointer */
"cmp r0, #0 \n" /* Check for NULL */
/* If not already running, jump to start */
#if ARM_ARCH == 4 && defined(USE_THUMB)
"ldmneia %0, { r0, r12 } \n"
"ldmiane %0, { r0, r12 } \n"
"bxne r12 \n"
#else
"ldmneia %0, { r0, pc } \n"
"ldmiane %0, { r0, pc } \n"
#endif
"ldmia %0, { r4-r11, sp, lr } \n" /* Load regs r4 to r14 from context */

View file

@ -1006,13 +1006,14 @@ Lyre prototype 1 */
#endif
#if defined(CPU_ARM) && defined(__ASSEMBLER__)
.syntax unified
/* ARMv4T doesn't switch the T bit when popping pc directly, so we must use BX */
.macro ldmpc cond="", order="ia", regs
#if ARM_ARCH == 4 && defined(USE_THUMB)
ldm\cond\order sp!, { \regs, lr }
ldm\order\cond sp!, { \regs, lr }
bx\cond lr
#else
ldm\cond\order sp!, { \regs, pc }
ldm\order\cond sp!, { \regs, pc }
#endif
.endm
.macro ldrpc cond=""

View file

@ -139,9 +139,9 @@ copy_read_sectors:
.r_end2_u:
tst r1, #1 /* one halfword left? */
ldrneh r4, [r2]
ldrhne r4, [r2]
orrne r3, r3, r4, lsl #8
strneh r3, [r0], #2
strhne r3, [r0], #2
movne r3, r4, lsr #8
strb r3, [r0], #1 /* store final byte */
@ -151,8 +151,8 @@ copy_read_sectors:
/* 16-bit aligned */
.r_aligned:
tst r0, #2 /* 32 bit aligned? */
ldrneh r3, [r2] /* no: read first halfword */
strneh r3, [r0], #2 /* store */
ldrhne r3, [r2] /* no: read first halfword */
strhne r3, [r0], #2 /* store */
subne r1, r1, #1 /* one halfword taken */
sub r1, r1, #8 /* adjust for zero-check and doing 8 halfwords/loop */
@ -186,14 +186,14 @@ copy_read_sectors:
.r_end4_a:
tst r1, #2 /* 2 or more halfwords left? */
ldrneh r3, [r2]
ldrneh r4, [r2]
ldrhne r3, [r2]
ldrhne r4, [r2]
orrne r3, r3, r4, lsl #16
strne r3, [r0], #4
tst r1, #1 /* one halfword left? */
ldrneh r3, [r2]
strneh r3, [r0], #2
ldrhne r3, [r2]
strhne r3, [r0], #2
ldmpc regs=r4-r5
@ -291,9 +291,9 @@ copy_write_sectors:
.w_end2_u:
tst r1, #1 /* one halfword left? */
ldrneh r4, [r0], #2
ldrhne r4, [r0], #2
orrne r3, r3, r4, lsl #8
strneh r3, [r2]
strhne r3, [r2]
movne r3, r3, lsr #16
ldrb r4, [r0], #1 /* load final byte */
@ -305,8 +305,8 @@ copy_write_sectors:
/* 16-bit aligned */
.w_aligned:
tst r0, #2 /* 32 bit aligned? */
ldrneh r3, [r0], #2 /* no: load first halfword */
strneh r3, [r2] /* write */
ldrhne r3, [r0], #2 /* no: load first halfword */
strhne r3, [r2] /* write */
subne r1, r1, #1 /* one halfword taken */
sub r1, r1, #8 /* adjust for zero-check and doing 8 halfwords/loop */
@ -341,13 +341,13 @@ copy_write_sectors:
tst r1, #2 /* 2 or more halfwords left? */
ldrne r3, [r0], #4
strneh r3, [r2]
strhne r3, [r2]
movne r3, r3, lsr #16
strneh r3, [r2]
strhne r3, [r2]
tst r1, #1 /* one halfword left? */
ldrneh r3, [r0], #2
strneh r3, [r2]
ldrhne r3, [r0], #2
strhne r3, [r2]
ldmpc regs=r4-r5

View file

@ -40,24 +40,24 @@ lcd_write_data: /* r1 = pixel count, must be even */
subs r1, r1, #16
.loop16:
ldmgeia r0!, {r2-r3}
stmgeia lr, {r2-r3}
ldmgeia r0!, {r2-r3}
stmgeia lr, {r2-r3}
ldmgeia r0!, {r2-r3}
stmgeia lr, {r2-r3}
ldmgeia r0!, {r2-r3}
stmgeia lr, {r2-r3}
subges r1, r1, #16
ldmiage r0!, {r2-r3}
stmiage lr, {r2-r3}
ldmiage r0!, {r2-r3}
stmiage lr, {r2-r3}
ldmiage r0!, {r2-r3}
stmiage lr, {r2-r3}
ldmiage r0!, {r2-r3}
stmiage lr, {r2-r3}
subsge r1, r1, #16
bge .loop16
/* no need to correct the count; we're just checking bits from now on */
tst r1, #8
ldmneia r0!, {r2-r4, r12}
stmneia lr, {r2-r4, r12}
ldmiane r0!, {r2-r4, r12}
stmiane lr, {r2-r4, r12}
tst r1, #4
ldmneia r0!, {r2-r3}
stmneia lr, {r2-r3}
ldmiane r0!, {r2-r3}
stmiane lr, {r2-r3}
tst r1, #2
ldrne r3, [r0], #4
strne r3, [lr]

View file

@ -218,6 +218,7 @@ void fiq_handler(void)
* r0-r3 and r12 is a working register.
*/
asm volatile (
".syntax unified \n"
"sub lr, lr, #4 \n"
"stmfd sp!, { r0-r3, lr } \n" /* stack scratch regs and lr */
"mov r14, #0 \n" /* Was the callback called? */
@ -251,7 +252,7 @@ void fiq_handler(void)
"stmia r11, { r8-r9 } \n" /* save p and size */
"cmp r14, #0 \n" /* Callback called? */
"ldmeqfd sp!, { r0-r3, pc }^ \n" /* no? -> exit */
"ldmfdeq sp!, { r0-r3, pc }^ \n" /* no? -> exit */
"ldr r1, =pcm_play_status_callback \n"
"ldr r1, [r1] \n"
@ -268,7 +269,7 @@ void fiq_handler(void)
"mov lr, pc \n"
"ldr pc, =pcm_play_dma_complete_callback \n"
"cmp r0, #0 \n" /* any more to play? */
"ldmneia r11, { r8-r9 } \n" /* load new p and size */
"ldmiane r11, { r8-r9 } \n" /* load new p and size */
"cmpne r9, #0x0f \n" /* did we actually get enough data? */
"bhi .fill_fifo \n" /* not stop and enough? refill */
"ldmfd sp!, { r0-r3, pc }^ \n" /* exit */

View file

@ -327,6 +327,7 @@ void fiq_playback(void)
*/
asm volatile (
/* No external calls */
".syntax unified \n"
"sub lr, lr, #4 \n" /* Prepare return address */
"stmfd sp!, { lr } \n" /* stack lr so we can use it */
"ldr r12, =0xcf001040 \n" /* Some magic from iPodLinux ... */
@ -349,8 +350,8 @@ void fiq_playback(void)
"bhi 0b \n" /* ... yes, continue */
"cmp r9, #0 \n" /* either FIFO full or size empty? */
"stmneia r11, { r8-r9 } \n" /* save p and size, if not empty */
"ldmnefd sp!, { pc }^ \n" /* RFE if not empty */
"stmiane r11, { r8-r9 } \n" /* save p and size, if not empty */
"ldmfdne sp!, { pc }^ \n" /* RFE if not empty */
/* Making external calls */
"1: \n"
@ -363,7 +364,7 @@ void fiq_playback(void)
"mov lr, pc \n" /* long call (not in same section) */
"bx r3 \n"
"cmp r0, #0 \n" /* more data? */
"ldmeqfd sp!, { r0-r3, pc }^ \n" /* no? -> exit */
"ldmfdeq sp!, { r0-r3, pc }^ \n" /* no? -> exit */
"ldr r14, [r10, #0x1c] \n" /* read IISFIFO_CFG to check FIFO status */
"ands r14, r14, #(0xe<<23) \n" /* r14 = (IIS_TX_FREE_COUNT & ~1) << 23 */

View file

@ -227,7 +227,7 @@
/* Test whether divisor is 2^N */
cmp \inv, #1<<31
/* Load approximate reciprocal */
ldrhib \inv, [\neg, #.L_udiv_est_table-.-64]
ldrbhi \inv, [\neg, #.L_udiv_est_table-.-64]
bls 20f
subs \bits, \bits, #7
rsb \neg, \divisor, #0

View file

@ -225,7 +225,7 @@ udiv32_arm:
mov \inv, \divisor, lsl \bits
add \neg, pc, \inv, lsr #25
cmp \inv, #1<<31
ldrhib \inv, [\neg, #.L_udiv_est_table-.-64]
ldrbhi \inv, [\neg, #.L_udiv_est_table-.-64]
bls 20f
subs \bits, \bits, #7
rsb \neg, \divisor, #0

View file

@ -45,6 +45,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
#endif
asm volatile (
".syntax unified \n"
#if ORDER > 32
"mov %[res], #0 \n"
#endif
@ -117,7 +118,7 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
"smladx %[res], r1, r2, %[res] \n"
#if ORDER > 32
"subs %[cnt], %[cnt], #1 \n"
"ldmneia %[f2]!, {r2,r4} \n"
"ldmiane %[f2]!, {r2,r4} \n"
"sadd16 r0, r0, r7 \n"
"sadd16 r1, r1, r5 \n"
"strd r0, [%[v1]], #8 \n"
@ -172,8 +173,8 @@ static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t* s2)
"smlad %[res], r3, r5, %[res] \n"
#if ORDER > 32
"subs %[cnt], %[cnt], #1 \n"
"ldrned r4, [%[f2]], #8 \n"
"ldrned r0, [%[v1], #8] \n"
"ldrdne r4, [%[f2]], #8 \n"
"ldrdne r0, [%[v1], #8] \n"
"sadd16 r2, r2, r6 \n"
"sadd16 r3, r3, r7 \n"
"strd r2, [%[v1]], #8 \n"
@ -214,6 +215,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
#endif
asm volatile (
".syntax unified \n"
#if ORDER > 32
"mov %[res], #0 \n"
#endif
@ -286,7 +288,7 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
"smladx %[res], r1, r2, %[res] \n"
#if ORDER > 32
"subs %[cnt], %[cnt], #1 \n"
"ldmneia %[f2]!, {r2,r4} \n"
"ldmiane %[f2]!, {r2,r4} \n"
"ssub16 r0, r0, r7 \n"
"ssub16 r1, r1, r5 \n"
"strd r0, [%[v1]], #8 \n"
@ -341,8 +343,8 @@ static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t* s2)
"smlad %[res], r3, r5, %[res] \n"
#if ORDER > 32
"subs %[cnt], %[cnt], #1 \n"
"ldrned r4, [%[f2]], #8 \n"
"ldrned r0, [%[v1], #8] \n"
"ldrdne r4, [%[f2]], #8 \n"
"ldrdne r0, [%[v1], #8] \n"
"ssub16 r2, r2, r6 \n"
"ssub16 r3, r3, r7 \n"
"strd r2, [%[v1]], #8 \n"
@ -381,6 +383,7 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
#endif
asm volatile (
".syntax unified \n"
#if ORDER > 32
"mov %[res], #0 \n"
#endif
@ -421,10 +424,10 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
"pkhtb r1, r7, r4 \n"
#if ORDER > 32
"subs %[cnt], %[cnt], #1 \n"
"ldrned r6, [%[v2]], #8 \n"
"ldrdne r6, [%[v2]], #8 \n"
"smladx %[res], r2, r1, %[res] \n"
"pkhtb r2, r4, r5 \n"
"ldrned r0, [%[v1]], #8 \n"
"ldrdne r0, [%[v1]], #8 \n"
"smladx %[res], r3, r2, %[res] \n"
"bne 1b \n"
#else
@ -461,9 +464,9 @@ static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
"ldrd r4, [%[v2]], #8 \n"
"smlad %[res], r1, r6, %[res] \n"
"subs %[cnt], %[cnt], #1 \n"
"ldrned r0, [%[v1]], #8 \n"
"ldrdne r0, [%[v1]], #8 \n"
"smlad %[res], r2, r7, %[res] \n"
"ldrned r6, [%[v2]], #8 \n"
"ldrdne r6, [%[v2]], #8 \n"
"smlad %[res], r3, r4, %[res] \n"
"bne 1b \n"
#else

View file

@ -165,8 +165,8 @@ hybrid_filter:
sub r10, r11, r10
@ store to memory: *pA, *(pA-1), *(pA-2), *(pA-3), *pM, *(pM-1), *(pM-2), *(pM-3)
stmneda r2, {r10, r11, r12, lr}
stmneda r3, {r5, r6, r7, r8}
stmdane r2, {r10, r11, r12, lr}
stmdane r3, {r5, r6, r7, r8}
ldmpc cond=ne regs=r4-r12 @ hybrid_filter end (when fs->index != 0)
.hf_memshl:

View file

@ -323,7 +323,7 @@ resample_hermite:
add r6, r6, r0, lsl #2 @ r6 = &s[pos]
cmp r0, #3 @ pos >= 3? history not needed
ldmgedb r6, { r1-r3 } @ x3..x1 = s[pos-3]..s[pos-1]
ldmdbge r6, { r1-r3 } @ x3..x1 = s[pos-3]..s[pos-1]
bge .hrs_loadhist_done @
add r10, r0, r0, lsl #1 @ branch pc + pos*12
add pc, pc, r10, lsl #2 @
@ -496,7 +496,7 @@ resample_hermite:
ldmfd sp!, { r10, r12 } @ recover ch, h
subs r10, r10, #1 @ --ch
stmia r12!, { r1-r3 } @ h[0..2] = x3..x1
ldmgtia sp, { r0-r2 } @ load data, src, dst
ldmiagt sp, { r0-r2 } @ load data, src, dst
bgt .hrs_channel_loop
ldmfd sp!, { r1-r3 } @ pop data, src, dst
@ -614,7 +614,7 @@ filter_process:
ldr r0, [sp] @ r0 = history[channels-ch-1]
subs r3, r3, #1 @ all channels processed?
stmia r0!, { r9-r12 } @ save back history, history++
ldmhsib sp, { r1-r2 } @ r1 = buf, r2 = count
ldmibhs sp, { r1-r2 } @ r1 = buf, r2 = count
strhs r3, [sp, #12] @ store ch
strhs r0, [sp] @ store history[channels-ch-1]
bhs .fp_channelloop

View file

@ -18,6 +18,7 @@
* KIND, either express or implied.
*
****************************************************************************/
#include "rbcodecconfig.h"
/****************************************************************************
* void sample_output_mono(struct sample_io_data *this,
@ -56,7 +57,7 @@ sample_output_mono:
stmia r3!, { r12, r14 } @ store So0, So1
bgt 1b @
@
ldmltfd sp!, { r4, pc } @ if count was even, we're done
ldmfdlt sp!, { r4, pc } @ if count was even, we're done
@
2: @
ldr r12, [r2] @ round, scale, saturate
@ -113,7 +114,7 @@ sample_output_stereo:
stmia r3!, { r6, r7 } @ store So0, So1
bgt 1b @
@
ldmltfd sp!, { r4-r7, pc } @ if count was even, we're done
ldmfdlt sp!, { r4-r7, pc } @ if count was even, we're done
@
2: @
ldr r6, [r2] @ r6 = Li

View file

@ -45,7 +45,7 @@ safe_read8_faulty_addr:
@ if(value != NULL)
cmp r1, #0
@ *value = r0
strneb r0, [r1]
strbne r0, [r1]
@ return true;
mov r0, #1
bx lr
@ -72,7 +72,7 @@ safe_read16_faulty_addr:
@ if(value != NULL)
cmp r1, #0
@ *value = r0
strneh r0, [r1]
strhne r0, [r1]
@ return true;
mov r0, #1
bx lr