1
0
Fork 0
forked from len0rd/rockbox

x1000: optimize crt0.S, improve correctness

Replace inline section copy/fill loops with subroutines, which
reduces code size a bit and handles zero-size copies properly.

Remove the cache initialization loop as well. There's no actual
reason for it, because the SPL initializes the caches, and just
dropping the cache can even be harmful (in this case it wasn't,
because the SPL flushes the whole cache right before calling in).

Change-Id: I7cddc9ed6d060b1f1bdd75544297883d014cad2d
This commit is contained in:
Aidan MacDonald 2022-03-16 11:48:12 +00:00
parent 0df71c952c
commit c676736792

View file

@ -49,75 +49,38 @@ _header:
.ascii "ENDH" /* end of header structure */ .ascii "ENDH" /* end of header structure */
_realstart: _realstart:
/* Cache init */
li v0, 0x80000000
ori v1, v0, 0x4000
mtc0 zero, C0_TAGLO
mtc0 zero, C0_TAGHI
_cache_loop:
cache ICIndexStTag, 0(v0)
cache DCIndexStTag, 0(v0)
addiu v0, v0, 32
bne v0, v1, _cache_loop
nop
/* Invalidate BTB */
mfc0 v0, C0_Config, 7
nop
ori v0, v0, 2
mtc0 v0, C0_Config, 7
nop
/* Copy IRAM from BSS to low memory. */ /* Copy IRAM from BSS to low memory. */
la t0, _iramcopy la a0, _iramcopy
la t1, _iramstart la a1, _iramstart
la t2, _iramend la a2, _iramend
_iram_loop: bal _copy
lw t3, 0(t0) nop
addiu t1, 4
addiu t0, 4
bne t1, t2, _iram_loop
sw t3, -4(t1)
#if 0
/* Copy TCSM from BSS */ /* Copy TCSM from BSS */
la t0, _tcsmcopy la a0, _tcsmcopy
la t1, _tcsmstart la a1, _tcsmstart
la t2, _tcsmend la a2, _tcsmend
_tcsm_loop: bal _copy
lw t3, 0(t0) nop
addiu t0, 4
sw t3, 0(t1)
bne t1, t2, _tcsm_loop
addiu t1, 4
#endif
/* Clear the BSS segment (needed to zero-initialize C static values) */ /* Clear the BSS segment (needed to zero-initialize C static values) */
la t0, _bssbegin la a0, _bssbegin
la t1, _bssend la a1, _bssend
beq t0, t1, _bss_done bal _clear
_bss_loop: move a2, $0
addiu t0, 4
bne t0, t1, _bss_loop
sw zero, -4(t0)
_bss_done:
/* Set stack pointer and clear the stack */ /* Set stack pointer and clear the stack */
la sp, stackend la sp, stackend
la t0, stackbegin la a0, stackbegin
li t1, 0xDEADBEEF li a2, 0xDEADBEEF
_stack_loop: bal _clear
addiu t0, 4 move a1, sp
bne t0, sp, _stack_loop
sw t1, -4(t0)
/* Clear the IRQ stack */ /* Clear the IRQ stack */
la k0, _irqstackend la k0, _irqstackend
la t0, _irqstackbegin la a0, _irqstackbegin
_irqstack_loop: bal _clear
addiu t0, 4 move a1, k0
bne t0, k0, _irqstack_loop
sw t1, -4(t0)
/* Jump to C code */ /* Jump to C code */
jal system_early_init jal system_early_init
@ -125,6 +88,28 @@ _irqstack_loop:
j main j main
nop nop
/*
 * copy(void* src, void* dst, void* dst_end)
 *
 * Copies 32-bit words from src to dst until dst reaches dst_end.
 *   In:  a0 = src, a1 = dst, a2 = dst_end
 *   Modifies: a0, a1, t0. a2 is left unchanged. Returns via ra.
 *
 * The end test runs before any word is transferred, so a call with
 * dst == dst_end copies nothing. The loop advances by 4 and exits only
 * on exact equality, so (dst_end - dst) must be a multiple of 4.
 *
 * NOTE(review): the instruction after each branch is used as a branch
 * delay slot; this presumably relies on ".set noreorder" being in effect
 * earlier in the file (not visible here) - confirm.
 */
_copy:
/* Exit when dst == dst_end; the comparison sees dst before the
   increment in the delay slot below. */
beq a1, a2, 1f
/* Delay slot: dst += 4. Also executes on the exit path, so a1 ends up
   at dst_end + 4 when the routine returns. */
addiu a1, 4
/* t0 = *src, then src += 4. */
lw t0, 0(a0)
addiu a0, 4
b _copy
/* Delay slot: store the word at the pre-increment dst (a1 - 4). */
sw t0, -4(a1)
1:
jr ra
nop
/*
 * clear(void* dst, void* dst_end, int value)
 *
 * Fills the range [dst, dst_end) with the 32-bit word in 'value'.
 * Despite the name, the fill value is whatever the caller passes in a2;
 * it does not have to be zero.
 *   In:  a0 = dst, a1 = dst_end, a2 = value
 *   Modifies: a0 only. a1 and a2 are left unchanged, so the same fill
 *   value can be reused by a following call without reloading it.
 *   Returns via ra.
 *
 * The end test runs before any store, so a call with dst == dst_end
 * writes nothing. The loop advances by 4 and exits only on exact
 * equality, so (dst_end - dst) must be a multiple of 4.
 *
 * NOTE(review): the instruction after each branch is used as a branch
 * delay slot; this presumably relies on ".set noreorder" being in effect
 * earlier in the file (not visible here) - confirm.
 */
_clear:
/* Exit when dst == dst_end; the comparison sees dst before the
   increment in the delay slot below. */
beq a0, a1, 1f
/* Delay slot: dst += 4. Also executes on the exit path, so a0 ends up
   at dst_end + 4 when the routine returns. */
addiu a0, 4
b _clear
/* Delay slot: store the fill word at the pre-increment dst (a0 - 4). */
sw a2, -4(a0)
1:
jr ra
nop
/* Exception entry points */ /* Exception entry points */
.section .vectors.1, "ax", %progbits .section .vectors.1, "ax", %progbits
j tlb_refill_handler j tlb_refill_handler