protos/limine: Tighten aarch64 machine state to match spec
diff --git a/common/lib/spinup.asm_aarch64 b/common/lib/spinup.asm_aarch64
index 269ae8b0..be8ebbd7 100644
--- a/common/lib/spinup.asm_aarch64
+++ b/common/lib/spinup.asm_aarch64
@@ -46,6 +46,9 @@ enter_in_el1:
// Zero out VBAR to avoid confusion
msr vbar_el1, xzr
+ // Disable FP/SIMD/SVE
+ msr cpacr_el1, xzr
+
// Enter kernel in EL1
mov x8, #0x3c4
msr spsr_el1, x8
@@ -67,6 +70,7 @@ enter_in_el1:
msr ttbr0_el1, x5
msr ttbr1_el1, x6
msr sctlr_el1, x2
+ msr cpacr_el1, xzr
dsb sy
isb
b 4f
@@ -78,6 +82,7 @@ enter_in_el1:
msr s3_5_c2_c0_0, x5 // TTBR0_EL12
msr s3_5_c2_c0_1, x6 // TTBR1_EL12
msr s3_5_c1_c0_0, x2 // SCTLR_EL12
+ msr s3_5_c1_c0_2, xzr // CPACR_EL12
dsb sy
isb
@@ -86,8 +91,7 @@ enter_in_el1:
// Configure EL2-specific state for EL1
// Don't trap counters to EL2
- mrs x8, cnthctl_el2
- orr x8, x8, #3
+ mov x8, #3
msr cnthctl_el2, x8
msr cntvoff_el2, xzr
@@ -155,8 +159,7 @@ enter_in_el2:
// Configure EL2 state for VHE
// Don't trap counters
- mrs x8, cnthctl_el2
- orr x8, x8, #3
+ mov x8, #3
msr cnthctl_el2, x8
msr cntvoff_el2, xzr
@@ -164,14 +167,10 @@ enter_in_el2:
ldr x8, =0x488000002
msr hcr_el2, x8
- // Don't trap FP/SIMD/SVE (VHE CPTR_EL2 layout)
- mov x8, #0x330000
- msr cptr_el2, x8
+ // Disable FP/SIMD/SVE: under VHE, CPTR_EL2 uses the CPACR_EL1-style layout, so zero clears FPEN/ZEN
+ msr cptr_el2, xzr
msr hstr_el2, xzr
- // No stage 2 translation
- msr vttbr_el2, xzr
-
// Enter kernel in EL2
mov x8, #0x3c8
msr spsr_el1, x8
diff --git a/common/protos/limine.c b/common/protos/limine.c
index fb209a39..73e35d5c 100644
--- a/common/protos/limine.c
+++ b/common/protos/limine.c
@@ -293,12 +293,12 @@ extern symbol limine_spinup_32;
#define LIMINE_TCR(tsz, pa) ( ((uint64_t)(pa) << 32) /* Intermediate address size */ \
| ((uint64_t)2 << 30) /* TTBR1 4K granule */ \
- | ((uint64_t)2 << 28) /* TTBR1 Outer shareable */ \
+ | ((uint64_t)3 << 28) /* TTBR1 Inner shareable */ \
| ((uint64_t)1 << 26) /* TTBR1 Outer WB RW-Allocate */ \
| ((uint64_t)1 << 24) /* TTBR1 Inner WB RW-Allocate */ \
| ((uint64_t)(tsz) << 16) /* Address bits in TTBR1 */ \
/* TTBR0 4K granule */ \
- | ((uint64_t)2 << 12) /* TTBR0 Outer shareable */ \
+ | ((uint64_t)3 << 12) /* TTBR0 Inner shareable */ \
| ((uint64_t)1 << 10) /* TTBR0 Outer WB RW-Allocate */ \
| ((uint64_t)1 << 8) /* TTBR0 Inner WB RW-Allocate */ \
| ((uint64_t)(tsz) << 0)) /* Address bits in TTBR0 */
diff --git a/common/sys/smp_trampoline.asm_aarch64 b/common/sys/smp_trampoline.asm_aarch64
index 110c8cad..d11db4f4 100644
--- a/common/sys/smp_trampoline.asm_aarch64
+++ b/common/sys/smp_trampoline.asm_aarch64
@@ -48,6 +48,7 @@ smp_trampoline_start:
msr ttbr0_el1, x5
msr ttbr1_el1, x6
msr sctlr_el1, x2
+ msr cpacr_el1, xzr
isb
dsb sy
isb
@@ -72,6 +73,7 @@ smp_trampoline_start:
msr s3_5_c2_c0_0, x5 // TTBR0_EL12
msr s3_5_c2_c0_1, x6 // TTBR1_EL12
msr s3_5_c1_c0_0, x2 // SCTLR_EL12
+ msr s3_5_c1_c0_2, xzr // CPACR_EL12
isb
dsb sy
isb
@@ -81,8 +83,7 @@ smp_trampoline_start:
// VHE stay at EL2: configure EL2 state before enabling MMU
msr spsel, #0
- mrs x8, cnthctl_el2
- orr x8, x8, #3
+ mov x8, #3
msr cnthctl_el2, x8
msr cntvoff_el2, xzr
@@ -94,14 +95,10 @@ smp_trampoline_start:
orr x8, x8, #(1 << 1)
msr hcr_el2, x8
- // Don't trap FP/SIMD/SVE (VHE CPTR_EL2 layout)
- mov x8, #0x330000
- msr cptr_el2, x8
+ // Disable FP/SIMD/SVE: under VHE, CPTR_EL2 uses the CPACR_EL1-style layout, so zero clears FPEN/ZEN
+ msr cptr_el2, xzr
msr hstr_el2, xzr
- // No stage 2 translation
- msr vttbr_el2, xzr
-
// Set up VBAR for page table switch (VHE redirects VBAR_EL1 to VBAR_EL2)
adrp x8, 2f
add x8, x8, :lo12:2f
@@ -128,8 +125,7 @@ smp_trampoline_start:
// Common EL2-to-EL1 drop path
// Don't trap counters to EL2
- mrs x8, cnthctl_el2
- orr x8, x8, #3
+ mov x8, #3
msr cnthctl_el2, x8
msr cntvoff_el2, xzr
@@ -189,6 +185,9 @@ smp_trampoline_start:
// Zero out VBAR to avoid confusion
msr vbar_el1, xzr
+ // Disable FP/SIMD/SVE
+ msr cpacr_el1, xzr
+
3:
// Add HHDM offset to data pointer
add x1, x1, x7
