protos/limine: Implement aarch64 EL2 entry request
diff --git a/common/lib/misc.h b/common/lib/misc.h
index 9123bb05..e8de42b3 100644
--- a/common/lib/misc.h
+++ b/common/lib/misc.h
@@ -103,6 +103,9 @@ noreturn void common_spinup(void *fnptr, int args, ...);
noreturn void enter_in_el1(uint64_t entry, uint64_t sp, uint64_t sctlr,
uint64_t mair, uint64_t tcr, uint64_t ttbr0,
uint64_t ttbr1, uint64_t target_x0);
+noreturn void enter_in_el2(uint64_t entry, uint64_t sp, uint64_t sctlr, // EL2 (VHE) entry path; same argument list as enter_in_el1, never returns
+ uint64_t mair, uint64_t tcr, uint64_t ttbr0,
+ uint64_t ttbr1, uint64_t target_x0); // NOTE(review): the assembly adds this last argument (x7) to its continuation address and zeroes x0 before entry; the caller passes direct_map_offset, so the name looks stale
#elif defined (__riscv)
noreturn void riscv_spinup(uint64_t entry, uint64_t sp, uint64_t satp, uint64_t direct_map_offset);
#if defined (UEFI)
diff --git a/common/lib/spinup.asm_aarch64 b/common/lib/spinup.asm_aarch64
index d683def2..269ae8b0 100644
--- a/common/lib/spinup.asm_aarch64
+++ b/common/lib/spinup.asm_aarch64
@@ -110,4 +110,76 @@ enter_in_el1:
eret
+// noreturn void enter_in_el2(uint64_t entry, uint64_t sp, uint64_t sctlr,
+// uint64_t mair, uint64_t tcr, uint64_t ttbr0,
+// uint64_t ttbr1, uint64_t direct_map_offset)
+// Enter the kernel at EL2 with VHE: load the new translation registers, continue at this code's address plus direct_map_offset, then eret to `entry` with x0 = 0. Must be called at EL2 with HCR_EL2.E2H already set.
+
+.global enter_in_el2
+enter_in_el2:
+ msr spsel, #0 // Make SP_EL0 the active stack pointer
+ mov sp, x1 // x1 = sp argument
+
+ // The *_el1 register names are used throughout this routine: with E2H set,
+ // accesses made at EL2 through these names are redirected to the EL2 registers.
+
+ // Install label 5 (offset by direct_map_offset) as the vector base first, so
+ // that a fault taken during the switch still lands on the continuation below.
+ adrp x8, 5f
+ add x8, x8, #:lo12:5f
+ add x8, x8, x7 // x7 = direct_map_offset
+ msr vbar_el1, x8
+ isb
+ dsb sy
+ isb
+
+ // Load the new translation configuration (VHE redirects these to EL2)
+ msr mair_el1, x3 // x3 = mair
+ msr tcr_el1, x4 // x4 = tcr
+ msr ttbr0_el1, x5 // x5 = ttbr0
+ msr ttbr1_el1, x6 // x6 = ttbr1
+ msr sctlr_el1, x2 // x2 = sctlr, written last
+ isb
+ dsb sy
+ isb
+
+ // No fault was taken: branch to the same continuation address explicitly
+ br x8
+
+// 2 KiB alignment (2^11) so that label 5 is a valid VBAR value
+.align 11
+5:
+ // Continuation: clear VBAR again so the temporary vector base does not linger
+ msr vbar_el1, xzr
+
+ // Configure EL2 state for VHE
+
+ // Don't trap counters (sets the two low bits of CNTHCTL_EL2, zero virtual offset)
+ mrs x8, cnthctl_el2
+ orr x8, x8, #3
+ msr cnthctl_el2, x8
+ msr cntvoff_el2, xzr
+
+ // HCR: E2H (bit 34) + RW (bit 31) + TGE (bit 27) + SWIO (bit 1)
+ ldr x8, =0x488000002
+ msr hcr_el2, x8
+
+ // Don't trap FP/SIMD/SVE (VHE CPTR_EL2 layout: bits 21:20 and 17:16 set)
+ mov x8, #0x330000
+ msr cptr_el2, x8
+ msr hstr_el2, xzr
+
+ // No stage 2 translation
+ msr vttbr_el2, xzr
+
+ // Enter kernel in EL2: SPSR 0x3c8 = D, A, I, F masked with mode EL2t
+ mov x8, #0x3c8
+ msr spsr_el1, x8 // Redirected under VHE, as noted at the top of the routine
+ msr elr_el1, x0 // x0 = entry argument becomes the eret target
+
+ mov x0, xzr // The kernel is entered with x0 = 0
+ ZERO_REGS_EXCEPT_X0 // Macro defined elsewhere; presumably clears the remaining general-purpose registers
+
+ eret
+
.section .note.GNU-stack,"",%progbits
diff --git a/common/protos/limine.c b/common/protos/limine.c
index 701e390e..fb209a39 100644
--- a/common/protos/limine.c
+++ b/common/protos/limine.c
@@ -1506,6 +1506,32 @@ FEAT_END
pagemap = build_pagemap(base_revision, nx_available, ranges, ranges_count,
physical_base, virtual_base, direct_map_offset);
+#if defined (__aarch64__)
+ // aarch64 EL2: decide whether the kernel and the APs are entered at EL2 instead of EL1
+ bool want_el2 = false; // Later passed to init_smp() and used to pick enter_in_el2 over enter_in_el1
+FEAT_START
+ struct limine_aarch64_el2_request *el2_request =
+ get_request(LIMINE_AARCH64_EL2_REQUEST_ID);
+ if (el2_request == NULL) {
+ break; // Kernel did not ask for EL2: want_el2 stays false (break presumably leaves the FEAT_START/FEAT_END block)
+ }
+
+ // Grant EL2 only if we are currently at EL2 and VHE is active (E2H already set); otherwise no response is published
+ if (current_el() == 2) {
+ uint64_t hcr;
+ asm volatile ("mrs %0, hcr_el2" : "=r"(hcr));
+ if (hcr & (1ULL << 34)) { // Bit 34 = HCR_EL2.E2H
+ want_el2 = true;
+
+ struct limine_aarch64_el2_response *el2_response =
+ ext_mem_alloc(sizeof(struct limine_aarch64_el2_response)); // NOTE(review): no field is written here; presumably ext_mem_alloc returns zeroed memory - confirm
+
+ el2_request->response = reported_addr(el2_response); // A non-NULL response tells the kernel the request was granted
+ }
+ }
+FEAT_END
+#endif
+
// MP
FEAT_START
struct limine_mp_request *mp_request = get_request(LIMINE_MP_REQUEST_ID);
@@ -1527,7 +1553,7 @@ FEAT_START
mp_info = init_smp(config, &cpu_count, &bsp_mpidr,
pagemap, LIMINE_MAIR(fb_attr), LIMINE_TCR(tsz, pa), LIMINE_SCTLR,
- direct_map_offset);
+ direct_map_offset, want_el2);
#elif defined (__riscv)
mp_info = init_smp(&cpu_count, pagemap, direct_map_offset);
#elif defined (__loongarch64)
@@ -1726,10 +1752,17 @@ FEAT_END
uint64_t reported_stack = reported_addr(stack);
- enter_in_el1(entry_point, reported_stack, LIMINE_SCTLR, LIMINE_MAIR(fb_attr), LIMINE_TCR(tsz, pa),
- (uint64_t)pagemap.top_level[0],
- (uint64_t)pagemap.top_level[1],
- direct_map_offset);
+ if (want_el2) {
+ enter_in_el2(entry_point, reported_stack, LIMINE_SCTLR, LIMINE_MAIR(fb_attr), LIMINE_TCR(tsz, pa),
+ (uint64_t)pagemap.top_level[0],
+ (uint64_t)pagemap.top_level[1],
+ direct_map_offset);
+ } else {
+ enter_in_el1(entry_point, reported_stack, LIMINE_SCTLR, LIMINE_MAIR(fb_attr), LIMINE_TCR(tsz, pa),
+ (uint64_t)pagemap.top_level[0],
+ (uint64_t)pagemap.top_level[1],
+ direct_map_offset);
+ }
#elif defined (__riscv)
uint64_t reported_stack = reported_addr(stack);
uint64_t satp = make_satp(pagemap.paging_mode, pagemap.top_level);
diff --git a/common/sys/smp.c b/common/sys/smp.c
index b72a7e62..74a4109a 100644
--- a/common/sys/smp.c
+++ b/common/sys/smp.c
@@ -324,6 +324,8 @@ struct limine_mp_info *init_smp(size_t *cpu_count,
#elif defined (__aarch64__)
struct trampoline_passed_info {
+ uint64_t smp_tpl_enter_in_el2;
+
uint64_t smp_tpl_booted_flag;
uint64_t smp_tpl_hhdm_offset;
@@ -351,7 +353,7 @@ static bool try_start_ap(int boot_method, uint64_t method_ptr,
struct limine_mp_info *info_struct,
uint64_t ttbr0, uint64_t ttbr1, uint64_t mair,
uint64_t tcr, uint64_t sctlr,
- uint64_t hhdm_offset) {
+ uint64_t hhdm_offset, bool enter_in_el2) {
// Prepare the trampoline
static void *trampoline = NULL;
if (trampoline == NULL) {
@@ -374,6 +376,7 @@ static bool try_start_ap(int boot_method, uint64_t method_ptr,
passed_info->smp_tpl_tcr = tcr;
passed_info->smp_tpl_sctlr = sctlr;
passed_info->smp_tpl_hhdm_offset = hhdm_offset;
+ passed_info->smp_tpl_enter_in_el2 = enter_in_el2 ? 1 : 0;
// Cache coherency between the I-Cache and D-Cache is not guaranteed by the
// architecture and as such we must perform I-Cache invalidation.
@@ -458,7 +461,8 @@ static struct limine_mp_info *try_acpi_smp(size_t *cpu_count,
uint64_t mair,
uint64_t tcr,
uint64_t sctlr,
- uint64_t hhdm_offset) {
+ uint64_t hhdm_offset,
+ bool enter_in_el2) {
int boot_method = BOOT_WITH_ACPI_PARK;
// Search for FADT table
@@ -562,7 +566,8 @@ static struct limine_mp_info *try_acpi_smp(size_t *cpu_count,
if (!try_start_ap(boot_method, gicc->parking_addr, info_struct,
(uint64_t)(uintptr_t)pagemap.top_level[0],
(uint64_t)(uintptr_t)pagemap.top_level[1],
- mair, tcr, sctlr, hhdm_offset)) {
+ mair, tcr, sctlr, hhdm_offset,
+ enter_in_el2)) {
print("smp: FAILED to bring-up AP\n");
continue;
}
@@ -590,7 +595,8 @@ static struct limine_mp_info *try_dtb_smp( void *dtb,
uint64_t mair,
uint64_t tcr,
uint64_t sctlr,
- uint64_t hhdm_offset) {
+ uint64_t hhdm_offset,
+ bool enter_in_el2) {
uint64_t bsp_mpidr;
asm volatile ("mrs %0, mpidr_el1" : "=r"(bsp_mpidr));
@@ -755,7 +761,8 @@ static struct limine_mp_info *try_dtb_smp( void *dtb,
if (!try_start_ap(boot_method, method_ptr, info_struct,
(uint64_t)(uintptr_t)pagemap.top_level[0],
(uint64_t)(uintptr_t)pagemap.top_level[1],
- mair, tcr, sctlr, hhdm_offset)) {
+ mair, tcr, sctlr, hhdm_offset,
+ enter_in_el2)) {
print("smp: FAILED to bring-up AP\n");
continue;
}
@@ -776,12 +783,14 @@ struct limine_mp_info *init_smp(const char *config,
uint64_t mair,
uint64_t tcr,
uint64_t sctlr,
- uint64_t hhdm_offset) {
+ uint64_t hhdm_offset,
+ bool enter_in_el2) {
struct limine_mp_info *info = NULL;
if (acpi_get_rsdp() && (info = try_acpi_smp(
cpu_count, bsp_mpidr, pagemap,
- mair, tcr, sctlr, hhdm_offset)))
+ mair, tcr, sctlr, hhdm_offset,
+ enter_in_el2)))
return info;
// No RSDP means no ACPI, try device trees in that case.
@@ -789,7 +798,8 @@ struct limine_mp_info *init_smp(const char *config,
if (dtb) {
info = try_dtb_smp(dtb,
cpu_count, bsp_mpidr, pagemap,
- mair, tcr, sctlr, hhdm_offset);
+ mair, tcr, sctlr, hhdm_offset,
+ enter_in_el2);
pmm_free(dtb, fdt_totalsize(dtb));
return info;
}
diff --git a/common/sys/smp.h b/common/sys/smp.h
index 7cac37ca..2c11058e 100644
--- a/common/sys/smp.h
+++ b/common/sys/smp.h
@@ -30,7 +30,8 @@ struct limine_mp_info *init_smp(const char *config,
uint64_t mair,
uint64_t tcr,
uint64_t sctlr,
- uint64_t hhdm_offset);
+ uint64_t hhdm_offset,
+ bool enter_in_el2);
#elif defined (__riscv)
diff --git a/common/sys/smp_trampoline.asm_aarch64 b/common/sys/smp_trampoline.asm_aarch64
index 3f08a8fd..f358a0c8 100644
--- a/common/sys/smp_trampoline.asm_aarch64
+++ b/common/sys/smp_trampoline.asm_aarch64
@@ -1,5 +1,6 @@
#include <lib/macros.aarch64_asm.h>
+.set tpl_enter_in_el2, -72
.set tpl_booted_flag, -64
.set tpl_hhdm_offset, -56
.set tpl_ttbr0, -48
@@ -31,13 +32,13 @@ smp_trampoline_start:
PICK_EL x8, 1f, 0f
0:
- // Configure EL2-specific state for EL1
+ // EL2 path
- // Check HCR_EL2.E2H (set on Apple Silicon with VHE)
+ // Check HCR_EL2.E2H
mrs x8, hcr_el2
tbnz x8, #34, 6f
- // Configure EL1 page tables (normal silicon)
+ // Non-VHE: configure real EL1 page tables directly
msr mair_el1, x3
msr tcr_el1, x4
msr ttbr0_el1, x5
@@ -46,12 +47,15 @@ smp_trampoline_start:
isb
dsb sy
isb
+ mov x2, #0x3c4 // Target: EL1t
b 7f
6:
- // Configure EL1 page tables (Apple Silicon with VHE)
- // With E2H=1, EL1 register names redirect to EL2, so use
- // EL12 aliases to access the actual EL1 registers.
+ // VHE (E2H=1): check if we should stay at EL2
+ ldr x8, [x1, tpl_enter_in_el2]
+ cbnz x8, 8f
+
+ // VHE drop to EL1: use EL12 aliases for real EL1 registers
msr s3_5_c10_c2_0, x3 // MAIR_EL12
msr s3_5_c2_c0_2, x4 // TCR_EL12
msr s3_5_c2_c0_0, x5 // TTBR0_EL12
@@ -60,8 +64,49 @@ smp_trampoline_start:
isb
dsb sy
isb
+ mov x2, #0x3c4 // Target: EL1t
+ b 7f
+
+8:
+ // VHE stay at EL2: use plain EL1 names (VHE redirects to EL2 registers)
+ msr mair_el1, x3
+ msr tcr_el1, x4
+ msr ttbr0_el1, x5
+ msr ttbr1_el1, x6
+ msr sctlr_el1, x2 // Last use of x2 as the SCTLR value; it is reused for the target SPSR below
+ isb
+ dsb sy
+ isb
+
+ // Configure EL2 state for VHE (same values as enter_in_el2 in spinup.asm_aarch64)
+ mrs x8, cnthctl_el2
+ orr x8, x8, #3
+ msr cnthctl_el2, x8
+ msr cntvoff_el2, xzr
+
+ // HCR: E2H (bit 34) + RW (bit 31) + TGE (bit 27) + SWIO (bit 1)
+ ldr x8, =0x488000002
+ msr hcr_el2, x8
+
+ // Don't trap FP/SIMD/SVE (VHE CPTR_EL2 layout: bits 21:20 and 17:16 set)
+ mov x8, #0x330000
+ msr cptr_el2, x8
+ msr hstr_el2, xzr
+
+ // No stage 2 translation
+ msr vttbr_el2, xzr
+
+ mov x2, #0x3c8 // Target: EL2t. NOTE(review): this path never executes `msr spsel, #0` (enter_in_el2 and the EL1 path do) - confirm the shared tail selects SP_EL0 before its `mov sp`
+
+ // No eret is needed to stay at EL2: branch straight to label 3 at its higher-half address (x7 presumably holds the HHDM offset)
+ adrp x8, 3f
+ add x8, x8, :lo12:3f
+ add x8, x8, x7
+ br x8
7:
+ // Common EL2-to-EL1 drop path
+
// Don't trap counters to EL2
mrs x8, cnthctl_el2
orr x8, x8, #3
@@ -90,6 +135,7 @@ smp_trampoline_start:
eret
1:
+ // EL1 path
msr spsel, #0
// Switch to the new page tables
@@ -110,6 +156,7 @@ smp_trampoline_start:
msr ttbr0_el1, x5
msr ttbr1_el1, x6
msr sctlr_el1, x2
+ mov x2, #0x3c4 // Target: EL1t (after x2 is done as sctlr)
isb
dsb sy
isb
@@ -149,9 +196,8 @@ smp_trampoline_start:
ldr x8, [x0, #16]
mov sp, x8
- // Enter kernel
- mov x8, #0x3c4
- msr spsr_el1, x8
+ // Enter kernel (x2 holds target SPSR: 0x3c4 for EL1t, 0x3c8 for EL2t)
+ msr spsr_el1, x2
ZERO_REGS_EXCEPT_X0
