:: commit 102449250ddd9fc5d93074732fc5dda7ca7a4c2c

Mintsuki <mintsuki@protonmail.com> — 2026-03-15 08:34

parents: 815c47e6e8

protos/limine: Implement aarch64 EL2 entry request

diff --git a/common/lib/misc.h b/common/lib/misc.h
index 9123bb05..e8de42b3 100644
--- a/common/lib/misc.h
+++ b/common/lib/misc.h
@@ -103,6 +103,9 @@ noreturn void common_spinup(void *fnptr, int args, ...);
 noreturn void enter_in_el1(uint64_t entry, uint64_t sp, uint64_t sctlr,
                            uint64_t mair, uint64_t tcr, uint64_t ttbr0,
                            uint64_t ttbr1, uint64_t target_x0);
+noreturn void enter_in_el2(uint64_t entry, uint64_t sp, uint64_t sctlr,
+                           uint64_t mair, uint64_t tcr, uint64_t ttbr0,
+                           uint64_t ttbr1, uint64_t target_x0);
 #elif defined (__riscv)
 noreturn void riscv_spinup(uint64_t entry, uint64_t sp, uint64_t satp, uint64_t direct_map_offset);
 #if defined (UEFI)
diff --git a/common/lib/spinup.asm_aarch64 b/common/lib/spinup.asm_aarch64
index d683def2..269ae8b0 100644
--- a/common/lib/spinup.asm_aarch64
+++ b/common/lib/spinup.asm_aarch64
@@ -110,4 +110,76 @@ enter_in_el1:
 
     eret
 
+// noreturn void enter_in_el2(uint64_t entry, uint64_t sp, uint64_t sctlr,
+//                            uint64_t mair, uint64_t tcr, uint64_t ttbr0,
+//                            uint64_t ttbr1, uint64_t direct_map_offset)
+// Enter kernel at EL2 with VHE. Must be called at EL2 with E2H already enabled. Never returns: finishes with an eret to entry (x0).
+// NOTE(review): misc.h declares the last parameter as target_x0, but x7 is used here as the higher-half offset and x0 is zeroed before eret.
+.global enter_in_el2
+enter_in_el2:
+    msr spsel, #0                   // select SP_EL0 as the active stack pointer
+    mov sp, x1                      // x1 = sp argument
+
+    // Switch page tables using VHE-redirected register names.
+    // Under VHE, *_el1 writes go to the EL2 register bank.
+
+    // Point the exception vector base at the higher-half address of label 5 so
+    // that a fault taken during the switch still lands on the continuation.
+    adrp x8, 5f
+    add x8, x8, #:lo12:5f
+    add x8, x8, x7                  // x7 = direct_map_offset; x8 = higher-half address of 5
+    msr vbar_el1, x8
+    isb
+    dsb sy
+    isb
+
+    // Switch the page table registers (VHE redirects to EL2)
+    msr mair_el1, x3                // x3 = mair argument
+    msr tcr_el1, x4                 // x4 = tcr argument
+    msr ttbr0_el1, x5               // x5 = ttbr0 argument
+    msr ttbr1_el1, x6               // x6 = ttbr1 argument
+    msr sctlr_el1, x2               // x2 = sctlr argument, written last
+    isb
+    dsb sy
+    isb
+
+    // Jump to the higher half mapping in case we didn't immediately crash
+    br x8
+
+// Alignment required by VBAR register (2^11 = 2 KiB); label 5 doubles as the vector base
+.align 11
+5:
+    // Zero out VBAR to avoid confusion (reached either via br x8 or via a fault)
+    msr vbar_el1, xzr
+
+    // Configure EL2 state for VHE
+
+    // Don't trap counters
+    mrs x8, cnthctl_el2
+    orr x8, x8, #3                  // set bits 0 and 1, keep the rest as found
+    msr cnthctl_el2, x8
+    msr cntvoff_el2, xzr            // zero CNTVOFF_EL2
+
+    // HCR: E2H + TGE + RW + SWIO (bits 34, 27, 31 and 1); every other HCR_EL2 bit is written as 0
+    ldr x8, =0x488000002
+    msr hcr_el2, x8
+
+    // Don't trap FP/SIMD/SVE (VHE CPTR_EL2 layout): sets bits 16-17 and 20-21
+    mov x8, #0x330000
+    msr cptr_el2, x8
+    msr hstr_el2, xzr               // clear HSTR_EL2
+
+    // No stage 2 translation
+    msr vttbr_el2, xzr
+
+    // Enter kernel in EL2; as noted above, the *_el1 names reach the EL2 registers under VHE
+    mov x8, #0x3c8                  // SPSR value: EL2t with D, A, I, F masked
+    msr spsr_el1, x8
+    msr elr_el1, x0                 // x0 = entry argument: address eret returns to
+
+    mov x0, xzr                     // x0 is zero on kernel entry
+    ZERO_REGS_EXCEPT_X0
+
+    eret
+
 .section .note.GNU-stack,"",%progbits
diff --git a/common/protos/limine.c b/common/protos/limine.c
index 701e390e..fb209a39 100644
--- a/common/protos/limine.c
+++ b/common/protos/limine.c
@@ -1506,6 +1506,32 @@ FEAT_END
     pagemap = build_pagemap(base_revision, nx_available, ranges, ranges_count,
                             physical_base, virtual_base, direct_map_offset);
 
+#if defined (__aarch64__)
+    // aarch64 EL2: decide whether the kernel is entered at EL2 (want_el2) rather than EL1
+    bool want_el2 = false;                          // stays false unless every check below passes
+FEAT_START
+    struct limine_aarch64_el2_request *el2_request =
+        get_request(LIMINE_AARCH64_EL2_REQUEST_ID);
+    if (el2_request == NULL) {
+        break;                                      // no EL2 request found; presumably exits the FEAT block - confirm against FEAT_START
+    }
+
+    // Grant EL2 if we are at EL2 and VHE is active (E2H enabled early)
+    if (current_el() == 2) {
+        uint64_t hcr;
+        asm volatile ("mrs %0, hcr_el2" : "=r"(hcr));
+        if (hcr & (1ULL << 34)) {                   // bit 34 = HCR_EL2.E2H
+            want_el2 = true;
+
+            struct limine_aarch64_el2_response *el2_response =
+                ext_mem_alloc(sizeof(struct limine_aarch64_el2_response)); // no response field is written here after allocation
+
+            el2_request->response = reported_addr(el2_response);           // a response is attached only when EL2 is granted
+        }
+    }
+FEAT_END
+#endif
+
     // MP
 FEAT_START
     struct limine_mp_request *mp_request = get_request(LIMINE_MP_REQUEST_ID);
@@ -1527,7 +1553,7 @@ FEAT_START
 
     mp_info = init_smp(config, &cpu_count, &bsp_mpidr,
                         pagemap, LIMINE_MAIR(fb_attr), LIMINE_TCR(tsz, pa), LIMINE_SCTLR,
-                        direct_map_offset);
+                        direct_map_offset, want_el2);
 #elif defined (__riscv)
     mp_info = init_smp(&cpu_count, pagemap, direct_map_offset);
 #elif defined (__loongarch64)
@@ -1726,10 +1752,17 @@ FEAT_END
 
     uint64_t reported_stack = reported_addr(stack);
 
-    enter_in_el1(entry_point, reported_stack, LIMINE_SCTLR, LIMINE_MAIR(fb_attr), LIMINE_TCR(tsz, pa),
-                 (uint64_t)pagemap.top_level[0],
-                 (uint64_t)pagemap.top_level[1],
-                 direct_map_offset);
+    if (want_el2) {
+        enter_in_el2(entry_point, reported_stack, LIMINE_SCTLR, LIMINE_MAIR(fb_attr), LIMINE_TCR(tsz, pa),
+                     (uint64_t)pagemap.top_level[0],
+                     (uint64_t)pagemap.top_level[1],
+                     direct_map_offset);
+    } else {
+        enter_in_el1(entry_point, reported_stack, LIMINE_SCTLR, LIMINE_MAIR(fb_attr), LIMINE_TCR(tsz, pa),
+                     (uint64_t)pagemap.top_level[0],
+                     (uint64_t)pagemap.top_level[1],
+                     direct_map_offset);
+    }
 #elif defined (__riscv)
     uint64_t reported_stack = reported_addr(stack);
     uint64_t satp = make_satp(pagemap.paging_mode, pagemap.top_level);
diff --git a/common/sys/smp.c b/common/sys/smp.c
index b72a7e62..74a4109a 100644
--- a/common/sys/smp.c
+++ b/common/sys/smp.c
@@ -324,6 +324,8 @@ struct limine_mp_info *init_smp(size_t   *cpu_count,
 #elif defined (__aarch64__)
 
 struct trampoline_passed_info {
+    uint64_t smp_tpl_enter_in_el2;
+
     uint64_t smp_tpl_booted_flag;
 
     uint64_t smp_tpl_hhdm_offset;
@@ -351,7 +353,7 @@ static bool try_start_ap(int boot_method, uint64_t method_ptr,
                          struct limine_mp_info *info_struct,
                          uint64_t ttbr0, uint64_t ttbr1, uint64_t mair,
                          uint64_t tcr, uint64_t sctlr,
-                         uint64_t hhdm_offset) {
+                         uint64_t hhdm_offset, bool enter_in_el2) {
     // Prepare the trampoline
     static void *trampoline = NULL;
     if (trampoline == NULL) {
@@ -374,6 +376,7 @@ static bool try_start_ap(int boot_method, uint64_t method_ptr,
     passed_info->smp_tpl_tcr         = tcr;
     passed_info->smp_tpl_sctlr       = sctlr;
     passed_info->smp_tpl_hhdm_offset = hhdm_offset;
+    passed_info->smp_tpl_enter_in_el2 = enter_in_el2 ? 1 : 0;
 
     // Cache coherency between the I-Cache and D-Cache is not guaranteed by the
     // architecture and as such we must perform I-Cache invalidation.
@@ -458,7 +461,8 @@ static struct limine_mp_info *try_acpi_smp(size_t   *cpu_count,
                                             uint64_t  mair,
                                             uint64_t  tcr,
                                             uint64_t  sctlr,
-                                            uint64_t  hhdm_offset) {
+                                            uint64_t  hhdm_offset,
+                                            bool      enter_in_el2) {
     int boot_method = BOOT_WITH_ACPI_PARK;
 
     // Search for FADT table
@@ -562,7 +566,8 @@ static struct limine_mp_info *try_acpi_smp(size_t   *cpu_count,
                 if (!try_start_ap(boot_method, gicc->parking_addr, info_struct,
                                   (uint64_t)(uintptr_t)pagemap.top_level[0],
                                   (uint64_t)(uintptr_t)pagemap.top_level[1],
-                                  mair, tcr, sctlr, hhdm_offset)) {
+                                  mair, tcr, sctlr, hhdm_offset,
+                                  enter_in_el2)) {
                     print("smp: FAILED to bring-up AP\n");
                     continue;
                 }
@@ -590,7 +595,8 @@ static struct limine_mp_info *try_dtb_smp( void *dtb,
                                            uint64_t  mair,
                                            uint64_t  tcr,
                                            uint64_t  sctlr,
-                                           uint64_t  hhdm_offset) {
+                                           uint64_t  hhdm_offset,
+                                           bool      enter_in_el2) {
     uint64_t bsp_mpidr;
     asm volatile ("mrs %0, mpidr_el1" : "=r"(bsp_mpidr));
 
@@ -755,7 +761,8 @@ static struct limine_mp_info *try_dtb_smp( void *dtb,
         if (!try_start_ap(boot_method, method_ptr, info_struct,
                                         (uint64_t)(uintptr_t)pagemap.top_level[0],
                                         (uint64_t)(uintptr_t)pagemap.top_level[1],
-                                        mair, tcr, sctlr, hhdm_offset)) {
+                                        mair, tcr, sctlr, hhdm_offset,
+                                        enter_in_el2)) {
             print("smp: FAILED to bring-up AP\n");
             continue;
         }
@@ -776,12 +783,14 @@ struct limine_mp_info *init_smp(const char *config,
                                  uint64_t  mair,
                                  uint64_t  tcr,
                                  uint64_t  sctlr,
-                                 uint64_t  hhdm_offset) {
+                                 uint64_t  hhdm_offset,
+                                 bool      enter_in_el2) {
     struct limine_mp_info *info = NULL;
 
     if (acpi_get_rsdp() && (info = try_acpi_smp(
                                     cpu_count, bsp_mpidr, pagemap,
-                                    mair, tcr, sctlr, hhdm_offset)))
+                                    mair, tcr, sctlr, hhdm_offset,
+                                    enter_in_el2)))
         return info;
 
     // No RSDP means no ACPI, try device trees in that case.
@@ -789,7 +798,8 @@ struct limine_mp_info *init_smp(const char *config,
     if (dtb) {
         info = try_dtb_smp(dtb,
                            cpu_count, bsp_mpidr, pagemap,
-                           mair, tcr, sctlr, hhdm_offset);
+                           mair, tcr, sctlr, hhdm_offset,
+                           enter_in_el2);
         pmm_free(dtb, fdt_totalsize(dtb));
         return info;
     }
diff --git a/common/sys/smp.h b/common/sys/smp.h
index 7cac37ca..2c11058e 100644
--- a/common/sys/smp.h
+++ b/common/sys/smp.h
@@ -30,7 +30,8 @@ struct limine_mp_info *init_smp(const char *config,
                                  uint64_t  mair,
                                  uint64_t  tcr,
                                  uint64_t  sctlr,
-                                 uint64_t  hhdm_offset);
+                                 uint64_t  hhdm_offset,
+                                 bool      enter_in_el2);
 
 #elif defined (__riscv)
 
diff --git a/common/sys/smp_trampoline.asm_aarch64 b/common/sys/smp_trampoline.asm_aarch64
index 3f08a8fd..f358a0c8 100644
--- a/common/sys/smp_trampoline.asm_aarch64
+++ b/common/sys/smp_trampoline.asm_aarch64
@@ -1,5 +1,6 @@
 #include <lib/macros.aarch64_asm.h>
 
+.set tpl_enter_in_el2, -72
 .set tpl_booted_flag, -64
 .set tpl_hhdm_offset, -56
 .set tpl_ttbr0, -48
@@ -31,13 +32,13 @@ smp_trampoline_start:
 
     PICK_EL x8, 1f, 0f
 0:
-    // Configure EL2-specific state for EL1
+    // EL2 path
 
-    // Check HCR_EL2.E2H (set on Apple Silicon with VHE)
+    // Check HCR_EL2.E2H
     mrs x8, hcr_el2
     tbnz x8, #34, 6f
 
-    // Configure EL1 page tables (normal silicon)
+    // Non-VHE: configure real EL1 page tables directly
     msr mair_el1, x3
     msr tcr_el1, x4
     msr ttbr0_el1, x5
@@ -46,12 +47,15 @@ smp_trampoline_start:
     isb
     dsb sy
     isb
+    mov x2, #0x3c4       // Target: EL1t
     b 7f
 
 6:
-    // Configure EL1 page tables (Apple Silicon with VHE)
-    // With E2H=1, EL1 register names redirect to EL2, so use
-    // EL12 aliases to access the actual EL1 registers.
+    // VHE (E2H=1): check if we should stay at EL2
+    ldr x8, [x1, tpl_enter_in_el2]
+    cbnz x8, 8f
+
+    // VHE drop to EL1: use EL12 aliases for real EL1 registers
     msr s3_5_c10_c2_0, x3 // MAIR_EL12
     msr s3_5_c2_c0_2, x4  // TCR_EL12
     msr s3_5_c2_c0_0, x5  // TTBR0_EL12
@@ -60,8 +64,49 @@ smp_trampoline_start:
     isb
     dsb sy
     isb
+    mov x2, #0x3c4       // Target: EL1t
+    b 7f
+
+8:
+    // VHE stay at EL2: use plain EL1 names (VHE redirects to EL2 registers)
+    msr mair_el1, x3
+    msr tcr_el1, x4
+    msr ttbr0_el1, x5
+    msr ttbr1_el1, x6
+    msr sctlr_el1, x2               // x2 = sctlr value; x2 is reused for the target SPSR further down
+    isb
+    dsb sy
+    isb
+    // NOTE(review): from here to `br x8` the code still runs at its pre-switch address, now through the tables loaded above, and unlike enter_in_el2 no vbar fallback was installed first - confirm this address is mapped by the new tables.
+    // Configure EL2 state for VHE
+    mrs x8, cnthctl_el2
+    orr x8, x8, #3                  // set bits 0 and 1, keep the rest as found
+    msr cnthctl_el2, x8
+    msr cntvoff_el2, xzr            // zero CNTVOFF_EL2
+
+    // HCR: E2H + TGE + RW + SWIO (bits 34, 27, 31 and 1); every other HCR_EL2 bit is written as 0
+    ldr x8, =0x488000002
+    msr hcr_el2, x8
+
+    // Don't trap FP/SIMD/SVE (VHE CPTR_EL2 layout): sets bits 16-17 and 20-21
+    mov x8, #0x330000
+    msr cptr_el2, x8
+    msr hstr_el2, xzr               // clear HSTR_EL2
+
+    // No stage 2 translation
+    msr vttbr_el2, xzr
+
+    mov x2, #0x3c8       // Target: EL2t; x2 is written to spsr_el1 later in the trampoline
+
+    // Jump directly to higher-half continuation (MMU just enabled from off)
+    adrp x8, 3f
+    add x8, x8, :lo12:3f
+    add x8, x8, x7                  // presumably x7 = HHDM offset (tpl_hhdm_offset) - loaded outside this hunk, confirm
+    br x8
 
 7:
+    // Common EL2-to-EL1 drop path
+
     // Don't trap counters to EL2
     mrs x8, cnthctl_el2
     orr x8, x8, #3
@@ -90,6 +135,7 @@ smp_trampoline_start:
     eret
 
 1:
+    // EL1 path
     msr spsel, #0
 
     // Switch to the new page tables
@@ -110,6 +156,7 @@ smp_trampoline_start:
     msr ttbr0_el1, x5
     msr ttbr1_el1, x6
     msr sctlr_el1, x2
+    mov x2, #0x3c4       // Target: EL1t (after x2 is done as sctlr)
     isb
     dsb sy
     isb
@@ -149,9 +196,8 @@ smp_trampoline_start:
     ldr x8, [x0, #16]
     mov sp, x8
 
-    // Enter kernel
-    mov x8, #0x3c4
-    msr spsr_el1, x8
+    // Enter kernel (x2 holds target SPSR: 0x3c4 for EL1t, 0x3c8 for EL2t)
+    msr spsr_el1, x2
 
     ZERO_REGS_EXCEPT_X0
 
tab: 248 wrap: offon