limine: Backport paging mode request from trunk
diff --git a/PROTOCOL.md b/PROTOCOL.md
index 7e304101..0454f9bb 100644
--- a/PROTOCOL.md
+++ b/PROTOCOL.md
@@ -632,8 +632,78 @@ struct limine_video_mode {
};
```
+### Paging Mode Feature
+
+The Paging Mode feature allows the kernel to control which paging mode is enabled
+before control is passed to it.
+
+ID:
+```c
+#define LIMINE_PAGING_MODE_REQUEST { LIMINE_COMMON_MAGIC, 0x95c1a0edab0944cb, 0xa4e5cb3842f7488a }
+```
+
+Request:
+```c
+struct limine_paging_mode_request {
+ uint64_t id[4];
+ uint64_t revision;
+ struct limine_paging_mode_response *response;
+ uint64_t mode;
+ uint64_t flags;
+};
+```
+
+Both the `mode` and `flags` fields are architecture-specific.
+
+The `LIMINE_PAGING_MODE_DEFAULT` macro is provided by all architectures to select
+the default paging mode (see below).
+
+Response:
+```c
+struct limine_paging_mode_response {
+ uint64_t revision;
+ uint64_t mode;
+ uint64_t flags;
+};
+```
+
+The response indicates which paging mode was actually enabled by the bootloader.
+Kernels must be prepared to handle the case where the requested paging mode is
+not supported by the hardware; the response's `mode` then differs from the requested one.
+
+#### x86_64
+
+Values for `mode`:
+```c
+#define LIMINE_PAGING_MODE_X86_64_4LVL 0
+#define LIMINE_PAGING_MODE_X86_64_5LVL 1
+
+#define LIMINE_PAGING_MODE_DEFAULT LIMINE_PAGING_MODE_X86_64_4LVL
+```
+
+No `flags` are currently defined.
+
+The default mode (when this request is not provided) is `LIMINE_PAGING_MODE_X86_64_4LVL`.
+
+#### aarch64
+
+Values for `mode`:
+```c
+#define LIMINE_PAGING_MODE_AARCH64_4LVL 0
+#define LIMINE_PAGING_MODE_AARCH64_5LVL 1
+
+#define LIMINE_PAGING_MODE_DEFAULT LIMINE_PAGING_MODE_AARCH64_4LVL
+```
+
+No `flags` are currently defined.
+
+The default mode (when this request is not provided) is `LIMINE_PAGING_MODE_AARCH64_4LVL`.
+
### 5-Level Paging Feature
+Note: *This feature has been deprecated in favor of the [Paging Mode feature](#paging-mode-feature)
+and will be removed entirely in a future release.*
+
ID:
```c
#define LIMINE_5_LEVEL_PAGING_REQUEST { LIMINE_COMMON_MAGIC, 0x94469551da9b3192, 0xebe5e86db7382888 }
diff --git a/common/mm/vmm.c b/common/mm/vmm.c
index e2bdd7da..670a0544 100644
--- a/common/mm/vmm.c
+++ b/common/mm/vmm.c
@@ -10,6 +10,7 @@
typedef uint64_t pt_entry_t;
+static uint64_t page_sizes[5];
static pt_entry_t *get_next_level(pagemap_t pagemap, pt_entry_t *current_level,
uint64_t virt, enum page_size desired_sz,
size_t level_idx, size_t entry);
@@ -28,9 +29,12 @@ static pt_entry_t *get_next_level(pagemap_t pagemap, pt_entry_t *current_level,
#define PT_IS_LARGE(x) (((x) & (PT_FLAG_VALID | PT_FLAG_LARGE)) == (PT_FLAG_VALID | PT_FLAG_LARGE))
#define PT_TO_VMM_FLAGS(x) ((x) & (PT_FLAG_WRITE | PT_FLAG_NX))
-pagemap_t new_pagemap(int lv) {
+#define pte_new(addr, flags) ((pt_entry_t)(addr) | (flags))
+#define pte_addr(pte) ((pte) & PT_PADDR_MASK)
+
+pagemap_t new_pagemap(int paging_mode) {
pagemap_t pagemap;
- pagemap.levels = lv;
+ pagemap.levels = paging_mode == PAGING_MODE_X86_64_5LVL ? 5 : 4;
pagemap.top_level = ext_mem_alloc(PT_SIZE);
return pagemap;
}
@@ -146,6 +150,9 @@ void vmm_assert_4k_pages(void) {
#define PT_IS_LARGE(x) (((x) & (PT_FLAG_VALID | PT_FLAG_TABLE)) == PT_FLAG_VALID)
#define PT_TO_VMM_FLAGS(x) (pt_to_vmm_flags_internal(x))
+#define pte_new(addr, flags) ((pt_entry_t)(addr) | (flags))
+#define pte_addr(pte) ((pte) & PT_PADDR_MASK)
+
static uint64_t pt_to_vmm_flags_internal(pt_entry_t entry) {
uint64_t flags = 0;
@@ -159,9 +166,9 @@ static uint64_t pt_to_vmm_flags_internal(pt_entry_t entry) {
return flags;
}
-pagemap_t new_pagemap(int lv) {
+pagemap_t new_pagemap(int paging_mode) {
pagemap_t pagemap;
- pagemap.levels = lv;
+ pagemap.levels = paging_mode == PAGING_MODE_AARCH64_5LVL ? 5 : 4;
pagemap.top_level[0] = ext_mem_alloc(PT_SIZE);
pagemap.top_level[1] = ext_mem_alloc(PT_SIZE);
return pagemap;
@@ -225,53 +232,41 @@ level4:
#error Unknown architecture
#endif
+// Maps level indexes to the page size for that level (each level is 512x the previous one).
+_Static_assert(VMM_MAX_LEVEL <= 5, "6-level paging not supported");
+static uint64_t page_sizes[5] = {
+    0x1000,           // level 0: 4 KiB
+    0x200000,         // level 1: 2 MiB
+    0x40000000,       // level 2: 1 GiB
+    0x8000000000,     // level 3: 512 GiB
+    0x1000000000000,  // level 4: 256 TiB
+};
+
static pt_entry_t *get_next_level(pagemap_t pagemap, pt_entry_t *current_level,
uint64_t virt, enum page_size desired_sz,
size_t level_idx, size_t entry) {
pt_entry_t *ret;
if (PT_IS_TABLE(current_level[entry])) {
- ret = (pt_entry_t *)(size_t)(current_level[entry] & PT_PADDR_MASK);
+ ret = (pt_entry_t *)(size_t)pte_addr(current_level[entry]);
} else {
if (PT_IS_LARGE(current_level[entry])) {
// We are replacing an existing large page with a smaller page.
// Split the previous mapping into mappings of the newly requested size
// before performing the requested map operation.
- uint64_t old_page_size, new_page_size;
- switch (level_idx) {
- case 2:
- old_page_size = 0x40000000;
- break;
-
- case 1:
- old_page_size = 0x200000;
- break;
-
- default:
- panic(false, "Unexpected level in get_next_level");
- }
- switch (desired_sz) {
- case Size1GiB:
- new_page_size = 0x40000000;
- break;
+ if ((level_idx >= VMM_MAX_LEVEL) || (level_idx == 0))
+ panic(false, "Unexpected level in get_next_level");
+ if (desired_sz >= VMM_MAX_LEVEL)
+ panic(false, "Unexpected page size in get_next_level");
- case Size2MiB:
- new_page_size = 0x200000;
- break;
-
- case Size4KiB:
- new_page_size = 0x1000;
- break;
-
- default:
- panic(false, "Unexpected page size in get_next_level");
- }
+ uint64_t old_page_size = page_sizes[level_idx];
+ uint64_t new_page_size = page_sizes[desired_sz];
// Save all the information from the old entry at this level
uint64_t old_flags = PT_TO_VMM_FLAGS(current_level[entry]);
- uint64_t old_phys = current_level[entry] & PT_PADDR_MASK;
+ uint64_t old_phys = pte_addr(current_level[entry]);
uint64_t old_virt = virt & ~(old_page_size - 1);
if (old_phys & (old_page_size - 1))
@@ -279,7 +274,7 @@ static pt_entry_t *get_next_level(pagemap_t pagemap, pt_entry_t *current_level,
// Allocate a table for the next level
ret = ext_mem_alloc(PT_SIZE);
- current_level[entry] = (pt_entry_t)(size_t)ret | PT_TABLE_FLAGS;
+ current_level[entry] = pte_new((size_t)ret, PT_TABLE_FLAGS);
// Recreate the old mapping with smaller pages
for (uint64_t i = 0; i < old_page_size; i += new_page_size) {
@@ -288,11 +283,9 @@ static pt_entry_t *get_next_level(pagemap_t pagemap, pt_entry_t *current_level,
} else {
// Allocate a table for the next level
ret = ext_mem_alloc(PT_SIZE);
- current_level[entry] = (pt_entry_t)(size_t)ret | PT_TABLE_FLAGS;
+ current_level[entry] = pte_new((size_t)ret, PT_TABLE_FLAGS);
}
}
return ret;
}
-
-
diff --git a/common/mm/vmm.h b/common/mm/vmm.h
index 3927c9bc..2a110c99 100644
--- a/common/mm/vmm.h
+++ b/common/mm/vmm.h
@@ -10,6 +10,21 @@
#define VMM_FLAG_NOEXEC ((uint64_t)1 << 63)
#define VMM_FLAG_FB ((uint64_t)0)
+#define VMM_MAX_LEVEL 3
+
+#define PAGING_MODE_X86_64_4LVL 0
+#define PAGING_MODE_X86_64_5LVL 1
+
+#define paging_mode_va_bits(mode) ((mode) ? 57 : 48)
+
+static inline uint64_t paging_mode_higher_half(int paging_mode) {
+    // Start of the higher half of the virtual address space: the 57-bit
+    // layout applies to 5-level paging, the 48-bit layout to everything else.
+    return paging_mode == PAGING_MODE_X86_64_5LVL
+        ? 0xff00000000000000
+        : 0xffff800000000000;
+}
+
typedef struct {
int levels;
void *top_level;
@@ -32,6 +47,21 @@ void map_page(pagemap_t pagemap, uint64_t virt_addr, uint64_t phys_addr, uint64_
#define VMM_FLAG_NOEXEC ((uint64_t)1 << 1)
#define VMM_FLAG_FB ((uint64_t)1 << 2)
+#define VMM_MAX_LEVEL 3
+
+#define PAGING_MODE_AARCH64_4LVL 0
+#define PAGING_MODE_AARCH64_5LVL 1
+
+#define paging_mode_va_bits(mode) ((mode) ? 57 : 48)
+
+static inline uint64_t paging_mode_higher_half(int paging_mode) {
+    // Start of the higher half of the virtual address space: the 57-bit
+    // layout applies to 5-level paging, the 48-bit layout to everything else.
+    return paging_mode == PAGING_MODE_AARCH64_5LVL
+        ? 0xff00000000000000
+        : 0xffff800000000000;
+}
+
typedef struct {
int levels;
void *top_level[2];
diff --git a/common/protos/limine.c b/common/protos/limine.c
index 8da7d295..e451a5a3 100644
--- a/common/protos/limine.c
+++ b/common/protos/limine.c
@@ -35,10 +35,10 @@
#define MAX_REQUESTS 128
#define MAX_MEMMAP 256
-static pagemap_t build_pagemap(bool level5pg, bool nx, struct elf_range *ranges, size_t ranges_count,
+static pagemap_t build_pagemap(int paging_mode, bool nx, struct elf_range *ranges, size_t ranges_count,
uint64_t physical_base, uint64_t virtual_base,
uint64_t direct_map_offset) {
- pagemap_t pagemap = new_pagemap(level5pg ? 5 : 4);
+ pagemap_t pagemap = new_pagemap(paging_mode);
if (ranges_count == 0) {
// Map 0 to 2GiB at 0xffffffff80000000
@@ -191,6 +191,14 @@ static uint64_t physical_base, virtual_base, slide, direct_map_offset;
static size_t requests_count;
static void **requests;
+static void set_paging_mode(int paging_mode, bool kaslr) {
+    // Direct map base for the mode, plus (with KASLR) a random 1 GiB-aligned slide.
+    uint64_t hhdm_base = paging_mode_higher_half(paging_mode);
+    uint64_t slide_mask = (((uint64_t)1 << (paging_mode_va_bits(paging_mode) - 4)) - 1)
+                        & ~((uint64_t)0x40000000 - 1);
+    direct_map_offset = hhdm_base + (kaslr ? (rand64() & slide_mask) : 0);
+}
+
static uint64_t reported_addr(void *addr) {
return (uint64_t)(uintptr_t)addr + direct_map_offset;
}
@@ -408,41 +416,95 @@ noreturn void limine_load(char *config, char *cmdline) {
printv("limine: ELF entry point: %X\n", entry_point);
printv("limine: Requests count: %u\n", requests_count);
- // 5 level paging feature & HHDM slide
- bool want_5lv;
-FEAT_START
- // Check if 5-level paging is available
- bool level5pg = false;
- // TODO(qookie): aarch64 also has optional 5 level paging when using 4K pages
+ // Paging Mode
+ int paging_mode, max_paging_mode;
+
#if defined (__x86_64__) || defined (__i386__)
+ paging_mode = max_paging_mode = PAGING_MODE_X86_64_4LVL;
if (cpuid(0x00000007, 0, &eax, &ebx, &ecx, &edx) && (ecx & (1 << 16))) {
printv("limine: CPU has 5-level paging support\n");
- level5pg = true;
+ max_paging_mode = PAGING_MODE_X86_64_5LVL;
}
+
+#elif defined (__aarch64__)
+ paging_mode = max_paging_mode = PAGING_MODE_AARCH64_4LVL;
+ // TODO(qookie): aarch64 also has optional 5 level paging when using 4K pages
+
+#else
+#error Unknown architecture
#endif
- struct limine_5_level_paging_request *lv5pg_request = get_request(LIMINE_5_LEVEL_PAGING_REQUEST);
- want_5lv = lv5pg_request != NULL && level5pg;
+#define paging_mode_limine_to_vmm(x) (x)
+#define paging_mode_vmm_to_limine(x) (x)
- direct_map_offset = want_5lv ? 0xff00000000000000 : 0xffff800000000000;
+ bool have_paging_mode_request = false;
+ bool paging_mode_set = false;
+FEAT_START
+ struct limine_paging_mode_request *pm_request = get_request(LIMINE_PAGING_MODE_REQUEST);
+ if (pm_request == NULL)
+ break;
+ have_paging_mode_request = true;
- if (kaslr) {
- direct_map_offset += (rand64() & ~((uint64_t)0x40000000 - 1)) & 0xfffffffffff;
+ if (pm_request->mode > LIMINE_PAGING_MODE_MAX) {
+ print("warning: ignoring invalid mode in paging mode request\n");
+ break;
}
- if (want_5lv) {
- void *lv5pg_response = ext_mem_alloc(sizeof(struct limine_5_level_paging_response));
- lv5pg_request->response = reported_addr(lv5pg_response);
+ paging_mode = paging_mode_limine_to_vmm(pm_request->mode);
+ if (paging_mode > max_paging_mode)
+ paging_mode = max_paging_mode;
+
+ set_paging_mode(paging_mode, kaslr);
+ paging_mode_set = true;
+
+ struct limine_paging_mode_response *pm_response =
+ ext_mem_alloc(sizeof(struct limine_paging_mode_response));
+
+ pm_response->mode = paging_mode_vmm_to_limine(paging_mode);
+ pm_request->response = reported_addr(pm_response);
+
+FEAT_END
+
+ // 5 level paging feature & HHDM slide
+FEAT_START
+ struct limine_5_level_paging_request *lv5pg_request = get_request(LIMINE_5_LEVEL_PAGING_REQUEST);
+ if (lv5pg_request == NULL)
+ break;
+
+ if (have_paging_mode_request) {
+ print("paging: ignoring 5-level paging request in favor of paging mode request\n");
+ break;
}
+#if defined (__x86_64__) || defined (__i386__)
+ if (max_paging_mode < PAGING_MODE_X86_64_5LVL)
+ break;
+ paging_mode = PAGING_MODE_X86_64_5LVL;
+#elif defined (__aarch64__)
+ if (max_paging_mode < PAGING_MODE_AARCH64_5LVL)
+ break;
+ paging_mode = PAGING_MODE_AARCH64_5LVL;
+#else
+#error Unknown architecture
+#endif
+
+ set_paging_mode(paging_mode, kaslr);
+ paging_mode_set = true;
+
+ void *lv5pg_response = ext_mem_alloc(sizeof(struct limine_5_level_paging_response));
+ lv5pg_request->response = reported_addr(lv5pg_response);
FEAT_END
+ if (!paging_mode_set) {
+ set_paging_mode(paging_mode, kaslr);
+ }
+
#if defined (__aarch64__)
uint64_t aa64mmfr0;
asm volatile ("mrs %0, id_aa64mmfr0_el1" : "=r" (aa64mmfr0));
uint64_t pa = aa64mmfr0 & 0xF;
- uint64_t tsz = 64 - (want_5lv ? 57 : 48);
+ uint64_t tsz = 64 - paging_mode_va_bits(paging_mode);
#endif
struct limine_file *kf = ext_mem_alloc(sizeof(struct limine_file));
@@ -1003,7 +1065,7 @@ FEAT_END
#endif
pagemap_t pagemap = {0};
- pagemap = build_pagemap(want_5lv, nx_available, ranges, ranges_count,
+ pagemap = build_pagemap(paging_mode, nx_available, ranges, ranges_count,
physical_base, virtual_base, direct_map_offset);
#if defined (UEFI)
@@ -1022,7 +1084,7 @@ FEAT_START
#if defined (__x86_64__) || defined (__i386__)
uint32_t bsp_lapic_id;
smp_info = init_smp(&cpu_count, &bsp_lapic_id,
- true, want_5lv,
+ true, paging_mode,
pagemap, smp_request->flags & LIMINE_SMP_X2APIC, nx_available,
direct_map_offset, true);
#elif defined (__aarch64__)
@@ -1155,7 +1217,7 @@ FEAT_END
uint64_t reported_stack = reported_addr(stack);
common_spinup(limine_spinup_32, 8,
- want_5lv, (uint32_t)(uintptr_t)pagemap.top_level,
+ paging_mode, (uint32_t)(uintptr_t)pagemap.top_level,
(uint32_t)entry_point, (uint32_t)(entry_point >> 32),
(uint32_t)reported_stack, (uint32_t)(reported_stack >> 32),
(uint32_t)(uintptr_t)local_gdt, nx_available);
diff --git a/common/sys/smp.c b/common/sys/smp.c
index 5d709a03..fe705f84 100644
--- a/common/sys/smp.c
+++ b/common/sys/smp.c
@@ -77,7 +77,7 @@ struct trampoline_passed_info {
static bool smp_start_ap(uint32_t lapic_id, struct gdtr *gdtr,
struct limine_smp_info *info_struct,
- bool longmode, bool lv5, uint32_t pagemap,
+ bool longmode, int paging_mode, uint32_t pagemap,
bool x2apic, bool nx, uint64_t hhdm, bool wp) {
// Prepare the trampoline
static void *trampoline = NULL;
@@ -97,7 +97,7 @@ static bool smp_start_ap(uint32_t lapic_id, struct gdtr *gdtr,
passed_info->smp_tpl_booted_flag = 0;
passed_info->smp_tpl_pagemap = pagemap;
passed_info->smp_tpl_target_mode = ((uint32_t)x2apic << 2)
- | ((uint32_t)lv5 << 1)
+ | ((uint32_t)paging_mode << 1)
| ((uint32_t)nx << 3)
| ((uint32_t)wp << 4)
| ((uint32_t)longmode << 0);
@@ -137,7 +137,7 @@ static bool smp_start_ap(uint32_t lapic_id, struct gdtr *gdtr,
struct limine_smp_info *init_smp(size_t *cpu_count,
uint32_t *_bsp_lapic_id,
bool longmode,
- bool lv5,
+ int paging_mode,
pagemap_t pagemap,
bool x2apic,
bool nx,
@@ -244,7 +244,7 @@ struct limine_smp_info *init_smp(size_t *cpu_count,
// Try to start the AP
if (!smp_start_ap(lapic->lapic_id, &gdtr, info_struct,
- longmode, lv5, (uintptr_t)pagemap.top_level,
+ longmode, paging_mode, (uintptr_t)pagemap.top_level,
x2apic, nx, hhdm, wp)) {
print("smp: FAILED to bring-up AP\n");
continue;
@@ -281,7 +281,7 @@ struct limine_smp_info *init_smp(size_t *cpu_count,
// Try to start the AP
if (!smp_start_ap(x2lapic->x2apic_id, &gdtr, info_struct,
- longmode, lv5, (uintptr_t)pagemap.top_level,
+ longmode, paging_mode, (uintptr_t)pagemap.top_level,
true, nx, hhdm, wp)) {
print("smp: FAILED to bring-up AP\n");
continue;
diff --git a/common/sys/smp.h b/common/sys/smp.h
index cd74518d..a3fd5ec0 100644
--- a/common/sys/smp.h
+++ b/common/sys/smp.h
@@ -13,7 +13,7 @@
struct limine_smp_info *init_smp(size_t *cpu_count,
uint32_t *_bsp_lapic_id,
bool longmode,
- bool lv5,
+ int paging_mode,
pagemap_t pagemap,
bool x2apic,
bool nx,
diff --git a/limine.h b/limine.h
index f26d8c56..e8b05d51 100644
--- a/limine.h
+++ b/limine.h
@@ -233,20 +233,56 @@ struct LIMINE_DEPRECATED limine_terminal_request {
LIMINE_DEPRECATED_IGNORE_END
+/* Paging mode */
+
+#define LIMINE_PAGING_MODE_REQUEST { LIMINE_COMMON_MAGIC, 0x95c1a0edab0944cb, 0xa4e5cb3842f7488a }
+
+#if defined (__x86_64__) || defined (__i386__)
+#define LIMINE_PAGING_MODE_X86_64_4LVL 0
+#define LIMINE_PAGING_MODE_X86_64_5LVL 1
+#define LIMINE_PAGING_MODE_MAX LIMINE_PAGING_MODE_X86_64_5LVL
+#define LIMINE_PAGING_MODE_DEFAULT LIMINE_PAGING_MODE_X86_64_4LVL
+#elif defined (__aarch64__)
+#define LIMINE_PAGING_MODE_AARCH64_4LVL 0
+#define LIMINE_PAGING_MODE_AARCH64_5LVL 1
+#define LIMINE_PAGING_MODE_MAX LIMINE_PAGING_MODE_AARCH64_5LVL
+#define LIMINE_PAGING_MODE_DEFAULT LIMINE_PAGING_MODE_AARCH64_4LVL
+#else
+#error Unknown architecture
+#endif
+
+struct limine_paging_mode_response { /* Bootloader's answer to a paging mode request. */
+ uint64_t revision;
+ uint64_t mode; /* paging mode the bootloader actually enabled */
+ uint64_t flags; /* architecture-specific; no flags are currently defined */
+};
+
+struct limine_paging_mode_request { /* Kernel request selecting the paging mode enabled before entry. */
+ uint64_t id[4]; /* LIMINE_PAGING_MODE_REQUEST */
+ uint64_t revision;
+ LIMINE_PTR(struct limine_paging_mode_response *) response; /* set by the bootloader when it honours the request */
+ uint64_t mode; /* one of the LIMINE_PAGING_MODE_* values for the target architecture */
+ uint64_t flags; /* architecture-specific; no flags are currently defined */
+};
+
/* 5-level paging */
#define LIMINE_5_LEVEL_PAGING_REQUEST { LIMINE_COMMON_MAGIC, 0x94469551da9b3192, 0xebe5e86db7382888 }
-struct limine_5_level_paging_response {
+LIMINE_DEPRECATED_IGNORE_START
+
+struct LIMINE_DEPRECATED limine_5_level_paging_response {
uint64_t revision;
};
-struct limine_5_level_paging_request {
+struct LIMINE_DEPRECATED limine_5_level_paging_request {
uint64_t id[4];
uint64_t revision;
LIMINE_PTR(struct limine_5_level_paging_response *) response;
};
+LIMINE_DEPRECATED_IGNORE_END
+
/* SMP */
#define LIMINE_SMP_REQUEST { LIMINE_COMMON_MAGIC, 0x95a67b819a1b857e, 0xa0b61b723b6a73e0 }
diff --git a/test/limine.c b/test/limine.c
index 8e8826f3..9e103dea 100644
--- a/test/limine.c
+++ b/test/limine.c
@@ -151,6 +151,16 @@ struct limine_dtb_request _dtb_request = {
__attribute__((section(".limine_reqs")))
void *dtb_req = &_dtb_request;
+struct limine_paging_mode_request _pm_request = { /* test kernel's paging mode request */
+ .id = LIMINE_PAGING_MODE_REQUEST,
+ .revision = 0, .response = NULL,
+ .mode = LIMINE_PAGING_MODE_DEFAULT, /* ask for the architecture's default paging mode */
+ .flags = 0,
+};
+
+__attribute__((section(".limine_reqs")))
+void *pm_req = &_pm_request;
+
static char *get_memmap_type(uint64_t type) {
switch (type) {
case LIMINE_MEMMAP_USABLE:
@@ -469,5 +479,17 @@ FEAT_START
e9_printf("Device tree blob pointer: %x", dtb_response->dtb_ptr);
FEAT_END
+FEAT_START
+ e9_printf("");
+ if (_pm_request.response == NULL) {
+ e9_printf("Paging mode not passed");
+ break;
+ }
+ struct limine_paging_mode_response *pm_response = _pm_request.response;
+ e9_printf("Paging mode feature, revision %d", pm_response->revision);
+ e9_printf(" mode: %d", pm_response->mode);
+ e9_printf(" flags: %x", pm_response->flags);
+FEAT_END
+
for (;;);
}
