:: commit d4fed0c086e8e8497be7a5e1c0f5d5504afde1c2

mintsuki <mintsuki@protonmail.com> — 2024-07-29 16:44

parents: 5287d6edba

protos/limine: Implement mechanisms for loading modules above 4GiB for IA-32

diff --git a/common/GNUmakefile b/common/GNUmakefile
index b10fbae9..ac5ee3d8 100644
--- a/common/GNUmakefile
+++ b/common/GNUmakefile
@@ -78,7 +78,8 @@ ifeq ($(TARGET),bios)
         -DBIOS
     override NASMFLAGS_FOR_TARGET += \
         -f elf32 \
-        -DIA32_TARGET
+        -DIA32_TARGET \
+        -DBIOS
 endif
 
 ifeq ($(TARGET),uefi-x86-64)
@@ -99,7 +100,8 @@ ifeq ($(TARGET),uefi-x86-64)
         -DUEFI
     override NASMFLAGS_FOR_TARGET += \
         -f elf64 \
-        -DX86_64_TARGET
+        -DX86_64_TARGET \
+        -DUEFI
 endif
 
 ifeq ($(TARGET),uefi-ia32)
@@ -116,7 +118,8 @@ ifeq ($(TARGET),uefi-ia32)
         -DUEFI
     override NASMFLAGS_FOR_TARGET += \
         -f elf32 \
-        -DIA32_TARGET
+        -DIA32_TARGET \
+        -DUEFI
 endif
 
 ifeq ($(TARGET),uefi-aarch64)
diff --git a/common/fs/file.h b/common/fs/file.h
index edfda32c..86474bd0 100644
--- a/common/fs/file.h
+++ b/common/fs/file.h
@@ -36,6 +36,10 @@ struct file_handle *fopen(struct volume *part, const char *filename);
 void fread(struct file_handle *fd, void *buf, uint64_t loc, uint64_t count);
 void fclose(struct file_handle *fd);
 void *freadall(struct file_handle *fd, uint32_t type);
-void *freadall_mode(struct file_handle *fd, uint32_t type, bool allow_high_allocs);
+void *freadall_mode(struct file_handle *fd, uint32_t type, bool allow_high_allocs
+#if defined (__i386__)
+    , void (*memcpy_to_64)(uint64_t dst, void *src, size_t count) // i386 only: callback used to copy count bytes from src to the 64-bit address dst; NULL disables high allocations
+#endif
+);
 
 #endif
diff --git a/common/fs/file.s2.c b/common/fs/file.s2.c
index 68fb354f..2b9b251c 100644
--- a/common/fs/file.s2.c
+++ b/common/fs/file.s2.c
@@ -96,10 +96,26 @@ void fread(struct file_handle *fd, void *buf, uint64_t loc, uint64_t count) {
 }
 
 void *freadall(struct file_handle *fd, uint32_t type) {
-    return freadall_mode(fd, type, false);
+    return freadall_mode(fd, type, false
+#if defined (__i386__)
+        , NULL // no memcpy_to_64 callback: high allocations are not requested by this wrapper
+#endif
+    );
 }
 
-void *freadall_mode(struct file_handle *fd, uint32_t type, bool allow_high_allocs) {
+void *freadall_mode(struct file_handle *fd, uint32_t type, bool allow_high_allocs
+#if defined (__i386__)
+    , void (*memcpy_to_64)(uint64_t dst, void *src, size_t count)
+#endif
+) {
+#if defined (__i386__)
+    static uint64_t high_ret;
+
+    if (memcpy_to_64 == NULL) {
+        allow_high_allocs = false;
+    }
+#endif
+
     if (fd->is_memfile) {
         if (fd->readall) {
             return fd->fd;
@@ -109,11 +125,39 @@ void *freadall_mode(struct file_handle *fd, uint32_t type, bool allow_high_alloc
         return fd->fd;
     } else {
         void *ret = ext_mem_alloc_type_aligned_mode(fd->size, type, 4096, allow_high_allocs);
+#if defined (__i386__)
+        if (allow_high_allocs == true) {
+            high_ret = *(uint64_t *)ret;
+            if (high_ret < 0x100000000) {
+                ret = (void *)(uintptr_t)high_ret;
+                goto low_ret;
+            }
+            void *pool = ext_mem_alloc(0x100000);
+            for (size_t i = 0; i < fd->size; i += 0x100000) {
+                size_t count;
+                if (fd->size - i < 0x100000) {
+                    count = fd->size - i;
+                } else {
+                    count = 0x100000;
+                }
+                fd->read(fd, pool, i, count);
+                memcpy_to_64(high_ret + i, pool, count);
+            }
+            pmm_free(pool, 0x100000);
+            return &high_ret;
+        }
+low_ret:
+#endif
         fd->read(fd, ret, 0, fd->size);
         fd->close(fd);
         fd->fd = ret;
         fd->readall = true;
         fd->is_memfile = true;
+#if defined (__i386__)
+        if (allow_high_allocs == true) {
+            return &high_ret;
+        }
+#endif
         return ret;
     }
 }
diff --git a/common/mm/pmm.s2.c b/common/mm/pmm.s2.c
index 68ed801d..94a6a3dc 100644
--- a/common/mm/pmm.s2.c
+++ b/common/mm/pmm.s2.c
@@ -356,27 +356,6 @@ void init_memmap(void) {
         uint64_t base = entry->PhysicalStart;
         uint64_t length = entry->NumberOfPages * 4096;
 
-#if !defined (__x86_64__) && !defined (__aarch64__) && !defined (__riscv64)
-        // We only manage memory below 4GiB. For anything above that, make it
-        // EFI reclaimable.
-        if (our_type == MEMMAP_USABLE) {
-            if (base + length > 0x100000000) {
-                if (base < 0x100000000) {
-                    memmap[memmap_entries].base = base;
-                    memmap[memmap_entries].length = 0x100000000 - base;
-                    memmap[memmap_entries].type = our_type;
-
-                    base = 0x100000000;
-                    length -= memmap[memmap_entries].length;
-
-                    memmap_entries++;
-                }
-
-                our_type = MEMMAP_EFI_RECLAIMABLE;
-            }
-        }
-#endif
-
         memmap[memmap_entries].base = base;
         memmap[memmap_entries].length = length;
         memmap[memmap_entries].type = our_type;
@@ -405,6 +384,12 @@ void init_memmap(void) {
 
         EFI_PHYSICAL_ADDRESS base = untouched_memmap[i].base;
 
+#if defined (__i386__)
+        if (untouched_memmap[i].base + untouched_memmap[i].length > 0x100000000) {
+            continue;
+        }
+#endif
+
         status = gBS->AllocatePages(AllocateAddress, EfiLoaderCode,
                                     untouched_memmap[i].length / 4096, &base);
 
@@ -573,10 +558,6 @@ void *ext_mem_alloc_type_aligned(size_t count, uint32_t type, size_t alignment)
 
 // Allocate memory top down.
 void *ext_mem_alloc_type_aligned_mode(size_t count, uint32_t type, size_t alignment, bool allow_high_allocs) {
-#if !defined (__x86_64__)
-    (void)allow_high_allocs;
-#endif
-
     count = ALIGN_UP(count, alignment);
 
     if (allocations_disallowed)
@@ -592,9 +573,7 @@ void *ext_mem_alloc_type_aligned_mode(size_t count, uint32_t type, size_t alignm
 #if defined(__x86_64__) || defined(__i386__)
         // Let's make sure the entry is not > 4GiB
         if (entry_top >= 0x100000000
-#if defined (__x86_64__)
          && !allow_high_allocs
-#endif
         ) {
             entry_top = 0x100000000;
             if (entry_base >= entry_top)
@@ -612,10 +591,22 @@ void *ext_mem_alloc_type_aligned_mode(size_t count, uint32_t type, size_t alignm
         int64_t aligned_length = entry_top - alloc_base;
         memmap_alloc_range((uint64_t)alloc_base, (uint64_t)aligned_length, type, MEMMAP_USABLE, true, false, false);
 
-        void *ret = (void *)(size_t)alloc_base;
+        void *ret;
+
+#if defined (__i386__)
+        if (!allow_high_allocs) {
+#endif
+        ret = (void *)(size_t)alloc_base;
 
         // Zero out allocated space
         memset(ret, 0, count);
+#if defined (__i386__)
+        } else {
+            static uint64_t above64_ret;
+            above64_ret = alloc_base;
+            ret = &above64_ret;
+        }
+#endif
 
         sanitise_entries(memmap, &memmap_entries, false);
 
diff --git a/common/protos/limine.c b/common/protos/limine.c
index 8a95e357..8dc92ba3 100644
--- a/common/protos/limine.c
+++ b/common/protos/limine.c
@@ -34,6 +34,8 @@
 
 #define MAX_REQUESTS 128
 
+static int paging_mode;
+
 static uint64_t get_hhdm_span_top(int base_revision) {
     uint64_t ret = 0x100000000;
     for (size_t i = 0; i < memmap_entries; i++) {
@@ -65,8 +67,70 @@ static uint64_t get_hhdm_span_top(int base_revision) {
     return ret;
 }
 
+#if defined (__i386__)
+static pagemap_t build_identity_map(void) { // Returns a new pagemap (for the current paging_mode) that contains identity mappings only.
+    pagemap_t pagemap = new_pagemap(paging_mode);
+
+    for (uint64_t i = 0; i < 0x100000000; i += 0x40000000) { // Identity-map the low 4GiB in 1GiB steps.
+        map_page(pagemap, i, i, VMM_FLAG_WRITE, Size1GiB);
+    }
+
+    // Walk a private copy of the memory map (presumably because map_page() can allocate and so alter the live one -- TODO confirm).
+    size_t _memmap_entries = memmap_entries;
+    struct memmap_entry *_memmap =
+        ext_mem_alloc(_memmap_entries * sizeof(struct memmap_entry));
+    for (size_t i = 0; i < _memmap_entries; i++) {
+        _memmap[i] = memmap[i];
+    }
+
+    for (size_t i = 0; i < _memmap_entries; i++) {
+        if (_memmap[i].type == MEMMAP_RESERVED
+         || _memmap[i].type == MEMMAP_BAD_MEMORY) {
+            continue; // Reserved and bad-memory entries are skipped.
+        }
+
+        uint64_t base   = _memmap[i].base;
+        uint64_t length = _memmap[i].length;
+        uint64_t top    = base + length;
+
+        if (base < 0x100000000) { // The part below 4GiB is already covered by the first loop.
+            base = 0x100000000;
+        }
+
+        if (base >= top) { // Nothing of this entry lies above 4GiB.
+            continue;
+        }
+
+        uint64_t aligned_base   = ALIGN_DOWN(base, 0x40000000); // Round the range outwards to 1GiB boundaries.
+        uint64_t aligned_top    = ALIGN_UP(top, 0x40000000);
+        uint64_t aligned_length = aligned_top - aligned_base;
+
+        for (uint64_t j = 0; j < aligned_length; j += 0x40000000) {
+            uint64_t page = aligned_base + j;
+            map_page(pagemap, page, page, VMM_FLAG_WRITE, Size1GiB);
+        }
+    }
+
+    return pagemap;
+}
+
+void limine_memcpy_to_64_asm(int paging_mode, void *pagemap, uint64_t dst, void *src, size_t count);
+
+static void limine_memcpy_to_64(uint64_t dst, void *src, size_t count) { // Forwards the copy to limine_memcpy_to_64_asm() together with the identity pagemap.
+    static bool identity_map_ready = false;
+    static pagemap_t identity_map;
+    // The identity map is built on the first call only and reused by later calls.
+    if (!identity_map_ready) {
+        identity_map = build_identity_map();
+        identity_map_ready = true;
+    }
+
+    limine_memcpy_to_64_asm(paging_mode, identity_map.top_level, dst, src, count);
+}
+#endif
+
 static pagemap_t build_pagemap(int base_revision,
-                               int paging_mode, bool nx, struct elf_range *ranges, size_t ranges_count,
+                               bool nx, struct elf_range *ranges, size_t ranges_count,
                                uint64_t physical_base, uint64_t virtual_base,
                                uint64_t direct_map_offset) {
     pagemap_t pagemap = new_pagemap(paging_mode);
@@ -232,7 +296,7 @@ static uint64_t physical_base, virtual_base, slide, direct_map_offset;
 static size_t requests_count;
 static void **requests;
 
-static void set_paging_mode(int paging_mode, bool kaslr) {
+static void set_paging_mode(bool kaslr) {
     direct_map_offset = paging_mode_higher_half(paging_mode);
     if (kaslr) {
         // A quarter of the higher half of wiggle room for KASLR, align to 1GiB steps.
@@ -245,6 +309,12 @@ static uint64_t reported_addr(void *addr) {
     return (uint64_t)(uintptr_t)addr + direct_map_offset;
 }
 
+#if defined (__i386__)
+static uint64_t reported_addr_64(uint64_t addr) { // Like reported_addr(), but takes a full 64-bit address instead of a pointer.
+    return addr + direct_map_offset;
+}
+#endif
+
 #define get_phys_addr(addr) ({ \
     __auto_type get_phys_addr__addr = (addr); \
     uintptr_t get_phys_addr__r; \
@@ -256,7 +326,7 @@ static uint64_t reported_addr(void *addr) {
     get_phys_addr__r; \
 })
 
-static struct limine_file get_file(struct file_handle *file, char *cmdline) {
+static struct limine_file get_file(struct file_handle *file, char *cmdline, bool kernel) {
     struct limine_file ret = {0};
 
     if (file->pxe) {
@@ -294,7 +364,17 @@ static struct limine_file get_file(struct file_handle *file, char *cmdline) {
 
     ret.path = reported_addr(path);
 
-    ret.address = reported_addr(freadall_mode(file, MEMMAP_KERNEL_AND_MODULES, true));
+    void *freadall_ret = freadall_mode(file, MEMMAP_KERNEL_AND_MODULES, !kernel
+#if defined (__i386__)
+        , limine_memcpy_to_64
+#endif
+    );
+#if defined (__i386__)
+    ret.address = kernel ? reported_addr(freadall_ret) : reported_addr_64(*(uint64_t *)freadall_ret);
+#else
+    ret.address = reported_addr(freadall_ret);
+#endif
+
     ret.size = file->size;
 
     ret.cmdline = reported_addr(cmdline);
@@ -489,7 +569,7 @@ noreturn void limine_load(char *config, char *cmdline) {
     printv("limine: Top of HHDM:     %X\n", hhdm_span_top);
 
     // Paging Mode
-    int paging_mode, max_supported_paging_mode, min_supported_paging_mode;
+    int max_supported_paging_mode, min_supported_paging_mode;
 
 #if defined (__x86_64__) || defined (__i386__)
     max_supported_paging_mode = PAGING_MODE_X86_64_4LVL;
@@ -685,7 +765,7 @@ FEAT_START
         paging_mode = kern_min_mode;
     }
 
-    set_paging_mode(paging_mode, kaslr);
+    set_paging_mode(kaslr);
     paging_mode_set = true;
 
     struct limine_paging_mode_response *pm_response =
@@ -696,7 +776,7 @@ FEAT_START
 FEAT_END
 
     if (!paging_mode_set) {
-        set_paging_mode(paging_mode, kaslr);
+        set_paging_mode(kaslr);
     }
 
 #if defined (__aarch64__)
@@ -709,7 +789,7 @@ FEAT_END
 #endif
 
     struct limine_file *kf = ext_mem_alloc(sizeof(struct limine_file));
-    *kf = get_file(kernel_file, cmdline);
+    *kf = get_file(kernel_file, cmdline, true);
     fclose(kernel_file);
 
     // Entry point feature
@@ -1009,7 +1089,7 @@ FEAT_START
         }
 
         struct limine_file *l = &modules[final_module_count++];
-        *l = get_file(f, module_cmdline);
+        *l = get_file(f, module_cmdline, false);
 
         fclose(f);
     }
@@ -1197,7 +1277,7 @@ FEAT_END
 #endif
 
     pagemap_t pagemap = {0};
-    pagemap = build_pagemap(base_revision, paging_mode, nx_available, ranges, ranges_count,
+    pagemap = build_pagemap(base_revision, nx_available, ranges, ranges_count,
                             physical_base, virtual_base, direct_map_offset);
 
 #if defined (UEFI)
diff --git a/common/protos/limine_asm.asm_ia32 b/common/protos/limine_asm.asm_ia32
new file mode 100644
index 00000000..47997250
--- /dev/null
+++ b/common/protos/limine_asm.asm_ia32
@@ -0,0 +1,295 @@
+; Copies memory from 32-bit code to a destination given as a 64-bit address,
+; by temporarily switching the CPU into long mode.
+
+%ifdef UEFI
+extern _GLOBAL_OFFSET_TABLE_
+%endif
+
+section .data
+
+; Temporary GDT: null descriptor, 64-bit code segment (selector 0x08),
+; data segment (selector 0x10).
+local_gdt:
+    dq 0
+
+    dq 0x00209b0000000000
+    dq 0x0000930000000000
+  .top:
+
+local_gdt_ptr:
+    dw local_gdt.top - local_gdt - 1
+    dq local_gdt
+
+; Caller state saved on entry and restored before returning.
+old_gdt_ptr:
+    dw 0
+    dq 0
+
+old_cs:
+    dq 0
+
+interrupt_state:
+    dd 0
+
+old_ds:
+    dq 0
+old_es:
+    dq 0
+old_fs:
+    dq 0
+old_gs:
+    dq 0
+old_ss:
+    dq 0
+
+%ifdef UEFI
+paging_state:
+    dd 0
+
+pae_state:
+    dd 0
+
+old_pagemap:
+    dd 0
+%endif
+
+section .text
+
+; void limine_memcpy_to_64_asm(int paging_mode, void *pagemap, uint64_t dst, void *src, size_t count);
+;
+; Arguments are read from the stack relative to ebp: paging_mode at +8,
+; pagemap at +12, dst at +16 (8 bytes), src at +24, count at +28.
+;
+; Loads pagemap into CR3, enables long mode, copies count bytes from src to
+; dst with rep movsb, then leaves long mode again and restores the saved GDT,
+; segment registers, interrupt flag and (UEFI only) paging state.
+;
+; UEFI builds address every variable relative to the GOT base held in ebx.
+; BIOS builds never load ebx, so they must use absolute addresses instead.
+
+global limine_memcpy_to_64_asm
+limine_memcpy_to_64_asm:
+    push ebp
+    mov ebp, esp
+
+    pusha
+
+%ifdef UEFI
+    call .get_got
+  .get_got:
+    pop ebx
+    add ebx, _GLOBAL_OFFSET_TABLE_ + $$ - .get_got wrt ..gotpc
+%endif
+
+    ; Remember whether interrupts were enabled (EFLAGS.IF), then mask them
+    pushfd
+    mov eax, dword [esp]
+    add esp, 4
+    and eax, 0x200
+%ifdef UEFI
+    mov dword [ebx + interrupt_state wrt ..gotoff], eax
+%else
+    mov dword [interrupt_state], eax
+%endif
+
+    cli
+
+%ifdef UEFI
+    ; Disable paging if needed and save old pagemap
+    mov eax, cr0
+    and eax, (1 << 31)
+    mov dword [ebx + paging_state wrt ..gotoff], eax
+    mov eax, cr0
+    btr eax, 31
+    mov cr0, eax
+
+    mov eax, cr3
+    mov dword [ebx + old_pagemap wrt ..gotoff], eax
+%endif
+
+    mov eax, [ebp+12] ; pagemap
+    mov cr3, eax
+
+    ; Enable CR4.LA57
+    cmp dword [ebp+8], 0 ; paging_mode
+    je .no_la57
+    mov eax, cr4
+    bts eax, 12
+    mov cr4, eax
+  .no_la57:
+
+    ; Enable CR4.PAE
+%ifdef UEFI
+    mov eax, cr4
+    and eax, (1 << 5)
+    mov [ebx + pae_state wrt ..gotoff], eax
+%endif
+    mov eax, cr4
+    bts eax, 5
+    mov cr4, eax
+
+    ; Enable EFER.LME
+    mov ecx, 0xc0000080
+    rdmsr
+    bts eax, 8
+    wrmsr
+
+    ; Enable CR0.PG
+    mov eax, cr0
+    bts eax, 31
+    mov cr0, eax
+
+    ; Save old segments
+%ifdef UEFI
+    mov [ebx + old_ss wrt ..gotoff], ss
+    mov [ebx + old_gs wrt ..gotoff], gs
+    mov [ebx + old_fs wrt ..gotoff], fs
+    mov [ebx + old_es wrt ..gotoff], es
+    mov [ebx + old_ds wrt ..gotoff], ds
+%else
+    mov [old_ss], ss
+    mov [old_gs], gs
+    mov [old_fs], fs
+    mov [old_es], es
+    mov [old_ds], ds
+%endif
+
+    ; Save old CS
+    mov eax, cs
+%ifdef UEFI
+    mov dword [ebx + old_cs wrt ..gotoff], eax
+%else
+    mov dword [old_cs], eax
+%endif
+
+    ; Save old GDT
+%ifdef UEFI
+    sgdt [ebx + old_gdt_ptr wrt ..gotoff]
+%else
+    sgdt [old_gdt_ptr]
+%endif
+
+    ; Load new GDT
+%ifdef UEFI
+    lgdt [ebx + local_gdt_ptr wrt ..gotoff]
+%else
+    lgdt [local_gdt_ptr]
+%endif
+
+    ; Go 64
+    push 0x08
+    call .p1
+  .p1:
+    add dword [esp], .mode64 - .p1
+    retf
+
+bits 64
+  .mode64:
+    mov eax, 0x10
+    mov ds, eax
+    mov es, eax
+    mov fs, eax
+    mov gs, eax
+    mov ss, eax
+
+    ; Arguments are still addressed through the frame pointer set up on entry
+    mov rdi, qword [rbp+16]
+    mov esi, dword [rbp+24]
+    mov ecx, dword [rbp+28]
+
+    rep movsb
+
+    ; Restore old GDT
+%ifdef UEFI
+    lgdt [ebx + old_gdt_ptr wrt ..gotoff]
+%else
+    lgdt [old_gdt_ptr]
+%endif
+
+    ; Restore old segments
+%ifdef UEFI
+    mov ds, [ebx + old_ds wrt ..gotoff]
+    mov es, [ebx + old_es wrt ..gotoff]
+    mov fs, [ebx + old_fs wrt ..gotoff]
+    mov gs, [ebx + old_gs wrt ..gotoff]
+    mov ss, [ebx + old_ss wrt ..gotoff]
+%else
+    mov ds, [old_ds]
+    mov es, [old_es]
+    mov fs, [old_fs]
+    mov gs, [old_gs]
+    mov ss, [old_ss]
+%endif
+
+    ; Go 32
+%ifdef UEFI
+    push qword [ebx + old_cs wrt ..gotoff]
+%else
+    push qword [old_cs]
+%endif
+    call .p2
+  .p2:
+    add qword [rsp], .mode32 - .p2
+    retfq
+
+bits 32
+  .mode32:
+    ; Disable CR0.PG
+    mov eax, cr0
+    btr eax, 31
+    mov cr0, eax
+
+    ; Disable EFER.LME
+    mov ecx, 0xc0000080
+    rdmsr
+    btr eax, 8
+    wrmsr
+
+    ; Disable CR4.PAE
+%ifdef UEFI
+    cmp dword [ebx + pae_state wrt ..gotoff], 0
+    jne .no_disable_pae
+%endif
+    mov eax, cr4
+    btr eax, 5
+    mov cr4, eax
+%ifdef UEFI
+  .no_disable_pae:
+%endif
+
+    ; Disable CR4.LA57
+    mov eax, cr4
+    btr eax, 12
+    mov cr4, eax
+
+    ; Invalidate pagemap
+    xor eax, eax
+    mov cr3, eax
+
+%ifdef UEFI
+    mov eax, dword [ebx + old_pagemap wrt ..gotoff]
+    mov cr3, eax
+
+    cmp dword [ebx + paging_state wrt ..gotoff], 0
+    je .no_paging
+    mov eax, cr0
+    bts eax, 31
+    mov cr0, eax
+  .no_paging:
+%endif
+
+%ifdef UEFI
+    cmp dword [ebx + interrupt_state wrt ..gotoff], 0
+%else
+    cmp dword [interrupt_state], 0
+%endif
+    je .no_ints
+    sti
+  .no_ints:
+
+    popa
+    pop ebp
+
+    ret
+
+section .note.GNU-stack noalloc noexec nowrite progbits
tab: 248 wrap: offon