:: commit 746d9146c643f43f3c5257add10ce27ecc6098bb

mintsuki <mintsuki@protonmail.com> — 2021-03-04 04:15

parents: 82466973d3

Make the smp trampoline more UEFI friendly

diff --git a/Makefile b/Makefile
index c1358123..857dbc88 100644
--- a/Makefile
+++ b/Makefile
@@ -72,6 +72,7 @@ toolchain:
 
 gnu-efi:
 	git clone https://git.code.sf.net/p/gnu-efi/code --branch=3.0.12 --depth=1 $@
+	$(MAKE) -C gnu-efi
 
 ovmf:
 	mkdir -p ovmf
diff --git a/stage23/Makefile b/stage23/Makefile
index 6c206e3e..e8410438 100644
--- a/stage23/Makefile
+++ b/stage23/Makefile
@@ -5,11 +5,11 @@ ifeq ($(TARGET), bios)
 else ifeq ($(TARGET), uefi)
 	TOOLCHAIN=x86_64-elf
 else
-	$(error Invalid toolchain)
+	$(error Invalid target)
 endif
 
 CC = $(TOOLCHAIN)-gcc
-LD = $(TOOLCHAIN)-gcc
+LD = $(TOOLCHAIN)-ld
 OBJCOPY = $(TOOLCHAIN)-objcopy
 OBJDUMP = $(TOOLCHAIN)-objdump
 READELF = $(TOOLCHAIN)-readelf
@@ -27,7 +27,6 @@ INTERNAL_CFLAGS := \
 	-fplan9-extensions \
 	-ffreestanding \
 	-fno-stack-protector \
-	-fno-pic \
 	-fno-omit-frame-pointer \
 	-fno-lto \
 	-Wno-address-of-packed-member \
@@ -42,14 +41,17 @@ INTERNAL_CFLAGS := \
 	-I. \
 	-I..
 
+ifeq ($(TARGET), bios)
+	INTERNAL_CFLAGS += \
+		-fno-pic
+endif
+
 ifeq ($(TARGET), uefi)
 	INTERNAL_CFLAGS += \
 		-I../gnu-efi/inc \
 		-I../gnu-efi/inc/x86_64 \
 		-fpic \
-		-fshort-wchar \
-		-mno-red-zone \
-		-mcmodel=small
+		-mno-red-zone
 endif
 
 LDFLAGS = -Os -g
@@ -62,14 +64,9 @@ INTERNAL_LDFLAGS := \
 ifeq ($(TARGET), bios)
 	INTERNAL_LDFLAGS += \
 		-static \
-		-no-pie \
+		-fno-pie \
 		-lgcc \
 		-static-libgcc
-else ifeq ($(TARGET), uefi)
-	INTERNAL_LDFLAGS += \
-		-shared \
-		-z nocombreloc \
-		-Wl,-Bsymbolic
 endif
 
 .PHONY: all clean
@@ -79,19 +76,16 @@ ifeq ($(TARGET), bios)
 ASM_FILES := $(shell find -L ./ -type f -name '*.asm' | sort)
 endif
 OBJ := $(ASM_FILES:.asm=.o) $(C_FILES:.c=.o)
-ifeq ($(TARGET), uefi)
-OBJ += sys/smp_trampoline.o ../gnu-efi/lib/x86_64/efi_stub.o
-endif
 HEADER_DEPS := $(C_FILES:.c=.d)
 
 ifeq ($(TARGET), bios)
 all: limine_dbg.elf limine.sys stage2.bin stage2.bin.gz
 else ifeq ($(TARGET), uefi)
-all: limine_dbg.elf BOOTX64.EFI
+all: BOOTX64.EFI
 endif
 
-BOOTX64.EFI: limine.elf
-	$(OBJCOPY) -I elf64-x86-64 -O efi-app-x86_64 limine.elf $@
+BOOTX64.EFI: limine-efi.elf # convert the gnu-efi linked ELF into a PE EFI application; section globs are quoted so the shell never expands them
+	$(OBJCOPY) -j .text -j .sdata -j .data -j .dynamic -j .dynsym -j .rel -j .rela -j '.rel.*' -j '.rela.*' -j .reloc --target efi-app-x86_64 --subsystem=10 $< $@
 
 stage2.bin.gz: stage2.bin
 	gzip -n -9 < stage2.bin > stage2.bin.gz
@@ -100,31 +94,38 @@ stage2.bin: limine.sys
 	dd if=limine.sys bs=$$(( 0x$$($(READELF) -S limine.elf | grep .stage3.text | sed 's/^.*] //' | awk '{print $$3}' | sed 's/^0*//') - 0x8000 )) count=1 of=$@
 
 limine.map.o: limine_nomap.elf
-ifeq ($(TARGET), bios)
 	./gensyms.sh $(OBJDUMP) limine_nomap.elf limine
-else ifeq ($(TARGET), uefi)
-	./gensyms64.sh $(OBJDUMP) limine_nomap.elf limine
-endif
 
 limine.sys: limine.elf
 	$(OBJCOPY) -O binary $< $@
 
-limine_nomap.elf: $(OBJ) font.o
-	$(LD) $(OBJ) font.o $(LDFLAGS) $(INTERNAL_LDFLAGS) -Tlinker_nomap_$(TARGET).ld -o $@
-ifeq ($(TARGET), bios)
-	$(LD) $(OBJ) font.o $(LDFLAGS) $(INTERNAL_LDFLAGS) -Wl,--gc-sections -Tlinker_stage2only.ld -o limine_stage2only.elf || \
+limine_nomap.elf: $(OBJ) font.o sys/smp_trampoline.o
+	$(CC) $(OBJ) font.o sys/smp_trampoline.o $(LDFLAGS) $(INTERNAL_LDFLAGS) -Tlinker_nomap.ld -o $@
+	$(CC) $(OBJ) font.o sys/smp_trampoline.o $(LDFLAGS) $(INTERNAL_LDFLAGS) -Wl,--gc-sections -Tlinker_stage2only.ld -o limine_stage2only.elf || \
 		( echo "This error means that stage 2 was trying to use stage 3 symbols before loading stage 3" && \
 		  false )
-endif
 
-limine_dbg.elf: $(OBJ) font.o
-	$(LD) $(OBJ) font.o $(LDFLAGS) $(INTERNAL_LDFLAGS) -Tlinker_dbg.ld -o $@
+limine_dbg.elf: $(OBJ) font.o sys/smp_trampoline.o # debug image: every prerequisite is linked, in order, with linker_dbg.ld
+	$(CC) $^ $(LDFLAGS) $(INTERNAL_LDFLAGS) -Tlinker_dbg.ld -o $@
+
+sys/smp_trampoline.o: sys/smp_trampoline.bin # wrap the flat trampoline image in a linkable object (source of the _binary_sys_smp_trampoline_bin_* symbols)
+	$(OBJCOPY) -B i8086 -I binary -O default $< $@
+
+sys/smp_trampoline.bin: sys/smp_trampoline.real # assemble the org-0 AP trampoline as a flat binary
+	nasm -f bin $< -o $@
 
-font.o:
+font.o: font.bin
 	$(OBJCOPY) -B i8086 -I binary -O default font.bin $@
 
-limine.elf: $(OBJ) font.o limine.map.o
-	$(LD) $(OBJ) font.o limine.map.o $(LDFLAGS) $(INTERNAL_LDFLAGS) -Tlinker_$(TARGET).ld -o $@
+limine.elf: $(OBJ) font.o sys/smp_trampoline.o limine.map.o # final image: every prerequisite is linked, in order, with linker.ld
+	$(CC) $^ $(LDFLAGS) $(INTERNAL_LDFLAGS) -Tlinker.ld -o $@
+
+limine-efi.elf: $(OBJ) font.o sys/smp_trampoline.o # shared-object ELF linked against the gnu-efi crt0, later converted to BOOTX64.EFI
+	$(LD) -shared -Bsymbolic \
+		-T../gnu-efi/gnuefi/elf_x86_64_efi.lds \
+		../gnu-efi/x86_64/gnuefi/crt0-efi-x86_64.o \
+		../gnu-efi/x86_64/gnuefi/libgnuefi.a \
+		$^ -o $@
 
 -include $(HEADER_DEPS)
 
@@ -133,13 +134,8 @@ limine.elf: $(OBJ) font.o limine.map.o
 %.o: %.S
 	$(CC) $(CFLAGS) $(INTERNAL_CFLAGS) -c $< -o $@
 
-ifeq ($(TARGET), bios)
 %.o: %.asm
-	nasm $< -f elf32 -o $@
-else ifeq ($(TARGET), uefi)
-%.o: %.asm
-	nasm $< -f elf64 -o $@
-endif
+	nasm $< -F dwarf -g -f elf32 -o $@
 
 clean:
-	rm -f limine.elf limine_nomap.elf limine_stage2only.elf font.o limine.map.o limine.sys stage2.bin stage2.bin.gz BOOTX64.EFI $(OBJ) $(HEADER_DEPS)
+	rm -f limine.elf limine_nomap.elf limine_stage2only.elf font.o limine.map.o limine.sys stage2.bin stage2.bin.gz BOOTX64.EFI sys/smp_trampoline.bin sys/smp_trampoline.o $(OBJ) $(HEADER_DEPS)
diff --git a/stage23/entry.s3.c b/stage23/entry.s3.c
index 46c48577..c03aa345 100644
--- a/stage23/entry.s3.c
+++ b/stage23/entry.s3.c
@@ -32,7 +32,7 @@ EFI_STATUS efi_main(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable) {
     gBS = SystemTable->BootServices;
     gRT = SystemTable->RuntimeServices;
 
-    print("Limine " LIMINE_VERSION "\n\n");
+    print("Limine " LIMINE_VERSION "\n%X\n", print);
 
     volume_create_index();
 
diff --git a/stage23/gensyms64.sh b/stage23/gensyms64.sh
deleted file mode 100755
index ffad0834..00000000
--- a/stage23/gensyms64.sh
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/bin/sh
-
-set -e
-
-TMP1=$(mktemp)
-TMP2=$(mktemp)
-TMP3=$(mktemp)
-TMP4=$(mktemp)
-
-$1 -t "$2" | sed '/\bd\b/d' | sort > "$TMP1"
-grep "\.text" < "$TMP1" | cut -d' ' -f1 > "$TMP2"
-grep "\.text" < "$TMP1" | awk 'NF{ print $NF }' > "$TMP3"
-
-echo "section .map" > "$TMP4"
-echo "global $3_map" >> "$TMP4"
-echo "$3_map:" >> "$TMP4"
-
-paste -d'$' "$TMP2" "$TMP3" | sed 's/^/dq 0x/g' | sed 's/$/", 0/g' | sed 's/\$/\ndb "/g' >> "$TMP4"
-
-echo "dq 0xffffffffffffffff" >> "$TMP4"
-
-nasm -f elf64 "$TMP4" -o $3.map.o
-
-rm "$TMP1" "$TMP2" "$TMP3" "$TMP4"
diff --git a/stage23/linker_bios.ld b/stage23/linker.ld
similarity index 100%
rename from stage23/linker_bios.ld
rename to stage23/linker.ld
diff --git a/stage23/linker_nomap_bios.ld b/stage23/linker_nomap.ld
similarity index 100%
rename from stage23/linker_nomap_bios.ld
rename to stage23/linker_nomap.ld
diff --git a/stage23/linker_nomap_uefi.ld b/stage23/linker_nomap_uefi.ld
deleted file mode 100644
index cd6878a7..00000000
--- a/stage23/linker_nomap_uefi.ld
+++ /dev/null
@@ -1,37 +0,0 @@
-OUTPUT_FORMAT(elf64-x86-64)
-ENTRY(efi_main)
-
-SECTIONS
-{
-    . = 4K;
-
-    .text : {
-        *(.text*)
-        *(.realmode*)
-        *(.stage3_entry*)
-    }
-
-    .rodata : {
-        limine_map = .;
-        *(.rodata*)
-    }
-
-    .data : {
-        *(.data*)
-    }
-
-    .bss : {
-        *(COMMON)
-        *(.bss*)
-    }
-
-    .reloc : {
-        LONG(0);
-        LONG(10);
-        SHORT(0);
-    }
-
-    /DISCARD/ : {
-        *(*)
-    }
-}
diff --git a/stage23/linker_uefi.ld b/stage23/linker_uefi.ld
deleted file mode 100644
index 73a13f8b..00000000
--- a/stage23/linker_uefi.ld
+++ /dev/null
@@ -1,37 +0,0 @@
-OUTPUT_FORMAT(elf64-x86-64)
-ENTRY(efi_main)
-
-SECTIONS
-{
-    . = 4K;
-
-    .text : {
-        *(.text*)
-        *(.realmode*)
-        *(.stage3_entry*)
-    }
-
-    .rodata : {
-        *(.map*)
-        *(.rodata*)
-    }
-
-    .data : {
-        *(.data*)
-    }
-
-    .bss : {
-        *(COMMON)
-        *(.bss*)
-    }
-
-    .reloc : {
-        LONG(0);
-        LONG(10);
-        SHORT(0);
-    }
-
-    /DISCARD/ : {
-        *(*)
-    }
-}
diff --git a/stage23/sys/smp.c b/stage23/sys/smp.c
index 322cc719..d7dd100a 100644
--- a/stage23/sys/smp.c
+++ b/stage23/sys/smp.c
@@ -1,6 +1,7 @@
 #include <stddef.h>
 #include <stdint.h>
 #include <stdbool.h>
+#include <lib/libc.h>
 #include <lib/acpi.h>
 #include <sys/cpu.h>
 #include <lib/blib.h>
@@ -9,6 +10,7 @@
 #include <sys/lapic.h>
 #include <mm/vmm.h>
 #include <mm/pmm.h>
+#include <mm/mtrr.h>
 
 struct madt {
     struct sdt;
@@ -47,25 +49,50 @@ static void delay(uint32_t cycles) {
         inb(0x80);
 }
 
-void     smp_trampoline(void);
-extern   struct gdtr smp_tpl_gdt;
-struct   smp_information *smp_tpl_info_struct;
-uint8_t  smp_tpl_booted_flag;
-uint32_t smp_tpl_pagemap;
-uint8_t  smp_tpl_target_mode;
+extern symbol _binary_sys_smp_trampoline_bin_start;
+extern symbol _binary_sys_smp_trampoline_bin_end;
+
+struct trampoline_passed_info { // C view of the `passed_info` block at the tail of sys/smp_trampoline.real
+    uint8_t  smp_tpl_booted_flag; // polled by smp_start_ap() until it reads 1
+    uint8_t  smp_tpl_target_mode; // bit 0: longmode, bit 1: lv5, bit 2: x2apic
+    uint32_t smp_tpl_pagemap; // the trampoline loads this into cr3
+    void *mtrr_restore_vector; // NOTE(review): the asm side reserves a `dd` (4 bytes) here - confirm pointer width on 64-bit builds
+    struct smp_information *smp_tpl_info_struct; // NOTE(review): also a `dd` on the asm side - confirm pointer width
+    struct gdtr smp_tpl_gdt; // descriptor the trampoline passes to lgdt
+} __attribute__((packed));
 
 static bool smp_start_ap(uint32_t lapic_id, struct gdtr *gdtr,
                          struct smp_information *info_struct,
                          bool longmode, bool lv5, uint32_t pagemap,
                          bool x2apic) {
+    size_t trampoline_size = (size_t)_binary_sys_smp_trampoline_bin_end
+                           - (size_t)_binary_sys_smp_trampoline_bin_start;
+
     // Prepare the trampoline
-    smp_tpl_info_struct = info_struct;
-    smp_tpl_booted_flag = 0;
-    smp_tpl_pagemap     = pagemap;
-    smp_tpl_target_mode = ((uint32_t)x2apic << 2)
+    static void *trampoline = NULL;
+    if (trampoline == NULL) {
+        trampoline = conv_mem_alloc_aligned(trampoline_size, 4096);
+
+        memcpy(trampoline, _binary_sys_smp_trampoline_bin_start, trampoline_size);
+    }
+
+    static struct trampoline_passed_info *passed_info = NULL;
+    if (passed_info == NULL) {
+        passed_info = (void *)(((uintptr_t)trampoline + trampoline_size)
+                               - sizeof(struct trampoline_passed_info));
+    }
+
+    passed_info->smp_tpl_info_struct = info_struct;
+    passed_info->smp_tpl_booted_flag = 0;
+    passed_info->smp_tpl_pagemap     = pagemap;
+    passed_info->smp_tpl_target_mode = ((uint32_t)x2apic << 2)
                         | ((uint32_t)lv5 << 1)
                         | (uint32_t)longmode;
-    smp_tpl_gdt         = *gdtr;
+    passed_info->smp_tpl_gdt         = *gdtr;
+    passed_info->mtrr_restore_vector = mtrr_restore;
+    passed_info->smp_tpl_booted_flag = 0;
+
+    asm volatile ("" ::: "memory");
 
     // Send the INIT IPI
     if (x2apic) {
@@ -79,14 +106,14 @@ static bool smp_start_ap(uint32_t lapic_id, struct gdtr *gdtr,
     // Send the Startup IPI
     if (x2apic) {
         x2apic_write(LAPIC_REG_ICR0, ((uint64_t)lapic_id << 32) |
-                                     ((size_t)smp_trampoline / 4096) | 0x4600);
+                                     ((size_t)trampoline / 4096) | 0x4600);
     } else {
         lapic_write(LAPIC_REG_ICR1, lapic_id << 24);
-        lapic_write(LAPIC_REG_ICR0, ((size_t)smp_trampoline / 4096) | 0x4600);
+        lapic_write(LAPIC_REG_ICR0, ((size_t)trampoline / 4096) | 0x4600);
     }
 
     for (int i = 0; i < 100; i++) {
-        if (locked_read(&smp_tpl_booted_flag) == 1) {
+        if (locked_read(&passed_info->smp_tpl_booted_flag) == 1) {
             return true;
         }
         delay(10000);
diff --git a/stage23/sys/smp_trampoline.asm b/stage23/sys/smp_trampoline.real
similarity index 61%
rename from stage23/sys/smp_trampoline.asm
rename to stage23/sys/smp_trampoline.real
index 06c6558d..c0f31204 100644
--- a/stage23/sys/smp_trampoline.asm
+++ b/stage23/sys/smp_trampoline.real
@@ -1,35 +1,27 @@
-extern smp_tpl_info_struct
-extern smp_tpl_booted_flag
-extern smp_tpl_pagemap
-extern smp_tpl_target_mode
+org 0
 
-extern mtrr_restore
-
-section .bss
-
-temp_stack:
-    resb 1024
-  .top:
-
-section .realmode
-
-global smp_trampoline
-align 0x1000
 bits 16
 smp_trampoline:
     cli
     cld
 
-    xor ax, ax
-    mov ds, ax
+    mov ebx, cs
+    shl ebx, 4
+
+    lgdt [cs:passed_info.gdtr]
 
-    lgdt [smp_tpl_gdt]
+    lea eax, [ebx + .mode32]
+    mov [cs:.farjmp_off], eax
 
     mov eax, cr0
     bts eax, 0
     mov cr0, eax
+    o32 jmp far [cs:.farjmp]
+
+  .farjmp:
+    .farjmp_off: dd 0
+    .farjmp_seg: dd 0x18
 
-    jmp 0x18:.mode32
     bits 32
   .mode32:
     mov ax, 0x20
@@ -44,7 +36,7 @@ smp_trampoline:
     btr eax, 30
     mov cr0, eax
 
-    test dword [smp_tpl_target_mode], (1 << 2)
+    test dword [ebx + passed_info.target_mode], (1 << 2)
     jz .nox2apic
 
     mov ecx, 0x1b
@@ -54,18 +46,18 @@ smp_trampoline:
     wrmsr
 
   .nox2apic:
-    mov esp, temp_stack.top
+    lea esp, [ebx + temp_stack.top]
 
-    call mtrr_restore
+    call [ebx + passed_info.mtrr_restore_vector]
 
-    test dword [smp_tpl_target_mode], (1 << 0)
+    test dword [ebx + passed_info.target_mode], (1 << 0)
     jz parking32
 
     mov eax, cr4
     bts eax, 5
     mov cr4, eax
 
-    test dword [smp_tpl_target_mode], (1 << 1)
+    test dword [ebx + passed_info.target_mode], (1 << 1)
     jz .no5lv
 
     mov eax, cr4
@@ -73,7 +65,7 @@ smp_trampoline:
     mov cr4, eax
 
   .no5lv:
-    mov eax, dword [smp_tpl_pagemap]
+    mov eax, dword [ebx + passed_info.pagemap]
     mov cr3, eax
 
     mov ecx, 0xc0000080
@@ -85,7 +77,12 @@ smp_trampoline:
     bts eax, 31
     mov cr0, eax
 
-    jmp 0x28:.mode64
+    mov eax, .mode64
+    add eax, ebx
+    push 0x28
+    push eax
+    retf
+
     bits 64
   .mode64:
     mov ax, 0x30
@@ -97,19 +94,11 @@ smp_trampoline:
 
     jmp parking64
 
-global smp_tpl_gdt
-align 16
-smp_tpl_gdt:
-    dw 0
-    dd 0
-
-section .text
-
 bits 32
 parking32:
-    mov edi, dword [smp_tpl_info_struct]
+    mov edi, dword [ebx + passed_info.smp_info_struct]
     mov eax, 1
-    lock xchg dword [smp_tpl_booted_flag], eax
+    lock xchg dword [ebx + passed_info.booted_flag], eax
 
     xor eax, eax
   .loop:
@@ -135,9 +124,10 @@ parking32:
 
 bits 64
 parking64:
-    mov edi, dword [smp_tpl_info_struct]
+    mov ebx, ebx
+    mov edi, dword [rbx + passed_info.smp_info_struct]
     mov eax, 1
-    lock xchg dword [smp_tpl_booted_flag], eax
+    lock xchg dword [rbx + passed_info.booted_flag], eax
 
     xor eax, eax
   .loop:
@@ -166,3 +156,19 @@ parking64:
     xor r14, r14
     xor r15, r15
     ret
+
+align 16
+temp_stack: ; scratch stack; esp is set to .top before the call through mtrr_restore_vector
+    times 1024 db 0 ; 1024 zero bytes emitted into the flat binary
+  .top:
+
+align 16
+passed_info: ; parameter block at the end of the image, filled in by smp_start_ap() via struct trampoline_passed_info
+    .booted_flag db 0 ; NOTE(review): the parking code does `lock xchg dword` on this 1-byte field - confirm the 4-byte write is intended
+    .target_mode db 0 ; bit 0: longmode, bit 1: lv5, bit 2: x2apic
+    .pagemap dd 0 ; loaded into cr3
+    .mtrr_restore_vector dd 0 ; called indirectly once the temporary stack is set up
+    .smp_info_struct dd 0 ; loaded into edi by the parking code
+    .gdtr: ; operand of the initial lgdt
+        dw 0
+        dd 0
tab: 248 wrap: offon