protos/limine: Strictly define x86-64 machine state for base revision 5
diff --git a/common/protos/limine_32.asm_x86 b/common/protos/limine_32.asm_x86
index 3d98d19b..5fb50f70 100644
--- a/common/protos/limine_32.asm_x86
+++ b/common/protos/limine_32.asm_x86
@@ -22,15 +22,6 @@ limine_spinup_32:
wrmsr
.no_pat:
- ; Enable EFER.NXE
- cmp dword [esp+32], 0 ; nx_available
- je .no_nx
- mov ecx, 0xc0000080
- rdmsr
- bts eax, 11
- wrmsr
- .no_nx:
-
; Enable CR4.LA57
cmp dword [esp+4], 0 ; level5pg
je .no_la57
@@ -54,10 +45,14 @@ limine_spinup_32:
bts eax, 5
mov cr4, eax
- ; Enable EFER.LME
+ ; Set EFER (LME + NX if available), all other bits cleared
mov ecx, 0xc0000080
- rdmsr
- bts eax, 8
+ xor edx, edx ; wrmsr writes EDX:EAX, so the upper 32 bits of EFER become 0
+ mov eax, 1 << 8 ; EFER.LME only; value is built from scratch instead of read back with rdmsr
+ cmp dword [esp+32], 0 ; nx_available
+ je .no_nx
+ or eax, 1 << 11 ; EFER.NXE
+ .no_nx:
wrmsr
; Enable CR0.PG
@@ -85,6 +80,12 @@ bits 64
mov eax, [rsp+28] ; local_gdt
lgdt [rax]
+ ; Clear TSS busy bit and load TR
+ mov rax, [rax + 2] ; rax = GDT base, read from offset 2 of the structure just given to lgdt
+ mov byte [rax + 0x3d], 0x89 ; 0x3d = 0x38 + 5: access byte of the TSS descriptor, reset to 0x89
+ mov ax, 0x38 ; selector of the TSS descriptor
+ ltr ax
+
; Jump to higher half
mov rax, qword [rsp+36]
add rsp, rax
@@ -117,7 +118,7 @@ bits 64
mov rax, qword [rsp+12] ; entry_point
push 0x30
push rsi
- pushfq
+ push 0x2 ; fixed flags value (only bit 1 set) in place of the live flags pushfq stored
push 0x28
push rax
diff --git a/common/sys/gdt.s2.c b/common/sys/gdt.s2.c
index 2f413116..a7d564a7 100644
--- a/common/sys/gdt.s2.c
+++ b/common/sys/gdt.s2.c
@@ -60,7 +60,17 @@ static struct gdt_desc gdt_descs[] = {
.access = 0b10010011,
.granularity = 0b00000000,
.base_hi = 0x00
- }
+ },
+
+ { // 0x38: TSS descriptor low (base 0, limit 0)
+ .limit = 0x0000,
+ .base_low = 0x0000,
+ .base_mid = 0x00,
+ .access = 0x89, // same value the spin-up assembly stores at GDT + 0x3d before ltr
+ .granularity = 0x00,
+ .base_hi = 0x00
+ },
+ {0} // 0x40: TSS descriptor high (base upper = 0)
};
#if defined (BIOS)
diff --git a/common/sys/smp_trampoline.asm_x86 b/common/sys/smp_trampoline.asm_x86
index 91f1a382..97296344 100644
--- a/common/sys/smp_trampoline.asm_x86
+++ b/common/sys/smp_trampoline.asm_x86
@@ -78,9 +78,14 @@ smp_trampoline_start:
bts eax, 5
mov cr4, eax
+ ; Set EFER (LME + NX if available), all other bits cleared
mov ecx, 0xc0000080
- mov eax, 0x100
xor edx, edx
+ mov eax, 1 << 8 ; EFER.LME (same value as the previous 0x100)
+ test dword [ebx + (passed_info.target_mode - smp_trampoline_start)], (1 << 3) ; target_mode bit 3 selects NX
+ jz .no_nx
+ or eax, 1 << 11 ; EFER.NXE
+ .no_nx:
wrmsr
test dword [ebx + (passed_info.target_mode - smp_trampoline_start)], (1 << 1)
@@ -94,7 +99,12 @@ smp_trampoline_start:
mov eax, dword [ebx + (passed_info.pagemap - smp_trampoline_start)]
mov cr3, eax
+ ; Enable CR0.PG (and WP if requested)
mov eax, cr0
+ test dword [ebx + (passed_info.target_mode - smp_trampoline_start)], (1 << 4) ; target_mode bit 4 selects WP
+ jz .no_wp
+ bts eax, 16 ; CR0.WP
+ .no_wp:
bts eax, 31
mov cr0, eax
@@ -125,27 +135,16 @@ smp_trampoline_start:
mov ss, ax
mov ebx, ebx
- test dword [rbx + (passed_info.target_mode - smp_trampoline_start)], (1 << 3)
- jz .nonx
-
- mov ecx, 0xc0000080
- rdmsr
- bts eax, 11
- wrmsr
-
- .nonx:
- test dword [rbx + (passed_info.target_mode - smp_trampoline_start)], (1 << 4)
- jz .nowp
-
- mov rax, cr0
- bts rax, 16
- mov cr0, rax
-
- .nowp:
mov rax, qword [rbx + (passed_info.hhdm - smp_trampoline_start)]
add qword [rbx + (passed_info.gdtr - smp_trampoline_start) + 2], rax
lgdt [rbx + (passed_info.gdtr - smp_trampoline_start)]
+ ; Clear TSS busy bit and load TR (NOTE(review): this writes the GDT in memory - confirm no other CPU runs this concurrently)
+ mov rcx, [rbx + (passed_info.gdtr - smp_trampoline_start) + 2] ; rcx = GDT base, already offset by hhdm above
+ mov byte [rcx + 0x3d], 0x89 ; 0x3d = 0x38 + 5: access byte of the TSS descriptor, reset to 0x89
+ mov ecx, 0x38 ; selector of the TSS descriptor
+ ltr cx
+
lea rax, [rax + rbx + (parking64 - smp_trampoline_start)]
jmp rax
@@ -207,24 +206,37 @@ parking64:
mov rbx, cr3
mov cr3, rbx
+ ; Switch to new stack (HHDM address, safe after lower half unmap)
mov rsp, qword [rdi + 8]
+
+ ; Push fake return address
push 0
+ mov rsi, rsp ; rsi = stack pointer stored in the frame below (points at the fake return address)
+
+ ; Prepare iretq frame
+ push 0x30 ; SS slot
+ push rsi ; RSP slot
+ push 0x2 ; RFLAGS slot: only bit 1 set
+ push 0x28 ; CS slot
push rax
- xor rax, rax
- xor rbx, rbx
- xor rcx, rcx
- xor rdx, rdx
- xor rsi, rsi
- xor rbp, rbp
- xor r8, r8
- xor r9, r9
- xor r10, r10
- xor r11, r11
- xor r12, r12
- xor r13, r13
- xor r14, r14
- xor r15, r15
- ret
+
+ ; Zero out all GPRs (except rdi = mp_info pointer)
+ xor eax, eax ; 32-bit forms suffice: writing a 32-bit register also zeroes its upper half
+ xor ebx, ebx
+ xor ecx, ecx
+ xor edx, edx
+ xor esi, esi
+ xor ebp, ebp
+ xor r8d, r8d
+ xor r9d, r9d
+ xor r10d, r10d
+ xor r11d, r11d
+ xor r12d, r12d
+ xor r13d, r13d
+ xor r14d, r14d
+ xor r15d, r15d
+
+ iretq ; pops the frame built above; the value pushed from rax becomes RIP
invalid_idt:
times 2 dq 0
