bits 16

section .rodata

; Offset of a symbol from the first byte of the trampoline blob.
%define TRAMP_OFF(sym)  ((sym) - smp_trampoline_start)

; CR0 image loaded when leaving real mode: PE (bit 0) | ET (bit 4).
%define CR0_PE_ET       0x00000011

;-----------------------------------------------------------------------
; smp_trampoline_start - real-mode entry point of the AP trampoline.
;
; Every memory operand is expressed as an offset from smp_trampoline_start
; and accessed through CS, so the code does not depend on the address it
; was assembled at. Presumably the blob is copied to a low-memory page and
; entered with CS:IP = page:0 -- confirm against the code that starts APs.
;
; Out:   EBX = CS << 4 (linear base of the running copy); later stages use
;        it to address passed_info and to build jump targets.
; Clobb: EAX, EBX, CR0, IDTR, GDTR.
;-----------------------------------------------------------------------
global smp_trampoline_start
smp_trampoline_start:
    cli
    cld

    ; EBX = linear address of this blob.
    mov ebx, cs
    shl ebx, 4

    ; All-zero IDT descriptor and the GDT descriptor stored in passed_info.
    o32 lidt [cs:TRAMP_OFF(invalid_idt)]
    o32 lgdt [cs:TRAMP_OFF(passed_info.gdtr)]

    ; Patch the far pointer below with the linear address of .mode32.
    ; This stores into the copy being executed, so that copy must be writable.
    lea eax, [ebx + TRAMP_OFF(.mode32)]
    mov [cs:TRAMP_OFF(.farjmp_off)], eax

    ; Enter protected mode and reload CS through the patched far pointer.
    mov eax, CR0_PE_ET
    mov cr0, eax
    o32 jmp far [cs:TRAMP_OFF(.farjmp)]

    ; Far pointer consumed by the jump above: 32-bit offset, then selector.
    ; Never reached by fall-through.
  .farjmp:
    .farjmp_off: dd 0
    .farjmp_seg: dd 0x18

; Offset of a symbol from the first byte of the trampoline blob.
%define TRAMP_OFF(sym)      ((sym) - smp_trampoline_start)

; MSR indices used by this stage.
%define MSR_IA32_APIC_BASE  0x1b
%define MSR_IA32_PAT        0x277
%define MSR_EFER            0xc0000080

;-----------------------------------------------------------------------
; .mode32 - 32-bit protected-mode stage, entered through selector 0x18.
;
; In:    EBX = linear base of the trampoline.
; Out:   far-returns to .mode64 through selector 0x28 with PAE, EFER.LME
;        and CR0.PG set; ESP = low dword of passed_info.temp_stack.
; Note:  EBX must still hold the trampoline base on exit. With IA32_TARGET
;        defined this relies on the called routines preserving it --
;        confirm their calling convention.
;-----------------------------------------------------------------------
bits 32
  .mode32:
    ; Selector 0x20 into every data segment register.
    mov ax, 0x20
    mov ds, ax
    mov es, ax
    mov fs, ax
    mov gs, ax
    mov ss, ax

    ; Null LDT, then CR4 = 0. LLDT leaves EAX untouched, so one zeroing
    ; serves both.
    xor eax, eax
    lldt ax
    mov cr4, eax

    ; Program IA32_PAT when CPUID.01h:EDX bit 16 (PAT) is set. CPUID
    ; overwrites EBX and no stack exists yet, so the base waits in ESI.
    mov esi, ebx
    mov eax, 1
    xor ecx, ecx
    cpuid
    bt edx, 16
    jnc .pat_done
    mov ecx, MSR_IA32_PAT
    mov eax, 0x00070406             ; PA0=WB  PA1=WT  PA2=UC- PA3=UC
    mov edx, 0x00000105             ; PA4=WP  PA5=WC  PA6=UC  PA7=UC
    wrmsr
  .pat_done:
    mov ebx, esi

    ; IA32_APIC_BASE: bit 10 = x2APIC enable, bit 11 = APIC global enable.
    mov ecx, MSR_IA32_APIC_BASE
    rdmsr
    bt eax, 10
    jnc .load_apic_base             ; this CPU is not in x2APIC mode

    ; This CPU is in x2APIC mode; the new value can be written directly
    ; if it also has x2APIC enabled.
    test dword [ebx + TRAMP_OFF(passed_info.bsp_apic_addr_msr_lo)], (1 << 10)
    jnz .load_apic_base

    ; x2APIC now, xAPIC wanted: pass through the fully disabled state.
    and eax, ~((1 << 11) | (1 << 10))
    wrmsr

  .load_apic_base:
    ; Load the value supplied in passed_info, forcing the global enable
    ; bit on and bit 8 (BSP flag) off.
    mov eax, [ebx + TRAMP_OFF(passed_info.bsp_apic_addr_msr_lo)]
    mov edx, [ebx + TRAMP_OFF(passed_info.bsp_apic_addr_msr_hi)]
    or eax, 1 << 11
    and eax, ~(1 << 8)
    wrmsr

    ; From here on a stack is available.
    mov esp, [ebx + TRAMP_OFF(passed_info.temp_stack)]

    ; CR4.PAE (bit 5).
    mov eax, cr4
    or eax, 1 << 5
    mov cr4, eax

    ; EFER = LME (bit 8), plus NXE (bit 11) when target_mode bit 3 is set;
    ; every other EFER bit is written as zero.
    mov ecx, MSR_EFER
    xor edx, edx
    mov eax, 1 << 8
    test dword [ebx + TRAMP_OFF(passed_info.target_mode)], (1 << 3)
    jz .efer_ready
    or eax, 1 << 11
  .efer_ready:
    wrmsr

    ; target_mode bit 1 requests CR4 bit 12 (LA57, 5-level paging).
    test dword [ebx + TRAMP_OFF(passed_info.target_mode)], (1 << 1)
    jz .la57_done

    mov eax, cr4
    or eax, 1 << 12
    mov cr4, eax

  .la57_done:
    ; Page-table root.
    mov eax, dword [ebx + TRAMP_OFF(passed_info.pagemap)]
    mov cr3, eax

    ; CR0.PG (bit 31), plus CR0.WP (bit 16) when target_mode bit 4 is set.
    mov eax, cr0
    test dword [ebx + TRAMP_OFF(passed_info.target_mode)], (1 << 4)
    jz .wp_done
    or eax, 1 << 16
  .wp_done:
    or eax, 1 << 31
    mov cr0, eax

%ifdef IA32_TARGET
    ; Synchronise MTRRs with BSP
    call [ebx + TRAMP_OFF(passed_info.mtrr_restore)]

    ; Configure local APIC handoff state (if pointer is set)
    mov eax, dword [ebx + TRAMP_OFF(passed_info.lapic_setup)]
    test eax, eax
    jz .lapic_done32
    call eax
  .lapic_done32:
%endif

    ; Far return into the 64-bit stage: selector 0x28, offset = linear
    ; address of .mode64.
    lea eax, [ebx + TRAMP_OFF(.mode64)]
    push 0x28
    push eax
    retf

; Offset of a symbol from the first byte of the trampoline blob.
%define TRAMP_OFF(sym)  ((sym) - smp_trampoline_start)

; Selector loaded into TR; its descriptor sits at this byte offset in the GDT.
%define TSS_SELECTOR    0x38

;-----------------------------------------------------------------------
; .mode64 - first 64-bit stage, entered through selector 0x28.
;
; In:    EBX = linear base of the trampoline (upper half of RBX unknown).
; Out:   jumps to parking64 at (hhdm + trampoline base + offset);
;        RBX = zero-extended trampoline base, RAX = jump target.
; NOTE(review): the GDT base inside passed_info.gdtr is rebased in place.
;        If the same passed_info is reused for another CPU, the base must
;        be rewritten first or it is rebased twice -- confirm the caller.
;-----------------------------------------------------------------------
bits 64
  .mode64:
    ; Selector 0x30 into every data segment register.
    mov ax, 0x30
    mov ds, ax
    mov es, ax
    mov fs, ax
    mov gs, ax
    mov ss, ax

    ; A 32-bit move clears bits 63:32 of RBX.
    mov ebx, ebx

    ; Add passed_info.hhdm to the GDT base and reload GDTR from the result.
    mov rax, qword [rbx + TRAMP_OFF(passed_info.hhdm)]
    add qword [rbx + TRAMP_OFF(passed_info.gdtr) + 2], rax
    lgdt [rbx + TRAMP_OFF(passed_info.gdtr)]

    ; Clear TSS busy bit and load TR. Byte 5 of the descriptor is its
    ; access byte; 0x89 = present, type 9 (available TSS).
    mov rcx, [rbx + TRAMP_OFF(passed_info.gdtr) + 2]
    mov byte [rcx + TSS_SELECTOR + 5], 0x89
    mov ecx, TSS_SELECTOR
    ltr cx

    ; RAX still holds passed_info.hhdm: continue at the rebased address
    ; of parking64.
    lea rax, [rax + rbx + TRAMP_OFF(parking64)]

    jmp rax

; Offset of a symbol from the first byte of the trampoline blob.
%define TRAMP_OFF(sym)  ((sym) - smp_trampoline_start)

;-----------------------------------------------------------------------
; parking64 - final 64-bit stage: announce that this CPU is up, wait for
; a target address, then enter it.
;
; In:    RBX = trampoline base (low address); RSP = temporary stack.
; Flow:  1. (X86_64_TARGET only) call passed_info.mtrr_restore, then
;           passed_info.lapic_setup when that pointer is non-zero.
;        2. RDI = passed_info.smp_info_struct + passed_info.hhdm.
;        3. Store 1 into passed_info.booted_flag.
;        4. Wait until the qword at [RDI + 16] is non-zero, using
;           MONITOR/MWAIT when CPUID.01h:ECX bit 3 reports it, otherwise
;           a PAUSE spin loop.
;        5. Reload CR3, switch to the stack at [RDI + 8], push a zero
;           return address, zero the general-purpose registers, and IRETQ
;           to the fetched address with CS=0x28, SS=0x30, RFLAGS=0x2.
; Out:   does not return. At the target: RDI = info struct pointer,
;        [RSP] = 0, every other GPR = 0.
;-----------------------------------------------------------------------
bits 64
parking64:
    ; RBX was already zero-extended in .mode64; kept as a harmless guard.
    mov ebx, ebx

%ifdef X86_64_TARGET
    ; Synchronise MTRRs with BSP
    call [rbx + TRAMP_OFF(passed_info.mtrr_restore)]

    ; Configure local APIC handoff state (if pointer is set)
    mov rax, [rbx + TRAMP_OFF(passed_info.lapic_setup)]
    test rax, rax
    jz .skip_lapic_setup64
    call rax
  .skip_lapic_setup64:
%endif

    ; RDI = info struct pointer: 32-bit value rebased by passed_info.hhdm.
    mov edi, dword [rbx + TRAMP_OFF(passed_info.smp_info_struct)]
    add rdi, qword [rbx + TRAMP_OFF(passed_info.hhdm)]

    ; Publish "booted". booted_flag is one byte, so the exchange must be
    ; byte-wide: a dword exchange would also zero the adjacent target_mode
    ; byte and the low 16 bits of pagemap. XCHG with a memory operand is
    ; implicitly locked.
    mov eax, 1
    xchg byte [rbx + TRAMP_OFF(passed_info.booted_flag)], al

    ; Check for MONITOR/MWAIT support. CPUID overwrites RBX, which is no
    ; longer needed past this point.
    mov eax, 1
    xor ecx, ecx
    cpuid
    test ecx, (1 << 3)
    jnz .monitor_spin

    ; Fallback: poll [RDI + 16] with PAUSE between reads.
  .loop:
    mov rax, qword [rdi + 16]
    test rax, rax
    jnz .out
    pause
    jmp .loop

    ; MONITOR/MWAIT wait. The field is read again between arming the
    ; monitor and MWAIT so a store that lands in between is not missed.
  .monitor_spin:
    mov rax, qword [rdi + 16]
    test rax, rax
    jnz .out
    lea rax, [rdi + 16]             ; address to monitor
    xor ecx, ecx                    ; no extensions
    xor edx, edx                    ; no hints
    monitor
    mov rax, qword [rdi + 16]
    test rax, rax
    jnz .out
    xor eax, eax                    ; MWAIT hints = 0
    xor ecx, ecx                    ; MWAIT extensions = 0
    mwait
    jmp .monitor_spin

    ; RAX = non-zero value fetched from [RDI + 16], used as the new RIP.
  .out:
    ; Clear TLB
    mov rbx, cr3
    mov cr3, rbx

    ; Switch to new stack (HHDM address, safe after lower half unmap)
    mov rsp, qword [rdi + 8]

    ; Push fake return address
    push 0
    mov rsi, rsp                    ; RSP value the target will start with

    ; Prepare iretq frame
    push 0x30                       ; SS
    push rsi                        ; RSP
    push 0x2                        ; RFLAGS: only the always-one bit 1
    push 0x28                       ; CS
    push rax                        ; RIP

    ; Zero out all GPRs (except rdi = mp_info pointer)
    xor eax, eax
    xor ebx, ebx
    xor ecx, ecx
    xor edx, edx
    xor esi, esi
    xor ebp, ebp
    xor r8d, r8d
    xor r9d, r9d
    xor r10d, r10d
    xor r11d, r11d
    xor r12d, r12d
    xor r13d, r13d
    xor r14d, r14d
    xor r15d, r15d

    iretq

; All-zero descriptor loaded with LIDT by the real-mode entry code: limit 0
; and base 0, so no interrupt or exception can be delivered through it.
; 16 bytes are reserved; the 16-bit `o32 lidt` reads only the first 6.
invalid_idt:
    dq 0, 0

; Parameter block read by the trampoline stages. Presumably filled in by
; the code that starts the AP -- confirm against the caller. The fields are
; packed with no padding; the layout must not change. Comments describe
; how this file uses each field.
align 16
passed_info:
    .booted_flag:           db 0    ; set to 1 by parking64 once the CPU is up
    .target_mode:           db 0    ; bit 1: CR4.LA57, bit 3: EFER.NXE,
                                    ; bit 4: CR0.WP
    .pagemap:               dd 0    ; loaded into CR3
    .smp_info_struct:       dd 0    ; 32-bit address; hhdm is added to it
    .gdtr:                          ; GDT descriptor for LGDT
                            dw 0    ;   limit
                            dq 0    ;   base (hhdm is added in .mode64)
    .hhdm:                  dq 0    ; offset added to low addresses
    .bsp_apic_addr_msr_lo:  dd 0    ; IA32_APIC_BASE value, bits 31:0
    .bsp_apic_addr_msr_hi:  dd 0    ; IA32_APIC_BASE value, bits 63:32
    .mtrr_restore:          dq 0    ; routine called unconditionally
    .temp_stack:            dq 0    ; low dword becomes ESP in .mode32
    .lapic_setup:           dq 0    ; optional routine; skipped when zero

; First byte past the trampoline blob (code plus passed_info).
smp_trampoline_end:

; Size in bytes of the blob, exported as a 64-bit value. It is placed after
; smp_trampoline_end, so it is not counted in the size.
global smp_trampoline_size
smp_trampoline_size: dq smp_trampoline_end - smp_trampoline_start

; Empty GNU-stack note: tells the linker this object needs no executable stack.
section .note.GNU-stack noalloc noexec nowrite progbits