:: commit 41c68e5e43567a1c292317a9805549938671d837

mintsuki <mintsuki@protonmail.com> — 2020-09-18 12:39

parents: fc51b7e062

Initial SMP implementation

diff --git a/Makefile b/Makefile
index 9bc78539..669c8c89 100644
--- a/Makefile
+++ b/Makefile
@@ -42,7 +42,7 @@ echfs-test: limine-install test.img
 	echfs-utils -m -p0 test.img import test/test.elf boot/test.elf
 	echfs-utils -m -p0 test.img import test/limine.cfg limine.cfg
 	./limine-install limine.bin test.img
-	qemu-system-x86_64 -hda test.img -debugcon stdio -enable-kvm
+	qemu-system-x86_64 -net none -smp 4 -hda test.img -debugcon stdio -enable-kvm
 
 ext2-test: limine-install test.img
 	$(MAKE) -C test
diff --git a/STIVALE2.md b/STIVALE2.md
index f8f79e6e..552a52ce 100644
--- a/STIVALE2.md
+++ b/STIVALE2.md
@@ -193,6 +193,20 @@ Identifier: `0x932f477032007e8f`
 
 This tag does not have extra members.
 
+#### SMP header tag
+
+The presence of this tag enables support for booting up application processors.
+
+```c
+struct stivale2_header_tag_smp {
+    uint64_t identifier;          // Identifier: 0x1ab015085f3273df
+    uint64_t next;
+    uint64_t flags;               // Flags:
+                                  //   bit 0: 0 = use xAPIC, 1 = use x2APIC
+                                  // All other flags are undefined.
+} __attribute__((packed));
+```
+
 ## stivale2 structure
 
 The stivale2 structure returned by the bootloader looks like this:
@@ -355,3 +369,38 @@ struct stivale2_struct_tag_firmware {
     uint64_t flags;             // Bit 0: 0 = UEFI, 1 = BIOS
 } __attribute__((packed));
 ```
+
+#### SMP structure tag
+
+This tag reports to the kernel information about the system's logical CPUs (the BSP and the application processors).
+
+```c
+struct stivale2_struct_tag_smp {
+    uint64_t identifier;        // Identifier: 0x34d1d96339647025
+    uint64_t next;
+    uint64_t cpu_count;         // Total number of logical CPUs (including BSP)
+    struct stivale2_smp_info smp_info[];
+} __attribute__((packed));
+```
+
+```c
+struct stivale2_smp_info {
+    uint32_t processor_id;      // Processor ID as specified by MADT
+    uint32_t lapic_id;          // LAPIC ID as specified by MADT
+    uint64_t target_stack;      // The stack that will be loaded in ESP/RSP
+                                // once the goto_address field is loaded.
+                                // This MUST point to a valid stack of at least
+                                // 256 bytes in size, and 16-byte aligned.
+    uint64_t goto_address;      // This address is polled by the started APs
+                                // until the kernel on the BSP performs an
+                                // atomic write to this field.
+                                // When that happens, bootloader code will
+                                // load up ESP/RSP with the stack value as
+                                // specified in target_stack.
+                                // It will then proceed to load a pointer to
+                                // this very structure into either register
+                                // RDI for 64-bit or on the stack for 32-bit,
+                                // then, goto_address is called and execution is
+                                // handed off.
+} __attribute__((packed));
+```
diff --git a/limine.bin b/limine.bin
index f3516ea4..2c3fdc74 100644
Binary files a/limine.bin and b/limine.bin differ
diff --git a/stage2/drivers/lapic.h b/stage2/drivers/lapic.h
new file mode 100644
index 00000000..28e88f17
--- /dev/null
+++ b/stage2/drivers/lapic.h
@@ -0,0 +1,23 @@
+#ifndef __DRIVERS__APIC_H__
+#define __DRIVERS__APIC_H__
+
+#include <stdint.h>
+#include <stddef.h>
+#include <lib/cio.h>
+
+#define LAPIC_REG_ICR0     0x300
+#define LAPIC_REG_ICR1     0x310
+#define LAPIC_REG_SPURIOUS 0x0f0
+#define LAPIC_REG_EOI      0x0b0
+
+static inline uint32_t lapic_read(uint32_t reg) { // Return the 32-bit LAPIC register at byte offset `reg` (e.g. LAPIC_REG_ICR0).
+    size_t lapic_mmio_base = (size_t)(rdmsr(0x1b) & 0xfffff000); // base comes from MSR 0x1b; the mask keeps address bits 12..31 only
+    return mmind((void *)(lapic_mmio_base + reg)); // one 32-bit MMIO load
+}
+
+static inline void lapic_write(uint32_t reg, uint32_t data) { // Store `data` into the 32-bit LAPIC register at byte offset `reg`.
+    size_t lapic_mmio_base = (size_t)(rdmsr(0x1b) & 0xfffff000); // base is re-read from MSR 0x1b on every call; mask keeps address bits 12..31
+    mmoutd((void *)(lapic_mmio_base + reg), data); // one 32-bit MMIO store
+}
+
+#endif
diff --git a/stage2/lib/acpi.c b/stage2/lib/acpi.c
index 49c050af..15c41c6b 100644
--- a/stage2/lib/acpi.c
+++ b/stage2/lib/acpi.c
@@ -59,7 +59,7 @@ void *acpi_get_table(const char *signature, int index) {
         if (!memcmp(ptr->signature, signature, 4)
          && !acpi_checksum(ptr, ptr->length)
          && cnt++ == index) {
-            print("acpi: Found \"%s\" at %X\n", signature, ptr);
+            print("acpi: Found \"%s\" at %x\n", signature, ptr);
             return ptr;
         }
     }
diff --git a/stage2/lib/blib.h b/stage2/lib/blib.h
index a3045a94..54e858b7 100644
--- a/stage2/lib/blib.h
+++ b/stage2/lib/blib.h
@@ -11,7 +11,7 @@ int cpuid(uint32_t leaf, uint32_t subleaf,
 
 __attribute__((noreturn)) void panic(const char *fmt, ...);
 
-void pit_sleep(uint64_t pit_ticks);
+void pit_sleep(uint32_t pit_ticks);
 int pit_sleep_and_quit_on_keypress(uint32_t pit_ticks);
 
 void brewind(size_t count);
diff --git a/stage2/lib/cio.h b/stage2/lib/cio.h
index ac1c88ce..8c8842ca 100644
--- a/stage2/lib/cio.h
+++ b/stage2/lib/cio.h
@@ -3,6 +3,13 @@
 
 #include <stdint.h>
 
+#define FLAT_PTR(PTR) (*((int(*)[])(PTR))) // PTR dereferenced as an int array of unspecified length
+
+#define BYTE_PTR(PTR)  (*((uint8_t *)(PTR))) // lvalue for the 8-bit object at PTR
+#define WORD_PTR(PTR)  (*((uint16_t *)(PTR))) // lvalue for the 16-bit object at PTR
+#define DWORD_PTR(PTR) (*((uint32_t *)(PTR))) // lvalue for the 32-bit object at PTR
+#define QWORD_PTR(PTR) (*((uint64_t *)(PTR))) // lvalue for the 64-bit object at PTR
+
 static inline void port_out_b(uint16_t port, uint8_t value) {
     asm volatile ("out dx, al"  : : "a" (value), "d" (port) : "memory");
 }
@@ -33,4 +40,134 @@ static inline uint32_t port_in_d(uint16_t port) {
     return value;
 }
 
+static inline void mmoutb(void *addr, uint8_t value) { // Store the 8-bit `value` at `addr` with one explicit mov.
+    asm volatile (
+        "mov %0, %1\n\t" // Intel operand order: %0 (memory) is the destination
+        : "=m"(BYTE_PTR(addr))
+        : "r"(value)
+        : "memory" // compiler barrier: memory accesses are not cached or reordered across the asm
+    );
+}
+
+static inline void mmoutw(void *addr, uint16_t value) { // Store the 16-bit `value` at `addr` with one explicit mov.
+    asm volatile (
+        "mov %0, %1\n\t" // Intel operand order: %0 (memory) is the destination
+        : "=m"(WORD_PTR(addr))
+        : "r"(value)
+        : "memory" // compiler barrier: memory accesses are not cached or reordered across the asm
+    );
+}
+
+static inline void mmoutd(void *addr, uint32_t value) { // Store the 32-bit `value` at `addr` with one explicit mov (used by lapic_write).
+    asm volatile (
+        "mov %0, %1\n\t" // Intel operand order: %0 (memory) is the destination
+        : "=m"(DWORD_PTR(addr))
+        : "r"(value)
+        : "memory" // compiler barrier: memory accesses are not cached or reordered across the asm
+    );
+}
+
+static inline void mmoutq(void *addr, uint64_t value) { // Store the 64-bit `value` at `addr` with one explicit mov.
+    asm volatile (
+        "mov %0, %1\n\t" // Intel operand order: %0 (memory) is the destination
+        : "=m"(QWORD_PTR(addr))
+        : "r"(value) // NOTE(review): a uint64_t "r" operand needs a 64-bit register; confirm this is usable in the 32-bit stage2 build
+        : "memory" // compiler barrier: memory accesses are not cached or reordered across the asm
+    );
+}
+
+static inline uint8_t mminb(void *addr) { // Load and return the 8-bit value at `addr` with one explicit mov.
+    uint8_t ret;
+    asm volatile (
+        "mov %0, %1\n\t" // Intel operand order: %0 (register) is the destination
+        : "=r"(ret)
+        : "m"(BYTE_PTR(addr))
+        : "memory" // compiler barrier: memory accesses are not cached or reordered across the asm
+    );
+    return ret;
+}
+
+static inline uint16_t mminw(void *addr) { // Load and return the 16-bit value at `addr` with one explicit mov.
+    uint16_t ret;
+    asm volatile (
+        "mov %0, %1\n\t" // Intel operand order: %0 (register) is the destination
+        : "=r"(ret)
+        : "m"(WORD_PTR(addr))
+        : "memory" // compiler barrier: memory accesses are not cached or reordered across the asm
+    );
+    return ret;
+}
+
+static inline uint32_t mmind(void *addr) { // Load and return the 32-bit value at `addr` with one explicit mov (used by lapic_read).
+    uint32_t ret;
+    asm volatile (
+        "mov %0, %1\n\t" // Intel operand order: %0 (register) is the destination
+        : "=r"(ret)
+        : "m"(DWORD_PTR(addr))
+        : "memory" // compiler barrier: memory accesses are not cached or reordered across the asm
+    );
+    return ret;
+}
+
+static inline uint64_t mminq(void *addr) { // Load and return the 64-bit value at `addr` with one explicit mov.
+    uint64_t ret;
+    asm volatile (
+        "mov %0, %1\n\t" // Intel operand order: %0 (register) is the destination
+        : "=r"(ret) // NOTE(review): a uint64_t "=r" operand needs a 64-bit register; confirm this is usable in the 32-bit stage2 build
+        : "m"(QWORD_PTR(addr))
+        : "memory" // compiler barrier: memory accesses are not cached or reordered across the asm
+    );
+    return ret;
+}
+
+static inline uint64_t rdmsr(uint32_t msr) { // Read model-specific register `msr` and return its 64-bit value.
+    uint32_t edx, eax;
+    asm volatile ("rdmsr"
+                  : "=a" (eax), "=d" (edx) // result halves arrive in eax (low) and edx (high)
+                  : "c" (msr) // MSR index is passed in ecx
+                  : "memory");
+    return ((uint64_t)edx << 32) | eax; // reassemble high:low
+}
+
+static inline void wrmsr(uint32_t msr, uint64_t value) { // Write the 64-bit `value` to model-specific register `msr`.
+    uint32_t edx = value >> 32; // high half goes in edx
+    uint32_t eax = (uint32_t)value; // low half goes in eax
+    asm volatile ("wrmsr"
+                  :
+                  : "a" (eax), "d" (edx), "c" (msr) // MSR index is passed in ecx
+                  : "memory");
+}
+
+#define write_cr(reg, val) ({ /* Load `val` into control register cr<reg>; `reg` must be a string literal (e.g. "3") because it is pasted into the asm template. */ \
+    asm volatile ("mov cr" reg ", %0" : : "r" (val)); \
+})
+
+#define read_cr(reg) ({ /* Evaluate to the current value of control register cr<reg> as a size_t; `reg` must be a string literal (e.g. "0"). */ \
+    size_t cr; \
+    asm volatile ("mov %0, cr" reg : "=r" (cr)); \
+    cr; /* value of the statement expression */ \
+})
+
+#define locked_read(var) ({ /* Atomically read *var: a locked xadd of 0 leaves memory unchanged and hands back its value. */ \
+    typeof(*(var)) ret = 0; \
+    asm volatile ( \
+        "lock xadd %1, %0;" \
+        : "+r" (ret), "+m" (*(var)) /* xadd also stores to the memory operand, so it is declared in/out */ \
+        : \
+        : "memory", "cc" \
+    ); \
+    ret; /* value of the statement expression: the value that was in *var */ \
+})
+
+#define locked_write(var, val) ({ /* Atomically store `val` into *var; evaluates to the value *var held before. */ \
+    typeof(*(var)) ret = (val); \
+    asm volatile ( \
+        "lock xchg %1, %0;" \
+        : "+r" (ret), "+m" (*(var)) /* xchg writes the memory operand, so it is declared in/out */ \
+        : \
+        : "memory" \
+    ); \
+    ret; /* value of the statement expression: the previous contents of *var */ \
+})
+
 #endif
diff --git a/stage2/lib/sleep.asm b/stage2/lib/sleep.asm
index c0463e32..75faad21 100644
--- a/stage2/lib/sleep.asm
+++ b/stage2/lib/sleep.asm
@@ -9,6 +9,83 @@ int_08_isr:
     iret
     bits 32
 
+global pit_sleep
+pit_sleep:                                   ; void pit_sleep(uint32_t pit_ticks): busy-wait in real mode until the tick counter equals pit_ticks
+    ; Hook int 0x08
+    mov edx, dword [0x08*4]                  ; current vector 0x08 entry ...
+    mov dword [0x80*4], edx                  ; ... is parked in the vector 0x80 slot until the dehook below
+    mov edx, int_08_isr                      ; NOTE(review): edx is overwritten before it is read again, so this load has no effect
+    mov dword [0x08*4], int_08_isr
+
+    ; pit_ticks in edx
+    mov edx, dword [esp+4]                   ; first stack argument (read before the pushes below move esp)
+
+    mov dword [int_08_ticks_counter], 0      ; restart the count; presumably advanced by int_08_isr (defined outside this hunk)
+
+    ; Save non-scratch GPRs
+    push ebx
+    push esi
+    push edi
+    push ebp
+
+    ; Jump to real mode
+    jmp 0x08:.bits16                         ; selector 0x08: presumably the 16-bit code segment of the bootloader GDT
+  .bits16:
+    bits 16
+    mov ax, 0x10                             ; selector 0x10: presumably the matching 16-bit data segment
+    mov ds, ax
+    mov es, ax
+    mov fs, ax
+    mov gs, ax
+    mov ss, ax
+    mov eax, cr0
+    and al, 0xfe                             ; clear CR0 bit 0 to leave protected mode
+    mov cr0, eax
+    jmp 0x00:.cszero                         ; far jump reloads cs with real-mode segment 0
+  .cszero:
+    xor ax, ax
+    mov ds, ax
+    mov es, ax
+    mov fs, ax
+    mov gs, ax
+    mov ss, ax
+
+    sti                                      ; interrupts on so the hooked vector 0x08 can fire
+
+  .loop:
+    cmp dword [int_08_ticks_counter], edx    ; exact-equality test against the requested tick count
+    je .done
+    jmp .loop
+
+  .done:
+    cli
+
+    ; Jump back to pmode
+    mov eax, cr0
+    or al, 1                                 ; set CR0 bit 0 again
+    mov cr0, eax
+    jmp 0x18:.bits32                         ; selector 0x18: 32-bit code segment (same selector the SMP trampoline uses)
+  .bits32:
+    bits 32
+    mov ax, 0x20                             ; selector 0x20: 32-bit data segment
+    mov ds, ax
+    mov es, ax
+    mov fs, ax
+    mov gs, ax
+    mov ss, ax
+
+    ; Restore non-scratch GPRs
+    pop ebp
+    pop edi
+    pop esi
+    pop ebx
+
+    ; Dehook int 0x08
+    mov edx, dword [0x80*4]                  ; entry saved at the top of the function
+    mov dword [0x08*4], edx
+
+    ret
+
 global pit_sleep_and_quit_on_keypress
 pit_sleep_and_quit_on_keypress:
     ; Hook int 0x08
diff --git a/stage2/lib/smp.c b/stage2/lib/smp.c
new file mode 100644
index 00000000..072fbd5e
--- /dev/null
+++ b/stage2/lib/smp.c
@@ -0,0 +1,127 @@
+#include <stddef.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <lib/acpi.h>
+#include <lib/cio.h>
+#include <lib/blib.h>
+#include <lib/smp.h>
+#include <drivers/lapic.h>
+#include <mm/vmm64.h>
+
+struct madt { // Layout of the ACPI table fetched with signature "APIC" in init_smp.
+    struct sdt; // anonymous member: common table header fields (init_smp reads `length` from it)
+    uint32_t local_controller_addr;
+    uint32_t flags;
+    char     madt_entries_begin[]; // variable-length entry list; each entry starts with a madt_header
+} __attribute__((packed));
+
+struct madt_header { // Two bytes that begin every MADT entry.
+    uint8_t type; // entry kind; init_smp handles type 0 only
+    uint8_t length; // entry size in bytes; init_smp advances by this amount to reach the next entry
+} __attribute__((packed));
+
+struct madt_lapic { // MADT entry of type 0 ("Processor local xAPIC" in init_smp).
+    struct madt_header; // anonymous member: type + length
+    uint8_t  processor_id;
+    uint8_t  lapic_id; // destination used for the INIT/Startup IPIs
+    uint32_t flags; // init_smp examines bits 0 and 1 (presumably ACPI "enabled" / "online capable" -- confirm against the ACPI spec)
+} __attribute__((packed));
+
+struct gdtr { // Memory operand of sgdt/lgdt as used here: 16-bit limit followed by a 32-bit base.
+    uint16_t limit;
+    uint32_t ptr; // base address of the GDT
+} __attribute__((packed));
+
+void     smp_trampoline(void);                 // real-mode AP entry point (smp_trampoline.asm)
+extern   struct gdtr smp_tpl_gdt;              // GDT descriptor slot inside the trampoline, filled per AP
+struct   smp_information *smp_tpl_info_struct; // info block handed to the AP being started
+uint32_t smp_tpl_booted_flag;                  // 32-bit on purpose: the AP sets it with `lock xchg dword`
+uint32_t smp_tpl_pagemap;                      // value the AP loads into cr3 when going to long mode
+uint32_t smp_tpl_target_mode;                  // 32-bit on purpose: the AP tests it with `cmp dword`; 0 = stay 32-bit
+
+static bool smp_start_ap(uint8_t lapic_id, struct gdtr *gdtr,  // Start the AP with LAPIC ID `lapic_id`; true once it reports in, false after 20 ticks without a report.
+                         struct smp_information *info_struct,  // published to the AP through smp_tpl_info_struct
+                         uint8_t target_mode, uint32_t pagemap) {  // target_mode 0 = park in 32-bit mode, otherwise long mode using `pagemap`
+    // Prepare the trampoline
+    smp_tpl_info_struct = info_struct;
+    smp_tpl_booted_flag = 0;  // cleared so the poll below only sees this AP's report
+    smp_tpl_pagemap     = pagemap;
+    smp_tpl_target_mode = target_mode;
+    smp_tpl_gdt         = *gdtr;  // the AP loads this descriptor with lgdt
+
+    // Send the INIT IPI
+    lapic_write(LAPIC_REG_ICR1, lapic_id << 24);  // destination LAPIC ID goes in bits 24 and up
+    lapic_write(LAPIC_REG_ICR0, 0x500);
+    pit_sleep(1);  // wait one PIT tick before the Startup IPI
+
+    // Send the Startup IPI
+    lapic_write(LAPIC_REG_ICR1, lapic_id << 24);
+    lapic_write(LAPIC_REG_ICR0, ((size_t)smp_trampoline / 4096) | 0x600);  // low bits carry the trampoline's 4 KiB page number
+
+    for (int i = 0; i < 20; i++) {  // poll once per tick, at most 20 ticks
+        pit_sleep(1);
+        if (locked_read(&smp_tpl_booted_flag) == 1) {  // set by parking32/parking64 on the AP
+            return true;
+        }
+    }
+
+    return false;
+}
+
+struct smp_information *init_smp(size_t   *cpu_count,  // out: 1 for the BSP plus one per AP that reported in
+                                 bool      longmode,   // true: APs are taken to 64-bit mode; false: parked in 32-bit mode
+                                 pagemap_t pagemap,    // top_level is given to the APs (consumed on the long-mode path only)
+                                 bool      x2apic) {   // true: xAPIC (type 0) MADT entries are skipped
+    // Search for MADT table
+    struct madt *madt = acpi_get_table("APIC", 0);
+
+    if (madt == NULL)
+        return NULL;  // no MADT: *cpu_count is left untouched
+
+    struct gdtr gdtr;
+    asm volatile ("sgdt %0" : "=m"(gdtr) : : "memory");  // sgdt STORES into gdtr, so it must be an output operand
+
+    struct smp_information *ret = balloc(0);  // NOTE(review): presumably marks where the per-AP blocks allocated below begin -- confirm balloc semantics
+    *cpu_count = 1;
+
+    // Parse the MADT entries
+    for (uint8_t *madt_ptr = (uint8_t *)madt->madt_entries_begin;
+      (uintptr_t)madt_ptr < (uintptr_t)madt + madt->length;
+      madt_ptr += *(madt_ptr + 1)) {  // byte 1 of every entry is its length
+        switch (*madt_ptr) {  // byte 0 of every entry is its type
+            case 0: {
+                // Processor local xAPIC
+                if (x2apic)
+                    continue;  // `continue` targets the enclosing for loop, i.e. moves to the next entry
+
+                struct madt_lapic *lapic = (void *)madt_ptr;
+
+                // Do not try to restart the BSP
+                if (lapic->lapic_id == 0)  // NOTE(review): assumes the BSP always has LAPIC ID 0 -- confirm
+                    continue;
+
+                // Check if we can actually try to start the AP
+                if (!((lapic->flags & 1) ^ ((lapic->flags >> 1) & 1)))  // proceed only when exactly one of flag bits 0 and 1 is set
+                    continue;
+
+                print("smp: Found candidate AP for bring-up. LAPIC ID: %u\n", lapic->lapic_id);
+
+                struct smp_information *info_struct =
+                        balloc_aligned(sizeof(struct smp_information), 1);
+
+                // Try to start the AP
+                if (!smp_start_ap(lapic->lapic_id, &gdtr, info_struct,
+                                  longmode ? 1 : 0, (uint32_t)pagemap.top_level)) {
+                    print("smp: FAILED to bring-up AP\n");
+                    brewind(sizeof(struct smp_information));  // give back the block allocated for this AP
+                    continue;
+                }
+
+                (*cpu_count)++;
+                break;
+            }
+        }
+    }
+
+    return ret;
+}
diff --git a/stage2/lib/smp.h b/stage2/lib/smp.h
new file mode 100644
index 00000000..d0dcd143
--- /dev/null
+++ b/stage2/lib/smp.h
@@ -0,0 +1,20 @@
+#ifndef __LIB__SMP_H__
+#define __LIB__SMP_H__
+
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <mm/vmm64.h>
+
+struct smp_information { // Per-AP block allocated by init_smp and handed to the AP through the trampoline.
+    uint32_t processor_id;
+    uint32_t lapic_id;
+    uint64_t goto_address; // NOTE(review): stivale2_smp_info has a target_stack field before this one; confirm whether the two layouts are meant to match
+} __attribute__((packed));
+
+struct smp_information *init_smp(size_t   *cpu_count, // out: number of running CPUs, BSP included
+                                 bool      longmode,  // true: bring APs up in 64-bit mode
+                                 pagemap_t pagemap,   // page tables given to the APs for long mode
+                                 bool      x2apic);   // true: xAPIC MADT entries are ignored; returns NULL when no MADT is found
+
+#endif
diff --git a/stage2/lib/smp_trampoline.asm b/stage2/lib/smp_trampoline.asm
new file mode 100644
index 00000000..8fd0abef
--- /dev/null
+++ b/stage2/lib/smp_trampoline.asm
@@ -0,0 +1,92 @@
+extern smp_tpl_info_struct
+extern smp_tpl_booted_flag
+extern smp_tpl_pagemap
+extern smp_tpl_target_mode
+
+section .realmode
+
+global smp_trampoline
+align 0x1000                                 ; page-aligned: the Startup IPI vector is this address / 4096
+bits 16
+smp_trampoline:                              ; AP entry point: real mode -> 32-bit protected mode -> (optionally) long mode, then park
+    cli
+    cld
+
+    xor ax, ax
+    mov ds, ax                               ; ds = 0 so the absolute address below resolves as written
+
+    lgdt [smp_tpl_gdt]                       ; descriptor copied here by smp_start_ap
+
+    mov eax, cr0
+    bts eax, 0                               ; set CR0 bit 0: enter protected mode
+    mov cr0, eax
+
+    jmp 0x18:.mode32                         ; selector 0x18: 32-bit code segment
+    bits 32
+  .mode32:
+    mov ax, 0x20                             ; selector 0x20: 32-bit data segment
+    mov ds, ax
+    mov es, ax
+    mov fs, ax
+    mov gs, ax
+    mov ss, ax
+
+    mov eax, cr0
+    btr eax, 29                              ; clear CR0 bit 29 (NW in the Intel SDM)
+    btr eax, 30                              ; clear CR0 bit 30 (CD in the Intel SDM)
+    mov cr0, eax
+
+    cmp dword [smp_tpl_target_mode], 0       ; 32-bit read: the C definition must be at least 4 bytes wide
+    je parking32                             ; target mode 0: stay in 32-bit mode
+
+    mov eax, dword [smp_tpl_pagemap]
+    mov cr3, eax                             ; page tables supplied by the BSP
+
+    mov eax, cr4
+    bts eax, 5                               ; set CR4 bit 5 (PAE in the Intel SDM)
+    mov cr4, eax
+
+    mov ecx, 0xc0000080                      ; MSR 0xc0000080 (EFER in the Intel/AMD manuals)
+    rdmsr
+    bts eax, 8                               ; set bit 8 (long mode enable)
+    wrmsr
+
+    mov eax, cr0
+    bts eax, 31                              ; set CR0 bit 31: enable paging
+    mov cr0, eax
+
+    jmp 0x28:.mode64                         ; selector 0x28: 64-bit code segment
+    bits 64
+  .mode64:
+    mov ax, 0x30                             ; selector 0x30: 64-bit data segment
+    mov ds, ax
+    mov es, ax
+    mov fs, ax
+    mov gs, ax
+    mov ss, ax
+
+    jmp parking64
+
+global smp_tpl_gdt
+align 16
+smp_tpl_gdt:                                 ; 6-byte GDT descriptor (matches struct gdtr); written by smp_start_ap before each AP start
+    dw 0                                     ; limit
+    dd 0                                     ; 32-bit base
+
+section .text
+
+bits 32
+parking32:                                   ; 32-bit AP parking spot: report in, then spin forever
+    mov ecx, dword [smp_tpl_info_struct]     ; fetch this AP's info pointer before the BSP can reuse the slot
+    mov eax, 1
+    lock xchg dword [smp_tpl_booted_flag], eax ; 32-bit store: the C definition must be at least 4 bytes wide
+    mov eax, 0xcafebabe                      ; marker value left in eax
+    jmp $                                    ; spin in place; nothing polls goto_address yet
+
+bits 64
+parking64:                                   ; 64-bit AP parking spot: report in, then spin forever
+    mov ecx, dword [smp_tpl_info_struct]     ; fetch this AP's info pointer (low 32 bits) before the BSP can reuse the slot
+    mov eax, 1
+    lock xchg dword [smp_tpl_booted_flag], eax ; 32-bit store: the C definition must be at least 4 bytes wide
+    mov eax, 0xdeadbeef                      ; marker value left in eax
+    jmp $                                    ; spin in place; nothing polls goto_address yet
diff --git a/stage2/protos/stivale.c b/stage2/protos/stivale.c
index b35e40d7..55a8c60e 100644
--- a/stage2/protos/stivale.c
+++ b/stage2/protos/stivale.c
@@ -210,14 +210,52 @@ void stivale_load(char *cmdline, int boot_drive) {
     stivale_struct.memory_map_entries = (uint64_t)memmap_entries;
     stivale_struct.memory_map_addr    = (uint64_t)(size_t)memmap;
 
-    stivale_spinup(bits, level5pg && (stivale_hdr.flags & (1 << 1)),
-                   entry_point, &stivale_struct, stivale_hdr.stack,
-                   memmap, memmap_entries);
+    bool want_5lv = level5pg && (stivale_hdr.flags & (1 << 1));
+    pagemap_t pagemap = stivale_build_pagemap(want_5lv, memmap, memmap_entries);
+
+    stivale_spinup(bits, want_5lv, pagemap,
+                   entry_point, &stivale_struct, stivale_hdr.stack);
 }
 
-__attribute__((noreturn)) void stivale_spinup(int bits, bool level5pg,
-                 uint64_t entry_point, void *stivale_struct, uint64_t stack,
-                 struct e820_entry_t *memmap, size_t memmap_entries) {
+pagemap_t stivale_build_pagemap(bool level5pg, struct e820_entry_t *memmap,  // Build the page tables handed to the kernel (and, for SMP, to the APs).
+                                size_t memmap_entries) {  // memmap[0..memmap_entries) is mapped in addition to the fixed ranges below
+    pagemap_t pagemap = new_pagemap(level5pg ? 5 : 4);  // 5-level or 4-level paging
+    uint64_t higher_half_base = level5pg ? 0xff00000000000000 : 0xffff800000000000;  // higher-half base depends on the paging depth
+
+    // Map 0 to 2GiB at 0xffffffff80000000
+    for (uint64_t i = 0; i < 0x80000000; i += PAGE_SIZE) {
+        map_page(pagemap, 0xffffffff80000000 + i, i, 0x03);
+    }
+
+    // Map 0 to 4GiB at higher half base and 0
+    for (uint64_t i = 0; i < 0x100000000; i += PAGE_SIZE) {
+        map_page(pagemap, i, i, 0x03);                     // identity mapping
+        map_page(pagemap, higher_half_base + i, i, 0x03);  // higher-half alias
+    }
+
+    // Map any other region of memory from the memmap
+    for (size_t i = 0; i < memmap_entries; i++) {
+        uint64_t base   = memmap[i].base;
+        uint64_t length = memmap[i].length;
+        uint64_t top    = base + length;
+
+        uint64_t aligned_base   = ALIGN_DOWN(base, PAGE_SIZE);  // widen the entry outwards to whole pages
+        uint64_t aligned_top    = ALIGN_UP(top, PAGE_SIZE);
+        uint64_t aligned_length = aligned_top - aligned_base;
+
+        for (uint64_t offset = 0; offset < aligned_length; offset += PAGE_SIZE) {  // own counter name: must not shadow the entry index `i`
+            uint64_t page = aligned_base + offset;
+            map_page(pagemap, page, page, 0x03);
+            map_page(pagemap, higher_half_base + page, page, 0x03);
+        }
+    }
+
+    return pagemap;
+}
+
+__attribute__((noreturn)) void stivale_spinup(
+                 int bits, bool level5pg, pagemap_t pagemap,
+                 uint64_t entry_point, void *stivale_struct, uint64_t stack) {
     if (bits == 64) {
         // If we're going 64, we might as well call this BIOS interrupt
         // to tell the BIOS that we are entering Long Mode, since it is in
@@ -241,37 +279,6 @@ __attribute__((noreturn)) void stivale_spinup(int bits, bool level5pg,
             );
         }
 
-        pagemap_t pagemap = new_pagemap(level5pg ? 5 : 4);
-        uint64_t higher_half_base = level5pg ? 0xff00000000000000 : 0xffff800000000000;
-
-        // Map 0 to 2GiB at 0xffffffff80000000
-        for (uint64_t i = 0; i < 0x80000000; i += PAGE_SIZE) {
-            map_page(pagemap, 0xffffffff80000000 + i, i, 0x03);
-        }
-
-        // Map 0 to 4GiB at higher half base and 0
-        for (uint64_t i = 0; i < 0x100000000; i += PAGE_SIZE) {
-            map_page(pagemap, i, i, 0x03);
-            map_page(pagemap, higher_half_base + i, i, 0x03);
-        }
-
-        // Map any other region of memory from the memmap
-        for (size_t i = 0; i < memmap_entries; i++) {
-            uint64_t base   = memmap[i].base;
-            uint64_t length = memmap[i].length;
-            uint64_t top    = base + length;
-
-            uint64_t aligned_base   = ALIGN_DOWN(base, PAGE_SIZE);
-            uint64_t aligned_top    = ALIGN_UP(top, PAGE_SIZE);
-            uint64_t aligned_length = aligned_top - aligned_base;
-
-            for (uint64_t i = 0; i < aligned_length; i += PAGE_SIZE) {
-                uint64_t page = aligned_base + i;
-                map_page(pagemap, page, page, 0x03);
-                map_page(pagemap, higher_half_base + page, page, 0x03);
-            }
-        }
-
         asm volatile (
             "cli\n\t"
             "cld\n\t"
diff --git a/stage2/protos/stivale.h b/stage2/protos/stivale.h
index ae7ed101..85f6f734 100644
--- a/stage2/protos/stivale.h
+++ b/stage2/protos/stivale.h
@@ -4,10 +4,15 @@
 #include <stdbool.h>
 #include <stdint.h>
 #include <lib/memmap.h>
+#include <lib/e820.h>
+#include <mm/vmm64.h>
 
 void stivale_load(char *cmdline, int boot_drive);
-__attribute__((noreturn)) void stivale_spinup(int bits, bool level5pg,
-                 uint64_t entry_point, void *stivale_struct, uint64_t stack,
-                 struct e820_entry_t *memmap, size_t memmap_entries);
+
+pagemap_t stivale_build_pagemap(bool level5pg, struct e820_entry_t *memmap, // level5pg selects 5-level instead of 4-level paging
+                                size_t memmap_entries); // number of entries in memmap; returns the finished page map
+__attribute__((noreturn)) void stivale_spinup( // Hands control to the loaded kernel; never returns.
+                 int bits, bool level5pg, pagemap_t pagemap, // pagemap is now built by the caller (see stivale_build_pagemap)
+                 uint64_t entry_point, void *stivale_struct, uint64_t stack);
 
 #endif
diff --git a/stage2/protos/stivale2.c b/stage2/protos/stivale2.c
index 746ce9c1..9a4f5c50 100644
--- a/stage2/protos/stivale2.c
+++ b/stage2/protos/stivale2.c
@@ -14,6 +14,7 @@
 #include <lib/rand.h>
 #include <lib/real.h>
 #include <lib/libc.h>
+#include <lib/smp.h>
 #include <drivers/vbe.h>
 #include <lib/term.h>
 #include <drivers/pic.h>
@@ -313,7 +314,26 @@ void stivale2_load(char *cmdline, int boot_drive) {
     // Check if 5-level paging tag is requesting support
     bool level5pg_requested = get_tag(&stivale2_hdr, STIVALE2_HEADER_TAG_5LV_PAGING_ID) ? true : false;
 
-    stivale_spinup(bits, level5pg && level5pg_requested,
-                   entry_point, &stivale2_struct, stivale2_hdr.stack,
-                   memmap, memmap_entries);
+    pagemap_t pagemap = {0};
+    if (bits == 64)
+        pagemap = stivale_build_pagemap(level5pg && level5pg_requested,
+                                        memmap, memmap_entries);
+
+    //////////////////////////////////////////////
+    // Create SMP struct tag
+    //////////////////////////////////////////////
+    {
+    struct stivale2_header_tag_smp *smp_hdr_tag = get_tag(&stivale2_hdr, STIVALE2_HEADER_TAG_SMP_ID);
+    if (smp_hdr_tag != NULL) {
+        struct stivale2_struct_tag_smp *tag = balloc(sizeof(struct stivale2_struct_tag_smp));
+
+        init_smp((size_t*)&tag->cpu_count, bits == 64, pagemap,
+                 smp_hdr_tag->flags & 1);
+
+        append_tag(&stivale2_struct, (struct stivale2_tag *)tag);
+    }
+    }
+
+    stivale_spinup(bits, level5pg && level5pg_requested, pagemap,
+                   entry_point, &stivale2_struct, stivale2_hdr.stack);
 }
diff --git a/stivale/stivale2.h b/stivale/stivale2.h
index 81e975be..34617800 100644
--- a/stivale/stivale2.h
+++ b/stivale/stivale2.h
@@ -27,6 +27,13 @@ struct stivale2_header_tag_framebuffer {
     uint16_t framebuffer_bpp;
 } __attribute__((packed));
 
+#define STIVALE2_HEADER_TAG_SMP_ID 0x1ab015085f3273df
+
+struct stivale2_header_tag_smp { // Header tag (STIVALE2_HEADER_TAG_SMP_ID) whose presence requests AP start-up.
+    struct stivale2_tag tag; // common tag header (the identifier/next pair listed in STIVALE2.md)
+    uint64_t flags; // bit 0: 0 = use xAPIC, 1 = use x2APIC (per STIVALE2.md)
+} __attribute__((packed));
+
 #define STIVALE2_HEADER_TAG_5LV_PAGING_ID 0x932f477032007e8f
 
 /* --- Struct --------------------------------------------------------------- */
@@ -112,4 +119,21 @@ struct stivale2_struct_tag_firmware {
     uint64_t flags;
 } __attribute__((packed));
 
+#define STIVALE2_STRUCT_TAG_SMP_ID 0x34d1d96339647025
+
+struct stivale2_smp_info { // One entry of the smp_info[] array in stivale2_struct_tag_smp.
+    uint32_t processor_id; // processor ID as specified by the MADT (per STIVALE2.md)
+    uint32_t lapic_id; // LAPIC ID as specified by the MADT (per STIVALE2.md)
+    uint64_t target_stack; // stack the AP loads once goto_address has been written (per STIVALE2.md)
+    uint64_t goto_address; // address the kernel writes to release the AP (per STIVALE2.md)
+} __attribute__((packed));
+
+struct stivale2_struct_tag_smp {
+    // `tag` already supplies the identifier/next pair listed in STIVALE2.md;
+    // declaring them again would push cpu_count 16 bytes past its documented offset.
+    struct stivale2_tag tag;
+    uint64_t cpu_count;                   // total number of logical CPUs, BSP included
+    struct stivale2_smp_info smp_info[];  // flexible array of per-CPU entries
+} __attribute__((packed));
+
 #endif
diff --git a/test/test.asm b/test/test.asm
index 1596b2a3..e1f1aa33 100644
--- a/test/test.asm
+++ b/test/test.asm
@@ -13,6 +13,11 @@ section .rodata
 
 lv5:
     dq 0x932f477032007e8f
+    dq smp
+
+smp:
+    dq 0x1ab015085f3273df
+    dq 0
     dq 0
 
 section .bss
tab: 248 wrap: offon