loongarch64: Add smp support
diff --git a/common/lib/acpi.h b/common/lib/acpi.h
index d0e68031..40c93643 100644
--- a/common/lib/acpi.h
+++ b/common/lib/acpi.h
@@ -171,6 +171,17 @@ struct madt_riscv_intc {
#define MADT_RISCV_INTC_ENABLED ((uint32_t)1 << 0)
#define MADT_RISCV_INTC_ONLINE_CAPABLE ((uint32_t)1 << 1)
+struct madt_core_pic { // MADT "Core PIC" entry: one per LoongArch core, consumed by the SMP bring-up code
+ struct madt_header header;
+ uint8_t version;
+ uint32_t acpi_processor_uid; // copied into limine_mp_info.processor_id
+ uint32_t core_id; // physical core ID; compared against the boot core's CPUID CSR value
+ uint32_t flags; // tested with the MADT_CORE_PIC_* bits below
+} __attribute__((packed)); // packed: no padding may be inserted after the one-byte version field
+
+#define MADT_CORE_PIC_ENABLED ((uint32_t)1 << 0)
+#define MADT_CORE_PIC_ONLINE_CAPABLE ((uint32_t)1 << 1) // NOTE(review): mirrors the RISC-V INTC flag above; confirm the ACPI Core PIC flags define bit 1
+
uint8_t acpi_checksum(void *ptr, size_t size);
void *acpi_get_rsdp(void);
diff --git a/common/mm/vmm.h b/common/mm/vmm.h
index 01cf7e6b..f6402f25 100644
--- a/common/mm/vmm.h
+++ b/common/mm/vmm.h
@@ -123,11 +123,20 @@ void map_page(pagemap_t pagemap, uint64_t virt_addr, uint64_t phys_addr, uint64_
#elif defined (__loongarch64)
-#define paging_mode_va_bits(mode) 48
+// Execute CPUCFG for configuration word `reg` and return the 32-bit word the
+// instruction produces.
+static inline uint32_t read_cpucfg(uint32_t reg) {
+    uint32_t word = 0;
+
+    asm volatile ("cpucfg %0, %1" : "=r"(word) : "r"(reg));
+
+    return word;
+}
+
+#define paging_mode_va_bits(mode) (((read_cpucfg(0x1) >> 12) & 0xFF) + 1) // bits [19:12] of CPUCFG word 1, plus one; `mode` is ignored
static inline uint64_t paging_mode_higher_half(int paging_mode) { // lowest address of the upper half of the virtual address space
(void)paging_mode;
- return 0xffff800000000000;
+ return 0UL - (1UL << (paging_mode_va_bits(paging_mode) - 1)); // e.g. 48 VA bits -> 0xffff800000000000; relies on unsigned long being 64 bits wide
}
// We use fake flags here because these don't properly map onto the
diff --git a/common/protos/limine.c b/common/protos/limine.c
index 959261fb..140fe5b7 100644
--- a/common/protos/limine.c
+++ b/common/protos/limine.c
@@ -1505,8 +1505,8 @@ FEAT_START
#elif defined (__riscv)
mp_info = init_smp(&cpu_count, pagemap, direct_map_offset);
#elif defined (__loongarch64)
- cpu_count = 0;
- mp_info = NULL; // TODO: LoongArch MP
+ uint32_t bsp_phys_id;
+ mp_info = init_smp(&cpu_count, &bsp_phys_id, pagemap, direct_map_offset);
#else
#error Unknown architecture
#endif
@@ -1529,6 +1529,9 @@ FEAT_START
continue;
}
#elif defined (__loongarch64)
+ if (mp_info[i].phys_id == bsp_phys_id) {
+ continue;
+ }
#else
#error Unknown architecture
#endif
@@ -1549,6 +1552,7 @@ FEAT_START
#elif defined (__riscv)
mp_response->bsp_hartid = bsp_hartid;
#elif defined (__loongarch64)
+ mp_response->bsp_phys_id = bsp_phys_id;
#else
#error Unknown architecture
#endif
diff --git a/common/sys/cpu.h b/common/sys/cpu.h
index 5ab2757e..f9420c71 100644
--- a/common/sys/cpu.h
+++ b/common/sys/cpu.h
@@ -374,6 +374,92 @@ void init_riscv(const char *config);
#elif defined (__loongarch64)
+// Read CSR number `reg` as a 64-bit value. `reg` is passed with an "i"
+// constraint, so it must be a compile-time constant.
+#define csr_read64(reg) ({ \
+    uint64_t csr_read64__value; \
+    asm volatile ("csrrd %0, %1" \
+                  : "=r"(csr_read64__value) \
+                  : "i"(reg)); \
+    csr_read64__value; \
+})
+
+// Write `val` to CSR number `reg` (`reg` must be a compile-time constant).
+//
+// CSRWR is a swap: it stores the data register into the CSR and returns the
+// CSR's previous contents in that same register. The operand is therefore
+// declared read-write ("+r") on a private temporary, so the compiler never
+// assumes the register still holds `val` after the instruction. The old CSR
+// value left in the temporary is discarded.
+#define csr_write64(val, reg) do { \
+    __auto_type csr_write64__val = (val); \
+    asm volatile ( \
+        "csrwr %0, %1" \
+        : "+r"(csr_write64__val) \
+        : "i"(reg) \
+        : "memory" \
+    ); \
+} while (0)
+
+#define csr_read32(reg) ((uint32_t)csr_read64(reg)) // full CSR read, truncated to the low 32 bits
+
+#define csr_write32(val, reg) do { /* widens val to 64 bits, then defers to csr_write64 */ \
+ csr_write64((uint64_t)(val), reg); \
+} while (0)
+
+// Masked update of CSR number `reg` (compile-time constant) via CSRXCHG.
+// `val` is the data operand and `mask` the mask operand; the expression
+// evaluates to whatever the instruction leaves in the data register
+// (the previous CSR contents, per the LoongArch reference manual).
+#define csr_xchg64(val, mask, reg) ({ \
+    uint64_t csr_xchg64__data = (uint64_t)(val); \
+    uint64_t csr_xchg64__select = (uint64_t)(mask); \
+    asm volatile ("csrxchg %0, %1, %2" \
+                  : "+r"(csr_xchg64__data) \
+                  : "r"(csr_xchg64__select), "i"(reg) \
+                  : "memory"); \
+    csr_xchg64__data; \
+})
+
+// Load the object `var` points to with a single LD.D and follow it with a
+// DBAR 0 barrier; the asm also carries a compiler "memory" clobber.
+//
+// `var` must point to an 8-byte object: the access width is fixed by the
+// LD.D instruction, not by the pointee type. The argument is parenthesized
+// everywhere it is expanded so pointer expressions such as `base + 1` are
+// dereferenced as a whole.
+#define locked_read(var) ({ \
+    typeof(*(var)) locked_read__ret; \
+    asm volatile ( \
+        "ld.d %0, %1\n\t" \
+        "dbar 0" \
+        : "=r"(locked_read__ret) \
+        : "m"(*(var)) \
+        : "memory" \
+    ); \
+    locked_read__ret; \
+})
+
+// Read the 32-bit I/O CSR at address `reg` (IOCSRRD.W).
+static inline uint32_t iocsr_read32(uint64_t reg) {
+    uint32_t result;
+
+    asm volatile ("iocsrrd.w %0, %1" : "=r"(result) : "r"(reg));
+
+    return result;
+}
+
+// Write the 32-bit value `val` to the I/O CSR at address `reg` (IOCSRWR.W).
+static inline void iocsr_write32(uint32_t val, uint64_t reg) {
+    asm volatile ("iocsrwr.w %0, %1" : : "r"(val), "r"(reg));
+}
+
+// Read the 64-bit I/O CSR at address `reg` (IOCSRRD.D).
+static inline uint64_t iocsr_read64(uint64_t reg) {
+    uint64_t result;
+
+    asm volatile ("iocsrrd.d %0, %1" : "=r"(result) : "r"(reg));
+
+    return result;
+}
+
+// Write the 64-bit value `val` to the I/O CSR at address `reg` (IOCSRWR.D).
+static inline void iocsr_write64(uint64_t val, uint64_t reg) {
+    asm volatile ("iocsrwr.d %0, %1" : : "r"(val), "r"(reg));
+}
+
static inline uint64_t rdtsc(void) {
uint64_t v;
asm volatile ("rdtime.d %0, $zero" : "=r" (v));
diff --git a/common/sys/smp.c b/common/sys/smp.c
index e3519be2..1778bdde 100644
--- a/common/sys/smp.c
+++ b/common/sys/smp.c
@@ -17,7 +17,7 @@
#if defined (__riscv)
#include <sys/sbi.h>
#endif
-#if defined (__aarch64__)
+#if defined (__aarch64__) || defined(__loongarch__)
#include <libfdt.h>
#endif
@@ -853,6 +853,339 @@ struct limine_mp_info *init_smp(size_t *cpu_count, pagemap_t pagemap, uint64_t h
}
#elif defined (__loongarch64)
+
+enum {
+ LOONGARCH_CSR_CPUID = 0x20, // CSR read by loongarch_phys_id()
+
+ LOONGARCH_IOCSR_IPI_SEND = 0x1040, // I/O CSR written by smp_send_ipi()
+ LOONGARCH_IOCSR_MBUF_SEND = 0x1048, // I/O CSR written by csr_mail_send()
+
+ IOCSR_IPI_SEND_BLOCKING_BIT = 31,
+ IOCSR_IPI_SEND_CPU_SHIFT = 16,
+ IOCSR_IPI_SEND_IP_SHIFT = 0,
+
+ IOCSR_MBUF_SEND_BLOCKING_BIT = 31,
+ IOCSR_MBUF_SEND_CPU_SHIFT = 16,
+ IOCSR_MBUF_SEND_BOX_SHIFT = 2,
+
+ SMP_BOOT_CPU = 0x1, // placed verbatim in the low bits of IPI_SEND. NOTE(review): if that field is a vector index rather than a bitmask, this selects vector 1, not vector 0 -- confirm against the manual/firmware
+
+ MADT_ENTRY_CORE_PIC = 17 // MADT entry type matched in try_acpi_smp()
+};
+
+struct trampoline_passed_info { // field offsets are hard-coded as tpl_* in smp_trampoline.asm_loongarch64; keep both in sync
+ uint64_t smp_tpl_booted_flag; // cleared by smp_start_ap(), set to 1 by the AP trampoline
+ uint64_t smp_tpl_info_struct; // address of the AP's limine_mp_info; the trampoline adds the HHDM offset to it
+ uint64_t smp_tpl_pgd_low; // pagemap.pgd[0]
+ uint64_t smp_tpl_pgd_high; // pagemap.pgd[1]
+ uint64_t smp_tpl_hhdm_offset;
+ uint64_t smp_tpl_temp_stack; // top (highest address) of the temporary AP stack
+};
+
+struct trampoline_passed_info loongarch_smp_passed_info; // single shared instance, refilled for each AP in turn
+
+// Return the physical ID of the executing core, read from the CPUID CSR.
+static inline uint32_t loongarch_phys_id(void) {
+    uint32_t cpuid = csr_read32(LOONGARCH_CSR_CPUID);
+
+    return cpuid;
+}
+
+// A Core PIC entry is considered startable when its flags have the ENABLED
+// bit, the ONLINE_CAPABLE bit, or both set.
+static inline bool core_pic_startable(uint32_t flags) {
+    const uint32_t startable_mask = MADT_CORE_PIC_ENABLED | MADT_CORE_PIC_ONLINE_CAPABLE;
+
+    return (flags & startable_mask) != 0;
+}
+
+// Deliver the 64-bit value `data` to mailbox `mailbox` of core `cpu` through
+// the MBUF_SEND I/O CSR.
+//
+// The register moves 32 bits per write, so the value goes out as two blocking
+// requests: half-box (2 * mailbox + 1) receives the high word and half-box
+// (2 * mailbox) the low word. In both requests the payload sits in bits
+// [63:32] of the written value.
+//
+// `cpu` and `mailbox` are widened as unsigned values before shifting so the
+// request word can never pick up sign-extension bits.
+static void csr_mail_send(uint64_t data, int cpu, int mailbox) {
+    const uint64_t blocking = (uint64_t)1 << IOCSR_MBUF_SEND_BLOCKING_BIT;
+    const uint64_t target = (uint64_t)(uint32_t)cpu << IOCSR_MBUF_SEND_CPU_SHIFT;
+    const uint64_t box_lo = (uint64_t)(uint32_t)mailbox << 1;
+    const uint64_t box_hi = box_lo + 1;
+    uint64_t val;
+
+    // High 32 bits: already in payload position.
+    val = blocking | (box_hi << IOCSR_MBUF_SEND_BOX_SHIFT) | target;
+    val |= (data & 0xFFFFFFFF00000000);
+    iocsr_write64(val, LOONGARCH_IOCSR_MBUF_SEND);
+
+    // Low 32 bits: shifted up into payload position.
+    val = blocking | (box_lo << IOCSR_MBUF_SEND_BOX_SHIFT) | target;
+    val |= (data << 32);
+    iocsr_write64(val, LOONGARCH_IOCSR_MBUF_SEND);
+}
+
+// Post a blocking IPI request for core `phys_id` through the IPI_SEND I/O CSR.
+// `action` is placed unmodified in the low bits of the request word.
+static void smp_send_ipi(uint32_t phys_id, uint32_t action) {
+    uint32_t request = (uint32_t)1 << IOCSR_IPI_SEND_BLOCKING_BIT;
+
+    request |= phys_id << IOCSR_IPI_SEND_CPU_SHIFT;
+    request |= action << IOCSR_IPI_SEND_IP_SHIFT;
+
+    iocsr_write32(request, LOONGARCH_IOCSR_IPI_SEND);
+}
+
+// Start the AP with physical ID `phys_id` and wait for it to check in.
+//
+// Fills the shared trampoline block, mails the trampoline entry point to the
+// AP, sends the boot IPI, then polls the booted flag. Returns true once the
+// AP has set the flag and false if the polling budget runs out first.
+static bool smp_start_ap(uint32_t phys_id, struct limine_mp_info *info_struct,
+                         uint64_t pgd_low, uint64_t pgd_high,
+                         uint64_t hhdm_offset) {
+    enum { TEMP_STACK_SIZE = 8192 };
+
+    // One temporary stack, allocated on first use and reused for every AP.
+    static void *temp_stack = NULL;
+    if (temp_stack == NULL) {
+        temp_stack = ext_mem_alloc(TEMP_STACK_SIZE);
+    }
+
+    struct trampoline_passed_info *tpl = &loongarch_smp_passed_info;
+    tpl->smp_tpl_booted_flag = 0;
+    tpl->smp_tpl_info_struct = (uint64_t)(uintptr_t)info_struct;
+    tpl->smp_tpl_pgd_low = pgd_low;
+    tpl->smp_tpl_pgd_high = pgd_high;
+    tpl->smp_tpl_hhdm_offset = hhdm_offset;
+    // The trampoline receives the top of the stack, not its base.
+    tpl->smp_tpl_temp_stack = (uint64_t)(uintptr_t)temp_stack + TEMP_STACK_SIZE;
+
+    // Barrier between filling the block and waking the AP.
+    asm volatile ("dbar 0" ::: "memory");
+
+    // Mailbox 0 and 1 carry the low and high 32 bits of the AP entry point.
+    uint64_t entry_point = (uint64_t)(uintptr_t)smp_trampoline_start;
+    csr_mail_send(entry_point, phys_id, 0);
+    smp_send_ipi(phys_id, SMP_BOOT_CPU);
+
+    int polls_left = 1000000;
+    while (polls_left-- > 0) {
+        if (locked_read(&tpl->smp_tpl_booted_flag) == 1) {
+            return true;
+        }
+        delay(100000);
+    }
+
+    return false;
+}
+
+// Advance *cursor through the MADT entry list and return the next Core PIC
+// entry that is large enough and startable, leaving *cursor just past it.
+// Returns NULL once the table is exhausted or a malformed entry (zero length,
+// or extending past the end of the table) is reached.
+static struct madt_core_pic *next_startable_core_pic(struct madt *madt, uint8_t **cursor) {
+    uintptr_t madt_end = (uintptr_t)madt + madt->header.length;
+
+    while ((uintptr_t)*cursor + 1 < madt_end) {
+        uint8_t *entry = *cursor;
+        uint8_t entry_len = entry[1];
+
+        // A zero length would never advance; an entry that runs past the end
+        // of the table must not be read as a full structure.
+        if (entry_len == 0 || (uintptr_t)entry + entry_len > madt_end) {
+            break;
+        }
+
+        *cursor = entry + entry_len;
+
+        if (entry[0] != MADT_ENTRY_CORE_PIC || entry_len < sizeof(struct madt_core_pic)) {
+            continue;
+        }
+
+        struct madt_core_pic *core_pic = (void *)entry;
+        if (core_pic_startable(core_pic->flags)) {
+            return core_pic;
+        }
+    }
+
+    return NULL;
+}
+
+// Enumerate cores from the ACPI MADT and start every AP found there.
+//
+// cpu_count:   out, number of entries written to the returned array (the boot
+//              core plus every AP that checked in).
+// bsp_phys_id: out, physical ID of the calling core.
+// pagemap, hhdm_offset: handed to each AP through the trampoline block.
+//
+// Returns the array of per-CPU info structs, or NULL when there is no MADT,
+// it lists no startable core, or no entry ended up in the array.
+static struct limine_mp_info *try_acpi_smp(size_t *cpu_count, uint32_t *bsp_phys_id,
+                                           pagemap_t pagemap, uint64_t hhdm_offset) {
+    struct madt *madt = acpi_get_table("APIC", 0);
+    if (madt == NULL) {
+        return NULL;
+    }
+
+    *bsp_phys_id = loongarch_phys_id();
+    *cpu_count = 0;
+
+    // First pass: count startable cores to size the result array.
+    size_t max_cpus = 0;
+    uint8_t *cursor = (uint8_t *)madt->madt_entries_begin;
+    while (next_startable_core_pic(madt, &cursor) != NULL) {
+        max_cpus++;
+    }
+
+    if (max_cpus == 0) {
+        return NULL;
+    }
+
+    struct limine_mp_info *ret = ext_mem_alloc(max_cpus * sizeof(struct limine_mp_info));
+
+    // Second pass: fill one slot per core and start the APs.
+    struct madt_core_pic *core_pic;
+    cursor = (uint8_t *)madt->madt_entries_begin;
+    while ((core_pic = next_startable_core_pic(madt, &cursor)) != NULL) {
+        // The slot is only committed (cpu_count bumped) once the core is
+        // known to be running; a failed AP's slot is reused by the next one.
+        struct limine_mp_info *info_struct = &ret[*cpu_count];
+        info_struct->processor_id = core_pic->acpi_processor_uid;
+        info_struct->phys_id = core_pic->core_id;
+
+        // Do not try to restart the BSP.
+        if (core_pic->core_id == *bsp_phys_id) {
+            (*cpu_count)++;
+            continue;
+        }
+
+        printv("smp: Found candidate AP for bring-up. Core ID: %u\n", core_pic->core_id);
+
+        if (!smp_start_ap(core_pic->core_id, info_struct,
+                          (uint64_t)(uintptr_t)pagemap.pgd[0],
+                          (uint64_t)(uintptr_t)pagemap.pgd[1],
+                          hhdm_offset)) {
+            print("smp: FAILED to bring-up AP\n");
+            continue;
+        }
+
+        printv("smp: Successfully brought up AP\n");
+        (*cpu_count)++;
+    }
+
+    if (*cpu_count == 0) {
+        pmm_free(ret, max_cpus * sizeof(struct limine_mp_info));
+        return NULL;
+    }
+
+    return ret;
+}
+
+// Decode the core ID of one child node of /cpus.
+//
+// Returns true and stores the ID in *phys_id when the node has device_type
+// "cpu" and a "reg" property of at least `address_cells` cells whose value
+// fits in 32 bits. Returns false when the node must be skipped; an ID that is
+// too wide is also reported through printv. `address_cells` must already have
+// been validated as 1 or 2 by the caller.
+static bool dtb_cpu_phys_id(void *dtb, int node, int address_cells, uint64_t *phys_id) {
+    const void *prop;
+    int prop_len;
+
+    if (!(prop = fdt_getprop(dtb, node, "device_type", NULL)) || strcmp(prop, "cpu")) {
+        return false;
+    }
+
+    if (!(prop = fdt_getprop(dtb, node, "reg", &prop_len)) || prop_len < address_cells * 4) {
+        return false;
+    }
+
+    // The cells are stored big-endian: fold them most-significant byte first.
+    const uint8_t *bytes = prop;
+    uint64_t id = 0;
+    for (int i = 0; i < address_cells * 4; i++) {
+        id = (id << 8) | bytes[i];
+    }
+
+    if (id > UINT32_MAX) {
+        printv("smp: core id %U does not fit in 32 bits, skipping\n", id);
+        return false;
+    }
+
+    *phys_id = id;
+    return true;
+}
+
+// Enumerate cores from the device tree's /cpus node and start every AP found.
+//
+// dtb:         flattened device tree blob.
+// cpu_count:   out, number of entries written to the returned array (the boot
+//              core plus every AP that checked in).
+// bsp_phys_id: out, physical ID of the calling core.
+// pagemap, hhdm_offset: handed to each AP through the trampoline block.
+//
+// Returns the array of per-CPU info structs, or NULL when /cpus is missing,
+// #address-cells is not 1 or 2, no usable CPU node exists, or no entry ended
+// up in the array. processor_id is set to 0 for every entry on this path.
+static struct limine_mp_info *try_dtb_smp(void *dtb, size_t *cpu_count,
+                                          uint32_t *bsp_phys_id,
+                                          pagemap_t pagemap,
+                                          uint64_t hhdm_offset) {
+    int cpus = fdt_path_offset(dtb, "/cpus");
+    if (cpus < 0) {
+        printv("smp: failed to find /cpus node: %s\n", fdt_strerror(cpus));
+        return NULL;
+    }
+
+    int address_cells = fdt_address_cells(dtb, cpus);
+    if (address_cells < 1) {
+        printv("smp: fdt_address_cells failed: %s\n", fdt_strerror(address_cells));
+        return NULL;
+    }
+    if (address_cells > 2) {
+        printv("smp: illegal #address-cells value: %d\n", address_cells);
+        return NULL;
+    }
+
+    *bsp_phys_id = loongarch_phys_id();
+    *cpu_count = 0;
+
+    // First pass: count usable CPU nodes to size the result array.
+    size_t max_cpus = 0;
+    int node;
+    fdt_for_each_subnode(node, dtb, cpus) {
+        uint64_t phys_id;
+
+        if (dtb_cpu_phys_id(dtb, node, address_cells, &phys_id)) {
+            max_cpus++;
+        }
+    }
+
+    if (max_cpus == 0) {
+        return NULL;
+    }
+
+    struct limine_mp_info *ret = ext_mem_alloc(max_cpus * sizeof(struct limine_mp_info));
+
+    // Second pass: fill one slot per core and start the APs.
+    fdt_for_each_subnode(node, dtb, cpus) {
+        uint64_t phys_id;
+
+        if (!dtb_cpu_phys_id(dtb, node, address_cells, &phys_id)) {
+            continue;
+        }
+
+        // The slot is only committed (cpu_count bumped) once the core is
+        // known to be running; a failed AP's slot is reused by the next one.
+        struct limine_mp_info *info_struct = &ret[*cpu_count];
+        info_struct->processor_id = 0;
+        info_struct->phys_id = phys_id;
+
+        // Do not try to restart the BSP.
+        if (phys_id == *bsp_phys_id) {
+            (*cpu_count)++;
+            continue;
+        }
+
+        printv("smp: Found candidate AP for bring-up. Core ID: %U\n", phys_id);
+
+        if (!smp_start_ap((uint32_t)phys_id, info_struct,
+                          (uint64_t)(uintptr_t)pagemap.pgd[0],
+                          (uint64_t)(uintptr_t)pagemap.pgd[1],
+                          hhdm_offset)) {
+            print("smp: FAILED to bring-up AP\n");
+            continue;
+        }
+
+        printv("smp: Successfully brought up AP\n");
+        (*cpu_count)++;
+    }
+
+    if (*cpu_count == 0) {
+        pmm_free(ret, max_cpus * sizeof(struct limine_mp_info));
+        return NULL;
+    }
+
+    return ret;
+}
+
+// Discover and start the application processors.
+//
+// Tries the ACPI MADT first (when an RSDP exists) and falls back to the
+// device tree.
+//
+// cpu_count:   out, number of entries in the returned array; 0 on failure.
+// bsp_phys_id: out, physical ID of the calling (boot) core; always set.
+// pagemap, hhdm_offset: forwarded to the per-AP bring-up code.
+//
+// Returns the array of per-CPU info structs, or NULL if neither source
+// yielded any CPU.
+struct limine_mp_info *init_smp(size_t *cpu_count, uint32_t *bsp_phys_id,
+                                pagemap_t pagemap, uint64_t hhdm_offset) {
+    struct limine_mp_info *info = NULL;
+
+    // Give both outputs defined values up front: the discovery helpers can
+    // return before touching them, and the caller passes an uninitialized
+    // bsp_phys_id.
+    *cpu_count = 0;
+    *bsp_phys_id = loongarch_phys_id();
+
+    if (acpi_get_rsdp() != NULL) {
+        info = try_acpi_smp(cpu_count, bsp_phys_id, pagemap, hhdm_offset);
+        if (info != NULL) {
+            return info;
+        }
+    }
+
+    void *dtb = get_device_tree_blob(NULL, 0);
+    if (dtb != NULL) {
+        info = try_dtb_smp(dtb, cpu_count, bsp_phys_id, pagemap, hhdm_offset);
+        // The blob's size is read before it is released.
+        pmm_free(dtb, fdt_totalsize(dtb));
+        return info;
+    }
+
+    printv("Failed to figure out how to start APs.");
+
+    return NULL;
+}
+
#else
#error Unknown architecture
#endif
diff --git a/common/sys/smp.h b/common/sys/smp.h
index de45f08c..743ce009 100644
--- a/common/sys/smp.h
+++ b/common/sys/smp.h
@@ -37,6 +37,10 @@ struct limine_mp_info *init_smp(size_t *cpu_count,
uint64_t hhdm_offset);
#elif defined (__loongarch64)
+
+struct limine_mp_info *init_smp(size_t *cpu_count, uint32_t *bsp_phys_id, // outputs: entries in the returned array, physical ID of the calling core
+ pagemap_t pagemap, uint64_t hhdm_offset); // returns NULL when no CPU could be enumerated
+
#else
#error Unknown architecture
#endif
diff --git a/common/sys/smp_trampoline.asm_loongarch64 b/common/sys/smp_trampoline.asm_loongarch64
new file mode 100644
index 00000000..a825a1ed
--- /dev/null
+++ b/common/sys/smp_trampoline.asm_loongarch64
@@ -0,0 +1,76 @@
+.section .text
+
+.set tpl_booted_flag, 0 // byte offsets into loongarch_smp_passed_info (struct trampoline_passed_info in smp.c); keep in sync
+.set tpl_info_struct, 8
+.set tpl_pgd_low, 16
+.set tpl_pgd_high, 24
+.set tpl_hhdm_offset, 32
+.set tpl_temp_stack, 40
+
+.global smp_trampoline_start
+.extern loongarch_spinup
+.extern loongarch_smp_passed_info
+
+smp_trampoline_start: // AP entry point; smp_start_ap() mails this address to the target core
+ // Load trampoline state and switch to the kernel page tables.
+ la.local $t0, loongarch_smp_passed_info
+ la.local $a0, 1f // address of label 1 below, handed over in $a0
+ ld.d $a1, $t0, tpl_temp_stack
+ ld.d $a2, $t0, tpl_pgd_low
+ ld.d $a3, $t0, tpl_pgd_high
+ b loongarch_spinup // NOTE(review): defined outside this file; presumably installs the stack/PGDs and continues at $a0 -- confirm
+
+1:
+ // Relocate the MP info pointer to the higher-half direct map.
+ la.local $t0, loongarch_smp_passed_info
+ ld.d $t1, $t0, tpl_info_struct
+ ld.d $t2, $t0, tpl_hhdm_offset
+ add.d $t1, $t1, $t2
+ move $a0, $t1 // $a0 keeps this pointer until the final jump
+
+ // Tell the BSP that this AP reached the parking loop.
+ li.d $t3, 1
+ st.d $t3, $t0, tpl_booted_flag // last access to the shared block; the BSP may refill it once it sees the flag
+ dbar 0
+
+2:
+ // Wait until the kernel sets goto_address.
+ ld.d $t8, $t1, 24 // offset 24 of the MP info struct
+ dbar 0
+ beqz $t8, 2b
+
+ // Load the kernel-provided stack (reserved field).
+ ld.d $sp, $t1, 16 // offset 16 of the MP info struct
+
+ // Match the other trampolines by zeroing non-essential registers.
+ move $ra, $zero
+ move $tp, $zero
+ move $a1, $zero
+ move $a2, $zero
+ move $a3, $zero
+ move $a4, $zero
+ move $a5, $zero
+ move $a6, $zero
+ move $a7, $zero
+ move $t0, $zero
+ move $t1, $zero
+ move $t2, $zero
+ move $t3, $zero
+ move $t4, $zero
+ move $t5, $zero
+ move $t6, $zero
+ move $t7, $zero
+ move $fp, $zero
+ move $s0, $zero
+ move $s1, $zero
+ move $s2, $zero
+ move $s3, $zero
+ move $s4, $zero
+ move $s5, $zero
+ move $s6, $zero
+ move $s7, $zero
+ move $s8, $zero
+
+ jirl $zero, $t8, 0 // jump without linking; $a0, $sp and $t8 are absent from the zeroing list above
+
+.section .note.GNU-stack,"",%progbits
