:: commit 0a2fec6ac68e58d8af5c3eaeeb42d9cf7dfb43eb

Mintsuki <mintsuki@protonmail.com> — 2026-02-20 00:41

parents: c51b667246

sys/iommu: Disable Intel VT-d and AMD-Vi IOMMUs before kernel entry

diff --git a/common/protos/limine.c b/common/protos/limine.c
index 959261fb..0979430c 100644
--- a/common/protos/limine.c
+++ b/common/protos/limine.c
@@ -24,6 +24,7 @@
 #include <flanterm_backends/fb.h>
 #include <sys/pic.h>
 #include <sys/lapic.h>
+#include <sys/iommu.h>
 #include <sys/idt.h>
 #include <fs/file.h>
 #include <mm/pmm.h>
@@ -1658,6 +1659,8 @@ FEAT_END
     rm_int(0x15, &r, &r);
 #endif
 
+    iommu_disable_all();
+
     pic_mask_all();
     io_apic_mask_all();
 
diff --git a/common/protos/linux_x86.c b/common/protos/linux_x86.c
index 989db114..bb0e72f0 100644
--- a/common/protos/linux_x86.c
+++ b/common/protos/linux_x86.c
@@ -16,6 +16,7 @@
 #include <sys/idt.h>
 #include <lib/fb.h>
 #include <lib/acpi.h>
+#include <sys/iommu.h>
 #include <drivers/edid.h>
 #include <drivers/vga_textmode.h>
 #include <drivers/gop.h>
@@ -643,6 +644,8 @@ no_fb:;
     // Spin up
     ///////////////////////////////////////
 
+    iommu_disable_all();
+
     irq_flush_type = IRQ_PIC_ONLY_FLUSH;
 
 #if defined (UEFI) && defined (__x86_64__)
diff --git a/common/protos/multiboot1.c b/common/protos/multiboot1.c
index 401502b0..87263c85 100644
--- a/common/protos/multiboot1.c
+++ b/common/protos/multiboot1.c
@@ -18,6 +18,7 @@
 #include <sys/pic.h>
 #include <sys/cpu.h>
 #include <sys/idt.h>
+#include <sys/iommu.h>
 #include <fs/file.h>
 #include <mm/vmm.h>
 #include <mm/pmm.h>
@@ -484,6 +485,8 @@ skip_modeset:;
     multiboot1_info->mmap_addr = (uint32_t)(size_t)mmap - mb1_info_slide;
     multiboot1_info->flags |= (1 << 0) | (1 << 6);
 
+    iommu_disable_all();
+
     irq_flush_type = IRQ_PIC_ONLY_FLUSH;
 
     common_spinup(multiboot_spinup_32, 6,
diff --git a/common/protos/multiboot2.c b/common/protos/multiboot2.c
index 23c56a98..a7422a64 100644
--- a/common/protos/multiboot2.c
+++ b/common/protos/multiboot2.c
@@ -18,6 +18,7 @@
 #include <sys/pic.h>
 #include <sys/cpu.h>
 #include <sys/idt.h>
+#include <sys/iommu.h>
 #include <fs/file.h>
 #include <mm/vmm.h>
 #include <lib/acpi.h>
@@ -1001,6 +1002,8 @@ skip_modeset:;
     mbi_start->size = info_idx;
     mbi_start->reserved = 0x00;
 
+    iommu_disable_all();
+
     irq_flush_type = IRQ_PIC_ONLY_FLUSH;
 
     common_spinup(multiboot_spinup_32, 6,
diff --git a/common/sys/iommu.c b/common/sys/iommu.c
new file mode 100644
index 00000000..6dd4e1a0
--- /dev/null
+++ b/common/sys/iommu.c
@@ -0,0 +1,159 @@
+#if defined (__x86_64__) || defined (__i386__)
+
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <sys/iommu.h>
+#include <sys/cpu.h>
+#include <lib/acpi.h>
+#include <lib/libc.h>
+
+// Intel VT-d register offsets, added to a unit's register base address
+#define VTD_GCMD_REG  0x18  // command register: written to change enable bits
+#define VTD_GSTS_REG  0x1C  // status register: read back to observe enable bits
+
+// GSTS/GCMD bit positions (this file uses each constant for both registers)
+#define VTD_GSTS_TES  (1u << 31)  // Translation Enable Status
+#define VTD_GSTS_QIES (1u << 26)  // Queued Invalidation Enable Status
+#define VTD_GSTS_IRES (1u << 25)  // Interrupt Remapping Enable Status
+
+// Mask to clear one-shot command bits when reading GSTS for use as GCMD base.
+// One-shot bits auto-clear after hardware processes them and must not be
+// carried over from GSTS into GCMD writes:
+//   Bit 30: SRTP (Set Root Table Pointer)
+//   Bit 29: SFL  (Set Fault Log)
+//   Bit 27: WBF  (Write Buffer Flush)
+//   Bit 24: SIRTP (Set Interrupt Remap Table Pointer)
+// All other bits (TE, EAFL, QIE, IRE, CFI) are persistent toggles.
+#define VTD_GCMD_ONESHOT_MASK 0x96FFFFFF
+
+#define VTD_TIMEOUT 10000000  // max GSTS reads per polling loop before giving up
+
+// Clear one persistent enable bit on a VT-d unit and wait for the matching
+// GSTS status bit to drop (the code uses the same bit position for both).
+//   reg_base: base address of the unit's register block
+//   sts:      the caller's most recent GSTS value
+//   bit:      the enable/status bit to clear
+// Does nothing when `bit` is already clear in `sts`. Returns the last GSTS
+// value seen so the caller can feed it into the next step.
+static uint32_t vtd_clear_enable_bit(uintptr_t reg_base, uint32_t sts,
+                                     uint32_t bit) {
+    if ((sts & bit) == 0) {
+        return sts;
+    }
+
+    // GCMD is rebuilt from the status snapshot: persistent bits keep their
+    // current state, the one-shot command bits are masked out so they are
+    // not re-issued, and only `bit` is turned off.
+    mmoutd(reg_base + VTD_GCMD_REG, (sts & VTD_GCMD_ONESHOT_MASK) & ~bit);
+
+    // Poll until the status bit drops; give up quietly after VTD_TIMEOUT reads.
+    int tries = 0;
+    do {
+        asm volatile ("pause");
+        sts = mmind(reg_base + VTD_GSTS_REG);
+    } while ((sts & bit) != 0 && ++tries < VTD_TIMEOUT);
+
+    return sts;
+}
+
+// Disable interrupt remapping, DMA translation and queued invalidation on the
+// VT-d unit whose registers start at `reg_base`. Features that GSTS reports
+// as already off are skipped.
+// Each step starts from the GSTS value left by the previous one, so a step
+// that timed out leaves its bit set in the GCMD value written by the next.
+static void vtd_disable_unit(uintptr_t reg_base) {
+    uint32_t sts = mmind(reg_base + VTD_GSTS_REG);
+
+    // Reverse of the enable order: IRE depends on QIE, so interrupt
+    // remapping goes first and queued invalidation last.
+
+    // 1. Interrupt remapping
+    sts = vtd_clear_enable_bit(reg_base, sts, VTD_GSTS_IRES);
+    // 2. DMA translation
+    sts = vtd_clear_enable_bit(reg_base, sts, VTD_GSTS_TES);
+    // 3. Queued invalidation
+    vtd_clear_enable_bit(reg_base, sts, VTD_GSTS_QIES);
+}
+
+// Disable every VT-d unit listed as a DRHD entry in the ACPI DMAR table.
+static void vtd_disable_all(void) {
+    struct sdt *dmar = acpi_get_table("DMAR", 0);
+    if (dmar == NULL) {
+        return; // table not found: nothing to disable
+    }
+
+    // DMAR header is 48 bytes: 36 (SDT) + 1 (width) + 1 (flags) + 10 (reserved)
+    uint8_t *end = (uint8_t *)dmar + dmar->length;
+    uint16_t length;
+    for (uint8_t *ptr = (uint8_t *)dmar + 48; ptr + 4 <= end; ptr += length) {
+        uint16_t type = *(uint16_t *)(ptr + 0);
+        length = *(uint16_t *)(ptr + 2);
+        if (length < 4 || ptr + length > end) {
+            break; // malformed entry: stop walking the table
+        }
+        // Type 0 = DRHD (DMA Remapping Hardware Unit Definition)
+        if (type != 0 || length < 16) {
+            continue;
+        }
+        // Never touch a zero base, or one uintptr_t would truncate (32-bit).
+        uint64_t reg_base;
+        memcpy(&reg_base, ptr + 8, sizeof(reg_base));
+        if (reg_base != 0 && (uintptr_t)reg_base == reg_base) {
+            vtd_disable_unit((uintptr_t)reg_base);
+        }
+    }
+}
+
+// AMD IOMMU control register (64-bit at offset 0x18)
+#define AMDVI_CONTROL_REG 0x18
+
+// Turn off one AMD-Vi unit by clearing IommuEn (bit 0, the master enable) in
+// its control register. Only the low 32 bits of that register are accessed,
+// and nothing is written when the bit is already clear.
+static void amdvi_disable_unit(uintptr_t mmio_base) {
+    const uint32_t iommu_en = 1u << 0;
+    const uintptr_t ctrl_reg = mmio_base + AMDVI_CONTROL_REG;
+
+    uint32_t ctrl_lo = mmind(ctrl_reg);
+    if (ctrl_lo & iommu_en) {
+        // No status polling follows the write, unlike the VT-d path.
+        mmoutd(ctrl_reg, ctrl_lo & ~iommu_en);
+    }
+}
+
+// Disable every AMD-Vi unit listed as an IVHD block in the ACPI IVRS table.
+static void amdvi_disable_all(void) {
+    struct sdt *ivrs = acpi_get_table("IVRS", 0);
+    if (ivrs == NULL) {
+        return; // table not found: nothing to disable
+    }
+
+    // IVRS header is 48 bytes: 36 (SDT) + 4 (IVinfo) + 8 (reserved)
+    uint8_t *end = (uint8_t *)ivrs + ivrs->length;
+    uint16_t length;
+    for (uint8_t *ptr = (uint8_t *)ivrs + 48; ptr + 4 <= end; ptr += length) {
+        uint8_t type = *(uint8_t *)(ptr + 0);
+        length = *(uint16_t *)(ptr + 2);
+        if (length < 4 || ptr + length > end) {
+            break; // malformed block: stop walking the table
+        }
+        // IVHD types: 0x10 (basic), 0x11 (extended), 0x40 (extended, newer)
+        if ((type != 0x10 && type != 0x11 && type != 0x40) || length < 16) {
+            continue;
+        }
+        // Never touch a zero base, or one uintptr_t would truncate (32-bit).
+        uint64_t mmio_base;
+        memcpy(&mmio_base, ptr + 8, sizeof(mmio_base));
+        if (mmio_base != 0 && (uintptr_t)mmio_base == mmio_base) {
+            amdvi_disable_unit((uintptr_t)mmio_base);
+        }
+    }
+}
+
+void iommu_disable_all(void) { // Public entry point: runs both vendor paths
+    vtd_disable_all();   // Intel VT-d units, found through the DMAR table
+    amdvi_disable_all(); // AMD-Vi units, found through the IVRS table
+}
+
+#endif
diff --git a/common/sys/iommu.h b/common/sys/iommu.h
new file mode 100644
index 00000000..6826eefa
--- /dev/null
+++ b/common/sys/iommu.h
@@ -0,0 +1,6 @@
+#ifndef SYS__IOMMU_H__
+#define SYS__IOMMU_H__
+// Disable all Intel VT-d and AMD-Vi IOMMU units; implemented for x86 only.
+void iommu_disable_all(void);
+
+#endif
tab: 248 wrap: offon