Add write-combining and MTRR support to speed up VBE framebuffer
diff --git a/Makefile b/Makefile
index f3a90715..265cc974 100644
--- a/Makefile
+++ b/Makefile
@@ -10,7 +10,7 @@ all: stage2 decompressor
clean: stage2-clean decompressor-clean
rm -f stage2/stage2.bin.gz
- rm -f limine-install
+ #rm -f limine-install
stage2:
$(MAKE) -C stage2 all
diff --git a/limine.bin b/limine.bin
index 34171bfc..35db061c 100644
Binary files a/limine.bin and b/limine.bin differ
diff --git a/stage2/drivers/vbe.c b/stage2/drivers/vbe.c
index 33bda7fd..08d8f831 100644
--- a/stage2/drivers/vbe.c
+++ b/stage2/drivers/vbe.c
@@ -8,6 +8,7 @@
#include <lib/print.h>
#include <lib/image.h>
#include <mm/pmm.h>
+#include <mm/mtrr.h>
#define VGA_FONT_WIDTH 8
#define VGA_FONT_HEIGHT 16
@@ -274,6 +275,10 @@ void vbe_putchar(char c) {
void vbe_tty_init(int *_rows, int *_cols, uint32_t *_colours, int _margin, struct image *_background) {
init_vbe(&vbe_framebuffer, &vbe_pitch, &vbe_width, &vbe_height, &vbe_bpp);
+
+ mtrr_set_range((uint64_t)(size_t)vbe_framebuffer,
+ (uint64_t)vbe_pitch * vbe_height, MTRR_MEMORY_TYPE_WC);
+
vga_font_retrieve();
*_cols = cols = (vbe_width - _margin * 2) / VGA_FONT_WIDTH;
*_rows = rows = (vbe_height - _margin * 2) / VGA_FONT_HEIGHT;
diff --git a/stage2/main.c b/stage2/main.c
index 5c7ec2a9..b20eb736 100644
--- a/stage2/main.c
+++ b/stage2/main.c
@@ -10,6 +10,7 @@
#include <fs/file.h>
#include <lib/elf.h>
#include <mm/pmm.h>
+#include <mm/mtrr.h>
#include <protos/stivale.h>
#include <protos/stivale2.h>
#include <protos/linux.h>
@@ -17,6 +18,8 @@
#include <menu.h>
void entry(int boot_drive) {
+ mtrr_save();
+
term_textmode();
print("Limine " LIMINE_VERSION "\n\n");
diff --git a/stage2/mm/mtrr.c b/stage2/mm/mtrr.c
new file mode 100644
index 00000000..5f90c4e3
--- /dev/null
+++ b/stage2/mm/mtrr.c
@@ -0,0 +1,111 @@
+#include <stdint.h>
+#include <stddef.h>
+#include <mm/mtrr.h>
+#include <mm/pmm.h>
+#include <sys/cpu.h>
+#include <lib/print.h>
+#include <lib/blib.h>
+
+struct mtrr { // one variable-range MTRR: the MSR pair 0x200 + 2*i and 0x200 + 2*i + 1
+ uint64_t base; // raw value of the even MSR (0x200 + 2*i)
+ uint64_t mask; // raw value of the odd MSR (0x200 + 2*i + 1); bit 11 is tested as "valid"
+};
+
+// Returns true if any 4 KiB page of [block_base, block_base + block_size) matches
+// the variable MTRR *mtrr; an MTRR whose valid bit (mask bit 11) is clear never matches.
+static bool is_block_in_mtrr_range(struct mtrr *mtrr, uint64_t block_base, uint64_t block_size) {
+    if (!(mtrr->mask & (1 << 11)))
+        return false;
+    // Compare address bits only: the low 12 bits carry the type/valid fields
+    uint64_t base = mtrr->base & ~(uint64_t)0xfff;
+    uint64_t mask = mtrr->mask & ~(uint64_t)0xfff;
+    // End bound is block_base + block_size; the size alone is not an address
+    for (uint64_t i = block_base; i < block_base + block_size; i += 4096) {
+        if ((i & mask) == (base & mask))
+            return true;
+    }
+    return false;
+}
+
+// Programs a free variable-range MTRR so [base, base + size) uses memory_type.
+// The range is widened to whole 4 KiB pages and then to a power of two, the only
+// shape a base/mask pair can describe. Returns true once a register pair has been
+// written; false when MTRRs (or WC, if requested) are unsupported, the range is
+// empty or not aligned on its size, it overlaps a valid MTRR, or no slot is free.
+bool mtrr_set_range(uint64_t base, uint64_t size, uint8_t memory_type) {
+    uint32_t eax, ebx, ecx, edx;
+    // Bail out without MTRRs (CPUID.01H:EDX[12]) or for an empty range (all-zero mask)
+    cpuid(1, 0, &eax, &ebx, &ecx, &edx);
+    if (!(edx & (1 << 12)) || size == 0)
+        return false;
+    cpuid(0x80000008, 0, &eax, &ebx, &ecx, &edx);
+    uint8_t maxphysaddr = eax & 0xff;
+    print("mtrr: Max phys addr: %u\n", maxphysaddr);
+    // Grow size by the offset lost when base is rounded down to a page
+    size = ALIGN_UP(size + (base & 0xfff), 0x1000);
+    base = ALIGN_DOWN(base, 0x1000);
+    // Round up to a power of two and require base to be aligned on it
+    uint64_t span = 0x1000;
+    while (span < size && !(span >> 63))
+        span <<= 1;
+    if (span < size || (base & (span - 1)))
+        return false;
+    size = span;
+    uint64_t mask = (((uint64_t)1 << maxphysaddr) - 1) & ~((uint64_t)size - 1);
+    print("mtrr: Base: %X Mask: %X\n", base, mask);
+    uint64_t ia32_mtrrcap = rdmsr(0xfe);
+    // IA32_MTRRCAP bit 10 reports write-combining support
+    if (ia32_mtrrcap & (1 << 10)) {
+        print("mtrr: Write-combining supported\n");
+    } else if (memory_type == MTRR_MEMORY_TYPE_WC) {
+        return false;
+    }
+    uint8_t var_reg_count = ia32_mtrrcap & 0xff;
+    // Check if we're not overlapping any other MTRR range
+    for (uint8_t i = 0; i < var_reg_count; i++) {
+        struct mtrr mtrr;
+        mtrr.base = rdmsr(0x200 + i * 2);
+        mtrr.mask = rdmsr(0x200 + i * 2 + 1);
+        if (is_block_in_mtrr_range(&mtrr, base, size))
+            return false;
+    }
+    print("mtrr: Block does not overlap other ranges, good to go\n");
+    // Take the first variable MTRR whose valid bit (mask bit 11) is clear
+    for (uint8_t i = 0; i < var_reg_count; i++) {
+        if (rdmsr(0x200 + i * 2 + 1) & (1 << 11))
+            continue;
+        // OR, not AND: type lives in the low bits of base, valid is mask bit 11
+        wrmsr(0x200 + i * 2, base | memory_type);
+        wrmsr(0x200 + i * 2 + 1, mask | (1 << 11));
+        print("mtrr: Set range in variable MTRR number %u\n", i);
+        return true;
+    }
+    return false;
+}
+
+static struct mtrr *saved_mtrr = NULL; // snapshot made by mtrr_save(); NULL until then
+// Snapshots all variable MTRR pairs for mtrr_restore(); no-op without the CPUID.01H:EDX[12] MTRR bit.
+void mtrr_save(void) {
+    uint32_t eax, ebx, ecx, edx;
+    cpuid(1, 0, &eax, &ebx, &ecx, &edx);
+    if (!(edx & (1 << 12)))
+        return;
+    uint8_t var_reg_count = rdmsr(0xfe) & 0xff;
+    if (!saved_mtrr)
+        saved_mtrr = conv_mem_alloc(var_reg_count * sizeof(struct mtrr));
+    for (uint8_t i = 0; i < var_reg_count; i++) {
+        saved_mtrr[i].base = rdmsr(0x200 + i * 2);
+        saved_mtrr[i].mask = rdmsr(0x200 + i * 2 + 1);
+    }
+}
+
+// Writes the pairs captured by mtrr_save() back to the MSRs; no-op when no snapshot exists.
+void mtrr_restore(void) {
+    if (!saved_mtrr)
+        return;
+    uint8_t var_reg_count = rdmsr(0xfe) & 0xff;
+    for (uint8_t i = 0; i < var_reg_count; i++) {
+        wrmsr(0x200 + i * 2, saved_mtrr[i].base);
+        wrmsr(0x200 + i * 2 + 1, saved_mtrr[i].mask);
+    }
+}
diff --git a/stage2/mm/mtrr.h b/stage2/mm/mtrr.h
new file mode 100644
index 00000000..9f76136d
--- /dev/null
+++ b/stage2/mm/mtrr.h
@@ -0,0 +1,17 @@
+#ifndef __MM__MTRR_H__
+#define __MM__MTRR_H__
+
+#include <stdint.h>
+#include <stdbool.h>
+
+#define MTRR_MEMORY_TYPE_UC 0x00 // memory type values for mtrr_set_range(); UC presumably "uncacheable" (Intel SDM naming)
+#define MTRR_MEMORY_TYPE_WC 0x01 // write-combining
+#define MTRR_MEMORY_TYPE_WT 0x04 // presumably "write-through" (Intel SDM naming)
+#define MTRR_MEMORY_TYPE_WP 0x05 // presumably "write-protected" (Intel SDM naming)
+#define MTRR_MEMORY_TYPE_WB 0x06 // presumably "write-back" (Intel SDM naming)
+
+void mtrr_save(void); // snapshot every variable-range MTRR pair
+void mtrr_restore(void); // write the snapshot taken by mtrr_save() back to the MSRs
+bool mtrr_set_range(uint64_t base, uint64_t size, uint8_t memory_type); // memory_type is an MTRR_MEMORY_TYPE_* value; true if an MTRR was programmed
+
+#endif
diff --git a/stage2/protos/chainload.c b/stage2/protos/chainload.c
index d36e98c9..a5018ede 100644
--- a/stage2/protos/chainload.c
+++ b/stage2/protos/chainload.c
@@ -6,6 +6,7 @@
#include <lib/blib.h>
#include <drivers/disk.h>
#include <lib/term.h>
+#include <mm/mtrr.h>
__attribute__((section(".realmode"), used))
static void spinup(uint8_t drive) {
@@ -73,5 +74,7 @@ void chainload(void) {
read(drive, (void *)0x7c00, 0, 512);
}
+ mtrr_restore();
+
spinup(drive);
}
diff --git a/stage2/protos/linux.c b/stage2/protos/linux.c
index 48470b2e..4fa19853 100644
--- a/stage2/protos/linux.c
+++ b/stage2/protos/linux.c
@@ -8,6 +8,7 @@
#include <lib/config.h>
#include <lib/print.h>
#include <mm/pmm.h>
+#include <mm/mtrr.h>
#define KERNEL_LOAD_ADDR ((size_t)0x100000)
#define INITRD_LOAD_ADDR ((size_t)0x1000000)
@@ -174,5 +175,7 @@ void linux_load(char *cmdline, int boot_drive) {
term_deinit();
+ mtrr_restore();
+
spinup(real_mode_code_seg, kernel_entry_seg);
}
diff --git a/stage2/protos/stivale.c b/stage2/protos/stivale.c
index 11492fff..1250442f 100644
--- a/stage2/protos/stivale.c
+++ b/stage2/protos/stivale.c
@@ -16,6 +16,7 @@
#include <fs/file.h>
#include <mm/vmm.h>
#include <mm/pmm.h>
+#include <mm/mtrr.h>
#include <stivale/stivale.h>
#define KASLR_SLIDE_BITMASK 0x03FFFF000u
@@ -258,6 +259,8 @@ pagemap_t stivale_build_pagemap(bool level5pg, struct e820_entry_t *memmap,
__attribute__((noreturn)) void stivale_spinup(
int bits, bool level5pg, pagemap_t pagemap,
uint64_t entry_point, void *stivale_struct, uint64_t stack) {
+ mtrr_restore();
+
if (bits == 64) {
// If we're going 64, we might as well call this BIOS interrupt
// to tell the BIOS that we are entering Long Mode, since it is in
