compress: transparent gzip-compression (#545) (#549)
* compress: transparent gzip-compression provides a port of embeddable pdgzip (CC0) with small performance tweaks to attain better-than-zlib decode times. we have a couple of unique tricks up our sleeve: - extra validation of huffman trees via the Kraft-McMillan condition - multi-level huffman tables; standard technique in accelerated zlib decompressors, here implemented in a low code volume. - semi-space based window design to decrease overall latency and move a hotspot to semi-space memcpy() back to history buffer. - full support of the RFC quirks for deflate/gzip, including fixed huffman trees. - fast crc32 implementation via runtime-computed tables and the slicing-by-4 algorithm. comparison (size): tinf ~2.5kb x86 code, this ~15kb x86 code, zlib ~22kb of x86 code. comparison (code volume): tinf 639 sloc, this 594 sloc, zlib >=10k sloc, libdeflate >=7.7k sloc. performance (r7 pro 7840u; enwik8 100MB): tinf ~2.3s, this ~409.7 ms, zlib ~417.7 ms. extra: fuzzed with afl++. pending addition of fuzzing scripts and automated ci-bound testing. thin wrapper over file streams automatically used when the $-prefix is found as per #545. * contrib: check in the gzip-fuzzing harness this fuzzing harness is not intended to be run by end users or distro maintainers, hence it does not follow a lot of the standard "portability" kludges of mainline limine code. * compress: gzip.c/.h - document limitations. document the behaviour of the ->size field in the public API of gzip.h; adhere to a more uniform style. * fix ci replace the 2M image with a 32M UEFI image with a specific head/track/sector geometry. * test: limine.c now uses outw() for QEMU-specific fast path shutdown.
* gzip.c: more memory-frugal by constructing fixed tables on demand also move crc32 tables to ext_mem_alloc, document peak mem usage * contrib: mechanical test for validating gzip compression * minor cosmetic * compress: properly hook up gzip to uri_open - remove ISIZE parsing in gzip.c - add a streaming file_handle wrapper over blake2b. - make ->read and fread return the actual # of bytes read (needed by the gzip decoder downstream users to determine real EOF) - add is_high_mem and load_addr_64 to file_handle (after freadall_mode inlined body in uri_open relocates to high memory) - as a result of the changes, freadall and freadall_mode are no longer used anywhere by the code base. hence they were removed. - update the signature of uri_open to accept memcpy functions to/from high memory and a boolean parameter for whether high memory allocations are acceptable for this specific resource. - inline size-agnostic (stretchy-vector type) functionality to uri_open to facilitate streaming unknown-size decoding. uri_open now returns memfiles, always, mimicking the behaviour prior to streaming blake2b change commit. - minimum patch in limine_asm to make limine_memcpy_64_asm take two wide pointers. - update downstream callers of uri_open, as well as the gzip fuzzing suite. * fs: fat32.s2.c: update a stale comment * crypt: blake2b.h: opaque pointer to fs guts * lib: uri.c silence warning on non-i386 * lib: uri.c: remove stale comment. * move to pdgzip * drop old * test/test.mk: rollback a merge artifact. * test.mk: re-add extra cflags
diff --git a/.gitignore b/.gitignore
index 81f17d2e..f8c7df7c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -31,6 +31,9 @@
/common/lib/stb_image.h
/common/cc-runtime.s2.c
/cc-runtime
+/common/compress/pdgzip.c
+/common/compress/pdgzip.h
+/pdgzip
/libfdt
/edk2-ovmf
/bochsout.txt
diff --git a/3RDPARTY.md b/3RDPARTY.md
index 4a255511..cdcb17eb 100644
--- a/3RDPARTY.md
+++ b/3RDPARTY.md
@@ -58,6 +58,9 @@ used for wallpaper image loading.
- [libfdt](https://github.com/osdev0/libfdt) (BSD-2-Clause) is used for
manipulating Flat Device Trees.
+- [pdgzip](https://github.com/iczelia/pdgzip) (0BSD) is used to provide the
+transparent gzip decompression layer for loaded files.
+
Note that some of these projects, or parts of them, are provided under
dual-licensing, in which case, in the above list, the only license mentioned is
the one chosen by the Limine developers. Refer to each individual project's
diff --git a/GNUmakefile.in b/GNUmakefile.in
index fd0c24aa..2e19b9f3 100644
--- a/GNUmakefile.in
+++ b/GNUmakefile.in
@@ -209,8 +209,8 @@ $(call MKESCAPE,$(BINDIR))/limine-uefi-cd.bin: $(if $(BUILD_UEFI_IA32),$(call MK
ifneq ($(BUILD_UEFI_CD),no)
$(MKDIR_P) '$(call SHESCAPE,$(BINDIR))'
rm -f '$(call SHESCAPE,$(BINDIR))/limine-uefi-cd.bin'
- dd if=/dev/zero of='$(call SHESCAPE,$(BINDIR))/limine-uefi-cd.bin' bs=512 count=5760 2>/dev/null
- mformat -i '$(call SHESCAPE,$(BINDIR))/limine-uefi-cd.bin' -f 2880 -N 12345678 ::
+ dd if=/dev/zero of='$(call SHESCAPE,$(BINDIR))/limine-uefi-cd.bin' bs=512 count=32768 2>/dev/null
+ mformat -i '$(call SHESCAPE,$(BINDIR))/limine-uefi-cd.bin' -h 64 -t 32 -s 16 -N 12345678 ::
LIMINE_UEFI_CD_TMP="$$(mktemp -d)"; \
mkdir -p "$$LIMINE_UEFI_CD_TMP"/EFI/BOOT; \
cp '$(call SHESCAPE,$(BUILDDIR))/common-uefi-aarch64/BOOTAA64.EFI' "$$LIMINE_UEFI_CD_TMP"/EFI/BOOT/ 2>/dev/null; \
diff --git a/bootstrap b/bootstrap
index c4b22a7a..64e35bf4 100755
--- a/bootstrap
+++ b/bootstrap
@@ -81,6 +81,13 @@ if ! test -f version; then
dae79833b57a01b9fd3e359ee31def69f5ae899b
cp cc-runtime/src/cc-runtime.c common/cc-runtime.s2.c
+ clone_repo_commit \
+ https://github.com/iczelia/pdgzip.git \
+ pdgzip \
+ 16c41d9af067c4185c136622c58ad4188609a3d1
+ cp pdgzip/pdgzip.c common/compress/pdgzip.c
+ cp pdgzip/pdgzip.h common/compress/pdgzip.h
+
clone_repo_commit \
https://github.com/Limine-Bootloader/limine-protocol.git \
limine-protocol \
diff --git a/common/compress/gzip.c b/common/compress/gzip.c
new file mode 100644
index 00000000..345a91bb
--- /dev/null
+++ b/common/compress/gzip.c
@@ -0,0 +1,146 @@
+/* Limine glue around pdgzip: transparent gzip decompression layer over a
+ * file_handle. The underlying decoder lives in common/compress/pdgzip.c
+ * (imported by ./bootstrap from the upstream iczelia/pdgzip repo); this
+ * file only wires pdgzip's streaming read-callback API into Limine's
+ * file_handle abstraction and adds support for random-access reads via
+ * rewind-and-skip.
+ *
+ * Copyright (C) 2019-2026 Mintsuki and contributors.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <lib/libc.h>
+#include <lib/misc.h>
+#include <lib/print.h>
+#include <mm/pmm.h>
+#include <compress/gzip.h>
+#include <compress/pdgzip.h>
+
+struct gzip_handle { /* decoder state; gzip_open stores it in file_handle->fd */
+ struct file_handle * source; /* compressed file (owned; gzip_close closes it) */
+ pdgzip_t * gz; /* decoder backed by `scratch` */
+ void * scratch; /* pdgzip scratch buffer */
+ size_t scratch_sz; /* allocation size of `scratch`, from pdgzip_state_size() */
+ uint64_t src_pos; /* next byte to pull from `source` */
+ uint64_t dec_pos; /* current decompressed stream offset */
+};
+
+/* pdgzip read callback: pull up to `len` bytes from the compressed source
+   starting at gh->src_pos and report how many bytes fread() delivered.
+   A short read (including zero) signals EOF to the decoder. */
+static size_t gz_source_read(void * user, void * buf, size_t len) {
+    struct gzip_handle * gh = user;
+    uint64_t avail = gh->source->size - gh->src_pos; /* bytes left in the source */
+    if ((uint64_t)len > avail) len = (size_t)avail;  /* never ask past the end */
+    if (len == 0) return 0;
+    uint64_t got = fread(gh->source, buf, gh->src_pos, len);
+    gh->src_pos += got; /* advance only by what was really read */
+    return (size_t)got; /* got <= len, so the narrowing is lossless */
+}
+
+/* (Re)initialize the decoder for a fresh pass over the compressed stream.
+   pdgzip_init zeroes its own scratch, so we only need to reset our own
+   bookkeeping. Used by gzip_open and by gzip_read on backward seeks. */
+static void gz_reset(struct gzip_handle * gh) {
+ pdgzip_cfg_t cfg = { .read = gz_source_read, .user = gh, .concat = 0 }; /* NOTE(review): concat = 0 presumably selects single-member decoding - confirm in pdgzip.h */
+ gh->src_pos = 0; /* source is re-read from offset 0; a blake2b filter source panics on such a non-sequential read */
+ gh->dec_pos = 0;
+ gh->gz = pdgzip_init(gh->scratch, &cfg); /* NOTE(review): result is not checked - confirm pdgzip_init cannot fail */
+}
+
+static uint64_t gzip_read(struct file_handle * file, void * buf, uint64_t loc, uint64_t count) {
+    struct gzip_handle * state = file->fd;
+    /* A read that starts before the current decompressed offset restarts
+       the decoder from the top of the compressed stream. */
+    if (state->dec_pos > loc) gz_reset(state);
+    /* Decode and throw away output until dec_pos reaches `loc`. Hitting
+       end-of-stream first means `loc` lies past the end: report 0 bytes. */
+    uint8_t scrap[4096];
+    while (state->dec_pos < loc) {
+        uint64_t behind = loc - state->dec_pos;
+        size_t ask = behind <= sizeof(scrap) ? (size_t)behind : sizeof(scrap);
+        int64_t got = pdgzip_read(state->gz, scrap, ask);
+        if (got < 0) panic(false, "gzip: decompression error during seek");
+        if (got == 0) return 0;
+        state->dec_pos += (uint64_t)got;
+    }
+    /* Fill the caller's buffer in slices of at most 64 KiB. */
+    uint8_t * out = buf;
+    uint64_t done = 0;
+    while (done < count) {
+        size_t ask = count - done > 65536 ? 65536 : (size_t)(count - done);
+        int64_t got = pdgzip_read(state->gz, out + done, ask);
+        if (got < 0) panic(false, "gzip: decompression error");
+        if (got == 0) break; /* end of stream: return the short count */
+        done += (uint64_t)got;
+        state->dec_pos += (uint64_t)got;
+    }
+    return done;
+}
+
+static void gzip_close(struct file_handle * file) { /* ->close callback of a gzip_open handle */
+    struct gzip_handle * state = file->fd;
+    fclose(state->source); /* the wrapper owns the compressed handle */
+    pmm_free(state->scratch, state->scratch_sz);
+    pmm_free(state, sizeof *state); /* `file` itself is not freed here */
+}
+
+bool gzip_check(struct file_handle * fd) { /* true iff fd begins with the gzip magic 0x1F 0x8B */
+    uint8_t magic[2] = { 0, 0 }; /* defined contents even if the read comes back short */
+    if (fd->size < 18 || fread(fd, magic, 0, 2) != 2) return false; /* 18 = 10-byte header + 8-byte trailer (RFC 1952) */
+    return magic[0] == 0x1F && magic[1] == 0x8B;
+}
+
+struct file_handle * gzip_open(struct file_handle * compressed) {
+ /* The decompressed size is not known up front. The 4-byte ISIZE trailer
+ is unreliable (modulo 2^32, spec defect) and callers must instead
+ drain until gzip_read returns 0 bytes (EOS). Advertise an unknown
+ size via UINT64_MAX. */
+ struct gzip_handle * gh = ext_mem_alloc(sizeof(struct gzip_handle));
+ gh->source = compressed; /* ownership moves to the wrapper; gzip_close closes it */
+ gh->scratch_sz = pdgzip_state_size();
+ gh->scratch = ext_mem_alloc(gh->scratch_sz);
+ gz_reset(gh); /* creates the decoder and zeroes src_pos / dec_pos */
+ /* Fields not assigned below depend on ext_mem_alloc returning zeroed memory. */
+ struct file_handle * ret = ext_mem_alloc(sizeof(struct file_handle));
+ ret->fd = gh;
+ ret->read = (void *) gzip_read;
+ ret->close = (void *) gzip_close;
+ ret->size = UINT64_MAX; /* "unknown" marker, not a real length */
+ ret->vol = compressed->vol;
+ if (compressed->path != NULL && compressed->path_len > 0) { /* give the wrapper its own copy of the path */
+ ret->path = ext_mem_alloc(compressed->path_len);
+ memcpy(ret->path, compressed->path, compressed->path_len);
+ ret->path_len = compressed->path_len;
+ }
+#if defined (UEFI)
+ ret->efi_part_handle = compressed->efi_part_handle;
+#endif
+ ret->pxe = compressed->pxe; /* mirror the source's PXE metadata */
+ ret->pxe_ip = compressed->pxe_ip;
+ ret->pxe_port = compressed->pxe_port;
+ return ret;
+}
diff --git a/common/compress/gzip.h b/common/compress/gzip.h
new file mode 100644
index 00000000..67b0a230
--- /dev/null
+++ b/common/compress/gzip.h
@@ -0,0 +1,53 @@
+/* embeddable gzip decoder: Copyright (C) 2026 Kamila Szewczyk <k@iczelia.net>
+ * limine: Copyright (C) 2019-2026 Mintsuki and contributors.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef COMPRESS__GZIP_H__
+#define COMPRESS__GZIP_H__
+
+#include <fs/file.h>
+
+/* Check if a file handle points to gzip-compressed data (0x1F 0x8B magic). */
+bool gzip_check(struct file_handle * fd);
+
+/* Wrap a gzip-compressed file handle in a decompressing layer.
+ *
+ * Returns a new file_handle whose read callback transparently
+ * decompresses the data. The returned handle takes ownership of
+ * `compressed` and will close it when itself is closed.
+ *
+ * WARNING: ->size of the resulting file_handle is always UINT64_MAX,
+ * meaning "unknown". The ISIZE field of the gzip trailer is not used:
+ * it is stored modulo 2^32 (wrong for files of 4 GiB or more) and need
+ * not reflect the genuine decompressed size in adversarial circumstances.
+ *
+ * The real decompressed size can only be authoritatively obtained by
+ * fully decompressing the file, i.e. reading until 0 bytes are returned.
+ *
+ * Supports very fast sequential reads and random-access reads (with
+ * an implicit rewind + skip penalty inherent to the gzip format).
+ */
+struct file_handle * gzip_open(struct file_handle * compressed);
+
+#endif
diff --git a/common/crypt/blake2b.c b/common/crypt/blake2b.c
index b62183f3..f52637a6 100644
--- a/common/crypt/blake2b.c
+++ b/common/crypt/blake2b.c
@@ -6,6 +6,8 @@
#include <stddef.h>
#include <crypt/blake2b.h>
#include <lib/libc.h>
+#include <lib/misc.h>
+#include <mm/pmm.h>
#define BLAKE2B_BLOCK_BYTES 128
#define BLAKE2B_KEY_BYTES 64
@@ -218,3 +220,73 @@ void blake2b(void *out, const void *in, size_t in_len) {
blake2b_update(&state, in, in_len);
blake2b_final(&state, out);
}
+
+/* Streaming filter: wraps a source file_handle and hashes bytes as
+   they are read sequentially. The hash is finalized and compared via
+   blake2b_check_hash(). Non-sequential reads panic -- the filter is
+   meant to sit underneath the gzip bitreader or uri_open's drain loop,
+   both of which advance monotonically. */
+struct blake2b_handle {
+ struct file_handle *source; /* wrapped handle; closed by blake2b_close */
+ struct blake2b_state state; /* running hash over the bytes returned so far */
+ uint64_t pos; /* the only offset blake2b_read will accept next */
+ bool finalized; /* true once `digest` has been computed */
+ uint8_t digest[BLAKE2B_OUT_BYTES]; /* cached result; valid when `finalized` */
+};
+
+static uint64_t blake2b_read(struct file_handle *fh, void *buf, uint64_t loc, uint64_t count) {
+    struct blake2b_handle *filter = fh->fd;
+    if (filter->pos != loc) { /* hashing is order-sensitive: only the next unread offset is accepted */
+        panic(false, "blake2b filter: non-sequential read (pos=%x, loc=%x)",
+              (uint64_t)filter->pos, loc); /* NOTE(review): both arguments are 64-bit - confirm panic()'s %x consumes 64-bit varargs */
+    }
+    uint64_t nread = fread(filter->source, buf, loc, count);
+    blake2b_update(&filter->state, buf, nread); /* hash exactly the bytes that were delivered */
+    filter->pos += nread;
+    return nread;
+}
+
+static void blake2b_close(struct file_handle *fh) { /* ->close callback of a blake2b_open handle */
+    struct blake2b_handle *filter = fh->fd;
+    fclose(filter->source); /* the filter owns the wrapped handle */
+    pmm_free(filter, sizeof *filter);
+}
+
+struct file_handle *blake2b_open(struct file_handle *source) { /* wrap `source` in a hashing filter; the filter closes it on close */
+ struct blake2b_handle *h = ext_mem_alloc(sizeof(struct blake2b_handle));
+ blake2b_init(&h->state);
+ h->source = source;
+ h->pos = 0; /* first accepted read must start at offset 0 */
+ h->finalized = false;
+ /* Build the public handle; it reports the same size and metadata as `source`. */
+ struct file_handle *ret = ext_mem_alloc(sizeof(struct file_handle));
+ ret->fd = h;
+ ret->read = (void *)blake2b_read; /* blake2b_check_hash identifies filter handles by this pointer */
+ ret->close = (void *)blake2b_close;
+ ret->size = source->size;
+ ret->vol = source->vol;
+ if (source->path != NULL && source->path_len > 0) { /* give the filter its own copy of the path */
+ ret->path = ext_mem_alloc(source->path_len);
+ memcpy(ret->path, source->path, source->path_len);
+ ret->path_len = source->path_len;
+ }
+#if defined (UEFI)
+ ret->efi_part_handle = source->efi_part_handle;
+#endif
+ ret->pxe = source->pxe; /* mirror the source's PXE metadata */
+ ret->pxe_ip = source->pxe_ip;
+ ret->pxe_port = source->pxe_port;
+ return ret;
+}
+
+bool blake2b_check_hash(struct file_handle *fh, void *reference_hash) {
+    if (fh->read != (void *)blake2b_read) { /* ->fd is a blake2b_handle only for handles from blake2b_open */
+        panic(false, "blake2b_check_hash: not a blake2b filter handle");
+    }
+    struct blake2b_handle *filter = fh->fd;
+    if (filter->finalized == false) {
+        blake2b_final(&filter->state, filter->digest); /* finish once; later calls reuse the cached digest */
+        filter->finalized = true;
+    }
+    return !memcmp(filter->digest, reference_hash, BLAKE2B_OUT_BYTES); /* true when all digest bytes match */
+}
diff --git a/common/crypt/blake2b.h b/common/crypt/blake2b.h
index 313a7c63..bcee067f 100644
--- a/common/crypt/blake2b.h
+++ b/common/crypt/blake2b.h
@@ -7,4 +7,8 @@
void blake2b(void *out, const void *in, size_t in_len);
+struct file_handle;
+struct file_handle * blake2b_open(struct file_handle * source);
+bool blake2b_check_hash(struct file_handle *fd, void* reference_hash);
+
#endif
diff --git a/common/fs/fat32.s2.c b/common/fs/fat32.s2.c
index 00d38f93..956aa43f 100644
--- a/common/fs/fat32.s2.c
+++ b/common/fs/fat32.s2.c
@@ -605,7 +605,7 @@ char *fat32_get_label(struct volume *part) {
return context.label;
}
-static void fat32_read(struct file_handle *handle, void *buf, uint64_t loc, uint64_t count);
+static uint64_t fat32_read(struct file_handle *handle, void *buf, uint64_t loc, uint64_t count);
static void fat32_close(struct file_handle *file);
struct file_handle *fat32_open(struct volume *part, const char *path) {
@@ -622,7 +622,7 @@ struct file_handle *fat32_open(struct volume *part, const char *path) {
unsigned int current_index = 0;
char current_part[FAT32_LFN_MAX_FILENAME_LENGTH];
- // skip trailing slashes
+ // skip leading slashes
while (path[current_index] == '/') {
current_index++;
}
@@ -719,11 +719,12 @@ struct file_handle *fat32_open(struct volume *part, const char *path) {
}
}
-static void fat32_read(struct file_handle *file, void *buf, uint64_t loc, uint64_t count) {
+static uint64_t fat32_read(struct file_handle *file, void *buf, uint64_t loc, uint64_t count) {
struct fat32_file_handle *f = file->fd;
if (!read_cluster_chain(&f->context, f->cluster_chain, f->chain_len, buf, loc, count)) {
panic(false, "fat32: cluster chain read failed (corrupted filesystem?)");
}
+ return count;
}
static void fat32_close(struct file_handle *file) {
diff --git a/common/fs/file.h b/common/fs/file.h
index 86474bd0..80016924 100644
--- a/common/fs/file.h
+++ b/common/fs/file.h
@@ -17,13 +17,15 @@ char *fs_get_label(struct volume *part);
struct file_handle {
bool is_memfile;
bool readall;
+ bool is_high_mem;
struct volume *vol;
char *path;
size_t path_len;
void *fd;
- void (*read)(void *fd, void *buf, uint64_t loc, uint64_t count);
+ uint64_t (*read)(void *fd, void *buf, uint64_t loc, uint64_t count);
void (*close)(void *fd);
uint64_t size;
+ uint64_t load_addr_64;
#if defined (UEFI)
EFI_HANDLE efi_part_handle;
#endif
@@ -33,13 +35,7 @@ struct file_handle {
};
struct file_handle *fopen(struct volume *part, const char *filename);
-void fread(struct file_handle *fd, void *buf, uint64_t loc, uint64_t count);
+uint64_t fread(struct file_handle *fd, void *buf, uint64_t loc, uint64_t count);
void fclose(struct file_handle *fd);
-void *freadall(struct file_handle *fd, uint32_t type);
-void *freadall_mode(struct file_handle *fd, uint32_t type, bool allow_high_allocs
-#if defined (__i386__)
- , void (*memcpy_to_64)(uint64_t dst, void *src, size_t count)
-#endif
-);
#endif
diff --git a/common/fs/file.s2.c b/common/fs/file.s2.c
index 5cde4663..ad133c3d 100644
--- a/common/fs/file.s2.c
+++ b/common/fs/file.s2.c
@@ -81,104 +81,14 @@ void fclose(struct file_handle *fd) {
pmm_free(fd, sizeof(struct file_handle));
}
-void fread(struct file_handle *fd, void *buf, uint64_t loc, uint64_t count) {
+uint64_t fread(struct file_handle *fd, void *buf, uint64_t loc, uint64_t count) {
if (fd->is_memfile) {
if (loc >= fd->size || count > fd->size - loc) {
panic(false, "fread: attempted out of bounds read");
}
memcpy(buf, fd->fd + loc, count);
+ return count;
} else {
- fd->read(fd, buf, loc, count);
- }
-}
-
-void *freadall(struct file_handle *fd, uint32_t type) {
- return freadall_mode(fd, type, false
-#if defined (__i386__)
- , NULL
-#endif
- );
-}
-
-void *freadall_mode(struct file_handle *fd, uint32_t type, bool allow_high_allocs
-#if defined (__i386__)
- , void (*memcpy_to_64)(uint64_t dst, void *src, size_t count)
-#endif
-) {
-#if defined (__i386__)
- static uint64_t high_ret;
-
- if (memcpy_to_64 == NULL) {
- allow_high_allocs = false;
- }
-#endif
-
- if (fd->is_memfile) {
- if (fd->readall) {
-#if defined (__i386__)
- if (allow_high_allocs == true) {
- high_ret = (uintptr_t)fd->fd;
- return &high_ret;
- }
-#endif
- return fd->fd;
- }
-#if defined (UEFI) && defined (__x86_64__)
- if (!allow_high_allocs && (uintptr_t)fd->fd >= 0x100000000) {
- void *newptr = ext_mem_alloc_type(fd->size, type);
- memcpy(newptr, fd->fd, fd->size);
- pmm_free(fd->fd, fd->size);
- fd->fd = newptr;
- } else {
-#endif
- memmap_alloc_range((uint64_t)(size_t)fd->fd, ALIGN_UP(fd->size, 4096, panic(false, "Alignment overflow")), type, 0, true, false, false);
-#if defined (UEFI) && defined (__x86_64__)
- }
-#endif
- fd->readall = true;
-#if defined (__i386__)
- if (allow_high_allocs == true) {
- high_ret = (uintptr_t)fd->fd;
- return &high_ret;
- }
-#endif
- return fd->fd;
- } else {
- void *ret = ext_mem_alloc_type_aligned_mode(fd->size, type, 4096, allow_high_allocs);
-#if defined (__i386__)
- if (allow_high_allocs == true) {
- high_ret = *(uint64_t *)ret;
- if (high_ret < 0x100000000) {
- ret = (void *)(uintptr_t)high_ret;
- goto low_ret;
- }
- void *pool = ext_mem_alloc(0x100000);
- for (uint64_t i = 0; i < fd->size; i += 0x100000) {
- size_t count;
- if (fd->size - i < 0x100000) {
- count = fd->size - i;
- } else {
- count = 0x100000;
- }
- fd->read(fd, pool, i, count);
- memcpy_to_64(high_ret + i, pool, count);
- }
- pmm_free(pool, 0x100000);
- fd->close(fd);
- return &high_ret;
- }
-low_ret:
-#endif
- fd->read(fd, ret, 0, fd->size);
- fd->close(fd);
- fd->fd = ret;
- fd->readall = true;
- fd->is_memfile = true;
-#if defined (__i386__)
- if (allow_high_allocs == true) {
- return &high_ret;
- }
-#endif
- return ret;
+ return fd->read(fd, buf, loc, count);
}
}
diff --git a/common/fs/iso9660.s2.c b/common/fs/iso9660.s2.c
index fd63d676..c1b96bfb 100644
--- a/common/fs/iso9660.s2.c
+++ b/common/fs/iso9660.s2.c
@@ -338,7 +338,7 @@ static struct iso9660_directory_entry *iso9660_find(void *buffer, uint32_t size,
return NULL;
}
-static void iso9660_read(struct file_handle *handle, void *buf, uint64_t loc, uint64_t count);
+static uint64_t iso9660_read(struct file_handle *handle, void *buf, uint64_t loc, uint64_t count);
static void iso9660_close(struct file_handle *file);
struct file_handle *iso9660_open(struct volume *vol, const char *path) {
@@ -519,7 +519,8 @@ setup_handle:;
return handle;
}
-static void iso9660_read(struct file_handle *file, void *buf, uint64_t loc, uint64_t count) {
+static uint64_t iso9660_read(struct file_handle *file, void *buf, uint64_t loc, uint64_t count) {
+ uint64_t requested = count;
struct iso9660_file_handle *f = file->fd;
// Find which extent 'loc' falls into and read across extents as needed
@@ -551,6 +552,7 @@ static void iso9660_read(struct file_handle *file, void *buf, uint64_t loc, uint
if (count > 0) {
panic(false, "iso9660: read beyond end of file");
}
+ return requested;
}
static void iso9660_close(struct file_handle *file) {
diff --git a/common/lib/gterm.c b/common/lib/gterm.c
index cdc37b7b..c77d2dc6 100644
--- a/common/lib/gterm.c
+++ b/common/lib/gterm.c
@@ -588,7 +588,11 @@ static void gterm_parse_config(char *config, struct gterm_config *cfg) {
print("Wallpaper skipped: Secure Boot is active and no hash is associated.\n");
} else {
struct file_handle *bg_file;
- if ((bg_file = uri_open(background_path)) != NULL) {
+ if ((bg_file = uri_open(background_path, MEMMAP_BOOTLOADER_RECLAIMABLE, false
+#if defined (__i386__)
+ , NULL, NULL
+#endif
+ )) != NULL) {
background = image_open(bg_file);
fclose(bg_file);
}
@@ -663,7 +667,11 @@ static void gterm_parse_config(char *config, struct gterm_config *cfg) {
goto config_no_load_font;
}
struct file_handle *f;
- if ((f = uri_open(menu_font)) == NULL) {
+ if ((f = uri_open(menu_font, MEMMAP_BOOTLOADER_RECLAIMABLE, false
+#if defined (__i386__)
+ , NULL, NULL
+#endif
+ )) == NULL) {
print("menu: Could not open font file.\n");
} else {
if (cfg->font_size > f->size) {
diff --git a/common/lib/misc.c b/common/lib/misc.c
index 45cfdbb8..2c850a7b 100644
--- a/common/lib/misc.c
+++ b/common/lib/misc.c
@@ -135,10 +135,14 @@ void *get_device_tree_blob(const char *config, size_t extra_size) {
}
if (dtb_path != NULL) {
struct file_handle *dtb_file;
- if ((dtb_file = uri_open(dtb_path)) == NULL)
+ if ((dtb_file = uri_open(dtb_path, MEMMAP_BOOTLOADER_RECLAIMABLE, false
+#if defined (__i386__)
+ , NULL, NULL
+#endif
+ )) == NULL)
panic(soft_panic, "dtb: Failed to open device tree blob with path `%#`. Is the path correct?", dtb_path);
- dtb = freadall(dtb_file, MEMMAP_BOOTLOADER_RECLAIMABLE);
+ dtb = dtb_file->fd;
size = dtb_file->size;
fclose(dtb_file);
printv("dtb: loaded dtb at %p from file `%#`\n", dtb, dtb_path);
diff --git a/common/lib/uri.c b/common/lib/uri.c
index 7f868791..2850bf7e 100644
--- a/common/lib/uri.c
+++ b/common/lib/uri.c
@@ -12,6 +12,7 @@
#include <menu.h>
#include <lib/getchar.h>
#include <crypt/blake2b.h>
+#include <compress/gzip.h>
// A URI takes the form of: resource(root):/path#hash
// The following function splits up a URI into its components.
@@ -222,8 +223,52 @@ static struct file_handle *uri_boot_dispatch(char *s_part, char *path) {
return fopen(volume, path);
}
-struct file_handle *uri_open(char *uri) {
- struct file_handle *ret;
+// Release a range of memory previously reserved with memmap_alloc_range.
+// Works for both low and high addresses, unlike pmm_free which truncates
+// on 32-bit builds. `count` is in bytes and is rounded up to 4096 first.
+static void uri_release_range(uint64_t addr, uint64_t count) {
+ count = ALIGN_UP(count, 4096, panic(false, "uri: alignment overflow")); // third argument is the overflow handler expression
+ memmap_alloc_range(addr, count, MEMMAP_USABLE, 0, false, false, true); // re-mark as MEMMAP_USABLE; NOTE(review): confirm the meaning of the trailing flags
+}
+
+// Allocate `count` bytes via ext_mem_alloc_type_aligned_mode and return
+// the physical address in *out_addr. When allow_high_mem is true on i386
+// and the allocator landed above 4 GiB, *out_low is set to NULL and the
+// 64-bit address is stored in *out_addr. Otherwise *out_low points at the
+// allocation and *out_addr == (uintptr_t)*out_low.
+static void uri_alloc(uint64_t count, uint32_t type, bool allow_high_mem,
+ void **out_low, uint64_t *out_addr) {
+ void *ret = ext_mem_alloc_type_aligned_mode(count, type, 4096, allow_high_mem); // 4096: presumably the alignment - confirm
+#if defined (__i386__)
+ if (allow_high_mem) {
+ uint64_t addr = *(uint64_t *)ret; // in this mode the returned pointer is read as a pointer to the 64-bit address
+ if (addr >= 0x100000000) { // not reachable through a 32-bit pointer
+ *out_low = NULL;
+ *out_addr = addr;
+ return;
+ }
+ ret = (void *)(uintptr_t)addr; // fits in 32 bits: usable as an ordinary pointer
+ }
+#else
+ (void)allow_high_mem; // no effect: the parameter is already used in the call above
+#endif
+ *out_low = ret;
+ *out_addr = (uintptr_t)ret;
+}
+
+struct file_handle *uri_open(char *uri, uint32_t type, bool allow_high_mem
+#if defined (__i386__)
+ , void (*memcpy_to_64)(uint64_t dst, void *src, size_t count)
+ , void (*memcpy_from_64)(void *dst, uint64_t src, size_t count)
+#endif
+) {
+#if defined (__i386__)
+ if (memcpy_to_64 == NULL || memcpy_from_64 == NULL) {
+ allow_high_mem = false;
+ }
+#endif
+
+ struct file_handle *raw;
char *resource = NULL, *root = NULL, *path = NULL, *hash = NULL;
if (!uri_resolve(uri, &resource, &root, &path, &hash)) {
@@ -234,43 +279,199 @@ struct file_handle *uri_open(char *uri) {
panic(true, "No resource specified for URI `%#`.", uri);
}
+ bool gz_compressed = *resource == '$';
+ if (gz_compressed) {
+ resource++;
+ }
+
if (!strcmp(resource, "hdd")) {
- ret = uri_hdd_dispatch(root, path);
+ raw = uri_hdd_dispatch(root, path);
} else if (!strcmp(resource, "odd")) {
- ret = uri_odd_dispatch(root, path);
+ raw = uri_odd_dispatch(root, path);
} else if (!strcmp(resource, "boot")) {
- ret = uri_boot_dispatch(root, path);
+ raw = uri_boot_dispatch(root, path);
} else if (!strcmp(resource, "guid")) {
- ret = uri_guid_dispatch(root, path);
+ raw = uri_guid_dispatch(root, path);
} else if (!strcmp(resource, "uuid")) {
- ret = uri_guid_dispatch(root, path);
+ raw = uri_guid_dispatch(root, path);
} else if (!strcmp(resource, "fslabel")) {
- ret = uri_fslabel_dispatch(root, path);
+ raw = uri_fslabel_dispatch(root, path);
} else if (!strcmp(resource, "tftp")) {
- ret = uri_tftp_dispatch(root, path);
+ raw = uri_tftp_dispatch(root, path);
} else {
panic(true, "Resource `%s` not valid.", resource);
}
- if (secure_boot_active && hash == NULL && ret != NULL) {
- panic(true, "Secure Boot is active and URI `%#` has no associated hash!", uri);
+ if (raw == NULL) {
+ return NULL;
}
- if (hash != NULL && ret != NULL) {
- uint8_t out_buf[BLAKE2B_OUT_BYTES];
-#if defined (UEFI) && defined (__x86_64__)
- void *file_buf = freadall_mode(ret, MEMMAP_BOOTLOADER_RECLAIMABLE, true);
-#else
- void *file_buf = freadall(ret, MEMMAP_BOOTLOADER_RECLAIMABLE);
-#endif
- blake2b(out_buf, file_buf, ret->size);
- uint8_t hash_buf[BLAKE2B_OUT_BYTES];
+ if (secure_boot_active && hash == NULL) {
+ panic(true, "Secure Boot is active and URI `%#` has no associated hash!", uri);
+ }
+ uint8_t hash_buf[BLAKE2B_OUT_BYTES];
+ if (hash != NULL) {
for (size_t i = 0; i < sizeof(hash_buf); i++) {
hash_buf[i] = digit_to_int(hash[i * 2]) << 4 | digit_to_int(hash[i * 2 + 1]);
}
+ }
- if (memcmp(hash_buf, out_buf, sizeof(out_buf)) != 0) {
+ // Snapshot metadata from raw before the close cascade frees its buffers.
+ struct volume *raw_vol = raw->vol;
+ size_t raw_path_len = raw->path_len;
+ char *raw_path_copy = NULL;
+ if (raw->path != NULL && raw_path_len > 0) {
+ raw_path_copy = ext_mem_alloc(raw_path_len);
+ memcpy(raw_path_copy, raw->path, raw_path_len);
+ }
+#if defined (UEFI)
+ EFI_HANDLE raw_efi_part = raw->efi_part_handle;
+#endif
+ bool raw_pxe = raw->pxe;
+ uint32_t raw_pxe_ip = raw->pxe_ip;
+ uint16_t raw_pxe_port = raw->pxe_port;
+
+ // Build the filter chain: raw -> blake2b -> gzip. blake2b hashes on-disk
+ // (compressed) bytes.
+ struct file_handle *top = raw;
+ struct file_handle *hash_fh = NULL;
+ if (hash != NULL) {
+ hash_fh = blake2b_open(top);
+ top = hash_fh;
+ }
+ if (gz_compressed) {
+ top = gzip_open(top);
+ }
+
+ // Drain the stream into a final allocation.
+ void *buf_low = NULL;
+ uint64_t buf_addr = 0;
+ uint64_t buf_cap = 0;
+ uint64_t buf_len = 0;
+ bool is_high = false;
+
+ if (!gz_compressed) {
+ // Size is authoritative. Single up-front allocation, one copy.
+ uint64_t sz = top->size;
+ uri_alloc(sz, type, allow_high_mem, &buf_low, &buf_addr);
+ is_high = (buf_low == NULL);
+
+#if defined (__i386__)
+ if (is_high) {
+ // 1 MiB bounce loop, same as the old freadall_mode high path.
+ void *pool = ext_mem_alloc(0x100000);
+ for (uint64_t i = 0; i < sz; i += 0x100000) {
+ size_t chunk = sz - i < 0x100000 ? (size_t)(sz - i) : 0x100000;
+ uint64_t got = top->read(top, pool, i, chunk);
+ if (got != chunk) {
+ panic(false, "uri: short read from non-gzip stream");
+ }
+ memcpy_to_64(buf_addr + i, pool, chunk);
+ }
+ pmm_free(pool, 0x100000);
+ } else
+#endif
+ {
+ // In-place fill.
+ if (sz > 0) {
+ uint64_t got = top->read(top, buf_low, 0, sz);
+ if (got != sz) {
+ panic(false, "uri: short read from non-gzip stream");
+ }
+ }
+ }
+ buf_len = sz;
+ } else {
+ // Size is unknown (UINT64_MAX from gzip_open). Stretchy vector.
+ // Initial capacity: 1 MiB, doubles on exhaustion.
+ buf_cap = 0x100000;
+ uri_alloc(buf_cap, type, allow_high_mem, &buf_low, &buf_addr);
+ is_high = (buf_low == NULL);
+
+#if defined (__i386__)
+ // High-path uses a 1 MiB bounce pool for both the read side and
+ // the grow-copy; reused across iterations.
+ void *pool = is_high ? ext_mem_alloc(0x100000) : NULL;
+#endif
+
+ for (;;) {
+ if (buf_len == buf_cap) {
+ // Grow: new capacity = 2x the old one; no upper cap is applied.
+ uint64_t new_cap = buf_cap * 2;
+ void *new_low = NULL;
+ uint64_t new_addr = 0;
+ uri_alloc(new_cap, type, allow_high_mem, &new_low, &new_addr);
+ bool new_is_high = (new_low == NULL);
+
+#if defined (__i386__)
+ if (is_high && new_is_high) {
+ // 64-to-64: bounce via low pool in 1 MiB strides.
+ for (uint64_t off = 0; off < buf_len; off += 0x100000) {
+ size_t chunk = buf_len - off < 0x100000 ? (size_t)(buf_len - off) : 0x100000;
+ memcpy_from_64(pool, buf_addr + off, chunk);
+ memcpy_to_64(new_addr + off, pool, chunk);
+ }
+ } else if (is_high && !new_is_high) {
+ // Shouldn't happen: once we landed high we ask for high.
+ // Keep a defensive path: bounce via pool, then memcpy.
+ for (uint64_t off = 0; off < buf_len; off += 0x100000) {
+ size_t chunk = buf_len - off < 0x100000 ? (size_t)(buf_len - off) : 0x100000;
+ memcpy_from_64(pool, buf_addr + off, chunk);
+ memcpy((uint8_t *)new_low + off, pool, chunk);
+ }
+ } else if (!is_high && new_is_high) {
+ for (uint64_t off = 0; off < buf_len; off += 0x100000) {
+ size_t chunk = buf_len - off < 0x100000 ? (size_t)(buf_len - off) : 0x100000;
+ memcpy_to_64(new_addr + off, (uint8_t *)buf_low + off, chunk);
+ }
+ } else
+#endif
+ {
+ (void)new_is_high; /* Silence unused warning on non-i386. */
+ memcpy(new_low, buf_low, buf_len);
+ }
+
+ // Release the old allocation.
+ uri_release_range(buf_addr, buf_cap);
+
+ buf_low = new_low;
+ buf_addr = new_addr;
+ buf_cap = new_cap;
+#if defined (__i386__)
+ if (is_high != new_is_high && new_is_high && pool == NULL) {
+ pool = ext_mem_alloc(0x100000);
+ }
+ is_high = new_is_high;
+#endif
+ }
+
+ uint64_t want = buf_cap - buf_len;
+ if (want > 65536) want = 65536;
+
+ uint64_t got;
+#if defined (__i386__)
+ if (is_high) {
+ got = top->read(top, pool, buf_len, want);
+ if (got > 0) memcpy_to_64(buf_addr + buf_len, pool, got);
+ } else
+#endif
+ {
+ got = top->read(top, (uint8_t *)buf_low + buf_len, buf_len, want);
+ }
+ if (got == 0) break;
+ buf_len += got;
+ }
+
+#if defined (__i386__)
+ if (pool != NULL) pmm_free(pool, 0x100000);
+#endif
+ }
+
+ // Finalize hash check now that all compressed bytes have flowed through
+ // the filter.
+ if (hash_fh != NULL) {
+ if (!blake2b_check_hash(hash_fh, hash_buf)) {
if (hash_mismatch_panic) {
panic(true, "Blake2b hash for URI `%#` does not match!", uri);
} else {
@@ -286,5 +487,27 @@ struct file_handle *uri_open(char *uri) {
}
}
- return ret;
+ // Close the filter chain. fclose cascades.
+ fclose(top);
+
+ // Build the returned memfile. Fresh allocation so we never mutate any
+ // closed filter handle's state.
+ struct file_handle *out = ext_mem_alloc(sizeof(struct file_handle));
+ out->is_memfile = true;
+ out->readall = true;
+ out->is_high_mem = is_high;
+ out->fd = is_high ? NULL : buf_low;
+ out->load_addr_64 = buf_addr;
+ out->size = buf_len;
+ out->vol = raw_vol;
+ out->path = raw_path_copy;
+ out->path_len = raw_path_copy != NULL ? raw_path_len : 0;
+#if defined (UEFI)
+ out->efi_part_handle = raw_efi_part;
+#endif
+ out->pxe = raw_pxe;
+ out->pxe_ip = raw_pxe_ip;
+ out->pxe_port = raw_pxe_port;
+
+ return out;
}
diff --git a/common/lib/uri.h b/common/lib/uri.h
index d15a5494..107a4292 100644
--- a/common/lib/uri.h
+++ b/common/lib/uri.h
@@ -5,6 +5,22 @@
#include <fs/file.h>
bool uri_resolve(char *uri, char **resource, char **root, char **path, char **hash);
-struct file_handle *uri_open(char *uri);
+
+// uri_open resolves the URI, verifies the blake2b hash (if present) and
+// gzip-decodes (if the resource is prefixed with `$`) in a single streaming
+// pass. The result is a memfile (is_memfile=true, readall=true) whose payload
+// sits in memory of the requested memmap `type`; ->size is the payload length
+// in bytes. Callers treat a NULL return as failure to open the resource.
+// When `allow_high_mem` is true and the target architecture is i386, the
+// buffer may end up above 4 GiB; in that case the returned handle has
+// is_high_mem=true, fd=NULL, and load_addr_64 holding the physical address.
+// Otherwise load_addr_64 == (uintptr_t)fd. On i386 the two memcpy callbacks
+// are used only when allow_high_mem is true; pass NULL otherwise.
+struct file_handle *uri_open(char *uri, uint32_t type, bool allow_high_mem
+#if defined (__i386__)
+ , void (*memcpy_to_64)(uint64_t dst, void *src, size_t count)
+ , void (*memcpy_from_64)(void *dst, uint64_t src, size_t count)
+#endif
+);
#endif
diff --git a/common/protos/chainload.c b/common/protos/chainload.c
index dfd20e5e..b92a5b60 100644
--- a/common/protos/chainload.c
+++ b/common/protos/chainload.c
@@ -272,7 +272,11 @@ noreturn void chainload(char *config, char *cmdline) {
secure_boot_active = false;
struct file_handle *image;
- if ((image = uri_open(image_path)) == NULL)
+ if ((image = uri_open(image_path, MEMMAP_RESERVED, false
+#if defined (__i386__)
+ , NULL, NULL
+#endif
+ )) == NULL)
panic(true, "efi: Failed to open image with path `%s`. Is the path correct?", image_path);
secure_boot_active = saved_secure_boot_active;
@@ -281,7 +285,7 @@ noreturn void chainload(char *config, char *cmdline) {
EFI_HANDLE efi_part_handle = image->efi_part_handle;
- void *ptr = freadall(image, MEMMAP_RESERVED);
+ void *ptr = image->fd;
size_t image_size = image->size;
memmap_alloc_range_in(untouched_memmap, &untouched_memmap_entries,
diff --git a/common/protos/limine.c b/common/protos/limine.c
index aced5f8d..0bd53bc1 100644
--- a/common/protos/limine.c
+++ b/common/protos/limine.c
@@ -143,9 +143,9 @@ static pagemap_t build_identity_map(void) {
return pagemap;
}
-void limine_memcpy_to_64_asm(int paging_mode, void *pagemap, uint64_t dst, void *src, size_t count);
+void limine_memcpy_64_asm(int paging_mode, void *pagemap, uint64_t dst, uint64_t src, size_t count);
-static void limine_memcpy_to_64(uint64_t dst, void *src, size_t count) {
+static void limine_ensure_identity_map(pagemap_t *out) {
static bool identity_map_ready = false;
static pagemap_t identity_map;
@@ -153,8 +153,19 @@ static void limine_memcpy_to_64(uint64_t dst, void *src, size_t count) {
identity_map = build_identity_map();
identity_map_ready = true;
}
+ *out = identity_map;
+}
+
+static void limine_memcpy_to_64(uint64_t dst, void *src, size_t count) {
+ pagemap_t map; /* receives the identity map the helper builds on first use */
+ limine_ensure_identity_map(&map);
+ limine_memcpy_64_asm(paging_mode, map.top_level, dst, (uint64_t)(uintptr_t)src, count); /* low source pointer widened to 64 bits */
+}
- limine_memcpy_to_64_asm(paging_mode, identity_map.top_level, dst, src, count);
+static void limine_memcpy_from_64(void *dst, uint64_t src, size_t count) {
+ pagemap_t map; /* receives the identity map the helper builds on first use */
+ limine_ensure_identity_map(&map);
+ limine_memcpy_64_asm(paging_mode, map.top_level, (uint64_t)(uintptr_t)dst, src, count); /* low destination pointer widened to 64 bits */
}
#endif
@@ -367,7 +378,7 @@ static uint64_t reported_addr_64(uint64_t addr) {
get_phys_addr__r; \
})
-static struct limine_file get_file(struct file_handle *file, char *cmdline, bool kernel) {
+static struct limine_file get_file(struct file_handle *file, char *cmdline) {
struct limine_file ret = {0};
if (file->pxe) {
@@ -405,15 +416,14 @@ static struct limine_file get_file(struct file_handle *file, char *cmdline, bool
ret.path = reported_addr(path);
- void *freadall_ret = freadall_mode(file, MEMMAP_KERNEL_AND_MODULES, !kernel
#if defined (__i386__)
- , limine_memcpy_to_64
-#endif
- );
-#if defined (__i386__)
- ret.address = kernel ? reported_addr(freadall_ret) : reported_addr_64(*(uint64_t *)freadall_ret);
+ if (file->is_high_mem) {
+ ret.address = reported_addr_64(file->load_addr_64);
+ } else {
+ ret.address = reported_addr(file->fd);
+ }
#else
- ret.address = reported_addr(freadall_ret);
+ ret.address = reported_addr(file->fd);
#endif
ret.size = file->size;
@@ -472,7 +482,11 @@ noreturn void limine_load(char *config, char *cmdline) {
print("limine: Loading executable `%#`...\n", kernel_path);
struct file_handle *kernel_file;
- if ((kernel_file = uri_open(kernel_path)) == NULL)
+ if ((kernel_file = uri_open(kernel_path, MEMMAP_BOOTLOADER_RECLAIMABLE, false
+#if defined (__i386__)
+ , NULL, NULL
+#endif
+ )) == NULL)
panic(true, "limine: Failed to open executable with path `%#`. Is the path correct?", kernel_path);
char *k_path_copy = ext_mem_alloc(strlen(kernel_path) + 1);
@@ -495,7 +509,7 @@ noreturn void limine_load(char *config, char *cmdline) {
k_path[i] = 0;
}
- uint8_t *kernel = freadall(kernel_file, MEMMAP_BOOTLOADER_RECLAIMABLE);
+ uint8_t *kernel = kernel_file->fd;
char *kaslr_s = config_get_value(config, 0, "KASLR");
bool kaslr = false;
@@ -929,7 +943,7 @@ FEAT_END
#endif
struct limine_file *kf = ext_mem_alloc(sizeof(struct limine_file));
- *kf = get_file(kernel_file, cmdline, true);
+ *kf = get_file(kernel_file, cmdline);
fclose(kernel_file);
// Entry point feature
@@ -1229,23 +1243,24 @@ FEAT_START
module_path = (char *)get_phys_addr(internal_module->path);
module_cmdline = (char *)get_phys_addr(internal_module->string);
- if (internal_module->flags & LIMINE_INTERNAL_MODULE_COMPRESSED) {
- panic(true, "limine: Compressed internal modules no longer supported");
- }
-
+ bool module_compressed = internal_module->flags & LIMINE_INTERNAL_MODULE_COMPRESSED;
+
// Validate path length to prevent buffer overflow
size_t k_resource_len = strlen(k_resource);
size_t k_root_len = strlen(k_root);
size_t module_path_len = strlen(module_path);
size_t k_path_len = strlen(k_path);
- // Format: k_resource + "(" + k_root + "):" + k_path + "/" + module_path + null
- size_t total_len = k_resource_len + 1 + k_root_len + 2 + k_path_len + 1 + module_path_len + 1;
+ // Format: ["$"] + k_resource + "(" + k_root + "):" + k_path + "/" + module_path + null
+ size_t total_len = (module_compressed ? 1 : 0) + k_resource_len + 1 + k_root_len + 2 + k_path_len + 1 + module_path_len + 1;
if (total_len > 1024) {
panic(true, "limine: Internal module path too long");
}
char *module_path_abs = ext_mem_alloc(1024);
char *module_path_abs_p = module_path_abs;
+ if (module_compressed) {
+ *module_path_abs_p++ = '$';
+ }
memcpy(module_path_abs_p, k_resource, k_resource_len);
module_path_abs_p += k_resource_len;
*module_path_abs_p++ = '(';
@@ -1284,7 +1299,11 @@ FEAT_START
print("limine: Loading module `%#`...\n", module_path);
struct file_handle *f;
- if ((f = uri_open(module_path)) == NULL) {
+ if ((f = uri_open(module_path, MEMMAP_KERNEL_AND_MODULES, true
+#if defined (__i386__)
+ , limine_memcpy_to_64, limine_memcpy_from_64
+#endif
+ )) == NULL) {
if (module_required) {
panic(true, "limine: Failed to open module with path `%#`. Is the path correct?", module_path);
}
@@ -1299,7 +1318,7 @@ FEAT_START
}
struct limine_file *l = &modules[final_module_count++];
- *l = get_file(f, module_cmdline, false);
+ *l = get_file(f, module_cmdline);
fclose(f);
}
diff --git a/common/protos/limine_asm.asm_ia32 b/common/protos/limine_asm.asm_ia32
index a83cc9d5..b7bb456b 100644
--- a/common/protos/limine_asm.asm_ia32
+++ b/common/protos/limine_asm.asm_ia32
@@ -50,10 +50,14 @@ old_pagemap:
section .text
-; void limine_memcpy_to_64_asm(int paging_mode, void *pagemap, uint64_t dst, void *src, size_t count);
-
-global limine_memcpy_to_64_asm
-limine_memcpy_to_64_asm:
+; void limine_memcpy_64_asm(int paging_mode, void *pagemap, uint64_t dst, uint64_t src, size_t count);
+;
+; Both endpoints are 64-bit-wide. Low-memory callers cast pointers
+; through (uint64_t)(uintptr_t). Identity mapping makes low addresses
+; valid as 64-bit virtual addresses inside the long-mode copy.
+
+global limine_memcpy_64_asm
+limine_memcpy_64_asm:
push ebp
mov ebp, esp
@@ -177,8 +181,8 @@ bits 64
mov ss, eax
mov rdi, qword [rbp+16]
- mov esi, dword [rbp+24]
- mov ecx, dword [rbp+28]
+ mov rsi, qword [rbp+24]
+ mov ecx, dword [rbp+32]
rep movsb
diff --git a/common/protos/linux_risc.c b/common/protos/linux_risc.c
index 702f7692..6e6285a4 100644
--- a/common/protos/linux_risc.c
+++ b/common/protos/linux_risc.c
@@ -133,7 +133,7 @@ static void load_module(struct boot_param *p, char *config) {
print("linux: Loading module `%#`...\n", module_path);
- struct file_handle *module_file = uri_open(module_path);
+ struct file_handle *module_file = uri_open(module_path, MEMMAP_BOOTLOADER_RECLAIMABLE, false);
if (!module_file) {
panic(true, "linux: failed to open module `%s`. Is the path correct?", module_path);
}
@@ -481,7 +481,7 @@ noreturn void linux_load(char *config, char *cmdline) {
print("linux: Loading kernel `%#`...\n", kernel_path);
- if ((kernel_file = uri_open(kernel_path)) == NULL) {
+ if ((kernel_file = uri_open(kernel_path, MEMMAP_BOOTLOADER_RECLAIMABLE, false)) == NULL) {
panic(true, "linux: failed to open kernel `%s`. Is the path correct?", kernel_path);
}
diff --git a/common/protos/linux_x86.c b/common/protos/linux_x86.c
index adc64c10..8960e196 100644
--- a/common/protos/linux_x86.c
+++ b/common/protos/linux_x86.c
@@ -302,7 +302,11 @@ noreturn void linux_load(char *config, char *cmdline) {
print("linux: Loading kernel `%#`...\n", kernel_path);
- if ((kernel_file = uri_open(kernel_path)) == NULL)
+ if ((kernel_file = uri_open(kernel_path, MEMMAP_BOOTLOADER_RECLAIMABLE, false
+#if defined (__i386__)
+ , NULL, NULL
+#endif
+ )) == NULL)
panic(true, "linux: Failed to open kernel with path `%#`. Is the path correct?", kernel_path);
// Minimum size check: need at least 0x206 bytes for signature at 0x202
@@ -439,7 +443,11 @@ noreturn void linux_load(char *config, char *cmdline) {
print("linux: Loading module `%#`...\n", module_path);
struct file_handle *module;
- if ((module = uri_open(module_path)) == NULL)
+ if ((module = uri_open(module_path, MEMMAP_BOOTLOADER_RECLAIMABLE, false
+#if defined (__i386__)
+ , NULL, NULL
+#endif
+ )) == NULL)
panic(true, "linux: Failed to open module with path `%s`. Is the path correct?", module_path);
size_of_all_modules = CHECKED_ADD(size_of_all_modules, module->size,
diff --git a/common/protos/multiboot1.c b/common/protos/multiboot1.c
index 0fd7795e..b7e945be 100644
--- a/common/protos/multiboot1.c
+++ b/common/protos/multiboot1.c
@@ -65,10 +65,14 @@ noreturn void multiboot1_load(char *config, char *cmdline) {
print("multiboot1: Loading executable `%#`...\n", kernel_path);
- if ((kernel_file = uri_open(kernel_path)) == NULL)
+ if ((kernel_file = uri_open(kernel_path, MEMMAP_KERNEL_AND_MODULES, false
+#if defined (__i386__)
+ , NULL, NULL
+#endif
+ )) == NULL)
panic(true, "multiboot1: Failed to open executable with path `%#`. Is the path correct?", kernel_path);
- uint8_t *kernel = freadall(kernel_file, MEMMAP_KERNEL_AND_MODULES);
+ uint8_t *kernel = kernel_file->fd;
size_t kernel_file_size = kernel_file->size;
@@ -326,7 +330,11 @@ noreturn void multiboot1_load(char *config, char *cmdline) {
print("multiboot1: Loading module `%#`...\n", module_path);
struct file_handle *f;
- if ((f = uri_open(module_path)) == NULL)
+ if ((f = uri_open(module_path, MEMMAP_BOOTLOADER_RECLAIMABLE, false
+#if defined (__i386__)
+ , NULL, NULL
+#endif
+ )) == NULL)
panic(true, "multiboot1: Failed to open module with path `%#`. Is the path correct?", module_path);
char *module_cmdline = conf_tuple.value2;
@@ -336,7 +344,7 @@ noreturn void multiboot1_load(char *config, char *cmdline) {
char *lowmem_modstr = mb1_info_alloc(&mb1_info_raw, strlen(module_cmdline) + 1);
strcpy(lowmem_modstr, module_cmdline);
- void *module_addr = freadall(f, MEMMAP_BOOTLOADER_RECLAIMABLE);
+ void *module_addr = f->fd;
uint64_t module_target = (uint64_t)-1; /* no target preference, use top */
if (!elsewhere_append(true /* flexible target */,
diff --git a/common/protos/multiboot2.c b/common/protos/multiboot2.c
index 15871117..be01885c 100644
--- a/common/protos/multiboot2.c
+++ b/common/protos/multiboot2.c
@@ -85,10 +85,14 @@ noreturn void multiboot2_load(char *config, char* cmdline) {
print("multiboot2: Loading executable `%#`...\n", kernel_path);
- if ((kernel_file = uri_open(kernel_path)) == NULL)
+ if ((kernel_file = uri_open(kernel_path, MEMMAP_KERNEL_AND_MODULES, false
+#if defined (__i386__)
+ , NULL, NULL
+#endif
+ )) == NULL)
panic(true, "multiboot2: Failed to open executable with path `%#`. Is the path correct?", kernel_path);
- uint8_t *kernel = freadall(kernel_file, MEMMAP_KERNEL_AND_MODULES);
+ uint8_t *kernel = kernel_file->fd;
size_t kernel_file_size = kernel_file->size;
@@ -631,7 +635,11 @@ reloc_fail:
print("multiboot2: Loading module `%#`...\n", module_path);
struct file_handle *f;
- if ((f = uri_open(module_path)) == NULL)
+ if ((f = uri_open(module_path, MEMMAP_BOOTLOADER_RECLAIMABLE, false
+#if defined (__i386__)
+ , NULL, NULL
+#endif
+ )) == NULL)
panic(true, "multiboot2: Failed to open module with path `%#`. Is the path correct?", module_path);
// Module commandline can be null, so we guard against that and make the
@@ -639,7 +647,7 @@ reloc_fail:
char *module_cmdline = conf_tuple.value2;
if (!module_cmdline) module_cmdline = "";
- void *module_addr = freadall(f, MEMMAP_BOOTLOADER_RECLAIMABLE);
+ void *module_addr = f->fd;
uint64_t module_target = (uint64_t)-1;
if (!elsewhere_append(true /* flexible target */,
diff --git a/contrib/test_gzip_qemu.sh b/contrib/test_gzip_qemu.sh
new file mode 100755
index 00000000..4cc24a7a
--- /dev/null
+++ b/contrib/test_gzip_qemu.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+# Mechanically test gzip decompression end-to-end in QEMU.
+# Boots the Limine test kernel via UEFI with both a plain file and
+# its .gz counterpart as internal modules. The bootloader decompresses
+# the .gz module; the test kernel compares it byte-for-byte against the
+# plain copy and prints a "gzip: pass" / "gzip: FAIL" marker to the debug console.
+# Usage: bash contrib/test_gzip_qemu.sh   (QEMU_TIMEOUT=<seconds> overrides the 20 s default)
+# Requires: qemu-system-x86_64, mtools (mformat/mmd/mcopy), gzip, timeout
+set -euo pipefail
+cd "$(dirname "$0")/.."
+TIMEOUT="${QEMU_TIMEOUT:-20}"
+for tool in gzip qemu-system-x86_64 mformat mmd mcopy timeout; do
+    command -v "$tool" >/dev/null 2>&1 && continue
+    echo "missing required tool: $tool" >&2
+    exit 1
+done
+TEST_CFLAGS="-DENABLE_QEMU_SHUTDOWN -DENABLE_GZIP_TEST"
+make limine-bios limine-uefi-x86-64 2>&1 | tail -1
+make edk2-ovmf 2>&1 | tail -1
+make -C test -f test.mk ARCH=x86 EXTRA_CFLAGS="$TEST_CFLAGS" test.elf 2>&1 | tail -1
+IMG=test_uefi.img
+GZ_TMP=$(mktemp)
+QEMU_LOG=$(mktemp)
+trap 'rm -f "$GZ_TMP" "$QEMU_LOG" "$IMG"' EXIT # set before the image exists so a failing step under set -e leaves nothing behind
+rm -f "$IMG"
+mformat -i "$IMG" -C -F -T 131072 :: 2>/dev/null
+mmd -i "$IMG" ::/boot ::/EFI ::/EFI/BOOT 2>/dev/null
+mcopy -i "$IMG" bin/BOOTX64.EFI ::/EFI/BOOT/
+mcopy -i "$IMG" bin/limine-bios.sys ::/boot/
+mcopy -i "$IMG" test/test.elf ::/boot/
+mcopy -i "$IMG" test/bg.jpg ::/boot/
+mcopy -i "$IMG" test/limine.conf ::/boot/
+gzip -c test/limine.conf > "$GZ_TMP"
+mcopy -i "$IMG" "$GZ_TMP" ::/boot/limine.conf.gz
+timeout "$TIMEOUT" \
+    qemu-system-x86_64 \
+    -display none \
+    -m 512M -M q35 \
+    -drive if=pflash,unit=0,format=raw,file=edk2-ovmf/ovmf-code-x86_64.fd,readonly=on \
+    -net none -smp 4 \
+    -drive format=raw,file="$IMG" \
+    -debugcon file:"$QEMU_LOG" \
+    || true # timeout exits 124
+if grep -q 'gzip: pass' "$QEMU_LOG"; then
+    grep 'gzip:' "$QEMU_LOG"
+    echo "pass: gzip decompression verified in QEMU"
+    exit 0
+elif grep -q 'gzip: FAIL' "$QEMU_LOG"; then
+    grep 'gzip:' "$QEMU_LOG"
+    echo "fail"
+    exit 1
+else
+    echo "fail: gzip test marker not found in QEMU output"
+    echo "last 20 lines of log:"
+    tail -20 "$QEMU_LOG"
+    exit 1
+fi
diff --git a/test/limine.c b/test/limine.c
index b0b11e79..52ed23ab 100644
--- a/test/limine.c
+++ b/test/limine.c
@@ -6,6 +6,14 @@
#include <flanterm.h>
#include <flanterm_backends/fb.h>
+int memcmp(const void *, const void *, size_t);
+
+#ifdef ENABLE_QEMU_SHUTDOWN
+static inline void outw(uint16_t port, uint16_t value) {
+ __asm__ __volatile__ ("outw %[val], %[port]" : : [val] "a" (value), [port] "Nd" (port) : "memory");
+}
+#endif
+
__attribute__((section(".limine_requests")))
static volatile uint64_t limine_base_revision[] = LIMINE_BASE_REVISION(6);
@@ -77,12 +85,25 @@ struct limine_internal_module internal_module2 = {
struct limine_internal_module internal_module3 = {
.path = "./limine.conf",
.string = "Third internal module"
+ /* gzip test depends on this name to find
+ the original to compare against. */
};
+#ifdef ENABLE_GZIP_TEST
+struct limine_internal_module internal_module4 = {
+ .path = "./limine.conf.gz",
+ .string = "gzip-compressed limine.conf",
+ .flags = LIMINE_INTERNAL_MODULE_COMPRESSED
+};
+#endif
+
struct limine_internal_module *internal_modules[] = {
&internal_module1,
&internal_module2,
- &internal_module3
+ &internal_module3,
+#ifdef ENABLE_GZIP_TEST
+ &internal_module4,
+#endif
};
__attribute__((section(".limine_requests")))
@@ -90,7 +111,7 @@ static volatile struct limine_module_request module_request = {
.id = LIMINE_MODULE_REQUEST_ID,
.revision = 1, .response = NULL,
- .internal_module_count = 3,
+ .internal_module_count = sizeof(internal_modules) / sizeof(internal_modules[0]),
.internal_modules = internal_modules
};
@@ -528,6 +549,38 @@ FEAT_START
e9_printf("---");
print_file(f);
}
+
+#ifdef ENABLE_GZIP_TEST
+ /* Gzip test: internal_module3 (plain limine.conf) must be byte-identical to internal_module4 (limine.conf.gz,
+ decompressed by bootloader). contrib/test_gzip_qemu.sh greps the gzip: pass / gzip: FAIL markers printed below. */
+ {
+ struct limine_file *plain = NULL, *decompressed = NULL;
+ for (size_t i = 0; i < module_response->module_count; i++) {
+ struct limine_file *f = module_response->modules[i];
+ if (f->string != NULL) {
+ /* Identify both modules by the leading characters of their string (Third / gz); the && chain stops at a NUL. */
+ bool is_third = f->string[0] == 'T' && f->string[1] == 'h'
+ && f->string[2] == 'i' && f->string[3] == 'r'
+ && f->string[4] == 'd';
+ bool is_gz = f->string[0] == 'g' && f->string[1] == 'z';
+ if (is_third) plain = f;
+ if (is_gz) decompressed = f;
+ }
+ }
+ if (plain == NULL) {
+ e9_printf("gzip: FAIL (plain module not found)");
+ } else if (decompressed == NULL) {
+ e9_printf("gzip: FAIL (decompressed module not found)");
+ } else if (plain->size != decompressed->size) {
+ e9_printf("gzip: FAIL (size mismatch: plain=%x, decompressed=%x)",
+ plain->size, decompressed->size);
+ } else if (memcmp(plain->address, decompressed->address, plain->size) != 0) {
+ e9_printf("gzip: FAIL (content mismatch, size=%x)", plain->size);
+ } else {
+ e9_printf("gzip: pass (size=%x)", plain->size);
+ }
+ }
+#endif
FEAT_END
FEAT_START
@@ -701,5 +754,8 @@ FEAT_START
e9_printf("Exec time: %d usec", perf_response->exec_usec);
FEAT_END
+#ifdef ENABLE_QEMU_SHUTDOWN
+ outw(0x604, 0x2000); /* QEMU-specific shutdown, used by automated tests. */
+#endif
for (;;);
}
diff --git a/test/test.mk b/test/test.mk
index beea2001..48073e41 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -71,7 +71,8 @@ override CFLAGS += \
-I../limine-protocol/include \
-I../flanterm/src \
-isystem ../freestnd-c-hdrs/include \
- -D_LIMINE_PROTO
+ -D_LIMINE_PROTO \
+ $(EXTRA_CFLAGS)
ifeq ($(ARCH),x86)
override CFLAGS += \
