:: commit 45aaa72e85412f8b62075c64e3135b495265dd8b

mint <36459316+mintsuki@users.noreply.github.com> — 2020-09-06 01:35

parents: 734a3e268f

Decompressor (#23)

* Add decompressor infrastructure

* Fix up everything

* Add a gzip decompressor (#22)

Co-authored-by: Matteo Semenzato <mattew8898@gmail.com>
diff --git a/.gitignore b/.gitignore
index 2841a406..ea292fbf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
 /**/*.a
 /**/*.bc
 /**/*.bin
+/**/*.bin.gz
 /**/*.elf
 /**/*.img
 /bochsout.txt
diff --git a/README.md b/README.md
index 8555dbac..db2e138e 100644
--- a/README.md
+++ b/README.md
@@ -76,6 +76,8 @@ limine-install test.img
 
 One can get `echfs-utils` by installing https://github.com/qword-os/echfs.
 
+Limine uses a stripped-down version of https://github.com/jibsen/tinf to decompress its gzip-compressed stage 2.
+
 ## Discord server
 We have a Discord server if you need support, info, or you just want to
 hang out: https://discord.gg/QEeZMz4
diff --git a/src/Makefile b/src/Makefile
index 5230d543..26b9883e 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -21,8 +21,8 @@ INTERNAL_LDFLAGS = \
 
 .PHONY: all clean
 
-C_FILES := $(shell find ./ -type f -name '*.c' | sort)
-ASM_FILES := $(shell find ./ -type f -name '*.asm' | grep -v bootsect | sort)
+C_FILES := $(shell find ./ -type f -name '*.c' | grep -v bootsect | grep -v decompressor | sort)
+ASM_FILES := $(shell find ./ -type f -name '*.asm' | grep -v bootsect | grep -v decompressor | sort)
 ASM_OBJ := $(ASM_FILES:.asm=.o)
 BC := $(C_FILES:.c=.bc)
 
@@ -34,6 +34,8 @@ limine.bin: libgcc.a $(BC) $(ASM_OBJ)
 	clang --target=i386-elf -O$(OPT_LEVEL) -c optimised_bundle.bc -o optimised_bundle.o
 	ld.lld optimised_bundle.o $(ASM_OBJ) libgcc.a $(INTERNAL_LDFLAGS) -o stage2.elf
 	llvm-objcopy -O binary stage2.elf stage2.bin
+	gzip -9 -f stage2.bin  # -f: replace a stale stage2.bin.gz left by a previous build
+	$(MAKE) -C decompressor
 	cd bootsect && nasm bootsect.asm -fbin -o ../limine.bin
 
 libgcc.a:
@@ -47,4 +49,5 @@ libgcc.a:
 	nasm $< -f elf32 -o $@
 
 clean:
-	rm -f limine.bin $(ASM_OBJ) $(BC)
+	$(MAKE) -C decompressor clean
+	rm -f stage2.bin.gz limine.bin $(ASM_OBJ) $(BC)
diff --git a/src/bootsect/bootsect.asm b/src/bootsect/bootsect.asm
index 582c428f..b7404f15 100644
--- a/src/bootsect/bootsect.asm
+++ b/src/bootsect/bootsect.asm
@@ -15,7 +15,7 @@ start:
     mov fs, ax
     mov gs, ax
     mov ss, ax
-    mov sp, 0x7c00
+    mov sp, 0xfff0
     sti
 
     ; Some BIOSes don't pass the correct boot drive number,
@@ -93,20 +93,24 @@ times 0x1b8-($-$$) db 0
 times 510-($-$$) db 0
 dw 0xaa55
 
-; ********************* Stage 2 *********************
+; ********************* Stage 1.5 *********************
 
-stage2:
+stage15:
+    push es
+    push 0x6000
+    pop es
     mov eax, dword [stage2_sector]
     inc eax
-    mov ebx, 0x8000
+    xor ebx, ebx
     mov ecx, 62
     call read_sectors
+    pop es
     jc err_reading_disk
 
     call enable_a20
     jc err_enabling_a20
 
-    lgdt [GDT]
+    call load_gdt
 
     cli
 
@@ -127,7 +131,10 @@ stage2:
     and edx, 0xff
     push edx
 
-    call 0x8000
+    push stage2.size
+    push (stage2 - 0x8000) + 0x60000
+
+    call 0x60000
 
 bits 16
 %include 'a20_enabler.inc'
@@ -135,6 +142,11 @@ bits 16
 
 times 1024-($-$$) db 0
 
-incbin '../stage2.bin'
+incbin '../decompressor/decompressor.bin'
+
+align 16
+stage2:
+incbin '../stage2.bin.gz'
+.size: equ $ - stage2
 
 times 32768-($-$$) db 0
diff --git a/src/bootsect/gdt.inc b/src/bootsect/gdt.inc
index a3a0a632..9df929b1 100644
--- a/src/bootsect/gdt.inc
+++ b/src/bootsect/gdt.inc
@@ -1,7 +1,25 @@
+load_gdt:                               ; copy the GDT entries to 0x7000:0000, then load GDTR from [GDT]
+    pusha                               ; all registers and both segments are restored before ret
+    push es
+    push ds
+    push 0x7000
+    pop es                              ; es = 0x7000 (destination segment)
+    xor di, di                          ; es:di = 0x7000:0000, i.e. linear 0x70000
+    push 0
+    pop ds                              ; ds = 0 (source segment)
+    mov si, GDT.GDTStart                ; ds:si = first GDT entry
+    mov cx, GDT.GDTEnd - GDT.GDTStart   ; cx = size of the entries in bytes
+    rep movsb                           ; NOTE(review): direction flag is not set here; presumably cleared earlier - confirm
+    lgdt [GDT]                          ; the descriptor at GDT gives the limit and base 0x70000
+    pop ds
+    pop es
+    popa
+    ret
+
 GDT:
 
 dw .GDTEnd - .GDTStart - 1	; GDT size
-dd .GDTStart				; GDT start
+dd 0x70000				; GDT start
 
 .GDTStart:
 
diff --git a/src/decompressor/Makefile b/src/decompressor/Makefile
new file mode 100644
index 00000000..7115eb4c
--- /dev/null
+++ b/src/decompressor/Makefile
@@ -0,0 +1,40 @@
+OPT_LEVEL = z
+CFLAGS = -pipe -Wall -Wextra
+# Flags always passed when compiling each .c file to LLVM bitcode (see the %.bc rule).
+INTERNAL_CFLAGS = \
+	-O$(OPT_LEVEL) \
+	-std=gnu99 \
+	-ffreestanding \
+	-flto \
+	-mno-80387 \
+	-mno-mmx \
+	-mno-sse \
+	-mno-sse2 \
+	-I. \
+	-Wno-address-of-packed-member
+# Linker flags: static, no standard libraries, section layout taken from linker.ld.
+INTERNAL_LDFLAGS = \
+	-static \
+	-nostdlib \
+	-Tlinker.ld \
+	-no-pie
+
+.PHONY: all clean
+# Every .c file below this directory is built; each one maps to a .bc file.
+C_FILES := $(shell find ./ -type f -name '*.c' | sort)
+BC := $(C_FILES:.c=.bc)
+
+all: decompressor.bin
+# Link all bitcode into one module, optimise it, compile it for i386, link, and emit a flat binary.
+decompressor.bin: $(BC)
+	llvm-link $(BC) -o bundle.bc
+	opt --O$(OPT_LEVEL) bundle.bc -o optimised_bundle.bc
+	clang --target=i386-elf -O$(OPT_LEVEL) -c optimised_bundle.bc -o optimised_bundle.o
+	ld.lld optimised_bundle.o ../libgcc.a $(INTERNAL_LDFLAGS) -o decompressor.elf
+	llvm-objcopy -O binary decompressor.elf decompressor.bin
+# Compile one C file for the i386-elf target; the output is named .bc and is consumed by llvm-link.
+%.bc: %.c
+	clang --target=i386-elf $(CFLAGS) $(INTERNAL_CFLAGS) -c $< -o $@
+
+clean: # remove every file the decompressor.bin recipe and the %.bc rule generate
+	rm -f decompressor.bin decompressor.elf bundle.bc optimised_bundle.bc optimised_bundle.o $(BC)
diff --git a/src/decompressor/gzip/tinf.h b/src/decompressor/gzip/tinf.h
new file mode 100644
index 00000000..6e56faa3
--- /dev/null
+++ b/src/decompressor/gzip/tinf.h
@@ -0,0 +1,80 @@
+/*
+ * tinf - tiny inflate library (inflate, gzip, zlib)
+ *
+ * Copyright (c) 2003-2019 Joergen Ibsen
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ *   1. The origin of this software must not be misrepresented; you must
+ *      not claim that you wrote the original software. If you use this
+ *      software in a product, an acknowledgment in the product
+ *      documentation would be appreciated but is not required.
+ *
+ *   2. Altered source versions must be plainly marked as such, and must
+ *      not be misrepresented as being the original software.
+ *
+ *   3. This notice may not be removed or altered from any source
+ *      distribution.
+ */
+
+#ifndef TINF_H_INCLUDED
+#define TINF_H_INCLUDED
+
+#define TINF_VER_MAJOR 1        /**< Major version number */
+#define TINF_VER_MINOR 2        /**< Minor version number */
+#define TINF_VER_PATCH 1        /**< Patch version number */
+#define TINF_VER_STRING "1.2.1" /**< Version number as a string */
+
+/**
+ * Status codes returned.
+ *
+ * @see tinf_uncompress, tinf_gzip_uncompress, tinf_zlib_uncompress
+ */
+typedef enum {
+    TINF_OK         = 0,  /**< Success */
+    TINF_DATA_ERROR = -3, /**< Input error */
+    TINF_BUF_ERROR  = -5  /**< Not enough room for output */
+} tinf_error_code;
+
+/**
+ * Decompress `sourceLen` bytes of deflate data from `source` to `dest`.
+ *
+ * This stripped-down variant takes no `destLen`: the output size is neither
+ * bounded nor reported, so `dest` must be large enough for the whole stream.
+ *
+ * Reads at most `sourceLen` bytes from `source`.
+ * Writes as many bytes to `dest` as the stream decodes to.
+ *
+ * @param dest pointer to where to place decompressed data
+ * @param source pointer to compressed data
+ * @param sourceLen size of compressed data
+ * @return `TINF_OK` on success, `TINF_DATA_ERROR` on malformed input
+ *         (`TINF_BUF_ERROR` is not produced by this variant)
+ */
+int tinf_uncompress(void *dest,
+                           const void *source, unsigned int sourceLen);
+
+/**
+ * Decompress `sourceLen` bytes of gzip data from `source` to `dest`.
+ *
+ * This stripped-down variant takes no `destLen`: the output size is neither
+ * bounded nor reported, so `dest` must be large enough for the whole stream.
+ *
+ * Reads at most `sourceLen` bytes from `source`.
+ * The 8-byte gzip trailer is skipped without being checked.
+ *
+ * @param dest pointer to where to place decompressed data
+ * @param source pointer to compressed data
+ * @param sourceLen size of compressed data
+ * @return `TINF_OK` on success, `TINF_DATA_ERROR` on any failure
+ *         (the underlying inflate error code is not propagated)
+ */
+int tinf_gzip_uncompress(void *dest,
+                                const void *source, unsigned int sourceLen);
+#endif /* TINF_H_INCLUDED */
diff --git a/src/decompressor/gzip/tinfgzip.c b/src/decompressor/gzip/tinfgzip.c
new file mode 100644
index 00000000..7935a50e
--- /dev/null
+++ b/src/decompressor/gzip/tinfgzip.c
@@ -0,0 +1,120 @@
+/*
+ * tinfgzip - tiny gzip decompressor
+ *
+ * Copyright (c) 2003-2019 Joergen Ibsen
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ *   1. The origin of this software must not be misrepresented; you must
+ *      not claim that you wrote the original software. If you use this
+ *      software in a product, an acknowledgment in the product
+ *      documentation would be appreciated but is not required.
+ *
+ *   2. Altered source versions must be plainly marked as such, and must
+ *      not be misrepresented as being the original software.
+ *
+ *   3. This notice may not be removed or altered from any source
+ *      distribution.
+ */
+
+#include "tinf.h"
+
+typedef enum {
+    FTEXT    = 1,  /* not examined by this decoder */
+    FHCRC    = 2,  /* 2-byte header CRC precedes the data (skipped) */
+    FEXTRA   = 4,  /* extra field precedes the data (skipped) */
+    FNAME    = 8,  /* NUL-terminated file name (skipped) */
+    FCOMMENT = 16  /* NUL-terminated comment (skipped) */
+} tinf_gzip_flag;
+
+/* Strip the gzip (RFC 1952) framing from `source` and inflate the payload to `dest`. */
+int tinf_gzip_uncompress(void *dest,
+                         const void *source, unsigned int sourceLen) {
+    const unsigned char *src = (const unsigned char *) source;
+    unsigned char *dst = (unsigned char *) dest;
+    const unsigned char *start;
+    int res;
+    unsigned char flg;
+
+    /* -- Check header -- */
+    /* Check room for at least 10 byte header and 8 byte trailer */
+    if (sourceLen < 18) {
+        return TINF_DATA_ERROR;
+    }
+
+    /* Check id bytes */
+    if (src[0] != 0x1F || src[1] != 0x8B) {
+        return TINF_DATA_ERROR;
+    }
+
+    /* Check method is deflate */
+    if (src[2] != 8) {
+        return TINF_DATA_ERROR;
+    }
+
+    /* Get flag byte */
+    flg = src[3];
+
+    /* Check that reserved bits are zero */
+    if (flg & 0xE0) {
+        return TINF_DATA_ERROR;
+    }
+
+    /* -- Find start of compressed data -- */
+
+    /* Skip base header of 10 bytes */
+    start = src + 10;
+
+    /* Skip extra data if present (XLEN is a 16-bit little-endian field) */
+    if (flg & FEXTRA) {
+        unsigned int xlen = start[0] | ((unsigned int) start[1] << 8);
+
+        if (xlen > sourceLen - 12) {
+            return TINF_DATA_ERROR;
+        }
+
+        start += xlen + 2;
+    }
+
+    /* Skip file name if present */
+    if (flg & FNAME) {
+        do {
+            if (((unsigned int)(start - src)) >= sourceLen) {
+                return TINF_DATA_ERROR;
+            }
+        } while (*start++);
+    }
+
+    /* Skip file comment if present */
+    if (flg & FCOMMENT) {
+        do {
+            if (((unsigned int)(start - src)) >= sourceLen) {
+                return TINF_DATA_ERROR;
+            }
+        } while (*start++);
+    }
+
+    if (flg & FHCRC) {
+        start += 2; /* header CRC is skipped, not verified */
+    }
+
+    /* -- Decompress data -- */
+
+    if ((src + sourceLen) - start < 8) {
+        return TINF_DATA_ERROR;
+    }
+
+    res = tinf_uncompress(dst, start,
+                          (src + sourceLen) - start - 8);
+
+    if (res != TINF_OK) {
+        return TINF_DATA_ERROR;
+    }
+    return TINF_OK;
+}
diff --git a/src/decompressor/gzip/tinflate.c b/src/decompressor/gzip/tinflate.c
new file mode 100644
index 00000000..67993904
--- /dev/null
+++ b/src/decompressor/gzip/tinflate.c
@@ -0,0 +1,566 @@
+/*
+ * tinflate - tiny inflate
+ *
+ * Copyright (c) 2003-2019 Joergen Ibsen
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ *   1. The origin of this software must not be misrepresented; you must
+ *      not claim that you wrote the original software. If you use this
+ *      software in a product, an acknowledgment in the product
+ *      documentation would be appreciated but is not required.
+ *
+ *   2. Altered source versions must be plainly marked as such, and must
+ *      not be misrepresented as being the original software.
+ *
+ *   3. This notice may not be removed or altered from any source
+ *      distribution.
+ */
+
+#include "tinf.h"
+
+#include <limits.h>
+
+#if defined(UINT_MAX) && (UINT_MAX) < 0xFFFFFFFFUL
+#  error "tinf requires unsigned int to be at least 32-bit"
+#endif
+
+/* -- Internal data structures -- */
+
+struct tinf_tree {
+    unsigned short counts[16]; /* Number of codes with a given length */
+    unsigned short symbols[288]; /* Symbols sorted by code */
+    int max_sym; /* Largest symbol accepted from this tree; -1 if it is empty */
+};
+
+struct tinf_data {
+    const unsigned char *source; /* Next input byte to read */
+    const unsigned char *source_end; /* One past the last input byte */
+    unsigned int tag; /* Bit buffer; the next bit to consume is bit 0 */
+    int bitcount; /* Number of bits accounted for in tag */
+    int overflow; /* Set once a refill ran past source_end */
+
+    unsigned char *dest_start; /* Start of output; match offsets are checked against it */
+    unsigned char *dest; /* Next output byte to write */
+
+    struct tinf_tree ltree; /* Literal/length tree */
+    struct tinf_tree dtree; /* Distance tree */
+};
+
+/* Build tree t from num code lengths; TINF_DATA_ERROR if they do not form a usable code */
+static int tinf_build_tree(struct tinf_tree *t, const unsigned char *lengths,
+                           unsigned int num) {
+    unsigned short offs[16]; /* next free symbols[] slot for each code length */
+    unsigned int i, num_codes, available;
+
+    /* Start from an empty histogram of code lengths */
+    for (i = 0; i < 16; ++i) {
+        t->counts[i] = 0;
+    }
+
+    t->max_sym = -1;
+
+    /* Count number of codes for each non-zero length */
+    for (i = 0; i < num; ++i) {
+        /* NOTE(review): lengths[i] indexes counts[16]; assumes callers pass values <= 15 */
+        if (lengths[i]) {
+            t->max_sym = i;
+            t->counts[lengths[i]]++;
+        }
+    }
+
+    /* Compute offset table for distribution sort */
+    for (available = 1, num_codes = 0, i = 0; i < 16; ++i) {
+        unsigned int used = t->counts[i];
+
+        /* Check length contains no more codes than available */
+        if (used > available) {
+            return TINF_DATA_ERROR;
+        }
+        available = 2 * (available - used); /* each unused code yields two at the next length */
+
+        offs[i] = num_codes;
+        num_codes += used;
+    }
+
+    /*
+     * Check all codes were used, or for the special case of only one
+     * code that it has length 1
+     */
+    if ((num_codes > 1 && available > 0)
+     || (num_codes == 1 && t->counts[1] != 1)) {
+        return TINF_DATA_ERROR;
+    }
+
+    /* Fill in symbols sorted by code */
+    for (i = 0; i < num; ++i) {
+        if (lengths[i]) {
+            t->symbols[offs[lengths[i]]++] = i;
+        }
+    }
+
+    /*
+     * For the special case of only one code (which will be 0) add a
+     * code 1 which results in a symbol that is too large
+     */
+    if (num_codes == 1) {
+        t->counts[1] = 2;
+        t->symbols[1] = t->max_sym + 1;
+    }
+
+    return TINF_OK;
+}
+
+/* -- Decode functions -- */
+
+/* Top up the bit buffer until it accounts for at least `num` bits */
+static void tinf_refill(struct tinf_data *d, int num) {
+    for (; d->bitcount < num; d->bitcount += 8) {
+        if (d->source == d->source_end) {
+            /* Input exhausted: flag it; the missing byte counts as zero bits */
+            d->overflow = 1;
+            continue;
+        }
+        /* Splice the next input byte in above the bits already held */
+        unsigned int next_byte = *d->source;
+        d->source++;
+        d->tag |= next_byte << d->bitcount;
+    }
+}
+
+/* Consume `num` bits from the bit buffer and return them.
+ * The buffer is not refilled here; callers refill before calling. */
+static unsigned int tinf_getbits_no_refill(struct tinf_data *d, int num) {
+    /* Mask selecting the `num` least significant bits */
+    const unsigned long low_mask = (1UL << num) - 1;
+    const unsigned int value = d->tag & low_mask;
+
+    /* Discard what was taken and keep the bit count in step */
+    d->bitcount -= num;
+    d->tag >>= num;
+
+    return value;
+}
+
+/* Get num bits from source stream, refilling the bit buffer first as needed */
+static unsigned int tinf_getbits(struct tinf_data *d, int num) {
+    tinf_refill(d, num);
+    return tinf_getbits_no_refill(d, num);
+}
+
+/* Return `base` plus a `num`-bit value from the stream; reads nothing when num is 0 */
+static unsigned int tinf_getbits_base(struct tinf_data *d, int num, int base) {
+    return num ? base + tinf_getbits(d, num) : (unsigned int) base;
+}
+
+/* Decode the next symbol from the stream using tree `t` */
+static int tinf_decode_symbol(struct tinf_data *d, const struct tinf_tree *t) {
+    const unsigned short *level = &t->counts[1];
+    int first = 0, rank = 0;
+
+    /*
+     * Walk the code one bit at a time, starting with length-1 codes.
+     *
+     * `symbols` is ordered by code, so instead of reconstructing the
+     * code itself we track where it would land in that ordering:
+     *
+     *   first - how many codes are shorter than the current length
+     *   rank  - position of the bits read so far among the nodes of
+     *           the current length (leaves first, then internal nodes)
+     *
+     * When rank falls among the *level leaves the code is complete and
+     * first + rank indexes its symbol. Otherwise the leaves of this
+     * length are skipped and one more bit is appended.
+     *
+     * `level` points at counts[len] for the current code length len.
+     */
+    for (;; ++level) {
+        rank = 2 * rank + tinf_getbits(d, 1);
+
+        if (rank < *level) {
+            return t->symbols[first + rank];
+        }
+
+        /* Not a leaf at this length: step past them and go deeper */
+        first += *level;
+        rank -= *level;
+    }
+}
+
+/* Decode the dynamic literal/length (lt) and distance (dt) trees; TINF_OK or TINF_DATA_ERROR */
+static int tinf_decode_trees(struct tinf_data *d, struct tinf_tree *lt,
+                             struct tinf_tree *dt) {
+    unsigned char lengths[288 + 32]; /* hlit literal/length lengths, then hdist distance lengths */
+
+    /* Special ordering of code length codes */
+    static const unsigned char clcidx[19] = {
+        16, 17, 18, 0,  8, 7,  9, 6, 10, 5,
+        11,  4, 12, 3, 13, 2, 14, 1, 15
+    };
+
+    unsigned int hlit, hdist, hclen;
+    unsigned int i, num, length;
+    int res;
+
+    /* Get 5 bits HLIT (257-286) */
+    hlit = tinf_getbits_base(d, 5, 257);
+
+    /* Get 5 bits HDIST (1-32) */
+    hdist = tinf_getbits_base(d, 5, 1);
+
+    /* Get 4 bits HCLEN (4-19) */
+    hclen = tinf_getbits_base(d, 4, 4);
+
+    /*
+     * The RFC limits the range of HLIT to 286, but lists HDIST as range
+     * 1-32, even though distance codes 30 and 31 have no meaning. While
+     * we could allow the full range of HLIT and HDIST to make it possible
+     * to decode the fixed trees with this function, we consider it an
+     * error here.
+     *
+     * See also: https://github.com/madler/zlib/issues/82
+     */
+    if (hlit > 286 || hdist > 30) {
+        return TINF_DATA_ERROR;
+    }
+
+    for (i = 0; i < 19; ++i) {
+        lengths[i] = 0;
+    }
+
+    /* Read code lengths for code length alphabet */
+    for (i = 0; i < hclen; ++i) {
+        /* Get 3 bits code length (0-7) */
+        unsigned int clen = tinf_getbits(d, 3);
+
+        lengths[clcidx[i]] = clen;
+    }
+
+    /* Build code length tree (in literal/length tree to save space) */
+    res = tinf_build_tree(lt, lengths, 19);
+
+    if (res != TINF_OK) {
+        return res;
+    }
+
+    /* Check code length tree is not empty */
+    if (lt->max_sym == -1) {
+        return TINF_DATA_ERROR;
+    }
+
+    /* Decode code lengths for the dynamic trees */
+    for (num = 0; num < hlit + hdist; ) {
+        int sym = tinf_decode_symbol(d, lt);
+
+        if (sym > lt->max_sym) {
+            return TINF_DATA_ERROR;
+        }
+
+        switch (sym) {
+        case 16:
+            /* Copy previous code length 3-6 times (read 2 bits) */
+            if (num == 0) {
+                return TINF_DATA_ERROR;
+            }
+            sym = lengths[num - 1];
+            length = tinf_getbits_base(d, 2, 3);
+            break;
+        case 17:
+            /* Repeat code length 0 for 3-10 times (read 3 bits) */
+            sym = 0;
+            length = tinf_getbits_base(d, 3, 3);
+            break;
+        case 18:
+            /* Repeat code length 0 for 11-138 times (read 7 bits) */
+            sym = 0;
+            length = tinf_getbits_base(d, 7, 11);
+            break;
+        default:
+            /* Values 0-15 represent the actual code lengths */
+            length = 1;
+            break;
+        }
+
+        if (length > hlit + hdist - num) { /* run would exceed the declared number of lengths */
+            return TINF_DATA_ERROR;
+        }
+
+        while (length--) {
+            lengths[num++] = sym;
+        }
+    }
+
+    /* Check EOB symbol is present */
+    if (lengths[256] == 0) {
+        return TINF_DATA_ERROR;
+    }
+
+    /* Build dynamic trees */
+    res = tinf_build_tree(lt, lengths, hlit);
+
+    if (res != TINF_OK) {
+        return res;
+    }
+
+    res = tinf_build_tree(dt, lengths + hlit, hdist);
+
+    if (res != TINF_OK) {
+        return res;
+    }
+
+    return TINF_OK;
+}
+
+/* -- Block inflate functions -- */
+
+/* Given a stream and two trees, inflate one block; output goes to d->dest with no upper bound */
+static int tinf_inflate_block_data(struct tinf_data *d, struct tinf_tree *lt,
+                                   struct tinf_tree *dt) {
+    /* Extra bits and base tables for length codes */
+    static const unsigned char length_bits[30] = {
+        0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
+        1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
+        4, 4, 4, 4, 5, 5, 5, 5, 0, 127 /* last entry is never indexed: sym - 257 > 28 is rejected */
+    };
+
+    static const unsigned short length_base[30] = {
+         3,  4,  5,   6,   7,   8,   9,  10,  11,  13,
+        15, 17, 19,  23,  27,  31,  35,  43,  51,  59,
+        67, 83, 99, 115, 131, 163, 195, 227, 258,   0
+    };
+
+    /* Extra bits and base tables for distance codes */
+    static const unsigned char dist_bits[30] = {
+        0, 0,  0,  0,  1,  1,  2,  2,  3,  3,
+        4, 4,  5,  5,  6,  6,  7,  7,  8,  8,
+        9, 9, 10, 10, 11, 11, 12, 12, 13, 13
+    };
+
+    static const unsigned short dist_base[30] = {
+           1,    2,    3,    4,    5,    7,    9,    13,    17,    25,
+          33,   49,   65,   97,  129,  193,  257,   385,   513,   769,
+        1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577
+    };
+
+    for (;;) {
+        int sym = tinf_decode_symbol(d, lt);
+
+        /* Check for overflow in bit reader */
+        if (d->overflow) {
+            return TINF_DATA_ERROR;
+        }
+
+        if (sym < 256) {
+            *d->dest++ = sym; /* literal byte */
+        }
+        else {
+            int length, dist, offs;
+            int i;
+
+            /* Check for end of block */
+            if (sym == 256) {
+                return TINF_OK;
+            }
+
+            /* Check sym is within range and distance tree is not empty */
+            if (sym > lt->max_sym || sym - 257 > 28 || dt->max_sym == -1) {
+                return TINF_DATA_ERROR;
+            }
+
+            sym -= 257;
+
+            /* Possibly get more bits from length code */
+            length = tinf_getbits_base(d, length_bits[sym],
+                                       length_base[sym]);
+
+            dist = tinf_decode_symbol(d, dt);
+
+            /* Check dist is within range */
+            if (dist > dt->max_sym || dist > 29) {
+                return TINF_DATA_ERROR;
+            }
+
+            /* Possibly get more bits from distance code */
+            offs = tinf_getbits_base(d, dist_bits[dist],
+                                     dist_base[dist]);
+
+            if (offs > d->dest - d->dest_start) { /* match would start before the output */
+                return TINF_DATA_ERROR;
+            }
+
+            /* Copy match */
+            for (i = 0; i < length; ++i) {
+                d->dest[i] = d->dest[i - offs]; /* byte by byte, so a match may overlap its own output */
+            }
+
+            d->dest += length;
+        }
+    }
+}
+
+/* Inflate a stored block: validate LEN against NLEN, then copy LEN raw bytes to dest */
+static int tinf_inflate_uncompressed_block(struct tinf_data *d) {
+    unsigned int length, invlength;
+
+    if (d->source_end - d->source < 4) {
+        return TINF_DATA_ERROR;
+    }
+
+    /* Get length (LEN, 16-bit little-endian) */
+    length = d->source[0] | ((unsigned int) d->source[1] << 8);
+
+    /* Get one's complement of length (NLEN, 16-bit little-endian) */
+    invlength = d->source[2] | ((unsigned int) d->source[3] << 8);
+
+    /* Check length */
+    if (length != (~invlength & 0x0000FFFF)) {
+        return TINF_DATA_ERROR;
+    }
+
+    d->source += 4;
+
+    if ((unsigned int)((d->source_end - d->source)) < length) {
+        return TINF_DATA_ERROR;
+    }
+
+    /* Copy block */
+    while (length--) {
+        *d->dest++ = *d->source++;
+    }
+
+    /* Make sure we start next block on a byte boundary */
+    d->tag = 0;
+    d->bitcount = 0;
+
+    return TINF_OK;
+}
+
+/* Fill lt and dt with the fixed Huffman codes */
+static void tinf_build_fixed_trees(struct tinf_tree *lt, struct tinf_tree *dt) {
+    unsigned short *out = lt->symbols;
+    int n;
+
+    /* Literal/length tree: 24 codes of 7 bits, 152 of 8 bits, 112 of 9 bits */
+    for (n = 0; n < 16; ++n) {
+        lt->counts[n] = 0;
+    }
+    lt->counts[7] = 24;
+    lt->counts[8] = 152;
+    lt->counts[9] = 112;
+
+    /* Symbols in code order: 256-279, then 0-143, 280-287 and 144-255 */
+    for (n = 256; n < 280; ++n) {
+        *out++ = n;
+    }
+    for (n = 0; n < 144; ++n) {
+        *out++ = n;
+    }
+    for (n = 280; n < 288; ++n) {
+        *out++ = n;
+    }
+    for (n = 144; n < 256; ++n) {
+        *out++ = n;
+    }
+
+    lt->max_sym = 285;
+
+    /* Distance tree: all 32 codes are 5 bits long, in symbol order */
+    for (n = 0; n < 16; ++n) {
+        dt->counts[n] = 0;
+    }
+    dt->counts[5] = 32;
+
+    for (n = 0; n < 32; ++n) {
+        dt->symbols[n] = n;
+    }
+
+    dt->max_sym = 29;
+}
+
+
+/* Decode one block that uses the predefined (fixed) Huffman codes */
+static int tinf_inflate_fixed_block(struct tinf_data *d) {
+    struct tinf_tree *const lit_tree = &d->ltree;
+    struct tinf_tree *const dist_tree = &d->dtree;
+
+    /* The fixed trees are rebuilt on every call, then used to decode the block */
+    tinf_build_fixed_trees(lit_tree, dist_tree);
+    return tinf_inflate_block_data(d, lit_tree, dist_tree);
+}
+
+
+/* Decode one block whose Huffman codes are transmitted in the stream */
+static int tinf_inflate_dynamic_block(struct tinf_data *d) {
+    struct tinf_tree *const lit_tree = &d->ltree;
+    struct tinf_tree *const dist_tree = &d->dtree;
+
+    /* Read the tree descriptions first; a malformed one ends the block */
+    const int status = tinf_decode_trees(d, lit_tree, dist_tree);
+    if (status != TINF_OK) {
+        return status;
+    }
+    return tinf_inflate_block_data(d, lit_tree, dist_tree);
+}
+
+/*
+ * Inflate a raw deflate stream from `source` into `dest`, block by block.
+ * Returns TINF_OK, or the error code of the first block that fails.
+ */
+int tinf_uncompress(void *dest,
+                    const void *source, unsigned int sourceLen) {
+    struct tinf_data state;
+    int last_block;
+
+    /* Input window and an empty bit buffer */
+    state.source = (const unsigned char *) source;
+    state.source_end = state.source + sourceLen;
+    state.tag = 0;
+    state.bitcount = 0;
+    state.overflow = 0;
+
+    /* Output starts at dest; dest_start is kept to validate match offsets */
+    state.dest_start = (unsigned char *) dest;
+    state.dest = state.dest_start;
+
+    do {
+        unsigned int block_type;
+        int status;
+
+        /* Block header: 1-bit final-block flag, then 2-bit block type */
+        last_block = tinf_getbits(&state, 1);
+        block_type = tinf_getbits(&state, 2);
+
+        if (block_type == 0) {
+            /* Stored (uncompressed) block */
+            status = tinf_inflate_uncompressed_block(&state);
+        }
+        else if (block_type == 1) {
+            /* Block coded with the fixed Huffman trees */
+            status = tinf_inflate_fixed_block(&state);
+        }
+        else if (block_type == 2) {
+            /* Block coded with dynamic Huffman trees */
+            status = tinf_inflate_dynamic_block(&state);
+        }
+        else {
+            /* No decoder exists for the remaining type value */
+            status = TINF_DATA_ERROR;
+        }
+
+        if (status != TINF_OK) {
+            return status;
+        }
+    } while (!last_block);
+
+    /* A read past the end of the input invalidates the result */
+    if (state.overflow) {
+        return TINF_DATA_ERROR;
+    }
+
+    return TINF_OK;
+}
+
diff --git a/src/decompressor/lib/asm.h b/src/decompressor/lib/asm.h
new file mode 100644
index 00000000..560057c0
--- /dev/null
+++ b/src/decompressor/lib/asm.h
@@ -0,0 +1,17 @@
+#ifndef __LIB__ASM_H__
+#define __LIB__ASM_H__
+/* Inline-asm wrappers: assemble `body` in Intel syntax, then switch back to AT&T. `body` must end with a separator such as "\n\t". */
+#define ASM(body, ...) asm volatile (".intel_syntax noprefix\n\t" body ".att_syntax prefix" : __VA_ARGS__)
+#define ASM_BASIC(body) asm (".intel_syntax noprefix\n\t" body ".att_syntax prefix")
+/* Hand-encoded far jumps: byte 0xEA, then the offset, then the 2-byte segment. `seg` and `off` are string literals. */
+#define FARJMP16(seg, off) \
+    ".byte 0xea\n\t" \
+    ".2byte " off "\n\t" \
+    ".2byte " seg "\n\t"
+
+#define FARJMP32(seg, off) \
+    ".byte 0xea\n\t" \
+    ".4byte " off "\n\t" \
+    ".2byte " seg "\n\t"
+
+#endif
diff --git a/src/decompressor/linker.ld b/src/decompressor/linker.ld
new file mode 100644
index 00000000..baf1f0de
--- /dev/null
+++ b/src/decompressor/linker.ld
@@ -0,0 +1,27 @@
+OUTPUT_FORMAT(elf32-i386)
+ENTRY(main)
+
+SECTIONS
+{
+    . = 0x60000;                /* address bootsect.asm calls after loading (call 0x60000) */
+
+    .text : {
+        KEEP(*(.entry*))        /* startup stub from main.c is placed first */
+        *(.text*)
+    }
+
+    .rodata : {
+        *(.rodata*)
+    }
+
+    .data : {
+        *(.data*)
+    }
+
+    .bss : {
+        bss_begin = .;          /* bss_begin/bss_end bound the range the startup stub zeroes */
+        *(COMMON)
+        *(.bss*)
+        bss_end = .;
+    }
+}
diff --git a/src/decompressor/main.c b/src/decompressor/main.c
new file mode 100644
index 00000000..e6bc3841
--- /dev/null
+++ b/src/decompressor/main.c
@@ -0,0 +1,35 @@
+#include <lib/asm.h>
+
+ASM_BASIC(
+    ".section .entry\n\t"                   // linker.ld keeps .entry* at the start of .text
+
+    "cld\n\t"                               // rep stosb below must advance edi upwards
+
+    // Zero out .bss
+    "xor al, al\n\t"
+    "mov edi, OFFSET bss_begin\n\t"         // bss_begin/bss_end are defined in linker.ld
+    "mov ecx, OFFSET bss_end\n\t"
+    "sub ecx, OFFSET bss_begin\n\t"         // ecx = size of .bss in bytes
+    "rep stosb\n\t"
+
+    "mov ebx, OFFSET main\n\t"              // enter main() with a jump, not a call, so the
+    "jmp ebx\n\t"                           // stack is exactly as the caller of this stub left it
+);
+
+#include <stdint.h>
+#include <stddef.h>
+#include <gzip/tinf.h>
+
+__attribute__((noreturn))
+void main(uint8_t *compressed_stage2, size_t stage2_size, uint8_t boot_drive) {
+    // Inflate the gzip-compressed stage 2 to 0x500 (the address src/linker.ld links it at),
+    // then jump to it. There is no way to report an error here, so halt on a bad image.
+    volatile uint8_t *dest = (volatile uint8_t *)0x500;
+
+    if (tinf_gzip_uncompress((void *)dest, compressed_stage2, stage2_size) != TINF_OK) {
+        for (;;) asm volatile ("cli\n\thlt");
+    }
+    __attribute__((noreturn))
+    void (*stage2)(uint8_t boot_drive) = (void *)dest;
+    stage2(boot_drive);
+}
diff --git a/src/lib/blib.c b/src/lib/blib.c
index 7fab85c8..98ef721a 100644
--- a/src/lib/blib.c
+++ b/src/lib/blib.c
@@ -45,9 +45,8 @@ __attribute__((noreturn)) void panic(const char *fmt, ...) {
     }
 }
 
-extern symbol bss_end;
-static size_t bump_allocator_base = (size_t)bss_end;
-#define BUMP_ALLOCATOR_LIMIT ((size_t)0x80000)
+static size_t bump_allocator_base = 0x10000;
+#define BUMP_ALLOCATOR_LIMIT ((size_t)0x70000)
 
 void brewind(size_t count) {
     bump_allocator_base -= count;
diff --git a/src/linker.ld b/src/linker.ld
index 2c8efd1b..f9f0057f 100644
--- a/src/linker.ld
+++ b/src/linker.ld
@@ -3,10 +3,9 @@ ENTRY(main)
 
 SECTIONS
 {
-    . = 0x8000;
+    . = 0x500;
 
     .text : {
-        bootsect_begin = .;
         KEEP(*(.entry*))
         *(.text*)
     }
tab: 248 wrap: offon