| 1 | /* limlz: Copyright (C) 2026 Kamila Szewczyk <k@iczelia.net> |
| 2 | * limine: Copyright (C) 2019-2026 Mintsuki and contributors. |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions are met: |
| 6 | * |
| 7 | * 1. Redistributions of source code must retain the above copyright notice, this |
| 8 | * list of conditions and the following disclaimer. |
| 9 | * |
| 10 | * 2. Redistributions in binary form must reproduce the above copyright notice, |
| 11 | * this list of conditions and the following disclaimer in the documentation |
| 12 | * and/or other materials provided with the distribution. |
| 13 | * |
| 14 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
| 15 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
| 16 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| 17 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
| 18 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 19 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
| 20 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
| 21 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
| 22 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 23 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 24 | */ |
| 25 | |
| 26 | #include <stdint.h> |
| 27 | #include <stddef.h> |
| 28 | #include <stdlib.h> |
| 29 | #include <string.h> |
| 30 | #include <stdio.h> |
| 31 | #include <stdbool.h> |
| 32 | |
| 33 | typedef unsigned char byte; |
| 34 | |
| 35 | static uint16_t endswap16(uint16_t value) { |
| 36 | uint16_t ret = 0; |
| 37 | ret |= (value >> 8) & 0x00ff; |
| 38 | ret |= (value << 8) & 0xff00; |
| 39 | return ret; |
| 40 | } |
| 41 | |
| 42 | static uint32_t endswap32(uint32_t value) { |
| 43 | uint32_t ret = 0; |
| 44 | ret |= (value >> 24) & 0x000000ff; |
| 45 | ret |= (value >> 8) & 0x0000ff00; |
| 46 | ret |= (value << 8) & 0x00ff0000; |
| 47 | ret |= (value << 24) & 0xff000000; |
| 48 | return ret; |
| 49 | } |
| 50 | |
| 51 | static uint64_t endswap64(uint64_t value) { |
| 52 | uint64_t ret = 0; |
| 53 | ret |= (value >> 56) & 0x00000000000000ff; |
| 54 | ret |= (value >> 40) & 0x000000000000ff00; |
| 55 | ret |= (value >> 24) & 0x0000000000ff0000; |
| 56 | ret |= (value >> 8) & 0x00000000ff000000; |
| 57 | ret |= (value << 8) & 0x000000ff00000000; |
| 58 | ret |= (value << 24) & 0x0000ff0000000000; |
| 59 | ret |= (value << 40) & 0x00ff000000000000; |
| 60 | ret |= (value << 56) & 0xff00000000000000; |
| 61 | return ret; |
| 62 | } |
| 63 | |
| 64 | #ifdef __BYTE_ORDER__ |
| 65 | |
| 66 | #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ |
| 67 | #define bigendian true |
| 68 | #else |
| 69 | #define bigendian false |
| 70 | #endif |
| 71 | |
| 72 | #else /* !__BYTE_ORDER__ */ |
| 73 | |
| 74 | static bool bigendian = false; |
| 75 | |
| 76 | #endif /* !__BYTE_ORDER__ */ |
| 77 | |
| 78 | #define ENDSWAP(VALUE) (bigendian ? ( \ |
| 79 | sizeof(VALUE) == 1 ? (VALUE) : \ |
| 80 | sizeof(VALUE) == 2 ? endswap16(VALUE) : \ |
| 81 | sizeof(VALUE) == 4 ? endswap32(VALUE) : \ |
| 82 | sizeof(VALUE) == 8 ? endswap64(VALUE) : (abort(), 1) \ |
| 83 | ) : (VALUE)) |
| 84 | |
| 85 | /* Higher -> better compression with exponentally dimnishing gains. */ |
| 86 | #define LIMLZ_SA_NEIGHBORS 32 |
| 87 | |
| 88 | struct sa_cmp_ctx { int32_t *rank; size_t n, k; }; |
| 89 | static struct sa_cmp_ctx g_sa_ctx; |
| 90 | |
| 91 | static int32_t sa_cmp_idx(int32_t i, int32_t j) { |
| 92 | int32_t ri, rj; |
| 93 | if (g_sa_ctx.rank[i] != g_sa_ctx.rank[j]) |
| 94 | return g_sa_ctx.rank[i] - g_sa_ctx.rank[j]; |
| 95 | ri = (i + (int32_t)g_sa_ctx.k < (int32_t)g_sa_ctx.n) ? g_sa_ctx.rank[i + g_sa_ctx.k] : -1; |
| 96 | rj = (j + (int32_t)g_sa_ctx.k < (int32_t)g_sa_ctx.n) ? g_sa_ctx.rank[j + g_sa_ctx.k] : -1; |
| 97 | return ri - rj; |
| 98 | } |
| 99 | |
| 100 | static int sa_qsort_cmp(const void * a, const void * b) { |
| 101 | int32_t d = sa_cmp_idx(*(const int32_t *)a, *(const int32_t *)b); |
| 102 | return (d > 0) - (d < 0); |
| 103 | } |
| 104 | |
| 105 | static int saca(const byte * s, size_t n, int32_t * sa, int32_t * rank, int32_t * tmp) { |
| 106 | size_t i; |
| 107 | if (!n) |
| 108 | return 0; |
| 109 | for (i = 0; i < n; ++i) { |
| 110 | sa[i] = (int32_t)i; rank[i] = (int32_t)s[i]; |
| 111 | } |
| 112 | for (g_sa_ctx.k = 1;; g_sa_ctx.k <<= 1) { |
| 113 | g_sa_ctx.rank = rank; g_sa_ctx.n = n; |
| 114 | qsort(sa, n, sizeof(sa[0]), sa_qsort_cmp); |
| 115 | tmp[sa[0]] = 0; |
| 116 | for (i = 1; i < n; ++i) |
| 117 | tmp[sa[i]] = tmp[sa[i - 1]] + (sa_cmp_idx(sa[i - 1], sa[i]) < 0); |
| 118 | for (i = 0; i < n; ++i) |
| 119 | rank[i] = tmp[i]; |
| 120 | if ((size_t)rank[sa[n - 1]] == n - 1) |
| 121 | break; |
| 122 | } |
| 123 | return 0; |
| 124 | } |
| 125 | |
| 126 | static size_t lcp_bytes(const byte * s, size_t n, size_t i, size_t j) { |
| 127 | size_t l = 0, m = n - (i > j ? i : j); |
| 128 | for (; l < m && s[i + l] == s[j + l]; ++l); |
| 129 | return l; |
| 130 | } |
| 131 | |
| 132 | struct match_choice { uint32_t len; uint16_t off; }; |
| 133 | struct parse_choice { uint32_t lit, mlen; uint16_t off; }; |
| 134 | |
| 135 | static int longest_matches(const byte * src, size_t n, struct match_choice * mch) { |
| 136 | int32_t *sa, *rank, *tmp, *inv; |
| 137 | size_t i; |
| 138 | if (!n) |
| 139 | return 0; |
| 140 | sa = malloc(n * sizeof(*sa)); |
| 141 | rank = malloc(n * sizeof(*rank)); |
| 142 | tmp = malloc(n * sizeof(*tmp)); |
| 143 | inv = malloc(n * sizeof(*inv)); |
| 144 | if (!sa || !rank || !tmp || !inv || saca(src, n, sa, rank, tmp)) { |
| 145 | free(sa); free(rank); free(tmp); free(inv); |
| 146 | return -1; |
| 147 | } |
| 148 | for (i = 0; i < n; ++i) |
| 149 | inv[sa[i]] = (int32_t)i; |
| 150 | for (i = 0; i < n; ++i) { |
| 151 | int32_t r = inv[i], rr; |
| 152 | int d; |
| 153 | size_t best_len = 0; |
| 154 | uint16_t best_off = 0; |
| 155 | for (d = -LIMLZ_SA_NEIGHBORS; d <= LIMLZ_SA_NEIGHBORS; ++d) { |
| 156 | size_t j, l, off; |
| 157 | if (!d) |
| 158 | continue; |
| 159 | rr = r + d; |
| 160 | if (rr < 0 || rr >= (int32_t)n) |
| 161 | continue; |
| 162 | j = (size_t)sa[rr]; |
| 163 | if (j >= i) |
| 164 | continue; |
| 165 | off = i - j; |
| 166 | if (off == 0 || off > 65535) |
| 167 | continue; |
| 168 | l = lcp_bytes(src, n, i, j); |
| 169 | if (l > best_len) { |
| 170 | best_len = l; |
| 171 | best_off = (uint16_t)off; |
| 172 | } |
| 173 | } |
| 174 | if (best_len >= 4) { |
| 175 | mch[i].len = (uint32_t)best_len; |
| 176 | mch[i].off = best_off; |
| 177 | } else { |
| 178 | mch[i].len = mch[i].off = 0; |
| 179 | } |
| 180 | } |
| 181 | free(sa); free(rank); free(tmp); free(inv); |
| 182 | return 0; |
| 183 | } |
| 184 | |
| 185 | static int encode_len_tail(byte ** outp, byte * out_end, size_t n) { |
| 186 | byte *out = * outp; |
| 187 | if (n >= 15) { |
| 188 | if (out >= out_end) return -1; |
| 189 | *out++ = (byte)(n - 15); |
| 190 | } |
| 191 | *outp = out; |
| 192 | return 0; |
| 193 | } |
| 194 | |
| 195 | static int encode_len_tail_ml(byte ** outp, byte * out_end, size_t n) { |
| 196 | byte * out = *outp; |
| 197 | if (n >= 7) { |
| 198 | if (out >= out_end) |
| 199 | return -1; |
| 200 | *out++ = (byte)(n - 7); |
| 201 | } |
| 202 | *outp = out; |
| 203 | return 0; |
| 204 | } |
| 205 | |
| 206 | static size_t limlzpack(void * dst, size_t dstcap, const void * srcv, size_t srcsz) { |
| 207 | const byte * src = (const byte *) srcv; |
| 208 | byte * dstp = (byte *) dst; |
| 209 | byte * out = dstp, * out_end = dstp + dstcap; |
| 210 | struct match_choice * mch, * bestm; |
| 211 | struct parse_choice * pick; |
| 212 | size_t i, * dp; |
| 213 | if (!srcsz) { |
| 214 | if (dstcap < 1) |
| 215 | return 0; |
| 216 | dstp[0] = 0; |
| 217 | return 1; |
| 218 | } |
| 219 | mch = calloc(srcsz, sizeof(*mch)); |
| 220 | pick = calloc(srcsz + 1, sizeof(*pick)); |
| 221 | bestm = calloc(srcsz, sizeof(*bestm)); |
| 222 | dp = malloc((srcsz + 1) * sizeof(*dp)); |
| 223 | if (!mch || !pick || !bestm || !dp || longest_matches(src, srcsz, mch)) |
| 224 | goto fail; |
| 225 | dp[srcsz] = 0; |
| 226 | pick[srcsz].lit = pick[srcsz].mlen = pick[srcsz].off = 0; |
| 227 | for (i = srcsz; i-- > 0;) { |
| 228 | size_t j, best_cost; |
| 229 | uint32_t best_lit, best_len; |
| 230 | uint16_t best_off; |
| 231 | bestm[i].len = bestm[i].off = 0; |
| 232 | if (mch[i].len >= 4) { |
| 233 | size_t ml, lim = mch[i].len; |
| 234 | if (lim > 266) lim = 266; |
| 235 | size_t off_bytes = (mch[i].off > 255) ? 2 : 1; |
| 236 | size_t mcost = (size_t)-1; |
| 237 | uint32_t mlen = 0; |
| 238 | if (i + lim > srcsz) |
| 239 | lim = srcsz - i; |
| 240 | for (ml = 4; ml <= lim; ++ml) { |
| 241 | size_t c = off_bytes + (ml - 4 >= 7) + dp[i + ml]; |
| 242 | if (c < mcost) { |
| 243 | mcost = c; mlen = (uint32_t)ml; |
| 244 | } |
| 245 | } |
| 246 | if (mlen) { |
| 247 | bestm[i].len = mlen; bestm[i].off = mch[i].off; |
| 248 | } |
| 249 | } |
| 250 | if (srcsz - i <= 270) { // 256 + 15 - 1 |
| 251 | best_cost = 1 + (srcsz - i) + (srcsz - i >= 15); |
| 252 | best_lit = (uint32_t)(srcsz - i); |
| 253 | } else { |
| 254 | best_cost = (size_t)-1; |
| 255 | best_lit = 0; |
| 256 | } |
| 257 | best_len = best_off = 0; |
| 258 | for (j = i; j < srcsz && j - i <= 270; ++j) { |
| 259 | size_t lit = j - i, off_bytes_j, c; |
| 260 | if (!bestm[j].len) |
| 261 | continue; |
| 262 | off_bytes_j = (bestm[j].off > 255) ? 2 : 1; |
| 263 | c = 1 + lit + (lit >= 15) + |
| 264 | (off_bytes_j + (bestm[j].len - 4 >= 7) + dp[j + bestm[j].len]); |
| 265 | if (c < best_cost) { |
| 266 | best_cost = c; best_lit = (uint32_t)lit; |
| 267 | best_len = bestm[j].len; best_off = bestm[j].off; |
| 268 | } |
| 269 | } |
| 270 | dp[i] = best_cost; pick[i].lit = best_lit; |
| 271 | pick[i].mlen = best_len; pick[i].off = best_off; |
| 272 | } |
| 273 | int terminated = 0; |
| 274 | for (i = 0; i < srcsz; ) { |
| 275 | byte * tokenp; |
| 276 | size_t lit = pick[i].lit, ml = pick[i].mlen; |
| 277 | uint16_t off = pick[i].off; |
| 278 | unsigned token_hi, token_lo; |
| 279 | if (i + lit > srcsz) |
| 280 | goto fail; |
| 281 | if (i + lit < srcsz && ml < 4) |
| 282 | goto fail; |
| 283 | tokenp = out; |
| 284 | if (out >= out_end) |
| 285 | goto fail; |
| 286 | *out++ = 0; |
| 287 | token_hi = (lit < 15) ? (unsigned)lit : 15u; |
| 288 | if (encode_len_tail(&out, out_end, lit)) |
| 289 | goto fail; |
| 290 | if ((size_t)(out_end - out) < lit) |
| 291 | goto fail; |
| 292 | memcpy(out, src + i, lit); |
| 293 | out += lit; |
| 294 | i += lit; |
| 295 | if (i >= srcsz) { |
| 296 | *tokenp = (byte)(token_hi << 3); |
| 297 | terminated = 1; |
| 298 | break; |
| 299 | } |
| 300 | unsigned mode_bit = (off > 255) ? 1u : 0u; |
| 301 | token_lo = (ml - 4 < 7) ? (unsigned)(ml - 4) : 7u; |
| 302 | *tokenp = (byte)((mode_bit << 7) | (token_hi << 3) | token_lo); |
| 303 | if (off > 255) { |
| 304 | if (out_end - out < 2) |
| 305 | goto fail; |
| 306 | *out++ = (byte)(off & 255); |
| 307 | *out++ = (byte)(off >> 8); |
| 308 | } else { |
| 309 | if (out >= out_end) |
| 310 | goto fail; |
| 311 | *out++ = (byte)off; |
| 312 | } |
| 313 | if (encode_len_tail_ml(&out, out_end, ml - 4)) |
| 314 | goto fail; |
| 315 | i += ml; |
| 316 | } |
| 317 | /* A match-ended parse leaves no trailing token; the decompressor keys |
| 318 | * termination off a zero-or-more-byte literal copy reaching ipe, so always |
| 319 | * emit a final lit=0 token when the main loop didn't already. */ |
| 320 | if (!terminated) { |
| 321 | if (out >= out_end) |
| 322 | goto fail; |
| 323 | *out++ = 0; |
| 324 | } |
| 325 | free(mch); free(pick); free(bestm); free(dp); |
| 326 | return (size_t)(out - dstp); |
| 327 | fail: |
| 328 | free(mch); free(pick); free(bestm); free(dp); |
| 329 | return 0; |
| 330 | } |
| 331 | |
| 332 | static const uint32_t tab[16] = { |
| 333 | 0x00000000u, 0x1DB71064u, 0x3B6E20C8u, 0x26D930ACu, |
| 334 | 0x76DC4190u, 0x6B6B51F4u, 0x4DB26158u, 0x5005713Cu, |
| 335 | 0xEDB88320u, 0xF00F9344u, 0xD6D6A3E8u, 0xCB61B38Cu, |
| 336 | 0x9B64C2B0u, 0x86D3D2D4u, 0xA00AE278u, 0xBDBDF21Cu |
| 337 | }; |
| 338 | |
| 339 | static uint32_t crc32_nibble(const byte *data, size_t len) { |
| 340 | uint32_t crc = ~0u; // faster than decompressor.asm bit-by-bit, same result. |
| 341 | while (len--) { |
| 342 | crc ^= *data++; |
| 343 | crc = (crc >> 4) ^ tab[crc & 0x0Fu]; |
| 344 | crc = (crc >> 4) ^ tab[crc & 0x0Fu]; |
| 345 | } |
| 346 | return ~crc; |
| 347 | } |
| 348 | |
| 349 | int main(int argc, char *argv[]) { |
| 350 | #ifndef __BYTE_ORDER__ |
| 351 | uint32_t endcheck = 0x12345678; |
| 352 | uint8_t endbyte = *((uint8_t *)&endcheck); |
| 353 | bigendian = endbyte == 0x12; |
| 354 | #endif |
| 355 | if (argc != 3) { |
| 356 | fprintf(stderr, "? %s <input> <output>\n", argv[0]); return 1; |
| 357 | } |
| 358 | FILE * fin = fopen(argv[1], "rb"); |
| 359 | FILE * fout = fopen(argv[2], "wb"); |
| 360 | byte * inbuf, *outbuf; |
| 361 | size_t insz, outsz; |
| 362 | if (!fin || !fout) { |
| 363 | fprintf(stderr, "? fopen\n"); return 1; |
| 364 | } |
| 365 | fseek(fin, 0, SEEK_END); |
| 366 | insz = ftell(fin); |
| 367 | fseek(fin, 0, SEEK_SET); |
| 368 | inbuf = malloc(insz); outbuf = malloc(insz * 2); |
| 369 | if (!inbuf || !outbuf) { |
| 370 | fprintf(stderr, "? malloc\n"); return 1; |
| 371 | } |
| 372 | fread(inbuf, 1, insz, fin); |
| 373 | fclose(fin); |
| 374 | outsz = limlzpack(outbuf, insz * 2, inbuf, insz); |
| 375 | if (!outsz) { |
| 376 | fprintf(stderr, "? limlzpack\n"); return 1; |
| 377 | } |
| 378 | uint32_t crc = ENDSWAP(crc32_nibble(inbuf, insz)); |
| 379 | fwrite(&crc, sizeof(crc), 1, fout); |
| 380 | fwrite(outbuf, 1, outsz, fout); |
| 381 | fclose(fout); |
| 382 | free(inbuf); free(outbuf); |
| 383 | } |