:: limine / tools / limlzpack.c 11.3 KB raw

1
/* limlz: Copyright (C) 2026 Kamila Szewczyk <k@iczelia.net>
2
 * limine: Copyright (C) 2019-2026 Mintsuki and contributors.
3
 * 
4
 * Redistribution and use in source and binary forms, with or without
5
 * modification, are permitted provided that the following conditions are met:
6
 * 
7
 * 1. Redistributions of source code must retain the above copyright notice, this
8
 *    list of conditions and the following disclaimer.
9
 * 
10
 * 2. Redistributions in binary form must reproduce the above copyright notice,
11
 *    this list of conditions and the following disclaimer in the documentation
12
 *    and/or other materials provided with the distribution.
13
 * 
14
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
15
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
18
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24
 */
25
 
26
#include <stdint.h>
27
#include <stddef.h>
28
#include <stdlib.h>
29
#include <string.h>
30
#include <stdio.h>
31
#include <stdbool.h>
32
33
typedef unsigned char byte;
34
35
static uint16_t endswap16(uint16_t value) {
36
    uint16_t ret = 0;
37
    ret |= (value >> 8) & 0x00ff;
38
    ret |= (value << 8) & 0xff00;
39
    return ret;
40
}
41
42
static uint32_t endswap32(uint32_t value) {
43
    uint32_t ret = 0;
44
    ret |= (value >> 24) & 0x000000ff;
45
    ret |= (value >> 8)  & 0x0000ff00;
46
    ret |= (value << 8)  & 0x00ff0000;
47
    ret |= (value << 24) & 0xff000000;
48
    return ret;
49
}
50
51
static uint64_t endswap64(uint64_t value) {
52
    uint64_t ret = 0;
53
    ret |= (value >> 56) & 0x00000000000000ff;
54
    ret |= (value >> 40) & 0x000000000000ff00;
55
    ret |= (value >> 24) & 0x0000000000ff0000;
56
    ret |= (value >> 8)  & 0x00000000ff000000;
57
    ret |= (value << 8)  & 0x000000ff00000000;
58
    ret |= (value << 24) & 0x0000ff0000000000;
59
    ret |= (value << 40) & 0x00ff000000000000;
60
    ret |= (value << 56) & 0xff00000000000000;
61
    return ret;
62
}
63
64
#ifdef __BYTE_ORDER__
65
66
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
67
#define bigendian true
68
#else
69
#define bigendian false
70
#endif
71
72
#else /* !__BYTE_ORDER__ */
73
74
static bool bigendian = false;
75
76
#endif /* !__BYTE_ORDER__ */
77
78
#define ENDSWAP(VALUE) (bigendian ? (                    \
79
    sizeof(VALUE) == 1 ? (VALUE)          :              \
80
    sizeof(VALUE) == 2 ? endswap16(VALUE) :              \
81
    sizeof(VALUE) == 4 ? endswap32(VALUE) :              \
82
    sizeof(VALUE) == 8 ? endswap64(VALUE) : (abort(), 1) \
83
) : (VALUE))
84
85
/*  Higher -> better compression with exponentally dimnishing gains.  */
86
#define LIMLZ_SA_NEIGHBORS 32
87
88
struct sa_cmp_ctx { int32_t *rank; size_t n, k; };
89
static struct sa_cmp_ctx g_sa_ctx;
90
91
static int32_t sa_cmp_idx(int32_t i, int32_t j) {
92
  int32_t ri, rj;
93
  if (g_sa_ctx.rank[i] != g_sa_ctx.rank[j])
94
    return g_sa_ctx.rank[i] - g_sa_ctx.rank[j];
95
  ri = (i + (int32_t)g_sa_ctx.k < (int32_t)g_sa_ctx.n) ? g_sa_ctx.rank[i + g_sa_ctx.k] : -1;
96
  rj = (j + (int32_t)g_sa_ctx.k < (int32_t)g_sa_ctx.n) ? g_sa_ctx.rank[j + g_sa_ctx.k] : -1;
97
  return ri - rj;
98
}
99
100
static int sa_qsort_cmp(const void * a, const void * b) {
101
  int32_t d = sa_cmp_idx(*(const int32_t *)a, *(const int32_t *)b);
102
  return (d > 0) - (d < 0);
103
}
104
105
static int saca(const byte * s, size_t n, int32_t * sa, int32_t * rank, int32_t * tmp) {
106
  size_t i;
107
  if (!n)
108
    return 0;
109
  for (i = 0; i < n; ++i) {
110
    sa[i] = (int32_t)i;  rank[i] = (int32_t)s[i];
111
  }
112
  for (g_sa_ctx.k = 1;; g_sa_ctx.k <<= 1) {
113
    g_sa_ctx.rank = rank;  g_sa_ctx.n = n;
114
    qsort(sa, n, sizeof(sa[0]), sa_qsort_cmp);
115
    tmp[sa[0]] = 0;
116
    for (i = 1; i < n; ++i)
117
      tmp[sa[i]] = tmp[sa[i - 1]] + (sa_cmp_idx(sa[i - 1], sa[i]) < 0);
118
    for (i = 0; i < n; ++i)
119
      rank[i] = tmp[i];
120
    if ((size_t)rank[sa[n - 1]] == n - 1)
121
      break;
122
  }
123
  return 0;
124
}
125
126
static size_t lcp_bytes(const byte * s, size_t n, size_t i, size_t j) {
127
  size_t l = 0, m = n - (i > j ? i : j);
128
  for (; l < m && s[i + l] == s[j + l]; ++l);
129
  return l;
130
}
131
132
struct match_choice { uint32_t len;  uint16_t off; };
133
struct parse_choice { uint32_t lit, mlen;  uint16_t off; };
134
135
static int longest_matches(const byte * src, size_t n, struct match_choice * mch) {
136
  int32_t *sa, *rank, *tmp, *inv;
137
  size_t i;
138
  if (!n)
139
    return 0;
140
  sa = malloc(n * sizeof(*sa));
141
  rank = malloc(n * sizeof(*rank));
142
  tmp = malloc(n * sizeof(*tmp));
143
  inv = malloc(n * sizeof(*inv));
144
  if (!sa || !rank || !tmp || !inv || saca(src, n, sa, rank, tmp)) {
145
    free(sa);  free(rank);  free(tmp);  free(inv);
146
    return -1;
147
  }
148
  for (i = 0; i < n; ++i)
149
    inv[sa[i]] = (int32_t)i;
150
  for (i = 0; i < n; ++i) {
151
    int32_t r = inv[i], rr;
152
    int d;
153
    size_t best_len = 0;
154
    uint16_t best_off = 0;
155
    for (d = -LIMLZ_SA_NEIGHBORS; d <= LIMLZ_SA_NEIGHBORS; ++d) {
156
      size_t j, l, off;
157
      if (!d)
158
        continue;
159
      rr = r + d;
160
      if (rr < 0 || rr >= (int32_t)n)
161
        continue;
162
      j = (size_t)sa[rr];
163
      if (j >= i)
164
        continue;
165
      off = i - j;
166
      if (off == 0 || off > 65535)
167
        continue;
168
      l = lcp_bytes(src, n, i, j);
169
      if (l > best_len) {
170
        best_len = l;
171
        best_off = (uint16_t)off;
172
      }
173
    }
174
    if (best_len >= 4) {
175
      mch[i].len = (uint32_t)best_len;
176
      mch[i].off = best_off;
177
    } else {
178
      mch[i].len = mch[i].off = 0;
179
    }
180
  }
181
  free(sa);  free(rank);  free(tmp);  free(inv);
182
  return 0;
183
}
184
185
static int encode_len_tail(byte ** outp, byte * out_end, size_t n) {
186
  byte *out = * outp;
187
  if (n >= 15) {
188
    if (out >= out_end) return -1;
189
    *out++ = (byte)(n - 15);
190
  }
191
  *outp = out;
192
  return 0;
193
}
194
195
static int encode_len_tail_ml(byte ** outp, byte * out_end, size_t n) {
196
  byte * out = *outp;
197
  if (n >= 7) {
198
    if (out >= out_end)
199
      return -1;
200
    *out++ = (byte)(n - 7);
201
  }
202
  *outp = out;
203
  return 0;
204
}
205
206
static size_t limlzpack(void * dst, size_t dstcap, const void * srcv, size_t srcsz) {
207
  const byte * src = (const byte *) srcv;
208
  byte * dstp = (byte *) dst;
209
  byte * out = dstp, * out_end = dstp + dstcap;
210
  struct match_choice * mch, * bestm;
211
  struct parse_choice * pick;
212
  size_t i, * dp;
213
  if (!srcsz) {
214
    if (dstcap < 1)
215
      return 0;
216
    dstp[0] = 0;
217
    return 1;
218
  }
219
  mch = calloc(srcsz, sizeof(*mch));
220
  pick = calloc(srcsz + 1, sizeof(*pick));
221
  bestm = calloc(srcsz, sizeof(*bestm));
222
  dp = malloc((srcsz + 1) * sizeof(*dp));
223
  if (!mch || !pick || !bestm || !dp || longest_matches(src, srcsz, mch))
224
    goto fail;
225
  dp[srcsz] = 0;
226
  pick[srcsz].lit = pick[srcsz].mlen = pick[srcsz].off = 0;
227
  for (i = srcsz; i-- > 0;) {
228
    size_t j, best_cost;
229
    uint32_t best_lit, best_len;
230
    uint16_t best_off;
231
    bestm[i].len = bestm[i].off = 0;
232
    if (mch[i].len >= 4) {
233
      size_t ml, lim = mch[i].len;
234
      if (lim > 266) lim = 266;
235
      size_t off_bytes = (mch[i].off > 255) ? 2 : 1;
236
      size_t mcost = (size_t)-1;
237
      uint32_t mlen = 0;
238
      if (i + lim > srcsz)
239
        lim = srcsz - i;
240
      for (ml = 4; ml <= lim; ++ml) {
241
        size_t c = off_bytes + (ml - 4 >= 7) + dp[i + ml];
242
        if (c < mcost) {
243
          mcost = c;  mlen = (uint32_t)ml;
244
        }
245
      }
246
      if (mlen) {
247
        bestm[i].len = mlen;  bestm[i].off = mch[i].off;
248
      }
249
    }
250
    if (srcsz - i <= 270) { // 256 + 15 - 1
251
      best_cost = 1 + (srcsz - i) + (srcsz - i >= 15);
252
      best_lit = (uint32_t)(srcsz - i);
253
    } else {
254
      best_cost = (size_t)-1;
255
      best_lit = 0;
256
    }
257
    best_len = best_off = 0;
258
    for (j = i; j < srcsz && j - i <= 270; ++j) {
259
      size_t lit = j - i, off_bytes_j, c;
260
      if (!bestm[j].len)
261
        continue;
262
      off_bytes_j = (bestm[j].off > 255) ? 2 : 1;
263
      c = 1 + lit + (lit >= 15) +
264
                  (off_bytes_j + (bestm[j].len - 4 >= 7) + dp[j + bestm[j].len]);
265
      if (c < best_cost) {
266
        best_cost = c;  best_lit = (uint32_t)lit;
267
        best_len = bestm[j].len;  best_off = bestm[j].off;
268
      }
269
    }
270
    dp[i] = best_cost;  pick[i].lit = best_lit;
271
    pick[i].mlen = best_len;  pick[i].off = best_off;
272
  }
273
  int terminated = 0;
274
  for (i = 0; i < srcsz; ) {
275
    byte * tokenp;
276
    size_t lit = pick[i].lit, ml = pick[i].mlen;
277
    uint16_t off = pick[i].off;
278
    unsigned token_hi, token_lo;
279
    if (i + lit > srcsz)
280
      goto fail;
281
    if (i + lit < srcsz && ml < 4)
282
      goto fail;
283
    tokenp = out;
284
    if (out >= out_end)
285
      goto fail;
286
    *out++ = 0;
287
    token_hi = (lit < 15) ? (unsigned)lit : 15u;
288
    if (encode_len_tail(&out, out_end, lit))
289
      goto fail;
290
    if ((size_t)(out_end - out) < lit)
291
      goto fail;
292
    memcpy(out, src + i, lit);
293
    out += lit;
294
    i += lit;
295
    if (i >= srcsz) {
296
      *tokenp = (byte)(token_hi << 3);
297
      terminated = 1;
298
      break;
299
    }
300
    unsigned mode_bit = (off > 255) ? 1u : 0u;
301
    token_lo = (ml - 4 < 7) ? (unsigned)(ml - 4) : 7u;
302
    *tokenp = (byte)((mode_bit << 7) | (token_hi << 3) | token_lo);
303
    if (off > 255) {
304
      if (out_end - out < 2)
305
        goto fail;
306
      *out++ = (byte)(off & 255);
307
      *out++ = (byte)(off >> 8);
308
    } else {
309
      if (out >= out_end)
310
        goto fail;
311
      *out++ = (byte)off;
312
    }
313
    if (encode_len_tail_ml(&out, out_end, ml - 4))
314
      goto fail;
315
    i += ml;
316
  }
317
  /* A match-ended parse leaves no trailing token; the decompressor keys
318
   * termination off a zero-or-more-byte literal copy reaching ipe, so always
319
   * emit a final lit=0 token when the main loop didn't already. */
320
  if (!terminated) {
321
    if (out >= out_end)
322
      goto fail;
323
    *out++ = 0;
324
  }
325
  free(mch);  free(pick);  free(bestm);  free(dp);
326
  return (size_t)(out - dstp);
327
fail:
328
  free(mch);  free(pick);  free(bestm);  free(dp);
329
  return 0;
330
}
331
332
static const uint32_t tab[16] = {
333
  0x00000000u, 0x1DB71064u, 0x3B6E20C8u, 0x26D930ACu,
334
  0x76DC4190u, 0x6B6B51F4u, 0x4DB26158u, 0x5005713Cu,
335
  0xEDB88320u, 0xF00F9344u, 0xD6D6A3E8u, 0xCB61B38Cu,
336
  0x9B64C2B0u, 0x86D3D2D4u, 0xA00AE278u, 0xBDBDF21Cu
337
};
338
339
static uint32_t crc32_nibble(const byte *data, size_t len) {
340
  uint32_t crc = ~0u; // faster than decompressor.asm bit-by-bit, same result.
341
  while (len--) {
342
    crc ^= *data++;
343
    crc = (crc >> 4) ^ tab[crc & 0x0Fu];
344
    crc = (crc >> 4) ^ tab[crc & 0x0Fu];
345
  }
346
  return ~crc;
347
}
348
349
int main(int argc, char *argv[]) {
350
#ifndef __BYTE_ORDER__
351
  uint32_t endcheck = 0x12345678;
352
  uint8_t endbyte = *((uint8_t *)&endcheck);
353
  bigendian = endbyte == 0x12;
354
#endif
355
  if (argc != 3) {
356
    fprintf(stderr, "? %s <input> <output>\n", argv[0]);  return 1;
357
  }
358
  FILE * fin = fopen(argv[1], "rb");
359
  FILE * fout = fopen(argv[2], "wb");
360
  byte * inbuf, *outbuf;
361
  size_t insz, outsz;
362
  if (!fin || !fout) {
363
    fprintf(stderr, "? fopen\n");  return 1;
364
  }
365
  fseek(fin, 0, SEEK_END);
366
  insz = ftell(fin);
367
  fseek(fin, 0, SEEK_SET);
368
  inbuf = malloc(insz);  outbuf = malloc(insz * 2);
369
  if (!inbuf || !outbuf) {
370
    fprintf(stderr, "? malloc\n");  return 1;
371
  }
372
  fread(inbuf, 1, insz, fin);
373
  fclose(fin);
374
  outsz = limlzpack(outbuf, insz * 2, inbuf, insz);
375
  if (!outsz) {
376
    fprintf(stderr, "? limlzpack\n");  return 1;
377
  }
378
  uint32_t crc = ENDSWAP(crc32_nibble(inbuf, insz));
379
  fwrite(&crc, sizeof(crc), 1, fout);
380
  fwrite(outbuf, 1, outsz, fout);
381
  fclose(fout);
382
  free(inbuf);  free(outbuf);
383
}
tab: 248 wrap: offon