:: commit 47a70679a6052cd6ade9d4f587534fff1e4a3954

Kamila Szewczyk <kspalaiologos@gmail.com> — 2022-05-02 07:29

parents: 98c98c3701

move cm/rle/srt code to separate units

diff --git a/Makefile b/Makefile
index ee5a197..b9e9bc7 100644
--- a/Makefile
+++ b/Makefile
@@ -4,7 +4,8 @@ CFLAGS=-O3 -march=native -mtune=native -flto -Iinclude
 
 .PHONY: all clean
 
-OBJECTS=obj/main.o obj/libsais.o obj/crc32.o obj/mtf.o
+OBJECTS=obj/main.o obj/libsais.o obj/crc32.o obj/mtf.o obj/srt.o obj/rle.o \
+        obj/cm.o
 
 all: bzip3
 
diff --git a/include/cm.h b/include/cm.h
index 7f4d68a..ecd80f7 100644
--- a/include/cm.h
+++ b/include/cm.h
@@ -2,6 +2,9 @@
 #ifndef _CM_H
 #define _CM_H
 
+#include <inttypes.h>
+#include <stdint.h>
+
 typedef struct {
     uint32_t low, high, code;
     uint16_t C0[256], C1[256][256], C2[2][256][17];
@@ -11,163 +14,10 @@ typedef struct {
     int64_t input_ptr, output_ptr, input_max;
 } state;
 
-static void write_out(state *s, uint8_t c) {
-    s->out_queue[s->output_ptr++] = c;
-}
-
-static uint8_t read_in(state *s) {
-    if (s->input_ptr < s->input_max) return s->in_queue[s->input_ptr++];
-    return -1;
-}
-
-static void encodebit0(state *s, uint32_t p) {
-    s->low += (((uint64_t)(s->high - s->low) * p) >> 18) + 1;
-    while ((s->low ^ s->high) < (1 << 24)) {
-        write_out(s, s->low >> 24);
-        s->low <<= 8;
-        s->high = (s->high << 8) | 0xFF;
-    }
-}
-
-static void encodebit1(state *s, uint32_t p) {
-    s->high = s->low + (((uint64_t)(s->high - s->low) * p) >> 18);
-    while ((s->low ^ s->high) < (1 << 24)) {
-        write_out(s, s->low >> 24);
-        s->low <<= 8;
-        s->high = (s->high << 8) + 255;
-    }
-}
-
-static uint8_t decodebit(state *s, uint32_t p) {
-    const uint32_t mid = s->low + (((uint64_t)(s->high - s->low) * p) >> 18);
-    const uint8_t bit = s->code <= mid;
-    if (bit)
-        s->high = mid;
-    else
-        s->low = mid + 1;
-    while ((s->low ^ s->high) < (1 << 24)) {
-        s->low <<= 8;
-        s->high = (s->high << 8) + 255;
-        s->code = (s->code << 8) + read_in(s);
-    }
-    return bit;
-}
-
-static void flush(state *s) {
-    write_out(s, s->low >> 24); s->low <<= 8;
-    write_out(s, s->low >> 24); s->low <<= 8;
-    write_out(s, s->low >> 24); s->low <<= 8;
-    write_out(s, s->low >> 24); s->low <<= 8;
-}
-
-static void init(state *s) {
-    s->code = (s->code << 8) + read_in(s);
-    s->code = (s->code << 8) + read_in(s);
-    s->code = (s->code << 8) + read_in(s);
-    s->code = (s->code << 8) + read_in(s);
-}
-
-#define update0(p, x) ((p) - ((p) >> x))
-#define update1(p, x) ((p) + (((p) ^ 65535) >> x))
-
-static void begin(state * s) {
-    s->c1 = s->c2 = 0;
-    s->run = 0;
-    s->low = 0;
-    s->high = 0xFFFFFFFF;
-    s->code = 0;
-    for (int i = 0; i < 256; i++) s->C0[i] = 1 << 15;
-    for (int i = 0; i < 256; i++)
-        for (int j = 0; j < 256; j++) s->C1[i][j] = 1 << 15;
-    for (int i = 0; i < 2; i++)
-        for (int j = 0; j < 256; j++)
-            for (int k = 0; k < 17; k++) s->C2[i][j][k] = (k << 12) - (k == 16);
-}
-
-static void encode_bit(state *s, uint8_t c) {
-    if (s->c1 == s->c2)
-        ++s->run;
-    else
-        s->run = 0;
-
-    const int f = s->run > 1;
-
-    int ctx = 1;
-
-    while (ctx < 256) {
-        const int p0 = s->C0[ctx];
-        const int p1 = s->C1[s->c1][ctx];
-        const int p2 = s->C1[s->c2][ctx];
-        const int p = ((p0 + p1) * 7 + p2 + p2) >> 4;
-
-        const int j = p >> 12;
-        const int x1 = s->C2[f][ctx][j];
-        const int x2 = s->C2[f][ctx][j + 1];
-        const int ssep = x1 + (((x2 - x1) * (p & 4095)) >> 12);
-
-        if (c & 128) {
-            encodebit1(s, ssep * 3 + p);
-            s->C0[ctx] = update1(s->C0[ctx], 2);
-            s->C1[s->c1][ctx] = update1(s->C1[s->c1][ctx], 4);
-            s->C2[f][ctx][j] = update1(s->C2[f][ctx][j], 6);
-            s->C2[f][ctx][j + 1] = update1(s->C2[f][ctx][j + 1], 6);
-            ctx += ctx + 1;
-        } else {
-            encodebit0(s, ssep * 3 + p);
-            s->C0[ctx] = update0(s->C0[ctx], 2);
-            s->C1[s->c1][ctx] = update0(s->C1[s->c1][ctx], 4);
-            s->C2[f][ctx][j] = update0(s->C2[f][ctx][j], 6);
-            s->C2[f][ctx][j + 1] = update0(s->C2[f][ctx][j + 1], 6);
-            ctx += ctx;
-        }
-
-        c <<= 1;
-    }
-
-    s->c2 = s->c1;
-    s->c1 = ctx & 255;
-}
-
-static uint8_t decode_bit(state *s) {
-    if (s->c1 == s->c2)
-        ++s->run;
-    else
-        s->run = 0;
-
-    const int f = s->run > 1;
-
-    int ctx = 1;
-
-    while (ctx < 256) {
-        const int p0 = s->C0[ctx];
-        const int p1 = s->C1[s->c1][ctx];
-        const int p2 = s->C1[s->c2][ctx];
-        const int p = ((p0 + p1) * 7 + p2 + p2) >> 4;
-
-        const int j = p >> 12;
-        const int x1 = s->C2[f][ctx][j];
-        const int x2 = s->C2[f][ctx][j + 1];
-        const int ssep = x1 + (((x2 - x1) * (p & 4095)) >> 12);
-
-        const int bit = decodebit(s, ssep * 3 + p);
-
-        if (bit) {
-            s->C0[ctx] = update1(s->C0[ctx], 2);
-            s->C1[s->c1][ctx] = update1(s->C1[s->c1][ctx], 4);
-            s->C2[f][ctx][j] = update1(s->C2[f][ctx][j], 6);
-            s->C2[f][ctx][j + 1] = update1(s->C2[f][ctx][j + 1], 6);
-            ctx += ctx + 1;
-        } else {
-            s->C0[ctx] = update0(s->C0[ctx], 2);
-            s->C1[s->c1][ctx] = update0(s->C1[s->c1][ctx], 4);
-            s->C2[f][ctx][j] = update0(s->C2[f][ctx][j], 6);
-            s->C2[f][ctx][j + 1] = update0(s->C2[f][ctx][j + 1], 6);
-            ctx += ctx;
-        }
-    }
-
-    s->c2 = s->c1;
-    return s->c1 = ctx & 255;
-}
+void flush(state *s); /* Write the four bytes of s->low to the output queue, most significant first. */
+void init(state *s); /* Load the first four input bytes into s->code. */
+void begin(state * s); /* Reset the coder registers and all probability tables. */
+void encode_byte(state *s, uint8_t c); /* Encode the 8 bits of c, most significant bit first. */
+uint8_t decode_byte(state *s); /* Decode 8 bits and return the resulting byte. */
 
 #endif
diff --git a/include/rle.h b/include/rle.h
index 34f796e..f6800a2 100644
--- a/include/rle.h
+++ b/include/rle.h
@@ -2,76 +2,7 @@
 #ifndef _RLE_H
 #define _RLE_H
 
-/* Derived from Matt Mahoney's public domain RLE code. */
-
-#define _putc(__ch, __out) *__out++ = (__ch)
-#define _getc(in, in_) (in < in_ ? (*in++) : -1)
-#define _rewind(in, _in) in = _in
-
-static int mrlec(unsigned char *in, int inlen, unsigned char *out) {
-    unsigned char *ip = in, *in_ = in + inlen, *op = out;
-    int i;
-    int c, pc = -1;
-    long t[256] = {0};
-    long run = 0;
-    while ((c = _getc(ip, in_)) != -1) {
-        if (c == pc)
-            t[c] += (++run % 255) != 0;
-        else
-            --t[c], run = 0;
-        pc = c;
-    }
-    for (i = 0; i < 32; ++i) {
-        int j;
-        c = 0;
-        for (j = 0; j < 8; ++j) c += (t[i * 8 + j] > 0) << j;
-        _putc(c, op);
-    }
-    _rewind(ip, in);
-    c = pc = -1;
-    run = 0;
-    do {
-        c = _getc(ip, in_);
-        if (c == pc)
-            ++run;
-        else if (run > 0 && t[pc] > 0) {
-            _putc(pc, op);
-            for (; run > 255; run -= 255) _putc(255, op);
-            _putc(run - 1, op);
-            run = 1;
-        } else
-            for (++run; run > 1; --run) _putc(pc, op);
-        pc = c;
-    } while (c != -1);
-
-    return op - out;
-}
-
-static int mrled(unsigned char *in, unsigned char *out, int outlen) {
-    unsigned char *ip = in, *op = out;
-    int i;
-
-    int c, pc = -1;
-    long t[256] = {0};
-    long run = 0;
-
-    for (i = 0; i < 32; ++i) {
-        int j;
-        c = *ip++;
-        for (j = 0; j < 8; ++j) t[i * 8 + j] = (c >> j) & 1;
-    }
-
-    while (op < out + outlen) {
-        c = *ip++;
-        if (t[c]) {
-            for (run = 0; (pc = *ip++) == 255; run += 255)
-                ;
-            run += pc + 1;
-            for (; run > 0; --run) _putc(c, op);
-        } else
-            _putc(c, op);
-    }
-    return ip - in;
-}
+int mrlec(unsigned char *in, int inlen, unsigned char *out); /* RLE-encode inlen bytes of in into out; returns bytes written. */
+int mrled(unsigned char *in, unsigned char *out, int outlen); /* Decode until at least outlen bytes are in out; returns bytes read from in. */
 
 #endif
diff --git a/include/srt.h b/include/srt.h
index 0eb9aed..30e7c27 100644
--- a/include/srt.h
+++ b/include/srt.h
@@ -2,7 +2,8 @@
 #ifndef _SRT_H
 #define _SRT_H
 
-static const int MAX_HDR_SIZE = 4 * 256;
+#include <inttypes.h>
+#include <stddef.h>
 
 struct srt_state {
     uint32_t freqs[256];
@@ -13,143 +14,7 @@ struct srt_state {
     uint32_t bucket_ends[256];
 };
 
-static int preprocess(const uint32_t * freqs, uint8_t * symbols) {
-    int nb_symbols = 0;
-    for(int i = 0; i < 256; i++)
-        if(freqs[i] > 0)
-            symbols[nb_symbols++] = i;
-    uint32_t h = 4;
-    while(h < nb_symbols)
-        h = h * 3 + 1;
-    while(1) {
-        h /= 3;
-        for(uint32_t i = h; i < nb_symbols; i++) {
-            const int t = symbols[i] & 0xFF;
-            int32_t b = i - h;
-            while((b >= 0) && freqs[symbols[b]] < freqs[t]
-            || (freqs[t] == freqs[symbols[b]]) && t < symbols[b])
-                { symbols[b + h] = symbols[b]; b -= h; }
-            symbols[b + h] = t;
-        }
-        if(h == 1)
-            break;
-    }
-    return nb_symbols;
-}
-
-static int encode_header(uint32_t * freqs, uint8_t * dst) {
-    uint32_t idx = 0;
-    for(int i = 0; i < 256; i++) {
-        uint32_t f = freqs[i];
-        while(f >= 128) {
-            dst[idx++] = (uint8_t) (f | 0x80);
-            f >>= 7;
-        }
-        dst[idx++] = (uint8_t) f;
-    }
-    return idx;
-}
-
-static int decode_header(uint8_t * src, uint32_t * freqs) {
-    uint32_t idx = 0;
-    for(int i = 0; i < 256; i++) {
-        int val = src[idx++] & 0xFF;
-        int res = val & 0x7F;
-        int shift = 7;
-        while(val >= 128) {
-            val = src[idx++] & 0xFF;
-            res |= (val & 0x7F) << shift;
-            if(shift > 21)
-                break;
-            shift += 7;
-        }
-        freqs[i] = res;
-    }
-    return idx;
-}
-
-uint32_t srt_encode(struct srt_state * mtf, uint8_t *src, uint8_t *dst, uint32_t count) {
-    // Find first symbols and build a histogram.
-    for(int i = 0; i < 256; i++)
-        mtf->freqs[i] = 0;
-    for(uint32_t i = 0, b = 0; i < count;) {
-        if(mtf->freqs[src[i]] == 0) {
-            mtf->r2s[b] = src[i];
-            mtf->s2r[src[i]] = b;
-            b++;
-        }
-        uint32_t j = i + 1;
-        while(j < count && src[j] == src[i])
-            j++;
-        mtf->freqs[src[i]] += j - i;
-        i = j;
-    }
-
-    int n_symbols = preprocess(mtf->freqs, mtf->symbols);
-    for(uint32_t i = 0, bucket_pos = 0; i < n_symbols; i++) {
-        mtf->buckets[mtf->symbols[i]] = bucket_pos;
-        bucket_pos += mtf->freqs[mtf->symbols[i]];
-    }
-
-    const uint32_t header_size = encode_header(mtf->freqs, dst);
-    const int dst_idx = header_size;
-    for(uint32_t i = 0; i < count; ) {
-        const int c = src[i] & 0xFF;
-        int r = mtf->s2r[c] & 0xFF;
-        uint32_t p = mtf->buckets[c];
-        dst[dst_idx + p++] = r;
-        if(r != 0) {
-            do {
-                mtf->r2s[r] = mtf->r2s[r - 1];
-                mtf->s2r[mtf->r2s[r]] = r;
-                r--;
-            } while(r != 0);
-            mtf->r2s[0] = c;
-            mtf->s2r[c] = 0;
-        }
-        i++;
-        while(i < count && src[i] == c) {
-            dst[dst_idx + p++] = 0;
-            i++;
-        }
-        mtf->buckets[c] = p;
-    }
-    return count + header_size;
-}
-
-uint32_t srt_decode(struct srt_state * mtf, uint8_t *src, uint8_t *dst, uint32_t count) {
-    const uint32_t header_size = decode_header(src, mtf->freqs);
-    const uint32_t src_idx = header_size;
-    int nb_symbols = preprocess(mtf->freqs, mtf->symbols);
-    for(uint32_t i = 0, bucket_pos = 0; i < nb_symbols; i++) {
-        const int c = mtf->symbols[i] & 0xFF;
-        mtf->r2s[src[src_idx + bucket_pos] & 0xFF] = c;
-        mtf->buckets[c] = bucket_pos + 1;
-        bucket_pos += mtf->freqs[c];
-        mtf->bucket_ends[c] = bucket_pos;
-    }
-    uint32_t c = mtf->r2s[0];
-    for(uint32_t i = 0; i < count; i++) {
-        dst[i] = c;
-        if(mtf->buckets[c] < mtf->bucket_ends[c]) {
-            const int r = src[src_idx + mtf->buckets[c]] & 0xFF;
-            mtf->buckets[c]++;
-            if(r == 0)
-                continue;
-            for(int s = 0; s < r; s++)
-                mtf->r2s[s] = mtf->r2s[s + 1];
-            mtf->r2s[r] = c;
-            c = mtf->r2s[0];
-        } else {
-            if(nb_symbols == 1)
-                continue;
-            nb_symbols--;
-            for(int s = 0; s < nb_symbols; s++)
-                mtf->r2s[s] = mtf->r2s[s + 1];
-            c = mtf->r2s[0];
-        }
-    }
-    return count - header_size;
-}
+uint32_t srt_encode(struct srt_state * mtf, uint8_t *src, uint8_t *dst, uint32_t count); /* Returns count plus the size of the header written to dst. */
+uint32_t srt_decode(struct srt_state * mtf, uint8_t *src, uint8_t *dst, uint32_t count); /* Returns count minus the size of the header read from src. */
 
 #endif
diff --git a/src/cm.c b/src/cm.c
new file mode 100644
index 0000000..ccfc994
--- /dev/null
+++ b/src/cm.c
@@ -0,0 +1,161 @@
+
+#include "cm.h"
+
+static void write_out(state *s, uint8_t c) { /* Append c to s->out_queue and advance s->output_ptr; no bounds check. */
+    s->out_queue[s->output_ptr++] = c;
+}
+
+static uint8_t read_in(state *s) { /* Return the next byte of s->in_queue, or 0xFF once input_ptr reaches input_max. */
+    if (s->input_ptr < s->input_max) return s->in_queue[s->input_ptr++];
+    return -1; /* converted to uint8_t, so callers see 255, not a distinct end marker */
+}
+
+static void encodebit0(state *s, uint32_t p) { /* Code a 0 bit: keep the part of [low, high] above the split point. */
+    s->low += (((uint64_t)(s->high - s->low) * p) >> 18) + 1; /* split = low + range * p / 2^18; new low is split + 1 */
+    while ((s->low ^ s->high) < (1 << 24)) { /* top bytes of low and high agree: emit that byte */
+        write_out(s, s->low >> 24);
+        s->low <<= 8;
+        s->high = (s->high << 8) | 0xFF; /* shift in ones so high stays the upper bound */
+    }
+}
+
+static void encodebit1(state *s, uint32_t p) { /* Code a 1 bit: keep the part of [low, high] up to and including the split point. */
+    s->high = s->low + (((uint64_t)(s->high - s->low) * p) >> 18); /* split = low + range * p / 2^18 */
+    while ((s->low ^ s->high) < (1 << 24)) { /* top bytes of low and high agree: emit that byte */
+        write_out(s, s->low >> 24);
+        s->low <<= 8;
+        s->high = (s->high << 8) + 255; /* low byte is zero after the shift, so + 255 equals | 0xFF */
+    }
+}
+
+static uint8_t decodebit(state *s, uint32_t p) { /* Decode one bit using the same split point as encodebit0/encodebit1. */
+    const uint32_t mid = s->low + (((uint64_t)(s->high - s->low) * p) >> 18);
+    const uint8_t bit = s->code <= mid; /* code at or below the split means a 1 was coded */
+    if (bit)
+        s->high = mid;
+    else
+        s->low = mid + 1;
+    while ((s->low ^ s->high) < (1 << 24)) { /* drop the settled top byte and pull in one input byte */
+        s->low <<= 8;
+        s->high = (s->high << 8) + 255;
+        s->code = (s->code << 8) + read_in(s); /* read_in yields 255 once the input is exhausted */
+    }
+    return bit;
+}
+
+void flush(state *s) { /* Emit all four bytes of s->low, most significant first; s->low ends up 0. */
+    write_out(s, s->low >> 24); s->low <<= 8;
+    write_out(s, s->low >> 24); s->low <<= 8;
+    write_out(s, s->low >> 24); s->low <<= 8;
+    write_out(s, s->low >> 24); s->low <<= 8;
+}
+
+void init(state *s) { /* Fill s->code with the first four input bytes, first byte most significant. */
+    s->code = (s->code << 8) + read_in(s); /* four 8-bit shifts push out any previous contents of the 32-bit code */
+    s->code = (s->code << 8) + read_in(s);
+    s->code = (s->code << 8) + read_in(s);
+    s->code = (s->code << 8) + read_in(s);
+}
+
+#define update0(p, x) ((p) - ((p) >> x)) /* move p toward 0 by p / 2^x; NOTE(review): x is not parenthesized */
+#define update1(p, x) ((p) + (((p) ^ 65535) >> x)) /* move p toward 65535 by (65535 - p) / 2^x for 16-bit p */
+
+void begin(state * s) { /* Reset context bytes, run counter, coder registers and all probability tables. */
+    s->c1 = s->c2 = 0;
+    s->run = 0;
+    s->low = 0;
+    s->high = 0xFFFFFFFF; /* coder range starts as the full 32-bit interval */
+    s->code = 0;
+    for (int i = 0; i < 256; i++) s->C0[i] = 1 << 15; /* midpoint of the 16-bit scale */
+    for (int i = 0; i < 256; i++)
+        for (int j = 0; j < 256; j++) s->C1[i][j] = 1 << 15;
+    for (int i = 0; i < 2; i++)
+        for (int j = 0; j < 256; j++)
+            for (int k = 0; k < 17; k++) s->C2[i][j][k] = (k << 12) - (k == 16); /* 0, 4096, ..., 61440, 65535: the last entry is clamped to fit uint16_t */
+}
+
+void encode_byte(state *s, uint8_t c) { /* Encode the 8 bits of c, most significant first, updating the model after each bit. */
+    if (s->c1 == s->c2)
+        ++s->run;
+    else
+        s->run = 0;
+
+    const int f = s->run > 1; /* selects which of the two C2 tables is used for this byte */
+
+    int ctx = 1; /* a leading 1 followed by the bits coded so far; becomes 256 + byte after 8 bits */
+
+    while (ctx < 256) {
+        const int p0 = s->C0[ctx];
+        const int p1 = s->C1[s->c1][ctx];
+        const int p2 = s->C1[s->c2][ctx];
+        const int p = ((p0 + p1) * 7 + p2 + p2) >> 4; /* fixed mix with weights 7/16, 7/16 and 2/16 */
+        /* Second estimate: linear interpolation between the two C2 entries that bracket p. */
+        const int j = p >> 12; /* 0..15, so j + 1 stays inside the 17-entry row */
+        const int x1 = s->C2[f][ctx][j];
+        const int x2 = s->C2[f][ctx][j + 1];
+        const int ssep = x1 + (((x2 - x1) * (p & 4095)) >> 12);
+
+        if (c & 128) { /* current top bit of c is 1 */
+            encodebit1(s, ssep * 3 + p); /* 3 * ssep + p is below 2^18, matching the coder's >> 18 */
+            s->C0[ctx] = update1(s->C0[ctx], 2);
+            s->C1[s->c1][ctx] = update1(s->C1[s->c1][ctx], 4); /* only the c1 row is updated; the c2 row is read-only here */
+            s->C2[f][ctx][j] = update1(s->C2[f][ctx][j], 6);
+            s->C2[f][ctx][j + 1] = update1(s->C2[f][ctx][j + 1], 6);
+            ctx += ctx + 1; /* append a 1 bit */
+        } else {
+            encodebit0(s, ssep * 3 + p);
+            s->C0[ctx] = update0(s->C0[ctx], 2);
+            s->C1[s->c1][ctx] = update0(s->C1[s->c1][ctx], 4);
+            s->C2[f][ctx][j] = update0(s->C2[f][ctx][j], 6);
+            s->C2[f][ctx][j + 1] = update0(s->C2[f][ctx][j + 1], 6);
+            ctx += ctx; /* append a 0 bit */
+        }
+
+        c <<= 1; /* bring the next bit into position 7 */
+    }
+
+    s->c2 = s->c1;
+    s->c1 = ctx & 255; /* low 8 bits of ctx are the byte just coded */
+}
+
+uint8_t decode_byte(state *s) { /* Decode 8 bits with the same model steps as encode_byte and return the byte. */
+    if (s->c1 == s->c2)
+        ++s->run;
+    else
+        s->run = 0;
+
+    const int f = s->run > 1; /* selects which of the two C2 tables is used for this byte */
+
+    int ctx = 1; /* a leading 1 followed by the bits decoded so far */
+
+    while (ctx < 256) {
+        const int p0 = s->C0[ctx];
+        const int p1 = s->C1[s->c1][ctx];
+        const int p2 = s->C1[s->c2][ctx];
+        const int p = ((p0 + p1) * 7 + p2 + p2) >> 4; /* fixed mix with weights 7/16, 7/16 and 2/16 */
+        /* Second estimate: linear interpolation between the two C2 entries that bracket p. */
+        const int j = p >> 12; /* 0..15, so j + 1 stays inside the 17-entry row */
+        const int x1 = s->C2[f][ctx][j];
+        const int x2 = s->C2[f][ctx][j + 1];
+        const int ssep = x1 + (((x2 - x1) * (p & 4095)) >> 12);
+
+        const int bit = decodebit(s, ssep * 3 + p); /* same 18-bit-scale value the encoder passes */
+
+        if (bit) {
+            s->C0[ctx] = update1(s->C0[ctx], 2);
+            s->C1[s->c1][ctx] = update1(s->C1[s->c1][ctx], 4);
+            s->C2[f][ctx][j] = update1(s->C2[f][ctx][j], 6);
+            s->C2[f][ctx][j + 1] = update1(s->C2[f][ctx][j + 1], 6);
+            ctx += ctx + 1; /* append a 1 bit */
+        } else {
+            s->C0[ctx] = update0(s->C0[ctx], 2);
+            s->C1[s->c1][ctx] = update0(s->C1[s->c1][ctx], 4);
+            s->C2[f][ctx][j] = update0(s->C2[f][ctx][j], 6);
+            s->C2[f][ctx][j + 1] = update0(s->C2[f][ctx][j + 1], 6);
+            ctx += ctx; /* append a 0 bit */
+        }
+    }
+
+    s->c2 = s->c1;
+    return s->c1 = ctx & 255; /* low 8 bits of ctx are the decoded byte */
+}
diff --git a/src/main.c b/src/main.c
index 3805626..3d17b40 100644
--- a/src/main.c
+++ b/src/main.c
@@ -35,9 +35,9 @@ void encode_block(int output_des, int32_t bytes_read, uint8_t * buffer, uint8_t
     cm_state->out_queue = output;
     cm_state->output_ptr = 0;
     if(new_size2 != -1)
-        for (int32_t i = 0; i < new_size2; i++) encode_bit(cm_state, buffer[i]);
+        for (int32_t i = 0; i < new_size2; i++) encode_byte(cm_state, buffer[i]);
     else 
-        for (int32_t i = 0; i < new_size; i++) encode_bit(cm_state, buffer[i]);
+        for (int32_t i = 0; i < new_size; i++) encode_byte(cm_state, buffer[i]);
     flush(cm_state);
     int32_t new_size3 = cm_state->output_ptr;
 
@@ -71,10 +71,10 @@ int decode_block(int input_des, int output_des, uint8_t * buffer, uint8_t * outp
     cm_state->input_max = new_size3;
     init(cm_state);
     if(new_size2 != -1) {
-        for (int32_t i = 0; i < new_size2; i++) output[i] = decode_bit(cm_state);
+        for (int32_t i = 0; i < new_size2; i++) output[i] = decode_byte(cm_state);
         srt_decode(srt_state, output, buffer, new_size2);
     } else {
-        for (int32_t i = 0; i < new_size; i++) output[i] = decode_bit(cm_state);
+        for (int32_t i = 0; i < new_size; i++) output[i] = decode_byte(cm_state);
         mtf_decode(mtf_state, output, buffer, new_size);
     }
     libsais_unbwt(buffer, output, sais_array, new_size, NULL,
diff --git a/src/rle.c b/src/rle.c
new file mode 100644
index 0000000..32b8717
--- /dev/null
+++ b/src/rle.c
@@ -0,0 +1,74 @@
+
+#include "rle.h"
+
+/* Derived from Matt Mahoney's public domain RLE code. */
+
+#define _putc(__ch, __out) *__out++ = (__ch) /* store one byte and advance the output pointer; NOTE(review): names starting with _ or __ are reserved identifiers */
+#define _getc(in, in_) (in < in_ ? (*in++) : -1) /* next byte, or -1 once in reaches in_ */
+#define _rewind(in, _in) in = _in /* reset the read pointer to the start */
+
+int mrlec(unsigned char *in, int inlen, unsigned char *out) { /* RLE-encode inlen bytes of in into out (no bound on out); returns bytes written. */
+    unsigned char *ip = in, *in_ = in + inlen, *op = out;
+    int i;
+    int c, pc = -1;
+    long t[256] = {0}; /* per-symbol score; symbols that end with t > 0 are run-length coded */
+    long run = 0;
+    while ((c = _getc(ip, in_)) != -1) { /* pass 1: score every symbol */
+        if (c == pc)
+            t[c] += (++run % 255) != 0; /* repeat of the previous byte: +1 unless the repeat count is a multiple of 255 */
+        else
+            --t[c], run = 0; /* a new run starts: -1 */
+        pc = c;
+    }
+    for (i = 0; i < 32; ++i) { /* 32-byte bitmap: bit j of byte i is set when symbol i*8+j has t > 0 */
+        int j;
+        c = 0;
+        for (j = 0; j < 8; ++j) c += (t[i * 8 + j] > 0) << j;
+        _putc(c, op);
+    }
+    _rewind(ip, in);
+    c = pc = -1;
+    run = 0;
+    do { /* pass 2: emit runs; the final iteration sees c == -1 and writes out the last run */
+        c = _getc(ip, in_);
+        if (c == pc)
+            ++run;
+        else if (run > 0 && t[pc] > 0) { /* run > 0 is false on the first iteration, so t[-1] is never read */
+            _putc(pc, op);
+            for (; run > 255; run -= 255) _putc(255, op); /* each 255 byte stands for 255 repeats, with more to follow */
+            _putc(run - 1, op); /* final length byte holds the remaining count minus 1 */
+            run = 1;
+        } else
+            for (++run; run > 1; --run) _putc(pc, op); /* unflagged symbol: copy the run verbatim; leaves run == 1 */
+        pc = c;
+    } while (c != -1);
+
+    return op - out;
+}
+
+int mrled(unsigned char *in, unsigned char *out, int outlen) { /* Decode from in until at least outlen bytes are in out; returns bytes read from in. */
+    unsigned char *ip = in, *op = out;
+    int i;
+
+    int c, pc = -1;
+    long t[256] = {0}; /* 1 for symbols that are followed by a run length, 0 for literals */
+    long run = 0;
+
+    for (i = 0; i < 32; ++i) { /* unpack the 32-byte bitmap into one flag per symbol */
+        int j;
+        c = *ip++;
+        for (j = 0; j < 8; ++j) t[i * 8 + j] = (c >> j) & 1;
+    }
+
+    while (op < out + outlen) { /* NOTE(review): nothing bounds ip, and a run can write past out + outlen */
+        c = *ip++;
+        if (t[c]) {
+            for (run = 0; (pc = *ip++) == 255; run += 255) /* each 255 byte adds 255 repeats */
+                ;
+            run += pc + 1; /* the final length byte stores the remaining count minus 1 */
+            for (; run > 0; --run) _putc(c, op);
+        } else
+            _putc(c, op);
+    }
+    return ip - in;
+}
diff --git a/src/srt.c b/src/srt.c
new file mode 100644
index 0000000..5e59200
--- /dev/null
+++ b/src/srt.c
@@ -0,0 +1,143 @@
+
+#include "srt.h"
+
+static const int MAX_HDR_SIZE = 4 * 256; /* NOTE(review): not referenced anywhere in the code visible in this unit */
+
+static int preprocess(const uint32_t * freqs, uint8_t * symbols) { /* Fill symbols with the bytes whose freqs entry is non-zero, sorted; returns how many there are. */
+    int nb_symbols = 0;
+    for(int i = 0; i < 256; i++)
+        if(freqs[i] > 0)
+            symbols[nb_symbols++] = i;
+    uint32_t h = 4; /* gaps come from 4, 13, 40, ... (h = 3h + 1), divided by 3 before each pass */
+    while(h < nb_symbols)
+        h = h * 3 + 1;
+    while(1) { /* gapped insertion sort: higher frequency first, ties by lower symbol value */
+        h /= 3;
+        for(uint32_t i = h; i < nb_symbols; i++) {
+            const int t = symbols[i] & 0xFF;
+            int32_t b = i - h;
+            while((b >= 0) && freqs[symbols[b]] < freqs[t]
+            || (freqs[t] == freqs[symbols[b]]) && t < symbols[b])
+                { symbols[b + h] = symbols[b]; b -= h; } /* NOTE(review): && binds tighter than ||, so the tie-break operand is evaluated even when b < 0 and reads symbols[b] out of range — confirm and parenthesize */
+            symbols[b + h] = t;
+        }
+        if(h == 1)
+            break;
+    }
+    return nb_symbols;
+}
+
+static int encode_header(uint32_t * freqs, uint8_t * dst) { /* Write all 256 frequencies to dst as variable-length integers; returns bytes written. */
+    uint32_t idx = 0;
+    for(int i = 0; i < 256; i++) {
+        uint32_t f = freqs[i];
+        while(f >= 128) { /* 7 bits per byte, least significant group first */
+            dst[idx++] = (uint8_t) (f | 0x80); /* bit 7 set means another byte follows */
+            f >>= 7;
+        }
+        dst[idx++] = (uint8_t) f; /* last group has bit 7 clear */
+    }
+    return idx;
+}
+
+static int decode_header(uint8_t * src, uint32_t * freqs) { /* Read 256 variable-length frequencies from src into freqs; returns bytes consumed. */
+    uint32_t idx = 0;
+    for(int i = 0; i < 256; i++) {
+        int val = src[idx++] & 0xFF;
+        int res = val & 0x7F;
+        int shift = 7;
+        while(val >= 128) { /* bit 7 of the previous byte set: another 7-bit group follows */
+            val = src[idx++] & 0xFF;
+            res |= (val & 0x7F) << shift; /* NOTE(review): res is int; at shift 28 a group value of 8 or more overflows it */
+            if(shift > 21) /* stop after the group at shift 28, i.e. at most 5 bytes per value */
+                break;
+            shift += 7;
+        }
+        freqs[i] = res;
+    }
+    return idx;
+}
+
+uint32_t srt_encode(struct srt_state * mtf, uint8_t *src, uint8_t *dst, uint32_t count) { /* Writes a frequency header, then move-to-front ranks grouped per symbol, to dst. */
+    // Find first symbols and build a histogram.
+    for(int i = 0; i < 256; i++)
+        mtf->freqs[i] = 0;
+    for(uint32_t i = 0, b = 0; i < count;) {
+        if(mtf->freqs[src[i]] == 0) { /* first appearance: the symbol gets the next free rank */
+            mtf->r2s[b] = src[i];
+            mtf->s2r[src[i]] = b;
+            b++;
+        }
+        uint32_t j = i + 1;
+        while(j < count && src[j] == src[i]) /* count a whole run of equal bytes at once */
+            j++;
+        mtf->freqs[src[i]] += j - i;
+        i = j;
+    }
+    /* Give each symbol a contiguous bucket of freqs[symbol] bytes, in preprocess() order. */
+    int n_symbols = preprocess(mtf->freqs, mtf->symbols);
+    for(uint32_t i = 0, bucket_pos = 0; i < n_symbols; i++) {
+        mtf->buckets[mtf->symbols[i]] = bucket_pos;
+        bucket_pos += mtf->freqs[mtf->symbols[i]];
+    }
+
+    const uint32_t header_size = encode_header(mtf->freqs, dst);
+    const int dst_idx = header_size; /* buckets start right after the header */
+    for(uint32_t i = 0; i < count; ) {
+        const int c = src[i] & 0xFF;
+        int r = mtf->s2r[c] & 0xFF;
+        uint32_t p = mtf->buckets[c];
+        dst[dst_idx + p++] = r; /* the current rank of c goes into c's own bucket */
+        if(r != 0) { /* move c to the front: shift ranks 0..r-1 down by one */
+            do {
+                mtf->r2s[r] = mtf->r2s[r - 1];
+                mtf->s2r[mtf->r2s[r]] = r;
+                r--;
+            } while(r != 0);
+            mtf->r2s[0] = c;
+            mtf->s2r[c] = 0;
+        }
+        i++;
+        while(i < count && src[i] == c) { /* the rest of the run is at rank 0 */
+            dst[dst_idx + p++] = 0;
+            i++;
+        }
+        mtf->buckets[c] = p; /* remember where the next rank for c goes */
+    }
+    return count + header_size;
+}
+
+uint32_t srt_decode(struct srt_state * mtf, uint8_t *src, uint8_t *dst, uint32_t count) { /* Reads the header and per-symbol rank buckets from src and writes count bytes to dst. */
+    const uint32_t header_size = decode_header(src, mtf->freqs);
+    const uint32_t src_idx = header_size; /* buckets start right after the header */
+    int nb_symbols = preprocess(mtf->freqs, mtf->symbols);
+    for(uint32_t i = 0, bucket_pos = 0; i < nb_symbols; i++) {
+        const int c = mtf->symbols[i] & 0xFF;
+        mtf->r2s[src[src_idx + bucket_pos] & 0xFF] = c; /* the first rank stored in a bucket places c in the initial rank list */
+        mtf->buckets[c] = bucket_pos + 1; /* next unread rank for c */
+        bucket_pos += mtf->freqs[c];
+        mtf->bucket_ends[c] = bucket_pos;
+    }
+    uint32_t c = mtf->r2s[0];
+    for(uint32_t i = 0; i < count; i++) {
+        dst[i] = c;
+        if(mtf->buckets[c] < mtf->bucket_ends[c]) { /* c still has ranks left in its bucket */
+            const int r = src[src_idx + mtf->buckets[c]] & 0xFF;
+            mtf->buckets[c]++;
+            if(r == 0) /* rank 0: c stays at the front and is output again */
+                continue;
+            for(int s = 0; s < r; s++) /* move c from the front back to rank r */
+                mtf->r2s[s] = mtf->r2s[s + 1];
+            mtf->r2s[r] = c;
+            c = mtf->r2s[0];
+        } else { /* bucket exhausted: drop c from the rank list */
+            if(nb_symbols == 1)
+                continue;
+            nb_symbols--;
+            for(int s = 0; s < nb_symbols; s++)
+                mtf->r2s[s] = mtf->r2s[s + 1];
+            c = mtf->r2s[0];
+        }
+    }
+    return count - header_size;
+}
tab: 248 wrap: offon