preliminary e8e9 transform + CM tweaks
diff --git a/include/cm.h b/include/cm.h
index 1b3a806..f1d7b33 100644
--- a/include/cm.h
+++ b/include/cm.h
@@ -8,8 +8,6 @@
#include "common.h"
typedef struct {
- u32 low, high, code;
- s32 c1, c2, run;
u8 *in_queue, *out_queue;
s32 input_ptr, output_ptr, input_max;
diff --git a/include/e8e9.h b/include/e8e9.h
index 8d00c0e..edf1f51 100644
--- a/include/e8e9.h
+++ b/include/e8e9.h
@@ -26,6 +26,6 @@
#include "common.h"
s32 e8e9_forward(u8 * in, s32 inlen, u8 * out);
-void e8e9_backward(u8 * in, u8 * out, s32 outlen);
+s32 e8e9_backward(u8 * in, s32 inlen, u8 * out);
#endif
diff --git a/src/cm.c b/src/cm.c
index 3a14fab..e043965 100644
--- a/src/cm.c
+++ b/src/cm.c
@@ -35,11 +35,6 @@
void begin(state * s) {
prefetch(s);
- s->c1 = s->c2 = 0;
- s->run = 0;
- s->low = 0;
- s->high = 0xFFFFFFFF;
- s->code = 0;
for (int i = 0; i < 256; i++) s->C0[i] = 1 << 15;
for (int i = 0; i < 256; i++)
for (int j = 0; j < 256; j++) s->C1[i][j] = 1 << 15;
@@ -49,7 +44,9 @@ void begin(state * s) {
}
void encode_bytes(state * s, u8 * buf, s32 size) {
- u32 high = s->high, low = s->low, c1 = s->c1, c2 = s->c2, run = s->run;
+ u32 low = 0, high = 0xFFFFFFFF, code = 0;
+ s32 c1 = 0, c2 = 0, run = 0;
+
for (s32 i = 0; i < size; i++) {
u8 c = buf[i];
@@ -119,16 +116,11 @@ void encode_bytes(state * s, u8 * buf, s32 size) {
low <<= 8;
write_out(s, low >> 24);
low <<= 8;
-
- s->high = high;
- s->low = low;
- s->c1 = c1;
- s->c2 = c2;
- s->run = run;
}
void decode_bytes(state * s, u8 * c, s32 size) {
- u32 high = s->high, low = s->low, c1 = s->c1, c2 = s->c2, run = s->run, code = s->code;
+ u32 low = 0, high = 0xFFFFFFFF, code = 0;
+ s32 c1 = 0, c2 = 0, run = 0;
code = (code << 8) + read_in(s);
code = (code << 8) + read_in(s);
@@ -186,11 +178,4 @@ void decode_bytes(state * s, u8 * c, s32 size) {
c2 = c1;
c[i] = c1 = ctx & 255;
}
-
- s->high = high;
- s->low = low;
- s->c1 = c1;
- s->c2 = c2;
- s->run = run;
- s->code = code;
}
diff --git a/src/e8e9.c b/src/e8e9.c
index 4bbc125..f9a63a8 100644
--- a/src/e8e9.c
+++ b/src/e8e9.c
@@ -19,4 +19,127 @@
#include "e8e9.h"
-/* TODO */
+/* Loosely based on Shelwien's E8E9 filter. Doesn't blindly transform data. */
+/* Streaming state used by both the forward and the backward E8E9 pass. */
+struct e8e9 {
+ /* Starts at 0xFF and is shifted left on every e8e9_cache_byte call; while
+  * bit 7 is still set, e8e9_cache_byte returns -1 instead of a byte. */
+ uint8_t cs;
+ /* x0, x1: together an 8-byte sliding window. New bytes enter at the top of
+  *         x0, the low byte of x0 moves to the top of x1, and the low byte
+  *         of x1 is the byte handed back to the caller.
+  * i:      number of e8e9_cache_byte calls made so far.
+  * k:      value i has to reach before e8e9_fb / e8e9_bb test for an opcode
+  *         again (initially 5, set to i + 4 after each opcode hit). */
+ uint32_t x0, x1, i, k;
+};
+
+/* Returns a fresh filter state: empty window, zero byte counter, first opcode
+ * check allowed once i reaches 5, and cs = 0xFF. */
+static struct e8e9 e8e9_init() {
+    struct e8e9 fresh = {
+        .cs = 0xFF,
+        .x0 = 0,
+        .x1 = 0,
+        .i = 0,
+        .k = 5,
+    };
+    return fresh;
+}
+
+/* Pushes byte `c` into the 8-byte window (x0:x1) and returns the byte that
+ * falls out of the bottom of x1, or -1 while bit 7 of cs is still set.
+ * Always advances the byte counter i and shifts cs left by one. */
+static int32_t e8e9_cache_byte(struct e8e9 * s, int32_t c) {
+    /* The outgoing byte is sampled before the window is shifted. */
+    int32_t d = s->cs & 0x80 ? -1 : (uint8_t)(s->x1);
+    s->x1 >>= 8;
+    s->x1 |= (s->x0 << 24);
+    s->x0 >>= 8;
+    /* Shift as unsigned: `c << 24` on a signed int is undefined behaviour
+     * for any byte value >= 0x80. */
+    s->x0 |= ((uint32_t)c << 24);
+    s->cs <<= 1;
+    s->i++;
+    return d;
+}
+
+/* Bit permutation for values below 0x02000000 (25 bits). After shifting x left
+ * by 7, the four bytes of the shifted word are written out in reverse order;
+ * the lowest byte, of which only bit 7 can be set after the shift, is placed
+ * at bit 24 rather than in a full byte lane, so the result is again 25 bits.
+ * e8e9_y_swap undoes this mapping for such values. */
+static uint32_t e8e9_x_swap(uint32_t x) {
+ x <<= 7;
+ return (x >> 24) | ((uint8_t)(x >> 16) << 8) | ((uint8_t)(x >> 8) << 16) | ((uint8_t)(x) << (24 - 7));
+}
+
+/* Inverse of e8e9_x_swap for values below 0x02000000: bit 24 of x goes to
+ * bit 7, the three low bytes are written in reverse order into the upper three
+ * byte lanes, and the rebuilt word is shifted right by 7. */
+static uint32_t e8e9_y_swap(uint32_t x) {
+ x = ((uint8_t)(x >> 24) << 7) | ((uint8_t)(x >> 16) << 8) | ((uint8_t)(x >> 8) << 16) | (x << 24);
+ return x >> 7;
+}
+
+/* Forward step: feeds byte `c` into the filter and returns the byte leaving
+ * the window, or -1 while the window is still filling.
+ *
+ * Before the push, if i has reached k and the top byte of x1 is 0xE8 or 0xE9
+ * (the x86 CALL/JMP rel32 opcodes), the four bytes that followed it are in x0.
+ * When x0's top byte is 0xFF or 0x00, the operand has the current position i
+ * added, is masked to 25 bits, permuted with e8e9_x_swap and written back. */
+static int32_t e8e9_fb(struct e8e9 * s, int32_t c) {
+    uint32_t x;
+    if (s->i >= s->k) {
+        if ((s->x1 & 0xFE000000) == 0xE8000000) {
+            /* Do not look for another opcode inside this operand. */
+            s->k = s->i + 4;
+            x = s->x0 - 0xFF000000;
+            if (x < 0x02000000) {
+                x = (x + s->i) & 0x01FFFFFF;
+                /* The helpers carry the e8e9_ prefix; the unprefixed names
+                 * used previously are not declared anywhere. */
+                x = e8e9_x_swap(x);
+                s->x0 = x + 0xFF000000;
+            }
+        }
+    }
+    return e8e9_cache_byte(s, c);
+}
+
+/* Backward step: mirror image of e8e9_fb. Feeds byte `c` into the filter and
+ * returns the byte leaving the window, or -1 while the window is still
+ * filling.
+ *
+ * Before the push, if i has reached k and the top byte of x1 is 0xE8 or 0xE9,
+ * and x0's top byte is 0xFF or 0x00, the operand in x0 is un-permuted with
+ * e8e9_y_swap, has the current position i subtracted, is masked to 25 bits
+ * and written back. */
+static int32_t e8e9_bb(struct e8e9 * s, int32_t c) {
+    uint32_t x;
+    if (s->i >= s->k) {
+        if ((s->x1 & 0xFE000000) == 0xE8000000) {
+            /* Do not look for another opcode inside this operand. */
+            s->k = s->i + 4;
+            x = s->x0 - 0xFF000000;
+            if (x < 0x02000000) {
+                /* The helpers carry the e8e9_ prefix; the unprefixed names
+                 * used previously are not declared anywhere. */
+                x = e8e9_y_swap(x);
+                x = (x - s->i) & 0x01FFFFFF;
+                s->x0 = x + 0xFF000000;
+            }
+        }
+    }
+    return e8e9_cache_byte(s, c);
+}
+
+/* Drains one buffered byte per call by pushing zero padding into the window.
+ * Returns the drained byte, or -1 once nothing is left; in that case the state
+ * is reset to its initial values.
+ *
+ * Each padding push is followed by ++cs, which sets the bit that the push
+ * just shifted in. cs therefore returns to 0xFF exactly when every real byte
+ * has left the window. */
+static int32_t e8e9_flush(struct e8e9 * s) {
+    if (s->cs == 0xFF) {
+        /* Nothing buffered (or everything already drained): reset. */
+        s->x0 = 0;
+        s->x1 = 0;
+        s->i = 0;
+        s->k = 5;
+        s->cs = 0xFF;
+        return -1;
+    }
+
+    /* Fewer than 8 bytes were ever pushed: pad until the oldest real byte
+     * reaches the bottom of the window. */
+    while (s->cs & 0x80) {
+        e8e9_cache_byte(s, 0);
+        ++s->cs;
+    }
+
+    int32_t d = e8e9_cache_byte(s, 0);
+    ++s->cs;
+    return d;
+}
+
+/* Applies the forward E8E9 transform to in[0..inlen) and writes the result to
+ * `out`. `in` and `out` must not overlap.
+ *
+ * Returns the number of bytes written, or -1 when the transform is not
+ * applied: either the input is empty, or 0xE8/0xE9 bytes make up 2% or more of
+ * the data. On -1 nothing has been written to `out`. */
+s32 e8e9_forward(u8 * restrict in, s32 inlen, u8 * restrict out) {
+    /* Nothing to transform; this also keeps the ratio below from dividing
+     * by zero. */
+    if (inlen <= 0) return -1;
+
+    s32 oct = 0;
+    for (s32 i = 0; i < inlen; i++) {
+        if (in[i] == 0xE8 || in[i] == 0xE9) oct++;
+    }
+
+    /* All of the octets should be less than 2% of the data. The per-mille
+     * ratio is computed in 64 bits so that oct * 1000 cannot overflow. */
+    if ((int64_t)oct * 1000 / inlen >= 20) return -1;
+
+    struct e8e9 s = e8e9_init();
+    s32 out_ptr = 0;
+
+    for (s32 i = 0; i < inlen; i++) {
+        int c = e8e9_fb(&s, in[i]);
+        if (c >= 0) out[out_ptr++] = c;
+    }
+
+    /* Drain the bytes still held in the window. */
+    int c;
+    while ((c = e8e9_flush(&s)) >= 0) out[out_ptr++] = c;
+
+    return out_ptr;
+}
+
+/* Undoes e8e9_forward: runs the backward filter over in[0..inlen) and writes
+ * the restored bytes to `out`. `in` and `out` must not overlap.
+ * Returns the number of bytes written. */
+s32 e8e9_backward(u8 * restrict in, s32 inlen, u8 * restrict out) {
+    s32 out_ptr = 0;
+    struct e8e9 s = e8e9_init();
+
+    for (s32 i = 0; i < inlen; i++) {
+        int c = e8e9_bb(&s, in[i]);
+        if (c >= 0) out[out_ptr++] = c;
+    }
+
+    /* Drain the bytes still held in the window. */
+    int c;
+    while ((c = e8e9_flush(&s)) >= 0) out[out_ptr++] = c;
+
+    return out_ptr;
+}
diff --git a/src/libbz3.c b/src/libbz3.c
index 487274d..edbdbb5 100644
--- a/src/libbz3.c
+++ b/src/libbz3.c
@@ -28,6 +28,7 @@
#include "libsais.h"
#include "lzp.h"
#include "rle.h"
+#include "e8e9.h"
#define LZP_DICTIONARY 18
#define LZP_MIN_MATCH 40
@@ -134,8 +135,16 @@ PUBLIC_API s32 bz3_encode_block(struct bz3_state * state, u8 * buffer, s32 data_
// Back to front:
// bit 1: lzp | no lzp
// bit 2: srt | no srt
+ // bit 3: e8e9 | no e8e9
s8 model = 0;
- s32 lzp_size, rle_size;
+ s32 lzp_size, rle_size, e8e9_size;
+
+ e8e9_size = e8e9_forward(b1, data_size, b2);
+ if(e8e9_size != -1) {
+ swap(b1, b2);
+ data_size = e8e9_size;
+ model |= 8;
+ }
rle_size = mrlec(b1, data_size, b2);
if (rle_size < data_size + 64) {
@@ -161,6 +170,7 @@ PUBLIC_API s32 bz3_encode_block(struct bz3_state * state, u8 * buffer, s32 data_
s32 overhead = 2; // CRC32 + BWT index
if (model & 2) overhead++; // LZP
if (model & 4) overhead++; // RLE
+ if (model & 8) overhead++; // E8E9
begin(state->cm_state);
state->cm_state->out_queue = b1 + overhead * 4 + 1;
@@ -176,6 +186,7 @@ PUBLIC_API s32 bz3_encode_block(struct bz3_state * state, u8 * buffer, s32 data_
s32 p = 0;
if (model & 2) write_neutral_s32(b1 + 9 + 4 * p++, lzp_size);
if (model & 4) write_neutral_s32(b1 + 9 + 4 * p++, rle_size);
+ if (model & 8) write_neutral_s32(b1 + 9 + 4 * p++, e8e9_size);
state->last_error = BZ3_OK;
@@ -211,10 +222,11 @@ PUBLIC_API s32 bz3_decode_block(struct bz3_state * state, u8 * buffer, s32 data_
}
s8 model = buffer[8];
- s32 lzp_size = -1, rle_size = -1, p = 0;
+ s32 lzp_size = -1, rle_size = -1, e8e9_size = -1, p = 0;
if (model & 2) lzp_size = read_neutral_s32(buffer + 9 + 4 * p++);
if (model & 4) rle_size = read_neutral_s32(buffer + 9 + 4 * p++);
+ if (model & 8) e8e9_size = read_neutral_s32(buffer + 9 + 4 * p++);
p += 2;
@@ -231,6 +243,11 @@ PUBLIC_API s32 bz3_decode_block(struct bz3_state * state, u8 * buffer, s32 data_
return -1;
}
+ if (e8e9_size > state->block_size + state->block_size / 50 + 32 || e8e9_size < 0) {
+ state->last_error = BZ3_ERR_MALFORMED_HEADER;
+ return -1;
+ }
+
// Decode the data.
u8 *b1 = buffer, *b2 = state->swap_buffer;
@@ -245,6 +262,8 @@ PUBLIC_API s32 bz3_decode_block(struct bz3_state * state, u8 * buffer, s32 data_
size_src = lzp_size;
else if (model & 4)
size_src = rle_size;
+ else if (model & 8)
+ size_src = e8e9_size;
else
size_src = orig_size;
@@ -269,8 +288,16 @@ PUBLIC_API s32 bz3_decode_block(struct bz3_state * state, u8 * buffer, s32 data_
swap(b1, b2);
}
+ // Undo RLE
if (model & 4) {
mrled(b1, b2, orig_size);
+ size_src = model & 8 ? e8e9_size : orig_size;
+ swap(b1, b2);
+ }
+
+ // Undo E8E9
+ if (model & 8) {
+ e8e9_backward(b1, size_src, b2);
size_src = orig_size;
swap(b1, b2);
}
