:: commit 73ff26a3214ac269c55e149f9428e22cca5e2625

Kamila Szewczyk <kspalaiologos@gmail.com> — 2022-05-03 16:47

parents: 8560074321

RLE

diff --git a/src/libbz3.c b/src/libbz3.c
index eaacc9a..f4f66da 100644
--- a/src/libbz3.c
+++ b/src/libbz3.c
@@ -17,7 +17,7 @@
 #include "txt.h"
 
 #define LZP_DICTIONARY 18
-#define LZP_MIN_MATCH 100
+#define LZP_MIN_MATCH 40
 
 struct block_encoder_state {
     u8 *buf1, *buf2;
@@ -93,8 +93,6 @@ void delete_block_encoder_state(struct block_encoder_state * state) {
     free(state);
 }
 
-// TODO: Wire up RLE with lzp percentage checking.
-
 #define swap(x, y) { u8 * tmp = x; x = y; y = tmp; }
 
 struct encoding_result encode_block(struct block_encoder_state * state) {
@@ -107,17 +105,24 @@ struct encoding_result encode_block(struct block_encoder_state * state) {
     // bit 0: text | binary
     // bit 1: lzp | no lzp
     // bit 2: srt | no srt
+    // bit 4: rle | no rle
     s8 model = is_text(b1, data_size);
 
     s32 lzp_size;
-    if(model)
+    if(model) {
         lzp_size = lzp_compress(b1, b2, data_size, LZP_DICTIONARY, LZP_MIN_MATCH);
-    else
-        lzp_size = lzp_compress(b1, b2, data_size, LZP_DICTIONARY, 2 * LZP_MIN_MATCH);
-    if(lzp_size > 0) {
-        swap(b1, b2);
-        data_size = lzp_size;
-        model |= 2;
+        if(lzp_size > 0) {
+            swap(b1, b2);
+            data_size = lzp_size;
+            model |= 2;
+        }
+    } else {
+        lzp_size = mrlec(b1, data_size, b2);
+        if(lzp_size < data_size) {
+            swap(b1, b2);
+            data_size = lzp_size;
+            model |= 16;
+        }
     }
 
     s32 bwt_idx = libsais_bwt(b1, b2, state->sais_array, data_size, 16, NULL);
@@ -143,7 +148,7 @@ struct encoding_result encode_block(struct block_encoder_state * state) {
 
     // Compute the amount of overhead dwords.
     s32 overhead = 4; // CRC32 + BWT index + original size + new size
-    if(model & 2) overhead++; // LZP
+    if((model & 2) || (model & 16)) overhead++; // LZP
     if(model & 4) overhead++; // sorted rank transform
 
     begin(state->cm_state);
@@ -161,7 +166,7 @@ struct encoding_result encode_block(struct block_encoder_state * state) {
     b2[16] = model;
 
     s32 p = 0;
-    if(model & 2) ((s32 *)(b2 + 17))[p++] = htonl(lzp_size);
+    if((model & 2) || (model & 16)) ((s32 *)(b2 + 17))[p++] = htonl(lzp_size);
     if(model & 4) ((s32 *)(b2 + 17))[p++] = htonl(srt_size);
 
     return (struct encoding_result) { .buffer = b2, .size = data_size + overhead * 4 + 1 };
@@ -176,7 +181,7 @@ struct encoding_result decode_block(struct block_encoder_state * state) {
     s8 model = state->buf1[16];
     s32 lzp_size = -1, srt_size = -1, p = 0;
 
-    if(model & 2) lzp_size = ntohl(((s32 *) (state->buf1 + 17))[p++]);
+    if((model & 2) || (model & 16)) lzp_size = ntohl(((s32 *) (state->buf1 + 17))[p++]);
     if(model & 4) srt_size = ntohl(((s32 *) (state->buf1 + 17))[p++]);
 
     data_len -= p * 4;
@@ -194,7 +199,7 @@ struct encoding_result decode_block(struct block_encoder_state * state) {
 
     if(model & 4)
         size_src = srt_size;
-    else if(model & 2)
+    else if((model & 2) || (model & 16))
         size_src = lzp_size;
     else
         size_src = orig_size;
@@ -223,6 +228,10 @@ struct encoding_result decode_block(struct block_encoder_state * state) {
     if(model & 2) {
         size_src = lzp_decompress(b1, b2, lzp_size, LZP_DICTIONARY, (model & 1) ? LZP_MIN_MATCH : 2 * LZP_MIN_MATCH);
         swap(b1, b2);
+    } else if(model & 16) {
+        mrled(b1, b2, orig_size);
+        size_src = orig_size;
+        swap(b1, b2);
     }
 
     return (struct encoding_result) { .buffer = b1, .size = size_src };
diff --git a/src/main.c b/src/main.c
index 2af0d42..23a4fd6 100644
--- a/src/main.c
+++ b/src/main.c
@@ -127,9 +127,8 @@ int main(int argc, char * argv[]) {
         return 1;
     }
 
-    if (mode == 1)
-        while (commit_read(block_encoder_state,
-                           read(input_des, get_buffer(block_encoder_state), block_size)) > 0) {
+    if (mode == 1) {
+        while (commit_read(block_encoder_state, read(input_des, get_buffer(block_encoder_state), block_size)) > 0) {
             if (get_last_error(block_encoder_state) != BZ3_OK) {
                 fprintf(stderr, "Failed to read data: %s\n", str_last_error(block_encoder_state));
                 return 1;
@@ -142,6 +141,7 @@ int main(int argc, char * argv[]) {
             }
             write(output_des, r.buffer, r.size);
         }
+    }
     else if (mode == -1) {
         s32 read_size;
         while ((read_size = read_block(input_des, block_encoder_state)) > 0) {
tab: 248 wrap: offon