:: commit 33a235073a6999bfe3db13477047f3a35ff14458

Kamila Szewczyk <kspalaiologos@gmail.com> — 2022-05-02 19:03

parents: f0091e81ce

text data smoke wired to libbz3

diff --git a/src/libbz3.c b/src/libbz3.c
index 460569b..f1aed9a 100644
--- a/src/libbz3.c
+++ b/src/libbz3.c
@@ -95,44 +95,72 @@ void delete_block_encoder_state(struct block_encoder_state * state) {
 struct encoding_result encode_block(struct block_encoder_state * state) {
     u32 crc32 = crc32sum(1, state->buf1, state->bytes_read);
 
-    s32 new_size = mrlec(state->buf1, state->bytes_read, state->buf2);
-    s32 bwt_index = libsais_bwt(state->buf2, state->buf2, state->sais_array,
-                                new_size, 16, NULL);
-    if (bwt_index < 0) {
-        state->last_error = BZ3_ERR_BWT;
-        return (struct encoding_result){ NULL, -1 };
-    }
-    s32 new_size2;
-
-    if (new_size > MiB(3)) {
-        new_size2 =
-            srt_encode(state->srt_state, state->buf2, state->buf1, new_size);
+    int txt = is_text(state->buf1, state->bytes_read);
+
+    if(txt) {
+        s32 bwt_index = libsais_bwt(state->buf1, state->buf1, state->sais_array,
+                                    state->bytes_read, 16, NULL);
+        if(bwt_index < 0) {
+            state->last_error = BZ3_ERR_BWT;
+            return (struct encoding_result) { NULL, -1 };
+        }
+        begin(state->cm_state);
+        state->cm_state->out_queue = state->buf2 + 24;
+        state->cm_state->output_ptr = 0;
+        for(s32 i = 0; i < state->bytes_read; i++)
+            encode_byte(state->cm_state, state->buf1[i]);
+        flush(state->cm_state);
+        s32 new_size = state->cm_state->output_ptr;
+
+        ((uint32_t *)state->buf2)[0] = htonl(crc32);
+        ((uint32_t *)state->buf2)[1] = htonl(state->bytes_read);
+        ((uint32_t *)state->buf2)[2] = htonl(bwt_index);
+        ((uint32_t *)state->buf2)[3] = 0xFFFFFFFF;
+        ((uint32_t *)state->buf2)[4] = 0xFFFFFFFF;
+        ((uint32_t *)state->buf2)[5] = htonl(new_size);
+        
+        state->last_error = BZ3_OK;
+        return (struct encoding_result) { state->buf2, 24 + new_size };
     } else {
-        new_size2 = -1;
-        mtf_encode(state->mtf_state, state->buf2, state->buf1, new_size);
+        s32 new_size = mrlec(state->buf1, state->bytes_read, state->buf2);
+        s32 bwt_index = libsais_bwt(state->buf2, state->buf2, state->sais_array,
+                                    new_size, 16, NULL);
+        if (bwt_index < 0) {
+            state->last_error = BZ3_ERR_BWT;
+            return (struct encoding_result){ NULL, -1 };
+        }
+        s32 new_size2;
+
+        if (new_size > MiB(3)) {
+            new_size2 =
+                srt_encode(state->srt_state, state->buf2, state->buf1, new_size);
+        } else {
+            new_size2 = -1;
+            mtf_encode(state->mtf_state, state->buf2, state->buf1, new_size);
+        }
+
+        begin(state->cm_state);
+        state->cm_state->out_queue = state->buf2 + 24;
+        state->cm_state->output_ptr = 0;
+        if (new_size2 != -1)
+            for (s32 i = 0; i < new_size2; i++)
+                encode_byte(state->cm_state, state->buf1[i]);
+        else
+            for (s32 i = 0; i < new_size; i++)
+                encode_byte(state->cm_state, state->buf1[i]);
+        flush(state->cm_state);
+        s32 new_size3 = state->cm_state->output_ptr;
+
+        ((uint32_t *)state->buf2)[0] = htonl(crc32);
+        ((uint32_t *)state->buf2)[1] = htonl(state->bytes_read);
+        ((uint32_t *)state->buf2)[2] = htonl(bwt_index);
+        ((uint32_t *)state->buf2)[3] = htonl(new_size);
+        ((uint32_t *)state->buf2)[4] = htonl(new_size2);
+        ((uint32_t *)state->buf2)[5] = htonl(new_size3);
+        state->last_error = BZ3_OK;
+        return (struct encoding_result){ .buffer = state->buf2,
+                                        .size = 24 + new_size3 };
     }
-
-    begin(state->cm_state);
-    state->cm_state->out_queue = state->buf2 + 24;
-    state->cm_state->output_ptr = 0;
-    if (new_size2 != -1)
-        for (s32 i = 0; i < new_size2; i++)
-            encode_byte(state->cm_state, state->buf1[i]);
-    else
-        for (s32 i = 0; i < new_size; i++)
-            encode_byte(state->cm_state, state->buf1[i]);
-    flush(state->cm_state);
-    s32 new_size3 = state->cm_state->output_ptr;
-
-    ((uint32_t *)state->buf2)[0] = htonl(crc32);
-    ((uint32_t *)state->buf2)[1] = htonl(state->bytes_read);
-    ((uint32_t *)state->buf2)[2] = htonl(bwt_index);
-    ((uint32_t *)state->buf2)[3] = htonl(new_size);
-    ((uint32_t *)state->buf2)[4] = htonl(new_size2);
-    ((uint32_t *)state->buf2)[5] = htonl(new_size3);
-    state->last_error = BZ3_OK;
-    return (struct encoding_result){ .buffer = state->buf2,
-                                     .size = 24 + new_size3 };
 }
 
 struct encoding_result decode_block(struct block_encoder_state * state) {
@@ -146,33 +174,55 @@ struct encoding_result decode_block(struct block_encoder_state * state) {
     new_size2 = ntohl(((uint32_t *)state->buf1)[4]);
     new_size3 = ntohl(((uint32_t *)state->buf1)[5]);
 
-    begin(state->cm_state);
-    state->cm_state->in_queue = state->buf1 + 24;
-    state->cm_state->input_ptr = 0;
-    state->cm_state->input_max = new_size3;
-    init(state->cm_state);
-    if (new_size2 != -1) {
-        for (s32 i = 0; i < new_size2; i++)
-            state->buf2[i] = decode_byte(state->cm_state);
-        srt_decode(state->srt_state, state->buf2, state->buf1, new_size2);
+    if(new_size2 != 0xFFFFFFFF || new_size != 0xFFFFFFFF) {
+        begin(state->cm_state);
+        state->cm_state->in_queue = state->buf1 + 24;
+        state->cm_state->input_ptr = 0;
+        state->cm_state->input_max = new_size3;
+        init(state->cm_state);
+        if (new_size2 != -1) {
+            for (s32 i = 0; i < new_size2; i++)
+                state->buf2[i] = decode_byte(state->cm_state);
+            srt_decode(state->srt_state, state->buf2, state->buf1, new_size2);
+        } else {
+            for (s32 i = 0; i < new_size; i++)
+                state->buf2[i] = decode_byte(state->cm_state);
+            mtf_decode(state->mtf_state, state->buf2, state->buf1, new_size);
+        }
+        if (libsais_unbwt(state->buf1, state->buf2, state->sais_array, new_size,
+                        NULL, bwt_index) < 0) {
+            state->last_error = BZ3_ERR_BWT;
+            return (struct encoding_result){ NULL, -1 };
+        }
+        mrled(state->buf2, state->buf1, state->bytes_read);
+        if (crc32sum(1, state->buf1, state->bytes_read) != crc32) {
+            state->last_error = BZ3_ERR_CRC;
+            return (struct encoding_result){ .buffer = NULL, .size = -1 };
+        }
+        state->last_error = BZ3_OK;
+        return (struct encoding_result){ .buffer = state->buf1,
+                                        .size = state->bytes_read };
     } else {
-        for (s32 i = 0; i < new_size; i++)
+        begin(state->cm_state);
+        state->cm_state->in_queue = state->buf1 + 24;
+        state->cm_state->input_ptr = 0;
+        state->cm_state->input_max = new_size3;
+        init(state->cm_state);
+        for (s32 i = 0; i < state->bytes_read; i++)
             state->buf2[i] = decode_byte(state->cm_state);
-        mtf_decode(state->mtf_state, state->buf2, state->buf1, new_size);
-    }
-    if (libsais_unbwt(state->buf1, state->buf2, state->sais_array, new_size,
-                      NULL, bwt_index) < 0) {
-        state->last_error = BZ3_ERR_BWT;
-        return (struct encoding_result){ NULL, -1 };
+        if (libsais_unbwt(state->buf2, state->buf1, state->sais_array, state->bytes_read,
+                        NULL, bwt_index) < 0) {
+            state->last_error = BZ3_ERR_BWT;
+            return (struct encoding_result){ NULL, -1 };
+        }
+        if (crc32sum(1, state->buf1, state->bytes_read) != crc32) {
+            state->last_error = BZ3_ERR_CRC;
+            return (struct encoding_result){ .buffer = NULL, .size = -1 };
+        }
+        state->last_error = BZ3_OK;
+        return (struct encoding_result){ .buffer = state->buf1,
+                                         .size = state->bytes_read };
     }
-    mrled(state->buf2, state->buf1, state->bytes_read);
-    if (crc32sum(1, state->buf1, state->bytes_read) != crc32) {
-        state->last_error = BZ3_ERR_CRC;
-        return (struct encoding_result){ .buffer = NULL, .size = -1 };
-    }
-    state->last_error = BZ3_OK;
-    return (struct encoding_result){ .buffer = state->buf1,
-                                     .size = state->bytes_read };
 }
 
 s32 read_block(int filedes, struct block_encoder_state * state) {
diff --git a/src/main.c b/src/main.c
index e4094db..cce43cc 100644
--- a/src/main.c
+++ b/src/main.c
@@ -156,7 +156,7 @@ int main(int argc, char * argv[]) {
             }
             struct encoding_result r = decode_block(block_encoder_state);
             if (get_last_error(block_encoder_state) != BZ3_OK) {
-                fprintf(stderr, "Failed to encode the block: %s\n",
+                fprintf(stderr, "Failed to decode the block: %s\n",
                         str_last_error(block_encoder_state));
                 return 1;
             }
tab: 248 wrap: offon