text data smoke wired to libbz3
diff --git a/src/libbz3.c b/src/libbz3.c
index 460569b..f1aed9a 100644
--- a/src/libbz3.c
+++ b/src/libbz3.c
@@ -95,44 +95,72 @@ void delete_block_encoder_state(struct block_encoder_state * state) {
struct encoding_result encode_block(struct block_encoder_state * state) {
u32 crc32 = crc32sum(1, state->buf1, state->bytes_read);
- s32 new_size = mrlec(state->buf1, state->bytes_read, state->buf2);
- s32 bwt_index = libsais_bwt(state->buf2, state->buf2, state->sais_array,
- new_size, 16, NULL);
- if (bwt_index < 0) {
- state->last_error = BZ3_ERR_BWT;
- return (struct encoding_result){ NULL, -1 };
- }
- s32 new_size2;
-
- if (new_size > MiB(3)) {
- new_size2 =
- srt_encode(state->srt_state, state->buf2, state->buf1, new_size);
+ int txt = is_text(state->buf1, state->bytes_read);
+
+ if(txt) {
+ s32 bwt_index = libsais_bwt(state->buf1, state->buf1, state->sais_array,
+ state->bytes_read, 16, NULL);
+ if(bwt_index < 0) {
+ state->last_error = BZ3_ERR_BWT;
+ return (struct encoding_result) { NULL, -1 };
+ }
+ begin(state->cm_state);
+ state->cm_state->out_queue = state->buf2 + 24;
+ state->cm_state->output_ptr = 0;
+ for(s32 i = 0; i < state->bytes_read; i++)
+ encode_byte(state->cm_state, state->buf1[i]);
+ flush(state->cm_state);
+ s32 new_size = state->cm_state->output_ptr;
+
+ ((uint32_t *)state->buf2)[0] = htonl(crc32);
+ ((uint32_t *)state->buf2)[1] = htonl(state->bytes_read);
+ ((uint32_t *)state->buf2)[2] = htonl(bwt_index);
+ ((uint32_t *)state->buf2)[3] = 0xFFFFFFFF;
+ ((uint32_t *)state->buf2)[4] = 0xFFFFFFFF;
+ ((uint32_t *)state->buf2)[5] = htonl(new_size);
+
+ state->last_error = BZ3_OK;
+ return (struct encoding_result) { state->buf2, 24 + new_size };
} else {
- new_size2 = -1;
- mtf_encode(state->mtf_state, state->buf2, state->buf1, new_size);
+ s32 new_size = mrlec(state->buf1, state->bytes_read, state->buf2);
+ s32 bwt_index = libsais_bwt(state->buf2, state->buf2, state->sais_array,
+ new_size, 16, NULL);
+ if (bwt_index < 0) {
+ state->last_error = BZ3_ERR_BWT;
+ return (struct encoding_result){ NULL, -1 };
+ }
+ s32 new_size2;
+
+ if (new_size > MiB(3)) {
+ new_size2 =
+ srt_encode(state->srt_state, state->buf2, state->buf1, new_size);
+ } else {
+ new_size2 = -1;
+ mtf_encode(state->mtf_state, state->buf2, state->buf1, new_size);
+ }
+
+ begin(state->cm_state);
+ state->cm_state->out_queue = state->buf2 + 24;
+ state->cm_state->output_ptr = 0;
+ if (new_size2 != -1)
+ for (s32 i = 0; i < new_size2; i++)
+ encode_byte(state->cm_state, state->buf1[i]);
+ else
+ for (s32 i = 0; i < new_size; i++)
+ encode_byte(state->cm_state, state->buf1[i]);
+ flush(state->cm_state);
+ s32 new_size3 = state->cm_state->output_ptr;
+
+ ((uint32_t *)state->buf2)[0] = htonl(crc32);
+ ((uint32_t *)state->buf2)[1] = htonl(state->bytes_read);
+ ((uint32_t *)state->buf2)[2] = htonl(bwt_index);
+ ((uint32_t *)state->buf2)[3] = htonl(new_size);
+ ((uint32_t *)state->buf2)[4] = htonl(new_size2);
+ ((uint32_t *)state->buf2)[5] = htonl(new_size3);
+ state->last_error = BZ3_OK;
+ return (struct encoding_result){ .buffer = state->buf2,
+ .size = 24 + new_size3 };
}
-
- begin(state->cm_state);
- state->cm_state->out_queue = state->buf2 + 24;
- state->cm_state->output_ptr = 0;
- if (new_size2 != -1)
- for (s32 i = 0; i < new_size2; i++)
- encode_byte(state->cm_state, state->buf1[i]);
- else
- for (s32 i = 0; i < new_size; i++)
- encode_byte(state->cm_state, state->buf1[i]);
- flush(state->cm_state);
- s32 new_size3 = state->cm_state->output_ptr;
-
- ((uint32_t *)state->buf2)[0] = htonl(crc32);
- ((uint32_t *)state->buf2)[1] = htonl(state->bytes_read);
- ((uint32_t *)state->buf2)[2] = htonl(bwt_index);
- ((uint32_t *)state->buf2)[3] = htonl(new_size);
- ((uint32_t *)state->buf2)[4] = htonl(new_size2);
- ((uint32_t *)state->buf2)[5] = htonl(new_size3);
- state->last_error = BZ3_OK;
- return (struct encoding_result){ .buffer = state->buf2,
- .size = 24 + new_size3 };
}
struct encoding_result decode_block(struct block_encoder_state * state) {
@@ -146,33 +174,55 @@ struct encoding_result decode_block(struct block_encoder_state * state) {
new_size2 = ntohl(((uint32_t *)state->buf1)[4]);
new_size3 = ntohl(((uint32_t *)state->buf1)[5]);
- begin(state->cm_state);
- state->cm_state->in_queue = state->buf1 + 24;
- state->cm_state->input_ptr = 0;
- state->cm_state->input_max = new_size3;
- init(state->cm_state);
- if (new_size2 != -1) {
- for (s32 i = 0; i < new_size2; i++)
- state->buf2[i] = decode_byte(state->cm_state);
- srt_decode(state->srt_state, state->buf2, state->buf1, new_size2);
+ if(new_size2 != 0xFFFFFFFF || new_size != 0xFFFFFFFF) {
+ begin(state->cm_state);
+ state->cm_state->in_queue = state->buf1 + 24;
+ state->cm_state->input_ptr = 0;
+ state->cm_state->input_max = new_size3;
+ init(state->cm_state);
+ if (new_size2 != -1) {
+ for (s32 i = 0; i < new_size2; i++)
+ state->buf2[i] = decode_byte(state->cm_state);
+ srt_decode(state->srt_state, state->buf2, state->buf1, new_size2);
+ } else {
+ for (s32 i = 0; i < new_size; i++)
+ state->buf2[i] = decode_byte(state->cm_state);
+ mtf_decode(state->mtf_state, state->buf2, state->buf1, new_size);
+ }
+ if (libsais_unbwt(state->buf1, state->buf2, state->sais_array, new_size,
+ NULL, bwt_index) < 0) {
+ state->last_error = BZ3_ERR_BWT;
+ return (struct encoding_result){ NULL, -1 };
+ }
+ mrled(state->buf2, state->buf1, state->bytes_read);
+ if (crc32sum(1, state->buf1, state->bytes_read) != crc32) {
+ state->last_error = BZ3_ERR_CRC;
+ return (struct encoding_result){ .buffer = NULL, .size = -1 };
+ }
+ state->last_error = BZ3_OK;
+ return (struct encoding_result){ .buffer = state->buf1,
+ .size = state->bytes_read };
} else {
- for (s32 i = 0; i < new_size; i++)
+ begin(state->cm_state);
+ state->cm_state->in_queue = state->buf1 + 24;
+ state->cm_state->input_ptr = 0;
+ state->cm_state->input_max = new_size3;
+ init(state->cm_state);
+ for (s32 i = 0; i < state->bytes_read; i++)
state->buf2[i] = decode_byte(state->cm_state);
- mtf_decode(state->mtf_state, state->buf2, state->buf1, new_size);
- }
- if (libsais_unbwt(state->buf1, state->buf2, state->sais_array, new_size,
- NULL, bwt_index) < 0) {
- state->last_error = BZ3_ERR_BWT;
- return (struct encoding_result){ NULL, -1 };
+ if (libsais_unbwt(state->buf2, state->buf1, state->sais_array, state->bytes_read,
+ NULL, bwt_index) < 0) {
+ state->last_error = BZ3_ERR_BWT;
+ return (struct encoding_result){ NULL, -1 };
+ }
+ if (crc32sum(1, state->buf1, state->bytes_read) != crc32) {
+ state->last_error = BZ3_ERR_CRC;
+ return (struct encoding_result){ .buffer = NULL, .size = -1 };
+ }
+ state->last_error = BZ3_OK;
+ return (struct encoding_result){ .buffer = state->buf1,
+ .size = state->bytes_read };
}
- mrled(state->buf2, state->buf1, state->bytes_read);
- if (crc32sum(1, state->buf1, state->bytes_read) != crc32) {
- state->last_error = BZ3_ERR_CRC;
- return (struct encoding_result){ .buffer = NULL, .size = -1 };
- }
- state->last_error = BZ3_OK;
- return (struct encoding_result){ .buffer = state->buf1,
- .size = state->bytes_read };
}
s32 read_block(int filedes, struct block_encoder_state * state) {
diff --git a/src/main.c b/src/main.c
index e4094db..cce43cc 100644
--- a/src/main.c
+++ b/src/main.c
@@ -156,7 +156,7 @@ int main(int argc, char * argv[]) {
}
struct encoding_result r = decode_block(block_encoder_state);
if (get_last_error(block_encoder_state) != BZ3_OK) {
- fprintf(stderr, "Failed to encode the block: %s\n",
+ fprintf(stderr, "Failed to decode the block: %s\n",
str_last_error(block_encoder_state));
return 1;
}
