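phase out mtf

Delete the MTF implementation (include/mtf.h, src/mtf.c) and remove the
post-BWT SRT/MTF stage from bz3_encode_block() and bz3_decode_block().
The is_text() model selection is dropped: every block now goes through
RLE first (kept when it shrinks the block, model flag 4) and then LZP
(kept when lzp_compress() returns a positive size, model flag 2). The
decoder undoes LZP before RLE. LZP_MIN_MATCH is raised from 40 to 80.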
diff --git a/include/mtf.h b/include/mtf.h
deleted file mode 100644
index 0d36a9b..0000000
--- a/include/mtf.h
+++ /dev/null
@@ -1,35 +0,0 @@
-
-/*
- * BZip3 - A spiritual successor to BZip2.
- * Copyright (C) 2022 Kamila Szewczyk
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _MTF_H
-#define _MTF_H
-
-#include <inttypes.h>
-#include <stddef.h>
-
-#include "common.h"
-
-struct mtf_state {
- u32 prev[256], curr[256], symbols[256], ranks[256];
-};
-
-void mtf_encode(struct mtf_state * mtf, u8 * src, u8 * dst, u32 count);
-void mtf_decode(struct mtf_state * mtf, u8 * src, u8 * dst, u32 count);
-
-#endif
diff --git a/src/libbz3.c b/src/libbz3.c
index cae1839..d027bb1 100644
--- a/src/libbz3.c
+++ b/src/libbz3.c
@@ -27,13 +27,12 @@
#include "crc32.h"
#include "libsais.h"
#include "lzp.h"
-#include "mtf.h"
#include "rle.h"
#include "srt.h"
#include "txt.h"
#define LZP_DICTIONARY 18
-#define LZP_MIN_MATCH 40
+#define LZP_MIN_MATCH 80
struct bz3_state {
u8 *swap_buffer;
@@ -123,23 +122,21 @@ s32 bz3_encode_block(struct bz3_state * state, u8 * buffer, s32 data_size) {
// bit 1: lzp | no lzp
// bit 2: srt | no srt
// bit 2: mtf | no mtf
- s8 model = is_text(b1, data_size);
-
- s32 lzp_size;
- if(model) {
- lzp_size = lzp_compress(b1, b2, data_size, LZP_DICTIONARY, LZP_MIN_MATCH);
- if(lzp_size > 0) {
- swap(b1, b2);
- data_size = lzp_size;
- model |= 2;
- }
- } else {
- lzp_size = mrlec(b1, data_size, b2);
- if(lzp_size < data_size) {
- swap(b1, b2);
- data_size = lzp_size;
- model |= 16;
- }
+ s8 model = 0;
+ s32 lzp_size, rle_size;
+
+ rle_size = mrlec(b1, data_size, b2);
+ if(rle_size < data_size) {
+ swap(b1, b2);
+ data_size = rle_size;
+ model |= 4;
+ }
+
+ lzp_size = lzp_compress(b1, b2, data_size, LZP_DICTIONARY, LZP_MIN_MATCH);
+ if(lzp_size > 0) {
+ swap(b1, b2);
+ data_size = lzp_size;
+ model |= 2;
}
s32 bwt_idx = libsais_bwt(b1, b2, state->sais_array, data_size, 16, NULL);
@@ -147,28 +144,11 @@ s32 bz3_encode_block(struct bz3_state * state, u8 * buffer, s32 data_size) {
state->last_error = BZ3_ERR_BWT;
return -1;
}
-
- // Important: b2 is the input now, b1 is the output.
- // This avoids an expensive memory copy.
-
- s32 srt_size;
- if((model & 1) == 0) {
- if(data_size > MiB(3)) {
- srt_size = srt_encode(state->srt_state, b2, b1, data_size);
- swap(b1, b2);
- data_size = srt_size;
- model |= 4;
- } else {
- mtf_encode(state->mtf_state, b2, b1, data_size);
- swap(b1, b2);
- model |= 8;
- }
- }
// Compute the amount of overhead dwords.
s32 overhead = 2; // CRC32 + BWT index
- if((model & 2) || (model & 16)) overhead++; // LZP
- if(model & 4) overhead++; // sorted rank transform
+ if(model & 2) overhead++; // LZP
+ if(model & 4) overhead++; // RLE
begin(state->cm_state);
state->cm_state->out_queue = b1 + overhead * 4 + 1;
@@ -183,8 +163,8 @@ s32 bz3_encode_block(struct bz3_state * state, u8 * buffer, s32 data_size) {
b1[8] = model;
s32 p = 0;
- if((model & 2) || (model & 16)) ((s32 *)(b1 + 9))[p++] = htonl(lzp_size);
- if(model & 4) ((s32 *)(b1 + 9))[p++] = htonl(srt_size);
+ if(model & 2) ((s32 *)(b1 + 9))[p++] = htonl(lzp_size);
+ if(model & 4) ((s32 *)(b1 + 9))[p++] = htonl(rle_size);
state->last_error = BZ3_OK;
@@ -211,10 +191,10 @@ s32 bz3_decode_block(struct bz3_state * state, u8 * buffer, s32 data_size, s32 o
}
s8 model = buffer[8];
- s32 lzp_size = -1, srt_size = -1, p = 0;
+ s32 lzp_size = -1, rle_size, p = 0;
- if((model & 2) || (model & 16)) lzp_size = ntohl(((s32 *) (buffer + 9))[p++]);
- if(model & 4) srt_size = ntohl(((s32 *) (buffer + 9))[p++]);
+ if(model & 2) lzp_size = ntohl(((s32 *) (buffer + 9))[p++]);
+ if(model & 4) rle_size = ntohl(((s32 *) (buffer + 9))[p++]);
p += 2;
@@ -231,10 +211,10 @@ s32 bz3_decode_block(struct bz3_state * state, u8 * buffer, s32 data_size, s32 o
s32 size_src;
- if(model & 4)
- size_src = srt_size;
- else if((model & 2) || (model & 16))
+ if(model & 2)
size_src = lzp_size;
+ else if(model & 4)
+ size_src = rle_size;
else
size_src = orig_size;
@@ -242,15 +222,6 @@ s32 bz3_decode_block(struct bz3_state * state, u8 * buffer, s32 data_size, s32 o
b2[i] = decode_byte(state->cm_state);
swap(b1, b2);
- // Undo SRT
- if(model & 4) {
- size_src = srt_decode(state->srt_state, b1, b2, srt_size);
- swap(b1, b2);
- } else if(model & 8) {
- mtf_decode(state->mtf_state, b1, b2, size_src);
- swap(b1, b2);
- }
-
// Undo BWT
if (libsais_unbwt(b1, b2, state->sais_array, size_src, NULL, bwt_idx) < 0) {
state->last_error = BZ3_ERR_BWT;
@@ -260,9 +231,11 @@ s32 bz3_decode_block(struct bz3_state * state, u8 * buffer, s32 data_size, s32 o
// Undo LZP
if(model & 2) {
- size_src = lzp_decompress(b1, b2, lzp_size, LZP_DICTIONARY, (model & 1) ? LZP_MIN_MATCH : 2 * LZP_MIN_MATCH);
+ size_src = lzp_decompress(b1, b2, lzp_size, LZP_DICTIONARY, LZP_MIN_MATCH);
swap(b1, b2);
- } else if(model & 16) {
+ }
+
+ if(model & 4) {
mrled(b1, b2, orig_size);
size_src = orig_size;
swap(b1, b2);
diff --git a/src/mtf.c b/src/mtf.c
deleted file mode 100644
index a2f6cf6..0000000
--- a/src/mtf.c
+++ /dev/null
@@ -1,62 +0,0 @@
-
-/*
- * BZip3 - A spiritual successor to BZip2.
- * Copyright (C) 2022 Kamila Szewczyk
- *
- * This program is free software: you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation, either version 3 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "mtf.h"
-
-void mtf_encode(struct mtf_state * mtf, u8 * src, u8 * dst, u32 count) {
- for (u32 i = 0; i < 256; i++) {
- mtf->prev[i] = mtf->curr[i] = 0;
- mtf->symbols[i] = mtf->ranks[i] = i;
- }
-
- for (u32 i = 0; i < count; i++) {
- u32 r = mtf->symbols[src[i]];
- dst[i] = r;
-
- mtf->prev[src[i]] = mtf->curr[src[i]] = i;
-
- for (; r > 0 && mtf->curr[mtf->ranks[r - 1]] <= i; r--) {
- mtf->ranks[r] = mtf->ranks[r - 1];
- mtf->symbols[mtf->ranks[r]] = r;
- }
-
- mtf->ranks[r] = src[i];
- mtf->symbols[src[i]] = r;
- }
-}
-
-void mtf_decode(struct mtf_state * mtf, u8 * src, u8 * dst, u32 count) {
- for (u32 i = 0; i < 256; i++) {
- mtf->prev[i] = mtf->curr[i] = 0;
- mtf->ranks[i] = i;
- }
-
- for (u32 i = 0; i < count; i++) {
- u32 r = src[i] & 0xFF;
-
- const u32 c = mtf->ranks[r];
- dst[i] = (s8)c;
-
- mtf->prev[c] = mtf->curr[c] = i;
-
- for (; r > 0 && mtf->curr[mtf->ranks[r - 1]] <= i; r--) mtf->ranks[r] = mtf->ranks[r - 1];
-
- mtf->ranks[r] = c;
- }
-}
