:: commit c925eed5d8350e539c8ed1f1147d61073c5809b0

Kamila Szewczyk <kspalaiologos@gmail.com> — 2022-05-02 10:46

parents: c6021bb016

a "testing" preset

diff --git a/BENCHMARKS.md b/BENCHMARKS.md
new file mode 100644
index 0000000..e6d9641
--- /dev/null
+++ b/BENCHMARKS.md
@@ -0,0 +1,88 @@
+
+Note: These benchmarks are rarely updated.
+
+[Testing corpus](https://github.com/kspalaiologos/bzip3/releases/download/corpus/corpus.7z)
+
+```
+496847 cantrbry.tar.bz3
+570856 bzip2/cantrbry.tar.bz2
+
+874687 calgary.tar.bz3
+891321 bzip2/calgary.tar.bz2
+
+28442351 silesia2.tar.bz3
+30128327 bzip2/silesia2.tar.bz2
+
+24790019 enwik8.bz3
+29008758 bzip2/enwik8.bz2
+
+21854321 silesia1.tar.bz3
+24462553 bzip2/silesia1.tar.bz2
+
+6835103 lisp.mb.bz3
+13462295 bzip2/lisp.mb.bz2
+
+138366523 linux.tar.bz3
+157810434 bzip2/linux.tar.bz2
+```
+
+## Benchmark on the Calgary corpus
+
+Results:
+
+```
+% wc -c corpus/calgary.tar.bz3 corpus/calgary.tar.bz2 corpus/calgary.tar.lzma corpus/calgary.tar.gz corpus/calgary.tar
+ 874691 corpus/calgary.tar.bz3
+ 891321 corpus/calgary.tar.bz2
+ 853112 corpus/calgary.tar.lzma
+1062584 corpus/calgary.tar.gz
+3265536 corpus/calgary.tar
+```
+
+Performance:
+
+```
+Benchmark 1: gzip -9 -k -f corpus/calgary.tar
+  Time (mean ± σ):     224.3 ms ±   2.6 ms    [User: 221.4 ms, System: 2.5 ms]
+  Range (min … max):   219.9 ms … 230.9 ms    30 runs
+
+Benchmark 2: lzma -9 -k -f corpus/calgary.tar
+  Time (mean ± σ):     787.9 ms ±   9.6 ms    [User: 753.6 ms, System: 33.7 ms]
+  Range (min … max):   764.8 ms … 813.1 ms    30 runs
+
+Benchmark 3: ./bzip3 -e -b 3 corpus/calgary.tar corpus/calgary.tar.bz3
+  Time (mean ± σ):     286.0 ms ±   4.8 ms    [User: 280.3 ms, System: 4.5 ms]
+  Range (min … max):   280.1 ms … 298.9 ms    30 runs
+
+Benchmark 4: bzip2 -9 -k -f corpus/calgary.tar
+  Time (mean ± σ):     172.9 ms ±   2.4 ms    [User: 168.4 ms, System: 4.4 ms]
+  Range (min … max):   169.5 ms … 179.4 ms    30 runs
+```
+
+Memory usage (as reported by `zsh`'s `time`):
+
+```
+bzip2 8M memory
+bzip3 17M memory
+lzma 95M memory
+gzip 5M memory
+```
+
+## Benchmark on the Linux kernel
+
+```
+bzip3 -e -b 32 linux.tar linux.tar.bz3  104.71s user 0.41s system 99% cpu 192M memory 1:45.16 total
+bzip2 -9 -k linux.tar  61.23s user 0.35s system 99% cpu 8M memory 1:01.58 total
+gzip -9 -k linux.tar  43.08s user 0.35s system 99% cpu 4M memory 43.435 total
+lzma -9 -k linux.tar  397.30s user 0.90s system 99% cpu 675M memory 6:38.28 total
+```
+
+```
+wc -c linux.tar*
+1215221760 linux.tar
+ 157810434 linux.tar.bz2
+ 130959938 linux.tar.bz3
+ 208100532 linux.tar.gz
+ 125725455 linux.tar.lzma
+```
+
diff --git a/README.md b/README.md
index 4843551..0bef85e 100644
--- a/README.md
+++ b/README.md
@@ -3,89 +3,12 @@
 
 A harder, better, faster and stronger spiritual successor to BZip2. Features higher compression ratios and better performance thanks to an order-2 context mixing entropy coder and fast Burrows-Wheeler transform code making use of suffix arrays.
 
-Work-In-Progress. For the time being, don't expect the trunk to be compatible with previous commits.
+No stability guarantees yet.
 
-[Testing corpus](https://github.com/kspalaiologos/bzip3/releases/download/corpus/corpus.7z)
+## Installation
 
 ```
-496847 cantrbry.tar.bz3
-570856 bzip2/cantrbry.tar.bz2
-
-874687 calgary.tar.bz3
-891321 bzip2/calgary.tar.bz2
-
-28442351 silesia2.tar.bz3
-30128327 bzip2/silesia2.tar.bz2
-
-24790019 enwik8.bz3
-29008758 bzip2/enwik8.bz2
-
-21854321 silesia1.tar.bz3
-24462553 bzip2/silesia1.tar.bz2
-
-6835103 lisp.mb.bz3
-13462295 bzip2/lisp.mb.bz2
-
-138366523 linux.tar.bz3
-157810434 bzip2/linux.tar.bz2
-```
-
-## Benchmark on the Calgary corpus
-
-Results:
-
-```
-% wc -c corpus/calgary.tar.bz3 corpus/calgary.tar.bz2 corpus/calgary.tar.lzma corpus/calgary.tar.gz corpus/calgary.tar
- 874691 corpus/calgary.tar.bz3
- 891321 corpus/calgary.tar.bz2
- 853112 corpus/calgary.tar.lzma
-1062584 corpus/calgary.tar.gz
-3265536 corpus/calgary.tar
-```
-
-Performance:
-
+make all && sudo make install
 ```
-Benchmark 1: gzip -9 -k -f corpus/calgary.tar
-  Time (mean ± σ):     224.3 ms ±   2.6 ms    [User: 221.4 ms, System: 2.5 ms]
-  Range (min … max):   219.9 ms … 230.9 ms    30 runs
 
-Benchmark 2: lzma -9 -k -f corpus/calgary.tar
-  Time (mean ± σ):     787.9 ms ±   9.6 ms    [User: 753.6 ms, System: 33.7 ms]
-  Range (min … max):   764.8 ms … 813.1 ms    30 runs
-
-Benchmark 3: ./bzip3 -e -b 3 corpus/calgary.tar corpus/calgary.tar.bz3
-  Time (mean ± σ):     286.0 ms ±   4.8 ms    [User: 280.3 ms, System: 4.5 ms]
-  Range (min … max):   280.1 ms … 298.9 ms    30 runs
-
-Benchmark 4: bzip2 -9 -k -f corpus/calgary.tar
-  Time (mean ± σ):     172.9 ms ±   2.4 ms    [User: 168.4 ms, System: 4.4 ms]
-  Range (min … max):   169.5 ms … 179.4 ms    30 runs
-```
-
-Memory usage (as reported by `zsh`'s `time`):
-
-```
-bzip2 8M memory
-bzip3 17M memory
-lzma 95M memory
-gzip 5M memory
-```
-
-## Benchmark on the Linux kernel
-
-```
-bzip3 -e -b 32 linux.tar linux.tar.bz3  104.71s user 0.41s system 99% cpu 192M memory 1:45.16 total
-bzip2 -9 -k linux.tar  61.23s user 0.35s system 99% cpu 8M memory 1:01.58 total
-gzip -9 -k linux.tar  43.08s user 0.35s system 99% cpu 4M memory 43.435 total
-lzma -9 -k linux.tar  397.30s user 0.90s system 99% cpu 675M memory 6:38.28 total
-```
-
-```
-wc -c linux.tar*
-1215221760 linux.tar
- 157810434 linux.tar.bz2
- 130959938 linux.tar.bz3
- 208100532 linux.tar.gz
- 125725455 linux.tar.lzma
-```
+To set the installation directory, use e.g. `sudo make install PREFIX=/usr`.
diff --git a/src/main.c b/src/main.c
index 09a1575..fdb0db0 100644
--- a/src/main.c
+++ b/src/main.c
@@ -70,7 +70,7 @@ void encode_block(int output_des, s32 bytes_read, u8 * buffer,
 }
 
 int decode_block(int input_des, int output_des, u8 * buffer,
-                 u8 * output, s32 * sais_array,
+                 u8 * output, s32 * sais_array, s8 test,
                  struct srt_state * srt_state, state * cm_state,
                  struct mtf_state * mtf_state) {
 #define safe_read(fd, buf, size) \
@@ -107,12 +107,13 @@ int decode_block(int input_des, int output_des, u8 * buffer,
         fprintf(stderr, "CRC32 checksum mismatch.\n");
         return 1;
     }
-    write(output_des, buffer, bytes_read);
+    if(!test)
+        write(output_des, buffer, bytes_read);
     return 0;
 }
 
 int main(int argc, char * argv[]) {
-    int mode = 0;  // -1: encode, 0: unspecified, 1: encode
+    int mode = 0;  // -1: decode, 0: unspecified, 1: encode, 2: test
     char *input = NULL, *output = NULL;     // input and output file names
     u32 block_size = 8 * 1024 * 1024;  // the block size
 
@@ -122,6 +123,8 @@ int main(int argc, char * argv[]) {
                 mode = 1;
             } else if (argv[i][1] == 'd') {
                 mode = -1;
+            } else if (argv[i][1] == 't') {
+                mode = 2;
             } else if (argv[i][1] == 'b') {
                 block_size = 1024 * 1024 * atoi(argv[i + 1]);
                 i++;
@@ -136,7 +139,7 @@ int main(int argc, char * argv[]) {
     }
 
     if (mode == 0) {
-        fprintf(stderr, "Usage: %s [-e/-d] [-b block_size] input output\n",
+        fprintf(stderr, "Usage: %s [-e/-d/-t] [-b block_size] input output\n",
                 argv[0]);
         fprintf(stderr,
                 "If input or output are not specified, they default to stdin "
@@ -214,7 +217,29 @@ int main(int argc, char * argv[]) {
 
         state s;
 
-        while (decode_block(input_des, output_des, buffer, output, sais_array,
+        while (decode_block(input_des, output_des, buffer, output, sais_array, 0,
+                            &srt_state, &s, &mtf_state) == 0)
+            ;
+
+        free(buffer);
+        free(output);
+        free(sais_array);
+    } else if(mode == -2) {
+        // Test
+        char signature[5];
+        read(input_des, signature, 5);
+        if (strncmp(signature, "BZ3v1", 5) != 0) {
+            fprintf(stderr, "Invalid signature.\n");
+            return 1;
+        }
+        read(input_des, &block_size, sizeof(u32));
+        u8 * buffer = malloc(block_size + block_size / 2);
+        u8 * output = malloc(block_size + block_size / 2);
+        s32 * sais_array = malloc(block_size * sizeof(s32) + 16);
+
+        state s;
+
+        while (decode_block(input_des, output_des, buffer, output, sais_array, 1,
                             &srt_state, &s, &mtf_state) == 0)
             ;
 
tab: 248 wrap: offon