:: commit 9a2d7a0ad2e129e9d7adff4b98d29a50a4f7be44

Kamila Szewczyk <kspalaiologos@gmail.com> — 2022-05-06 13:41

parents: 2a2d3b265b

speed up the context mixer

diff --git a/.vscode/settings.json b/.vscode/settings.json
index de8f3f6..b94e796 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -15,6 +15,7 @@
         "istream": "c",
         "sstream": "c",
         "streambuf": "c",
-        "config.h": "c"
+        "config.h": "c",
+        "intrin.h": "c"
     }
 }
\ No newline at end of file
diff --git a/include/cm.h b/include/cm.h
index 19916df..e2bcec0 100644
--- a/include/cm.h
+++ b/include/cm.h
@@ -9,11 +9,11 @@
 
 typedef struct {
     u32 low, high, code;
-    u16 C0[256], C1[256][256], C2[2][256][17];
     s32 c1, c2, run;
-
     u8 *in_queue, *out_queue;
     s32 input_ptr, output_ptr, input_max;
+    
+    u16 C0[256], C1[256][256], C2[2][256][17];
 } state;
 
 void flush(state * s);
diff --git a/include/common.h b/include/common.h
index 4a06348..1773fe3 100644
--- a/include/common.h
+++ b/include/common.h
@@ -59,19 +59,7 @@ typedef int32_t s32;
         #endif
     }
 
-    static u32 htonl(u32 value) {
-        #ifdef WORDS_BIGENDIAN
-            return value;
-        #else
-            u8 data[4];
-            memcpy(&data, &value, sizeof(data));
-
-            return ((u32) data[3])
-                 | ((u32) data[2] << 8)
-                 | ((u32) data[1] << 16)
-                 | ((u32) data[0] << 24);
-        #endif
-    }
+    #define htonl ntohl
 #endif
 
 #endif
diff --git a/src/cm.c b/src/cm.c
index 05ecea7..d509e7d 100644
--- a/src/cm.c
+++ b/src/cm.c
@@ -1,6 +1,29 @@
 
 #include "cm.h"
 
+#if defined(__has_builtin)
+    #if __has_builtin(__builtin_prefetch)
+        #define HAS_BUILTIN_PREFECTCH
+    #endif
+#elif defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 2)) || (__GNUC__ >= 4))
+    #define HAS_BUILTIN_PREFECTCH
+#endif
+
+#if defined(HAS_BUILTIN_PREFECTCH)
+    #define prefetch(address) __builtin_prefetch((const void *)(address), 0, 0)
+#elif defined(_M_IX86) || defined(_M_AMD64)
+    #include <intrin.h>
+    #define prefetch(address) _mm_prefetch((const void *)(address), _MM_HINT_NTA)
+#elif defined(_M_ARM)
+    #include <intrin.h>
+    #define prefetch(address) __prefetch((const void *)(address))
+#elif defined(_M_ARM64)
+    #include <intrin.h>
+    #define prefetch(address) __prefetch2((const void *)(address), 1)
+#else
+    #define prefetch(address)
+#endif
+
 // Uses an arithmetic coder implementation outlined in:
 // http://mattmahoney.net/dc/dce.html#Section_31
 
@@ -71,6 +94,7 @@ void init(state * s) {
 #define update1(p, x) ((p) + (((p) ^ 65535) >> x))
 
 void begin(state * s) {
+    prefetch(s);
     s->c1 = s->c2 = 0;
     s->run = 0;
     s->low = 0;
diff --git a/src/main.c b/src/main.c
index 0817c28..744ca59 100644
--- a/src/main.c
+++ b/src/main.c
@@ -26,11 +26,15 @@
 #include "libbz3.h"
 
 int main(int argc, char * argv[]) {
-    // -1: encode, 0: unspecified, 1: encode, 2: test
+    // -1: decode, 0: unspecified, 1: encode, 2: test
     int mode = 0;
 
     // input and output file names
     char *input = NULL, *output = NULL;
+    char *bz3_file = NULL, *regular_file = NULL;
+
+    // command line arguments
+    int force_stdstreams = 0;
 
     // the block size
     u32 block_size = MiB(8);
@@ -46,24 +50,40 @@ int main(int argc, char * argv[]) {
             } else if (argv[i][1] == 'b') {
                 block_size = MiB(atoi(argv[i + 1]));
                 i++;
+            } else if (argv[i][1] == 'c') {
+                force_stdstreams = 1;
             }
         } else {
-            if (input == NULL) {
-                input = argv[i];
-            } else if (output == NULL) {
-                output = argv[i];
+            if(strlen(argv[i]) > 4 && !strcmp(argv[i] + strlen(argv[i]) - 4, ".bz3")) {
+                bz3_file = argv[i];
+            } else {
+                regular_file = argv[i];
             }
         }
     }
 
     if (mode == 0) {
-        fprintf(stderr, "Usage: %s [-e/-d/-t] [-b block_size] input output\n", argv[0]);
-        fprintf(stderr,
-                "If input or output are not specified, they default to stdin "
-                "and stdout.\n");
+        fprintf(stderr, "bzip3 - A better and stronger spiritual successor to bzip2.\n");
+        fprintf(stderr, "Copyright (C) by Kamila Szewczyk, 2022. Licensed under the terms of GPLv3.\n");
+        fprintf(stderr, "Usage: bzip3 [-e/-d/-t/-c] [-b block_size] input output\n");
+        fprintf(stderr, "Operations:\n");
+        fprintf(stderr, "  -e: encode\n");
+        fprintf(stderr, "  -d: decode\n");
+        fprintf(stderr, "  -t: test\n");
+        fprintf(stderr, "Extra flags:\n");
+        fprintf(stderr, "  -c: force reading/writing from standard streams\n");
+        fprintf(stderr, "  -b N: set block size in MiB\n");
         return 1;
     }
 
+    if(mode == 1) {
+        input = regular_file;
+        output = bz3_file;
+    } else {
+        input = bz3_file;
+        output = regular_file;
+    }
+
     FILE * input_des, * output_des;
 
     if (input != NULL) {
@@ -72,7 +92,7 @@ int main(int argc, char * argv[]) {
             perror("fopen");
             return 1;
         }
-    } else {
+    } else if(force_stdstreams) {
         input_des = stdin;
     }
 
@@ -82,7 +102,7 @@ int main(int argc, char * argv[]) {
             perror("open");
             return 1;
         }
-    } else {
+    } else if(force_stdstreams) {
         output_des = stdout;
     }
 
tab: 2 4 8 wrap: off on