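Speed up the context mixer (prefetch the state, move the large tables to the end of the struct); add the -c flag and .bz3 file name detection to the CLI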
diff --git a/.vscode/settings.json b/.vscode/settings.json
index de8f3f6..b94e796 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -15,6 +15,7 @@
"istream": "c",
"sstream": "c",
"streambuf": "c",
- "config.h": "c"
+ "config.h": "c",
+ "intrin.h": "c"
}
}
\ No newline at end of file
diff --git a/include/cm.h b/include/cm.h
index 19916df..e2bcec0 100644
--- a/include/cm.h
+++ b/include/cm.h
@@ -9,11 +9,11 @@
typedef struct {
u32 low, high, code;
- u16 C0[256], C1[256][256], C2[2][256][17];
s32 c1, c2, run;
-
u8 *in_queue, *out_queue;
s32 input_ptr, output_ptr, input_max;
+
+ u16 C0[256], C1[256][256], C2[2][256][17];
} state;
void flush(state * s);
diff --git a/include/common.h b/include/common.h
index 4a06348..1773fe3 100644
--- a/include/common.h
+++ b/include/common.h
@@ -59,19 +59,7 @@ typedef int32_t s32;
#endif
}
- static u32 htonl(u32 value) {
- #ifdef WORDS_BIGENDIAN
- return value;
- #else
- u8 data[4];
- memcpy(&data, &value, sizeof(data));
-
- return ((u32) data[3])
- | ((u32) data[2] << 8)
- | ((u32) data[1] << 16)
- | ((u32) data[0] << 24);
- #endif
- }
+ #define htonl ntohl
#endif
#endif
diff --git a/src/cm.c b/src/cm.c
index 05ecea7..d509e7d 100644
--- a/src/cm.c
+++ b/src/cm.c
@@ -1,6 +1,29 @@
#include "cm.h"
+#if defined(__has_builtin)
+ #if __has_builtin(__builtin_prefetch)
+ #define HAS_BUILTIN_PREFECTCH
+ #endif
+#elif defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 2)) || (__GNUC__ >= 4))
+ #define HAS_BUILTIN_PREFECTCH
+#endif
+
+#if defined(HAS_BUILTIN_PREFECTCH)
+ #define prefetch(address) __builtin_prefetch((const void *)(address), 0, 0)
+#elif defined(_M_IX86) || defined(_M_AMD64)
+ #include <intrin.h>
+ #define prefetch(address) _mm_prefetch((const void *)(address), _MM_HINT_NTA)
+#elif defined(_M_ARM)
+ #include <intrin.h>
+ #define prefetch(address) __prefetch((const void *)(address))
+#elif defined(_M_ARM64)
+ #include <intrin.h>
+ #define prefetch(address) __prefetch2((const void *)(address), 1)
+#else
+ #define prefetch(address)
+#endif
+
// Uses an arithmetic coder implementation outlined in:
// http://mattmahoney.net/dc/dce.html#Section_31
@@ -71,6 +94,7 @@ void init(state * s) {
#define update1(p, x) ((p) + (((p) ^ 65535) >> x))
void begin(state * s) {
+ prefetch(s);
s->c1 = s->c2 = 0;
s->run = 0;
s->low = 0;
diff --git a/src/main.c b/src/main.c
index 0817c28..744ca59 100644
--- a/src/main.c
+++ b/src/main.c
@@ -26,11 +26,15 @@
#include "libbz3.h"
int main(int argc, char * argv[]) {
- // -1: encode, 0: unspecified, 1: encode, 2: test
+ // -1: decode, 0: unspecified, 1: encode, 2: test
int mode = 0;
// input and output file names
char *input = NULL, *output = NULL;
+ char *bz3_file = NULL, *regular_file = NULL;
+
+ // command line arguments
+ int force_stdstreams = 0;
// the block size
u32 block_size = MiB(8);
@@ -46,24 +50,40 @@ int main(int argc, char * argv[]) {
} else if (argv[i][1] == 'b') {
block_size = MiB(atoi(argv[i + 1]));
i++;
+ } else if (argv[i][1] == 'c') {
+ force_stdstreams = 1;
}
} else {
- if (input == NULL) {
- input = argv[i];
- } else if (output == NULL) {
- output = argv[i];
+ if(strlen(argv[i]) > 4 && !strcmp(argv[i] + strlen(argv[i]) - 4, ".bz3")) {
+ bz3_file = argv[i];
+ } else {
+ regular_file = argv[i];
}
}
}
if (mode == 0) {
- fprintf(stderr, "Usage: %s [-e/-d/-t] [-b block_size] input output\n", argv[0]);
- fprintf(stderr,
- "If input or output are not specified, they default to stdin "
- "and stdout.\n");
+ fprintf(stderr, "bzip3 - A better and stronger spiritual successor to bzip2.\n");
+ fprintf(stderr, "Copyright (C) by Kamila Szewczyk, 2022. Licensed under the terms of GPLv3.\n");
+ fprintf(stderr, "Usage: bzip3 [-e/-d/-t/-c] [-b block_size] input output\n");
+ fprintf(stderr, "Operations:\n");
+ fprintf(stderr, " -e: encode\n");
+ fprintf(stderr, " -d: decode\n");
+ fprintf(stderr, " -t: test\n");
+ fprintf(stderr, "Extra flags:\n");
+ fprintf(stderr, " -c: force reading/writing from standard streams\n");
+ fprintf(stderr, " -b N: set block size in MiB\n");
return 1;
}
+ if(mode == 1) {
+ input = regular_file;
+ output = bz3_file;
+ } else {
+ input = bz3_file;
+ output = regular_file;
+ }
+
FILE * input_des, * output_des;
if (input != NULL) {
@@ -72,7 +92,7 @@ int main(int argc, char * argv[]) {
perror("fopen");
return 1;
}
- } else {
+ } else if(force_stdstreams) {
input_des = stdin;
}
@@ -82,7 +102,7 @@ int main(int argc, char * argv[]) {
perror("open");
return 1;
}
- } else {
+ } else if(force_stdstreams) {
output_des = stdout;
}
