:: commit 9755d1a16785a996445cdf50ebce096a373b7c54

Kamila Szewczyk <kspalaiologos@gmail.com> — 2022-05-06 18:51

parents: 7206e93315

Flatten C2 from [2][256][17] to [512][17], indexed as C2[2*ctx+f], for a uniform memory access pattern

diff --git a/include/cm.h b/include/cm.h
index 3a6fdb3..ebe5d76 100644
--- a/include/cm.h
+++ b/include/cm.h
@@ -13,7 +13,7 @@ typedef struct {
     u8 *in_queue, *out_queue;
     s32 input_ptr, output_ptr, input_max;
 
-    u16 C0[256], C1[256][256], C2[2][256][17];
+    u16 C0[256], C1[256][256], C2[512][17];
 } state;
 
 void flush(state * s);
diff --git a/src/cm.c b/src/cm.c
index 727207d..7364ed6 100644
--- a/src/cm.c
+++ b/src/cm.c
@@ -105,7 +105,7 @@ void begin(state * s) {
         for (int j = 0; j < 256; j++) s->C1[i][j] = 1 << 15;
     for (int i = 0; i < 2; i++)
         for (int j = 0; j < 256; j++)
-            for (int k = 0; k < 17; k++) s->C2[i][j][k] = (k << 12) - (k == 16);
+            for (int k = 0; k < 17; k++) s->C2[2*j+i][k] = (k << 12) - (k == 16);
 }
 
 void encode_byte(state * s, u8 c) {
@@ -125,23 +125,23 @@ void encode_byte(state * s, u8 c) {
         const int p = ((p0 + p1) * 7 + p2 + p2) >> 4;
 
         const int j = p >> 12;
-        const int x1 = s->C2[f][ctx][j];
-        const int x2 = s->C2[f][ctx][j + 1];
+        const int x1 = s->C2[2*ctx+f][j];
+        const int x2 = s->C2[2*ctx+f][j + 1];
         const int ssep = x1 + (((x2 - x1) * (p & 4095)) >> 12);
 
         if (c & 128) {
             encodebit1(s, ssep * 3 + p);
             s->C0[ctx] = update1(s->C0[ctx], 2);
             s->C1[s->c1][ctx] = update1(s->C1[s->c1][ctx], 4);
-            s->C2[f][ctx][j] = update1(s->C2[f][ctx][j], 6);
-            s->C2[f][ctx][j + 1] = update1(s->C2[f][ctx][j + 1], 6);
+            s->C2[2*ctx+f][j] = update1(s->C2[2*ctx+f][j], 6);
+            s->C2[2*ctx+f][j + 1] = update1(s->C2[2*ctx+f][j + 1], 6);
             ctx += ctx + 1;
         } else {
             encodebit0(s, ssep * 3 + p);
             s->C0[ctx] = update0(s->C0[ctx], 2);
             s->C1[s->c1][ctx] = update0(s->C1[s->c1][ctx], 4);
-            s->C2[f][ctx][j] = update0(s->C2[f][ctx][j], 6);
-            s->C2[f][ctx][j + 1] = update0(s->C2[f][ctx][j + 1], 6);
+            s->C2[2*ctx+f][j] = update0(s->C2[2*ctx+f][j], 6);
+            s->C2[2*ctx+f][j + 1] = update0(s->C2[2*ctx+f][j + 1], 6);
             ctx += ctx;
         }
 
@@ -169,8 +169,8 @@ u8 decode_byte(state * s) {
         const int p = ((p0 + p1) * 7 + p2 + p2) >> 4;
 
         const int j = p >> 12;
-        const int x1 = s->C2[f][ctx][j];
-        const int x2 = s->C2[f][ctx][j + 1];
+        const int x1 = s->C2[2*ctx+f][j];
+        const int x2 = s->C2[2*ctx+f][j + 1];
         const int ssep = x1 + (((x2 - x1) * (p & 4095)) >> 12);
 
         const int bit = decodebit(s, ssep * 3 + p);
@@ -178,14 +178,14 @@ u8 decode_byte(state * s) {
         if (bit) {
             s->C0[ctx] = update1(s->C0[ctx], 2);
             s->C1[s->c1][ctx] = update1(s->C1[s->c1][ctx], 4);
-            s->C2[f][ctx][j] = update1(s->C2[f][ctx][j], 6);
-            s->C2[f][ctx][j + 1] = update1(s->C2[f][ctx][j + 1], 6);
+            s->C2[2*ctx+f][j] = update1(s->C2[2*ctx+f][j], 6);
+            s->C2[2*ctx+f][j + 1] = update1(s->C2[2*ctx+f][j + 1], 6);
             ctx += ctx + 1;
         } else {
             s->C0[ctx] = update0(s->C0[ctx], 2);
             s->C1[s->c1][ctx] = update0(s->C1[s->c1][ctx], 4);
-            s->C2[f][ctx][j] = update0(s->C2[f][ctx][j], 6);
-            s->C2[f][ctx][j + 1] = update0(s->C2[f][ctx][j + 1], 6);
+            s->C2[2*ctx+f][j] = update0(s->C2[2*ctx+f][j], 6);
+            s->C2[2*ctx+f][j + 1] = update0(s->C2[2*ctx+f][j + 1], 6);
             ctx += ctx;
         }
     }
tab: 248 wrap: offon