:: commit efd70955153c7b4ca28f20c12a94ef6aca6c484a

Kamila Szewczyk <kspalaiologos@gmail.com> — 2022-05-02 10:10

parents: 862f08a59b

rework the integer type convention

diff --git a/include/cm.h b/include/cm.h
index ff99d46..b59b614 100644
--- a/include/cm.h
+++ b/include/cm.h
@@ -2,22 +2,24 @@
 #ifndef _CM_H
 #define _CM_H
 
+#include "common.h"
+
 #include <inttypes.h>
 #include <stdint.h>
 
 typedef struct {
-    uint32_t low, high, code;
-    uint16_t C0[256], C1[256][256], C2[2][256][17];
-    int32_t c1, c2, run;
+    u32 low, high, code;
+    u16 C0[256], C1[256][256], C2[2][256][17];
+    s32 c1, c2, run;
 
-    uint8_t *in_queue, *out_queue;
-    int64_t input_ptr, output_ptr, input_max;
+    u8 *in_queue, *out_queue;
+    s32 input_ptr, output_ptr, input_max;
 } state;
 
 void flush(state * s);
 void init(state * s);
 void begin(state * s);
-void encode_byte(state * s, uint8_t c);
-uint8_t decode_byte(state * s);
+void encode_byte(state * s, u8 c);
+u8 decode_byte(state * s);
 
 #endif
diff --git a/include/common.h b/include/common.h
index 08ba53a..008419d 100644
--- a/include/common.h
+++ b/include/common.h
@@ -11,6 +11,7 @@
 typedef uint8_t u8;
 typedef uint16_t u16;
 typedef uint32_t u32;
+typedef uint64_t u64;
 
 typedef int8_t s8;
 typedef int16_t s16;
diff --git a/include/crc32.h b/include/crc32.h
index f67eb06..5119246 100644
--- a/include/crc32.h
+++ b/include/crc32.h
@@ -20,9 +20,11 @@
 #ifndef _CRC32_H
 #define _CRC32_H
 
+#include "common.h"
+
 #include <inttypes.h>
 #include <stddef.h>
 
-uint32_t crc32sum(uint32_t crc, uint8_t * buf, size_t size);
+u32 crc32sum(u32 crc, u8 * buf, size_t size);
 
 #endif
diff --git a/include/libsais.h b/include/libsais.h
index 57688a9..6625bd1 100644
--- a/include/libsais.h
+++ b/include/libsais.h
@@ -22,13 +22,9 @@ Please see the file LICENSE for full copyright information.
 --*/
 
 #ifndef LIBSAIS_H
-#define LIBSAIS_H 1
+#define LIBSAIS_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <stdint.h>
+#include "common.h"
 
 /**
  * Creates the libsais context that allows reusing allocated memory with each
@@ -38,18 +34,6 @@ extern "C" {
  */
 void * libsais_create_ctx(void);
 
-#if defined(_OPENMP)
-/**
- * Creates the libsais context that allows reusing allocated memory with each
- * parallel libsais operation using OpenMP. In multi-threaded environments, use
- * one context per thread for parallel executions.
- * @param threads The number of OpenMP threads to use (can be 0 for OpenMP
- * default).
- * @return the libsais context, NULL otherwise.
- */
-void * libsais_create_ctx_omp(int32_t threads);
-#endif
-
 /**
  * Destroys the libsass context and free previusly allocated memory.
  * @param ctx The libsais context (can be NULL).
@@ -66,8 +50,8 @@ void libsais_free_ctx(void * ctx);
  * @param freq [0..255] The output symbol frequency table (can be NULL).
  * @return 0 if no error occurred, -1 or -2 otherwise.
  */
-int32_t libsais(const uint8_t * T, int32_t * SA, int32_t n, int32_t fs,
-                int32_t * freq);
+s32 libsais(const u8 * T, s32 * SA, s32 n, s32 fs,
+                s32 * freq);
 
 /**
  * Constructs the suffix array of a given integer array.
@@ -81,8 +65,8 @@ int32_t libsais(const uint8_t * T, int32_t * SA, int32_t n, int32_t fs,
  * better 6k is recommended for optimal performance).
  * @return 0 if no error occurred, -1 or -2 otherwise.
  */
-int32_t libsais_int(int32_t * T, int32_t * SA, int32_t n, int32_t k,
-                    int32_t fs);
+s32 libsais_int(s32 * T, s32 * SA, s32 n, s32 k,
+                    s32 fs);
 
 /**
  * Constructs the suffix array of a given string using libsais context.
@@ -95,42 +79,8 @@ int32_t libsais_int(int32_t * T, int32_t * SA, int32_t n, int32_t k,
  * @param freq [0..255] The output symbol frequency table (can be NULL).
  * @return 0 if no error occurred, -1 or -2 otherwise.
  */
-int32_t libsais_ctx(const void * ctx, const uint8_t * T, int32_t * SA,
-                    int32_t n, int32_t fs, int32_t * freq);
-
-#if defined(_OPENMP)
-/**
- * Constructs the suffix array of a given string in parallel using OpenMP.
- * @param T [0..n-1] The input string.
- * @param SA [0..n-1+fs] The output array of suffixes.
- * @param n The length of the given string.
- * @param fs The extra space available at the end of SA array (0 should be
- * enough for most cases).
- * @param freq [0..255] The output symbol frequency table (can be NULL).
- * @param threads The number of OpenMP threads to use (can be 0 for OpenMP
- * default).
- * @return 0 if no error occurred, -1 or -2 otherwise.
- */
-int32_t libsais_omp(const uint8_t * T, int32_t * SA, int32_t n, int32_t fs,
-                    int32_t * freq, int32_t threads);
-
-/**
- * Constructs the suffix array of a given integer array in parallel using
- * OpenMP. Note, during construction input array will be modified, but restored
- * at the end if no errors occurred.
- * @param T [0..n-1] The input integer array.
- * @param SA [0..n-1+fs] The output array of suffixes.
- * @param n The length of the integer array.
- * @param k The alphabet size of the input integer array.
- * @param fs Extra space available at the end of SA array (can be 0, but 4k or
- * better 6k is recommended for optimal performance).
- * @param threads The number of OpenMP threads to use (can be 0 for OpenMP
- * default).
- * @return 0 if no error occurred, -1 or -2 otherwise.
- */
-int32_t libsais_int_omp(int32_t * T, int32_t * SA, int32_t n, int32_t k,
-                        int32_t fs, int32_t threads);
-#endif
+s32 libsais_ctx(const void * ctx, const u8 * T, s32 * SA,
+                    s32 n, s32 fs, s32 * freq);
 
 /**
  * Constructs the burrows-wheeler transformed string (BWT) of a given string.
@@ -143,8 +93,8 @@ int32_t libsais_int_omp(int32_t * T, int32_t * SA, int32_t n, int32_t k,
  * @param freq [0..255] The output symbol frequency table (can be NULL).
  * @return The primary index if no error occurred, -1 or -2 otherwise.
  */
-int32_t libsais_bwt(const uint8_t * T, uint8_t * U, int32_t * A, int32_t n,
-                    int32_t fs, int32_t * freq);
+s32 libsais_bwt(const u8 * T, u8 * U, s32 * A, s32 n,
+                    s32 fs, s32 * freq);
 
 /**
  * Constructs the burrows-wheeler transformed string (BWT) of a given string
@@ -160,8 +110,8 @@ int32_t libsais_bwt(const uint8_t * T, uint8_t * U, int32_t * A, int32_t n,
  * @param I [0..(n-1)/r] The output auxiliary indexes.
  * @return 0 if no error occurred, -1 or -2 otherwise.
  */
-int32_t libsais_bwt_aux(const uint8_t * T, uint8_t * U, int32_t * A, int32_t n,
-                        int32_t fs, int32_t * freq, int32_t r, int32_t * I);
+s32 libsais_bwt_aux(const u8 * T, u8 * U, s32 * A, s32 n,
+                        s32 fs, s32 * freq, s32 r, s32 * I);
 
 /**
  * Constructs the burrows-wheeler transformed string (BWT) of a given string
@@ -176,8 +126,8 @@ int32_t libsais_bwt_aux(const uint8_t * T, uint8_t * U, int32_t * A, int32_t n,
  * @param freq [0..255] The output symbol frequency table (can be NULL).
  * @return The primary index if no error occurred, -1 or -2 otherwise.
  */
-int32_t libsais_bwt_ctx(const void * ctx, const uint8_t * T, uint8_t * U,
-                        int32_t * A, int32_t n, int32_t fs, int32_t * freq);
+s32 libsais_bwt_ctx(const void * ctx, const u8 * T, u8 * U,
+                        s32 * A, s32 n, s32 fs, s32 * freq);
 
 /**
  * Constructs the burrows-wheeler transformed string (BWT) of a given string
@@ -194,48 +144,9 @@ int32_t libsais_bwt_ctx(const void * ctx, const uint8_t * T, uint8_t * U,
  * @param I [0..(n-1)/r] The output auxiliary indexes.
  * @return 0 if no error occurred, -1 or -2 otherwise.
  */
-int32_t libsais_bwt_aux_ctx(const void * ctx, const uint8_t * T, uint8_t * U,
-                            int32_t * A, int32_t n, int32_t fs, int32_t * freq,
-                            int32_t r, int32_t * I);
-
-#if defined(_OPENMP)
-/**
- * Constructs the burrows-wheeler transformed string (BWT) of a given string in
- * parallel using OpenMP.
- * @param T [0..n-1] The input string.
- * @param U [0..n-1] The output string (can be T).
- * @param A [0..n-1+fs] The temporary array.
- * @param n The length of the given string.
- * @param fs The extra space available at the end of A array (0 should be enough
- * for most cases).
- * @param freq [0..255] The output symbol frequency table (can be NULL).
- * @param threads The number of OpenMP threads to use (can be 0 for OpenMP
- * default).
- * @return The primary index if no error occurred, -1 or -2 otherwise.
- */
-int32_t libsais_bwt_omp(const uint8_t * T, uint8_t * U, int32_t * A, int32_t n,
-                        int32_t fs, int32_t * freq, int32_t threads);
-
-/**
- * Constructs the burrows-wheeler transformed string (BWT) of a given string
- * with auxiliary indexes in parallel using OpenMP.
- * @param T [0..n-1] The input string.
- * @param U [0..n-1] The output string (can be T).
- * @param A [0..n-1+fs] The temporary array.
- * @param n The length of the given string.
- * @param fs The extra space available at the end of A array (0 should be enough
- * for most cases).
- * @param freq [0..255] The output symbol frequency table (can be NULL).
- * @param r The sampling rate for auxiliary indexes (must be power of 2).
- * @param I [0..(n-1)/r] The output auxiliary indexes.
- * @param threads The number of OpenMP threads to use (can be 0 for OpenMP
- * default).
- * @return 0 if no error occurred, -1 or -2 otherwise.
- */
-int32_t libsais_bwt_aux_omp(const uint8_t * T, uint8_t * U, int32_t * A,
-                            int32_t n, int32_t fs, int32_t * freq, int32_t r,
-                            int32_t * I, int32_t threads);
-#endif
+s32 libsais_bwt_aux_ctx(const void * ctx, const u8 * T, u8 * U,
+                            s32 * A, s32 n, s32 fs, s32 * freq,
+                            s32 r, s32 * I);
 
 /**
  * Creates the libsais reverse BWT context that allows reusing allocated memory
@@ -245,18 +156,6 @@ int32_t libsais_bwt_aux_omp(const uint8_t * T, uint8_t * U, int32_t * A,
  */
 void * libsais_unbwt_create_ctx(void);
 
-#if defined(_OPENMP)
-/**
- * Creates the libsais reverse BWT context that allows reusing allocated memory
- * with each parallel libsais_unbwt_* operation using OpenMP. In multi-threaded
- * environments, use one context per thread for parallel executions.
- * @param threads The number of OpenMP threads to use (can be 0 for OpenMP
- * default).
- * @return the libsais context, NULL otherwise.
- */
-void * libsais_unbwt_create_ctx_omp(int32_t threads);
-#endif
-
 /**
  * Destroys the libsass reverse BWT context and free previusly allocated memory.
  * @param ctx The libsais context (can be NULL).
@@ -275,8 +174,8 @@ void libsais_unbwt_free_ctx(void * ctx);
  * @param i The primary index.
  * @return 0 if no error occurred, -1 or -2 otherwise.
  */
-int32_t libsais_unbwt(const uint8_t * T, uint8_t * U, int32_t * A, int32_t n,
-                      const int32_t * freq, int32_t i);
+s32 libsais_unbwt(const u8 * T, u8 * U, s32 * A, s32 n,
+                      const s32 * freq, s32 i);
 
 /**
  * Constructs the original string from a given burrows-wheeler transformed
@@ -291,9 +190,9 @@ int32_t libsais_unbwt(const uint8_t * T, uint8_t * U, int32_t * A, int32_t n,
  * @param i The primary index.
  * @return 0 if no error occurred, -1 or -2 otherwise.
  */
-int32_t libsais_unbwt_ctx(const void * ctx, const uint8_t * T, uint8_t * U,
-                          int32_t * A, int32_t n, const int32_t * freq,
-                          int32_t i);
+s32 libsais_unbwt_ctx(const void * ctx, const u8 * T, u8 * U,
+                          s32 * A, s32 n, const s32 * freq,
+                          s32 i);
 
 /**
  * Constructs the original string from a given burrows-wheeler transformed
@@ -308,9 +207,9 @@ int32_t libsais_unbwt_ctx(const void * ctx, const uint8_t * T, uint8_t * U,
  * @param I [0..(n-1)/r] The input auxiliary indexes.
  * @return 0 if no error occurred, -1 or -2 otherwise.
  */
-int32_t libsais_unbwt_aux(const uint8_t * T, uint8_t * U, int32_t * A,
-                          int32_t n, const int32_t * freq, int32_t r,
-                          const int32_t * I);
+s32 libsais_unbwt_aux(const u8 * T, u8 * U, s32 * A,
+                          s32 n, const s32 * freq, s32 r,
+                          const s32 * I);
 
 /**
  * Constructs the original string from a given burrows-wheeler transformed
@@ -326,48 +225,9 @@ int32_t libsais_unbwt_aux(const uint8_t * T, uint8_t * U, int32_t * A,
  * @param I [0..(n-1)/r] The input auxiliary indexes.
  * @return 0 if no error occurred, -1 or -2 otherwise.
  */
-int32_t libsais_unbwt_aux_ctx(const void * ctx, const uint8_t * T, uint8_t * U,
-                              int32_t * A, int32_t n, const int32_t * freq,
-                              int32_t r, const int32_t * I);
-
-#if defined(_OPENMP)
-/**
- * Constructs the original string from a given burrows-wheeler transformed
- * string (BWT) with primary index in parallel using OpenMP.
- * @param T [0..n-1] The input string.
- * @param U [0..n-1] The output string (can be T).
- * @param A [0..n] The temporary array (NOTE, temporary array must be n + 1
- * size).
- * @param n The length of the given string.
- * @param freq [0..255] The input symbol frequency table (can be NULL).
- * @param i The primary index.
- * @param threads The number of OpenMP threads to use (can be 0 for OpenMP
- * default).
- * @return 0 if no error occurred, -1 or -2 otherwise.
- */
-int32_t libsais_unbwt_omp(const uint8_t * T, uint8_t * U, int32_t * A,
-                          int32_t n, const int32_t * freq, int32_t i,
-                          int32_t threads);
-
-/**
- * Constructs the original string from a given burrows-wheeler transformed
- * string (BWT) with auxiliary indexes in parallel using OpenMP.
- * @param T [0..n-1] The input string.
- * @param U [0..n-1] The output string (can be T).
- * @param A [0..n] The temporary array (NOTE, temporary array must be n + 1
- * size).
- * @param n The length of the given string.
- * @param freq [0..255] The input symbol frequency table (can be NULL).
- * @param r The sampling rate for auxiliary indexes (must be power of 2).
- * @param I [0..(n-1)/r] The input auxiliary indexes.
- * @param threads The number of OpenMP threads to use (can be 0 for OpenMP
- * default).
- * @return 0 if no error occurred, -1 or -2 otherwise.
- */
-int32_t libsais_unbwt_aux_omp(const uint8_t * T, uint8_t * U, int32_t * A,
-                              int32_t n, const int32_t * freq, int32_t r,
-                              const int32_t * I, int32_t threads);
-#endif
+s32 libsais_unbwt_aux_ctx(const void * ctx, const u8 * T, u8 * U,
+                              s32 * A, s32 n, const s32 * freq,
+                              s32 r, const s32 * I);
 
 /**
  * Constructs the permuted longest common prefix array (PLCP) of a given string
@@ -378,8 +238,8 @@ int32_t libsais_unbwt_aux_omp(const uint8_t * T, uint8_t * U, int32_t * A,
  * @param n The length of the string and the suffix array.
  * @return 0 if no error occurred, -1 otherwise.
  */
-int32_t libsais_plcp(const uint8_t * T, const int32_t * SA, int32_t * PLCP,
-                     int32_t n);
+s32 libsais_plcp(const u8 * T, const s32 * SA, s32 * PLCP,
+                     s32 n);
 
 /**
  * Constructs the longest common prefix array (LCP) of a given permuted longest
@@ -391,42 +251,7 @@ int32_t libsais_plcp(const uint8_t * T, const int32_t * SA, int32_t * PLCP,
  * suffix array.
  * @return 0 if no error occurred, -1 otherwise.
  */
-int32_t libsais_lcp(const int32_t * PLCP, const int32_t * SA, int32_t * LCP,
-                    int32_t n);
-
-#if defined(_OPENMP)
-/**
- * Constructs the permuted longest common prefix array (PLCP) of a given string
- * and a suffix array in parallel using OpenMP.
- * @param T [0..n-1] The input string.
- * @param SA [0..n-1] The input suffix array.
- * @param PLCP [0..n-1] The output permuted longest common prefix array.
- * @param n The length of the string and the suffix array.
- * @param threads The number of OpenMP threads to use (can be 0 for OpenMP
- * default).
- * @return 0 if no error occurred, -1 otherwise.
- */
-int32_t libsais_plcp_omp(const uint8_t * T, const int32_t * SA, int32_t * PLCP,
-                         int32_t n, int32_t threads);
-
-/**
- * Constructs the longest common prefix array (LCP) of a given permuted longest
- * common prefix array (PLCP) and a suffix array in parallel using OpenMP.
- * @param PLCP [0..n-1] The input permuted longest common prefix array.
- * @param SA [0..n-1] The input suffix array.
- * @param LCP [0..n-1] The output longest common prefix array (can be SA).
- * @param n The length of the permuted longest common prefix array and the
- * suffix array.
- * @param threads The number of OpenMP threads to use (can be 0 for OpenMP
- * default).
- * @return 0 if no error occurred, -1 otherwise.
- */
-int32_t libsais_lcp_omp(const int32_t * PLCP, const int32_t * SA, int32_t * LCP,
-                        int32_t n, int32_t threads);
-#endif
-
-#ifdef __cplusplus
-}
-#endif
+s32 libsais_lcp(const s32 * PLCP, const s32 * SA, s32 * LCP,
+                    s32 n);
 
 #endif
diff --git a/include/mtf.h b/include/mtf.h
index f775f2a..735cb6d 100644
--- a/include/mtf.h
+++ b/include/mtf.h
@@ -23,13 +23,15 @@
 #include <inttypes.h>
 #include <stddef.h>
 
+#include "common.h"
+
 struct mtf_state {
-    uint32_t prev[256], curr[256], symbols[256], ranks[256];
+    u32 prev[256], curr[256], symbols[256], ranks[256];
 };
 
-void mtf_encode(struct mtf_state * mtf, uint8_t * src, uint8_t * dst,
-                uint32_t count);
-void mtf_decode(struct mtf_state * mtf, uint8_t * src, uint8_t * dst,
-                uint32_t count);
+void mtf_encode(struct mtf_state * mtf, u8 * src, u8 * dst,
+                u32 count);
+void mtf_decode(struct mtf_state * mtf, u8 * src, u8 * dst,
+                u32 count);
 
 #endif
diff --git a/include/rle.h b/include/rle.h
index da5f579..41ac60b 100644
--- a/include/rle.h
+++ b/include/rle.h
@@ -5,7 +5,9 @@
 #include <stddef.h>
 #include <stdint.h>
 
-int32_t mrlec(uint8_t * in, int32_t inlen, uint8_t * out);
-int32_t mrled(uint8_t * in, uint8_t * out, int32_t outlen);
+#include "common.h"
+
+s32 mrlec(u8 * in, s32 inlen, u8 * out);
+s32 mrled(u8 * in, u8 * out, s32 outlen);
 
 #endif
diff --git a/include/srt.h b/include/srt.h
index 709c38a..17a56c5 100644
--- a/include/srt.h
+++ b/include/srt.h
@@ -23,18 +23,20 @@
 #include <inttypes.h>
 #include <stddef.h>
 
+#include "common.h"
+
 struct srt_state {
-    uint32_t freqs[256];
-    uint8_t symbols[256];
-    uint32_t r2s[256];
-    uint32_t s2r[256];
-    uint32_t buckets[256];
-    uint32_t bucket_ends[256];
+    u32 freqs[256];
+    u8 symbols[256];
+    u32 r2s[256];
+    u32 s2r[256];
+    u32 buckets[256];
+    u32 bucket_ends[256];
 };
 
-uint32_t srt_encode(struct srt_state * mtf, uint8_t * src, uint8_t * dst,
-                    uint32_t count);
-uint32_t srt_decode(struct srt_state * mtf, uint8_t * src, uint8_t * dst,
-                    uint32_t count);
+u32 srt_encode(struct srt_state * mtf, u8 * src, u8 * dst,
+                    u32 count);
+u32 srt_decode(struct srt_state * mtf, u8 * src, u8 * dst,
+                    u32 count);
 
 #endif
diff --git a/src/cm.c b/src/cm.c
index 0da95bf..336aca5 100644
--- a/src/cm.c
+++ b/src/cm.c
@@ -1,17 +1,17 @@
 
 #include "cm.h"
 
-static void write_out(state * s, uint8_t c) {
+static void write_out(state * s, u8 c) {
     s->out_queue[s->output_ptr++] = c;
 }
 
-static uint8_t read_in(state * s) {
+static u8 read_in(state * s) {
     if (s->input_ptr < s->input_max) return s->in_queue[s->input_ptr++];
     return -1;
 }
 
-static void encodebit0(state * s, uint32_t p) {
-    s->low += (((uint64_t)(s->high - s->low) * p) >> 18) + 1;
+static void encodebit0(state * s, u32 p) {
+    s->low += (((u64)(s->high - s->low) * p) >> 18) + 1;
     while ((s->low ^ s->high) < (1 << 24)) {
         write_out(s, s->low >> 24);
         s->low <<= 8;
@@ -19,8 +19,8 @@ static void encodebit0(state * s, uint32_t p) {
     }
 }
 
-static void encodebit1(state * s, uint32_t p) {
-    s->high = s->low + (((uint64_t)(s->high - s->low) * p) >> 18);
+static void encodebit1(state * s, u32 p) {
+    s->high = s->low + (((u64)(s->high - s->low) * p) >> 18);
     while ((s->low ^ s->high) < (1 << 24)) {
         write_out(s, s->low >> 24);
         s->low <<= 8;
@@ -28,9 +28,9 @@ static void encodebit1(state * s, uint32_t p) {
     }
 }
 
-static uint8_t decodebit(state * s, uint32_t p) {
-    const uint32_t mid = s->low + (((uint64_t)(s->high - s->low) * p) >> 18);
-    const uint8_t bit = s->code <= mid;
+static u8 decodebit(state * s, u32 p) {
+    const u32 mid = s->low + (((u64)(s->high - s->low) * p) >> 18);
+    const u8 bit = s->code <= mid;
     if (bit)
         s->high = mid;
     else
@@ -78,7 +78,7 @@ void begin(state * s) {
             for (int k = 0; k < 17; k++) s->C2[i][j][k] = (k << 12) - (k == 16);
 }
 
-void encode_byte(state * s, uint8_t c) {
+void encode_byte(state * s, u8 c) {
     if (s->c1 == s->c2)
         ++s->run;
     else
@@ -122,7 +122,7 @@ void encode_byte(state * s, uint8_t c) {
     s->c1 = ctx & 255;
 }
 
-uint8_t decode_byte(state * s) {
+u8 decode_byte(state * s) {
     if (s->c1 == s->c2)
         ++s->run;
     else
diff --git a/src/crc32.c b/src/crc32.c
index 94bff14..2b88e98 100644
--- a/src/crc32.c
+++ b/src/crc32.c
@@ -19,7 +19,7 @@
 
 #include "crc32.h"
 
-static const uint32_t crc32Table[256] = {
+static const u32 crc32Table[256] = {
     0x00000000L, 0xF26B8303L, 0xE13B70F7L, 0x1350F3F4L, 0xC79A971FL,
     0x35F1141CL, 0x26A1E7E8L, 0xD4CA64EBL, 0x8AD958CFL, 0x78B2DBCCL,
     0x6BE22838L, 0x9989AB3BL, 0x4D43CFD0L, 0xBF284CD3L, 0xAC78BF27L,
@@ -74,7 +74,7 @@ static const uint32_t crc32Table[256] = {
     0xAD7D5351L
 };
 
-uint32_t crc32sum(uint32_t crc, uint8_t * buf, size_t size) {
+u32 crc32sum(u32 crc, u8 * buf, size_t size) {
     while (size--) crc = crc32Table[(crc ^ *(buf++)) & 0xff] ^ (crc >> 8);
     return crc;
 }
diff --git a/src/libsais.c b/src/libsais.c
index 03dd7a4..77e34da 100644
--- a/src/libsais.c
+++ b/src/libsais.c
@@ -35,8 +35,8 @@ Please see the file LICENSE for full copyright information.
     #define UNUSED(_x) (void)(_x)
 #endif
 
-typedef int32_t sa_sint_t;
-typedef uint32_t sa_uint_t;
+typedef s32 sa_sint_t;
+typedef u32 sa_uint_t;
 typedef ptrdiff_t fast_sint_t;
 typedef size_t fast_uint_t;
 
@@ -72,7 +72,7 @@ typedef union LIBSAIS_THREAD_STATE {
         LIBSAIS_THREAD_CACHE * cache;
     } state;
 
-    uint8_t padding[64];
+    u8 padding[64];
 } LIBSAIS_THREAD_STATE;
 
 typedef struct LIBSAIS_CONTEXT {
@@ -83,7 +83,7 @@ typedef struct LIBSAIS_CONTEXT {
 
 typedef struct LIBSAIS_UNBWT_CONTEXT {
     sa_uint_t * bucket2;
-    uint16_t * fastbits;
+    u16 * fastbits;
     sa_uint_t * buckets;
     fast_sint_t threads;
 } LIBSAIS_UNBWT_CONTEXT;
@@ -168,7 +168,7 @@ typedef struct LIBSAIS_UNBWT_CONTEXT {
     #elif defined(_MSC_VER) && !defined(__INTEL_COMPILER)
         #define libsais_bswap16(x) (_byteswap_ushort(x))
     #else
-        #define libsais_bswap16(x) ((uint16_t)(x >> 8) | (uint16_t)(x << 8))
+        #define libsais_bswap16(x) ((u16)(x >> 8) | (u16)(x << 8))
     #endif
 #elif !defined(__LITTLE_ENDIAN__) && defined(__BIG_ENDIAN__)
     #define libsais_bswap16(x) (x)
@@ -520,7 +520,7 @@ static void libsais_accumulate_counts_s32(sa_sint_t * RESTRICT buckets,
 
 #endif
 
-static void libsais_gather_lms_suffixes_8u(const uint8_t * RESTRICT T,
+static void libsais_gather_lms_suffixes_8u(const u8 * RESTRICT T,
                                            sa_sint_t * RESTRICT SA, sa_sint_t n,
                                            fast_sint_t m,
                                            fast_sint_t omp_block_start,
@@ -572,7 +572,7 @@ static void libsais_gather_lms_suffixes_8u(const uint8_t * RESTRICT T,
 }
 
 static void libsais_gather_lms_suffixes_8u_omp(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
     sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
 #if defined(_OPENMP)
     #pragma omp parallel num_threads(threads) if (threads > 1 && n >= 65536 && \
@@ -874,7 +874,7 @@ static void libsais_count_compacted_lms_suffixes_32s_2k(
 #endif
 
 static sa_sint_t libsais_count_and_gather_lms_suffixes_8u(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
     sa_sint_t * RESTRICT buckets, fast_sint_t omp_block_start,
     fast_sint_t omp_block_size) {
     memset(buckets, 0, 4 * ALPHABET_SIZE * sizeof(sa_sint_t));
@@ -940,7 +940,7 @@ static sa_sint_t libsais_count_and_gather_lms_suffixes_8u(
 }
 
 static sa_sint_t libsais_count_and_gather_lms_suffixes_8u_omp(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
     sa_sint_t * RESTRICT buckets, sa_sint_t threads,
     LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
     sa_sint_t m = 0;
@@ -1846,7 +1846,7 @@ static void libsais_initialize_buckets_end_32s_1k(
 }
 
 static sa_sint_t libsais_initialize_buckets_for_lms_suffixes_radix_sort_8u(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT buckets,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT buckets,
     sa_sint_t first_lms_suffix) {
     {
         fast_uint_t s = 0;
@@ -1961,7 +1961,7 @@ static void libsais_initialize_buckets_for_radix_and_partial_sorting_32s_4k(
 }
 
 static void libsais_radix_sort_lms_suffixes_8u(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
     sa_sint_t * RESTRICT induction_bucket, fast_sint_t omp_block_start,
     fast_sint_t omp_block_size) {
     const fast_sint_t prefetch_distance = 32;
@@ -1994,7 +1994,7 @@ static void libsais_radix_sort_lms_suffixes_8u(
 }
 
 static void libsais_radix_sort_lms_suffixes_8u_omp(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
     sa_sint_t m, sa_sint_t * RESTRICT buckets, sa_sint_t threads,
     LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
 #if defined(_OPENMP)
@@ -2598,7 +2598,7 @@ static void libsais_radix_sort_set_markers_32s_4k_omp(
 }
 
 static void libsais_initialize_buckets_for_partial_sorting_8u(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT buckets,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT buckets,
     sa_sint_t first_lms_suffix, sa_sint_t left_suffixes_count) {
     sa_sint_t * RESTRICT temp_bucket = &buckets[4 * ALPHABET_SIZE];
 
@@ -2671,7 +2671,7 @@ static void libsais_initialize_buckets_for_partial_sorting_32s_6k(
 }
 
 static sa_sint_t libsais_partial_sorting_scan_left_to_right_8u(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
     sa_sint_t * RESTRICT buckets, sa_sint_t d, fast_sint_t omp_block_start,
     fast_sint_t omp_block_size) {
     const fast_sint_t prefetch_distance = 32;
@@ -2725,7 +2725,7 @@ static sa_sint_t libsais_partial_sorting_scan_left_to_right_8u(
 #if defined(_OPENMP)
 
 static void libsais_partial_sorting_scan_left_to_right_8u_block_prepare(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
     sa_sint_t * RESTRICT buckets, LIBSAIS_THREAD_CACHE * RESTRICT cache,
     fast_sint_t omp_block_start, fast_sint_t omp_block_size,
     LIBSAIS_THREAD_STATE * RESTRICT state) {
@@ -2818,7 +2818,7 @@ static void libsais_partial_sorting_scan_left_to_right_8u_block_place(
 }
 
 static sa_sint_t libsais_partial_sorting_scan_left_to_right_8u_block_omp(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
     sa_sint_t * RESTRICT buckets, sa_sint_t d, fast_sint_t block_start,
     fast_sint_t block_size, sa_sint_t threads,
     LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
@@ -2914,7 +2914,7 @@ static sa_sint_t libsais_partial_sorting_scan_left_to_right_8u_block_omp(
 #endif
 
 static sa_sint_t libsais_partial_sorting_scan_left_to_right_8u_omp(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
     sa_sint_t * RESTRICT buckets, sa_sint_t left_suffixes_count, sa_sint_t d,
     sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
     sa_sint_t * RESTRICT induction_bucket = &buckets[4 * ALPHABET_SIZE];
@@ -3997,7 +3997,7 @@ static void libsais_partial_sorting_shift_buckets_32s_6k(
 }
 
 static sa_sint_t libsais_partial_sorting_scan_right_to_left_8u(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
     sa_sint_t * RESTRICT buckets, sa_sint_t d, fast_sint_t omp_block_start,
     fast_sint_t omp_block_size) {
     const fast_sint_t prefetch_distance = 32;
@@ -4051,7 +4051,7 @@ static sa_sint_t libsais_partial_sorting_scan_right_to_left_8u(
 #if defined(_OPENMP)
 
 static void libsais_partial_sorting_scan_right_to_left_8u_block_prepare(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
     sa_sint_t * RESTRICT buckets, LIBSAIS_THREAD_CACHE * RESTRICT cache,
     fast_sint_t omp_block_start, fast_sint_t omp_block_size,
     LIBSAIS_THREAD_STATE * RESTRICT state) {
@@ -4144,7 +4144,7 @@ static void libsais_partial_sorting_scan_right_to_left_8u_block_place(
 }
 
 static sa_sint_t libsais_partial_sorting_scan_right_to_left_8u_block_omp(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
     sa_sint_t * RESTRICT buckets, sa_sint_t d, fast_sint_t block_start,
     fast_sint_t block_size, sa_sint_t threads,
     LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
@@ -4240,7 +4240,7 @@ static sa_sint_t libsais_partial_sorting_scan_right_to_left_8u_block_omp(
 #endif
 
 static void libsais_partial_sorting_scan_right_to_left_8u_omp(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
     sa_sint_t * RESTRICT buckets, sa_sint_t first_lms_suffix,
     sa_sint_t left_suffixes_count, sa_sint_t d, sa_sint_t threads,
     LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
@@ -5335,7 +5335,7 @@ static void libsais_partial_sorting_gather_lms_suffixes_32s_1k_omp(
 }
 
 static void libsais_induce_partial_order_8u_omp(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
     sa_sint_t * RESTRICT buckets, sa_sint_t first_lms_suffix,
     sa_sint_t left_suffixes_count, sa_sint_t threads,
     LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
@@ -6273,7 +6273,7 @@ static void libsais_place_lms_suffixes_histogram_32s_2k(
 }
 
 static void libsais_final_bwt_scan_left_to_right_8u(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
     sa_sint_t * RESTRICT induction_bucket, fast_sint_t omp_block_start,
     fast_sint_t omp_block_size) {
     const fast_sint_t prefetch_distance = 32;
@@ -6285,12 +6285,12 @@ static void libsais_final_bwt_scan_left_to_right_8u(
         libsais_prefetchw(&SA[i + 2 * prefetch_distance]);
 
         sa_sint_t s0 = SA[i + prefetch_distance + 0];
-        const uint8_t * Ts0 = &T[s0] - 1;
+        const u8 * Ts0 = &T[s0] - 1;
         libsais_prefetch(s0 > 0 ? Ts0 : NULL);
         Ts0--;
         libsais_prefetch(s0 > 0 ? Ts0 : NULL);
         sa_sint_t s1 = SA[i + prefetch_distance + 1];
-        const uint8_t * Ts1 = &T[s1] - 1;
+        const u8 * Ts1 = &T[s1] - 1;
         libsais_prefetch(s1 > 0 ? Ts1 : NULL);
         Ts1--;
         libsais_prefetch(s1 > 0 ? Ts1 : NULL);
@@ -6326,7 +6326,7 @@ static void libsais_final_bwt_scan_left_to_right_8u(
 }
 
 static void libsais_final_bwt_aux_scan_left_to_right_8u(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t rm,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t rm,
     sa_sint_t * RESTRICT I, sa_sint_t * RESTRICT induction_bucket,
     fast_sint_t omp_block_start, fast_sint_t omp_block_size) {
     const fast_sint_t prefetch_distance = 32;
@@ -6338,12 +6338,12 @@ static void libsais_final_bwt_aux_scan_left_to_right_8u(
         libsais_prefetchw(&SA[i + 2 * prefetch_distance]);
 
         sa_sint_t s0 = SA[i + prefetch_distance + 0];
-        const uint8_t * Ts0 = &T[s0] - 1;
+        const u8 * Ts0 = &T[s0] - 1;
         libsais_prefetch(s0 > 0 ? Ts0 : NULL);
         Ts0--;
         libsais_prefetch(s0 > 0 ? Ts0 : NULL);
         sa_sint_t s1 = SA[i + prefetch_distance + 1];
-        const uint8_t * Ts1 = &T[s1] - 1;
+        const u8 * Ts1 = &T[s1] - 1;
         libsais_prefetch(s1 > 0 ? Ts1 : NULL);
         Ts1--;
         libsais_prefetch(s1 > 0 ? Ts1 : NULL);
@@ -6388,7 +6388,7 @@ static void libsais_final_bwt_aux_scan_left_to_right_8u(
 }
 
 static void libsais_final_sorting_scan_left_to_right_8u(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
     sa_sint_t * RESTRICT induction_bucket, fast_sint_t omp_block_start,
     fast_sint_t omp_block_size) {
     const fast_sint_t prefetch_distance = 32;
@@ -6400,12 +6400,12 @@ static void libsais_final_sorting_scan_left_to_right_8u(
         libsais_prefetchw(&SA[i + 2 * prefetch_distance]);
 
         sa_sint_t s0 = SA[i + prefetch_distance + 0];
-        const uint8_t * Ts0 = &T[s0] - 1;
+        const u8 * Ts0 = &T[s0] - 1;
         libsais_prefetch(s0 > 0 ? Ts0 : NULL);
         Ts0--;
         libsais_prefetch(s0 > 0 ? Ts0 : NULL);
         sa_sint_t s1 = SA[i + prefetch_distance + 1];
-        const uint8_t * Ts1 = &T[s1] - 1;
+        const u8 * Ts1 = &T[s1] - 1;
         libsais_prefetch(s1 > 0 ? Ts1 : NULL);
         Ts1--;
         libsais_prefetch(s1 > 0 ? Ts1 : NULL);
@@ -6496,7 +6496,7 @@ static void libsais_final_sorting_scan_left_to_right_32s(
 #if defined(_OPENMP)
 
 static fast_sint_t libsais_final_bwt_scan_left_to_right_8u_block_prepare(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
     sa_sint_t * RESTRICT buckets, LIBSAIS_THREAD_CACHE * RESTRICT cache,
     fast_sint_t omp_block_start, fast_sint_t omp_block_size) {
     const fast_sint_t prefetch_distance = 32;
@@ -6510,12 +6510,12 @@ static fast_sint_t libsais_final_bwt_scan_left_to_right_8u_block_prepare(
         libsais_prefetchw(&SA[i + 2 * prefetch_distance]);
 
         sa_sint_t s0 = SA[i + prefetch_distance + 0];
-        const uint8_t * Ts0 = &T[s0] - 1;
+        const u8 * Ts0 = &T[s0] - 1;
         libsais_prefetch(s0 > 0 ? Ts0 : NULL);
         Ts0--;
         libsais_prefetch(s0 > 0 ? Ts0 : NULL);
         sa_sint_t s1 = SA[i + prefetch_distance + 1];
-        const uint8_t * Ts1 = &T[s1] - 1;
+        const u8 * Ts1 = &T[s1] - 1;
         libsais_prefetch(s1 > 0 ? Ts1 : NULL);
         Ts1--;
         libsais_prefetch(s1 > 0 ? Ts1 : NULL);
@@ -6556,7 +6556,7 @@ static fast_sint_t libsais_final_bwt_scan_left_to_right_8u_block_prepare(
 }
 
 static fast_sint_t libsais_final_sorting_scan_left_to_right_8u_block_prepare(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
     sa_sint_t * RESTRICT buckets, LIBSAIS_THREAD_CACHE * RESTRICT cache,
     fast_sint_t omp_block_start, fast_sint_t omp_block_size) {
     const fast_sint_t prefetch_distance = 32;
@@ -6570,12 +6570,12 @@ static fast_sint_t libsais_final_sorting_scan_left_to_right_8u_block_prepare(
         libsais_prefetchw(&SA[i + 2 * prefetch_distance]);
 
         sa_sint_t s0 = SA[i + prefetch_distance + 0];
-        const uint8_t * Ts0 = &T[s0] - 1;
+        const u8 * Ts0 = &T[s0] - 1;
         libsais_prefetch(s0 > 0 ? Ts0 : NULL);
         Ts0--;
         libsais_prefetch(s0 > 0 ? Ts0 : NULL);
         sa_sint_t s1 = SA[i + prefetch_distance + 1];
-        const uint8_t * Ts1 = &T[s1] - 1;
+        const u8 * Ts1 = &T[s1] - 1;
         libsais_prefetch(s1 > 0 ? Ts1 : NULL);
         Ts1--;
         libsais_prefetch(s1 > 0 ? Ts1 : NULL);
@@ -6802,7 +6802,7 @@ static void libsais_final_sorting_scan_left_to_right_32s_block_sort(
 }
 
 static void libsais_final_bwt_scan_left_to_right_8u_block_omp(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
     sa_sint_t * RESTRICT induction_bucket, fast_sint_t block_start,
     fast_sint_t block_size, sa_sint_t threads,
     LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
@@ -6875,7 +6875,7 @@ static void libsais_final_bwt_scan_left_to_right_8u_block_omp(
 }
 
 static void libsais_final_bwt_aux_scan_left_to_right_8u_block_omp(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t rm,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t rm,
     sa_sint_t * RESTRICT I, sa_sint_t * RESTRICT induction_bucket,
     fast_sint_t block_start, fast_sint_t block_size, sa_sint_t threads,
     LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
@@ -6949,7 +6949,7 @@ static void libsais_final_bwt_aux_scan_left_to_right_8u_block_omp(
 }
 
 static void libsais_final_sorting_scan_left_to_right_8u_block_omp(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
     sa_sint_t * RESTRICT induction_bucket, fast_sint_t block_start,
     fast_sint_t block_size, sa_sint_t threads,
     LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
@@ -7082,7 +7082,7 @@ static void libsais_final_sorting_scan_left_to_right_32s_block_omp(
 #endif
 
 static void libsais_final_bwt_scan_left_to_right_8u_omp(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, fast_sint_t n,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, fast_sint_t n,
     sa_sint_t * RESTRICT induction_bucket, sa_sint_t threads,
     LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
     SA[induction_bucket[T[(sa_sint_t)n - 1]]++] =
@@ -7140,7 +7140,7 @@ static void libsais_final_bwt_scan_left_to_right_8u_omp(
 }
 
 static void libsais_final_bwt_aux_scan_left_to_right_8u_omp(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, fast_sint_t n,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, fast_sint_t n,
     sa_sint_t rm, sa_sint_t * RESTRICT I, sa_sint_t * RESTRICT induction_bucket,
     sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
     SA[induction_bucket[T[(sa_sint_t)n - 1]]++] =
@@ -7207,7 +7207,7 @@ static void libsais_final_bwt_aux_scan_left_to_right_8u_omp(
 }
 
 static void libsais_final_sorting_scan_left_to_right_8u_omp(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, fast_sint_t n,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, fast_sint_t n,
     sa_sint_t * RESTRICT induction_bucket, sa_sint_t threads,
     LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
     SA[induction_bucket[T[(sa_sint_t)n - 1]]++] =
@@ -7296,7 +7296,7 @@ static void libsais_final_sorting_scan_left_to_right_32s_omp(
 }
 
 static sa_sint_t libsais_final_bwt_scan_right_to_left_8u(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
     sa_sint_t * RESTRICT induction_bucket, fast_sint_t omp_block_start,
     fast_sint_t omp_block_size) {
     const fast_sint_t prefetch_distance = 32;
@@ -7309,12 +7309,12 @@ static sa_sint_t libsais_final_bwt_scan_right_to_left_8u(
         libsais_prefetchw(&SA[i - 2 * prefetch_distance]);
 
         sa_sint_t s0 = SA[i - prefetch_distance - 0];
-        const uint8_t * Ts0 = &T[s0] - 1;
+        const u8 * Ts0 = &T[s0] - 1;
         libsais_prefetch(s0 > 0 ? Ts0 : NULL);
         Ts0--;
         libsais_prefetch(s0 > 0 ? Ts0 : NULL);
         sa_sint_t s1 = SA[i - prefetch_distance - 1];
-        const uint8_t * Ts1 = &T[s1] - 1;
+        const u8 * Ts1 = &T[s1] - 1;
         libsais_prefetch(s1 > 0 ? Ts1 : NULL);
         Ts1--;
         libsais_prefetch(s1 > 0 ? Ts1 : NULL);
@@ -7324,7 +7324,7 @@ static sa_sint_t libsais_final_bwt_scan_right_to_left_8u(
         SA[i - 0] = p0 & SAINT_MAX;
         if (p0 > 0) {
             p0--;
-            uint8_t c0 = T[p0 - (p0 > 0)], c1 = T[p0];
+            u8 c0 = T[p0 - (p0 > 0)], c1 = T[p0];
             SA[i - 0] = c1;
             sa_sint_t t = c0 | SAINT_MIN;
             SA[--induction_bucket[c1]] = (c0 <= c1) ? p0 : t;
@@ -7335,7 +7335,7 @@ static sa_sint_t libsais_final_bwt_scan_right_to_left_8u(
         SA[i - 1] = p1 & SAINT_MAX;
         if (p1 > 0) {
             p1--;
-            uint8_t c0 = T[p1 - (p1 > 0)], c1 = T[p1];
+            u8 c0 = T[p1 - (p1 > 0)], c1 = T[p1];
             SA[i - 1] = c1;
             sa_sint_t t = c0 | SAINT_MIN;
             SA[--induction_bucket[c1]] = (c0 <= c1) ? p1 : t;
@@ -7348,7 +7348,7 @@ static sa_sint_t libsais_final_bwt_scan_right_to_left_8u(
         SA[i] = p & SAINT_MAX;
         if (p > 0) {
             p--;
-            uint8_t c0 = T[p - (p > 0)], c1 = T[p];
+            u8 c0 = T[p - (p > 0)], c1 = T[p];
             SA[i] = c1;
             sa_sint_t t = c0 | SAINT_MIN;
             SA[--induction_bucket[c1]] = (c0 <= c1) ? p : t;
@@ -7359,7 +7359,7 @@ static sa_sint_t libsais_final_bwt_scan_right_to_left_8u(
 }
 
 static void libsais_final_bwt_aux_scan_right_to_left_8u(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t rm,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t rm,
     sa_sint_t * RESTRICT I, sa_sint_t * RESTRICT induction_bucket,
     fast_sint_t omp_block_start, fast_sint_t omp_block_size) {
     const fast_sint_t prefetch_distance = 32;
@@ -7371,12 +7371,12 @@ static void libsais_final_bwt_aux_scan_right_to_left_8u(
         libsais_prefetchw(&SA[i - 2 * prefetch_distance]);
 
         sa_sint_t s0 = SA[i - prefetch_distance - 0];
-        const uint8_t * Ts0 = &T[s0] - 1;
+        const u8 * Ts0 = &T[s0] - 1;
         libsais_prefetch(s0 > 0 ? Ts0 : NULL);
         Ts0--;
         libsais_prefetch(s0 > 0 ? Ts0 : NULL);
         sa_sint_t s1 = SA[i - prefetch_distance - 1];
-        const uint8_t * Ts1 = &T[s1] - 1;
+        const u8 * Ts1 = &T[s1] - 1;
         libsais_prefetch(s1 > 0 ? Ts1 : NULL);
         Ts1--;
         libsais_prefetch(s1 > 0 ? Ts1 : NULL);
@@ -7385,7 +7385,7 @@ static void libsais_final_bwt_aux_scan_right_to_left_8u(
         SA[i - 0] = p0 & SAINT_MAX;
         if (p0 > 0) {
             p0--;
-            uint8_t c0 = T[p0 - (p0 > 0)], c1 = T[p0];
+            u8 c0 = T[p0 - (p0 > 0)], c1 = T[p0];
             SA[i - 0] = c1;
             sa_sint_t t = c0 | SAINT_MIN;
             SA[--induction_bucket[c1]] = (c0 <= c1) ? p0 : t;
@@ -7398,7 +7398,7 @@ static void libsais_final_bwt_aux_scan_right_to_left_8u(
         SA[i - 1] = p1 & SAINT_MAX;
         if (p1 > 0) {
             p1--;
-            uint8_t c0 = T[p1 - (p1 > 0)], c1 = T[p1];
+            u8 c0 = T[p1 - (p1 > 0)], c1 = T[p1];
             SA[i - 1] = c1;
             sa_sint_t t = c0 | SAINT_MIN;
             SA[--induction_bucket[c1]] = (c0 <= c1) ? p1 : t;
@@ -7413,7 +7413,7 @@ static void libsais_final_bwt_aux_scan_right_to_left_8u(
         SA[i] = p & SAINT_MAX;
         if (p > 0) {
             p--;
-            uint8_t c0 = T[p - (p > 0)], c1 = T[p];
+            u8 c0 = T[p - (p > 0)], c1 = T[p];
             SA[i] = c1;
             sa_sint_t t = c0 | SAINT_MIN;
             SA[--induction_bucket[c1]] = (c0 <= c1) ? p : t;
@@ -7425,7 +7425,7 @@ static void libsais_final_bwt_aux_scan_right_to_left_8u(
 }
 
 static void libsais_final_sorting_scan_right_to_left_8u(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
     sa_sint_t * RESTRICT induction_bucket, fast_sint_t omp_block_start,
     fast_sint_t omp_block_size) {
     const fast_sint_t prefetch_distance = 32;
@@ -7437,12 +7437,12 @@ static void libsais_final_sorting_scan_right_to_left_8u(
         libsais_prefetchw(&SA[i - 2 * prefetch_distance]);
 
         sa_sint_t s0 = SA[i - prefetch_distance - 0];
-        const uint8_t * Ts0 = &T[s0] - 1;
+        const u8 * Ts0 = &T[s0] - 1;
         libsais_prefetch(s0 > 0 ? Ts0 : NULL);
         Ts0--;
         libsais_prefetch(s0 > 0 ? Ts0 : NULL);
         sa_sint_t s1 = SA[i - prefetch_distance - 1];
-        const uint8_t * Ts1 = &T[s1] - 1;
+        const u8 * Ts1 = &T[s1] - 1;
         libsais_prefetch(s1 > 0 ? Ts1 : NULL);
         Ts1--;
         libsais_prefetch(s1 > 0 ? Ts1 : NULL);
@@ -7533,7 +7533,7 @@ static void libsais_final_sorting_scan_right_to_left_32s(
 #if defined(_OPENMP)
 
 static fast_sint_t libsais_final_bwt_scan_right_to_left_8u_block_prepare(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
     sa_sint_t * RESTRICT buckets, LIBSAIS_THREAD_CACHE * RESTRICT cache,
     fast_sint_t omp_block_start, fast_sint_t omp_block_size) {
     const fast_sint_t prefetch_distance = 32;
@@ -7547,12 +7547,12 @@ static fast_sint_t libsais_final_bwt_scan_right_to_left_8u_block_prepare(
         libsais_prefetchw(&SA[i - 2 * prefetch_distance]);
 
         sa_sint_t s0 = SA[i - prefetch_distance - 0];
-        const uint8_t * Ts0 = &T[s0] - 1;
+        const u8 * Ts0 = &T[s0] - 1;
         libsais_prefetch(s0 > 0 ? Ts0 : NULL);
         Ts0--;
         libsais_prefetch(s0 > 0 ? Ts0 : NULL);
         sa_sint_t s1 = SA[i - prefetch_distance - 1];
-        const uint8_t * Ts1 = &T[s1] - 1;
+        const u8 * Ts1 = &T[s1] - 1;
         libsais_prefetch(s1 > 0 ? Ts1 : NULL);
         Ts1--;
         libsais_prefetch(s1 > 0 ? Ts1 : NULL);
@@ -7561,7 +7561,7 @@ static fast_sint_t libsais_final_bwt_scan_right_to_left_8u_block_prepare(
         SA[i - 0] = p0 & SAINT_MAX;
         if (p0 > 0) {
             p0--;
-            uint8_t c0 = T[p0 - (p0 > 0)], c1 = T[p0];
+            u8 c0 = T[p0 - (p0 > 0)], c1 = T[p0];
             SA[i - 0] = c1;
             sa_sint_t t = c0 | SAINT_MIN;
             buckets[cache[count].symbol = c1]++;
@@ -7571,7 +7571,7 @@ static fast_sint_t libsais_final_bwt_scan_right_to_left_8u_block_prepare(
         SA[i - 1] = p1 & SAINT_MAX;
         if (p1 > 0) {
             p1--;
-            uint8_t c0 = T[p1 - (p1 > 0)], c1 = T[p1];
+            u8 c0 = T[p1 - (p1 > 0)], c1 = T[p1];
             SA[i - 1] = c1;
             sa_sint_t t = c0 | SAINT_MIN;
             buckets[cache[count].symbol = c1]++;
@@ -7584,7 +7584,7 @@ static fast_sint_t libsais_final_bwt_scan_right_to_left_8u_block_prepare(
         SA[i] = p & SAINT_MAX;
         if (p > 0) {
             p--;
-            uint8_t c0 = T[p - (p > 0)], c1 = T[p];
+            u8 c0 = T[p - (p > 0)], c1 = T[p];
             SA[i] = c1;
             sa_sint_t t = c0 | SAINT_MIN;
             buckets[cache[count].symbol = c1]++;
@@ -7596,7 +7596,7 @@ static fast_sint_t libsais_final_bwt_scan_right_to_left_8u_block_prepare(
 }
 
 static fast_sint_t libsais_final_bwt_aux_scan_right_to_left_8u_block_prepare(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
     sa_sint_t * RESTRICT buckets, LIBSAIS_THREAD_CACHE * RESTRICT cache,
     fast_sint_t omp_block_start, fast_sint_t omp_block_size) {
     const fast_sint_t prefetch_distance = 32;
@@ -7610,12 +7610,12 @@ static fast_sint_t libsais_final_bwt_aux_scan_right_to_left_8u_block_prepare(
         libsais_prefetchw(&SA[i - 2 * prefetch_distance]);
 
         sa_sint_t s0 = SA[i - prefetch_distance - 0];
-        const uint8_t * Ts0 = &T[s0] - 1;
+        const u8 * Ts0 = &T[s0] - 1;
         libsais_prefetch(s0 > 0 ? Ts0 : NULL);
         Ts0--;
         libsais_prefetch(s0 > 0 ? Ts0 : NULL);
         sa_sint_t s1 = SA[i - prefetch_distance - 1];
-        const uint8_t * Ts1 = &T[s1] - 1;
+        const u8 * Ts1 = &T[s1] - 1;
         libsais_prefetch(s1 > 0 ? Ts1 : NULL);
         Ts1--;
         libsais_prefetch(s1 > 0 ? Ts1 : NULL);
@@ -7624,7 +7624,7 @@ static fast_sint_t libsais_final_bwt_aux_scan_right_to_left_8u_block_prepare(
         SA[i - 0] = p0 & SAINT_MAX;
         if (p0 > 0) {
             p0--;
-            uint8_t c0 = T[p0 - (p0 > 0)], c1 = T[p0];
+            u8 c0 = T[p0 - (p0 > 0)], c1 = T[p0];
             SA[i - 0] = c1;
             sa_sint_t t = c0 | SAINT_MIN;
             buckets[cache[count].symbol = c1]++;
@@ -7636,7 +7636,7 @@ static fast_sint_t libsais_final_bwt_aux_scan_right_to_left_8u_block_prepare(
         SA[i - 1] = p1 & SAINT_MAX;
         if (p1 > 0) {
             p1--;
-            uint8_t c0 = T[p1 - (p1 > 0)], c1 = T[p1];
+            u8 c0 = T[p1 - (p1 > 0)], c1 = T[p1];
             SA[i - 1] = c1;
             sa_sint_t t = c0 | SAINT_MIN;
             buckets[cache[count].symbol = c1]++;
@@ -7651,7 +7651,7 @@ static fast_sint_t libsais_final_bwt_aux_scan_right_to_left_8u_block_prepare(
         SA[i] = p & SAINT_MAX;
         if (p > 0) {
             p--;
-            uint8_t c0 = T[p - (p > 0)], c1 = T[p];
+            u8 c0 = T[p - (p > 0)], c1 = T[p];
             SA[i] = c1;
             sa_sint_t t = c0 | SAINT_MIN;
             buckets[cache[count].symbol = c1]++;
@@ -7665,7 +7665,7 @@ static fast_sint_t libsais_final_bwt_aux_scan_right_to_left_8u_block_prepare(
 }
 
 static fast_sint_t libsais_final_sorting_scan_right_to_left_8u_block_prepare(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
     sa_sint_t * RESTRICT buckets, LIBSAIS_THREAD_CACHE * RESTRICT cache,
     fast_sint_t omp_block_start, fast_sint_t omp_block_size) {
     const fast_sint_t prefetch_distance = 32;
@@ -7679,12 +7679,12 @@ static fast_sint_t libsais_final_sorting_scan_right_to_left_8u_block_prepare(
         libsais_prefetchw(&SA[i - 2 * prefetch_distance]);
 
         sa_sint_t s0 = SA[i - prefetch_distance - 0];
-        const uint8_t * Ts0 = &T[s0] - 1;
+        const u8 * Ts0 = &T[s0] - 1;
         libsais_prefetch(s0 > 0 ? Ts0 : NULL);
         Ts0--;
         libsais_prefetch(s0 > 0 ? Ts0 : NULL);
         sa_sint_t s1 = SA[i - prefetch_distance - 1];
-        const uint8_t * Ts1 = &T[s1] - 1;
+        const u8 * Ts1 = &T[s1] - 1;
         libsais_prefetch(s1 > 0 ? Ts1 : NULL);
         Ts1--;
         libsais_prefetch(s1 > 0 ? Ts1 : NULL);
@@ -7908,7 +7908,7 @@ static void libsais_final_sorting_scan_right_to_left_32s_block_sort(
 }
 
 static void libsais_final_bwt_scan_right_to_left_8u_block_omp(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
     sa_sint_t * RESTRICT induction_bucket, fast_sint_t block_start,
     fast_sint_t block_size, sa_sint_t threads,
     LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
@@ -7981,7 +7981,7 @@ static void libsais_final_bwt_scan_right_to_left_8u_block_omp(
 }
 
 static void libsais_final_bwt_aux_scan_right_to_left_8u_block_omp(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t rm,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t rm,
     sa_sint_t * RESTRICT I, sa_sint_t * RESTRICT induction_bucket,
     fast_sint_t block_start, fast_sint_t block_size, sa_sint_t threads,
     LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
@@ -8055,7 +8055,7 @@ static void libsais_final_bwt_aux_scan_right_to_left_8u_block_omp(
 }
 
 static void libsais_final_sorting_scan_right_to_left_8u_block_omp(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
     sa_sint_t * RESTRICT induction_bucket, fast_sint_t block_start,
     fast_sint_t block_size, sa_sint_t threads,
     LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
@@ -8188,7 +8188,7 @@ static void libsais_final_sorting_scan_right_to_left_32s_block_omp(
 #endif
 
 static sa_sint_t libsais_final_bwt_scan_right_to_left_8u_omp(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
     sa_sint_t * RESTRICT induction_bucket, sa_sint_t threads,
     LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
     sa_sint_t index = -1;
@@ -8223,7 +8223,7 @@ static sa_sint_t libsais_final_bwt_scan_right_to_left_8u_omp(
                         SA[block_start] = p & SAINT_MAX;
                         if (p > 0) {
                             p--;
-                            uint8_t c0 = T[p - (p > 0)], c1 = T[p];
+                            u8 c0 = T[p - (p > 0)], c1 = T[p];
                             SA[block_start] = c1;
                             sa_sint_t t = c0 | SAINT_MIN;
                             SA[--induction_bucket[c1]] = (c0 <= c1) ? p : t;
@@ -8246,7 +8246,7 @@ static sa_sint_t libsais_final_bwt_scan_right_to_left_8u_omp(
 }
 
 static void libsais_final_bwt_aux_scan_right_to_left_8u_omp(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
     sa_sint_t rm, sa_sint_t * RESTRICT I, sa_sint_t * RESTRICT induction_bucket,
     sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
     if (threads == 1 || n < 65536) {
@@ -8280,7 +8280,7 @@ static void libsais_final_bwt_aux_scan_right_to_left_8u_omp(
                         SA[block_start] = p & SAINT_MAX;
                         if (p > 0) {
                             p--;
-                            uint8_t c0 = T[p - (p > 0)], c1 = T[p];
+                            u8 c0 = T[p - (p > 0)], c1 = T[p];
                             SA[block_start] = c1;
                             sa_sint_t t = c0 | SAINT_MIN;
                             SA[--induction_bucket[c1]] = (c0 <= c1) ? p : t;
@@ -8304,7 +8304,7 @@ static void libsais_final_bwt_aux_scan_right_to_left_8u_omp(
 }
 
 static void libsais_final_sorting_scan_right_to_left_8u_omp(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
     sa_sint_t * RESTRICT induction_bucket, sa_sint_t threads,
     LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
     if (threads == 1 || n < 65536) {
@@ -8409,7 +8409,7 @@ static void libsais_clear_lms_suffixes_omp(sa_sint_t * RESTRICT SA, sa_sint_t n,
 }
 
 static sa_sint_t libsais_induce_final_order_8u_omp(
-    const uint8_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
+    const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
     sa_sint_t bwt, sa_sint_t r, sa_sint_t * RESTRICT I,
     sa_sint_t * RESTRICT buckets, sa_sint_t threads,
     LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
@@ -9335,7 +9335,7 @@ static sa_sint_t libsais_main_32s(
     }
 }
 
-static sa_sint_t libsais_main_8u(const uint8_t * T, sa_sint_t * SA, sa_sint_t n,
+static sa_sint_t libsais_main_8u(const u8 * T, sa_sint_t * SA, sa_sint_t n,
                                  sa_sint_t * RESTRICT buckets, sa_sint_t bwt,
                                  sa_sint_t r, sa_sint_t * RESTRICT I,
                                  sa_sint_t fs, sa_sint_t * freq,
@@ -9391,7 +9391,7 @@ static sa_sint_t libsais_main_8u(const uint8_t * T, sa_sint_t * SA, sa_sint_t n,
                                              threads, thread_state);
 }
 
-static sa_sint_t libsais_main(const uint8_t * T, sa_sint_t * SA, sa_sint_t n,
+static sa_sint_t libsais_main(const u8 * T, sa_sint_t * SA, sa_sint_t n,
                               sa_sint_t bwt, sa_sint_t r, sa_sint_t * I,
                               sa_sint_t fs, sa_sint_t * freq,
                               sa_sint_t threads) {
@@ -9411,7 +9411,7 @@ static sa_sint_t libsais_main(const uint8_t * T, sa_sint_t * SA, sa_sint_t n,
     return index;
 }
 
-static int32_t libsais_main_int(sa_sint_t * T, sa_sint_t * SA, sa_sint_t n,
+static s32 libsais_main_int(sa_sint_t * T, sa_sint_t * SA, sa_sint_t n,
                                 sa_sint_t k, sa_sint_t fs, sa_sint_t threads) {
     LIBSAIS_THREAD_STATE * RESTRICT thread_state =
         threads > 1 ? libsais_alloc_thread_state(threads) : NULL;
@@ -9427,7 +9427,7 @@ static int32_t libsais_main_int(sa_sint_t * T, sa_sint_t * SA, sa_sint_t n,
 }
 
 static sa_sint_t libsais_main_ctx(const LIBSAIS_CONTEXT * ctx,
-                                  const uint8_t * T, sa_sint_t * SA,
+                                  const u8 * T, sa_sint_t * SA,
                                   sa_sint_t n, sa_sint_t bwt, sa_sint_t r,
                                   sa_sint_t * I, sa_sint_t fs,
                                   sa_sint_t * freq) {
@@ -9438,7 +9438,7 @@ static sa_sint_t libsais_main_ctx(const LIBSAIS_CONTEXT * ctx,
                : -2;
 }
 
-static void libsais_bwt_copy_8u(uint8_t * RESTRICT U, sa_sint_t * RESTRICT A,
+static void libsais_bwt_copy_8u(u8 * RESTRICT U, sa_sint_t * RESTRICT A,
                                 sa_sint_t n) {
     const fast_sint_t prefetch_distance = 32;
 
@@ -9446,24 +9446,24 @@ static void libsais_bwt_copy_8u(uint8_t * RESTRICT U, sa_sint_t * RESTRICT A,
     for (i = 0, j = (fast_sint_t)n - 7; i < j; i += 8) {
         libsais_prefetch(&A[i + prefetch_distance]);
 
-        U[i + 0] = (uint8_t)A[i + 0];
-        U[i + 1] = (uint8_t)A[i + 1];
-        U[i + 2] = (uint8_t)A[i + 2];
-        U[i + 3] = (uint8_t)A[i + 3];
-        U[i + 4] = (uint8_t)A[i + 4];
-        U[i + 5] = (uint8_t)A[i + 5];
-        U[i + 6] = (uint8_t)A[i + 6];
-        U[i + 7] = (uint8_t)A[i + 7];
+        U[i + 0] = (u8)A[i + 0];
+        U[i + 1] = (u8)A[i + 1];
+        U[i + 2] = (u8)A[i + 2];
+        U[i + 3] = (u8)A[i + 3];
+        U[i + 4] = (u8)A[i + 4];
+        U[i + 5] = (u8)A[i + 5];
+        U[i + 6] = (u8)A[i + 6];
+        U[i + 7] = (u8)A[i + 7];
     }
 
     for (j += 7; i < j; i += 1) {
-        U[i] = (uint8_t)A[i];
+        U[i] = (u8)A[i];
     }
 }
 
 #if defined(_OPENMP)
 
-static void libsais_bwt_copy_8u_omp(uint8_t * RESTRICT U,
+static void libsais_bwt_copy_8u_omp(u8 * RESTRICT U,
                                     sa_sint_t * RESTRICT A, sa_sint_t n,
                                     sa_sint_t threads) {
     #if defined(_OPENMP)
@@ -9499,13 +9499,13 @@ void libsais_free_ctx(void * ctx) {
     libsais_free_ctx_main((LIBSAIS_CONTEXT *)ctx);
 }
 
-int32_t libsais(const uint8_t * T, int32_t * SA, int32_t n, int32_t fs,
-                int32_t * freq) {
+s32 libsais(const u8 * T, s32 * SA, s32 n, s32 fs,
+                s32 * freq) {
     if ((T == NULL) || (SA == NULL) || (n < 0) || (fs < 0)) {
         return -1;
     } else if (n < 2) {
         if (freq != NULL) {
-            memset(freq, 0, ALPHABET_SIZE * sizeof(int32_t));
+            memset(freq, 0, ALPHABET_SIZE * sizeof(s32));
         }
         if (n == 1) {
             SA[0] = 0;
@@ -9519,8 +9519,8 @@ int32_t libsais(const uint8_t * T, int32_t * SA, int32_t n, int32_t fs,
     return libsais_main(T, SA, n, 0, 0, NULL, fs, freq, 1);
 }
 
-int32_t libsais_int(int32_t * T, int32_t * SA, int32_t n, int32_t k,
-                    int32_t fs) {
+s32 libsais_int(s32 * T, s32 * SA, s32 n, s32 k,
+                    s32 fs) {
     if ((T == NULL) || (SA == NULL) || (n < 0) || (fs < 0)) {
         return -1;
     } else if (n < 2) {
@@ -9533,13 +9533,13 @@ int32_t libsais_int(int32_t * T, int32_t * SA, int32_t n, int32_t k,
     return libsais_main_int(T, SA, n, k, fs, 1);
 }
 
-int32_t libsais_ctx(const void * ctx, const uint8_t * T, int32_t * SA,
-                    int32_t n, int32_t fs, int32_t * freq) {
+s32 libsais_ctx(const void * ctx, const u8 * T, s32 * SA,
+                    s32 n, s32 fs, s32 * freq) {
     if ((ctx == NULL) || (T == NULL) || (SA == NULL) || (n < 0) || (fs < 0)) {
         return -1;
     } else if (n < 2) {
         if (freq != NULL) {
-            memset(freq, 0, ALPHABET_SIZE * sizeof(int32_t));
+            memset(freq, 0, ALPHABET_SIZE * sizeof(s32));
         }
         if (n == 1) {
             SA[0] = 0;
@@ -9554,13 +9554,13 @@ int32_t libsais_ctx(const void * ctx, const uint8_t * T, int32_t * SA,
                             fs, freq);
 }
 
-int32_t libsais_bwt(const uint8_t * T, uint8_t * U, int32_t * A, int32_t n,
-                    int32_t fs, int32_t * freq) {
+s32 libsais_bwt(const u8 * T, u8 * U, s32 * A, s32 n,
+                    s32 fs, s32 * freq) {
     if ((T == NULL) || (U == NULL) || (A == NULL) || (n < 0) || (fs < 0)) {
         return -1;
     } else if (n <= 1) {
         if (freq != NULL) {
-            memset(freq, 0, ALPHABET_SIZE * sizeof(int32_t));
+            memset(freq, 0, ALPHABET_SIZE * sizeof(s32));
         }
         if (n == 1) {
             U[0] = T[0];
@@ -9583,14 +9583,14 @@ int32_t libsais_bwt(const uint8_t * T, uint8_t * U, int32_t * A, int32_t n,
     return index;
 }
 
-int32_t libsais_bwt_aux(const uint8_t * T, uint8_t * U, int32_t * A, int32_t n,
-                        int32_t fs, int32_t * freq, int32_t r, int32_t * I) {
+s32 libsais_bwt_aux(const u8 * T, u8 * U, s32 * A, s32 n,
+                        s32 fs, s32 * freq, s32 r, s32 * I) {
     if ((T == NULL) || (U == NULL) || (A == NULL) || (n < 0) || (fs < 0) ||
         (r < 2) || ((r & (r - 1)) != 0) || (I == NULL)) {
         return -1;
     } else if (n <= 1) {
         if (freq != NULL) {
-            memset(freq, 0, ALPHABET_SIZE * sizeof(int32_t));
+            memset(freq, 0, ALPHABET_SIZE * sizeof(s32));
         }
         if (n == 1) {
             U[0] = T[0];
@@ -9613,14 +9613,14 @@ int32_t libsais_bwt_aux(const uint8_t * T, uint8_t * U, int32_t * A, int32_t n,
     return 0;
 }
 
-int32_t libsais_bwt_ctx(const void * ctx, const uint8_t * T, uint8_t * U,
-                        int32_t * A, int32_t n, int32_t fs, int32_t * freq) {
+s32 libsais_bwt_ctx(const void * ctx, const u8 * T, u8 * U,
+                        s32 * A, s32 n, s32 fs, s32 * freq) {
     if ((ctx == NULL) || (T == NULL) || (U == NULL) || (A == NULL) || (n < 0) ||
         (fs < 0)) {
         return -1;
     } else if (n <= 1) {
         if (freq != NULL) {
-            memset(freq, 0, ALPHABET_SIZE * sizeof(int32_t));
+            memset(freq, 0, ALPHABET_SIZE * sizeof(s32));
         }
         if (n == 1) {
             U[0] = T[0];
@@ -9654,15 +9654,15 @@ int32_t libsais_bwt_ctx(const void * ctx, const uint8_t * T, uint8_t * U,
     return index;
 }
 
-int32_t libsais_bwt_aux_ctx(const void * ctx, const uint8_t * T, uint8_t * U,
-                            int32_t * A, int32_t n, int32_t fs, int32_t * freq,
-                            int32_t r, int32_t * I) {
+s32 libsais_bwt_aux_ctx(const void * ctx, const u8 * T, u8 * U,
+                            s32 * A, s32 n, s32 fs, s32 * freq,
+                            s32 r, s32 * I) {
     if ((ctx == NULL) || (T == NULL) || (U == NULL) || (A == NULL) || (n < 0) ||
         (fs < 0) || (r < 2) || ((r & (r - 1)) != 0) || (I == NULL)) {
         return -1;
     } else if (n <= 1) {
         if (freq != NULL) {
-            memset(freq, 0, ALPHABET_SIZE * sizeof(int32_t));
+            memset(freq, 0, ALPHABET_SIZE * sizeof(s32));
         }
         if (n == 1) {
             U[0] = T[0];
@@ -9696,7 +9696,7 @@ int32_t libsais_bwt_aux_ctx(const void * ctx, const uint8_t * T, uint8_t * U,
 
 #if defined(_OPENMP)
 
-void * libsais_create_ctx_omp(int32_t threads) {
+void * libsais_create_ctx_omp(s32 threads) {
     if (threads < 0) {
         return NULL;
     }
@@ -9705,13 +9705,13 @@ void * libsais_create_ctx_omp(int32_t threads) {
     return (void *)libsais_create_ctx_main(threads);
 }
 
-int32_t libsais_omp(const uint8_t * T, int32_t * SA, int32_t n, int32_t fs,
-                    int32_t * freq, int32_t threads) {
+s32 libsais_omp(const u8 * T, s32 * SA, s32 n, s32 fs,
+                    s32 * freq, s32 threads) {
     if ((T == NULL) || (SA == NULL) || (n < 0) || (fs < 0) || (threads < 0)) {
         return -1;
     } else if (n < 2) {
         if (freq != NULL) {
-            memset(freq, 0, ALPHABET_SIZE * sizeof(int32_t));
+            memset(freq, 0, ALPHABET_SIZE * sizeof(s32));
         }
         if (n == 1) {
             SA[0] = 0;
@@ -9727,8 +9727,8 @@ int32_t libsais_omp(const uint8_t * T, int32_t * SA, int32_t n, int32_t fs,
     return libsais_main(T, SA, n, 0, 0, NULL, fs, freq, threads);
 }
 
-int32_t libsais_int_omp(int32_t * T, int32_t * SA, int32_t n, int32_t k,
-                        int32_t fs, int32_t threads) {
+s32 libsais_int_omp(s32 * T, s32 * SA, s32 n, s32 k,
+                        s32 fs, s32 threads) {
     if ((T == NULL) || (SA == NULL) || (n < 0) || (fs < 0) || (threads < 0)) {
         return -1;
     } else if (n < 2) {
@@ -9743,14 +9743,14 @@ int32_t libsais_int_omp(int32_t * T, int32_t * SA, int32_t n, int32_t k,
     return libsais_main_int(T, SA, n, k, fs, threads);
 }
 
-int32_t libsais_bwt_omp(const uint8_t * T, uint8_t * U, int32_t * A, int32_t n,
-                        int32_t fs, int32_t * freq, int32_t threads) {
+s32 libsais_bwt_omp(const u8 * T, u8 * U, s32 * A, s32 n,
+                        s32 fs, s32 * freq, s32 threads) {
     if ((T == NULL) || (U == NULL) || (A == NULL) || (n < 0) || (fs < 0) ||
         (threads < 0)) {
         return -1;
     } else if (n <= 1) {
         if (freq != NULL) {
-            memset(freq, 0, ALPHABET_SIZE * sizeof(int32_t));
+            memset(freq, 0, ALPHABET_SIZE * sizeof(s32));
         }
         if (n == 1) {
             U[0] = T[0];
@@ -9775,15 +9775,15 @@ int32_t libsais_bwt_omp(const uint8_t * T, uint8_t * U, int32_t * A, int32_t n,
     return index;
 }
 
-int32_t libsais_bwt_aux_omp(const uint8_t * T, uint8_t * U, int32_t * A,
-                            int32_t n, int32_t fs, int32_t * freq, int32_t r,
-                            int32_t * I, int32_t threads) {
+s32 libsais_bwt_aux_omp(const u8 * T, u8 * U, s32 * A,
+                            s32 n, s32 fs, s32 * freq, s32 r,
+                            s32 * I, s32 threads) {
     if ((T == NULL) || (U == NULL) || (A == NULL) || (n < 0) || (fs < 0) ||
         (r < 2) || ((r & (r - 1)) != 0) || (I == NULL) || (threads < 0)) {
         return -1;
     } else if (n <= 1) {
         if (freq != NULL) {
-            memset(freq, 0, ALPHABET_SIZE * sizeof(int32_t));
+            memset(freq, 0, ALPHABET_SIZE * sizeof(s32));
         }
         if (n == 1) {
             U[0] = T[0];
@@ -9817,8 +9817,8 @@ static LIBSAIS_UNBWT_CONTEXT * libsais_unbwt_create_ctx_main(
             sizeof(LIBSAIS_UNBWT_CONTEXT), 64);
     sa_uint_t * RESTRICT bucket2 = (sa_uint_t *)libsais_alloc_aligned(
         ALPHABET_SIZE * ALPHABET_SIZE * sizeof(sa_uint_t), 4096);
-    uint16_t * RESTRICT fastbits = (uint16_t *)libsais_alloc_aligned(
-        (1 + (1 << UNBWT_FASTBITS)) * sizeof(uint16_t), 4096);
+    u16 * RESTRICT fastbits = (u16 *)libsais_alloc_aligned(
+        (1 + (1 << UNBWT_FASTBITS)) * sizeof(u16), 4096);
     sa_uint_t * RESTRICT buckets =
         threads > 1
             ? (sa_uint_t *)libsais_alloc_aligned(
@@ -9855,12 +9855,12 @@ static void libsais_unbwt_free_ctx_main(LIBSAIS_UNBWT_CONTEXT * ctx) {
     }
 }
 
-static void libsais_unbwt_compute_histogram(const uint8_t * RESTRICT T,
+static void libsais_unbwt_compute_histogram(const u8 * RESTRICT T,
                                             fast_sint_t n,
                                             sa_uint_t * RESTRICT count) {
     const fast_sint_t prefetch_distance = 256;
 
-    const uint8_t * RESTRICT T_p = T;
+    const u8 * RESTRICT T_p = T;
 
     if (n >= 1024) {
         sa_uint_t copy[4 * (ALPHABET_SIZE + 16)];
@@ -9872,165 +9872,165 @@ static void libsais_unbwt_compute_histogram(const uint8_t * RESTRICT T,
         sa_uint_t * RESTRICT copy2 = copy + 2 * (ALPHABET_SIZE + 16);
         sa_uint_t * RESTRICT copy3 = copy + 3 * (ALPHABET_SIZE + 16);
 
-        for (; T_p < (uint8_t *)((ptrdiff_t)(T + 63) & (-64)); T_p += 1) {
+        for (; T_p < (u8 *)((ptrdiff_t)(T + 63) & (-64)); T_p += 1) {
             copy0[T_p[0]]++;
         }
 
-        fast_uint_t x = ((const uint32_t *)(const void *)T_p)[0],
-                    y = ((const uint32_t *)(const void *)T_p)[1];
+        fast_uint_t x = ((const u32 *)(const void *)T_p)[0],
+                    y = ((const u32 *)(const void *)T_p)[1];
 
-        for (; T_p < (uint8_t *)((ptrdiff_t)(T + n - 8) & (-64)); T_p += 64) {
+        for (; T_p < (u8 *)((ptrdiff_t)(T + n - 8) & (-64)); T_p += 64) {
             libsais_prefetch(&T_p[prefetch_distance]);
 
-            fast_uint_t z = ((const uint32_t *)(const void *)T_p)[2],
-                        w = ((const uint32_t *)(const void *)T_p)[3];
-            copy0[(uint8_t)x]++;
+            fast_uint_t z = ((const u32 *)(const void *)T_p)[2],
+                        w = ((const u32 *)(const void *)T_p)[3];
+            copy0[(u8)x]++;
             x >>= 8;
-            copy1[(uint8_t)x]++;
+            copy1[(u8)x]++;
             x >>= 8;
-            copy2[(uint8_t)x]++;
+            copy2[(u8)x]++;
             x >>= 8;
             copy3[x]++;
-            copy0[(uint8_t)y]++;
+            copy0[(u8)y]++;
             y >>= 8;
-            copy1[(uint8_t)y]++;
+            copy1[(u8)y]++;
             y >>= 8;
-            copy2[(uint8_t)y]++;
+            copy2[(u8)y]++;
             y >>= 8;
             copy3[y]++;
 
-            x = ((const uint32_t *)(const void *)T_p)[4];
-            y = ((const uint32_t *)(const void *)T_p)[5];
-            copy0[(uint8_t)z]++;
+            x = ((const u32 *)(const void *)T_p)[4];
+            y = ((const u32 *)(const void *)T_p)[5];
+            copy0[(u8)z]++;
             z >>= 8;
-            copy1[(uint8_t)z]++;
+            copy1[(u8)z]++;
             z >>= 8;
-            copy2[(uint8_t)z]++;
+            copy2[(u8)z]++;
             z >>= 8;
             copy3[z]++;
-            copy0[(uint8_t)w]++;
+            copy0[(u8)w]++;
             w >>= 8;
-            copy1[(uint8_t)w]++;
+            copy1[(u8)w]++;
             w >>= 8;
-            copy2[(uint8_t)w]++;
+            copy2[(u8)w]++;
             w >>= 8;
             copy3[w]++;
 
-            z = ((const uint32_t *)(const void *)T_p)[6];
-            w = ((const uint32_t *)(const void *)T_p)[7];
-            copy0[(uint8_t)x]++;
+            z = ((const u32 *)(const void *)T_p)[6];
+            w = ((const u32 *)(const void *)T_p)[7];
+            copy0[(u8)x]++;
             x >>= 8;
-            copy1[(uint8_t)x]++;
+            copy1[(u8)x]++;
             x >>= 8;
-            copy2[(uint8_t)x]++;
+            copy2[(u8)x]++;
             x >>= 8;
             copy3[x]++;
-            copy0[(uint8_t)y]++;
+            copy0[(u8)y]++;
             y >>= 8;
-            copy1[(uint8_t)y]++;
+            copy1[(u8)y]++;
             y >>= 8;
-            copy2[(uint8_t)y]++;
+            copy2[(u8)y]++;
             y >>= 8;
             copy3[y]++;
 
-            x = ((const uint32_t *)(const void *)T_p)[8];
-            y = ((const uint32_t *)(const void *)T_p)[9];
-            copy0[(uint8_t)z]++;
+            x = ((const u32 *)(const void *)T_p)[8];
+            y = ((const u32 *)(const void *)T_p)[9];
+            copy0[(u8)z]++;
             z >>= 8;
-            copy1[(uint8_t)z]++;
+            copy1[(u8)z]++;
             z >>= 8;
-            copy2[(uint8_t)z]++;
+            copy2[(u8)z]++;
             z >>= 8;
             copy3[z]++;
-            copy0[(uint8_t)w]++;
+            copy0[(u8)w]++;
             w >>= 8;
-            copy1[(uint8_t)w]++;
+            copy1[(u8)w]++;
             w >>= 8;
-            copy2[(uint8_t)w]++;
+            copy2[(u8)w]++;
             w >>= 8;
             copy3[w]++;
 
-            z = ((const uint32_t *)(const void *)T_p)[10];
-            w = ((const uint32_t *)(const void *)T_p)[11];
-            copy0[(uint8_t)x]++;
+            z = ((const u32 *)(const void *)T_p)[10];
+            w = ((const u32 *)(const void *)T_p)[11];
+            copy0[(u8)x]++;
             x >>= 8;
-            copy1[(uint8_t)x]++;
+            copy1[(u8)x]++;
             x >>= 8;
-            copy2[(uint8_t)x]++;
+            copy2[(u8)x]++;
             x >>= 8;
             copy3[x]++;
-            copy0[(uint8_t)y]++;
+            copy0[(u8)y]++;
             y >>= 8;
-            copy1[(uint8_t)y]++;
+            copy1[(u8)y]++;
             y >>= 8;
-            copy2[(uint8_t)y]++;
+            copy2[(u8)y]++;
             y >>= 8;
             copy3[y]++;
 
-            x = ((const uint32_t *)(const void *)T_p)[12];
-            y = ((const uint32_t *)(const void *)T_p)[13];
-            copy0[(uint8_t)z]++;
+            x = ((const u32 *)(const void *)T_p)[12];
+            y = ((const u32 *)(const void *)T_p)[13];
+            copy0[(u8)z]++;
             z >>= 8;
-            copy1[(uint8_t)z]++;
+            copy1[(u8)z]++;
             z >>= 8;
-            copy2[(uint8_t)z]++;
+            copy2[(u8)z]++;
             z >>= 8;
             copy3[z]++;
-            copy0[(uint8_t)w]++;
+            copy0[(u8)w]++;
             w >>= 8;
-            copy1[(uint8_t)w]++;
+            copy1[(u8)w]++;
             w >>= 8;
-            copy2[(uint8_t)w]++;
+            copy2[(u8)w]++;
             w >>= 8;
             copy3[w]++;
 
-            z = ((const uint32_t *)(const void *)T_p)[14];
-            w = ((const uint32_t *)(const void *)T_p)[15];
-            copy0[(uint8_t)x]++;
+            z = ((const u32 *)(const void *)T_p)[14];
+            w = ((const u32 *)(const void *)T_p)[15];
+            copy0[(u8)x]++;
             x >>= 8;
-            copy1[(uint8_t)x]++;
+            copy1[(u8)x]++;
             x >>= 8;
-            copy2[(uint8_t)x]++;
+            copy2[(u8)x]++;
             x >>= 8;
             copy3[x]++;
-            copy0[(uint8_t)y]++;
+            copy0[(u8)y]++;
             y >>= 8;
-            copy1[(uint8_t)y]++;
+            copy1[(u8)y]++;
             y >>= 8;
-            copy2[(uint8_t)y]++;
+            copy2[(u8)y]++;
             y >>= 8;
             copy3[y]++;
 
-            x = ((const uint32_t *)(const void *)T_p)[16];
-            y = ((const uint32_t *)(const void *)T_p)[17];
-            copy0[(uint8_t)z]++;
+            x = ((const u32 *)(const void *)T_p)[16];
+            y = ((const u32 *)(const void *)T_p)[17];
+            copy0[(u8)z]++;
             z >>= 8;
-            copy1[(uint8_t)z]++;
+            copy1[(u8)z]++;
             z >>= 8;
-            copy2[(uint8_t)z]++;
+            copy2[(u8)z]++;
             z >>= 8;
             copy3[z]++;
-            copy0[(uint8_t)w]++;
+            copy0[(u8)w]++;
             w >>= 8;
-            copy1[(uint8_t)w]++;
+            copy1[(u8)w]++;
             w >>= 8;
-            copy2[(uint8_t)w]++;
+            copy2[(u8)w]++;
             w >>= 8;
             copy3[w]++;
         }
 
-        copy0[(uint8_t)x]++;
+        copy0[(u8)x]++;
         x >>= 8;
-        copy1[(uint8_t)x]++;
+        copy1[(u8)x]++;
         x >>= 8;
-        copy2[(uint8_t)x]++;
+        copy2[(u8)x]++;
         x >>= 8;
         copy3[x]++;
-        copy0[(uint8_t)y]++;
+        copy0[(u8)y]++;
         y >>= 8;
-        copy1[(uint8_t)y]++;
+        copy1[(u8)y]++;
         y >>= 8;
-        copy2[(uint8_t)y]++;
+        copy2[(u8)y]++;
         y >>= 8;
         copy3[y]++;
 
@@ -10117,7 +10117,7 @@ static void libsais_unbwt_transpose_bucket2(sa_uint_t * RESTRICT bucket2) {
 }
 
 static void libsais_unbwt_compute_bigram_histogram_single(
-    const uint8_t * RESTRICT T, sa_uint_t * RESTRICT bucket1,
+    const u8 * RESTRICT T, sa_uint_t * RESTRICT bucket1,
     sa_uint_t * RESTRICT bucket2, fast_uint_t index) {
     fast_uint_t sum, c;
     for (sum = 1, c = 0; c < ALPHABET_SIZE; ++c) {
@@ -10151,7 +10151,7 @@ static void libsais_unbwt_compute_bigram_histogram_single(
 }
 
 static void libsais_unbwt_calculate_fastbits(sa_uint_t * RESTRICT bucket2,
-                                             uint16_t * RESTRICT fastbits,
+                                             u16 * RESTRICT fastbits,
                                              fast_uint_t lastc,
                                              fast_uint_t shift) {
     fast_uint_t v, w, sum, c, d;
@@ -10166,7 +10166,7 @@ static void libsais_unbwt_calculate_fastbits(sa_uint_t * RESTRICT bucket2,
             bucket2[w] = (sa_uint_t)prev;
             if (prev != sum) {
                 for (; v <= ((sum - 1) >> shift); ++v) {
-                    fastbits[v] = (uint16_t)w;
+                    fastbits[v] = (u16)w;
                 }
             }
         }
@@ -10174,7 +10174,7 @@ static void libsais_unbwt_calculate_fastbits(sa_uint_t * RESTRICT bucket2,
 }
 
 static void libsais_unbwt_calculate_biPSI(
-    const uint8_t * RESTRICT T, sa_uint_t * RESTRICT P,
+    const u8 * RESTRICT T, sa_uint_t * RESTRICT P,
     sa_uint_t * RESTRICT bucket1, sa_uint_t * RESTRICT bucket2,
     fast_uint_t index, fast_sint_t omp_block_start, fast_sint_t omp_block_end) {
     {
@@ -10222,12 +10222,12 @@ static void libsais_unbwt_calculate_biPSI(
     }
 }
 
-static void libsais_unbwt_init_single(const uint8_t * RESTRICT T,
+static void libsais_unbwt_init_single(const u8 * RESTRICT T,
                                       sa_uint_t * RESTRICT P, sa_sint_t n,
                                       const sa_sint_t * freq,
                                       const sa_uint_t * RESTRICT I,
                                       sa_uint_t * RESTRICT bucket2,
-                                      uint16_t * RESTRICT fastbits) {
+                                      u16 * RESTRICT fastbits) {
     sa_uint_t bucket1[ALPHABET_SIZE];
 
     fast_uint_t index = I[0];
@@ -10254,7 +10254,7 @@ static void libsais_unbwt_init_single(const uint8_t * RESTRICT T,
 #if defined(_OPENMP)
 
 static void libsais_unbwt_compute_bigram_histogram_parallel(
-    const uint8_t * RESTRICT T, fast_uint_t index, sa_uint_t * RESTRICT bucket1,
+    const u8 * RESTRICT T, fast_uint_t index, sa_uint_t * RESTRICT bucket1,
     sa_uint_t * RESTRICT bucket2, fast_sint_t omp_block_start,
     fast_sint_t omp_block_size) {
     fast_sint_t i;
@@ -10276,9 +10276,9 @@ static void libsais_unbwt_compute_bigram_histogram_parallel(
 }
 
 static void libsais_unbwt_init_parallel(
-    const uint8_t * RESTRICT T, sa_uint_t * RESTRICT P, sa_sint_t n,
+    const u8 * RESTRICT T, sa_uint_t * RESTRICT P, sa_sint_t n,
     const sa_sint_t * freq, const sa_uint_t * RESTRICT I,
-    sa_uint_t * RESTRICT bucket2, uint16_t * RESTRICT fastbits,
+    sa_uint_t * RESTRICT bucket2, u16 * RESTRICT fastbits,
     sa_uint_t * RESTRICT buckets, sa_sint_t threads) {
     sa_uint_t bucket1[ALPHABET_SIZE];
 
@@ -10447,17 +10447,17 @@ static void libsais_unbwt_init_parallel(
 
 #endif
 
-static void libsais_unbwt_decode_1(uint8_t * RESTRICT U, sa_uint_t * RESTRICT P,
+static void libsais_unbwt_decode_1(u8 * RESTRICT U, sa_uint_t * RESTRICT P,
                                    sa_uint_t * RESTRICT bucket2,
-                                   uint16_t * RESTRICT fastbits,
+                                   u16 * RESTRICT fastbits,
                                    fast_uint_t shift, fast_uint_t * i0,
                                    fast_uint_t k) {
-    uint16_t * RESTRICT U0 = (uint16_t *)(void *)U;
+    u16 * RESTRICT U0 = (u16 *)(void *)U;
 
     fast_uint_t i, p0 = *i0;
 
     for (i = 0; i != k; ++i) {
-        uint16_t c0 = fastbits[p0 >> shift];
+        u16 c0 = fastbits[p0 >> shift];
         if (bucket2[c0] <= p0) {
             do {
                 c0++;
@@ -10470,19 +10470,19 @@ static void libsais_unbwt_decode_1(uint8_t * RESTRICT U, sa_uint_t * RESTRICT P,
     *i0 = p0;
 }
 
-static void libsais_unbwt_decode_2(uint8_t * RESTRICT U, sa_uint_t * RESTRICT P,
+static void libsais_unbwt_decode_2(u8 * RESTRICT U, sa_uint_t * RESTRICT P,
                                    sa_uint_t * RESTRICT bucket2,
-                                   uint16_t * RESTRICT fastbits,
+                                   u16 * RESTRICT fastbits,
                                    fast_uint_t shift, fast_uint_t r,
                                    fast_uint_t * i0, fast_uint_t * i1,
                                    fast_uint_t k) {
-    uint16_t * RESTRICT U0 = (uint16_t *)(void *)U;
-    uint16_t * RESTRICT U1 = (uint16_t *)(void *)(((uint8_t *)U0) + r);
+    u16 * RESTRICT U0 = (u16 *)(void *)U;
+    u16 * RESTRICT U1 = (u16 *)(void *)(((u8 *)U0) + r);
 
     fast_uint_t i, p0 = *i0, p1 = *i1;
 
     for (i = 0; i != k; ++i) {
-        uint16_t c0 = fastbits[p0 >> shift];
+        u16 c0 = fastbits[p0 >> shift];
         if (bucket2[c0] <= p0) {
             do {
                 c0++;
@@ -10490,7 +10490,7 @@ static void libsais_unbwt_decode_2(uint8_t * RESTRICT U, sa_uint_t * RESTRICT P,
         }
         p0 = P[p0];
         U0[i] = libsais_bswap16(c0);
-        uint16_t c1 = fastbits[p1 >> shift];
+        u16 c1 = fastbits[p1 >> shift];
         if (bucket2[c1] <= p1) {
             do {
                 c1++;
@@ -10504,20 +10504,20 @@ static void libsais_unbwt_decode_2(uint8_t * RESTRICT U, sa_uint_t * RESTRICT P,
     *i1 = p1;
 }
 
-static void libsais_unbwt_decode_3(uint8_t * RESTRICT U, sa_uint_t * RESTRICT P,
+static void libsais_unbwt_decode_3(u8 * RESTRICT U, sa_uint_t * RESTRICT P,
                                    sa_uint_t * RESTRICT bucket2,
-                                   uint16_t * RESTRICT fastbits,
+                                   u16 * RESTRICT fastbits,
                                    fast_uint_t shift, fast_uint_t r,
                                    fast_uint_t * i0, fast_uint_t * i1,
                                    fast_uint_t * i2, fast_uint_t k) {
-    uint16_t * RESTRICT U0 = (uint16_t *)(void *)U;
-    uint16_t * RESTRICT U1 = (uint16_t *)(void *)(((uint8_t *)U0) + r);
-    uint16_t * RESTRICT U2 = (uint16_t *)(void *)(((uint8_t *)U1) + r);
+    u16 * RESTRICT U0 = (u16 *)(void *)U;
+    u16 * RESTRICT U1 = (u16 *)(void *)(((u8 *)U0) + r);
+    u16 * RESTRICT U2 = (u16 *)(void *)(((u8 *)U1) + r);
 
     fast_uint_t i, p0 = *i0, p1 = *i1, p2 = *i2;
 
     for (i = 0; i != k; ++i) {
-        uint16_t c0 = fastbits[p0 >> shift];
+        u16 c0 = fastbits[p0 >> shift];
         if (bucket2[c0] <= p0) {
             do {
                 c0++;
@@ -10525,7 +10525,7 @@ static void libsais_unbwt_decode_3(uint8_t * RESTRICT U, sa_uint_t * RESTRICT P,
         }
         p0 = P[p0];
         U0[i] = libsais_bswap16(c0);
-        uint16_t c1 = fastbits[p1 >> shift];
+        u16 c1 = fastbits[p1 >> shift];
         if (bucket2[c1] <= p1) {
             do {
                 c1++;
@@ -10533,7 +10533,7 @@ static void libsais_unbwt_decode_3(uint8_t * RESTRICT U, sa_uint_t * RESTRICT P,
         }
         p1 = P[p1];
         U1[i] = libsais_bswap16(c1);
-        uint16_t c2 = fastbits[p2 >> shift];
+        u16 c2 = fastbits[p2 >> shift];
         if (bucket2[c2] <= p2) {
             do {
                 c2++;
@@ -10548,22 +10548,22 @@ static void libsais_unbwt_decode_3(uint8_t * RESTRICT U, sa_uint_t * RESTRICT P,
     *i2 = p2;
 }
 
-static void libsais_unbwt_decode_4(uint8_t * RESTRICT U, sa_uint_t * RESTRICT P,
+static void libsais_unbwt_decode_4(u8 * RESTRICT U, sa_uint_t * RESTRICT P,
                                    sa_uint_t * RESTRICT bucket2,
-                                   uint16_t * RESTRICT fastbits,
+                                   u16 * RESTRICT fastbits,
                                    fast_uint_t shift, fast_uint_t r,
                                    fast_uint_t * i0, fast_uint_t * i1,
                                    fast_uint_t * i2, fast_uint_t * i3,
                                    fast_uint_t k) {
-    uint16_t * RESTRICT U0 = (uint16_t *)(void *)U;
-    uint16_t * RESTRICT U1 = (uint16_t *)(void *)(((uint8_t *)U0) + r);
-    uint16_t * RESTRICT U2 = (uint16_t *)(void *)(((uint8_t *)U1) + r);
-    uint16_t * RESTRICT U3 = (uint16_t *)(void *)(((uint8_t *)U2) + r);
+    u16 * RESTRICT U0 = (u16 *)(void *)U;
+    u16 * RESTRICT U1 = (u16 *)(void *)(((u8 *)U0) + r);
+    u16 * RESTRICT U2 = (u16 *)(void *)(((u8 *)U1) + r);
+    u16 * RESTRICT U3 = (u16 *)(void *)(((u8 *)U2) + r);
 
     fast_uint_t i, p0 = *i0, p1 = *i1, p2 = *i2, p3 = *i3;
 
     for (i = 0; i != k; ++i) {
-        uint16_t c0 = fastbits[p0 >> shift];
+        u16 c0 = fastbits[p0 >> shift];
         if (bucket2[c0] <= p0) {
             do {
                 c0++;
@@ -10571,7 +10571,7 @@ static void libsais_unbwt_decode_4(uint8_t * RESTRICT U, sa_uint_t * RESTRICT P,
         }
         p0 = P[p0];
         U0[i] = libsais_bswap16(c0);
-        uint16_t c1 = fastbits[p1 >> shift];
+        u16 c1 = fastbits[p1 >> shift];
         if (bucket2[c1] <= p1) {
             do {
                 c1++;
@@ -10579,7 +10579,7 @@ static void libsais_unbwt_decode_4(uint8_t * RESTRICT U, sa_uint_t * RESTRICT P,
         }
         p1 = P[p1];
         U1[i] = libsais_bswap16(c1);
-        uint16_t c2 = fastbits[p2 >> shift];
+        u16 c2 = fastbits[p2 >> shift];
         if (bucket2[c2] <= p2) {
             do {
                 c2++;
@@ -10587,7 +10587,7 @@ static void libsais_unbwt_decode_4(uint8_t * RESTRICT U, sa_uint_t * RESTRICT P,
         }
         p2 = P[p2];
         U2[i] = libsais_bswap16(c2);
-        uint16_t c3 = fastbits[p3 >> shift];
+        u16 c3 = fastbits[p3 >> shift];
         if (bucket2[c3] <= p3) {
             do {
                 c3++;
@@ -10603,23 +10603,23 @@ static void libsais_unbwt_decode_4(uint8_t * RESTRICT U, sa_uint_t * RESTRICT P,
     *i3 = p3;
 }
 
-static void libsais_unbwt_decode_5(uint8_t * RESTRICT U, sa_uint_t * RESTRICT P,
+static void libsais_unbwt_decode_5(u8 * RESTRICT U, sa_uint_t * RESTRICT P,
                                    sa_uint_t * RESTRICT bucket2,
-                                   uint16_t * RESTRICT fastbits,
+                                   u16 * RESTRICT fastbits,
                                    fast_uint_t shift, fast_uint_t r,
                                    fast_uint_t * i0, fast_uint_t * i1,
                                    fast_uint_t * i2, fast_uint_t * i3,
                                    fast_uint_t * i4, fast_uint_t k) {
-    uint16_t * RESTRICT U0 = (uint16_t *)(void *)U;
-    uint16_t * RESTRICT U1 = (uint16_t *)(void *)(((uint8_t *)U0) + r);
-    uint16_t * RESTRICT U2 = (uint16_t *)(void *)(((uint8_t *)U1) + r);
-    uint16_t * RESTRICT U3 = (uint16_t *)(void *)(((uint8_t *)U2) + r);
-    uint16_t * RESTRICT U4 = (uint16_t *)(void *)(((uint8_t *)U3) + r);
+    u16 * RESTRICT U0 = (u16 *)(void *)U;
+    u16 * RESTRICT U1 = (u16 *)(void *)(((u8 *)U0) + r);
+    u16 * RESTRICT U2 = (u16 *)(void *)(((u8 *)U1) + r);
+    u16 * RESTRICT U3 = (u16 *)(void *)(((u8 *)U2) + r);
+    u16 * RESTRICT U4 = (u16 *)(void *)(((u8 *)U3) + r);
 
     fast_uint_t i, p0 = *i0, p1 = *i1, p2 = *i2, p3 = *i3, p4 = *i4;
 
     for (i = 0; i != k; ++i) {
-        uint16_t c0 = fastbits[p0 >> shift];
+        u16 c0 = fastbits[p0 >> shift];
         if (bucket2[c0] <= p0) {
             do {
                 c0++;
@@ -10627,7 +10627,7 @@ static void libsais_unbwt_decode_5(uint8_t * RESTRICT U, sa_uint_t * RESTRICT P,
         }
         p0 = P[p0];
         U0[i] = libsais_bswap16(c0);
-        uint16_t c1 = fastbits[p1 >> shift];
+        u16 c1 = fastbits[p1 >> shift];
         if (bucket2[c1] <= p1) {
             do {
                 c1++;
@@ -10635,7 +10635,7 @@ static void libsais_unbwt_decode_5(uint8_t * RESTRICT U, sa_uint_t * RESTRICT P,
         }
         p1 = P[p1];
         U1[i] = libsais_bswap16(c1);
-        uint16_t c2 = fastbits[p2 >> shift];
+        u16 c2 = fastbits[p2 >> shift];
         if (bucket2[c2] <= p2) {
             do {
                 c2++;
@@ -10643,7 +10643,7 @@ static void libsais_unbwt_decode_5(uint8_t * RESTRICT U, sa_uint_t * RESTRICT P,
         }
         p2 = P[p2];
         U2[i] = libsais_bswap16(c2);
-        uint16_t c3 = fastbits[p3 >> shift];
+        u16 c3 = fastbits[p3 >> shift];
         if (bucket2[c3] <= p3) {
             do {
                 c3++;
@@ -10651,7 +10651,7 @@ static void libsais_unbwt_decode_5(uint8_t * RESTRICT U, sa_uint_t * RESTRICT P,
         }
         p3 = P[p3];
         U3[i] = libsais_bswap16(c3);
-        uint16_t c4 = fastbits[p4 >> shift];
+        u16 c4 = fastbits[p4 >> shift];
         if (bucket2[c4] <= p4) {
             do {
                 c4++;
@@ -10669,21 +10669,21 @@ static void libsais_unbwt_decode_5(uint8_t * RESTRICT U, sa_uint_t * RESTRICT P,
 }
 
 static void libsais_unbwt_decode_6(
-    uint8_t * RESTRICT U, sa_uint_t * RESTRICT P, sa_uint_t * RESTRICT bucket2,
-    uint16_t * RESTRICT fastbits, fast_uint_t shift, fast_uint_t r,
+    u8 * RESTRICT U, sa_uint_t * RESTRICT P, sa_uint_t * RESTRICT bucket2,
+    u16 * RESTRICT fastbits, fast_uint_t shift, fast_uint_t r,
     fast_uint_t * i0, fast_uint_t * i1, fast_uint_t * i2, fast_uint_t * i3,
     fast_uint_t * i4, fast_uint_t * i5, fast_uint_t k) {
-    uint16_t * RESTRICT U0 = (uint16_t *)(void *)U;
-    uint16_t * RESTRICT U1 = (uint16_t *)(void *)(((uint8_t *)U0) + r);
-    uint16_t * RESTRICT U2 = (uint16_t *)(void *)(((uint8_t *)U1) + r);
-    uint16_t * RESTRICT U3 = (uint16_t *)(void *)(((uint8_t *)U2) + r);
-    uint16_t * RESTRICT U4 = (uint16_t *)(void *)(((uint8_t *)U3) + r);
-    uint16_t * RESTRICT U5 = (uint16_t *)(void *)(((uint8_t *)U4) + r);
+    u16 * RESTRICT U0 = (u16 *)(void *)U;
+    u16 * RESTRICT U1 = (u16 *)(void *)(((u8 *)U0) + r);
+    u16 * RESTRICT U2 = (u16 *)(void *)(((u8 *)U1) + r);
+    u16 * RESTRICT U3 = (u16 *)(void *)(((u8 *)U2) + r);
+    u16 * RESTRICT U4 = (u16 *)(void *)(((u8 *)U3) + r);
+    u16 * RESTRICT U5 = (u16 *)(void *)(((u8 *)U4) + r);
 
     fast_uint_t i, p0 = *i0, p1 = *i1, p2 = *i2, p3 = *i3, p4 = *i4, p5 = *i5;
 
     for (i = 0; i != k; ++i) {
-        uint16_t c0 = fastbits[p0 >> shift];
+        u16 c0 = fastbits[p0 >> shift];
         if (bucket2[c0] <= p0) {
             do {
                 c0++;
@@ -10691,7 +10691,7 @@ static void libsais_unbwt_decode_6(
         }
         p0 = P[p0];
         U0[i] = libsais_bswap16(c0);
-        uint16_t c1 = fastbits[p1 >> shift];
+        u16 c1 = fastbits[p1 >> shift];
         if (bucket2[c1] <= p1) {
             do {
                 c1++;
@@ -10699,7 +10699,7 @@ static void libsais_unbwt_decode_6(
         }
         p1 = P[p1];
         U1[i] = libsais_bswap16(c1);
-        uint16_t c2 = fastbits[p2 >> shift];
+        u16 c2 = fastbits[p2 >> shift];
         if (bucket2[c2] <= p2) {
             do {
                 c2++;
@@ -10707,7 +10707,7 @@ static void libsais_unbwt_decode_6(
         }
         p2 = P[p2];
         U2[i] = libsais_bswap16(c2);
-        uint16_t c3 = fastbits[p3 >> shift];
+        u16 c3 = fastbits[p3 >> shift];
         if (bucket2[c3] <= p3) {
             do {
                 c3++;
@@ -10715,7 +10715,7 @@ static void libsais_unbwt_decode_6(
         }
         p3 = P[p3];
         U3[i] = libsais_bswap16(c3);
-        uint16_t c4 = fastbits[p4 >> shift];
+        u16 c4 = fastbits[p4 >> shift];
         if (bucket2[c4] <= p4) {
             do {
                 c4++;
@@ -10723,7 +10723,7 @@ static void libsais_unbwt_decode_6(
         }
         p4 = P[p4];
         U4[i] = libsais_bswap16(c4);
-        uint16_t c5 = fastbits[p5 >> shift];
+        u16 c5 = fastbits[p5 >> shift];
         if (bucket2[c5] <= p5) {
             do {
                 c5++;
@@ -10742,23 +10742,23 @@ static void libsais_unbwt_decode_6(
 }
 
 static void libsais_unbwt_decode_7(
-    uint8_t * RESTRICT U, sa_uint_t * RESTRICT P, sa_uint_t * RESTRICT bucket2,
-    uint16_t * RESTRICT fastbits, fast_uint_t shift, fast_uint_t r,
+    u8 * RESTRICT U, sa_uint_t * RESTRICT P, sa_uint_t * RESTRICT bucket2,
+    u16 * RESTRICT fastbits, fast_uint_t shift, fast_uint_t r,
     fast_uint_t * i0, fast_uint_t * i1, fast_uint_t * i2, fast_uint_t * i3,
     fast_uint_t * i4, fast_uint_t * i5, fast_uint_t * i6, fast_uint_t k) {
-    uint16_t * RESTRICT U0 = (uint16_t *)(void *)U;
-    uint16_t * RESTRICT U1 = (uint16_t *)(void *)(((uint8_t *)U0) + r);
-    uint16_t * RESTRICT U2 = (uint16_t *)(void *)(((uint8_t *)U1) + r);
-    uint16_t * RESTRICT U3 = (uint16_t *)(void *)(((uint8_t *)U2) + r);
-    uint16_t * RESTRICT U4 = (uint16_t *)(void *)(((uint8_t *)U3) + r);
-    uint16_t * RESTRICT U5 = (uint16_t *)(void *)(((uint8_t *)U4) + r);
-    uint16_t * RESTRICT U6 = (uint16_t *)(void *)(((uint8_t *)U5) + r);
+    u16 * RESTRICT U0 = (u16 *)(void *)U;
+    u16 * RESTRICT U1 = (u16 *)(void *)(((u8 *)U0) + r);
+    u16 * RESTRICT U2 = (u16 *)(void *)(((u8 *)U1) + r);
+    u16 * RESTRICT U3 = (u16 *)(void *)(((u8 *)U2) + r);
+    u16 * RESTRICT U4 = (u16 *)(void *)(((u8 *)U3) + r);
+    u16 * RESTRICT U5 = (u16 *)(void *)(((u8 *)U4) + r);
+    u16 * RESTRICT U6 = (u16 *)(void *)(((u8 *)U5) + r);
 
     fast_uint_t i, p0 = *i0, p1 = *i1, p2 = *i2, p3 = *i3, p4 = *i4, p5 = *i5,
                    p6 = *i6;
 
     for (i = 0; i != k; ++i) {
-        uint16_t c0 = fastbits[p0 >> shift];
+        u16 c0 = fastbits[p0 >> shift];
         if (bucket2[c0] <= p0) {
             do {
                 c0++;
@@ -10766,7 +10766,7 @@ static void libsais_unbwt_decode_7(
         }
         p0 = P[p0];
         U0[i] = libsais_bswap16(c0);
-        uint16_t c1 = fastbits[p1 >> shift];
+        u16 c1 = fastbits[p1 >> shift];
         if (bucket2[c1] <= p1) {
             do {
                 c1++;
@@ -10774,7 +10774,7 @@ static void libsais_unbwt_decode_7(
         }
         p1 = P[p1];
         U1[i] = libsais_bswap16(c1);
-        uint16_t c2 = fastbits[p2 >> shift];
+        u16 c2 = fastbits[p2 >> shift];
         if (bucket2[c2] <= p2) {
             do {
                 c2++;
@@ -10782,7 +10782,7 @@ static void libsais_unbwt_decode_7(
         }
         p2 = P[p2];
         U2[i] = libsais_bswap16(c2);
-        uint16_t c3 = fastbits[p3 >> shift];
+        u16 c3 = fastbits[p3 >> shift];
         if (bucket2[c3] <= p3) {
             do {
                 c3++;
@@ -10790,7 +10790,7 @@ static void libsais_unbwt_decode_7(
         }
         p3 = P[p3];
         U3[i] = libsais_bswap16(c3);
-        uint16_t c4 = fastbits[p4 >> shift];
+        u16 c4 = fastbits[p4 >> shift];
         if (bucket2[c4] <= p4) {
             do {
                 c4++;
@@ -10798,7 +10798,7 @@ static void libsais_unbwt_decode_7(
         }
         p4 = P[p4];
         U4[i] = libsais_bswap16(c4);
-        uint16_t c5 = fastbits[p5 >> shift];
+        u16 c5 = fastbits[p5 >> shift];
         if (bucket2[c5] <= p5) {
             do {
                 c5++;
@@ -10806,7 +10806,7 @@ static void libsais_unbwt_decode_7(
         }
         p5 = P[p5];
         U5[i] = libsais_bswap16(c5);
-        uint16_t c6 = fastbits[p6 >> shift];
+        u16 c6 = fastbits[p6 >> shift];
         if (bucket2[c6] <= p6) {
             do {
                 c6++;
@@ -10826,25 +10826,25 @@ static void libsais_unbwt_decode_7(
 }
 
 static void libsais_unbwt_decode_8(
-    uint8_t * RESTRICT U, sa_uint_t * RESTRICT P, sa_uint_t * RESTRICT bucket2,
-    uint16_t * RESTRICT fastbits, fast_uint_t shift, fast_uint_t r,
+    u8 * RESTRICT U, sa_uint_t * RESTRICT P, sa_uint_t * RESTRICT bucket2,
+    u16 * RESTRICT fastbits, fast_uint_t shift, fast_uint_t r,
     fast_uint_t * i0, fast_uint_t * i1, fast_uint_t * i2, fast_uint_t * i3,
     fast_uint_t * i4, fast_uint_t * i5, fast_uint_t * i6, fast_uint_t * i7,
     fast_uint_t k) {
-    uint16_t * RESTRICT U0 = (uint16_t *)(void *)U;
-    uint16_t * RESTRICT U1 = (uint16_t *)(void *)(((uint8_t *)U0) + r);
-    uint16_t * RESTRICT U2 = (uint16_t *)(void *)(((uint8_t *)U1) + r);
-    uint16_t * RESTRICT U3 = (uint16_t *)(void *)(((uint8_t *)U2) + r);
-    uint16_t * RESTRICT U4 = (uint16_t *)(void *)(((uint8_t *)U3) + r);
-    uint16_t * RESTRICT U5 = (uint16_t *)(void *)(((uint8_t *)U4) + r);
-    uint16_t * RESTRICT U6 = (uint16_t *)(void *)(((uint8_t *)U5) + r);
-    uint16_t * RESTRICT U7 = (uint16_t *)(void *)(((uint8_t *)U6) + r);
+    u16 * RESTRICT U0 = (u16 *)(void *)U;
+    u16 * RESTRICT U1 = (u16 *)(void *)(((u8 *)U0) + r);
+    u16 * RESTRICT U2 = (u16 *)(void *)(((u8 *)U1) + r);
+    u16 * RESTRICT U3 = (u16 *)(void *)(((u8 *)U2) + r);
+    u16 * RESTRICT U4 = (u16 *)(void *)(((u8 *)U3) + r);
+    u16 * RESTRICT U5 = (u16 *)(void *)(((u8 *)U4) + r);
+    u16 * RESTRICT U6 = (u16 *)(void *)(((u8 *)U5) + r);
+    u16 * RESTRICT U7 = (u16 *)(void *)(((u8 *)U6) + r);
 
     fast_uint_t i, p0 = *i0, p1 = *i1, p2 = *i2, p3 = *i3, p4 = *i4, p5 = *i5,
                    p6 = *i6, p7 = *i7;
 
     for (i = 0; i != k; ++i) {
-        uint16_t c0 = fastbits[p0 >> shift];
+        u16 c0 = fastbits[p0 >> shift];
         if (bucket2[c0] <= p0) {
             do {
                 c0++;
@@ -10852,7 +10852,7 @@ static void libsais_unbwt_decode_8(
         }
         p0 = P[p0];
         U0[i] = libsais_bswap16(c0);
-        uint16_t c1 = fastbits[p1 >> shift];
+        u16 c1 = fastbits[p1 >> shift];
         if (bucket2[c1] <= p1) {
             do {
                 c1++;
@@ -10860,7 +10860,7 @@ static void libsais_unbwt_decode_8(
         }
         p1 = P[p1];
         U1[i] = libsais_bswap16(c1);
-        uint16_t c2 = fastbits[p2 >> shift];
+        u16 c2 = fastbits[p2 >> shift];
         if (bucket2[c2] <= p2) {
             do {
                 c2++;
@@ -10868,7 +10868,7 @@ static void libsais_unbwt_decode_8(
         }
         p2 = P[p2];
         U2[i] = libsais_bswap16(c2);
-        uint16_t c3 = fastbits[p3 >> shift];
+        u16 c3 = fastbits[p3 >> shift];
         if (bucket2[c3] <= p3) {
             do {
                 c3++;
@@ -10876,7 +10876,7 @@ static void libsais_unbwt_decode_8(
         }
         p3 = P[p3];
         U3[i] = libsais_bswap16(c3);
-        uint16_t c4 = fastbits[p4 >> shift];
+        u16 c4 = fastbits[p4 >> shift];
         if (bucket2[c4] <= p4) {
             do {
                 c4++;
@@ -10884,7 +10884,7 @@ static void libsais_unbwt_decode_8(
         }
         p4 = P[p4];
         U4[i] = libsais_bswap16(c4);
-        uint16_t c5 = fastbits[p5 >> shift];
+        u16 c5 = fastbits[p5 >> shift];
         if (bucket2[c5] <= p5) {
             do {
                 c5++;
@@ -10892,7 +10892,7 @@ static void libsais_unbwt_decode_8(
         }
         p5 = P[p5];
         U5[i] = libsais_bswap16(c5);
-        uint16_t c6 = fastbits[p6 >> shift];
+        u16 c6 = fastbits[p6 >> shift];
         if (bucket2[c6] <= p6) {
             do {
                 c6++;
@@ -10900,7 +10900,7 @@ static void libsais_unbwt_decode_8(
         }
         p6 = P[p6];
         U6[i] = libsais_bswap16(c6);
-        uint16_t c7 = fastbits[p7 >> shift];
+        u16 c7 = fastbits[p7 >> shift];
         if (bucket2[c7] <= p7) {
             do {
                 c7++;
@@ -10920,11 +10920,11 @@ static void libsais_unbwt_decode_8(
     *i7 = p7;
 }
 
-static void libsais_unbwt_decode(uint8_t * RESTRICT U, sa_uint_t * RESTRICT P,
+static void libsais_unbwt_decode(u8 * RESTRICT U, sa_uint_t * RESTRICT P,
                                  sa_sint_t n, sa_sint_t r,
                                  const sa_uint_t * RESTRICT I,
                                  sa_uint_t * RESTRICT bucket2,
-                                 uint16_t * RESTRICT fastbits,
+                                 u16 * RESTRICT fastbits,
                                  fast_sint_t blocks, fast_uint_t reminder) {
     fast_uint_t shift = 0;
     while ((n >> shift) > (1 << UNBWT_FASTBITS)) {
@@ -11010,12 +11010,12 @@ static void libsais_unbwt_decode(uint8_t * RESTRICT U, sa_uint_t * RESTRICT P,
     }
 }
 
-static void libsais_unbwt_decode_omp(const uint8_t * RESTRICT T,
-                                     uint8_t * RESTRICT U,
+static void libsais_unbwt_decode_omp(const u8 * RESTRICT T,
+                                     u8 * RESTRICT U,
                                      sa_uint_t * RESTRICT P, sa_sint_t n,
                                      sa_sint_t r, const sa_uint_t * RESTRICT I,
                                      sa_uint_t * RESTRICT bucket2,
-                                     uint16_t * RESTRICT fastbits,
+                                     u16 * RESTRICT fastbits,
                                      sa_sint_t threads) {
     fast_uint_t lastc = T[0];
     fast_sint_t blocks = 1 + (((fast_sint_t)n - 1) / (fast_sint_t)r);
@@ -11053,14 +11053,14 @@ static void libsais_unbwt_decode_omp(const uint8_t * RESTRICT T,
             omp_thread_num < omp_num_threads - 1 ? (fast_uint_t)r : reminder);
     }
 
-    U[n - 1] = (uint8_t)lastc;
+    U[n - 1] = (u8)lastc;
 }
 
 static sa_sint_t libsais_unbwt_core(
-    const uint8_t * RESTRICT T, uint8_t * RESTRICT U, sa_uint_t * RESTRICT P,
+    const u8 * RESTRICT T, u8 * RESTRICT U, sa_uint_t * RESTRICT P,
     sa_sint_t n, const sa_sint_t * freq, sa_sint_t r,
     const sa_uint_t * RESTRICT I, sa_uint_t * RESTRICT bucket2,
-    uint16_t * RESTRICT fastbits, sa_uint_t * RESTRICT buckets,
+    u16 * RESTRICT fastbits, sa_uint_t * RESTRICT buckets,
     sa_sint_t threads) {
 #if defined(_OPENMP)
     if (threads > 1 && n >= 262144) {
@@ -11078,7 +11078,7 @@ static sa_sint_t libsais_unbwt_core(
     return 0;
 }
 
-static sa_sint_t libsais_unbwt_main(const uint8_t * T, uint8_t * U,
+static sa_sint_t libsais_unbwt_main(const u8 * T, u8 * U,
                                     sa_uint_t * P, sa_sint_t n,
                                     const sa_sint_t * freq, sa_sint_t r,
                                     const sa_uint_t * I, sa_sint_t threads) {
@@ -11089,8 +11089,8 @@ static sa_sint_t libsais_unbwt_main(const uint8_t * T, uint8_t * U,
 
     sa_uint_t * RESTRICT bucket2 = (sa_uint_t *)libsais_alloc_aligned(
         ALPHABET_SIZE * ALPHABET_SIZE * sizeof(sa_uint_t), 4096);
-    uint16_t * RESTRICT fastbits = (uint16_t *)libsais_alloc_aligned(
-        ((size_t)1 + (size_t)(n >> shift)) * sizeof(uint16_t), 4096);
+    u16 * RESTRICT fastbits = (u16 *)libsais_alloc_aligned(
+        ((size_t)1 + (size_t)(n >> shift)) * sizeof(u16), 4096);
     sa_uint_t * RESTRICT buckets =
         threads > 1 && n >= 262144
             ? (sa_uint_t *)libsais_alloc_aligned(
@@ -11114,7 +11114,7 @@ static sa_sint_t libsais_unbwt_main(const uint8_t * T, uint8_t * U,
 }
 
 static sa_sint_t libsais_unbwt_main_ctx(const LIBSAIS_UNBWT_CONTEXT * ctx,
-                                        const uint8_t * T, uint8_t * U,
+                                        const u8 * T, u8 * U,
                                         sa_uint_t * P, sa_sint_t n,
                                         const sa_sint_t * freq, sa_sint_t r,
                                         const sa_uint_t * I) {
@@ -11134,20 +11134,20 @@ void libsais_unbwt_free_ctx(void * ctx) {
     libsais_unbwt_free_ctx_main((LIBSAIS_UNBWT_CONTEXT *)ctx);
 }
 
-int32_t libsais_unbwt(const uint8_t * T, uint8_t * U, int32_t * A, int32_t n,
-                      const int32_t * freq, int32_t i) {
+s32 libsais_unbwt(const u8 * T, u8 * U, s32 * A, s32 n,
+                      const s32 * freq, s32 i) {
     return libsais_unbwt_aux(T, U, A, n, freq, n, &i);
 }
 
-int32_t libsais_unbwt_ctx(const void * ctx, const uint8_t * T, uint8_t * U,
-                          int32_t * A, int32_t n, const int32_t * freq,
-                          int32_t i) {
+s32 libsais_unbwt_ctx(const void * ctx, const u8 * T, u8 * U,
+                          s32 * A, s32 n, const s32 * freq,
+                          s32 i) {
     return libsais_unbwt_aux_ctx(ctx, T, U, A, n, freq, n, &i);
 }
 
-int32_t libsais_unbwt_aux(const uint8_t * T, uint8_t * U, int32_t * A,
-                          int32_t n, const int32_t * freq, int32_t r,
-                          const int32_t * I) {
+s32 libsais_unbwt_aux(const u8 * T, u8 * U, s32 * A,
+                          s32 n, const s32 * freq, s32 r,
+                          const s32 * I) {
     if ((T == NULL) || (U == NULL) || (A == NULL) || (n < 0) ||
         ((r != n) && ((r < 2) || ((r & (r - 1)) != 0))) || (I == NULL)) {
         return -1;
@@ -11172,9 +11172,9 @@ int32_t libsais_unbwt_aux(const uint8_t * T, uint8_t * U, int32_t * A,
                               (const sa_uint_t *)I, 1);
 }
 
-int32_t libsais_unbwt_aux_ctx(const void * ctx, const uint8_t * T, uint8_t * U,
-                              int32_t * A, int32_t n, const int32_t * freq,
-                              int32_t r, const int32_t * I) {
+s32 libsais_unbwt_aux_ctx(const void * ctx, const u8 * T, u8 * U,
+                              s32 * A, s32 n, const s32 * freq,
+                              s32 r, const s32 * I) {
     if ((T == NULL) || (U == NULL) || (A == NULL) || (n < 0) ||
         ((r != n) && ((r < 2) || ((r & (r - 1)) != 0))) || (I == NULL)) {
         return -1;
@@ -11202,7 +11202,7 @@ int32_t libsais_unbwt_aux_ctx(const void * ctx, const uint8_t * T, uint8_t * U,
 
 #if defined(_OPENMP)
 
-void * libsais_unbwt_create_ctx_omp(int32_t threads) {
+void * libsais_unbwt_create_ctx_omp(s32 threads) {
     if (threads < 0) {
         return NULL;
     }
@@ -11211,15 +11211,15 @@ void * libsais_unbwt_create_ctx_omp(int32_t threads) {
     return (void *)libsais_unbwt_create_ctx_main(threads);
 }
 
-int32_t libsais_unbwt_omp(const uint8_t * T, uint8_t * U, int32_t * A,
-                          int32_t n, const int32_t * freq, int32_t i,
-                          int32_t threads) {
+s32 libsais_unbwt_omp(const u8 * T, u8 * U, s32 * A,
+                          s32 n, const s32 * freq, s32 i,
+                          s32 threads) {
     return libsais_unbwt_aux_omp(T, U, A, n, freq, n, &i, threads);
 }
 
-int32_t libsais_unbwt_aux_omp(const uint8_t * T, uint8_t * U, int32_t * A,
-                              int32_t n, const int32_t * freq, int32_t r,
-                              const int32_t * I, int32_t threads) {
+s32 libsais_unbwt_aux_omp(const u8 * T, u8 * U, s32 * A,
+                              s32 n, const s32 * freq, s32 r,
+                              const s32 * I, s32 threads) {
     if ((T == NULL) || (U == NULL) || (A == NULL) || (n < 0) ||
         ((r != n) && ((r < 2) || ((r & (r - 1)) != 0))) || (I == NULL) ||
         (threads < 0)) {
@@ -11308,7 +11308,7 @@ static void libsais_compute_phi_omp(const sa_sint_t * RESTRICT SA,
     }
 }
 
-static void libsais_compute_plcp(const uint8_t * RESTRICT T,
+static void libsais_compute_plcp(const u8 * RESTRICT T,
                                  sa_sint_t * RESTRICT PLCP, fast_sint_t n,
                                  fast_sint_t omp_block_start,
                                  fast_sint_t omp_block_size) {
@@ -11340,7 +11340,7 @@ static void libsais_compute_plcp(const uint8_t * RESTRICT T,
     }
 }
 
-static void libsais_compute_plcp_omp(const uint8_t * RESTRICT T,
+static void libsais_compute_plcp_omp(const u8 * RESTRICT T,
                                      sa_sint_t * RESTRICT PLCP, sa_sint_t n,
                                      sa_sint_t threads) {
 #if defined(_OPENMP)
@@ -11422,8 +11422,8 @@ static void libsais_compute_lcp_omp(const sa_sint_t * RESTRICT PLCP,
     }
 }
 
-int32_t libsais_plcp(const uint8_t * T, const int32_t * SA, int32_t * PLCP,
-                     int32_t n) {
+s32 libsais_plcp(const u8 * T, const s32 * SA, s32 * PLCP,
+                     s32 n) {
     if ((T == NULL) || (SA == NULL) || (PLCP == NULL) || (n < 0)) {
         return -1;
     } else if (n <= 1) {
@@ -11439,8 +11439,8 @@ int32_t libsais_plcp(const uint8_t * T, const int32_t * SA, int32_t * PLCP,
     return 0;
 }
 
-int32_t libsais_lcp(const int32_t * PLCP, const int32_t * SA, int32_t * LCP,
-                    int32_t n) {
+s32 libsais_lcp(const s32 * PLCP, const s32 * SA, s32 * LCP,
+                    s32 n) {
     if ((PLCP == NULL) || (SA == NULL) || (LCP == NULL) || (n < 0)) {
         return -1;
     } else if (n <= 1) {
@@ -11457,8 +11457,8 @@ int32_t libsais_lcp(const int32_t * PLCP, const int32_t * SA, int32_t * LCP,
 
 #if defined(_OPENMP)
 
-int32_t libsais_plcp_omp(const uint8_t * T, const int32_t * SA, int32_t * PLCP,
-                         int32_t n, int32_t threads) {
+s32 libsais_plcp_omp(const u8 * T, const s32 * SA, s32 * PLCP,
+                         s32 n, s32 threads) {
     if ((T == NULL) || (SA == NULL) || (PLCP == NULL) || (n < 0) ||
         (threads < 0)) {
         return -1;
@@ -11477,8 +11477,8 @@ int32_t libsais_plcp_omp(const uint8_t * T, const int32_t * SA, int32_t * PLCP,
     return 0;
 }
 
-int32_t libsais_lcp_omp(const int32_t * PLCP, const int32_t * SA, int32_t * LCP,
-                        int32_t n, int32_t threads) {
+s32 libsais_lcp_omp(const s32 * PLCP, const s32 * SA, s32 * LCP,
+                        s32 n, s32 threads) {
     if ((PLCP == NULL) || (SA == NULL) || (LCP == NULL) || (n < 0) ||
         (threads < 0)) {
         return -1;
diff --git a/src/main.c b/src/main.c
index 24adafc..09a1575 100644
--- a/src/main.c
+++ b/src/main.c
@@ -31,16 +31,16 @@
 #include "rle.h"
 #include "srt.h"
 
-void encode_block(int output_des, int32_t bytes_read, uint8_t * buffer,
-                  uint8_t * output, int32_t * sais_array,
+void encode_block(int output_des, s32 bytes_read, u8 * buffer,
+                  u8 * output, s32 * sais_array,
                   struct srt_state * srt_state, state * cm_state,
-                  uint32_t block_size, struct mtf_state * mtf_state) {
-    uint32_t crc32 = crc32sum(1, buffer, bytes_read);
+                  u32 block_size, struct mtf_state * mtf_state) {
+    u32 crc32 = crc32sum(1, buffer, bytes_read);
 
-    int32_t new_size = mrlec(buffer, bytes_read, output);
-    int32_t bwt_index =
+    s32 new_size = mrlec(buffer, bytes_read, output);
+    s32 bwt_index =
         libsais_bwt(output, output, sais_array, new_size, 16, NULL);
-    int32_t new_size2;
+    s32 new_size2;
 
     if (new_size > MiB(3)) {
         new_size2 = srt_encode(srt_state, output, buffer, new_size);
@@ -53,38 +53,38 @@ void encode_block(int output_des, int32_t bytes_read, uint8_t * buffer,
     cm_state->out_queue = output;
     cm_state->output_ptr = 0;
     if (new_size2 != -1)
-        for (int32_t i = 0; i < new_size2; i++)
+        for (s32 i = 0; i < new_size2; i++)
             encode_byte(cm_state, buffer[i]);
     else
-        for (int32_t i = 0; i < new_size; i++) encode_byte(cm_state, buffer[i]);
+        for (s32 i = 0; i < new_size; i++) encode_byte(cm_state, buffer[i]);
     flush(cm_state);
-    int32_t new_size3 = cm_state->output_ptr;
-
-    write(output_des, &crc32, sizeof(uint32_t));
-    write(output_des, &bytes_read, sizeof(int32_t));
-    write(output_des, &bwt_index, sizeof(int32_t));
-    write(output_des, &new_size, sizeof(int32_t));
-    write(output_des, &new_size2, sizeof(int32_t));
-    write(output_des, &new_size3, sizeof(int32_t));
+    s32 new_size3 = cm_state->output_ptr;
+
+    write(output_des, &crc32, sizeof(u32));
+    write(output_des, &bytes_read, sizeof(s32));
+    write(output_des, &bwt_index, sizeof(s32));
+    write(output_des, &new_size, sizeof(s32));
+    write(output_des, &new_size2, sizeof(s32));
+    write(output_des, &new_size3, sizeof(s32));
     write(output_des, output, new_size3);
 }
 
-int decode_block(int input_des, int output_des, uint8_t * buffer,
-                 uint8_t * output, int32_t * sais_array,
+int decode_block(int input_des, int output_des, u8 * buffer,
+                 u8 * output, s32 * sais_array,
                  struct srt_state * srt_state, state * cm_state,
                  struct mtf_state * mtf_state) {
 #define safe_read(fd, buf, size) \
     if (read(fd, buf, size) != size) return 1;
 
-    uint32_t crc32;
-    int32_t bytes_read, bwt_index, new_size, new_size2, new_size3;
+    u32 crc32;
+    s32 bytes_read, bwt_index, new_size, new_size2, new_size3;
 
-    safe_read(input_des, &crc32, sizeof(uint32_t));
-    safe_read(input_des, &bytes_read, sizeof(int32_t));
-    safe_read(input_des, &bwt_index, sizeof(int32_t));
-    safe_read(input_des, &new_size, sizeof(int32_t));
-    safe_read(input_des, &new_size2, sizeof(int32_t));
-    safe_read(input_des, &new_size3, sizeof(int32_t));
+    safe_read(input_des, &crc32, sizeof(u32));
+    safe_read(input_des, &bytes_read, sizeof(s32));
+    safe_read(input_des, &bwt_index, sizeof(s32));
+    safe_read(input_des, &new_size, sizeof(s32));
+    safe_read(input_des, &new_size2, sizeof(s32));
+    safe_read(input_des, &new_size3, sizeof(s32));
     safe_read(input_des, buffer, new_size3);
 
     begin(cm_state);
@@ -93,11 +93,11 @@ int decode_block(int input_des, int output_des, uint8_t * buffer,
     cm_state->input_max = new_size3;
     init(cm_state);
     if (new_size2 != -1) {
-        for (int32_t i = 0; i < new_size2; i++)
+        for (s32 i = 0; i < new_size2; i++)
             output[i] = decode_byte(cm_state);
         srt_decode(srt_state, output, buffer, new_size2);
     } else {
-        for (int32_t i = 0; i < new_size; i++)
+        for (s32 i = 0; i < new_size; i++)
             output[i] = decode_byte(cm_state);
         mtf_decode(mtf_state, output, buffer, new_size);
     }
@@ -114,7 +114,7 @@ int decode_block(int input_des, int output_des, uint8_t * buffer,
 int main(int argc, char * argv[]) {
     int mode = 0;  // -1: encode, 0: unspecified, 1: encode
     char *input = NULL, *output = NULL;     // input and output file names
-    uint32_t block_size = 8 * 1024 * 1024;  // the block size
+    u32 block_size = 8 * 1024 * 1024;  // the block size
 
     for (int i = 1; i < argc; i++) {
         if (argv[i][0] == '-') {
@@ -181,15 +181,15 @@ int main(int argc, char * argv[]) {
 
     if (mode == 1) {
         // Encode
-        uint8_t * buffer = malloc(block_size + block_size / 3);
-        uint8_t * output = malloc(block_size + block_size / 3);
-        int32_t * sais_array = malloc(block_size * sizeof(int32_t) + 16);
-        int32_t bytes_read;
+        u8 * buffer = malloc(block_size + block_size / 3);
+        u8 * output = malloc(block_size + block_size / 3);
+        s32 * sais_array = malloc(block_size * sizeof(s32) + 16);
+        s32 bytes_read;
 
         state s;
 
         write(output_des, "BZ3v1", 5);
-        write(output_des, &block_size, sizeof(uint32_t));
+        write(output_des, &block_size, sizeof(u32));
 
         while ((bytes_read = read(input_des, buffer, block_size)) > 0) {
             encode_block(output_des, bytes_read, buffer, output, sais_array,
@@ -207,10 +207,10 @@ int main(int argc, char * argv[]) {
             fprintf(stderr, "Invalid signature.\n");
             return 1;
         }
-        read(input_des, &block_size, sizeof(uint32_t));
-        uint8_t * buffer = malloc(block_size + block_size / 2);
-        uint8_t * output = malloc(block_size + block_size / 2);
-        int32_t * sais_array = malloc(block_size * sizeof(int32_t) + 16);
+        read(input_des, &block_size, sizeof(u32));
+        u8 * buffer = malloc(block_size + block_size / 2);
+        u8 * output = malloc(block_size + block_size / 2);
+        s32 * sais_array = malloc(block_size * sizeof(s32) + 16);
 
         state s;
 
diff --git a/src/mtf.c b/src/mtf.c
index d2a0d94..1f1f61e 100644
--- a/src/mtf.c
+++ b/src/mtf.c
@@ -19,15 +19,15 @@
 
 #include "mtf.h"
 
-void mtf_encode(struct mtf_state * mtf, uint8_t * src, uint8_t * dst,
-                uint32_t count) {
-    for (uint32_t i = 0; i < 256; i++) {
+void mtf_encode(struct mtf_state * mtf, u8 * src, u8 * dst,
+                u32 count) {
+    for (u32 i = 0; i < 256; i++) {
         mtf->prev[i] = mtf->curr[i] = 0;
         mtf->symbols[i] = mtf->ranks[i] = i;
     }
 
-    for (uint32_t i = 0; i < count; i++) {
-        uint32_t r = mtf->symbols[src[i]];
+    for (u32 i = 0; i < count; i++) {
+        u32 r = mtf->symbols[src[i]];
         dst[i] = r;
 
         mtf->prev[src[i]] = mtf->curr[src[i]] = i;
@@ -42,18 +42,18 @@ void mtf_encode(struct mtf_state * mtf, uint8_t * src, uint8_t * dst,
     }
 }
 
-void mtf_decode(struct mtf_state * mtf, uint8_t * src, uint8_t * dst,
-                uint32_t count) {
-    for (uint32_t i = 0; i < 256; i++) {
+void mtf_decode(struct mtf_state * mtf, u8 * src, u8 * dst,
+                u32 count) {
+    for (u32 i = 0; i < 256; i++) {
         mtf->prev[i] = mtf->curr[i] = 0;
         mtf->ranks[i] = i;
     }
 
-    for (uint32_t i = 0; i < count; i++) {
-        uint32_t r = src[i] & 0xFF;
+    for (u32 i = 0; i < count; i++) {
+        u32 r = src[i] & 0xFF;
 
-        const uint32_t c = mtf->ranks[r];
-        dst[i] = (int8_t)c;
+        const u32 c = mtf->ranks[r];
+        dst[i] = (s8)c;
 
         mtf->prev[c] = mtf->curr[c] = i;
 
diff --git a/src/rle.c b/src/rle.c
index 3b2c5da..f80429e 100644
--- a/src/rle.c
+++ b/src/rle.c
@@ -6,12 +6,12 @@
 #define buffer_write(__ch, __out) *__out++ = (__ch)
 #define buffer_read(in, in_) (in < in_ ? (*in++) : -1)
 
-int32_t mrlec(uint8_t * in, int32_t inlen, uint8_t * out) {
-    uint8_t *ip = in, *in_ = in + inlen, *op = out;
-    int32_t i;
-    int32_t c, pc = -1;
-    int32_t t[256] = { 0 };
-    int32_t run = 0;
+s32 mrlec(u8 * in, s32 inlen, u8 * out) {
+    u8 *ip = in, *in_ = in + inlen, *op = out;
+    s32 i;
+    s32 c, pc = -1;
+    s32 t[256] = { 0 };
+    s32 run = 0;
     while ((c = buffer_read(ip, in_)) != -1) {
         if (c == pc)
             t[c] += (++run % 255) != 0;
@@ -20,7 +20,7 @@ int32_t mrlec(uint8_t * in, int32_t inlen, uint8_t * out) {
         pc = c;
     }
     for (i = 0; i < 32; ++i) {
-        int32_t j;
+        s32 j;
         c = 0;
         for (j = 0; j < 8; ++j) c += (t[i * 8 + j] > 0) << j;
         buffer_write(c, op);
@@ -45,16 +45,16 @@ int32_t mrlec(uint8_t * in, int32_t inlen, uint8_t * out) {
     return op - out;
 }
 
-int32_t mrled(uint8_t * in, uint8_t * out, int32_t outlen) {
-    uint8_t *ip = in, *op = out;
-    int32_t i;
+s32 mrled(u8 * in, u8 * out, s32 outlen) {
+    u8 *ip = in, *op = out;
+    s32 i;
 
-    int32_t c, pc = -1;
-    int32_t t[256] = { 0 };
-    int32_t run = 0;
+    s32 c, pc = -1;
+    s32 t[256] = { 0 };
+    s32 run = 0;
 
     for (i = 0; i < 32; ++i) {
-        int32_t j;
+        s32 j;
         c = *ip++;
         for (j = 0; j < 8; ++j) t[i * 8 + j] = (c >> j) & 1;
     }
diff --git a/src/srt.c b/src/srt.c
index 5b34109..b5f60bc 100644
--- a/src/srt.c
+++ b/src/srt.c
@@ -19,19 +19,19 @@
 
 #include "srt.h"
 
-static const int32_t MAX_HDR_SIZE = 4 * 256;
+static const s32 MAX_HDR_SIZE = 4 * 256;
 
-static int32_t preprocess(const uint32_t * freqs, uint8_t * symbols) {
-    int32_t nb_symbols = 0;
-    for (int32_t i = 0; i < 256; i++)
+static s32 preprocess(const u32 * freqs, u8 * symbols) {
+    s32 nb_symbols = 0;
+    for (s32 i = 0; i < 256; i++)
         if (freqs[i] > 0) symbols[nb_symbols++] = i;
-    uint32_t h = 4;
+    u32 h = 4;
     while (h < nb_symbols) h = h * 3 + 1;
     while (1) {
         h /= 3;
-        for (uint32_t i = h; i < nb_symbols; i++) {
-            const int32_t t = symbols[i] & 0xFF;
-            int32_t b = i - h;
+        for (u32 i = h; i < nb_symbols; i++) {
+            const s32 t = symbols[i] & 0xFF;
+            s32 b = i - h;
             while ((b >= 0) && freqs[symbols[b]] < freqs[t] ||
                    (freqs[t] == freqs[symbols[b]]) && t < symbols[b]) {
                 symbols[b + h] = symbols[b];
@@ -44,25 +44,25 @@ static int32_t preprocess(const uint32_t * freqs, uint8_t * symbols) {
     return nb_symbols;
 }
 
-static int32_t encode_header(uint32_t * freqs, uint8_t * dst) {
-    uint32_t idx = 0;
-    for (int32_t i = 0; i < 256; i++) {
-        uint32_t f = freqs[i];
+static s32 encode_header(u32 * freqs, u8 * dst) {
+    u32 idx = 0;
+    for (s32 i = 0; i < 256; i++) {
+        u32 f = freqs[i];
         while (f >= 128) {
-            dst[idx++] = (uint8_t)(f | 0x80);
+            dst[idx++] = (u8)(f | 0x80);
             f >>= 7;
         }
-        dst[idx++] = (uint8_t)f;
+        dst[idx++] = (u8)f;
     }
     return idx;
 }
 
-static int32_t decode_header(uint8_t * src, uint32_t * freqs) {
-    uint32_t idx = 0;
-    for (int32_t i = 0; i < 256; i++) {
-        int32_t val = src[idx++] & 0xFF;
-        int32_t res = val & 0x7F;
-        int32_t shift = 7;
+static s32 decode_header(u8 * src, u32 * freqs) {
+    u32 idx = 0;
+    for (s32 i = 0; i < 256; i++) {
+        s32 val = src[idx++] & 0xFF;
+        s32 res = val & 0x7F;
+        s32 shift = 7;
         while (val >= 128) {
             val = src[idx++] & 0xFF;
             res |= (val & 0x7F) << shift;
@@ -74,34 +74,34 @@ static int32_t decode_header(uint8_t * src, uint32_t * freqs) {
     return idx;
 }
 
-uint32_t srt_encode(struct srt_state * mtf, uint8_t * src, uint8_t * dst,
-                    uint32_t count) {
+u32 srt_encode(struct srt_state * mtf, u8 * src, u8 * dst,
+                    u32 count) {
     // Find first symbols and build a histogram.
-    for (int32_t i = 0; i < 256; i++) mtf->freqs[i] = 0;
-    for (uint32_t i = 0, b = 0; i < count;) {
+    for (s32 i = 0; i < 256; i++) mtf->freqs[i] = 0;
+    for (u32 i = 0, b = 0; i < count;) {
         if (mtf->freqs[src[i]] == 0) {
             mtf->r2s[b] = src[i];
             mtf->s2r[src[i]] = b;
             b++;
         }
-        uint32_t j = i + 1;
+        u32 j = i + 1;
         while (j < count && src[j] == src[i]) j++;
         mtf->freqs[src[i]] += j - i;
         i = j;
     }
 
-    int32_t n_symbols = preprocess(mtf->freqs, mtf->symbols);
-    for (uint32_t i = 0, bucket_pos = 0; i < n_symbols; i++) {
+    s32 n_symbols = preprocess(mtf->freqs, mtf->symbols);
+    for (u32 i = 0, bucket_pos = 0; i < n_symbols; i++) {
         mtf->buckets[mtf->symbols[i]] = bucket_pos;
         bucket_pos += mtf->freqs[mtf->symbols[i]];
     }
 
-    const uint32_t header_size = encode_header(mtf->freqs, dst);
-    const int32_t dst_idx = header_size;
-    for (uint32_t i = 0; i < count;) {
-        const int32_t c = src[i] & 0xFF;
-        int32_t r = mtf->s2r[c] & 0xFF;
-        uint32_t p = mtf->buckets[c];
+    const u32 header_size = encode_header(mtf->freqs, dst);
+    const s32 dst_idx = header_size;
+    for (u32 i = 0; i < count;) {
+        const s32 c = src[i] & 0xFF;
+        s32 r = mtf->s2r[c] & 0xFF;
+        u32 p = mtf->buckets[c];
         dst[dst_idx + p++] = r;
         if (r != 0) {
             do {
@@ -122,32 +122,32 @@ uint32_t srt_encode(struct srt_state * mtf, uint8_t * src, uint8_t * dst,
     return count + header_size;
 }
 
-uint32_t srt_decode(struct srt_state * mtf, uint8_t * src, uint8_t * dst,
-                    uint32_t count) {
-    const uint32_t header_size = decode_header(src, mtf->freqs);
-    const uint32_t src_idx = header_size;
-    int32_t nb_symbols = preprocess(mtf->freqs, mtf->symbols);
-    for (uint32_t i = 0, bucket_pos = 0; i < nb_symbols; i++) {
-        const int32_t c = mtf->symbols[i] & 0xFF;
+u32 srt_decode(struct srt_state * mtf, u8 * src, u8 * dst,
+                    u32 count) {
+    const u32 header_size = decode_header(src, mtf->freqs);
+    const u32 src_idx = header_size;
+    s32 nb_symbols = preprocess(mtf->freqs, mtf->symbols);
+    for (u32 i = 0, bucket_pos = 0; i < nb_symbols; i++) {
+        const s32 c = mtf->symbols[i] & 0xFF;
         mtf->r2s[src[src_idx + bucket_pos] & 0xFF] = c;
         mtf->buckets[c] = bucket_pos + 1;
         bucket_pos += mtf->freqs[c];
         mtf->bucket_ends[c] = bucket_pos;
     }
-    uint32_t c = mtf->r2s[0];
-    for (uint32_t i = 0; i < count; i++) {
+    u32 c = mtf->r2s[0];
+    for (u32 i = 0; i < count; i++) {
         dst[i] = c;
         if (mtf->buckets[c] < mtf->bucket_ends[c]) {
-            const int32_t r = src[src_idx + mtf->buckets[c]] & 0xFF;
+            const s32 r = src[src_idx + mtf->buckets[c]] & 0xFF;
             mtf->buckets[c]++;
             if (r == 0) continue;
-            for (int32_t s = 0; s < r; s++) mtf->r2s[s] = mtf->r2s[s + 1];
+            for (s32 s = 0; s < r; s++) mtf->r2s[s] = mtf->r2s[s + 1];
             mtf->r2s[r] = c;
             c = mtf->r2s[0];
         } else {
             if (nb_symbols == 1) continue;
             nb_symbols--;
-            for (int32_t s = 0; s < nb_symbols; s++)
+            for (s32 s = 0; s < nb_symbols; s++)
                 mtf->r2s[s] = mtf->r2s[s + 1];
             c = mtf->r2s[0];
         }
tab: 248 wrap: offon