refurbished api
diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 0000000..4bae5fb
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,34 @@
+{
+ // Use IntelliSense to learn about possible attributes.
+ // Hover to view descriptions of existing attributes.
+ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+ "version": "0.2.0",
+ "configurations": [
+ {
+ "name": "(gdb) Launch",
+ "type": "cppdbg",
+ "request": "launch",
+ "program": "${workspaceFolder}/bzip3",
+ "args": ["-e", "${workspaceFolder}/corpus/cm.c", "${workspaceFolder}/corpus/cm.bz3"],
+ "stopAtEntry": false,
+ "cwd": "${fileDirname}",
+ "environment": [],
+ "externalConsole": false,
+ "MIMode": "gdb",
+ "setupCommands": [
+ {
+ "description": "Enable pretty-printing for gdb",
+ "text": "-enable-pretty-printing",
+ "ignoreFailures": true
+ },
+ {
+ "description": "Set Disassembly Flavor to Intel",
+ "text": "-gdb-set disassembly-flavor intel",
+ "ignoreFailures": true
+ }
+ ]
+ }
+
+
+ ]
+}
\ No newline at end of file
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 6e20f5f..3f99eae 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -7,6 +7,13 @@
"common.h": "c",
"libsais.h": "c",
"cstring": "c",
- "variant": "c"
+ "variant": "c",
+ "strstream": "c",
+ "*.tcc": "c",
+ "bitset": "c",
+ "fstream": "c",
+ "istream": "c",
+ "sstream": "c",
+ "streambuf": "c"
}
}
\ No newline at end of file
diff --git a/include/libbz3.h b/include/libbz3.h
index 3431268..99e65d1 100644
--- a/include/libbz3.h
+++ b/include/libbz3.h
@@ -10,64 +10,41 @@
#define BZ3_ERR_CRC -3
#define BZ3_ERR_MALFORMED_HEADER -4
#define BZ3_ERR_TRUNCATED_DATA -5
+#define BZ3_ERR_DATA_TOO_BIG -6
-struct block_encoder_state;
-
-struct encoding_result {
- u8 * buffer;
- s32 size;
-};
+struct bz3_state;
/**
* @brief Get the last error number associated with a given state.
*/
-s8 get_last_error(struct block_encoder_state * state);
+s8 bz3_last_error(struct bz3_state * state);
/**
* @brief Return a user-readable message explaining the cause of the error.
*/
-const char * str_last_error(struct block_encoder_state * state);
-
-/**
- * @brief Get the input buffer associated with given state. Fill it with data
- * of length not exceeding the block size and call commit_read() to commit
- * the read operation with the number of bytes read.
- */
-u8 * get_buffer(struct block_encoder_state * state);
-
-/**
- * @brief Commit the amount of bytes inserted into the buffer.
- */
-s32 commit_read(struct block_encoder_state * state, s32 bytes_read);
+const char * bz3_strerror(struct bz3_state * state);
/**
* @brief Construct a new block encoder state.
*/
-struct block_encoder_state * new_block_encoder_state(s32 block_size);
+struct bz3_state * bz3_new(s32 block_size);
/**
* @brief Free the memory occupied by a block encoder state.
*/
-void delete_block_encoder_state(struct block_encoder_state * state);
-
-/**
- * @brief Read a block of data from provided file descriptor, put it in
- * the input buffer and commit the read.
- *
- * @param filedes
- * @param state
- * @return s32
- */
-s32 read_block(int filedes, struct block_encoder_state * state);
+void bz3_free(struct bz3_state * state);
/**
* @brief Encode a single block.
+ * Returns the number of bytes written to `buffer', or -1 on error (see bz3_last_error()).
+ * `buffer' holds the input on entry and receives the output; it must be able to hold at least `size + size / 4' bytes.
*/
-struct encoding_result encode_block(struct block_encoder_state * state);
+s32 bz3_encode_block(struct bz3_state * state, u8 * buffer, s32 size);
/**
* @brief Decode a single block.
+ * Decodes in place and returns the decoded size, or -1 on error; `buffer' must be able to hold at least `size + size / 4' bytes.
*/
-struct encoding_result decode_block(struct block_encoder_state * state);
+s32 bz3_decode_block(struct bz3_state * state, u8 * buffer, s32 size, s32 orig_size);
#endif
diff --git a/include/libsais.h b/include/libsais.h
index b0cae1e..dc3b704 100644
--- a/include/libsais.h
+++ b/include/libsais.h
@@ -138,8 +138,7 @@ s32 libsais_bwt_ctx(const void * ctx, const u8 * T, u8 * U, s32 * A, s32 n, s32
* @param I [0..(n-1)/r] The output auxiliary indexes.
* @return 0 if no error occurred, -1 or -2 otherwise.
*/
-s32 libsais_bwt_aux_ctx(const void * ctx, const u8 * T, u8 * U, s32 * A, s32 n, s32 fs, s32 * freq,
- s32 r, s32 * I);
+s32 libsais_bwt_aux_ctx(const void * ctx, const u8 * T, u8 * U, s32 * A, s32 n, s32 fs, s32 * freq, s32 r, s32 * I);
/**
* Creates the libsais reverse BWT context that allows reusing allocated memory
@@ -182,8 +181,7 @@ s32 libsais_unbwt(const u8 * T, u8 * U, s32 * A, s32 n, const s32 * freq, s32 i)
* @param i The primary index.
* @return 0 if no error occurred, -1 or -2 otherwise.
*/
-s32 libsais_unbwt_ctx(const void * ctx, const u8 * T, u8 * U, s32 * A, s32 n, const s32 * freq,
- s32 i);
+s32 libsais_unbwt_ctx(const void * ctx, const u8 * T, u8 * U, s32 * A, s32 n, const s32 * freq, s32 i);
/**
* Constructs the original string from a given burrows-wheeler transformed
@@ -214,8 +212,8 @@ s32 libsais_unbwt_aux(const u8 * T, u8 * U, s32 * A, s32 n, const s32 * freq, s3
* @param I [0..(n-1)/r] The input auxiliary indexes.
* @return 0 if no error occurred, -1 or -2 otherwise.
*/
-s32 libsais_unbwt_aux_ctx(const void * ctx, const u8 * T, u8 * U, s32 * A, s32 n, const s32 * freq,
- s32 r, const s32 * I);
+s32 libsais_unbwt_aux_ctx(const void * ctx, const u8 * T, u8 * U, s32 * A, s32 n, const s32 * freq, s32 r,
+ const s32 * I);
/**
* Constructs the permuted longest common prefix array (PLCP) of a given string
diff --git a/src/crc32.c b/src/crc32.c
index 8bed740..33d0b19 100644
--- a/src/crc32.c
+++ b/src/crc32.c
@@ -20,42 +20,34 @@
#include "crc32.h"
static const u32 crc32Table[256] = {
- 0x00000000L, 0xF26B8303L, 0xE13B70F7L, 0x1350F3F4L, 0xC79A971FL, 0x35F1141CL, 0x26A1E7E8L,
- 0xD4CA64EBL, 0x8AD958CFL, 0x78B2DBCCL, 0x6BE22838L, 0x9989AB3BL, 0x4D43CFD0L, 0xBF284CD3L,
- 0xAC78BF27L, 0x5E133C24L, 0x105EC76FL, 0xE235446CL, 0xF165B798L, 0x030E349BL, 0xD7C45070L,
- 0x25AFD373L, 0x36FF2087L, 0xC494A384L, 0x9A879FA0L, 0x68EC1CA3L, 0x7BBCEF57L, 0x89D76C54L,
- 0x5D1D08BFL, 0xAF768BBCL, 0xBC267848L, 0x4E4DFB4BL, 0x20BD8EDEL, 0xD2D60DDDL, 0xC186FE29L,
- 0x33ED7D2AL, 0xE72719C1L, 0x154C9AC2L, 0x061C6936L, 0xF477EA35L, 0xAA64D611L, 0x580F5512L,
- 0x4B5FA6E6L, 0xB93425E5L, 0x6DFE410EL, 0x9F95C20DL, 0x8CC531F9L, 0x7EAEB2FAL, 0x30E349B1L,
- 0xC288CAB2L, 0xD1D83946L, 0x23B3BA45L, 0xF779DEAEL, 0x05125DADL, 0x1642AE59L, 0xE4292D5AL,
- 0xBA3A117EL, 0x4851927DL, 0x5B016189L, 0xA96AE28AL, 0x7DA08661L, 0x8FCB0562L, 0x9C9BF696L,
- 0x6EF07595L, 0x417B1DBCL, 0xB3109EBFL, 0xA0406D4BL, 0x522BEE48L, 0x86E18AA3L, 0x748A09A0L,
- 0x67DAFA54L, 0x95B17957L, 0xCBA24573L, 0x39C9C670L, 0x2A993584L, 0xD8F2B687L, 0x0C38D26CL,
- 0xFE53516FL, 0xED03A29BL, 0x1F682198L, 0x5125DAD3L, 0xA34E59D0L, 0xB01EAA24L, 0x42752927L,
- 0x96BF4DCCL, 0x64D4CECFL, 0x77843D3BL, 0x85EFBE38L, 0xDBFC821CL, 0x2997011FL, 0x3AC7F2EBL,
- 0xC8AC71E8L, 0x1C661503L, 0xEE0D9600L, 0xFD5D65F4L, 0x0F36E6F7L, 0x61C69362L, 0x93AD1061L,
- 0x80FDE395L, 0x72966096L, 0xA65C047DL, 0x5437877EL, 0x4767748AL, 0xB50CF789L, 0xEB1FCBADL,
- 0x197448AEL, 0x0A24BB5AL, 0xF84F3859L, 0x2C855CB2L, 0xDEEEDFB1L, 0xCDBE2C45L, 0x3FD5AF46L,
- 0x7198540DL, 0x83F3D70EL, 0x90A324FAL, 0x62C8A7F9L, 0xB602C312L, 0x44694011L, 0x5739B3E5L,
- 0xA55230E6L, 0xFB410CC2L, 0x092A8FC1L, 0x1A7A7C35L, 0xE811FF36L, 0x3CDB9BDDL, 0xCEB018DEL,
- 0xDDE0EB2AL, 0x2F8B6829L, 0x82F63B78L, 0x709DB87BL, 0x63CD4B8FL, 0x91A6C88CL, 0x456CAC67L,
- 0xB7072F64L, 0xA457DC90L, 0x563C5F93L, 0x082F63B7L, 0xFA44E0B4L, 0xE9141340L, 0x1B7F9043L,
- 0xCFB5F4A8L, 0x3DDE77ABL, 0x2E8E845FL, 0xDCE5075CL, 0x92A8FC17L, 0x60C37F14L, 0x73938CE0L,
- 0x81F80FE3L, 0x55326B08L, 0xA759E80BL, 0xB4091BFFL, 0x466298FCL, 0x1871A4D8L, 0xEA1A27DBL,
- 0xF94AD42FL, 0x0B21572CL, 0xDFEB33C7L, 0x2D80B0C4L, 0x3ED04330L, 0xCCBBC033L, 0xA24BB5A6L,
- 0x502036A5L, 0x4370C551L, 0xB11B4652L, 0x65D122B9L, 0x97BAA1BAL, 0x84EA524EL, 0x7681D14DL,
- 0x2892ED69L, 0xDAF96E6AL, 0xC9A99D9EL, 0x3BC21E9DL, 0xEF087A76L, 0x1D63F975L, 0x0E330A81L,
- 0xFC588982L, 0xB21572C9L, 0x407EF1CAL, 0x532E023EL, 0xA145813DL, 0x758FE5D6L, 0x87E466D5L,
- 0x94B49521L, 0x66DF1622L, 0x38CC2A06L, 0xCAA7A905L, 0xD9F75AF1L, 0x2B9CD9F2L, 0xFF56BD19L,
- 0x0D3D3E1AL, 0x1E6DCDEEL, 0xEC064EEDL, 0xC38D26C4L, 0x31E6A5C7L, 0x22B65633L, 0xD0DDD530L,
- 0x0417B1DBL, 0xF67C32D8L, 0xE52CC12CL, 0x1747422FL, 0x49547E0BL, 0xBB3FFD08L, 0xA86F0EFCL,
- 0x5A048DFFL, 0x8ECEE914L, 0x7CA56A17L, 0x6FF599E3L, 0x9D9E1AE0L, 0xD3D3E1ABL, 0x21B862A8L,
- 0x32E8915CL, 0xC083125FL, 0x144976B4L, 0xE622F5B7L, 0xF5720643L, 0x07198540L, 0x590AB964L,
- 0xAB613A67L, 0xB831C993L, 0x4A5A4A90L, 0x9E902E7BL, 0x6CFBAD78L, 0x7FAB5E8CL, 0x8DC0DD8FL,
- 0xE330A81AL, 0x115B2B19L, 0x020BD8EDL, 0xF0605BEEL, 0x24AA3F05L, 0xD6C1BC06L, 0xC5914FF2L,
- 0x37FACCF1L, 0x69E9F0D5L, 0x9B8273D6L, 0x88D28022L, 0x7AB90321L, 0xAE7367CAL, 0x5C18E4C9L,
- 0x4F48173DL, 0xBD23943EL, 0xF36E6F75L, 0x0105EC76L, 0x12551F82L, 0xE03E9C81L, 0x34F4F86AL,
- 0xC69F7B69L, 0xD5CF889DL, 0x27A40B9EL, 0x79B737BAL, 0x8BDCB4B9L, 0x988C474DL, 0x6AE7C44EL,
+ 0x00000000L, 0xF26B8303L, 0xE13B70F7L, 0x1350F3F4L, 0xC79A971FL, 0x35F1141CL, 0x26A1E7E8L, 0xD4CA64EBL, 0x8AD958CFL,
+ 0x78B2DBCCL, 0x6BE22838L, 0x9989AB3BL, 0x4D43CFD0L, 0xBF284CD3L, 0xAC78BF27L, 0x5E133C24L, 0x105EC76FL, 0xE235446CL,
+ 0xF165B798L, 0x030E349BL, 0xD7C45070L, 0x25AFD373L, 0x36FF2087L, 0xC494A384L, 0x9A879FA0L, 0x68EC1CA3L, 0x7BBCEF57L,
+ 0x89D76C54L, 0x5D1D08BFL, 0xAF768BBCL, 0xBC267848L, 0x4E4DFB4BL, 0x20BD8EDEL, 0xD2D60DDDL, 0xC186FE29L, 0x33ED7D2AL,
+ 0xE72719C1L, 0x154C9AC2L, 0x061C6936L, 0xF477EA35L, 0xAA64D611L, 0x580F5512L, 0x4B5FA6E6L, 0xB93425E5L, 0x6DFE410EL,
+ 0x9F95C20DL, 0x8CC531F9L, 0x7EAEB2FAL, 0x30E349B1L, 0xC288CAB2L, 0xD1D83946L, 0x23B3BA45L, 0xF779DEAEL, 0x05125DADL,
+ 0x1642AE59L, 0xE4292D5AL, 0xBA3A117EL, 0x4851927DL, 0x5B016189L, 0xA96AE28AL, 0x7DA08661L, 0x8FCB0562L, 0x9C9BF696L,
+ 0x6EF07595L, 0x417B1DBCL, 0xB3109EBFL, 0xA0406D4BL, 0x522BEE48L, 0x86E18AA3L, 0x748A09A0L, 0x67DAFA54L, 0x95B17957L,
+ 0xCBA24573L, 0x39C9C670L, 0x2A993584L, 0xD8F2B687L, 0x0C38D26CL, 0xFE53516FL, 0xED03A29BL, 0x1F682198L, 0x5125DAD3L,
+ 0xA34E59D0L, 0xB01EAA24L, 0x42752927L, 0x96BF4DCCL, 0x64D4CECFL, 0x77843D3BL, 0x85EFBE38L, 0xDBFC821CL, 0x2997011FL,
+ 0x3AC7F2EBL, 0xC8AC71E8L, 0x1C661503L, 0xEE0D9600L, 0xFD5D65F4L, 0x0F36E6F7L, 0x61C69362L, 0x93AD1061L, 0x80FDE395L,
+ 0x72966096L, 0xA65C047DL, 0x5437877EL, 0x4767748AL, 0xB50CF789L, 0xEB1FCBADL, 0x197448AEL, 0x0A24BB5AL, 0xF84F3859L,
+ 0x2C855CB2L, 0xDEEEDFB1L, 0xCDBE2C45L, 0x3FD5AF46L, 0x7198540DL, 0x83F3D70EL, 0x90A324FAL, 0x62C8A7F9L, 0xB602C312L,
+ 0x44694011L, 0x5739B3E5L, 0xA55230E6L, 0xFB410CC2L, 0x092A8FC1L, 0x1A7A7C35L, 0xE811FF36L, 0x3CDB9BDDL, 0xCEB018DEL,
+ 0xDDE0EB2AL, 0x2F8B6829L, 0x82F63B78L, 0x709DB87BL, 0x63CD4B8FL, 0x91A6C88CL, 0x456CAC67L, 0xB7072F64L, 0xA457DC90L,
+ 0x563C5F93L, 0x082F63B7L, 0xFA44E0B4L, 0xE9141340L, 0x1B7F9043L, 0xCFB5F4A8L, 0x3DDE77ABL, 0x2E8E845FL, 0xDCE5075CL,
+ 0x92A8FC17L, 0x60C37F14L, 0x73938CE0L, 0x81F80FE3L, 0x55326B08L, 0xA759E80BL, 0xB4091BFFL, 0x466298FCL, 0x1871A4D8L,
+ 0xEA1A27DBL, 0xF94AD42FL, 0x0B21572CL, 0xDFEB33C7L, 0x2D80B0C4L, 0x3ED04330L, 0xCCBBC033L, 0xA24BB5A6L, 0x502036A5L,
+ 0x4370C551L, 0xB11B4652L, 0x65D122B9L, 0x97BAA1BAL, 0x84EA524EL, 0x7681D14DL, 0x2892ED69L, 0xDAF96E6AL, 0xC9A99D9EL,
+ 0x3BC21E9DL, 0xEF087A76L, 0x1D63F975L, 0x0E330A81L, 0xFC588982L, 0xB21572C9L, 0x407EF1CAL, 0x532E023EL, 0xA145813DL,
+ 0x758FE5D6L, 0x87E466D5L, 0x94B49521L, 0x66DF1622L, 0x38CC2A06L, 0xCAA7A905L, 0xD9F75AF1L, 0x2B9CD9F2L, 0xFF56BD19L,
+ 0x0D3D3E1AL, 0x1E6DCDEEL, 0xEC064EEDL, 0xC38D26C4L, 0x31E6A5C7L, 0x22B65633L, 0xD0DDD530L, 0x0417B1DBL, 0xF67C32D8L,
+ 0xE52CC12CL, 0x1747422FL, 0x49547E0BL, 0xBB3FFD08L, 0xA86F0EFCL, 0x5A048DFFL, 0x8ECEE914L, 0x7CA56A17L, 0x6FF599E3L,
+ 0x9D9E1AE0L, 0xD3D3E1ABL, 0x21B862A8L, 0x32E8915CL, 0xC083125FL, 0x144976B4L, 0xE622F5B7L, 0xF5720643L, 0x07198540L,
+ 0x590AB964L, 0xAB613A67L, 0xB831C993L, 0x4A5A4A90L, 0x9E902E7BL, 0x6CFBAD78L, 0x7FAB5E8CL, 0x8DC0DD8FL, 0xE330A81AL,
+ 0x115B2B19L, 0x020BD8EDL, 0xF0605BEEL, 0x24AA3F05L, 0xD6C1BC06L, 0xC5914FF2L, 0x37FACCF1L, 0x69E9F0D5L, 0x9B8273D6L,
+ 0x88D28022L, 0x7AB90321L, 0xAE7367CAL, 0x5C18E4C9L, 0x4F48173DL, 0xBD23943EL, 0xF36E6F75L, 0x0105EC76L, 0x12551F82L,
+ 0xE03E9C81L, 0x34F4F86AL, 0xC69F7B69L, 0xD5CF889DL, 0x27A40B9EL, 0x79B737BAL, 0x8BDCB4B9L, 0x988C474DL, 0x6AE7C44EL,
0xBE2DA0A5L, 0x4C4623A6L, 0x5F16D052L, 0xAD7D5351L
};
diff --git a/src/libbz3.c b/src/libbz3.c
index 2043edc..51d5da0 100644
--- a/src/libbz3.c
+++ b/src/libbz3.c
@@ -20,9 +20,9 @@
#define LZP_DICTIONARY 18
#define LZP_MIN_MATCH 40
-struct block_encoder_state {
- u8 *buf1, *buf2;
- s32 bytes_read, block_size;
+struct bz3_state {
+ u8 *swap_buffer;
+ s32 block_size;
s32 * sais_array;
struct srt_state * srt_state;
struct mtf_state * mtf_state;
@@ -30,9 +30,9 @@ struct block_encoder_state {
s8 last_error;
};
-s8 get_last_error(struct block_encoder_state * state) { return state->last_error; }
+s8 bz3_last_error(struct bz3_state * state) { return state->last_error; }
-const char * str_last_error(struct block_encoder_state * state) {
+const char * bz3_strerror(struct bz3_state * state) {
switch (state->last_error) {
case BZ3_OK:
return "No error";
@@ -46,47 +46,36 @@ const char * str_last_error(struct block_encoder_state * state) {
return "Malformed header";
case BZ3_ERR_TRUNCATED_DATA:
return "Truncated data";
+ case BZ3_ERR_DATA_TOO_BIG:
+ return "Too much data";
default:
return "Unknown error";
}
}
-u8 * get_buffer(struct block_encoder_state * state) { return state->buf1; }
+struct bz3_state * bz3_new(s32 block_size) {
+ struct bz3_state * bz3_state = malloc(sizeof(struct bz3_state));
-s32 commit_read(struct block_encoder_state * state, s32 bytes_read) {
- if (bytes_read > state->block_size) {
- state->last_error = BZ3_ERR_OUT_OF_BOUNDS;
- return -1;
- }
- state->last_error = BZ3_OK;
- return state->bytes_read = bytes_read;
-}
-
-struct block_encoder_state * new_block_encoder_state(s32 block_size) {
- struct block_encoder_state * block_encoder_state = malloc(sizeof(struct block_encoder_state));
-
- if (!block_encoder_state) {
+ if (!bz3_state) {
return NULL;
}
- block_encoder_state->cm_state = malloc(sizeof(state));
- block_encoder_state->srt_state = malloc(sizeof(struct srt_state));
- block_encoder_state->mtf_state = malloc(sizeof(struct mtf_state));
+ bz3_state->cm_state = malloc(sizeof(state));
+ bz3_state->srt_state = malloc(sizeof(struct srt_state));
+ bz3_state->mtf_state = malloc(sizeof(struct mtf_state));
- block_encoder_state->buf1 = malloc(block_size + block_size / 4);
- block_encoder_state->buf2 = malloc(block_size + block_size / 4);
- block_encoder_state->sais_array = malloc(block_size * sizeof(s32) + 16);
+ bz3_state->swap_buffer = malloc(block_size + block_size / 4);
+ bz3_state->sais_array = malloc(block_size * sizeof(s32) + 16);
- block_encoder_state->block_size = block_size;
+ bz3_state->block_size = block_size;
- block_encoder_state->last_error = BZ3_OK;
+ bz3_state->last_error = BZ3_OK;
- return block_encoder_state;
+ return bz3_state;
}
-void delete_block_encoder_state(struct block_encoder_state * state) {
- free(state->buf1);
- free(state->buf2);
+void bz3_free(struct bz3_state * state) {
+ free(state->swap_buffer);
free(state->sais_array);
free(state->srt_state);
free(state->mtf_state);
@@ -96,19 +85,22 @@ void delete_block_encoder_state(struct block_encoder_state * state) {
#define swap(x, y) { u8 * tmp = x; x = y; y = tmp; }
-struct encoding_result encode_block(struct block_encoder_state * state) {
- u8 * b1 = state->buf1, * b2 = state->buf2;
- s32 data_size = state->bytes_read;
+s32 bz3_encode_block(struct bz3_state * state, u8 * buffer, s32 data_size) {
+ u8 * b1 = buffer, * b2 = state->swap_buffer; s32 initial_size = data_size;
+
+ if(data_size > state->block_size) {
+ state->last_error = BZ3_ERR_DATA_TOO_BIG;
+ return -1;
+ }
u32 crc32 = crc32sum(1, b1, data_size);
// Ignore small blocks. They won't benefit from the entropy coding step.
if(data_size < 64) {
- ((s32 *) (b2))[0] = htonl(data_size + 8);
- ((u32 *) (b2))[1] = htonl(crc32);
- ((s32 *) (b2))[2] = htonl(-1);
- memcpy(b2 + 12, b1, data_size);
- return (struct encoding_result) { .buffer = b2, .size = data_size + 12 };
+ memmove(b1 + 8, b1, data_size); // Shift the payload first; writing the header first would clobber its first 8 bytes.
+ ((u32 *) (b1))[0] = htonl(crc32);
+ ((s32 *) (b1))[1] = htonl(-1);
+ return data_size + 8;
}
// Back to front:
@@ -138,75 +130,88 @@ struct encoding_result encode_block(struct block_encoder_state * state) {
s32 bwt_idx = libsais_bwt(b1, b2, state->sais_array, data_size, 16, NULL);
if(bwt_idx < 0) {
state->last_error = BZ3_ERR_BWT;
- return (struct encoding_result) { .buffer = NULL, .size = -1 };
+ return -1;
}
- swap(b1, b2);
+
+ // Important: b2 is the input now, b1 is the output.
+ // This avoids an expensive memory copy.
s32 srt_size;
if((model & 1) == 0) {
if(data_size > MiB(3)) {
- srt_size = srt_encode(state->srt_state, b1, b2, data_size);
+ srt_size = srt_encode(state->srt_state, b2, b1, data_size);
swap(b1, b2);
data_size = srt_size;
model |= 4;
} else {
- mtf_encode(state->mtf_state, b1, b2, data_size);
+ mtf_encode(state->mtf_state, b2, b1, data_size);
swap(b1, b2);
model |= 8;
}
}
// Compute the amount of overhead dwords.
- s32 overhead = 4; // CRC32 + BWT index + original size + new size
+ s32 overhead = 2; // CRC32 + BWT index
if((model & 2) || (model & 16)) overhead++; // LZP
if(model & 4) overhead++; // sorted rank transform
begin(state->cm_state);
- state->cm_state->out_queue = b2 + overhead * 4 + 1;
+ state->cm_state->out_queue = b1 + overhead * 4 + 1;
state->cm_state->output_ptr = 0;
- for (s32 i = 0; i < data_size; i++) encode_byte(state->cm_state, b1[i]);
+ for (s32 i = 0; i < data_size; i++) encode_byte(state->cm_state, b2[i]);
flush(state->cm_state);
data_size = state->cm_state->output_ptr;
- // Write the header. Starting with common entries:
- ((s32 *) (b2))[0] = htonl(data_size + overhead * 4 - 3);
- ((u32 *) (b2))[1] = htonl(crc32);
- ((s32 *) (b2))[2] = htonl(bwt_idx);
- ((s32 *) (b2))[3] = htonl(state->bytes_read);
- b2[16] = model;
+ // Write the header. Starting with common entries.
+ ((u32 *) (b1))[0] = htonl(crc32);
+ ((s32 *) (b1))[1] = htonl(bwt_idx);
+ b1[8] = model;
s32 p = 0;
- if((model & 2) || (model & 16)) ((s32 *)(b2 + 17))[p++] = htonl(lzp_size);
- if(model & 4) ((s32 *)(b2 + 17))[p++] = htonl(srt_size);
+ if((model & 2) || (model & 16)) ((s32 *)(b1 + 9))[p++] = htonl(lzp_size);
+ if(model & 4) ((s32 *)(b1 + 9))[p++] = htonl(srt_size);
+
+ state->last_error = BZ3_OK;
+
+ // XXX: Find a better solution than copying the result back into the caller's buffer.
+ if(b1 != buffer)
+ memcpy(buffer, b1, data_size + overhead * 4 + 1);
- return (struct encoding_result) { .buffer = b2, .size = data_size + overhead * 4 + 1 };
+ return data_size + overhead * 4 + 1;
}
-struct encoding_result decode_block(struct block_encoder_state * state) {
+s32 bz3_decode_block(struct bz3_state * state, u8 * buffer, s32 data_size, s32 orig_size) {
// Read the header.
- s32 data_len = ntohl(((s32 *) state->buf1)[0]) - 1;
- u32 crc32 = ntohl(((u32 *) state->buf1)[1]);
- s32 bwt_idx = ntohl(((s32 *) state->buf1)[2]);
+ u32 crc32 = ntohl(((u32 *) buffer)[0]);
+ s32 bwt_idx = ntohl(((s32 *) buffer)[1]);
+
+ if(bwt_idx == -1) {
+ memmove(buffer, buffer + 8, data_size - 8);
+ return data_size - 8;
+ }
- if(bwt_idx == -1)
- return (struct encoding_result) { .buffer = state->buf1 + 12, .size = data_len - 7 };
+ if(orig_size > state->block_size) {
+ state->last_error = BZ3_ERR_DATA_TOO_BIG;
+ return -1;
+ }
- s32 orig_size = ntohl(((s32 *) state->buf1)[3]);
- s8 model = state->buf1[16];
+ s8 model = buffer[8];
s32 lzp_size = -1, srt_size = -1, p = 0;
- if((model & 2) || (model & 16)) lzp_size = ntohl(((s32 *) (state->buf1 + 17))[p++]);
- if(model & 4) srt_size = ntohl(((s32 *) (state->buf1 + 17))[p++]);
+ if((model & 2) || (model & 16)) lzp_size = ntohl(((s32 *) (buffer + 9))[p++]);
+ if(model & 4) srt_size = ntohl(((s32 *) (buffer + 9))[p++]);
+
+ p += 2;
- data_len -= p * 4;
+ data_size -= p * 4 + 1;
// Decode the data.
- u8 * b1 = state->buf1, * b2 = state->buf2;
+ u8 * b1 = buffer, * b2 = state->swap_buffer;
begin(state->cm_state);
- state->cm_state->in_queue = b1 + 17 + p * 4;
+ state->cm_state->in_queue = b1 + p * 4 + 1;
state->cm_state->input_ptr = 0;
- state->cm_state->input_max = data_len;
+ state->cm_state->input_max = data_size;
init(state->cm_state);
s32 size_src;
@@ -234,7 +239,7 @@ struct encoding_result decode_block(struct block_encoder_state * state) {
// Undo BWT
if (libsais_unbwt(b1, b2, state->sais_array, size_src, NULL, bwt_idx) < 0) {
state->last_error = BZ3_ERR_BWT;
- return (struct encoding_result) { .buffer = NULL, .size = -1 };
+ return -1;
}
swap(b1, b2);
@@ -248,28 +253,18 @@ struct encoding_result decode_block(struct block_encoder_state * state) {
swap(b1, b2);
}
- return (struct encoding_result) { .buffer = b1, .size = size_src };
-}
-
-#undef swap
+ state->last_error = BZ3_OK;
-s32 read_block(int filedes, struct block_encoder_state * state) {
- s32 bytes_read = read(filedes, state->buf1, 4);
- if (bytes_read == 0) return 0;
- if (bytes_read != 4) {
- state->last_error = BZ3_ERR_MALFORMED_HEADER;
- return -1;
- }
- s32 data_size = ntohl(((uint32_t *)state->buf1)[0]);
- if (data_size > state->block_size) {
- state->last_error = BZ3_ERR_MALFORMED_HEADER;
- return -1;
- }
- bytes_read = read(filedes, state->buf1 + 4, data_size);
- if (bytes_read != data_size) {
- state->last_error = BZ3_ERR_TRUNCATED_DATA;
+ // XXX: Find a better solution than copying the result back into the caller's buffer.
+ if(b1 != buffer)
+ memcpy(buffer, b1, size_src);
+
+ if(crc32 != crc32sum(1, buffer, size_src)) {
+ state->last_error = BZ3_ERR_CRC;
return -1;
}
- state->last_error = BZ3_OK;
- return state->bytes_read = 4 + data_size;
+
+ return size_src;
}
+
+#undef swap
diff --git a/src/libsais.c b/src/libsais.c
index c2d97a8..7800bff 100644
--- a/src/libsais.c
+++ b/src/libsais.c
@@ -132,19 +132,15 @@ typedef struct LIBSAIS_UNBWT_CONTEXT {
#endif
#if !defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__)
- #if defined(_LITTLE_ENDIAN) || \
- (defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && BYTE_ORDER == LITTLE_ENDIAN) || \
- (defined(_BYTE_ORDER) && defined(_LITTLE_ENDIAN) && _BYTE_ORDER == _LITTLE_ENDIAN) || \
- (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && __BYTE_ORDER == __LITTLE_ENDIAN) || \
- (defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
- __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+ #if defined(_LITTLE_ENDIAN) || (defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && BYTE_ORDER == LITTLE_ENDIAN) || \
+ (defined(_BYTE_ORDER) && defined(_LITTLE_ENDIAN) && _BYTE_ORDER == _LITTLE_ENDIAN) || \
+ (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && __BYTE_ORDER == __LITTLE_ENDIAN) || \
+ (defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define __LITTLE_ENDIAN__
- #elif defined(_BIG_ENDIAN) || \
- (defined(BYTE_ORDER) && defined(BIG_ENDIAN) && BYTE_ORDER == BIG_ENDIAN) || \
- (defined(_BYTE_ORDER) && defined(_BIG_ENDIAN) && _BYTE_ORDER == _BIG_ENDIAN) || \
- (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && __BYTE_ORDER == __BIG_ENDIAN) || \
- (defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
- __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+ #elif defined(_BIG_ENDIAN) || (defined(BYTE_ORDER) && defined(BIG_ENDIAN) && BYTE_ORDER == BIG_ENDIAN) || \
+ (defined(_BYTE_ORDER) && defined(_BIG_ENDIAN) && _BYTE_ORDER == _BIG_ENDIAN) || \
+ (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && __BYTE_ORDER == __BIG_ENDIAN) || \
+ (defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#define __BIG_ENDIAN__
#elif defined(_WIN32)
#define __LITTLE_ENDIAN__
@@ -166,15 +162,13 @@ typedef struct LIBSAIS_UNBWT_CONTEXT {
#endif
static void * libsais_align_up(const void * address, size_t alignment) {
- return (void *)((((ptrdiff_t)address) + ((ptrdiff_t)alignment) - 1) &
- (-((ptrdiff_t)alignment)));
+ return (void *)((((ptrdiff_t)address) + ((ptrdiff_t)alignment) - 1) & (-((ptrdiff_t)alignment)));
}
static void * libsais_alloc_aligned(size_t size, size_t alignment) {
void * address = malloc(size + sizeof(short) + alignment - 1);
if (address != NULL) {
- void * aligned_address =
- libsais_align_up((void *)((ptrdiff_t)address + (ptrdiff_t)(sizeof(short))), alignment);
+ void * aligned_address = libsais_align_up((void *)((ptrdiff_t)address + (ptrdiff_t)(sizeof(short))), alignment);
((short *)aligned_address)[-1] = (short)((ptrdiff_t)aligned_address - (ptrdiff_t)address);
return aligned_address;
@@ -190,10 +184,10 @@ static void libsais_free_aligned(void * aligned_address) {
}
static LIBSAIS_THREAD_STATE * libsais_alloc_thread_state(sa_sint_t threads) {
- LIBSAIS_THREAD_STATE * RESTRICT thread_state = (LIBSAIS_THREAD_STATE *)libsais_alloc_aligned(
- (size_t)threads * sizeof(LIBSAIS_THREAD_STATE), 4096);
- sa_sint_t * RESTRICT thread_buckets = (sa_sint_t *)libsais_alloc_aligned(
- (size_t)threads * 4 * ALPHABET_SIZE * sizeof(sa_sint_t), 4096);
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state =
+ (LIBSAIS_THREAD_STATE *)libsais_alloc_aligned((size_t)threads * sizeof(LIBSAIS_THREAD_STATE), 4096);
+ sa_sint_t * RESTRICT thread_buckets =
+ (sa_sint_t *)libsais_alloc_aligned((size_t)threads * 4 * ALPHABET_SIZE * sizeof(sa_sint_t), 4096);
LIBSAIS_THREAD_CACHE * RESTRICT thread_cache = (LIBSAIS_THREAD_CACHE *)libsais_alloc_aligned(
(size_t)threads * LIBSAIS_PER_THREAD_CACHE_SIZE * sizeof(LIBSAIS_THREAD_CACHE), 4096);
@@ -224,12 +218,9 @@ static void libsais_free_thread_state(LIBSAIS_THREAD_STATE * thread_state) {
}
static LIBSAIS_CONTEXT * libsais_create_ctx_main(sa_sint_t threads) {
- LIBSAIS_CONTEXT * RESTRICT ctx =
- (LIBSAIS_CONTEXT *)libsais_alloc_aligned(sizeof(LIBSAIS_CONTEXT), 64);
- sa_sint_t * RESTRICT buckets =
- (sa_sint_t *)libsais_alloc_aligned(8 * ALPHABET_SIZE * sizeof(sa_sint_t), 4096);
- LIBSAIS_THREAD_STATE * RESTRICT thread_state =
- threads > 1 ? libsais_alloc_thread_state(threads) : NULL;
+ LIBSAIS_CONTEXT * RESTRICT ctx = (LIBSAIS_CONTEXT *)libsais_alloc_aligned(sizeof(LIBSAIS_CONTEXT), 64);
+ sa_sint_t * RESTRICT buckets = (sa_sint_t *)libsais_alloc_aligned(8 * ALPHABET_SIZE * sizeof(sa_sint_t), 4096);
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state = threads > 1 ? libsais_alloc_thread_state(threads) : NULL;
if (ctx != NULL && buckets != NULL && (thread_state != NULL || threads == 1)) {
ctx->buckets = buckets;
@@ -252,14 +243,12 @@ static void libsais_free_ctx_main(LIBSAIS_CONTEXT * ctx) {
libsais_free_aligned(ctx);
}
}
-static void libsais_gather_lms_suffixes_8u(const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
- sa_sint_t n, fast_sint_t m, fast_sint_t omp_block_start,
- fast_sint_t omp_block_size) {
+static void libsais_gather_lms_suffixes_8u(const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, fast_sint_t m,
+ fast_sint_t omp_block_start, fast_sint_t omp_block_size) {
if (omp_block_size > 0) {
const fast_sint_t prefetch_distance = 128;
- fast_sint_t i, j = omp_block_start + omp_block_size,
- c0 = T[omp_block_start + omp_block_size - 1], c1 = -1;
+ fast_sint_t i, j = omp_block_start + omp_block_size, c0 = T[omp_block_start + omp_block_size - 1], c1 = -1;
while (j < n && (c1 = T[j]) == c0) {
++j;
@@ -300,9 +289,8 @@ static void libsais_gather_lms_suffixes_8u(const u8 * RESTRICT T, sa_sint_t * RE
}
}
-static void libsais_gather_lms_suffixes_8u_omp(const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
- sa_sint_t n, sa_sint_t threads,
- LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+static void libsais_gather_lms_suffixes_8u_omp(const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
+ sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
{
(void)(threads);
(void)(thread_state);
@@ -312,18 +300,15 @@ static void libsais_gather_lms_suffixes_8u_omp(const u8 * RESTRICT T, sa_sint_t
fast_sint_t omp_block_stride = (n / omp_num_threads) & (-16);
fast_sint_t omp_block_start = omp_thread_num * omp_block_stride;
- fast_sint_t omp_block_size =
- omp_thread_num < omp_num_threads - 1 ? omp_block_stride : n - omp_block_start;
+ fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? omp_block_stride : n - omp_block_start;
if (omp_num_threads == 1) {
- libsais_gather_lms_suffixes_8u(T, SA, n, (fast_sint_t)n - 1, omp_block_start,
- omp_block_size);
+ libsais_gather_lms_suffixes_8u(T, SA, n, (fast_sint_t)n - 1, omp_block_start, omp_block_size);
}
}
}
-static sa_sint_t libsais_gather_lms_suffixes_32s(const sa_sint_t * RESTRICT T,
- sa_sint_t * RESTRICT SA, sa_sint_t n) {
+static sa_sint_t libsais_gather_lms_suffixes_32s(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n) {
const fast_sint_t prefetch_distance = 32;
sa_sint_t i = n - 2;
@@ -364,8 +349,8 @@ static sa_sint_t libsais_gather_lms_suffixes_32s(const sa_sint_t * RESTRICT T,
return n - 1 - m;
}
-static sa_sint_t libsais_gather_compacted_lms_suffixes_32s(const sa_sint_t * RESTRICT T,
- sa_sint_t * RESTRICT SA, sa_sint_t n) {
+static sa_sint_t libsais_gather_compacted_lms_suffixes_32s(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+ sa_sint_t n) {
const fast_sint_t prefetch_distance = 32;
sa_sint_t i = n - 2;
@@ -405,8 +390,8 @@ static sa_sint_t libsais_gather_compacted_lms_suffixes_32s(const sa_sint_t * RES
return n - 1 - m;
}
-static void libsais_count_lms_suffixes_32s_2k(const sa_sint_t * RESTRICT T, sa_sint_t n,
- sa_sint_t k, sa_sint_t * RESTRICT buckets) {
+static void libsais_count_lms_suffixes_32s_2k(const sa_sint_t * RESTRICT T, sa_sint_t n, sa_sint_t k,
+ sa_sint_t * RESTRICT buckets) {
const fast_sint_t prefetch_distance = 32;
memset(buckets, 0, 2 * (size_t)k * sizeof(sa_sint_t));
@@ -450,10 +435,8 @@ static void libsais_count_lms_suffixes_32s_2k(const sa_sint_t * RESTRICT T, sa_s
buckets[BUCKETS_INDEX2((fast_uint_t)c0, 0)]++;
}
-static sa_sint_t libsais_count_and_gather_lms_suffixes_8u(const u8 * RESTRICT T,
- sa_sint_t * RESTRICT SA, sa_sint_t n,
- sa_sint_t * RESTRICT buckets,
- fast_sint_t omp_block_start,
+static sa_sint_t libsais_count_and_gather_lms_suffixes_8u(const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
+ sa_sint_t * RESTRICT buckets, fast_sint_t omp_block_start,
fast_sint_t omp_block_size) {
memset(buckets, 0, 4 * ALPHABET_SIZE * sizeof(sa_sint_t));
@@ -517,9 +500,10 @@ static sa_sint_t libsais_count_and_gather_lms_suffixes_8u(const u8 * RESTRICT T,
return (sa_sint_t)(omp_block_start + omp_block_size - 1 - m);
}
-static sa_sint_t libsais_count_and_gather_lms_suffixes_8u_omp(
- const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t * RESTRICT buckets,
- sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+static sa_sint_t libsais_count_and_gather_lms_suffixes_8u_omp(const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
+ sa_sint_t n, sa_sint_t * RESTRICT buckets,
+ sa_sint_t threads,
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
sa_sint_t m = 0;
{
@@ -531,21 +515,19 @@ static sa_sint_t libsais_count_and_gather_lms_suffixes_8u_omp(
fast_sint_t omp_block_stride = (n / omp_num_threads) & (-16);
fast_sint_t omp_block_start = omp_thread_num * omp_block_stride;
- fast_sint_t omp_block_size =
- omp_thread_num < omp_num_threads - 1 ? omp_block_stride : n - omp_block_start;
+ fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? omp_block_stride : n - omp_block_start;
if (omp_num_threads == 1) {
- m = libsais_count_and_gather_lms_suffixes_8u(T, SA, n, buckets, omp_block_start,
- omp_block_size);
+ m = libsais_count_and_gather_lms_suffixes_8u(T, SA, n, buckets, omp_block_start, omp_block_size);
}
}
return m;
}
-static sa_sint_t libsais_count_and_gather_lms_suffixes_32s_4k(
- const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k,
- sa_sint_t * RESTRICT buckets, fast_sint_t omp_block_start, fast_sint_t omp_block_size) {
+static sa_sint_t libsais_count_and_gather_lms_suffixes_32s_4k(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+ sa_sint_t n, sa_sint_t k, sa_sint_t * RESTRICT buckets,
+ fast_sint_t omp_block_start, fast_sint_t omp_block_size) {
memset(buckets, 0, 4 * (size_t)k * sizeof(sa_sint_t));
fast_sint_t m = omp_block_start + omp_block_size - 1;
@@ -613,9 +595,9 @@ static sa_sint_t libsais_count_and_gather_lms_suffixes_32s_4k(
return (sa_sint_t)(omp_block_start + omp_block_size - 1 - m);
}
-static sa_sint_t libsais_count_and_gather_lms_suffixes_32s_2k(
- const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k,
- sa_sint_t * RESTRICT buckets, fast_sint_t omp_block_start, fast_sint_t omp_block_size) {
+static sa_sint_t libsais_count_and_gather_lms_suffixes_32s_2k(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+ sa_sint_t n, sa_sint_t k, sa_sint_t * RESTRICT buckets,
+ fast_sint_t omp_block_start, fast_sint_t omp_block_size) {
memset(buckets, 0, 2 * (size_t)k * sizeof(sa_sint_t));
fast_sint_t m = omp_block_start + omp_block_size - 1;
@@ -683,9 +665,11 @@ static sa_sint_t libsais_count_and_gather_lms_suffixes_32s_2k(
return (sa_sint_t)(omp_block_start + omp_block_size - 1 - m);
}
-static sa_sint_t libsais_count_and_gather_compacted_lms_suffixes_32s_2k(
- const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k,
- sa_sint_t * RESTRICT buckets, fast_sint_t omp_block_start, fast_sint_t omp_block_size) {
+static sa_sint_t libsais_count_and_gather_compacted_lms_suffixes_32s_2k(const sa_sint_t * RESTRICT T,
+ sa_sint_t * RESTRICT SA, sa_sint_t n,
+ sa_sint_t k, sa_sint_t * RESTRICT buckets,
+ fast_sint_t omp_block_start,
+ fast_sint_t omp_block_size) {
memset(buckets, 0, 2 * (size_t)k * sizeof(sa_sint_t));
fast_sint_t m = omp_block_start + omp_block_size - 1;
@@ -704,14 +688,10 @@ static sa_sint_t libsais_count_and_gather_compacted_lms_suffixes_32s_2k(
for (i = m - 1, j = omp_block_start + prefetch_distance + 3; i >= j; i -= 4) {
libsais_prefetch(&T[i - 2 * prefetch_distance]);
- libsais_prefetchw(
- &buckets[BUCKETS_INDEX2(T[i - prefetch_distance - 0] & SAINT_MAX, 0)]);
- libsais_prefetchw(
- &buckets[BUCKETS_INDEX2(T[i - prefetch_distance - 1] & SAINT_MAX, 0)]);
- libsais_prefetchw(
- &buckets[BUCKETS_INDEX2(T[i - prefetch_distance - 2] & SAINT_MAX, 0)]);
- libsais_prefetchw(
- &buckets[BUCKETS_INDEX2(T[i - prefetch_distance - 3] & SAINT_MAX, 0)]);
+ libsais_prefetchw(&buckets[BUCKETS_INDEX2(T[i - prefetch_distance - 0] & SAINT_MAX, 0)]);
+ libsais_prefetchw(&buckets[BUCKETS_INDEX2(T[i - prefetch_distance - 1] & SAINT_MAX, 0)]);
+ libsais_prefetchw(&buckets[BUCKETS_INDEX2(T[i - prefetch_distance - 2] & SAINT_MAX, 0)]);
+ libsais_prefetchw(&buckets[BUCKETS_INDEX2(T[i - prefetch_distance - 3] & SAINT_MAX, 0)]);
c1 = T[i - 0];
s = (s << 1) + (fast_uint_t)(c1 > (c0 - (fast_sint_t)(s & 1)));
@@ -763,9 +743,8 @@ static sa_sint_t libsais_count_and_gather_compacted_lms_suffixes_32s_2k(
return (sa_sint_t)(omp_block_start + omp_block_size - 1 - m);
}
static sa_sint_t libsais_count_and_gather_lms_suffixes_32s_4k_nofs_omp(const sa_sint_t * RESTRICT T,
- sa_sint_t * RESTRICT SA,
- sa_sint_t n, sa_sint_t k,
- sa_sint_t * RESTRICT buckets,
+ sa_sint_t * RESTRICT SA, sa_sint_t n,
+ sa_sint_t k, sa_sint_t * RESTRICT buckets,
sa_sint_t threads) {
sa_sint_t m = 0;
{
@@ -782,9 +761,8 @@ static sa_sint_t libsais_count_and_gather_lms_suffixes_32s_4k_nofs_omp(const sa_
}
static sa_sint_t libsais_count_and_gather_lms_suffixes_32s_2k_nofs_omp(const sa_sint_t * RESTRICT T,
- sa_sint_t * RESTRICT SA,
- sa_sint_t n, sa_sint_t k,
- sa_sint_t * RESTRICT buckets,
+ sa_sint_t * RESTRICT SA, sa_sint_t n,
+ sa_sint_t k, sa_sint_t * RESTRICT buckets,
sa_sint_t threads) {
sa_sint_t m = 0;
{
@@ -800,9 +778,11 @@ static sa_sint_t libsais_count_and_gather_lms_suffixes_32s_2k_nofs_omp(const sa_
return m;
}
-static sa_sint_t libsais_count_and_gather_compacted_lms_suffixes_32s_2k_nofs_omp(
- const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k,
- sa_sint_t * RESTRICT buckets, sa_sint_t threads) {
+static sa_sint_t libsais_count_and_gather_compacted_lms_suffixes_32s_2k_nofs_omp(const sa_sint_t * RESTRICT T,
+ sa_sint_t * RESTRICT SA, sa_sint_t n,
+ sa_sint_t k,
+ sa_sint_t * RESTRICT buckets,
+ sa_sint_t threads) {
sa_sint_t m = 0;
{
(void)(threads);
@@ -817,9 +797,10 @@ static sa_sint_t libsais_count_and_gather_compacted_lms_suffixes_32s_2k_nofs_omp
return m;
}
-static sa_sint_t libsais_count_and_gather_lms_suffixes_32s_4k_omp(
- const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k,
- sa_sint_t * RESTRICT buckets, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+static sa_sint_t libsais_count_and_gather_lms_suffixes_32s_4k_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+ sa_sint_t n, sa_sint_t k,
+ sa_sint_t * RESTRICT buckets, sa_sint_t threads,
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
sa_sint_t m;
(void)(thread_state);
@@ -828,9 +809,10 @@ static sa_sint_t libsais_count_and_gather_lms_suffixes_32s_4k_omp(
return m;
}
-static sa_sint_t libsais_count_and_gather_lms_suffixes_32s_2k_omp(
- const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k,
- sa_sint_t * RESTRICT buckets, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+static sa_sint_t libsais_count_and_gather_lms_suffixes_32s_2k_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+ sa_sint_t n, sa_sint_t k,
+ sa_sint_t * RESTRICT buckets, sa_sint_t threads,
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
sa_sint_t m;
(void)(thread_state);
@@ -839,15 +821,14 @@ static sa_sint_t libsais_count_and_gather_lms_suffixes_32s_2k_omp(
return m;
}
-static void libsais_count_and_gather_compacted_lms_suffixes_32s_2k_omp(
- const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k,
- sa_sint_t * RESTRICT buckets, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+static void libsais_count_and_gather_compacted_lms_suffixes_32s_2k_omp(const sa_sint_t * RESTRICT T,
+ sa_sint_t * RESTRICT SA, sa_sint_t n,
+ sa_sint_t k, sa_sint_t * RESTRICT buckets,
+ sa_sint_t threads,
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
(void)(thread_state);
- {
- libsais_count_and_gather_compacted_lms_suffixes_32s_2k_nofs_omp(T, SA, n, k, buckets,
- threads);
- }
+ { libsais_count_and_gather_compacted_lms_suffixes_32s_2k_nofs_omp(T, SA, n, k, buckets, threads); }
}
static void libsais_count_suffixes_32s(const sa_sint_t * RESTRICT T, sa_sint_t n, sa_sint_t k,
@@ -875,8 +856,7 @@ static void libsais_count_suffixes_32s(const sa_sint_t * RESTRICT T, sa_sint_t n
}
}
-static void libsais_initialize_buckets_start_and_end_8u(sa_sint_t * RESTRICT buckets,
- sa_sint_t * RESTRICT freq) {
+static void libsais_initialize_buckets_start_and_end_8u(sa_sint_t * RESTRICT buckets, sa_sint_t * RESTRICT freq) {
sa_sint_t * RESTRICT bucket_start = &buckets[6 * ALPHABET_SIZE];
sa_sint_t * RESTRICT bucket_end = &buckets[7 * ALPHABET_SIZE];
@@ -886,9 +866,8 @@ static void libsais_initialize_buckets_start_and_end_8u(sa_sint_t * RESTRICT buc
for (i = BUCKETS_INDEX4(0, 0), j = 0; i <= BUCKETS_INDEX4(ALPHABET_SIZE - 1, 0);
i += BUCKETS_INDEX4(1, 0), j += 1) {
bucket_start[j] = sum;
- sum +=
- (freq[j] = buckets[i + BUCKETS_INDEX4(0, 0)] + buckets[i + BUCKETS_INDEX4(0, 1)] +
- buckets[i + BUCKETS_INDEX4(0, 2)] + buckets[i + BUCKETS_INDEX4(0, 3)]);
+ sum += (freq[j] = buckets[i + BUCKETS_INDEX4(0, 0)] + buckets[i + BUCKETS_INDEX4(0, 1)] +
+ buckets[i + BUCKETS_INDEX4(0, 2)] + buckets[i + BUCKETS_INDEX4(0, 3)]);
bucket_end[j] = sum;
}
} else {
@@ -904,8 +883,7 @@ static void libsais_initialize_buckets_start_and_end_8u(sa_sint_t * RESTRICT buc
}
}
-static void libsais_initialize_buckets_start_and_end_32s_6k(sa_sint_t k,
- sa_sint_t * RESTRICT buckets) {
+static void libsais_initialize_buckets_start_and_end_32s_6k(sa_sint_t k, sa_sint_t * RESTRICT buckets) {
sa_sint_t * RESTRICT bucket_start = &buckets[4 * k];
sa_sint_t * RESTRICT bucket_end = &buckets[5 * k];
@@ -920,8 +898,7 @@ static void libsais_initialize_buckets_start_and_end_32s_6k(sa_sint_t k,
}
}
-static void libsais_initialize_buckets_start_and_end_32s_4k(sa_sint_t k,
- sa_sint_t * RESTRICT buckets) {
+static void libsais_initialize_buckets_start_and_end_32s_4k(sa_sint_t k, sa_sint_t * RESTRICT buckets) {
sa_sint_t * RESTRICT bucket_start = &buckets[2 * k];
sa_sint_t * RESTRICT bucket_end = &buckets[3 * k];
@@ -938,15 +915,13 @@ static void libsais_initialize_buckets_start_and_end_32s_4k(sa_sint_t k,
static void libsais_initialize_buckets_end_32s_2k(sa_sint_t k, sa_sint_t * RESTRICT buckets) {
fast_sint_t i;
sa_sint_t sum0 = 0;
- for (i = BUCKETS_INDEX2(0, 0); i <= BUCKETS_INDEX2((fast_sint_t)k - 1, 0);
- i += BUCKETS_INDEX2(1, 0)) {
+ for (i = BUCKETS_INDEX2(0, 0); i <= BUCKETS_INDEX2((fast_sint_t)k - 1, 0); i += BUCKETS_INDEX2(1, 0)) {
sum0 += buckets[i + BUCKETS_INDEX2(0, 0)] + buckets[i + BUCKETS_INDEX2(0, 1)];
buckets[i + BUCKETS_INDEX2(0, 0)] = sum0;
}
}
-static void libsais_initialize_buckets_start_and_end_32s_2k(sa_sint_t k,
- sa_sint_t * RESTRICT buckets) {
+static void libsais_initialize_buckets_start_and_end_32s_2k(sa_sint_t k, sa_sint_t * RESTRICT buckets) {
fast_sint_t i, j;
for (i = BUCKETS_INDEX2(0, 0), j = 0; i <= BUCKETS_INDEX2((fast_sint_t)k - 1, 0);
i += BUCKETS_INDEX2(1, 0), j += 1) {
@@ -976,8 +951,9 @@ static void libsais_initialize_buckets_end_32s_1k(sa_sint_t k, sa_sint_t * RESTR
}
}
-static sa_sint_t libsais_initialize_buckets_for_lms_suffixes_radix_sort_8u(
- const u8 * RESTRICT T, sa_sint_t * RESTRICT buckets, sa_sint_t first_lms_suffix) {
+static sa_sint_t libsais_initialize_buckets_for_lms_suffixes_radix_sort_8u(const u8 * RESTRICT T,
+ sa_sint_t * RESTRICT buckets,
+ sa_sint_t first_lms_suffix) {
{
fast_uint_t s = 0;
fast_sint_t c0 = T[first_lms_suffix];
@@ -998,8 +974,7 @@ static sa_sint_t libsais_initialize_buckets_for_lms_suffixes_radix_sort_8u(
fast_sint_t i, j;
sa_sint_t sum = 0;
- for (i = BUCKETS_INDEX4(0, 0), j = BUCKETS_INDEX2(0, 0);
- i <= BUCKETS_INDEX4(ALPHABET_SIZE - 1, 0);
+ for (i = BUCKETS_INDEX4(0, 0), j = BUCKETS_INDEX2(0, 0); i <= BUCKETS_INDEX4(ALPHABET_SIZE - 1, 0);
i += BUCKETS_INDEX4(1, 0), j += BUCKETS_INDEX2(1, 0)) {
temp_bucket[j + BUCKETS_INDEX2(0, 1)] = sum;
sum += buckets[i + BUCKETS_INDEX4(0, 1)] + buckets[i + BUCKETS_INDEX4(0, 3)];
@@ -1010,16 +985,15 @@ static sa_sint_t libsais_initialize_buckets_for_lms_suffixes_radix_sort_8u(
}
}
-static void libsais_initialize_buckets_for_lms_suffixes_radix_sort_32s_2k(
- const sa_sint_t * RESTRICT T, sa_sint_t k, sa_sint_t * RESTRICT buckets,
- sa_sint_t first_lms_suffix) {
+static void libsais_initialize_buckets_for_lms_suffixes_radix_sort_32s_2k(const sa_sint_t * RESTRICT T, sa_sint_t k,
+ sa_sint_t * RESTRICT buckets,
+ sa_sint_t first_lms_suffix) {
buckets[BUCKETS_INDEX2(T[first_lms_suffix], 0)]++;
buckets[BUCKETS_INDEX2(T[first_lms_suffix], 1)]--;
fast_sint_t i;
sa_sint_t sum0 = 0, sum1 = 0;
- for (i = BUCKETS_INDEX2(0, 0); i <= BUCKETS_INDEX2((fast_sint_t)k - 1, 0);
- i += BUCKETS_INDEX2(1, 0)) {
+ for (i = BUCKETS_INDEX2(0, 0); i <= BUCKETS_INDEX2((fast_sint_t)k - 1, 0); i += BUCKETS_INDEX2(1, 0)) {
sum0 += buckets[i + BUCKETS_INDEX2(0, 0)] + buckets[i + BUCKETS_INDEX2(0, 1)];
sum1 += buckets[i + BUCKETS_INDEX2(0, 1)];
@@ -1028,9 +1002,10 @@ static void libsais_initialize_buckets_for_lms_suffixes_radix_sort_32s_2k(
}
}
-static sa_sint_t libsais_initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(
- const sa_sint_t * RESTRICT T, sa_sint_t k, sa_sint_t * RESTRICT buckets,
- sa_sint_t first_lms_suffix) {
+static sa_sint_t libsais_initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(const sa_sint_t * RESTRICT T,
+ sa_sint_t k,
+ sa_sint_t * RESTRICT buckets,
+ sa_sint_t first_lms_suffix) {
{
fast_uint_t s = 0;
fast_sint_t c0 = T[first_lms_suffix];
@@ -1061,9 +1036,9 @@ static sa_sint_t libsais_initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(
}
}
-static void libsais_initialize_buckets_for_radix_and_partial_sorting_32s_4k(
- const sa_sint_t * RESTRICT T, sa_sint_t k, sa_sint_t * RESTRICT buckets,
- sa_sint_t first_lms_suffix) {
+static void libsais_initialize_buckets_for_radix_and_partial_sorting_32s_4k(const sa_sint_t * RESTRICT T, sa_sint_t k,
+ sa_sint_t * RESTRICT buckets,
+ sa_sint_t first_lms_suffix) {
sa_sint_t * RESTRICT bucket_start = &buckets[2 * k];
sa_sint_t * RESTRICT bucket_end = &buckets[3 * k];
@@ -1085,14 +1060,12 @@ static void libsais_initialize_buckets_for_radix_and_partial_sorting_32s_4k(
}
static void libsais_radix_sort_lms_suffixes_8u(const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
- sa_sint_t * RESTRICT induction_bucket,
- fast_sint_t omp_block_start,
+ sa_sint_t * RESTRICT induction_bucket, fast_sint_t omp_block_start,
fast_sint_t omp_block_size) {
const fast_sint_t prefetch_distance = 32;
fast_sint_t i, j;
- for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + prefetch_distance + 3;
- i >= j; i -= 4) {
+ for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + prefetch_distance + 3; i >= j; i -= 4) {
libsais_prefetch(&SA[i - 2 * prefetch_distance]);
libsais_prefetch(&T[SA[i - prefetch_distance - 0]]);
@@ -1116,9 +1089,8 @@ static void libsais_radix_sort_lms_suffixes_8u(const u8 * RESTRICT T, sa_sint_t
}
}
-static void libsais_radix_sort_lms_suffixes_8u_omp(const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
- sa_sint_t n, sa_sint_t m,
- sa_sint_t * RESTRICT buckets, sa_sint_t threads,
+static void libsais_radix_sort_lms_suffixes_8u_omp(const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
+ sa_sint_t m, sa_sint_t * RESTRICT buckets, sa_sint_t threads,
LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
{
(void)(threads);
@@ -1127,23 +1099,19 @@ static void libsais_radix_sort_lms_suffixes_8u_omp(const u8 * RESTRICT T, sa_sin
fast_sint_t omp_num_threads = 1;
if (omp_num_threads == 1) {
- libsais_radix_sort_lms_suffixes_8u(T, SA, &buckets[4 * ALPHABET_SIZE],
- (fast_sint_t)n - (fast_sint_t)m + 1,
+ libsais_radix_sort_lms_suffixes_8u(T, SA, &buckets[4 * ALPHABET_SIZE], (fast_sint_t)n - (fast_sint_t)m + 1,
(fast_sint_t)m - 1);
}
}
}
-static void libsais_radix_sort_lms_suffixes_32s_6k(const sa_sint_t * RESTRICT T,
- sa_sint_t * RESTRICT SA,
- sa_sint_t * RESTRICT induction_bucket,
- fast_sint_t omp_block_start,
+static void libsais_radix_sort_lms_suffixes_32s_6k(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+ sa_sint_t * RESTRICT induction_bucket, fast_sint_t omp_block_start,
fast_sint_t omp_block_size) {
const fast_sint_t prefetch_distance = 32;
fast_sint_t i, j;
- for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + 2 * prefetch_distance + 3;
- i >= j; i -= 4) {
+ for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + 2 * prefetch_distance + 3; i >= j; i -= 4) {
libsais_prefetch(&SA[i - 3 * prefetch_distance]);
libsais_prefetch(&T[SA[i - 2 * prefetch_distance - 0]]);
@@ -1172,16 +1140,13 @@ static void libsais_radix_sort_lms_suffixes_32s_6k(const sa_sint_t * RESTRICT T,
}
}
-static void libsais_radix_sort_lms_suffixes_32s_2k(const sa_sint_t * RESTRICT T,
- sa_sint_t * RESTRICT SA,
- sa_sint_t * RESTRICT induction_bucket,
- fast_sint_t omp_block_start,
+static void libsais_radix_sort_lms_suffixes_32s_2k(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+ sa_sint_t * RESTRICT induction_bucket, fast_sint_t omp_block_start,
fast_sint_t omp_block_size) {
const fast_sint_t prefetch_distance = 32;
fast_sint_t i, j;
- for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + 2 * prefetch_distance + 3;
- i >= j; i -= 4) {
+ for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + 2 * prefetch_distance + 3; i >= j; i -= 4) {
libsais_prefetch(&SA[i - 3 * prefetch_distance]);
libsais_prefetch(&T[SA[i - 2 * prefetch_distance - 0]]);
@@ -1209,31 +1174,30 @@ static void libsais_radix_sort_lms_suffixes_32s_2k(const sa_sint_t * RESTRICT T,
SA[--induction_bucket[BUCKETS_INDEX2(T[p], 0)]] = p;
}
}
-static void libsais_radix_sort_lms_suffixes_32s_6k_omp(
- const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m,
- sa_sint_t * RESTRICT induction_bucket, sa_sint_t threads,
- LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+static void libsais_radix_sort_lms_suffixes_32s_6k_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+ sa_sint_t n, sa_sint_t m, sa_sint_t * RESTRICT induction_bucket,
+ sa_sint_t threads,
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
if (threads == 1 || m < 65536) {
- libsais_radix_sort_lms_suffixes_32s_6k(
- T, SA, induction_bucket, (fast_sint_t)n - (fast_sint_t)m + 1, (fast_sint_t)m - 1);
+ libsais_radix_sort_lms_suffixes_32s_6k(T, SA, induction_bucket, (fast_sint_t)n - (fast_sint_t)m + 1,
+ (fast_sint_t)m - 1);
}
(void)(thread_state);
}
-static void libsais_radix_sort_lms_suffixes_32s_2k_omp(
- const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m,
- sa_sint_t * RESTRICT induction_bucket, sa_sint_t threads,
- LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+static void libsais_radix_sort_lms_suffixes_32s_2k_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+ sa_sint_t n, sa_sint_t m, sa_sint_t * RESTRICT induction_bucket,
+ sa_sint_t threads,
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
if (threads == 1 || m < 65536) {
- libsais_radix_sort_lms_suffixes_32s_2k(
- T, SA, induction_bucket, (fast_sint_t)n - (fast_sint_t)m + 1, (fast_sint_t)m - 1);
+ libsais_radix_sort_lms_suffixes_32s_2k(T, SA, induction_bucket, (fast_sint_t)n - (fast_sint_t)m + 1,
+ (fast_sint_t)m - 1);
}
(void)(thread_state);
}
-static sa_sint_t libsais_radix_sort_lms_suffixes_32s_1k(const sa_sint_t * RESTRICT T,
- sa_sint_t * RESTRICT SA, sa_sint_t n,
- sa_sint_t * RESTRICT buckets) {
+static sa_sint_t libsais_radix_sort_lms_suffixes_32s_1k(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+ sa_sint_t n, sa_sint_t * RESTRICT buckets) {
const fast_sint_t prefetch_distance = 32;
sa_sint_t i = n - 2;
@@ -1297,15 +1261,12 @@ static sa_sint_t libsais_radix_sort_lms_suffixes_32s_1k(const sa_sint_t * RESTRI
return m;
}
-static void libsais_radix_sort_set_markers_32s_6k(sa_sint_t * RESTRICT SA,
- sa_sint_t * RESTRICT induction_bucket,
- fast_sint_t omp_block_start,
- fast_sint_t omp_block_size) {
+static void libsais_radix_sort_set_markers_32s_6k(sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT induction_bucket,
+ fast_sint_t omp_block_start, fast_sint_t omp_block_size) {
const fast_sint_t prefetch_distance = 32;
fast_sint_t i, j;
- for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 3; i < j;
- i += 4) {
+ for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 3; i < j; i += 4) {
libsais_prefetch(&induction_bucket[i + 2 * prefetch_distance]);
libsais_prefetchw(&SA[induction_bucket[i + prefetch_distance + 0]]);
@@ -1324,15 +1285,12 @@ static void libsais_radix_sort_set_markers_32s_6k(sa_sint_t * RESTRICT SA,
}
}
-static void libsais_radix_sort_set_markers_32s_4k(sa_sint_t * RESTRICT SA,
- sa_sint_t * RESTRICT induction_bucket,
- fast_sint_t omp_block_start,
- fast_sint_t omp_block_size) {
+static void libsais_radix_sort_set_markers_32s_4k(sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT induction_bucket,
+ fast_sint_t omp_block_start, fast_sint_t omp_block_size) {
const fast_sint_t prefetch_distance = 32;
fast_sint_t i, j;
- for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 3; i < j;
- i += 4) {
+ for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 3; i < j; i += 4) {
libsais_prefetch(&induction_bucket[BUCKETS_INDEX2(i + 2 * prefetch_distance, 0)]);
libsais_prefetchw(&SA[induction_bucket[BUCKETS_INDEX2(i + prefetch_distance + 0, 0)]]);
@@ -1352,33 +1310,28 @@ static void libsais_radix_sort_set_markers_32s_4k(sa_sint_t * RESTRICT SA,
}
static void libsais_radix_sort_set_markers_32s_6k_omp(sa_sint_t * RESTRICT SA, sa_sint_t k,
- sa_sint_t * RESTRICT induction_bucket,
- sa_sint_t threads) {
+ sa_sint_t * RESTRICT induction_bucket, sa_sint_t threads) {
{
(void)(threads);
fast_sint_t omp_block_start = 0;
fast_sint_t omp_block_size = (fast_sint_t)k - 1;
- libsais_radix_sort_set_markers_32s_6k(SA, induction_bucket, omp_block_start,
- omp_block_size);
+ libsais_radix_sort_set_markers_32s_6k(SA, induction_bucket, omp_block_start, omp_block_size);
}
}
static void libsais_radix_sort_set_markers_32s_4k_omp(sa_sint_t * RESTRICT SA, sa_sint_t k,
- sa_sint_t * RESTRICT induction_bucket,
- sa_sint_t threads) {
+ sa_sint_t * RESTRICT induction_bucket, sa_sint_t threads) {
{
(void)(threads);
fast_sint_t omp_block_start = 0;
fast_sint_t omp_block_size = (fast_sint_t)k - 1;
- libsais_radix_sort_set_markers_32s_4k(SA, induction_bucket, omp_block_start,
- omp_block_size);
+ libsais_radix_sort_set_markers_32s_4k(SA, induction_bucket, omp_block_start, omp_block_size);
}
}
-static void libsais_initialize_buckets_for_partial_sorting_8u(const u8 * RESTRICT T,
- sa_sint_t * RESTRICT buckets,
+static void libsais_initialize_buckets_for_partial_sorting_8u(const u8 * RESTRICT T, sa_sint_t * RESTRICT buckets,
sa_sint_t first_lms_suffix,
sa_sint_t left_suffixes_count) {
sa_sint_t * RESTRICT temp_bucket = &buckets[4 * ALPHABET_SIZE];
@@ -1387,8 +1340,7 @@ static void libsais_initialize_buckets_for_partial_sorting_8u(const u8 * RESTRIC
fast_sint_t i, j;
sa_sint_t sum0 = left_suffixes_count + 1, sum1 = 0;
- for (i = BUCKETS_INDEX4(0, 0), j = BUCKETS_INDEX2(0, 0);
- i <= BUCKETS_INDEX4(ALPHABET_SIZE - 1, 0);
+ for (i = BUCKETS_INDEX4(0, 0), j = BUCKETS_INDEX2(0, 0); i <= BUCKETS_INDEX4(ALPHABET_SIZE - 1, 0);
i += BUCKETS_INDEX4(1, 0), j += BUCKETS_INDEX2(1, 0)) {
temp_bucket[j + BUCKETS_INDEX2(0, 0)] = sum0;
@@ -1400,8 +1352,7 @@ static void libsais_initialize_buckets_for_partial_sorting_8u(const u8 * RESTRIC
}
}
-static void libsais_initialize_buckets_for_partial_sorting_32s_6k(const sa_sint_t * RESTRICT T,
- sa_sint_t k,
+static void libsais_initialize_buckets_for_partial_sorting_32s_6k(const sa_sint_t * RESTRICT T, sa_sint_t k,
sa_sint_t * RESTRICT buckets,
sa_sint_t first_lms_suffix,
sa_sint_t left_suffixes_count) {
@@ -1430,8 +1381,7 @@ static void libsais_initialize_buckets_for_partial_sorting_32s_6k(const sa_sint_
temp_bucket[j + BUCKETS_INDEX2(0, 1)] = sum1;
}
- for (sum1 += 1; i <= BUCKETS_INDEX4((fast_sint_t)k - 1, 0);
- i += BUCKETS_INDEX4(1, 0), j += BUCKETS_INDEX2(1, 0)) {
+ for (sum1 += 1; i <= BUCKETS_INDEX4((fast_sint_t)k - 1, 0); i += BUCKETS_INDEX4(1, 0), j += BUCKETS_INDEX2(1, 0)) {
sa_sint_t SS = buckets[i + BUCKETS_INDEX4(0, 0)];
sa_sint_t LS = buckets[i + BUCKETS_INDEX4(0, 1)];
sa_sint_t SL = buckets[i + BUCKETS_INDEX4(0, 2)];
@@ -1451,17 +1401,17 @@ static void libsais_initialize_buckets_for_partial_sorting_32s_6k(const sa_sint_
}
}
-static sa_sint_t libsais_partial_sorting_scan_left_to_right_8u(
- const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT buckets, sa_sint_t d,
- fast_sint_t omp_block_start, fast_sint_t omp_block_size) {
+static sa_sint_t libsais_partial_sorting_scan_left_to_right_8u(const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
+ sa_sint_t * RESTRICT buckets, sa_sint_t d,
+ fast_sint_t omp_block_start,
+ fast_sint_t omp_block_size) {
const fast_sint_t prefetch_distance = 32;
sa_sint_t * RESTRICT induction_bucket = &buckets[4 * ALPHABET_SIZE];
sa_sint_t * RESTRICT distinct_names = &buckets[2 * ALPHABET_SIZE];
fast_sint_t i, j;
- for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 1; i < j;
- i += 2) {
+ for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 1; i < j; i += 2) {
libsais_prefetch(&SA[i + 2 * prefetch_distance]);
libsais_prefetch(&T[SA[i + prefetch_distance + 0] & SAINT_MAX] - 1);
@@ -1473,16 +1423,14 @@ static sa_sint_t libsais_partial_sorting_scan_left_to_right_8u(
d += (p0 < 0);
p0 &= SAINT_MAX;
sa_sint_t v0 = BUCKETS_INDEX2(T[p0 - 1], T[p0 - 2] >= T[p0 - 1]);
- SA[induction_bucket[v0]++] =
- (p0 - 1) | ((sa_sint_t)(distinct_names[v0] != d) << (SAINT_BIT - 1));
+ SA[induction_bucket[v0]++] = (p0 - 1) | ((sa_sint_t)(distinct_names[v0] != d) << (SAINT_BIT - 1));
distinct_names[v0] = d;
sa_sint_t p1 = SA[i + 1];
d += (p1 < 0);
p1 &= SAINT_MAX;
sa_sint_t v1 = BUCKETS_INDEX2(T[p1 - 1], T[p1 - 2] >= T[p1 - 1]);
- SA[induction_bucket[v1]++] =
- (p1 - 1) | ((sa_sint_t)(distinct_names[v1] != d) << (SAINT_BIT - 1));
+ SA[induction_bucket[v1]++] = (p1 - 1) | ((sa_sint_t)(distinct_names[v1] != d) << (SAINT_BIT - 1));
distinct_names[v1] = d;
}
@@ -1491,17 +1439,17 @@ static sa_sint_t libsais_partial_sorting_scan_left_to_right_8u(
d += (p < 0);
p &= SAINT_MAX;
sa_sint_t v = BUCKETS_INDEX2(T[p - 1], T[p - 2] >= T[p - 1]);
- SA[induction_bucket[v]++] =
- (p - 1) | ((sa_sint_t)(distinct_names[v] != d) << (SAINT_BIT - 1));
+ SA[induction_bucket[v]++] = (p - 1) | ((sa_sint_t)(distinct_names[v] != d) << (SAINT_BIT - 1));
distinct_names[v] = d;
}
return d;
}
-static sa_sint_t libsais_partial_sorting_scan_left_to_right_8u_omp(
- const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t * RESTRICT buckets,
- sa_sint_t left_suffixes_count, sa_sint_t d, sa_sint_t threads,
- LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+static sa_sint_t libsais_partial_sorting_scan_left_to_right_8u_omp(const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
+ sa_sint_t n, sa_sint_t * RESTRICT buckets,
+ sa_sint_t left_suffixes_count, sa_sint_t d,
+ sa_sint_t threads,
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
sa_sint_t * RESTRICT induction_bucket = &buckets[4 * ALPHABET_SIZE];
sa_sint_t * RESTRICT distinct_names = &buckets[2 * ALPHABET_SIZE];
@@ -1509,21 +1457,21 @@ static sa_sint_t libsais_partial_sorting_scan_left_to_right_8u_omp(
distinct_names[BUCKETS_INDEX2(T[n - 1], T[n - 2] >= T[n - 1])] = ++d;
if (threads == 1 || left_suffixes_count < 65536) {
- d = libsais_partial_sorting_scan_left_to_right_8u(T, SA, buckets, d, 0,
- left_suffixes_count);
+ d = libsais_partial_sorting_scan_left_to_right_8u(T, SA, buckets, d, 0, left_suffixes_count);
}
(void)(thread_state);
return d;
}
-static sa_sint_t libsais_partial_sorting_scan_left_to_right_32s_6k(
- const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT buckets,
- sa_sint_t d, fast_sint_t omp_block_start, fast_sint_t omp_block_size) {
+static sa_sint_t libsais_partial_sorting_scan_left_to_right_32s_6k(const sa_sint_t * RESTRICT T,
+ sa_sint_t * RESTRICT SA,
+ sa_sint_t * RESTRICT buckets, sa_sint_t d,
+ fast_sint_t omp_block_start,
+ fast_sint_t omp_block_size) {
const fast_sint_t prefetch_distance = 32;
fast_sint_t i, j;
- for (i = omp_block_start, j = omp_block_start + omp_block_size - 2 * prefetch_distance - 1;
- i < j; i += 2) {
+ for (i = omp_block_start, j = omp_block_start + omp_block_size - 2 * prefetch_distance - 1; i < j; i += 2) {
libsais_prefetch(&SA[i + 3 * prefetch_distance]);
libsais_prefetch(&T[SA[i + 2 * prefetch_distance + 0] & SAINT_MAX] - 1);
@@ -1565,18 +1513,18 @@ static sa_sint_t libsais_partial_sorting_scan_left_to_right_32s_6k(
return d;
}
-static sa_sint_t libsais_partial_sorting_scan_left_to_right_32s_4k(
- const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t k,
- sa_sint_t * RESTRICT buckets, sa_sint_t d, fast_sint_t omp_block_start,
- fast_sint_t omp_block_size) {
+static sa_sint_t libsais_partial_sorting_scan_left_to_right_32s_4k(const sa_sint_t * RESTRICT T,
+ sa_sint_t * RESTRICT SA, sa_sint_t k,
+ sa_sint_t * RESTRICT buckets, sa_sint_t d,
+ fast_sint_t omp_block_start,
+ fast_sint_t omp_block_size) {
const fast_sint_t prefetch_distance = 32;
sa_sint_t * RESTRICT induction_bucket = &buckets[2 * k];
sa_sint_t * RESTRICT distinct_names = &buckets[0 * k];
fast_sint_t i, j;
- for (i = omp_block_start, j = omp_block_start + omp_block_size - 2 * prefetch_distance - 1;
- i < j; i += 2) {
+ for (i = omp_block_start, j = omp_block_start + omp_block_size - 2 * prefetch_distance - 1; i < j; i += 2) {
libsais_prefetchw(&SA[i + 3 * prefetch_distance]);
sa_sint_t s0 = SA[i + 2 * prefetch_distance + 0];
@@ -1609,9 +1557,8 @@ static sa_sint_t libsais_partial_sorting_scan_left_to_right_32s_4k(
d += (p0 >> (SUFFIX_GROUP_BIT - 1));
p0 &= ~SUFFIX_GROUP_MARKER;
sa_sint_t v0 = BUCKETS_INDEX2(T[p0 - 1], T[p0 - 2] < T[p0 - 1]);
- SA[induction_bucket[T[p0 - 1]]++] =
- (p0 - 1) | ((sa_sint_t)(T[p0 - 2] < T[p0 - 1]) << (SAINT_BIT - 1)) |
- ((sa_sint_t)(distinct_names[v0] != d) << (SUFFIX_GROUP_BIT - 1));
+ SA[induction_bucket[T[p0 - 1]]++] = (p0 - 1) | ((sa_sint_t)(T[p0 - 2] < T[p0 - 1]) << (SAINT_BIT - 1)) |
+ ((sa_sint_t)(distinct_names[v0] != d) << (SUFFIX_GROUP_BIT - 1));
distinct_names[v0] = d;
}
@@ -1622,9 +1569,8 @@ static sa_sint_t libsais_partial_sorting_scan_left_to_right_32s_4k(
d += (p1 >> (SUFFIX_GROUP_BIT - 1));
p1 &= ~SUFFIX_GROUP_MARKER;
sa_sint_t v1 = BUCKETS_INDEX2(T[p1 - 1], T[p1 - 2] < T[p1 - 1]);
- SA[induction_bucket[T[p1 - 1]]++] =
- (p1 - 1) | ((sa_sint_t)(T[p1 - 2] < T[p1 - 1]) << (SAINT_BIT - 1)) |
- ((sa_sint_t)(distinct_names[v1] != d) << (SUFFIX_GROUP_BIT - 1));
+ SA[induction_bucket[T[p1 - 1]]++] = (p1 - 1) | ((sa_sint_t)(T[p1 - 2] < T[p1 - 1]) << (SAINT_BIT - 1)) |
+ ((sa_sint_t)(distinct_names[v1] != d) << (SUFFIX_GROUP_BIT - 1));
distinct_names[v1] = d;
}
}
@@ -1637,9 +1583,8 @@ static sa_sint_t libsais_partial_sorting_scan_left_to_right_32s_4k(
d += (p >> (SUFFIX_GROUP_BIT - 1));
p &= ~SUFFIX_GROUP_MARKER;
sa_sint_t v = BUCKETS_INDEX2(T[p - 1], T[p - 2] < T[p - 1]);
- SA[induction_bucket[T[p - 1]]++] =
- (p - 1) | ((sa_sint_t)(T[p - 2] < T[p - 1]) << (SAINT_BIT - 1)) |
- ((sa_sint_t)(distinct_names[v] != d) << (SUFFIX_GROUP_BIT - 1));
+ SA[induction_bucket[T[p - 1]]++] = (p - 1) | ((sa_sint_t)(T[p - 2] < T[p - 1]) << (SAINT_BIT - 1)) |
+ ((sa_sint_t)(distinct_names[v] != d) << (SUFFIX_GROUP_BIT - 1));
distinct_names[v] = d;
}
}
@@ -1647,16 +1592,13 @@ static sa_sint_t libsais_partial_sorting_scan_left_to_right_32s_4k(
return d;
}
-static void libsais_partial_sorting_scan_left_to_right_32s_1k(const sa_sint_t * RESTRICT T,
- sa_sint_t * RESTRICT SA,
+static void libsais_partial_sorting_scan_left_to_right_32s_1k(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA,
sa_sint_t * RESTRICT induction_bucket,
- fast_sint_t omp_block_start,
- fast_sint_t omp_block_size) {
+ fast_sint_t omp_block_start, fast_sint_t omp_block_size) {
const fast_sint_t prefetch_distance = 32;
fast_sint_t i, j;
- for (i = omp_block_start, j = omp_block_start + omp_block_size - 2 * prefetch_distance - 1;
- i < j; i += 2) {
+ for (i = omp_block_start, j = omp_block_start + omp_block_size - 2 * prefetch_distance - 1; i < j; i += 2) {
libsais_prefetchw(&SA[i + 3 * prefetch_distance]);
sa_sint_t s0 = SA[i + 2 * prefetch_distance + 0];
@@ -1680,15 +1622,13 @@ static void libsais_partial_sorting_scan_left_to_right_32s_1k(const sa_sint_t *
SA[i + 0] = p0 & SAINT_MAX;
if (p0 > 0) {
SA[i + 0] = 0;
- SA[induction_bucket[T[p0 - 1]]++] =
- (p0 - 1) | ((sa_sint_t)(T[p0 - 2] < T[p0 - 1]) << (SAINT_BIT - 1));
+ SA[induction_bucket[T[p0 - 1]]++] = (p0 - 1) | ((sa_sint_t)(T[p0 - 2] < T[p0 - 1]) << (SAINT_BIT - 1));
}
sa_sint_t p1 = SA[i + 1];
SA[i + 1] = p1 & SAINT_MAX;
if (p1 > 0) {
SA[i + 1] = 0;
- SA[induction_bucket[T[p1 - 1]]++] =
- (p1 - 1) | ((sa_sint_t)(T[p1 - 2] < T[p1 - 1]) << (SAINT_BIT - 1));
+ SA[induction_bucket[T[p1 - 1]]++] = (p1 - 1) | ((sa_sint_t)(T[p1 - 2] < T[p1 - 1]) << (SAINT_BIT - 1));
}
}
@@ -1697,30 +1637,28 @@ static void libsais_partial_sorting_scan_left_to_right_32s_1k(const sa_sint_t *
SA[i] = p & SAINT_MAX;
if (p > 0) {
SA[i] = 0;
- SA[induction_bucket[T[p - 1]]++] =
- (p - 1) | ((sa_sint_t)(T[p - 2] < T[p - 1]) << (SAINT_BIT - 1));
+ SA[induction_bucket[T[p - 1]]++] = (p - 1) | ((sa_sint_t)(T[p - 2] < T[p - 1]) << (SAINT_BIT - 1));
}
}
}
static sa_sint_t libsais_partial_sorting_scan_left_to_right_32s_6k_omp(
- const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
- sa_sint_t * RESTRICT buckets, sa_sint_t left_suffixes_count, sa_sint_t d, sa_sint_t threads,
- LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+ const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t * RESTRICT buckets,
+ sa_sint_t left_suffixes_count, sa_sint_t d, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
SA[buckets[BUCKETS_INDEX4(T[n - 1], T[n - 2] >= T[n - 1])]++] = (n - 1) | SAINT_MIN;
buckets[2 + BUCKETS_INDEX4(T[n - 1], T[n - 2] >= T[n - 1])] = ++d;
if (threads == 1 || left_suffixes_count < 65536) {
- d = libsais_partial_sorting_scan_left_to_right_32s_6k(T, SA, buckets, d, 0,
- left_suffixes_count);
+ d = libsais_partial_sorting_scan_left_to_right_32s_6k(T, SA, buckets, d, 0, left_suffixes_count);
}
(void)(thread_state);
return d;
}
-static sa_sint_t libsais_partial_sorting_scan_left_to_right_32s_4k_omp(
- const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k,
- sa_sint_t * RESTRICT buckets, sa_sint_t d, sa_sint_t threads,
- LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+static sa_sint_t libsais_partial_sorting_scan_left_to_right_32s_4k_omp(const sa_sint_t * RESTRICT T,
+ sa_sint_t * RESTRICT SA, sa_sint_t n,
+ sa_sint_t k, sa_sint_t * RESTRICT buckets,
+ sa_sint_t d, sa_sint_t threads,
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
sa_sint_t * RESTRICT induction_bucket = &buckets[2 * k];
sa_sint_t * RESTRICT distinct_names = &buckets[0 * k];
@@ -1735,9 +1673,10 @@ static sa_sint_t libsais_partial_sorting_scan_left_to_right_32s_4k_omp(
return d;
}
-static void libsais_partial_sorting_scan_left_to_right_32s_1k_omp(
- const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
- sa_sint_t * RESTRICT buckets, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+static void libsais_partial_sorting_scan_left_to_right_32s_1k_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+ sa_sint_t n, sa_sint_t * RESTRICT buckets,
+ sa_sint_t threads,
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
SA[buckets[T[n - 1]]++] = (n - 1) | ((sa_sint_t)(T[n - 2] < T[n - 1]) << (SAINT_BIT - 1));
if (threads == 1 || n < 65536) {
@@ -1747,8 +1686,7 @@ static void libsais_partial_sorting_scan_left_to_right_32s_1k_omp(
}
static void libsais_partial_sorting_shift_markers_8u_omp(sa_sint_t * RESTRICT SA, sa_sint_t n,
- const sa_sint_t * RESTRICT buckets,
- sa_sint_t threads) {
+ const sa_sint_t * RESTRICT buckets, sa_sint_t threads) {
const fast_sint_t prefetch_distance = 32;
const sa_sint_t * RESTRICT temp_bucket = &buckets[4 * ALPHABET_SIZE];
@@ -1757,13 +1695,11 @@ static void libsais_partial_sorting_shift_markers_8u_omp(sa_sint_t * RESTRICT SA
(void)(threads);
(void)(n);
- for (c = BUCKETS_INDEX2(ALPHABET_SIZE - 1, 0); c >= BUCKETS_INDEX2(1, 0);
- c -= BUCKETS_INDEX2(1, 0)) {
+ for (c = BUCKETS_INDEX2(ALPHABET_SIZE - 1, 0); c >= BUCKETS_INDEX2(1, 0); c -= BUCKETS_INDEX2(1, 0)) {
fast_sint_t i, j;
sa_sint_t s = SAINT_MIN;
- for (i = (fast_sint_t)temp_bucket[c] - 1,
- j = (fast_sint_t)buckets[c - BUCKETS_INDEX2(1, 0)] + 3;
- i >= j; i -= 4) {
+ for (i = (fast_sint_t)temp_bucket[c] - 1, j = (fast_sint_t)buckets[c - BUCKETS_INDEX2(1, 0)] + 3; i >= j;
+ i -= 4) {
libsais_prefetchw(&SA[i - prefetch_distance]);
sa_sint_t p0 = SA[i - 0], q0 = (p0 & SAINT_MIN) ^ s;
@@ -1789,8 +1725,7 @@ static void libsais_partial_sorting_shift_markers_8u_omp(sa_sint_t * RESTRICT SA
}
static void libsais_partial_sorting_shift_markers_32s_6k_omp(sa_sint_t * RESTRICT SA, sa_sint_t k,
- const sa_sint_t * RESTRICT buckets,
- sa_sint_t threads) {
+ const sa_sint_t * RESTRICT buckets, sa_sint_t threads) {
const fast_sint_t prefetch_distance = 32;
const sa_sint_t * RESTRICT temp_bucket = &buckets[4 * k];
@@ -1836,55 +1771,52 @@ static void libsais_partial_sorting_shift_markers_32s_4k(sa_sint_t * RESTRICT SA
for (i = (fast_sint_t)n - 1; i >= 3; i -= 4) {
libsais_prefetchw(&SA[i - prefetch_distance]);
- sa_sint_t p0 = SA[i - 0], q0 = ((p0 & SUFFIX_GROUP_MARKER) ^ s) &
- ((sa_sint_t)(p0 > 0) << ((SUFFIX_GROUP_BIT - 1)));
+ sa_sint_t p0 = SA[i - 0],
+ q0 = ((p0 & SUFFIX_GROUP_MARKER) ^ s) & ((sa_sint_t)(p0 > 0) << ((SUFFIX_GROUP_BIT - 1)));
s = s ^ q0;
SA[i - 0] = p0 ^ q0;
- sa_sint_t p1 = SA[i - 1], q1 = ((p1 & SUFFIX_GROUP_MARKER) ^ s) &
- ((sa_sint_t)(p1 > 0) << ((SUFFIX_GROUP_BIT - 1)));
+ sa_sint_t p1 = SA[i - 1],
+ q1 = ((p1 & SUFFIX_GROUP_MARKER) ^ s) & ((sa_sint_t)(p1 > 0) << ((SUFFIX_GROUP_BIT - 1)));
s = s ^ q1;
SA[i - 1] = p1 ^ q1;
- sa_sint_t p2 = SA[i - 2], q2 = ((p2 & SUFFIX_GROUP_MARKER) ^ s) &
- ((sa_sint_t)(p2 > 0) << ((SUFFIX_GROUP_BIT - 1)));
+ sa_sint_t p2 = SA[i - 2],
+ q2 = ((p2 & SUFFIX_GROUP_MARKER) ^ s) & ((sa_sint_t)(p2 > 0) << ((SUFFIX_GROUP_BIT - 1)));
s = s ^ q2;
SA[i - 2] = p2 ^ q2;
- sa_sint_t p3 = SA[i - 3], q3 = ((p3 & SUFFIX_GROUP_MARKER) ^ s) &
- ((sa_sint_t)(p3 > 0) << ((SUFFIX_GROUP_BIT - 1)));
+ sa_sint_t p3 = SA[i - 3],
+ q3 = ((p3 & SUFFIX_GROUP_MARKER) ^ s) & ((sa_sint_t)(p3 > 0) << ((SUFFIX_GROUP_BIT - 1)));
s = s ^ q3;
SA[i - 3] = p3 ^ q3;
}
for (; i >= 0; i -= 1) {
- sa_sint_t p = SA[i], q = ((p & SUFFIX_GROUP_MARKER) ^ s) &
- ((sa_sint_t)(p > 0) << ((SUFFIX_GROUP_BIT - 1)));
+ sa_sint_t p = SA[i], q = ((p & SUFFIX_GROUP_MARKER) ^ s) & ((sa_sint_t)(p > 0) << ((SUFFIX_GROUP_BIT - 1)));
s = s ^ q;
SA[i] = p ^ q;
}
}
-static void libsais_partial_sorting_shift_buckets_32s_6k(sa_sint_t k,
- sa_sint_t * RESTRICT buckets) {
+static void libsais_partial_sorting_shift_buckets_32s_6k(sa_sint_t k, sa_sint_t * RESTRICT buckets) {
sa_sint_t * RESTRICT temp_bucket = &buckets[4 * k];
fast_sint_t i;
- for (i = BUCKETS_INDEX2(0, 0); i <= BUCKETS_INDEX2((fast_sint_t)k - 1, 0);
- i += BUCKETS_INDEX2(1, 0)) {
+ for (i = BUCKETS_INDEX2(0, 0); i <= BUCKETS_INDEX2((fast_sint_t)k - 1, 0); i += BUCKETS_INDEX2(1, 0)) {
buckets[2 * i + BUCKETS_INDEX4(0, 0)] = temp_bucket[i + BUCKETS_INDEX2(0, 0)];
buckets[2 * i + BUCKETS_INDEX4(0, 1)] = temp_bucket[i + BUCKETS_INDEX2(0, 1)];
}
}
-static sa_sint_t libsais_partial_sorting_scan_right_to_left_8u(
- const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT buckets, sa_sint_t d,
- fast_sint_t omp_block_start, fast_sint_t omp_block_size) {
+static sa_sint_t libsais_partial_sorting_scan_right_to_left_8u(const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
+ sa_sint_t * RESTRICT buckets, sa_sint_t d,
+ fast_sint_t omp_block_start,
+ fast_sint_t omp_block_size) {
const fast_sint_t prefetch_distance = 32;
sa_sint_t * RESTRICT induction_bucket = &buckets[0 * ALPHABET_SIZE];
sa_sint_t * RESTRICT distinct_names = &buckets[2 * ALPHABET_SIZE];
fast_sint_t i, j;
- for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + prefetch_distance + 1;
- i >= j; i -= 2) {
+ for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + prefetch_distance + 1; i >= j; i -= 2) {
libsais_prefetch(&SA[i - 2 * prefetch_distance]);
libsais_prefetch(&T[SA[i - prefetch_distance - 0] & SAINT_MAX] - 1);
@@ -1896,16 +1828,14 @@ static sa_sint_t libsais_partial_sorting_scan_right_to_left_8u(
d += (p0 < 0);
p0 &= SAINT_MAX;
sa_sint_t v0 = BUCKETS_INDEX2(T[p0 - 1], T[p0 - 2] > T[p0 - 1]);
- SA[--induction_bucket[v0]] =
- (p0 - 1) | ((sa_sint_t)(distinct_names[v0] != d) << (SAINT_BIT - 1));
+ SA[--induction_bucket[v0]] = (p0 - 1) | ((sa_sint_t)(distinct_names[v0] != d) << (SAINT_BIT - 1));
distinct_names[v0] = d;
sa_sint_t p1 = SA[i - 1];
d += (p1 < 0);
p1 &= SAINT_MAX;
sa_sint_t v1 = BUCKETS_INDEX2(T[p1 - 1], T[p1 - 2] > T[p1 - 1]);
- SA[--induction_bucket[v1]] =
- (p1 - 1) | ((sa_sint_t)(distinct_names[v1] != d) << (SAINT_BIT - 1));
+ SA[--induction_bucket[v1]] = (p1 - 1) | ((sa_sint_t)(distinct_names[v1] != d) << (SAINT_BIT - 1));
distinct_names[v1] = d;
}
@@ -1914,35 +1844,35 @@ static sa_sint_t libsais_partial_sorting_scan_right_to_left_8u(
d += (p < 0);
p &= SAINT_MAX;
sa_sint_t v = BUCKETS_INDEX2(T[p - 1], T[p - 2] > T[p - 1]);
- SA[--induction_bucket[v]] =
- (p - 1) | ((sa_sint_t)(distinct_names[v] != d) << (SAINT_BIT - 1));
+ SA[--induction_bucket[v]] = (p - 1) | ((sa_sint_t)(distinct_names[v] != d) << (SAINT_BIT - 1));
distinct_names[v] = d;
}
return d;
}
-static void libsais_partial_sorting_scan_right_to_left_8u_omp(
- const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t * RESTRICT buckets,
- sa_sint_t first_lms_suffix, sa_sint_t left_suffixes_count, sa_sint_t d, sa_sint_t threads,
- LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+static void libsais_partial_sorting_scan_right_to_left_8u_omp(const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
+ sa_sint_t n, sa_sint_t * RESTRICT buckets,
+ sa_sint_t first_lms_suffix, sa_sint_t left_suffixes_count,
+ sa_sint_t d, sa_sint_t threads,
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
fast_sint_t scan_start = (fast_sint_t)left_suffixes_count + 1;
fast_sint_t scan_end = (fast_sint_t)n - (fast_sint_t)first_lms_suffix;
if (threads == 1 || (scan_end - scan_start) < 65536) {
- libsais_partial_sorting_scan_right_to_left_8u(T, SA, buckets, d, scan_start,
- scan_end - scan_start);
+ libsais_partial_sorting_scan_right_to_left_8u(T, SA, buckets, d, scan_start, scan_end - scan_start);
}
(void)(thread_state);
}
-static sa_sint_t libsais_partial_sorting_scan_right_to_left_32s_6k(
- const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT buckets,
- sa_sint_t d, fast_sint_t omp_block_start, fast_sint_t omp_block_size) {
+static sa_sint_t libsais_partial_sorting_scan_right_to_left_32s_6k(const sa_sint_t * RESTRICT T,
+ sa_sint_t * RESTRICT SA,
+ sa_sint_t * RESTRICT buckets, sa_sint_t d,
+ fast_sint_t omp_block_start,
+ fast_sint_t omp_block_size) {
const fast_sint_t prefetch_distance = 32;
fast_sint_t i, j;
- for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + 2 * prefetch_distance + 1;
- i >= j; i -= 2) {
+ for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + 2 * prefetch_distance + 1; i >= j; i -= 2) {
libsais_prefetch(&SA[i - 3 * prefetch_distance]);
libsais_prefetch(&T[SA[i - 2 * prefetch_distance - 0] & SAINT_MAX] - 1);
@@ -1984,18 +1914,18 @@ static sa_sint_t libsais_partial_sorting_scan_right_to_left_32s_6k(
return d;
}
-static sa_sint_t libsais_partial_sorting_scan_right_to_left_32s_4k(
- const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t k,
- sa_sint_t * RESTRICT buckets, sa_sint_t d, fast_sint_t omp_block_start,
- fast_sint_t omp_block_size) {
+static sa_sint_t libsais_partial_sorting_scan_right_to_left_32s_4k(const sa_sint_t * RESTRICT T,
+ sa_sint_t * RESTRICT SA, sa_sint_t k,
+ sa_sint_t * RESTRICT buckets, sa_sint_t d,
+ fast_sint_t omp_block_start,
+ fast_sint_t omp_block_size) {
const fast_sint_t prefetch_distance = 32;
sa_sint_t * RESTRICT induction_bucket = &buckets[3 * k];
sa_sint_t * RESTRICT distinct_names = &buckets[0 * k];
fast_sint_t i, j;
- for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + 2 * prefetch_distance + 1;
- i >= j; i -= 2) {
+ for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + 2 * prefetch_distance + 1; i >= j; i -= 2) {
libsais_prefetchw(&SA[i - 3 * prefetch_distance]);
sa_sint_t s0 = SA[i - 2 * prefetch_distance - 0];
@@ -2027,9 +1957,8 @@ static sa_sint_t libsais_partial_sorting_scan_right_to_left_32s_4k(
d += (p0 >> (SUFFIX_GROUP_BIT - 1));
p0 &= ~SUFFIX_GROUP_MARKER;
sa_sint_t v0 = BUCKETS_INDEX2(T[p0 - 1], T[p0 - 2] > T[p0 - 1]);
- SA[--induction_bucket[T[p0 - 1]]] =
- (p0 - 1) | ((sa_sint_t)(T[p0 - 2] > T[p0 - 1]) << (SAINT_BIT - 1)) |
- ((sa_sint_t)(distinct_names[v0] != d) << (SUFFIX_GROUP_BIT - 1));
+ SA[--induction_bucket[T[p0 - 1]]] = (p0 - 1) | ((sa_sint_t)(T[p0 - 2] > T[p0 - 1]) << (SAINT_BIT - 1)) |
+ ((sa_sint_t)(distinct_names[v0] != d) << (SUFFIX_GROUP_BIT - 1));
distinct_names[v0] = d;
}
@@ -2039,9 +1968,8 @@ static sa_sint_t libsais_partial_sorting_scan_right_to_left_32s_4k(
d += (p1 >> (SUFFIX_GROUP_BIT - 1));
p1 &= ~SUFFIX_GROUP_MARKER;
sa_sint_t v1 = BUCKETS_INDEX2(T[p1 - 1], T[p1 - 2] > T[p1 - 1]);
- SA[--induction_bucket[T[p1 - 1]]] =
- (p1 - 1) | ((sa_sint_t)(T[p1 - 2] > T[p1 - 1]) << (SAINT_BIT - 1)) |
- ((sa_sint_t)(distinct_names[v1] != d) << (SUFFIX_GROUP_BIT - 1));
+ SA[--induction_bucket[T[p1 - 1]]] = (p1 - 1) | ((sa_sint_t)(T[p1 - 2] > T[p1 - 1]) << (SAINT_BIT - 1)) |
+ ((sa_sint_t)(distinct_names[v1] != d) << (SUFFIX_GROUP_BIT - 1));
distinct_names[v1] = d;
}
}
@@ -2053,9 +1981,8 @@ static sa_sint_t libsais_partial_sorting_scan_right_to_left_32s_4k(
d += (p >> (SUFFIX_GROUP_BIT - 1));
p &= ~SUFFIX_GROUP_MARKER;
sa_sint_t v = BUCKETS_INDEX2(T[p - 1], T[p - 2] > T[p - 1]);
- SA[--induction_bucket[T[p - 1]]] =
- (p - 1) | ((sa_sint_t)(T[p - 2] > T[p - 1]) << (SAINT_BIT - 1)) |
- ((sa_sint_t)(distinct_names[v] != d) << (SUFFIX_GROUP_BIT - 1));
+ SA[--induction_bucket[T[p - 1]]] = (p - 1) | ((sa_sint_t)(T[p - 2] > T[p - 1]) << (SAINT_BIT - 1)) |
+ ((sa_sint_t)(distinct_names[v] != d) << (SUFFIX_GROUP_BIT - 1));
distinct_names[v] = d;
}
}
@@ -2063,16 +1990,13 @@ static sa_sint_t libsais_partial_sorting_scan_right_to_left_32s_4k(
return d;
}
-static void libsais_partial_sorting_scan_right_to_left_32s_1k(const sa_sint_t * RESTRICT T,
- sa_sint_t * RESTRICT SA,
+static void libsais_partial_sorting_scan_right_to_left_32s_1k(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA,
sa_sint_t * RESTRICT induction_bucket,
- fast_sint_t omp_block_start,
- fast_sint_t omp_block_size) {
+ fast_sint_t omp_block_start, fast_sint_t omp_block_size) {
const fast_sint_t prefetch_distance = 32;
fast_sint_t i, j;
- for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + 2 * prefetch_distance + 1;
- i >= j; i -= 2) {
+ for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + 2 * prefetch_distance + 1; i >= j; i -= 2) {
libsais_prefetchw(&SA[i - 3 * prefetch_distance]);
sa_sint_t s0 = SA[i - 2 * prefetch_distance - 0];
@@ -2095,14 +2019,12 @@ static void libsais_partial_sorting_scan_right_to_left_32s_1k(const sa_sint_t *
sa_sint_t p0 = SA[i - 0];
if (p0 > 0) {
SA[i - 0] = 0;
- SA[--induction_bucket[T[p0 - 1]]] =
- (p0 - 1) | ((sa_sint_t)(T[p0 - 2] > T[p0 - 1]) << (SAINT_BIT - 1));
+ SA[--induction_bucket[T[p0 - 1]]] = (p0 - 1) | ((sa_sint_t)(T[p0 - 2] > T[p0 - 1]) << (SAINT_BIT - 1));
}
sa_sint_t p1 = SA[i - 1];
if (p1 > 0) {
SA[i - 1] = 0;
- SA[--induction_bucket[T[p1 - 1]]] =
- (p1 - 1) | ((sa_sint_t)(T[p1 - 2] > T[p1 - 1]) << (SAINT_BIT - 1));
+ SA[--induction_bucket[T[p1 - 1]]] = (p1 - 1) | ((sa_sint_t)(T[p1 - 2] > T[p1 - 1]) << (SAINT_BIT - 1));
}
}
@@ -2110,30 +2032,29 @@ static void libsais_partial_sorting_scan_right_to_left_32s_1k(const sa_sint_t *
sa_sint_t p = SA[i];
if (p > 0) {
SA[i] = 0;
- SA[--induction_bucket[T[p - 1]]] =
- (p - 1) | ((sa_sint_t)(T[p - 2] > T[p - 1]) << (SAINT_BIT - 1));
+ SA[--induction_bucket[T[p - 1]]] = (p - 1) | ((sa_sint_t)(T[p - 2] > T[p - 1]) << (SAINT_BIT - 1));
}
}
}
static sa_sint_t libsais_partial_sorting_scan_right_to_left_32s_6k_omp(
- const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
- sa_sint_t * RESTRICT buckets, sa_sint_t first_lms_suffix, sa_sint_t left_suffixes_count,
- sa_sint_t d, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+ const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t * RESTRICT buckets,
+ sa_sint_t first_lms_suffix, sa_sint_t left_suffixes_count, sa_sint_t d, sa_sint_t threads,
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
fast_sint_t scan_start = (fast_sint_t)left_suffixes_count + 1;
fast_sint_t scan_end = (fast_sint_t)n - (fast_sint_t)first_lms_suffix;
if (threads == 1 || (scan_end - scan_start) < 65536) {
- d = libsais_partial_sorting_scan_right_to_left_32s_6k(T, SA, buckets, d, scan_start,
- scan_end - scan_start);
+ d = libsais_partial_sorting_scan_right_to_left_32s_6k(T, SA, buckets, d, scan_start, scan_end - scan_start);
}
(void)(thread_state);
return d;
}
-static sa_sint_t libsais_partial_sorting_scan_right_to_left_32s_4k_omp(
- const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k,
- sa_sint_t * RESTRICT buckets, sa_sint_t d, sa_sint_t threads,
- LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+static sa_sint_t libsais_partial_sorting_scan_right_to_left_32s_4k_omp(const sa_sint_t * RESTRICT T,
+ sa_sint_t * RESTRICT SA, sa_sint_t n,
+ sa_sint_t k, sa_sint_t * RESTRICT buckets,
+ sa_sint_t d, sa_sint_t threads,
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
if (threads == 1 || n < 65536) {
d = libsais_partial_sorting_scan_right_to_left_32s_4k(T, SA, k, buckets, d, 0, n);
}
@@ -2141,9 +2062,10 @@ static sa_sint_t libsais_partial_sorting_scan_right_to_left_32s_4k_omp(
return d;
}
-static void libsais_partial_sorting_scan_right_to_left_32s_1k_omp(
- const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
- sa_sint_t * RESTRICT buckets, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+static void libsais_partial_sorting_scan_right_to_left_32s_1k_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+ sa_sint_t n, sa_sint_t * RESTRICT buckets,
+ sa_sint_t threads,
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
if (threads == 1 || n < 65536) {
libsais_partial_sorting_scan_right_to_left_32s_1k(T, SA, buckets, 0, n);
}
@@ -2156,8 +2078,7 @@ static fast_sint_t libsais_partial_sorting_gather_lms_suffixes_32s_4k(sa_sint_t
const fast_sint_t prefetch_distance = 32;
fast_sint_t i, j, l;
- for (i = omp_block_start, j = omp_block_start + omp_block_size - 3, l = omp_block_start; i < j;
- i += 4) {
+ for (i = omp_block_start, j = omp_block_start + omp_block_size - 3, l = omp_block_start; i < j; i += 4) {
libsais_prefetch(&SA[i + prefetch_distance]);
sa_sint_t s0 = SA[i + 0];
@@ -2189,8 +2110,7 @@ static fast_sint_t libsais_partial_sorting_gather_lms_suffixes_32s_1k(sa_sint_t
const fast_sint_t prefetch_distance = 32;
fast_sint_t i, j, l;
- for (i = omp_block_start, j = omp_block_start + omp_block_size - 3, l = omp_block_start; i < j;
- i += 4) {
+ for (i = omp_block_start, j = omp_block_start + omp_block_size - 3, l = omp_block_start; i < j; i += 4) {
libsais_prefetch(&SA[i + prefetch_distance]);
sa_sint_t s0 = SA[i + 0];
@@ -2216,9 +2136,9 @@ static fast_sint_t libsais_partial_sorting_gather_lms_suffixes_32s_1k(sa_sint_t
return l;
}
-static void libsais_partial_sorting_gather_lms_suffixes_32s_4k_omp(
- sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t threads,
- LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+static void libsais_partial_sorting_gather_lms_suffixes_32s_4k_omp(sa_sint_t * RESTRICT SA, sa_sint_t n,
+ sa_sint_t threads,
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
{
(void)(threads);
(void)(thread_state);
@@ -2228,8 +2148,7 @@ static void libsais_partial_sorting_gather_lms_suffixes_32s_4k_omp(
fast_sint_t omp_block_stride = (n / omp_num_threads) & (-16);
fast_sint_t omp_block_start = omp_thread_num * omp_block_stride;
- fast_sint_t omp_block_size =
- omp_thread_num < omp_num_threads - 1 ? omp_block_stride : n - omp_block_start;
+ fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? omp_block_stride : n - omp_block_start;
if (omp_num_threads == 1) {
libsais_partial_sorting_gather_lms_suffixes_32s_4k(SA, omp_block_start, omp_block_size);
@@ -2237,9 +2156,9 @@ static void libsais_partial_sorting_gather_lms_suffixes_32s_4k_omp(
}
}
-static void libsais_partial_sorting_gather_lms_suffixes_32s_1k_omp(
- sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t threads,
- LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+static void libsais_partial_sorting_gather_lms_suffixes_32s_1k_omp(sa_sint_t * RESTRICT SA, sa_sint_t n,
+ sa_sint_t threads,
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
{
(void)(threads);
(void)(thread_state);
@@ -2249,8 +2168,7 @@ static void libsais_partial_sorting_gather_lms_suffixes_32s_1k_omp(
fast_sint_t omp_block_stride = (n / omp_num_threads) & (-16);
fast_sint_t omp_block_start = omp_thread_num * omp_block_stride;
- fast_sint_t omp_block_size =
- omp_thread_num < omp_num_threads - 1 ? omp_block_stride : n - omp_block_start;
+ fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? omp_block_stride : n - omp_block_start;
if (omp_num_threads == 1) {
libsais_partial_sorting_gather_lms_suffixes_32s_1k(SA, omp_block_start, omp_block_size);
@@ -2258,63 +2176,52 @@ static void libsais_partial_sorting_gather_lms_suffixes_32s_1k_omp(
}
}
-static void libsais_induce_partial_order_8u_omp(const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
- sa_sint_t n, sa_sint_t * RESTRICT buckets,
- sa_sint_t first_lms_suffix,
+static void libsais_induce_partial_order_8u_omp(const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
+ sa_sint_t * RESTRICT buckets, sa_sint_t first_lms_suffix,
sa_sint_t left_suffixes_count, sa_sint_t threads,
LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
memset(&buckets[2 * ALPHABET_SIZE], 0, 2 * ALPHABET_SIZE * sizeof(sa_sint_t));
- sa_sint_t d = libsais_partial_sorting_scan_left_to_right_8u_omp(
- T, SA, n, buckets, left_suffixes_count, 0, threads, thread_state);
+ sa_sint_t d = libsais_partial_sorting_scan_left_to_right_8u_omp(T, SA, n, buckets, left_suffixes_count, 0, threads,
+ thread_state);
libsais_partial_sorting_shift_markers_8u_omp(SA, n, buckets, threads);
- libsais_partial_sorting_scan_right_to_left_8u_omp(
- T, SA, n, buckets, first_lms_suffix, left_suffixes_count, d, threads, thread_state);
+ libsais_partial_sorting_scan_right_to_left_8u_omp(T, SA, n, buckets, first_lms_suffix, left_suffixes_count, d,
+ threads, thread_state);
}
-static void libsais_induce_partial_order_32s_6k_omp(
- const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k,
- sa_sint_t * RESTRICT buckets, sa_sint_t first_lms_suffix, sa_sint_t left_suffixes_count,
- sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
- sa_sint_t d = libsais_partial_sorting_scan_left_to_right_32s_6k_omp(
- T, SA, n, buckets, left_suffixes_count, 0, threads, thread_state);
+static void libsais_induce_partial_order_32s_6k_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
+ sa_sint_t k, sa_sint_t * RESTRICT buckets,
+ sa_sint_t first_lms_suffix, sa_sint_t left_suffixes_count,
+ sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+ sa_sint_t d = libsais_partial_sorting_scan_left_to_right_32s_6k_omp(T, SA, n, buckets, left_suffixes_count, 0,
+ threads, thread_state);
libsais_partial_sorting_shift_markers_32s_6k_omp(SA, k, buckets, threads);
libsais_partial_sorting_shift_buckets_32s_6k(k, buckets);
- libsais_partial_sorting_scan_right_to_left_32s_6k_omp(
- T, SA, n, buckets, first_lms_suffix, left_suffixes_count, d, threads, thread_state);
+ libsais_partial_sorting_scan_right_to_left_32s_6k_omp(T, SA, n, buckets, first_lms_suffix, left_suffixes_count, d,
+ threads, thread_state);
}
-static void libsais_induce_partial_order_32s_4k_omp(const sa_sint_t * RESTRICT T,
- sa_sint_t * RESTRICT SA, sa_sint_t n,
- sa_sint_t k, sa_sint_t * RESTRICT buckets,
- sa_sint_t threads,
+static void libsais_induce_partial_order_32s_4k_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
+ sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t threads,
LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
memset(buckets, 0, 2 * (size_t)k * sizeof(sa_sint_t));
- sa_sint_t d = libsais_partial_sorting_scan_left_to_right_32s_4k_omp(T, SA, n, k, buckets, 0,
- threads, thread_state);
+ sa_sint_t d = libsais_partial_sorting_scan_left_to_right_32s_4k_omp(T, SA, n, k, buckets, 0, threads, thread_state);
libsais_partial_sorting_shift_markers_32s_4k(SA, n);
- libsais_partial_sorting_scan_right_to_left_32s_4k_omp(T, SA, n, k, buckets, d, threads,
- thread_state);
+ libsais_partial_sorting_scan_right_to_left_32s_4k_omp(T, SA, n, k, buckets, d, threads, thread_state);
libsais_partial_sorting_gather_lms_suffixes_32s_4k_omp(SA, n, threads, thread_state);
}
-static void libsais_induce_partial_order_32s_2k_omp(const sa_sint_t * RESTRICT T,
- sa_sint_t * RESTRICT SA, sa_sint_t n,
- sa_sint_t k, sa_sint_t * RESTRICT buckets,
- sa_sint_t threads,
+static void libsais_induce_partial_order_32s_2k_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
+ sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t threads,
LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
- libsais_partial_sorting_scan_left_to_right_32s_1k_omp(T, SA, n, &buckets[1 * k], threads,
- thread_state);
- libsais_partial_sorting_scan_right_to_left_32s_1k_omp(T, SA, n, &buckets[0 * k], threads,
- thread_state);
+ libsais_partial_sorting_scan_left_to_right_32s_1k_omp(T, SA, n, &buckets[1 * k], threads, thread_state);
+ libsais_partial_sorting_scan_right_to_left_32s_1k_omp(T, SA, n, &buckets[0 * k], threads, thread_state);
libsais_partial_sorting_gather_lms_suffixes_32s_1k_omp(SA, n, threads, thread_state);
}
-static void libsais_induce_partial_order_32s_1k_omp(const sa_sint_t * RESTRICT T,
- sa_sint_t * RESTRICT SA, sa_sint_t n,
- sa_sint_t k, sa_sint_t * RESTRICT buckets,
- sa_sint_t threads,
+static void libsais_induce_partial_order_32s_1k_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
+ sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t threads,
LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
libsais_count_suffixes_32s(T, n, k, buckets);
libsais_initialize_buckets_start_32s_1k(k, buckets);
@@ -2327,16 +2234,14 @@ static void libsais_induce_partial_order_32s_1k_omp(const sa_sint_t * RESTRICT T
libsais_partial_sorting_gather_lms_suffixes_32s_1k_omp(SA, n, threads, thread_state);
}
-static sa_sint_t libsais_renumber_lms_suffixes_8u(sa_sint_t * RESTRICT SA, sa_sint_t m,
- sa_sint_t name, fast_sint_t omp_block_start,
- fast_sint_t omp_block_size) {
+static sa_sint_t libsais_renumber_lms_suffixes_8u(sa_sint_t * RESTRICT SA, sa_sint_t m, sa_sint_t name,
+ fast_sint_t omp_block_start, fast_sint_t omp_block_size) {
const fast_sint_t prefetch_distance = 32;
sa_sint_t * RESTRICT SAm = &SA[m];
fast_sint_t i, j;
- for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 3; i < j;
- i += 4) {
+ for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 3; i < j; i += 4) {
libsais_prefetch(&SA[i + 2 * prefetch_distance]);
libsais_prefetchw(&SAm[(SA[i + prefetch_distance + 0] & SAINT_MAX) >> 1]);
@@ -2367,17 +2272,15 @@ static sa_sint_t libsais_renumber_lms_suffixes_8u(sa_sint_t * RESTRICT SA, sa_si
return name;
}
-static fast_sint_t libsais_gather_marked_suffixes_8u(sa_sint_t * RESTRICT SA, sa_sint_t m,
- fast_sint_t l, fast_sint_t omp_block_start,
- fast_sint_t omp_block_size) {
+static fast_sint_t libsais_gather_marked_suffixes_8u(sa_sint_t * RESTRICT SA, sa_sint_t m, fast_sint_t l,
+ fast_sint_t omp_block_start, fast_sint_t omp_block_size) {
const fast_sint_t prefetch_distance = 32;
l -= 1;
fast_sint_t i, j;
- for (i = (fast_sint_t)m + omp_block_start + omp_block_size - 1,
- j = (fast_sint_t)m + omp_block_start + 3;
- i >= j; i -= 4) {
+ for (i = (fast_sint_t)m + omp_block_start + omp_block_size - 1, j = (fast_sint_t)m + omp_block_start + 3; i >= j;
+ i -= 4) {
libsais_prefetch(&SA[i - prefetch_distance]);
sa_sint_t s0 = SA[i - 0];
@@ -2405,9 +2308,8 @@ static fast_sint_t libsais_gather_marked_suffixes_8u(sa_sint_t * RESTRICT SA, sa
return l;
}
-static sa_sint_t libsais_renumber_lms_suffixes_8u_omp(
- sa_sint_t * RESTRICT SA, sa_sint_t m, sa_sint_t threads,
- LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+static sa_sint_t libsais_renumber_lms_suffixes_8u_omp(sa_sint_t * RESTRICT SA, sa_sint_t m, sa_sint_t threads,
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
sa_sint_t name = 0;
{
(void)(threads);
@@ -2418,8 +2320,7 @@ static sa_sint_t libsais_renumber_lms_suffixes_8u_omp(
fast_sint_t omp_block_stride = (m / omp_num_threads) & (-16);
fast_sint_t omp_block_start = omp_thread_num * omp_block_stride;
- fast_sint_t omp_block_size =
- omp_thread_num < omp_num_threads - 1 ? omp_block_stride : m - omp_block_start;
+ fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? omp_block_stride : m - omp_block_start;
if (omp_num_threads == 1) {
name = libsais_renumber_lms_suffixes_8u(SA, m, 0, omp_block_start, omp_block_size);
@@ -2429,9 +2330,8 @@ static sa_sint_t libsais_renumber_lms_suffixes_8u_omp(
return name;
}
-static void libsais_gather_marked_lms_suffixes_8u_omp(
- sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m, sa_sint_t fs, sa_sint_t threads,
- LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+static void libsais_gather_marked_lms_suffixes_8u_omp(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m, sa_sint_t fs,
+ sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
{
(void)(threads);
(void)(thread_state);
@@ -2441,20 +2341,18 @@ static void libsais_gather_marked_lms_suffixes_8u_omp(
fast_sint_t omp_block_stride = (((fast_sint_t)n >> 1) / omp_num_threads) & (-16);
fast_sint_t omp_block_start = omp_thread_num * omp_block_stride;
- fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1
- ? omp_block_stride
- : ((fast_sint_t)n >> 1) - omp_block_start;
+ fast_sint_t omp_block_size =
+ omp_thread_num < omp_num_threads - 1 ? omp_block_stride : ((fast_sint_t)n >> 1) - omp_block_start;
if (omp_num_threads == 1) {
- libsais_gather_marked_suffixes_8u(SA, m, (fast_sint_t)n + (fast_sint_t)fs,
- omp_block_start, omp_block_size);
+ libsais_gather_marked_suffixes_8u(SA, m, (fast_sint_t)n + (fast_sint_t)fs, omp_block_start, omp_block_size);
}
}
}
-static sa_sint_t libsais_renumber_and_gather_lms_suffixes_8u_omp(
- sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m, sa_sint_t fs, sa_sint_t threads,
- LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+static sa_sint_t libsais_renumber_and_gather_lms_suffixes_8u_omp(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m,
+ sa_sint_t fs, sa_sint_t threads,
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
memset(&SA[m], 0, ((size_t)n >> 1) * sizeof(sa_sint_t));
sa_sint_t name = libsais_renumber_lms_suffixes_8u_omp(SA, m, threads, thread_state);
@@ -2470,8 +2368,7 @@ static sa_sint_t libsais_renumber_and_gather_lms_suffixes_8u_omp(
return name;
}
-static sa_sint_t libsais_renumber_distinct_lms_suffixes_32s_4k(sa_sint_t * RESTRICT SA, sa_sint_t m,
- sa_sint_t name,
+static sa_sint_t libsais_renumber_distinct_lms_suffixes_32s_4k(sa_sint_t * RESTRICT SA, sa_sint_t m, sa_sint_t name,
fast_sint_t omp_block_start,
fast_sint_t omp_block_size) {
const fast_sint_t prefetch_distance = 32;
@@ -2480,8 +2377,7 @@ static sa_sint_t libsais_renumber_distinct_lms_suffixes_32s_4k(sa_sint_t * RESTR
fast_sint_t i, j;
sa_sint_t p0, p1, p2, p3 = 0;
- for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 3; i < j;
- i += 4) {
+ for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 3; i < j; i += 4) {
libsais_prefetchw(&SA[i + 2 * prefetch_distance]);
libsais_prefetchw(&SAm[(SA[i + prefetch_distance + 0] & SAINT_MAX) >> 1]);
@@ -2513,16 +2409,14 @@ static sa_sint_t libsais_renumber_distinct_lms_suffixes_32s_4k(sa_sint_t * RESTR
return name;
}
-static void libsais_mark_distinct_lms_suffixes_32s(sa_sint_t * RESTRICT SA, sa_sint_t m,
- fast_sint_t omp_block_start,
+static void libsais_mark_distinct_lms_suffixes_32s(sa_sint_t * RESTRICT SA, sa_sint_t m, fast_sint_t omp_block_start,
fast_sint_t omp_block_size) {
const fast_sint_t prefetch_distance = 32;
fast_sint_t i, j;
sa_sint_t p0, p1, p2, p3 = 0;
- for (i = (fast_sint_t)m + omp_block_start,
- j = (fast_sint_t)m + omp_block_start + omp_block_size - 3;
- i < j; i += 4) {
+ for (i = (fast_sint_t)m + omp_block_start, j = (fast_sint_t)m + omp_block_start + omp_block_size - 3; i < j;
+ i += 4) {
libsais_prefetchw(&SA[i + prefetch_distance]);
p0 = SA[i + 0];
@@ -2547,8 +2441,7 @@ static void libsais_mark_distinct_lms_suffixes_32s(sa_sint_t * RESTRICT SA, sa_s
}
}
-static void libsais_clamp_lms_suffixes_length_32s(sa_sint_t * RESTRICT SA, sa_sint_t m,
- fast_sint_t omp_block_start,
+static void libsais_clamp_lms_suffixes_length_32s(sa_sint_t * RESTRICT SA, sa_sint_t m, fast_sint_t omp_block_start,
fast_sint_t omp_block_size) {
const fast_sint_t prefetch_distance = 32;
@@ -2569,9 +2462,9 @@ static void libsais_clamp_lms_suffixes_length_32s(sa_sint_t * RESTRICT SA, sa_si
}
}
-static sa_sint_t libsais_renumber_distinct_lms_suffixes_32s_4k_omp(
- sa_sint_t * RESTRICT SA, sa_sint_t m, sa_sint_t threads,
- LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+static sa_sint_t libsais_renumber_distinct_lms_suffixes_32s_4k_omp(sa_sint_t * RESTRICT SA, sa_sint_t m,
+ sa_sint_t threads,
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
sa_sint_t name = 0;
{
(void)(threads);
@@ -2582,20 +2475,18 @@ static sa_sint_t libsais_renumber_distinct_lms_suffixes_32s_4k_omp(
fast_sint_t omp_block_stride = (m / omp_num_threads) & (-16);
fast_sint_t omp_block_start = omp_thread_num * omp_block_stride;
- fast_sint_t omp_block_size =
- omp_thread_num < omp_num_threads - 1 ? omp_block_stride : m - omp_block_start;
+ fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? omp_block_stride : m - omp_block_start;
if (omp_num_threads == 1) {
- name = libsais_renumber_distinct_lms_suffixes_32s_4k(SA, m, 1, omp_block_start,
- omp_block_size);
+ name = libsais_renumber_distinct_lms_suffixes_32s_4k(SA, m, 1, omp_block_start, omp_block_size);
}
}
return name - 1;
}
-static void libsais_mark_distinct_lms_suffixes_32s_omp(sa_sint_t * RESTRICT SA, sa_sint_t n,
- sa_sint_t m, sa_sint_t threads) {
+static void libsais_mark_distinct_lms_suffixes_32s_omp(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m,
+ sa_sint_t threads) {
{
(void)(threads);
@@ -2606,8 +2497,8 @@ static void libsais_mark_distinct_lms_suffixes_32s_omp(sa_sint_t * RESTRICT SA,
}
}
-static void libsais_clamp_lms_suffixes_length_32s_omp(sa_sint_t * RESTRICT SA, sa_sint_t n,
- sa_sint_t m, sa_sint_t threads) {
+static void libsais_clamp_lms_suffixes_length_32s_omp(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m,
+ sa_sint_t threads) {
{
(void)(threads);
@@ -2623,8 +2514,7 @@ static sa_sint_t libsais_renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(
LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
memset(&SA[m], 0, ((size_t)n >> 1) * sizeof(sa_sint_t));
- sa_sint_t name =
- libsais_renumber_distinct_lms_suffixes_32s_4k_omp(SA, m, threads, thread_state);
+ sa_sint_t name = libsais_renumber_distinct_lms_suffixes_32s_4k_omp(SA, m, threads, thread_state);
if (name < m) {
libsais_mark_distinct_lms_suffixes_32s_omp(SA, n, m, threads);
}
@@ -2632,8 +2522,9 @@ static sa_sint_t libsais_renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(
return name;
}
-static sa_sint_t libsais_renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(
- sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m, sa_sint_t threads) {
+static sa_sint_t libsais_renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(sa_sint_t * RESTRICT T,
+ sa_sint_t * RESTRICT SA, sa_sint_t n,
+ sa_sint_t m, sa_sint_t threads) {
const fast_sint_t prefetch_distance = 32;
sa_sint_t * RESTRICT SAm = &SA[m];
@@ -2644,8 +2535,7 @@ static sa_sint_t libsais_renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(
memset(&SA[m], 0, ((size_t)n - (size_t)m - (size_t)m) * sizeof(sa_sint_t));
fast_sint_t i, j;
- for (i = (fast_sint_t)n - (fast_sint_t)m, j = (fast_sint_t)n - 1 - prefetch_distance - 3;
- i < j; i += 4) {
+ for (i = (fast_sint_t)n - (fast_sint_t)m, j = (fast_sint_t)n - 1 - prefetch_distance - 3; i < j; i += 4) {
libsais_prefetch(&SA[i + 2 * prefetch_distance]);
libsais_prefetchw(&SAm[((sa_uint_t)SA[i + prefetch_distance + 0]) >> 1]);
@@ -2743,15 +2633,13 @@ static sa_sint_t libsais_renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(
}
static void libsais_reconstruct_lms_suffixes(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m,
- fast_sint_t omp_block_start,
- fast_sint_t omp_block_size) {
+ fast_sint_t omp_block_start, fast_sint_t omp_block_size) {
const fast_sint_t prefetch_distance = 32;
const sa_sint_t * RESTRICT SAnm = &SA[n - m];
fast_sint_t i, j;
- for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 3; i < j;
- i += 4) {
+ for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 3; i < j; i += 4) {
libsais_prefetchw(&SA[i + 2 * prefetch_distance]);
libsais_prefetch(&SAnm[SA[i + prefetch_distance + 0]]);
@@ -2770,8 +2658,7 @@ static void libsais_reconstruct_lms_suffixes(sa_sint_t * RESTRICT SA, sa_sint_t
}
}
-static void libsais_reconstruct_lms_suffixes_omp(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m,
- sa_sint_t threads) {
+static void libsais_reconstruct_lms_suffixes_omp(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m, sa_sint_t threads) {
{
(void)(threads);
@@ -2781,8 +2668,7 @@ static void libsais_reconstruct_lms_suffixes_omp(sa_sint_t * RESTRICT SA, sa_sin
}
}
-static void libsais_place_lms_suffixes_interval_8u(sa_sint_t * RESTRICT SA, sa_sint_t n,
- sa_sint_t m,
+static void libsais_place_lms_suffixes_interval_8u(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m,
const sa_sint_t * RESTRICT buckets) {
const sa_sint_t * RESTRICT bucket_end = &buckets[7 * ALPHABET_SIZE];
@@ -2803,8 +2689,7 @@ static void libsais_place_lms_suffixes_interval_8u(sa_sint_t * RESTRICT SA, sa_s
memset(&SA[0], 0, (size_t)j * sizeof(sa_sint_t));
}
-static void libsais_place_lms_suffixes_interval_32s_4k(sa_sint_t * RESTRICT SA, sa_sint_t n,
- sa_sint_t k, sa_sint_t m,
+static void libsais_place_lms_suffixes_interval_32s_4k(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t m,
const sa_sint_t * RESTRICT buckets) {
const sa_sint_t * RESTRICT bucket_end = &buckets[3 * k];
@@ -2825,17 +2710,15 @@ static void libsais_place_lms_suffixes_interval_32s_4k(sa_sint_t * RESTRICT SA,
memset(&SA[0], 0, (size_t)j * sizeof(sa_sint_t));
}
-static void libsais_place_lms_suffixes_interval_32s_2k(sa_sint_t * RESTRICT SA, sa_sint_t n,
- sa_sint_t k, sa_sint_t m,
+static void libsais_place_lms_suffixes_interval_32s_2k(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t m,
const sa_sint_t * RESTRICT buckets) {
fast_sint_t j = n;
if (k > 1) {
fast_sint_t c;
- for (c = BUCKETS_INDEX2((fast_sint_t)k - 2, 0); c >= BUCKETS_INDEX2(0, 0);
- c -= BUCKETS_INDEX2(1, 0)) {
- fast_sint_t l = (fast_sint_t)buckets[c + BUCKETS_INDEX2(1, 1)] -
- (fast_sint_t)buckets[c + BUCKETS_INDEX2(0, 1)];
+ for (c = BUCKETS_INDEX2((fast_sint_t)k - 2, 0); c >= BUCKETS_INDEX2(0, 0); c -= BUCKETS_INDEX2(1, 0)) {
+ fast_sint_t l =
+ (fast_sint_t)buckets[c + BUCKETS_INDEX2(1, 1)] - (fast_sint_t)buckets[c + BUCKETS_INDEX2(0, 1)];
if (l > 0) {
fast_sint_t i = buckets[c];
if (j - i > 0) {
@@ -2850,9 +2733,8 @@ static void libsais_place_lms_suffixes_interval_32s_2k(sa_sint_t * RESTRICT SA,
memset(&SA[0], 0, (size_t)j * sizeof(sa_sint_t));
}
-static void libsais_place_lms_suffixes_interval_32s_1k(const sa_sint_t * RESTRICT T,
- sa_sint_t * RESTRICT SA, sa_sint_t k,
- sa_sint_t m, sa_sint_t * RESTRICT buckets) {
+static void libsais_place_lms_suffixes_interval_32s_1k(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+ sa_sint_t k, sa_sint_t m, sa_sint_t * RESTRICT buckets) {
const fast_sint_t prefetch_distance = 32;
sa_sint_t c = k - 1;
@@ -2908,8 +2790,7 @@ static void libsais_place_lms_suffixes_interval_32s_1k(const sa_sint_t * RESTRIC
memset(&SA[0], 0, (size_t)l * sizeof(sa_sint_t));
}
-static void libsais_place_lms_suffixes_histogram_32s_6k(sa_sint_t * RESTRICT SA, sa_sint_t n,
- sa_sint_t k, sa_sint_t m,
+static void libsais_place_lms_suffixes_histogram_32s_6k(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t m,
const sa_sint_t * RESTRICT buckets) {
const sa_sint_t * RESTRICT bucket_end = &buckets[5 * k];
@@ -2929,8 +2810,7 @@ static void libsais_place_lms_suffixes_histogram_32s_6k(sa_sint_t * RESTRICT SA,
memset(&SA[0], 0, (size_t)j * sizeof(sa_sint_t));
}
-static void libsais_place_lms_suffixes_histogram_32s_4k(sa_sint_t * RESTRICT SA, sa_sint_t n,
- sa_sint_t k, sa_sint_t m,
+static void libsais_place_lms_suffixes_histogram_32s_4k(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t m,
const sa_sint_t * RESTRICT buckets) {
const sa_sint_t * RESTRICT bucket_end = &buckets[3 * k];
@@ -2950,15 +2830,13 @@ static void libsais_place_lms_suffixes_histogram_32s_4k(sa_sint_t * RESTRICT SA,
memset(&SA[0], 0, (size_t)j * sizeof(sa_sint_t));
}
-static void libsais_place_lms_suffixes_histogram_32s_2k(sa_sint_t * RESTRICT SA, sa_sint_t n,
- sa_sint_t k, sa_sint_t m,
+static void libsais_place_lms_suffixes_histogram_32s_2k(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t m,
const sa_sint_t * RESTRICT buckets) {
fast_sint_t j = n;
if (k > 1) {
fast_sint_t c;
- for (c = BUCKETS_INDEX2((fast_sint_t)k - 2, 0); c >= BUCKETS_INDEX2(0, 0);
- c -= BUCKETS_INDEX2(1, 0)) {
+ for (c = BUCKETS_INDEX2((fast_sint_t)k - 2, 0); c >= BUCKETS_INDEX2(0, 0); c -= BUCKETS_INDEX2(1, 0)) {
fast_sint_t l = (fast_sint_t)buckets[c + BUCKETS_INDEX2(0, 1)];
if (l > 0) {
fast_sint_t i = buckets[c];
@@ -2975,14 +2853,12 @@ static void libsais_place_lms_suffixes_histogram_32s_2k(sa_sint_t * RESTRICT SA,
}
static void libsais_final_bwt_scan_left_to_right_8u(const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
- sa_sint_t * RESTRICT induction_bucket,
- fast_sint_t omp_block_start,
+ sa_sint_t * RESTRICT induction_bucket, fast_sint_t omp_block_start,
fast_sint_t omp_block_size) {
const fast_sint_t prefetch_distance = 32;
fast_sint_t i, j;
- for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 1; i < j;
- i += 2) {
+ for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 1; i < j; i += 2) {
libsais_prefetchw(&SA[i + 2 * prefetch_distance]);
sa_sint_t s0 = SA[i + prefetch_distance + 0];
@@ -3001,16 +2877,14 @@ static void libsais_final_bwt_scan_left_to_right_8u(const u8 * RESTRICT T, sa_si
if (p0 > 0) {
p0--;
SA[i + 0] = T[p0] | SAINT_MIN;
- SA[induction_bucket[T[p0]]++] =
- p0 | ((sa_sint_t)(T[p0 - (p0 > 0)] < T[p0]) << (SAINT_BIT - 1));
+ SA[induction_bucket[T[p0]]++] = p0 | ((sa_sint_t)(T[p0 - (p0 > 0)] < T[p0]) << (SAINT_BIT - 1));
}
sa_sint_t p1 = SA[i + 1];
SA[i + 1] = p1 & SAINT_MAX;
if (p1 > 0) {
p1--;
SA[i + 1] = T[p1] | SAINT_MIN;
- SA[induction_bucket[T[p1]]++] =
- p1 | ((sa_sint_t)(T[p1 - (p1 > 0)] < T[p1]) << (SAINT_BIT - 1));
+ SA[induction_bucket[T[p1]]++] = p1 | ((sa_sint_t)(T[p1 - (p1 > 0)] < T[p1]) << (SAINT_BIT - 1));
}
}
@@ -3020,23 +2894,18 @@ static void libsais_final_bwt_scan_left_to_right_8u(const u8 * RESTRICT T, sa_si
if (p > 0) {
p--;
SA[i] = T[p] | SAINT_MIN;
- SA[induction_bucket[T[p]]++] =
- p | ((sa_sint_t)(T[p - (p > 0)] < T[p]) << (SAINT_BIT - 1));
+ SA[induction_bucket[T[p]]++] = p | ((sa_sint_t)(T[p - (p > 0)] < T[p]) << (SAINT_BIT - 1));
}
}
}
-static void libsais_final_bwt_aux_scan_left_to_right_8u(const u8 * RESTRICT T,
- sa_sint_t * RESTRICT SA, sa_sint_t rm,
- sa_sint_t * RESTRICT I,
- sa_sint_t * RESTRICT induction_bucket,
- fast_sint_t omp_block_start,
- fast_sint_t omp_block_size) {
+static void libsais_final_bwt_aux_scan_left_to_right_8u(const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t rm,
+ sa_sint_t * RESTRICT I, sa_sint_t * RESTRICT induction_bucket,
+ fast_sint_t omp_block_start, fast_sint_t omp_block_size) {
const fast_sint_t prefetch_distance = 32;
fast_sint_t i, j;
- for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 1; i < j;
- i += 2) {
+ for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 1; i < j; i += 2) {
libsais_prefetchw(&SA[i + 2 * prefetch_distance]);
sa_sint_t s0 = SA[i + prefetch_distance + 0];
@@ -3055,8 +2924,7 @@ static void libsais_final_bwt_aux_scan_left_to_right_8u(const u8 * RESTRICT T,
if (p0 > 0) {
p0--;
SA[i + 0] = T[p0] | SAINT_MIN;
- SA[induction_bucket[T[p0]]++] =
- p0 | ((sa_sint_t)(T[p0 - (p0 > 0)] < T[p0]) << (SAINT_BIT - 1));
+ SA[induction_bucket[T[p0]]++] = p0 | ((sa_sint_t)(T[p0 - (p0 > 0)] < T[p0]) << (SAINT_BIT - 1));
if ((p0 & rm) == 0) {
I[p0 / (rm + 1)] = induction_bucket[T[p0]];
}
@@ -3066,8 +2934,7 @@ static void libsais_final_bwt_aux_scan_left_to_right_8u(const u8 * RESTRICT T,
if (p1 > 0) {
p1--;
SA[i + 1] = T[p1] | SAINT_MIN;
- SA[induction_bucket[T[p1]]++] =
- p1 | ((sa_sint_t)(T[p1 - (p1 > 0)] < T[p1]) << (SAINT_BIT - 1));
+ SA[induction_bucket[T[p1]]++] = p1 | ((sa_sint_t)(T[p1 - (p1 > 0)] < T[p1]) << (SAINT_BIT - 1));
if ((p1 & rm) == 0) {
I[p1 / (rm + 1)] = induction_bucket[T[p1]];
}
@@ -3080,8 +2947,7 @@ static void libsais_final_bwt_aux_scan_left_to_right_8u(const u8 * RESTRICT T,
if (p > 0) {
p--;
SA[i] = T[p] | SAINT_MIN;
- SA[induction_bucket[T[p]]++] =
- p | ((sa_sint_t)(T[p - (p > 0)] < T[p]) << (SAINT_BIT - 1));
+ SA[induction_bucket[T[p]]++] = p | ((sa_sint_t)(T[p - (p > 0)] < T[p]) << (SAINT_BIT - 1));
if ((p & rm) == 0) {
I[p / (rm + 1)] = induction_bucket[T[p]];
}
@@ -3089,16 +2955,13 @@ static void libsais_final_bwt_aux_scan_left_to_right_8u(const u8 * RESTRICT T,
}
}
-static void libsais_final_sorting_scan_left_to_right_8u(const u8 * RESTRICT T,
- sa_sint_t * RESTRICT SA,
+static void libsais_final_sorting_scan_left_to_right_8u(const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
sa_sint_t * RESTRICT induction_bucket,
- fast_sint_t omp_block_start,
- fast_sint_t omp_block_size) {
+ fast_sint_t omp_block_start, fast_sint_t omp_block_size) {
const fast_sint_t prefetch_distance = 32;
fast_sint_t i, j;
- for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 1; i < j;
- i += 2) {
+ for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 1; i < j; i += 2) {
libsais_prefetchw(&SA[i + 2 * prefetch_distance]);
sa_sint_t s0 = SA[i + prefetch_distance + 0];
@@ -3116,15 +2979,13 @@ static void libsais_final_sorting_scan_left_to_right_8u(const u8 * RESTRICT T,
SA[i + 0] = p0 ^ SAINT_MIN;
if (p0 > 0) {
p0--;
- SA[induction_bucket[T[p0]]++] =
- p0 | ((sa_sint_t)(T[p0 - (p0 > 0)] < T[p0]) << (SAINT_BIT - 1));
+ SA[induction_bucket[T[p0]]++] = p0 | ((sa_sint_t)(T[p0 - (p0 > 0)] < T[p0]) << (SAINT_BIT - 1));
}
sa_sint_t p1 = SA[i + 1];
SA[i + 1] = p1 ^ SAINT_MIN;
if (p1 > 0) {
p1--;
- SA[induction_bucket[T[p1]]++] =
- p1 | ((sa_sint_t)(T[p1 - (p1 > 0)] < T[p1]) << (SAINT_BIT - 1));
+ SA[induction_bucket[T[p1]]++] = p1 | ((sa_sint_t)(T[p1 - (p1 > 0)] < T[p1]) << (SAINT_BIT - 1));
}
}
@@ -3133,22 +2994,18 @@ static void libsais_final_sorting_scan_left_to_right_8u(const u8 * RESTRICT T,
SA[i] = p ^ SAINT_MIN;
if (p > 0) {
p--;
- SA[induction_bucket[T[p]]++] =
- p | ((sa_sint_t)(T[p - (p > 0)] < T[p]) << (SAINT_BIT - 1));
+ SA[induction_bucket[T[p]]++] = p | ((sa_sint_t)(T[p - (p > 0)] < T[p]) << (SAINT_BIT - 1));
}
}
}
-static void libsais_final_sorting_scan_left_to_right_32s(const sa_sint_t * RESTRICT T,
- sa_sint_t * RESTRICT SA,
+static void libsais_final_sorting_scan_left_to_right_32s(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA,
sa_sint_t * RESTRICT induction_bucket,
- fast_sint_t omp_block_start,
- fast_sint_t omp_block_size) {
+ fast_sint_t omp_block_start, fast_sint_t omp_block_size) {
const fast_sint_t prefetch_distance = 32;
fast_sint_t i, j;
- for (i = omp_block_start, j = omp_block_start + omp_block_size - 2 * prefetch_distance - 1;
- i < j; i += 2) {
+ for (i = omp_block_start, j = omp_block_start + omp_block_size - 2 * prefetch_distance - 1; i < j; i += 2) {
libsais_prefetchw(&SA[i + 3 * prefetch_distance]);
sa_sint_t s0 = SA[i + 2 * prefetch_distance + 0];
@@ -3172,15 +3029,13 @@ static void libsais_final_sorting_scan_left_to_right_32s(const sa_sint_t * RESTR
SA[i + 0] = p0 ^ SAINT_MIN;
if (p0 > 0) {
p0--;
- SA[induction_bucket[T[p0]]++] =
- p0 | ((sa_sint_t)(T[p0 - (p0 > 0)] < T[p0]) << (SAINT_BIT - 1));
+ SA[induction_bucket[T[p0]]++] = p0 | ((sa_sint_t)(T[p0 - (p0 > 0)] < T[p0]) << (SAINT_BIT - 1));
}
sa_sint_t p1 = SA[i + 1];
SA[i + 1] = p1 ^ SAINT_MIN;
if (p1 > 0) {
p1--;
- SA[induction_bucket[T[p1]]++] =
- p1 | ((sa_sint_t)(T[p1 - (p1 > 0)] < T[p1]) << (SAINT_BIT - 1));
+ SA[induction_bucket[T[p1]]++] = p1 | ((sa_sint_t)(T[p1 - (p1 > 0)] < T[p1]) << (SAINT_BIT - 1));
}
}
@@ -3189,18 +3044,15 @@ static void libsais_final_sorting_scan_left_to_right_32s(const sa_sint_t * RESTR
SA[i] = p ^ SAINT_MIN;
if (p > 0) {
p--;
- SA[induction_bucket[T[p]]++] =
- p | ((sa_sint_t)(T[p - (p > 0)] < T[p]) << (SAINT_BIT - 1));
+ SA[induction_bucket[T[p]]++] = p | ((sa_sint_t)(T[p - (p > 0)] < T[p]) << (SAINT_BIT - 1));
}
}
}
-static void libsais_final_bwt_scan_left_to_right_8u_omp(
- const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, fast_sint_t n,
- sa_sint_t * RESTRICT induction_bucket, sa_sint_t threads,
- LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+static void libsais_final_bwt_scan_left_to_right_8u_omp(const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, fast_sint_t n,
+ sa_sint_t * RESTRICT induction_bucket, sa_sint_t threads,
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
SA[induction_bucket[T[(sa_sint_t)n - 1]]++] =
- ((sa_sint_t)n - 1) |
- ((sa_sint_t)(T[(sa_sint_t)n - 2] < T[(sa_sint_t)n - 1]) << (SAINT_BIT - 1));
+ ((sa_sint_t)n - 1) | ((sa_sint_t)(T[(sa_sint_t)n - 2] < T[(sa_sint_t)n - 1]) << (SAINT_BIT - 1));
if (threads == 1 || n < 65536) {
libsais_final_bwt_scan_left_to_right_8u(T, SA, induction_bucket, 0, n);
@@ -3208,13 +3060,12 @@ static void libsais_final_bwt_scan_left_to_right_8u_omp(
(void)(thread_state);
}
-static void libsais_final_bwt_aux_scan_left_to_right_8u_omp(
- const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, fast_sint_t n, sa_sint_t rm,
- sa_sint_t * RESTRICT I, sa_sint_t * RESTRICT induction_bucket, sa_sint_t threads,
- LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+static void libsais_final_bwt_aux_scan_left_to_right_8u_omp(const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
+ fast_sint_t n, sa_sint_t rm, sa_sint_t * RESTRICT I,
+ sa_sint_t * RESTRICT induction_bucket, sa_sint_t threads,
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
SA[induction_bucket[T[(sa_sint_t)n - 1]]++] =
- ((sa_sint_t)n - 1) |
- ((sa_sint_t)(T[(sa_sint_t)n - 2] < T[(sa_sint_t)n - 1]) << (SAINT_BIT - 1));
+ ((sa_sint_t)n - 1) | ((sa_sint_t)(T[(sa_sint_t)n - 2] < T[(sa_sint_t)n - 1]) << (SAINT_BIT - 1));
if ((((sa_sint_t)n - 1) & rm) == 0) {
I[((sa_sint_t)n - 1) / (rm + 1)] = induction_bucket[T[(sa_sint_t)n - 1]];
@@ -3226,13 +3077,12 @@ static void libsais_final_bwt_aux_scan_left_to_right_8u_omp(
(void)(thread_state);
}
-static void libsais_final_sorting_scan_left_to_right_8u_omp(
- const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, fast_sint_t n,
- sa_sint_t * RESTRICT induction_bucket, sa_sint_t threads,
- LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+static void libsais_final_sorting_scan_left_to_right_8u_omp(const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
+ fast_sint_t n, sa_sint_t * RESTRICT induction_bucket,
+ sa_sint_t threads,
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
SA[induction_bucket[T[(sa_sint_t)n - 1]]++] =
- ((sa_sint_t)n - 1) |
- ((sa_sint_t)(T[(sa_sint_t)n - 2] < T[(sa_sint_t)n - 1]) << (SAINT_BIT - 1));
+ ((sa_sint_t)n - 1) | ((sa_sint_t)(T[(sa_sint_t)n - 2] < T[(sa_sint_t)n - 1]) << (SAINT_BIT - 1));
if (threads == 1 || n < 65536) {
libsais_final_sorting_scan_left_to_right_8u(T, SA, induction_bucket, 0, n);
@@ -3240,12 +3090,11 @@ static void libsais_final_sorting_scan_left_to_right_8u_omp(
(void)(thread_state);
}
-static void libsais_final_sorting_scan_left_to_right_32s_omp(
- const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
- sa_sint_t * RESTRICT induction_bucket, sa_sint_t threads,
- LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
- SA[induction_bucket[T[n - 1]]++] =
- (n - 1) | ((sa_sint_t)(T[n - 2] < T[n - 1]) << (SAINT_BIT - 1));
+static void libsais_final_sorting_scan_left_to_right_32s_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+ sa_sint_t n, sa_sint_t * RESTRICT induction_bucket,
+ sa_sint_t threads,
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+ SA[induction_bucket[T[n - 1]]++] = (n - 1) | ((sa_sint_t)(T[n - 2] < T[n - 1]) << (SAINT_BIT - 1));
if (threads == 1 || n < 65536) {
libsais_final_sorting_scan_left_to_right_32s(T, SA, induction_bucket, 0, n);
@@ -3253,17 +3102,14 @@ static void libsais_final_sorting_scan_left_to_right_32s_omp(
(void)(thread_state);
}
-static sa_sint_t libsais_final_bwt_scan_right_to_left_8u(const u8 * RESTRICT T,
- sa_sint_t * RESTRICT SA,
+static sa_sint_t libsais_final_bwt_scan_right_to_left_8u(const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
sa_sint_t * RESTRICT induction_bucket,
- fast_sint_t omp_block_start,
- fast_sint_t omp_block_size) {
+ fast_sint_t omp_block_start, fast_sint_t omp_block_size) {
const fast_sint_t prefetch_distance = 32;
fast_sint_t i, j;
sa_sint_t index = -1;
- for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + prefetch_distance + 1;
- i >= j; i -= 2) {
+ for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + prefetch_distance + 1; i >= j; i -= 2) {
libsais_prefetchw(&SA[i - 2 * prefetch_distance]);
sa_sint_t s0 = SA[i - prefetch_distance - 0];
@@ -3316,17 +3162,13 @@ static sa_sint_t libsais_final_bwt_scan_right_to_left_8u(const u8 * RESTRICT T,
return index;
}
-static void libsais_final_bwt_aux_scan_right_to_left_8u(const u8 * RESTRICT T,
- sa_sint_t * RESTRICT SA, sa_sint_t rm,
- sa_sint_t * RESTRICT I,
- sa_sint_t * RESTRICT induction_bucket,
- fast_sint_t omp_block_start,
- fast_sint_t omp_block_size) {
+static void libsais_final_bwt_aux_scan_right_to_left_8u(const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t rm,
+ sa_sint_t * RESTRICT I, sa_sint_t * RESTRICT induction_bucket,
+ fast_sint_t omp_block_start, fast_sint_t omp_block_size) {
const fast_sint_t prefetch_distance = 32;
fast_sint_t i, j;
- for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + prefetch_distance + 1;
- i >= j; i -= 2) {
+ for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + prefetch_distance + 1; i >= j; i -= 2) {
libsais_prefetchw(&SA[i - 2 * prefetch_distance]);
sa_sint_t s0 = SA[i - prefetch_distance - 0];
@@ -3383,16 +3225,13 @@ static void libsais_final_bwt_aux_scan_right_to_left_8u(const u8 * RESTRICT T,
}
}
-static void libsais_final_sorting_scan_right_to_left_8u(const u8 * RESTRICT T,
- sa_sint_t * RESTRICT SA,
+static void libsais_final_sorting_scan_right_to_left_8u(const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
sa_sint_t * RESTRICT induction_bucket,
- fast_sint_t omp_block_start,
- fast_sint_t omp_block_size) {
+ fast_sint_t omp_block_start, fast_sint_t omp_block_size) {
const fast_sint_t prefetch_distance = 32;
fast_sint_t i, j;
- for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + prefetch_distance + 1;
- i >= j; i -= 2) {
+ for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + prefetch_distance + 1; i >= j; i -= 2) {
libsais_prefetchw(&SA[i - 2 * prefetch_distance]);
sa_sint_t s0 = SA[i - prefetch_distance - 0];
@@ -3410,15 +3249,13 @@ static void libsais_final_sorting_scan_right_to_left_8u(const u8 * RESTRICT T,
SA[i - 0] = p0 & SAINT_MAX;
if (p0 > 0) {
p0--;
- SA[--induction_bucket[T[p0]]] =
- p0 | ((sa_sint_t)(T[p0 - (p0 > 0)] > T[p0]) << (SAINT_BIT - 1));
+ SA[--induction_bucket[T[p0]]] = p0 | ((sa_sint_t)(T[p0 - (p0 > 0)] > T[p0]) << (SAINT_BIT - 1));
}
sa_sint_t p1 = SA[i - 1];
SA[i - 1] = p1 & SAINT_MAX;
if (p1 > 0) {
p1--;
- SA[--induction_bucket[T[p1]]] =
- p1 | ((sa_sint_t)(T[p1 - (p1 > 0)] > T[p1]) << (SAINT_BIT - 1));
+ SA[--induction_bucket[T[p1]]] = p1 | ((sa_sint_t)(T[p1 - (p1 > 0)] > T[p1]) << (SAINT_BIT - 1));
}
}
@@ -3427,22 +3264,18 @@ static void libsais_final_sorting_scan_right_to_left_8u(const u8 * RESTRICT T,
SA[i] = p & SAINT_MAX;
if (p > 0) {
p--;
- SA[--induction_bucket[T[p]]] =
- p | ((sa_sint_t)(T[p - (p > 0)] > T[p]) << (SAINT_BIT - 1));
+ SA[--induction_bucket[T[p]]] = p | ((sa_sint_t)(T[p - (p > 0)] > T[p]) << (SAINT_BIT - 1));
}
}
}
-static void libsais_final_sorting_scan_right_to_left_32s(const sa_sint_t * RESTRICT T,
- sa_sint_t * RESTRICT SA,
+static void libsais_final_sorting_scan_right_to_left_32s(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA,
sa_sint_t * RESTRICT induction_bucket,
- fast_sint_t omp_block_start,
- fast_sint_t omp_block_size) {
+ fast_sint_t omp_block_start, fast_sint_t omp_block_size) {
const fast_sint_t prefetch_distance = 32;
fast_sint_t i, j;
- for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + 2 * prefetch_distance + 1;
- i >= j; i -= 2) {
+ for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + 2 * prefetch_distance + 1; i >= j; i -= 2) {
libsais_prefetchw(&SA[i - 3 * prefetch_distance]);
sa_sint_t s0 = SA[i - 2 * prefetch_distance - 0];
@@ -3466,15 +3299,13 @@ static void libsais_final_sorting_scan_right_to_left_32s(const sa_sint_t * RESTR
SA[i - 0] = p0 & SAINT_MAX;
if (p0 > 0) {
p0--;
- SA[--induction_bucket[T[p0]]] =
- p0 | ((sa_sint_t)(T[p0 - (p0 > 0)] > T[p0]) << (SAINT_BIT - 1));
+ SA[--induction_bucket[T[p0]]] = p0 | ((sa_sint_t)(T[p0 - (p0 > 0)] > T[p0]) << (SAINT_BIT - 1));
}
sa_sint_t p1 = SA[i - 1];
SA[i - 1] = p1 & SAINT_MAX;
if (p1 > 0) {
p1--;
- SA[--induction_bucket[T[p1]]] =
- p1 | ((sa_sint_t)(T[p1 - (p1 > 0)] > T[p1]) << (SAINT_BIT - 1));
+ SA[--induction_bucket[T[p1]]] = p1 | ((sa_sint_t)(T[p1 - (p1 > 0)] > T[p1]) << (SAINT_BIT - 1));
}
}
@@ -3483,15 +3314,14 @@ static void libsais_final_sorting_scan_right_to_left_32s(const sa_sint_t * RESTR
SA[i] = p & SAINT_MAX;
if (p > 0) {
p--;
- SA[--induction_bucket[T[p]]] =
- p | ((sa_sint_t)(T[p - (p > 0)] > T[p]) << (SAINT_BIT - 1));
+ SA[--induction_bucket[T[p]]] = p | ((sa_sint_t)(T[p - (p > 0)] > T[p]) << (SAINT_BIT - 1));
}
}
}
-static sa_sint_t libsais_final_bwt_scan_right_to_left_8u_omp(
- const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
- sa_sint_t * RESTRICT induction_bucket, sa_sint_t threads,
- LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+static sa_sint_t libsais_final_bwt_scan_right_to_left_8u_omp(const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
+ sa_sint_t n, sa_sint_t * RESTRICT induction_bucket,
+ sa_sint_t threads,
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
sa_sint_t index = -1;
if (threads == 1 || n < 65536) {
@@ -3501,30 +3331,29 @@ static sa_sint_t libsais_final_bwt_scan_right_to_left_8u_omp(
return index;
}
-static void libsais_final_bwt_aux_scan_right_to_left_8u_omp(
- const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t rm,
- sa_sint_t * RESTRICT I, sa_sint_t * RESTRICT induction_bucket, sa_sint_t threads,
- LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+static void libsais_final_bwt_aux_scan_right_to_left_8u_omp(const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
+ sa_sint_t rm, sa_sint_t * RESTRICT I,
+ sa_sint_t * RESTRICT induction_bucket, sa_sint_t threads,
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
if (threads == 1 || n < 65536) {
libsais_final_bwt_aux_scan_right_to_left_8u(T, SA, rm, I, induction_bucket, 0, n);
}
(void)(thread_state);
}
-static void libsais_final_sorting_scan_right_to_left_8u_omp(
- const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
- sa_sint_t * RESTRICT induction_bucket, sa_sint_t threads,
- LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+static void libsais_final_sorting_scan_right_to_left_8u_omp(const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
+ sa_sint_t * RESTRICT induction_bucket, sa_sint_t threads,
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
if (threads == 1 || n < 65536) {
libsais_final_sorting_scan_right_to_left_8u(T, SA, induction_bucket, 0, n);
}
(void)(thread_state);
}
-static void libsais_final_sorting_scan_right_to_left_32s_omp(
- const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
- sa_sint_t * RESTRICT induction_bucket, sa_sint_t threads,
- LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+static void libsais_final_sorting_scan_right_to_left_32s_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+ sa_sint_t n, sa_sint_t * RESTRICT induction_bucket,
+ sa_sint_t threads,
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
if (threads == 1 || n < 65536) {
libsais_final_sorting_scan_right_to_left_32s(T, SA, induction_bucket, 0, n);
}
@@ -3532,90 +3361,75 @@ static void libsais_final_sorting_scan_right_to_left_32s_omp(
}
static void libsais_clear_lms_suffixes_omp(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k,
- sa_sint_t * RESTRICT bucket_start,
- sa_sint_t * RESTRICT bucket_end, sa_sint_t threads) {
+ sa_sint_t * RESTRICT bucket_start, sa_sint_t * RESTRICT bucket_end,
+ sa_sint_t threads) {
fast_sint_t c;
(void)(threads);
(void)(n);
for (c = 0; c < k; ++c) {
if (bucket_end[c] > bucket_start[c]) {
- memset(&SA[bucket_start[c]], 0,
- ((size_t)bucket_end[c] - (size_t)bucket_start[c]) * sizeof(sa_sint_t));
+ memset(&SA[bucket_start[c]], 0, ((size_t)bucket_end[c] - (size_t)bucket_start[c]) * sizeof(sa_sint_t));
}
}
}
-static sa_sint_t libsais_induce_final_order_8u_omp(const u8 * RESTRICT T, sa_sint_t * RESTRICT SA,
- sa_sint_t n, sa_sint_t bwt, sa_sint_t r,
- sa_sint_t * RESTRICT I,
+static sa_sint_t libsais_induce_final_order_8u_omp(const u8 * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
+ sa_sint_t bwt, sa_sint_t r, sa_sint_t * RESTRICT I,
sa_sint_t * RESTRICT buckets, sa_sint_t threads,
LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
if (!bwt) {
- libsais_final_sorting_scan_left_to_right_8u_omp(T, SA, n, &buckets[6 * ALPHABET_SIZE],
- threads, thread_state);
+ libsais_final_sorting_scan_left_to_right_8u_omp(T, SA, n, &buckets[6 * ALPHABET_SIZE], threads, thread_state);
if (threads > 1 && n >= 65536) {
libsais_clear_lms_suffixes_omp(SA, n, ALPHABET_SIZE, &buckets[6 * ALPHABET_SIZE],
&buckets[7 * ALPHABET_SIZE], threads);
}
- libsais_final_sorting_scan_right_to_left_8u_omp(T, SA, n, &buckets[7 * ALPHABET_SIZE],
- threads, thread_state);
+ libsais_final_sorting_scan_right_to_left_8u_omp(T, SA, n, &buckets[7 * ALPHABET_SIZE], threads, thread_state);
return 0;
} else if (I != NULL) {
- libsais_final_bwt_aux_scan_left_to_right_8u_omp(
- T, SA, n, r - 1, I, &buckets[6 * ALPHABET_SIZE], threads, thread_state);
+ libsais_final_bwt_aux_scan_left_to_right_8u_omp(T, SA, n, r - 1, I, &buckets[6 * ALPHABET_SIZE], threads,
+ thread_state);
if (threads > 1 && n >= 65536) {
libsais_clear_lms_suffixes_omp(SA, n, ALPHABET_SIZE, &buckets[6 * ALPHABET_SIZE],
&buckets[7 * ALPHABET_SIZE], threads);
}
- libsais_final_bwt_aux_scan_right_to_left_8u_omp(
- T, SA, n, r - 1, I, &buckets[7 * ALPHABET_SIZE], threads, thread_state);
+ libsais_final_bwt_aux_scan_right_to_left_8u_omp(T, SA, n, r - 1, I, &buckets[7 * ALPHABET_SIZE], threads,
+ thread_state);
return 0;
} else {
- libsais_final_bwt_scan_left_to_right_8u_omp(T, SA, n, &buckets[6 * ALPHABET_SIZE], threads,
- thread_state);
+ libsais_final_bwt_scan_left_to_right_8u_omp(T, SA, n, &buckets[6 * ALPHABET_SIZE], threads, thread_state);
if (threads > 1 && n >= 65536) {
libsais_clear_lms_suffixes_omp(SA, n, ALPHABET_SIZE, &buckets[6 * ALPHABET_SIZE],
&buckets[7 * ALPHABET_SIZE], threads);
}
- return libsais_final_bwt_scan_right_to_left_8u_omp(T, SA, n, &buckets[7 * ALPHABET_SIZE],
- threads, thread_state);
+ return libsais_final_bwt_scan_right_to_left_8u_omp(T, SA, n, &buckets[7 * ALPHABET_SIZE], threads,
+ thread_state);
}
}
-static void libsais_induce_final_order_32s_6k(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA,
- sa_sint_t n, sa_sint_t k,
- sa_sint_t * RESTRICT buckets, sa_sint_t threads,
+static void libsais_induce_final_order_32s_6k(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
+ sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t threads,
LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
- libsais_final_sorting_scan_left_to_right_32s_omp(T, SA, n, &buckets[4 * k], threads,
- thread_state);
- libsais_final_sorting_scan_right_to_left_32s_omp(T, SA, n, &buckets[5 * k], threads,
- thread_state);
+ libsais_final_sorting_scan_left_to_right_32s_omp(T, SA, n, &buckets[4 * k], threads, thread_state);
+ libsais_final_sorting_scan_right_to_left_32s_omp(T, SA, n, &buckets[5 * k], threads, thread_state);
}
-static void libsais_induce_final_order_32s_4k(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA,
- sa_sint_t n, sa_sint_t k,
- sa_sint_t * RESTRICT buckets, sa_sint_t threads,
+static void libsais_induce_final_order_32s_4k(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
+ sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t threads,
LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
- libsais_final_sorting_scan_left_to_right_32s_omp(T, SA, n, &buckets[2 * k], threads,
- thread_state);
- libsais_final_sorting_scan_right_to_left_32s_omp(T, SA, n, &buckets[3 * k], threads,
- thread_state);
+ libsais_final_sorting_scan_left_to_right_32s_omp(T, SA, n, &buckets[2 * k], threads, thread_state);
+ libsais_final_sorting_scan_right_to_left_32s_omp(T, SA, n, &buckets[3 * k], threads, thread_state);
}
-static void libsais_induce_final_order_32s_2k(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA,
- sa_sint_t n, sa_sint_t k,
- sa_sint_t * RESTRICT buckets, sa_sint_t threads,
+static void libsais_induce_final_order_32s_2k(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
+ sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t threads,
LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
- libsais_final_sorting_scan_left_to_right_32s_omp(T, SA, n, &buckets[1 * k], threads,
- thread_state);
- libsais_final_sorting_scan_right_to_left_32s_omp(T, SA, n, &buckets[0 * k], threads,
- thread_state);
+ libsais_final_sorting_scan_left_to_right_32s_omp(T, SA, n, &buckets[1 * k], threads, thread_state);
+ libsais_final_sorting_scan_right_to_left_32s_omp(T, SA, n, &buckets[0 * k], threads, thread_state);
}
-static void libsais_induce_final_order_32s_1k(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA,
- sa_sint_t n, sa_sint_t k,
- sa_sint_t * RESTRICT buckets, sa_sint_t threads,
+static void libsais_induce_final_order_32s_1k(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
+ sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t threads,
LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
libsais_count_suffixes_32s(T, n, k, buckets);
libsais_initialize_buckets_start_32s_1k(k, buckets);
@@ -3626,17 +3440,17 @@ static void libsais_induce_final_order_32s_1k(const sa_sint_t * RESTRICT T, sa_s
libsais_final_sorting_scan_right_to_left_32s_omp(T, SA, n, buckets, threads, thread_state);
}
-static sa_sint_t libsais_renumber_unique_and_nonunique_lms_suffixes_32s(
- sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t m, sa_sint_t f,
- fast_sint_t omp_block_start, fast_sint_t omp_block_size) {
+static sa_sint_t libsais_renumber_unique_and_nonunique_lms_suffixes_32s(sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+ sa_sint_t m, sa_sint_t f,
+ fast_sint_t omp_block_start,
+ fast_sint_t omp_block_size) {
const fast_sint_t prefetch_distance = 32;
sa_sint_t * RESTRICT SAm = &SA[m];
sa_sint_t i, j;
- for (i = (sa_sint_t)omp_block_start, j = (sa_sint_t)omp_block_start +
- (sa_sint_t)omp_block_size -
- 2 * (sa_sint_t)prefetch_distance - 3;
+ for (i = (sa_sint_t)omp_block_start,
+ j = (sa_sint_t)omp_block_start + (sa_sint_t)omp_block_size - 2 * (sa_sint_t)prefetch_distance - 3;
i < j; i += 4) {
libsais_prefetch(&SA[i + 3 * prefetch_distance]);
@@ -3706,9 +3520,8 @@ static sa_sint_t libsais_renumber_unique_and_nonunique_lms_suffixes_32s(
return f;
}
-static void libsais_compact_unique_and_nonunique_lms_suffixes_32s(sa_sint_t * RESTRICT SA,
- sa_sint_t m, fast_sint_t * pl,
- fast_sint_t * pr,
+static void libsais_compact_unique_and_nonunique_lms_suffixes_32s(sa_sint_t * RESTRICT SA, sa_sint_t m,
+ fast_sint_t * pl, fast_sint_t * pr,
fast_sint_t omp_block_start,
fast_sint_t omp_block_size) {
const fast_sint_t prefetch_distance = 32;
@@ -3717,9 +3530,8 @@ static void libsais_compact_unique_and_nonunique_lms_suffixes_32s(sa_sint_t * RE
sa_sint_t * RESTRICT SAr = &SA[0];
fast_sint_t i, j, l = *pl - 1, r = *pr - 1;
- for (i = (fast_sint_t)m + omp_block_start + omp_block_size - 1,
- j = (fast_sint_t)m + omp_block_start + 3;
- i >= j; i -= 4) {
+ for (i = (fast_sint_t)m + omp_block_start + omp_block_size - 1, j = (fast_sint_t)m + omp_block_start + 3; i >= j;
+ i -= 4) {
libsais_prefetch(&SA[i - prefetch_distance]);
sa_sint_t p0 = SA[i - 0];
@@ -3768,21 +3580,19 @@ static sa_sint_t libsais_renumber_unique_and_nonunique_lms_suffixes_32s_omp(
fast_sint_t omp_block_stride = (m / omp_num_threads) & (-16);
fast_sint_t omp_block_start = omp_thread_num * omp_block_stride;
- fast_sint_t omp_block_size =
- omp_thread_num < omp_num_threads - 1 ? omp_block_stride : m - omp_block_start;
+ fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? omp_block_stride : m - omp_block_start;
if (omp_num_threads == 1) {
- f = libsais_renumber_unique_and_nonunique_lms_suffixes_32s(T, SA, m, 0, omp_block_start,
- omp_block_size);
+ f = libsais_renumber_unique_and_nonunique_lms_suffixes_32s(T, SA, m, 0, omp_block_start, omp_block_size);
}
}
return f;
}
-static void libsais_compact_unique_and_nonunique_lms_suffixes_32s_omp(
- sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m, sa_sint_t fs, sa_sint_t f, sa_sint_t threads,
- LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+static void libsais_compact_unique_and_nonunique_lms_suffixes_32s_omp(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m,
+ sa_sint_t fs, sa_sint_t f, sa_sint_t threads,
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
{
(void)(threads);
(void)(thread_state);
@@ -3792,35 +3602,30 @@ static void libsais_compact_unique_and_nonunique_lms_suffixes_32s_omp(
fast_sint_t omp_block_stride = (((fast_sint_t)n >> 1) / omp_num_threads) & (-16);
fast_sint_t omp_block_start = omp_thread_num * omp_block_stride;
- fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1
- ? omp_block_stride
- : ((fast_sint_t)n >> 1) - omp_block_start;
+ fast_sint_t omp_block_size =
+ omp_thread_num < omp_num_threads - 1 ? omp_block_stride : ((fast_sint_t)n >> 1) - omp_block_start;
if (omp_num_threads == 1) {
fast_sint_t l = m, r = (fast_sint_t)n + (fast_sint_t)fs;
- libsais_compact_unique_and_nonunique_lms_suffixes_32s(SA, m, &l, &r, omp_block_start,
- omp_block_size);
+ libsais_compact_unique_and_nonunique_lms_suffixes_32s(SA, m, &l, &r, omp_block_start, omp_block_size);
}
}
- memcpy(&SA[(fast_sint_t)n + (fast_sint_t)fs - (fast_sint_t)m],
- &SA[(fast_sint_t)m - (fast_sint_t)f], (size_t)f * sizeof(sa_sint_t));
+ memcpy(&SA[(fast_sint_t)n + (fast_sint_t)fs - (fast_sint_t)m], &SA[(fast_sint_t)m - (fast_sint_t)f],
+ (size_t)f * sizeof(sa_sint_t));
}
-static sa_sint_t libsais_compact_lms_suffixes_32s_omp(
- sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m, sa_sint_t fs,
- sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
- sa_sint_t f =
- libsais_renumber_unique_and_nonunique_lms_suffixes_32s_omp(T, SA, m, threads, thread_state);
- libsais_compact_unique_and_nonunique_lms_suffixes_32s_omp(SA, n, m, fs, f, threads,
- thread_state);
+static sa_sint_t libsais_compact_lms_suffixes_32s_omp(sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
+ sa_sint_t m, sa_sint_t fs, sa_sint_t threads,
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+ sa_sint_t f = libsais_renumber_unique_and_nonunique_lms_suffixes_32s_omp(T, SA, m, threads, thread_state);
+ libsais_compact_unique_and_nonunique_lms_suffixes_32s_omp(SA, n, m, fs, f, threads, thread_state);
return f;
}
-static void libsais_merge_unique_lms_suffixes_32s(sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA,
- sa_sint_t n, sa_sint_t m, fast_sint_t l,
- fast_sint_t omp_block_start,
+static void libsais_merge_unique_lms_suffixes_32s(sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
+ sa_sint_t m, fast_sint_t l, fast_sint_t omp_block_start,
fast_sint_t omp_block_size) {
const fast_sint_t prefetch_distance = 32;
@@ -3828,9 +3633,8 @@ static void libsais_merge_unique_lms_suffixes_32s(sa_sint_t * RESTRICT T, sa_sin
sa_sint_t i, j;
fast_sint_t tmp = *SAnm++;
- for (i = (sa_sint_t)omp_block_start,
- j = (sa_sint_t)omp_block_start + (sa_sint_t)omp_block_size - 6;
- i < j; i += 4) {
+ for (i = (sa_sint_t)omp_block_start, j = (sa_sint_t)omp_block_start + (sa_sint_t)omp_block_size - 6; i < j;
+ i += 4) {
libsais_prefetch(&T[i + prefetch_distance]);
sa_sint_t c0 = T[i + 0];
@@ -3874,10 +3678,8 @@ static void libsais_merge_unique_lms_suffixes_32s(sa_sint_t * RESTRICT T, sa_sin
}
}
-static void libsais_merge_nonunique_lms_suffixes_32s(sa_sint_t * RESTRICT SA, sa_sint_t n,
- sa_sint_t m, fast_sint_t l,
- fast_sint_t omp_block_start,
- fast_sint_t omp_block_size) {
+static void libsais_merge_nonunique_lms_suffixes_32s(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m, fast_sint_t l,
+ fast_sint_t omp_block_start, fast_sint_t omp_block_size) {
const fast_sint_t prefetch_distance = 32;
const sa_sint_t * RESTRICT SAnm = &SA[(fast_sint_t)n - (fast_sint_t)m - 1 + l];
@@ -3913,9 +3715,9 @@ static void libsais_merge_nonunique_lms_suffixes_32s(sa_sint_t * RESTRICT SA, sa
}
}
-static void libsais_merge_unique_lms_suffixes_32s_omp(
- sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m, sa_sint_t threads,
- LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+static void libsais_merge_unique_lms_suffixes_32s_omp(sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
+ sa_sint_t m, sa_sint_t threads,
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
{
(void)(threads);
(void)(thread_state);
@@ -3925,8 +3727,7 @@ static void libsais_merge_unique_lms_suffixes_32s_omp(
fast_sint_t omp_block_stride = (n / omp_num_threads) & (-16);
fast_sint_t omp_block_start = omp_thread_num * omp_block_stride;
- fast_sint_t omp_block_size =
- omp_thread_num < omp_num_threads - 1 ? omp_block_stride : n - omp_block_start;
+ fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? omp_block_stride : n - omp_block_start;
if (omp_num_threads == 1) {
libsais_merge_unique_lms_suffixes_32s(T, SA, n, m, 0, omp_block_start, omp_block_size);
@@ -3934,9 +3735,9 @@ static void libsais_merge_unique_lms_suffixes_32s_omp(
}
}
-static void libsais_merge_nonunique_lms_suffixes_32s_omp(
- sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m, sa_sint_t f, sa_sint_t threads,
- LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+static void libsais_merge_nonunique_lms_suffixes_32s_omp(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m, sa_sint_t f,
+ sa_sint_t threads,
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
{
(void)(threads);
(void)(thread_state);
@@ -3946,8 +3747,7 @@ static void libsais_merge_nonunique_lms_suffixes_32s_omp(
fast_sint_t omp_block_stride = (m / omp_num_threads) & (-16);
fast_sint_t omp_block_start = omp_thread_num * omp_block_stride;
- fast_sint_t omp_block_size =
- omp_thread_num < omp_num_threads - 1 ? omp_block_stride : m - omp_block_start;
+ fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? omp_block_stride : m - omp_block_start;
if (omp_num_threads == 1) {
libsais_merge_nonunique_lms_suffixes_32s(SA, n, m, f, omp_block_start, omp_block_size);
@@ -3955,22 +3755,22 @@ static void libsais_merge_nonunique_lms_suffixes_32s_omp(
}
}
-static void libsais_merge_compacted_lms_suffixes_32s_omp(
- sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m, sa_sint_t f,
- sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+static void libsais_merge_compacted_lms_suffixes_32s_omp(sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
+ sa_sint_t m, sa_sint_t f, sa_sint_t threads,
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
libsais_merge_unique_lms_suffixes_32s_omp(T, SA, n, m, threads, thread_state);
libsais_merge_nonunique_lms_suffixes_32s_omp(SA, n, m, f, threads, thread_state);
}
-static void libsais_reconstruct_compacted_lms_suffixes_32s_2k_omp(
- sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t m,
- sa_sint_t fs, sa_sint_t f, sa_sint_t * RESTRICT buckets, sa_sint_t threads,
- LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+static void libsais_reconstruct_compacted_lms_suffixes_32s_2k_omp(sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+ sa_sint_t n, sa_sint_t k, sa_sint_t m, sa_sint_t fs,
+ sa_sint_t f, sa_sint_t * RESTRICT buckets,
+ sa_sint_t threads,
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
if (f > 0) {
memmove(&SA[n - m - 1], &SA[n + fs - m], (size_t)f * sizeof(sa_sint_t));
- libsais_count_and_gather_compacted_lms_suffixes_32s_2k_omp(T, SA, n, k, buckets, threads,
- thread_state);
+ libsais_count_and_gather_compacted_lms_suffixes_32s_2k_omp(T, SA, n, k, buckets, threads, thread_state);
libsais_reconstruct_lms_suffixes_omp(SA, n, m - f, threads);
memcpy(&SA[n - m - 1 + f], &SA[0], ((size_t)m - (size_t)f) * sizeof(sa_sint_t));
@@ -3983,9 +3783,10 @@ static void libsais_reconstruct_compacted_lms_suffixes_32s_2k_omp(
}
}
-static void libsais_reconstruct_compacted_lms_suffixes_32s_1k_omp(
- sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t m, sa_sint_t fs,
- sa_sint_t f, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+static void libsais_reconstruct_compacted_lms_suffixes_32s_1k_omp(sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA,
+ sa_sint_t n, sa_sint_t m, sa_sint_t fs, sa_sint_t f,
+ sa_sint_t threads,
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
if (f > 0) {
memmove(&SA[n - m - 1], &SA[n + fs - m], (size_t)f * sizeof(sa_sint_t));
@@ -4002,55 +3803,48 @@ static void libsais_reconstruct_compacted_lms_suffixes_32s_1k_omp(
}
}
-static sa_sint_t libsais_main_32s(sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n,
- sa_sint_t k, sa_sint_t fs, sa_sint_t threads,
- LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+static sa_sint_t libsais_main_32s(sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k,
+ sa_sint_t fs, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
fs = fs < (SAINT_MAX - n) ? fs : (SAINT_MAX - n);
if (k > 0 && fs / k >= 6) {
sa_sint_t alignment = (fs - 1024) / k >= 6 ? 1024 : 16;
sa_sint_t * RESTRICT buckets =
(fs - alignment) / k >= 6
- ? (sa_sint_t *)libsais_align_up(&SA[n + fs - 6 * k - alignment],
- (size_t)alignment * sizeof(sa_sint_t))
+ ? (sa_sint_t *)libsais_align_up(&SA[n + fs - 6 * k - alignment], (size_t)alignment * sizeof(sa_sint_t))
: &SA[n + fs - 6 * k];
- sa_sint_t m = libsais_count_and_gather_lms_suffixes_32s_4k_omp(T, SA, n, k, buckets,
- threads, thread_state);
+ sa_sint_t m = libsais_count_and_gather_lms_suffixes_32s_4k_omp(T, SA, n, k, buckets, threads, thread_state);
if (m > 1) {
memset(SA, 0, ((size_t)n - (size_t)m) * sizeof(sa_sint_t));
sa_sint_t first_lms_suffix = SA[n - m];
sa_sint_t left_suffixes_count =
- libsais_initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(T, k, buckets,
- first_lms_suffix);
+ libsais_initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(T, k, buckets, first_lms_suffix);
- libsais_radix_sort_lms_suffixes_32s_6k_omp(T, SA, n, m, &buckets[4 * k], threads,
- thread_state);
+ libsais_radix_sort_lms_suffixes_32s_6k_omp(T, SA, n, m, &buckets[4 * k], threads, thread_state);
libsais_radix_sort_set_markers_32s_6k_omp(SA, k, &buckets[4 * k], threads);
if (threads > 1 && n >= 65536) {
memset(&SA[(fast_sint_t)n - (fast_sint_t)m], 0, (size_t)m * sizeof(sa_sint_t));
}
- libsais_initialize_buckets_for_partial_sorting_32s_6k(T, k, buckets, first_lms_suffix,
- left_suffixes_count);
- libsais_induce_partial_order_32s_6k_omp(T, SA, n, k, buckets, first_lms_suffix,
- left_suffixes_count, threads, thread_state);
+ libsais_initialize_buckets_for_partial_sorting_32s_6k(T, k, buckets, first_lms_suffix, left_suffixes_count);
+ libsais_induce_partial_order_32s_6k_omp(T, SA, n, k, buckets, first_lms_suffix, left_suffixes_count,
+ threads, thread_state);
- sa_sint_t names = libsais_renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(
- SA, n, m, threads, thread_state);
+ sa_sint_t names =
+ libsais_renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(SA, n, m, threads, thread_state);
if (names < m) {
- sa_sint_t f =
- libsais_compact_lms_suffixes_32s_omp(T, SA, n, m, fs, threads, thread_state);
+ sa_sint_t f = libsais_compact_lms_suffixes_32s_omp(T, SA, n, m, fs, threads, thread_state);
- if (libsais_main_32s(SA + n + fs - m + f, SA, m - f, names - f, fs + n - 2 * m + f,
- threads, thread_state) != 0) {
+ if (libsais_main_32s(SA + n + fs - m + f, SA, m - f, names - f, fs + n - 2 * m + f, threads,
+ thread_state) != 0) {
return -2;
}
- libsais_reconstruct_compacted_lms_suffixes_32s_2k_omp(
- T, SA, n, k, m, fs, f, buckets, threads, thread_state);
+ libsais_reconstruct_compacted_lms_suffixes_32s_2k_omp(T, SA, n, k, m, fs, f, buckets, threads,
+ thread_state);
} else {
libsais_count_lms_suffixes_32s_2k(T, n, k, buckets);
}
@@ -4071,36 +3865,31 @@ static sa_sint_t libsais_main_32s(sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT S
sa_sint_t alignment = (fs - 1024) / k >= 4 ? 1024 : 16;
sa_sint_t * RESTRICT buckets =
(fs - alignment) / k >= 4
- ? (sa_sint_t *)libsais_align_up(&SA[n + fs - 4 * k - alignment],
- (size_t)alignment * sizeof(sa_sint_t))
+ ? (sa_sint_t *)libsais_align_up(&SA[n + fs - 4 * k - alignment], (size_t)alignment * sizeof(sa_sint_t))
: &SA[n + fs - 4 * k];
- sa_sint_t m = libsais_count_and_gather_lms_suffixes_32s_2k_omp(T, SA, n, k, buckets,
- threads, thread_state);
+ sa_sint_t m = libsais_count_and_gather_lms_suffixes_32s_2k_omp(T, SA, n, k, buckets, threads, thread_state);
if (m > 1) {
- libsais_initialize_buckets_for_radix_and_partial_sorting_32s_4k(T, k, buckets,
- SA[n - m]);
+ libsais_initialize_buckets_for_radix_and_partial_sorting_32s_4k(T, k, buckets, SA[n - m]);
- libsais_radix_sort_lms_suffixes_32s_2k_omp(T, SA, n, m, &buckets[1], threads,
- thread_state);
+ libsais_radix_sort_lms_suffixes_32s_2k_omp(T, SA, n, m, &buckets[1], threads, thread_state);
libsais_radix_sort_set_markers_32s_4k_omp(SA, k, &buckets[1], threads);
libsais_place_lms_suffixes_interval_32s_4k(SA, n, k, m - 1, buckets);
libsais_induce_partial_order_32s_4k_omp(T, SA, n, k, buckets, threads, thread_state);
- sa_sint_t names = libsais_renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(
- SA, n, m, threads, thread_state);
+ sa_sint_t names =
+ libsais_renumber_and_mark_distinct_lms_suffixes_32s_4k_omp(SA, n, m, threads, thread_state);
if (names < m) {
- sa_sint_t f =
- libsais_compact_lms_suffixes_32s_omp(T, SA, n, m, fs, threads, thread_state);
+ sa_sint_t f = libsais_compact_lms_suffixes_32s_omp(T, SA, n, m, fs, threads, thread_state);
- if (libsais_main_32s(SA + n + fs - m + f, SA, m - f, names - f, fs + n - 2 * m + f,
- threads, thread_state) != 0) {
+ if (libsais_main_32s(SA + n + fs - m + f, SA, m - f, names - f, fs + n - 2 * m + f, threads,
+ thread_state) != 0) {
return -2;
}
- libsais_reconstruct_compacted_lms_suffixes_32s_2k_omp(
- T, SA, n, k, m, fs, f, buckets, threads, thread_state);
+ libsais_reconstruct_compacted_lms_suffixes_32s_2k_omp(T, SA, n, k, m, fs, f, buckets, threads,
+ thread_state);
} else {
libsais_count_lms_suffixes_32s_2k(T, n, k, buckets);
}
@@ -4117,35 +3906,30 @@ static sa_sint_t libsais_main_32s(sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT S
sa_sint_t alignment = (fs - 1024) / k >= 2 ? 1024 : 16;
sa_sint_t * RESTRICT buckets =
(fs - alignment) / k >= 2
- ? (sa_sint_t *)libsais_align_up(&SA[n + fs - 2 * k - alignment],
- (size_t)alignment * sizeof(sa_sint_t))
+ ? (sa_sint_t *)libsais_align_up(&SA[n + fs - 2 * k - alignment], (size_t)alignment * sizeof(sa_sint_t))
: &SA[n + fs - 2 * k];
- sa_sint_t m = libsais_count_and_gather_lms_suffixes_32s_2k_omp(T, SA, n, k, buckets,
- threads, thread_state);
+ sa_sint_t m = libsais_count_and_gather_lms_suffixes_32s_2k_omp(T, SA, n, k, buckets, threads, thread_state);
if (m > 1) {
libsais_initialize_buckets_for_lms_suffixes_radix_sort_32s_2k(T, k, buckets, SA[n - m]);
- libsais_radix_sort_lms_suffixes_32s_2k_omp(T, SA, n, m, &buckets[1], threads,
- thread_state);
+ libsais_radix_sort_lms_suffixes_32s_2k_omp(T, SA, n, m, &buckets[1], threads, thread_state);
libsais_place_lms_suffixes_interval_32s_2k(SA, n, k, m - 1, buckets);
libsais_initialize_buckets_start_and_end_32s_2k(k, buckets);
libsais_induce_partial_order_32s_2k_omp(T, SA, n, k, buckets, threads, thread_state);
- sa_sint_t names =
- libsais_renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(T, SA, n, m, threads);
+ sa_sint_t names = libsais_renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(T, SA, n, m, threads);
if (names < m) {
- sa_sint_t f =
- libsais_compact_lms_suffixes_32s_omp(T, SA, n, m, fs, threads, thread_state);
+ sa_sint_t f = libsais_compact_lms_suffixes_32s_omp(T, SA, n, m, fs, threads, thread_state);
- if (libsais_main_32s(SA + n + fs - m + f, SA, m - f, names - f, fs + n - 2 * m + f,
- threads, thread_state) != 0) {
+ if (libsais_main_32s(SA + n + fs - m + f, SA, m - f, names - f, fs + n - 2 * m + f, threads,
+ thread_state) != 0) {
return -2;
}
- libsais_reconstruct_compacted_lms_suffixes_32s_2k_omp(
- T, SA, n, k, m, fs, f, buckets, threads, thread_state);
+ libsais_reconstruct_compacted_lms_suffixes_32s_2k_omp(T, SA, n, k, m, fs, f, buckets, threads,
+ thread_state);
} else {
libsais_count_lms_suffixes_32s_2k(T, n, k, buckets);
}
@@ -4162,14 +3946,12 @@ static sa_sint_t libsais_main_32s(sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT S
return 0;
} else {
sa_sint_t * buffer =
- fs < k ? (sa_sint_t *)libsais_alloc_aligned((size_t)k * sizeof(sa_sint_t), 4096)
- : (sa_sint_t *)NULL;
+ fs < k ? (sa_sint_t *)libsais_alloc_aligned((size_t)k * sizeof(sa_sint_t), 4096) : (sa_sint_t *)NULL;
sa_sint_t alignment = fs - 1024 >= k ? 1024 : 16;
sa_sint_t * RESTRICT buckets =
fs - alignment >= k
- ? (sa_sint_t *)libsais_align_up(&SA[n + fs - k - alignment],
- (size_t)alignment * sizeof(sa_sint_t))
+ ? (sa_sint_t *)libsais_align_up(&SA[n + fs - k - alignment], (size_t)alignment * sizeof(sa_sint_t))
: fs >= k ? &SA[n + fs - k]
: buffer;
@@ -4186,28 +3968,24 @@ static sa_sint_t libsais_main_32s(sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT S
if (m > 1) {
libsais_induce_partial_order_32s_1k_omp(T, SA, n, k, buckets, threads, thread_state);
- sa_sint_t names =
- libsais_renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(T, SA, n, m, threads);
+ sa_sint_t names = libsais_renumber_and_mark_distinct_lms_suffixes_32s_1k_omp(T, SA, n, m, threads);
if (names < m) {
if (buffer != NULL) {
libsais_free_aligned(buffer);
buckets = NULL;
}
- sa_sint_t f =
- libsais_compact_lms_suffixes_32s_omp(T, SA, n, m, fs, threads, thread_state);
+ sa_sint_t f = libsais_compact_lms_suffixes_32s_omp(T, SA, n, m, fs, threads, thread_state);
- if (libsais_main_32s(SA + n + fs - m + f, SA, m - f, names - f, fs + n - 2 * m + f,
- threads, thread_state) != 0) {
+ if (libsais_main_32s(SA + n + fs - m + f, SA, m - f, names - f, fs + n - 2 * m + f, threads,
+ thread_state) != 0) {
return -2;
}
- libsais_reconstruct_compacted_lms_suffixes_32s_1k_omp(T, SA, n, m, fs, f, threads,
- thread_state);
+ libsais_reconstruct_compacted_lms_suffixes_32s_1k_omp(T, SA, n, m, fs, f, threads, thread_state);
if (buckets == NULL) {
- buckets = buffer =
- (sa_sint_t *)libsais_alloc_aligned((size_t)k * sizeof(sa_sint_t), 4096);
+ buckets = buffer = (sa_sint_t *)libsais_alloc_aligned((size_t)k * sizeof(sa_sint_t), 4096);
}
if (buckets == NULL) {
return -2;
@@ -4226,14 +4004,12 @@ static sa_sint_t libsais_main_32s(sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT S
}
}
-static sa_sint_t libsais_main_8u(const u8 * T, sa_sint_t * SA, sa_sint_t n,
- sa_sint_t * RESTRICT buckets, sa_sint_t bwt, sa_sint_t r,
- sa_sint_t * RESTRICT I, sa_sint_t fs, sa_sint_t * freq,
- sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
+static sa_sint_t libsais_main_8u(const u8 * T, sa_sint_t * SA, sa_sint_t n, sa_sint_t * RESTRICT buckets, sa_sint_t bwt,
+ sa_sint_t r, sa_sint_t * RESTRICT I, sa_sint_t fs, sa_sint_t * freq, sa_sint_t threads,
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state) {
fs = fs < (SAINT_MAX - n) ? fs : (SAINT_MAX - n);
- sa_sint_t m =
- libsais_count_and_gather_lms_suffixes_8u_omp(T, SA, n, buckets, threads, thread_state);
+ sa_sint_t m = libsais_count_and_gather_lms_suffixes_8u_omp(T, SA, n, buckets, threads, thread_state);
libsais_initialize_buckets_start_and_end_8u(buckets, freq);
@@ -4250,16 +4026,13 @@ static sa_sint_t libsais_main_8u(const u8 * T, sa_sint_t * SA, sa_sint_t n,
memset(&SA[(fast_sint_t)n - (fast_sint_t)m], 0, (size_t)m * sizeof(sa_sint_t));
}
- libsais_initialize_buckets_for_partial_sorting_8u(T, buckets, first_lms_suffix,
- left_suffixes_count);
- libsais_induce_partial_order_8u_omp(T, SA, n, buckets, first_lms_suffix,
- left_suffixes_count, threads, thread_state);
+ libsais_initialize_buckets_for_partial_sorting_8u(T, buckets, first_lms_suffix, left_suffixes_count);
+ libsais_induce_partial_order_8u_omp(T, SA, n, buckets, first_lms_suffix, left_suffixes_count, threads,
+ thread_state);
- sa_sint_t names =
- libsais_renumber_and_gather_lms_suffixes_8u_omp(SA, n, m, fs, threads, thread_state);
+ sa_sint_t names = libsais_renumber_and_gather_lms_suffixes_8u_omp(SA, n, m, fs, threads, thread_state);
if (names < m) {
- if (libsais_main_32s(SA + n + fs - m, SA, m, names, fs + n - 2 * m, threads,
- thread_state) != 0) {
+ if (libsais_main_32s(SA + n + fs - m, SA, m, names, fs + n - 2 * m, threads, thread_state) != 0) {
return -2;
}
@@ -4275,17 +4048,14 @@ static sa_sint_t libsais_main_8u(const u8 * T, sa_sint_t * SA, sa_sint_t n,
return libsais_induce_final_order_8u_omp(T, SA, n, bwt, r, I, buckets, threads, thread_state);
}
-static sa_sint_t libsais_main(const u8 * T, sa_sint_t * SA, sa_sint_t n, sa_sint_t bwt, sa_sint_t r,
- sa_sint_t * I, sa_sint_t fs, sa_sint_t * freq, sa_sint_t threads) {
- LIBSAIS_THREAD_STATE * RESTRICT thread_state =
- threads > 1 ? libsais_alloc_thread_state(threads) : NULL;
- sa_sint_t * RESTRICT buckets =
- (sa_sint_t *)libsais_alloc_aligned(8 * ALPHABET_SIZE * sizeof(sa_sint_t), 4096);
+static sa_sint_t libsais_main(const u8 * T, sa_sint_t * SA, sa_sint_t n, sa_sint_t bwt, sa_sint_t r, sa_sint_t * I,
+ sa_sint_t fs, sa_sint_t * freq, sa_sint_t threads) {
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state = threads > 1 ? libsais_alloc_thread_state(threads) : NULL;
+ sa_sint_t * RESTRICT buckets = (sa_sint_t *)libsais_alloc_aligned(8 * ALPHABET_SIZE * sizeof(sa_sint_t), 4096);
- sa_sint_t index =
- buckets != NULL && (thread_state != NULL || threads == 1)
- ? libsais_main_8u(T, SA, n, buckets, bwt, r, I, fs, freq, threads, thread_state)
- : -2;
+ sa_sint_t index = buckets != NULL && (thread_state != NULL || threads == 1)
+ ? libsais_main_8u(T, SA, n, buckets, bwt, r, I, fs, freq, threads, thread_state)
+ : -2;
libsais_free_aligned(buckets);
libsais_free_thread_state(thread_state);
@@ -4293,26 +4063,22 @@ static sa_sint_t libsais_main(const u8 * T, sa_sint_t * SA, sa_sint_t n, sa_sint
return index;
}
-static s32 libsais_main_int(sa_sint_t * T, sa_sint_t * SA, sa_sint_t n, sa_sint_t k, sa_sint_t fs,
- sa_sint_t threads) {
- LIBSAIS_THREAD_STATE * RESTRICT thread_state =
- threads > 1 ? libsais_alloc_thread_state(threads) : NULL;
+static s32 libsais_main_int(sa_sint_t * T, sa_sint_t * SA, sa_sint_t n, sa_sint_t k, sa_sint_t fs, sa_sint_t threads) {
+ LIBSAIS_THREAD_STATE * RESTRICT thread_state = threads > 1 ? libsais_alloc_thread_state(threads) : NULL;
- sa_sint_t index = thread_state != NULL || threads == 1
- ? libsais_main_32s(T, SA, n, k, fs, threads, thread_state)
- : -2;
+ sa_sint_t index =
+ thread_state != NULL || threads == 1 ? libsais_main_32s(T, SA, n, k, fs, threads, thread_state) : -2;
libsais_free_thread_state(thread_state);
return index;
}
-static sa_sint_t libsais_main_ctx(const LIBSAIS_CONTEXT * ctx, const u8 * T, sa_sint_t * SA,
- sa_sint_t n, sa_sint_t bwt, sa_sint_t r, sa_sint_t * I,
- sa_sint_t fs, sa_sint_t * freq) {
+static sa_sint_t libsais_main_ctx(const LIBSAIS_CONTEXT * ctx, const u8 * T, sa_sint_t * SA, sa_sint_t n, sa_sint_t bwt,
+ sa_sint_t r, sa_sint_t * I, sa_sint_t fs, sa_sint_t * freq) {
return ctx != NULL && (ctx->buckets != NULL && (ctx->thread_state != NULL || ctx->threads == 1))
- ? libsais_main_8u(T, SA, n, ctx->buckets, bwt, r, I, fs, freq,
- (sa_sint_t)ctx->threads, ctx->thread_state)
+ ? libsais_main_8u(T, SA, n, ctx->buckets, bwt, r, I, fs, freq, (sa_sint_t)ctx->threads,
+ ctx->thread_state)
: -2;
}
@@ -4421,8 +4187,8 @@ s32 libsais_bwt(const u8 * T, u8 * U, s32 * A, s32 n, s32 fs, s32 * freq) {
}
s32 libsais_bwt_aux(const u8 * T, u8 * U, s32 * A, s32 n, s32 fs, s32 * freq, s32 r, s32 * I) {
- if ((T == NULL) || (U == NULL) || (A == NULL) || (n < 0) || (fs < 0) || (r < 2) ||
- ((r & (r - 1)) != 0) || (I == NULL)) {
+ if ((T == NULL) || (U == NULL) || (A == NULL) || (n < 0) || (fs < 0) || (r < 2) || ((r & (r - 1)) != 0) ||
+ (I == NULL)) {
return -1;
} else if (n <= 1) {
if (freq != NULL) {
@@ -4478,10 +4244,9 @@ s32 libsais_bwt_ctx(const void * ctx, const u8 * T, u8 * U, s32 * A, s32 n, s32
return index;
}
-s32 libsais_bwt_aux_ctx(const void * ctx, const u8 * T, u8 * U, s32 * A, s32 n, s32 fs, s32 * freq,
- s32 r, s32 * I) {
- if ((ctx == NULL) || (T == NULL) || (U == NULL) || (A == NULL) || (n < 0) || (fs < 0) ||
- (r < 2) || ((r & (r - 1)) != 0) || (I == NULL)) {
+s32 libsais_bwt_aux_ctx(const void * ctx, const u8 * T, u8 * U, s32 * A, s32 n, s32 fs, s32 * freq, s32 r, s32 * I) {
+ if ((ctx == NULL) || (T == NULL) || (U == NULL) || (A == NULL) || (n < 0) || (fs < 0) || (r < 2) ||
+ ((r & (r - 1)) != 0) || (I == NULL)) {
return -1;
} else if (n <= 1) {
if (freq != NULL) {
@@ -4511,13 +4276,10 @@ static LIBSAIS_UNBWT_CONTEXT * libsais_unbwt_create_ctx_main(sa_sint_t threads)
(LIBSAIS_UNBWT_CONTEXT *)libsais_alloc_aligned(sizeof(LIBSAIS_UNBWT_CONTEXT), 64);
sa_uint_t * RESTRICT bucket2 =
(sa_uint_t *)libsais_alloc_aligned(ALPHABET_SIZE * ALPHABET_SIZE * sizeof(sa_uint_t), 4096);
- u16 * RESTRICT fastbits =
- (u16 *)libsais_alloc_aligned((1 + (1 << UNBWT_FASTBITS)) * sizeof(u16), 4096);
+ u16 * RESTRICT fastbits = (u16 *)libsais_alloc_aligned((1 + (1 << UNBWT_FASTBITS)) * sizeof(u16), 4096);
sa_uint_t * RESTRICT buckets =
threads > 1 ? (sa_uint_t *)libsais_alloc_aligned(
- (size_t)threads * (ALPHABET_SIZE + (ALPHABET_SIZE * ALPHABET_SIZE)) *
- sizeof(sa_uint_t),
- 4096)
+ (size_t)threads * (ALPHABET_SIZE + (ALPHABET_SIZE * ALPHABET_SIZE)) * sizeof(sa_uint_t), 4096)
: NULL;
if (ctx != NULL && bucket2 != NULL && fastbits != NULL && (buckets != NULL || threads == 1)) {
@@ -4546,8 +4308,7 @@ static void libsais_unbwt_free_ctx_main(LIBSAIS_UNBWT_CONTEXT * ctx) {
}
}
-static void libsais_unbwt_compute_histogram(const u8 * RESTRICT T, fast_sint_t n,
- sa_uint_t * RESTRICT count) {
+static void libsais_unbwt_compute_histogram(const u8 * RESTRICT T, fast_sint_t n, sa_uint_t * RESTRICT count) {
const fast_sint_t prefetch_distance = 256;
const u8 * RESTRICT T_p = T;
@@ -4566,14 +4327,12 @@ static void libsais_unbwt_compute_histogram(const u8 * RESTRICT T, fast_sint_t n
copy0[T_p[0]]++;
}
- fast_uint_t x = ((const u32 *)(const void *)T_p)[0],
- y = ((const u32 *)(const void *)T_p)[1];
+ fast_uint_t x = ((const u32 *)(const void *)T_p)[0], y = ((const u32 *)(const void *)T_p)[1];
for (; T_p < (u8 *)((ptrdiff_t)(T + n - 8) & (-64)); T_p += 64) {
libsais_prefetch(&T_p[prefetch_distance]);
- fast_uint_t z = ((const u32 *)(const void *)T_p)[2],
- w = ((const u32 *)(const void *)T_p)[3];
+ fast_uint_t z = ((const u32 *)(const void *)T_p)[2], w = ((const u32 *)(const void *)T_p)[3];
copy0[(u8)x]++;
x >>= 8;
copy1[(u8)x]++;
@@ -4806,10 +4565,8 @@ static void libsais_unbwt_transpose_bucket2(sa_uint_t * RESTRICT bucket2) {
}
}
-static void libsais_unbwt_compute_bigram_histogram_single(const u8 * RESTRICT T,
- sa_uint_t * RESTRICT bucket1,
- sa_uint_t * RESTRICT bucket2,
- fast_uint_t index) {
+static void libsais_unbwt_compute_bigram_histogram_single(const u8 * RESTRICT T, sa_uint_t * RESTRICT bucket1,
+ sa_uint_t * RESTRICT bucket2, fast_uint_t index) {
fast_uint_t sum, c;
for (sum = 1, c = 0; c < ALPHABET_SIZE; ++c) {
fast_uint_t prev = sum;
@@ -4839,8 +4596,8 @@ static void libsais_unbwt_compute_bigram_histogram_single(const u8 * RESTRICT T,
libsais_unbwt_transpose_bucket2(bucket2);
}
-static void libsais_unbwt_calculate_fastbits(sa_uint_t * RESTRICT bucket2, u16 * RESTRICT fastbits,
- fast_uint_t lastc, fast_uint_t shift) {
+static void libsais_unbwt_calculate_fastbits(sa_uint_t * RESTRICT bucket2, u16 * RESTRICT fastbits, fast_uint_t lastc,
+ fast_uint_t shift) {
fast_uint_t v, w, sum, c, d;
for (v = 0, w = 0, sum = 1, c = 0; c < ALPHABET_SIZE; ++c) {
if (c == lastc) {
@@ -4860,10 +4617,9 @@ static void libsais_unbwt_calculate_fastbits(sa_uint_t * RESTRICT bucket2, u16 *
}
}
-static void libsais_unbwt_calculate_biPSI(const u8 * RESTRICT T, sa_uint_t * RESTRICT P,
- sa_uint_t * RESTRICT bucket1,
- sa_uint_t * RESTRICT bucket2, fast_uint_t index,
- fast_sint_t omp_block_start, fast_sint_t omp_block_end) {
+static void libsais_unbwt_calculate_biPSI(const u8 * RESTRICT T, sa_uint_t * RESTRICT P, sa_uint_t * RESTRICT bucket1,
+ sa_uint_t * RESTRICT bucket2, fast_uint_t index, fast_sint_t omp_block_start,
+ fast_sint_t omp_block_end) {
{
fast_sint_t i = omp_block_start, j = (fast_sint_t)index;
if (omp_block_end < j) {
@@ -4875,10 +4631,7 @@ static void libsais_unbwt_calculate_biPSI(const u8 * RESTRICT T, sa_uint_t * RES
fast_sint_t t = (fast_sint_t)(index - p);
if (t != 0) {
- fast_uint_t w =
- (((fast_uint_t)T[p + (fast_uint_t)(t >> ((sizeof(fast_sint_t) * 8) - 1))])
- << 8) +
- c;
+ fast_uint_t w = (((fast_uint_t)T[p + (fast_uint_t)(t >> ((sizeof(fast_sint_t) * 8) - 1))]) << 8) + c;
P[bucket2[w]++] = (sa_uint_t)i;
}
}
@@ -4895,10 +4648,7 @@ static void libsais_unbwt_calculate_biPSI(const u8 * RESTRICT T, sa_uint_t * RES
fast_sint_t t = (fast_sint_t)(index - p);
if (t != 0) {
- fast_uint_t w =
- (((fast_uint_t)T[p + (fast_uint_t)(t >> ((sizeof(fast_sint_t) * 8) - 1))])
- << 8) +
- c;
+ fast_uint_t w = (((fast_uint_t)T[p + (fast_uint_t)(t >> ((sizeof(fast_sint_t) * 8) - 1))]) << 8) + c;
P[bucket2[w]++] = (sa_uint_t)i;
}
}
@@ -4930,9 +4680,8 @@ static void libsais_unbwt_init_single(const u8 * RESTRICT T, sa_uint_t * RESTRIC
libsais_unbwt_calculate_fastbits(bucket2, fastbits, lastc, shift);
libsais_unbwt_calculate_biPSI(T, P, bucket1, bucket2, index, 0, n);
}
-static void libsais_unbwt_decode_1(u8 * RESTRICT U, sa_uint_t * RESTRICT P,
- sa_uint_t * RESTRICT bucket2, u16 * RESTRICT fastbits,
- fast_uint_t shift, fast_uint_t * i0, fast_uint_t k) {
+static void libsais_unbwt_decode_1(u8 * RESTRICT U, sa_uint_t * RESTRICT P, sa_uint_t * RESTRICT bucket2,
+ u16 * RESTRICT fastbits, fast_uint_t shift, fast_uint_t * i0, fast_uint_t k) {
u16 * RESTRICT U0 = (u16 *)(void *)U;
fast_uint_t i, p0 = *i0;
@@ -4951,9 +4700,8 @@ static void libsais_unbwt_decode_1(u8 * RESTRICT U, sa_uint_t * RESTRICT P,
*i0 = p0;
}
-static void libsais_unbwt_decode_2(u8 * RESTRICT U, sa_uint_t * RESTRICT P,
- sa_uint_t * RESTRICT bucket2, u16 * RESTRICT fastbits,
- fast_uint_t shift, fast_uint_t r, fast_uint_t * i0,
+static void libsais_unbwt_decode_2(u8 * RESTRICT U, sa_uint_t * RESTRICT P, sa_uint_t * RESTRICT bucket2,
+ u16 * RESTRICT fastbits, fast_uint_t shift, fast_uint_t r, fast_uint_t * i0,
fast_uint_t * i1, fast_uint_t k) {
u16 * RESTRICT U0 = (u16 *)(void *)U;
u16 * RESTRICT U1 = (u16 *)(void *)(((u8 *)U0) + r);
@@ -4983,9 +4731,8 @@ static void libsais_unbwt_decode_2(u8 * RESTRICT U, sa_uint_t * RESTRICT P,
*i1 = p1;
}
-static void libsais_unbwt_decode_3(u8 * RESTRICT U, sa_uint_t * RESTRICT P,
- sa_uint_t * RESTRICT bucket2, u16 * RESTRICT fastbits,
- fast_uint_t shift, fast_uint_t r, fast_uint_t * i0,
+static void libsais_unbwt_decode_3(u8 * RESTRICT U, sa_uint_t * RESTRICT P, sa_uint_t * RESTRICT bucket2,
+ u16 * RESTRICT fastbits, fast_uint_t shift, fast_uint_t r, fast_uint_t * i0,
fast_uint_t * i1, fast_uint_t * i2, fast_uint_t k) {
u16 * RESTRICT U0 = (u16 *)(void *)U;
u16 * RESTRICT U1 = (u16 *)(void *)(((u8 *)U0) + r);
@@ -5025,11 +4772,9 @@ static void libsais_unbwt_decode_3(u8 * RESTRICT U, sa_uint_t * RESTRICT P,
*i2 = p2;
}
-static void libsais_unbwt_decode_4(u8 * RESTRICT U, sa_uint_t * RESTRICT P,
- sa_uint_t * RESTRICT bucket2, u16 * RESTRICT fastbits,
- fast_uint_t shift, fast_uint_t r, fast_uint_t * i0,
- fast_uint_t * i1, fast_uint_t * i2, fast_uint_t * i3,
- fast_uint_t k) {
+static void libsais_unbwt_decode_4(u8 * RESTRICT U, sa_uint_t * RESTRICT P, sa_uint_t * RESTRICT bucket2,
+ u16 * RESTRICT fastbits, fast_uint_t shift, fast_uint_t r, fast_uint_t * i0,
+ fast_uint_t * i1, fast_uint_t * i2, fast_uint_t * i3, fast_uint_t k) {
u16 * RESTRICT U0 = (u16 *)(void *)U;
u16 * RESTRICT U1 = (u16 *)(void *)(((u8 *)U0) + r);
u16 * RESTRICT U2 = (u16 *)(void *)(((u8 *)U1) + r);
@@ -5078,11 +4823,10 @@ static void libsais_unbwt_decode_4(u8 * RESTRICT U, sa_uint_t * RESTRICT P,
*i3 = p3;
}
-static void libsais_unbwt_decode_5(u8 * RESTRICT U, sa_uint_t * RESTRICT P,
- sa_uint_t * RESTRICT bucket2, u16 * RESTRICT fastbits,
- fast_uint_t shift, fast_uint_t r, fast_uint_t * i0,
- fast_uint_t * i1, fast_uint_t * i2, fast_uint_t * i3,
- fast_uint_t * i4, fast_uint_t k) {
+static void libsais_unbwt_decode_5(u8 * RESTRICT U, sa_uint_t * RESTRICT P, sa_uint_t * RESTRICT bucket2,
+ u16 * RESTRICT fastbits, fast_uint_t shift, fast_uint_t r, fast_uint_t * i0,
+ fast_uint_t * i1, fast_uint_t * i2, fast_uint_t * i3, fast_uint_t * i4,
+ fast_uint_t k) {
u16 * RESTRICT U0 = (u16 *)(void *)U;
u16 * RESTRICT U1 = (u16 *)(void *)(((u8 *)U0) + r);
u16 * RESTRICT U2 = (u16 *)(void *)(((u8 *)U1) + r);
@@ -5141,11 +4885,10 @@ static void libsais_unbwt_decode_5(u8 * RESTRICT U, sa_uint_t * RESTRICT P,
*i4 = p4;
}
-static void libsais_unbwt_decode_6(u8 * RESTRICT U, sa_uint_t * RESTRICT P,
- sa_uint_t * RESTRICT bucket2, u16 * RESTRICT fastbits,
- fast_uint_t shift, fast_uint_t r, fast_uint_t * i0,
- fast_uint_t * i1, fast_uint_t * i2, fast_uint_t * i3,
- fast_uint_t * i4, fast_uint_t * i5, fast_uint_t k) {
+static void libsais_unbwt_decode_6(u8 * RESTRICT U, sa_uint_t * RESTRICT P, sa_uint_t * RESTRICT bucket2,
+ u16 * RESTRICT fastbits, fast_uint_t shift, fast_uint_t r, fast_uint_t * i0,
+ fast_uint_t * i1, fast_uint_t * i2, fast_uint_t * i3, fast_uint_t * i4,
+ fast_uint_t * i5, fast_uint_t k) {
u16 * RESTRICT U0 = (u16 *)(void *)U;
u16 * RESTRICT U1 = (u16 *)(void *)(((u8 *)U0) + r);
u16 * RESTRICT U2 = (u16 *)(void *)(((u8 *)U1) + r);
@@ -5214,12 +4957,10 @@ static void libsais_unbwt_decode_6(u8 * RESTRICT U, sa_uint_t * RESTRICT P,
*i5 = p5;
}
-static void libsais_unbwt_decode_7(u8 * RESTRICT U, sa_uint_t * RESTRICT P,
- sa_uint_t * RESTRICT bucket2, u16 * RESTRICT fastbits,
- fast_uint_t shift, fast_uint_t r, fast_uint_t * i0,
- fast_uint_t * i1, fast_uint_t * i2, fast_uint_t * i3,
- fast_uint_t * i4, fast_uint_t * i5, fast_uint_t * i6,
- fast_uint_t k) {
+static void libsais_unbwt_decode_7(u8 * RESTRICT U, sa_uint_t * RESTRICT P, sa_uint_t * RESTRICT bucket2,
+ u16 * RESTRICT fastbits, fast_uint_t shift, fast_uint_t r, fast_uint_t * i0,
+ fast_uint_t * i1, fast_uint_t * i2, fast_uint_t * i3, fast_uint_t * i4,
+ fast_uint_t * i5, fast_uint_t * i6, fast_uint_t k) {
u16 * RESTRICT U0 = (u16 *)(void *)U;
u16 * RESTRICT U1 = (u16 *)(void *)(((u8 *)U0) + r);
u16 * RESTRICT U2 = (u16 *)(void *)(((u8 *)U1) + r);
@@ -5298,12 +5039,10 @@ static void libsais_unbwt_decode_7(u8 * RESTRICT U, sa_uint_t * RESTRICT P,
*i6 = p6;
}
-static void libsais_unbwt_decode_8(u8 * RESTRICT U, sa_uint_t * RESTRICT P,
- sa_uint_t * RESTRICT bucket2, u16 * RESTRICT fastbits,
- fast_uint_t shift, fast_uint_t r, fast_uint_t * i0,
- fast_uint_t * i1, fast_uint_t * i2, fast_uint_t * i3,
- fast_uint_t * i4, fast_uint_t * i5, fast_uint_t * i6,
- fast_uint_t * i7, fast_uint_t k) {
+static void libsais_unbwt_decode_8(u8 * RESTRICT U, sa_uint_t * RESTRICT P, sa_uint_t * RESTRICT bucket2,
+ u16 * RESTRICT fastbits, fast_uint_t shift, fast_uint_t r, fast_uint_t * i0,
+ fast_uint_t * i1, fast_uint_t * i2, fast_uint_t * i3, fast_uint_t * i4,
+ fast_uint_t * i5, fast_uint_t * i6, fast_uint_t * i7, fast_uint_t k) {
u16 * RESTRICT U0 = (u16 *)(void *)U;
u16 * RESTRICT U1 = (u16 *)(void *)(((u8 *)U0) + r);
u16 * RESTRICT U2 = (u16 *)(void *)(((u8 *)U1) + r);
@@ -5393,9 +5132,8 @@ static void libsais_unbwt_decode_8(u8 * RESTRICT U, sa_uint_t * RESTRICT P,
}
static void libsais_unbwt_decode(u8 * RESTRICT U, sa_uint_t * RESTRICT P, sa_sint_t n, sa_sint_t r,
- const sa_uint_t * RESTRICT I, sa_uint_t * RESTRICT bucket2,
- u16 * RESTRICT fastbits, fast_sint_t blocks,
- fast_uint_t reminder) {
+ const sa_uint_t * RESTRICT I, sa_uint_t * RESTRICT bucket2, u16 * RESTRICT fastbits,
+ fast_sint_t blocks, fast_uint_t reminder) {
fast_uint_t shift = 0;
while ((n >> shift) > (1 << UNBWT_FASTBITS)) {
shift++;
@@ -5403,10 +5141,9 @@ static void libsais_unbwt_decode(u8 * RESTRICT U, sa_uint_t * RESTRICT P, sa_sin
fast_uint_t offset = 0;
while (blocks > 8) {
- fast_uint_t i0 = I[0], i1 = I[1], i2 = I[2], i3 = I[3], i4 = I[4], i5 = I[5], i6 = I[6],
- i7 = I[7];
- libsais_unbwt_decode_8(U + offset, P, bucket2, fastbits, shift, (fast_uint_t)r, &i0, &i1,
- &i2, &i3, &i4, &i5, &i6, &i7, (fast_uint_t)r >> 1);
+ fast_uint_t i0 = I[0], i1 = I[1], i2 = I[2], i3 = I[3], i4 = I[4], i5 = I[5], i6 = I[6], i7 = I[7];
+ libsais_unbwt_decode_8(U + offset, P, bucket2, fastbits, shift, (fast_uint_t)r, &i0, &i1, &i2, &i3, &i4, &i5,
+ &i6, &i7, (fast_uint_t)r >> 1);
I += 8;
blocks -= 8;
offset += 8 * (fast_uint_t)r;
@@ -5417,59 +5154,50 @@ static void libsais_unbwt_decode(u8 * RESTRICT U, sa_uint_t * RESTRICT P, sa_sin
libsais_unbwt_decode_1(U + offset, P, bucket2, fastbits, shift, &i0, reminder >> 1);
} else if (blocks == 2) {
fast_uint_t i0 = I[0], i1 = I[1];
- libsais_unbwt_decode_2(U + offset, P, bucket2, fastbits, shift, (fast_uint_t)r, &i0, &i1,
- reminder >> 1);
+ libsais_unbwt_decode_2(U + offset, P, bucket2, fastbits, shift, (fast_uint_t)r, &i0, &i1, reminder >> 1);
libsais_unbwt_decode_1(U + offset + 2 * (reminder >> 1), P, bucket2, fastbits, shift, &i0,
((fast_uint_t)r >> 1) - (reminder >> 1));
} else if (blocks == 3) {
fast_uint_t i0 = I[0], i1 = I[1], i2 = I[2];
- libsais_unbwt_decode_3(U + offset, P, bucket2, fastbits, shift, (fast_uint_t)r, &i0, &i1,
- &i2, reminder >> 1);
- libsais_unbwt_decode_2(U + offset + 2 * (reminder >> 1), P, bucket2, fastbits, shift,
- (fast_uint_t)r, &i0, &i1, ((fast_uint_t)r >> 1) - (reminder >> 1));
+ libsais_unbwt_decode_3(U + offset, P, bucket2, fastbits, shift, (fast_uint_t)r, &i0, &i1, &i2, reminder >> 1);
+ libsais_unbwt_decode_2(U + offset + 2 * (reminder >> 1), P, bucket2, fastbits, shift, (fast_uint_t)r, &i0, &i1,
+ ((fast_uint_t)r >> 1) - (reminder >> 1));
} else if (blocks == 4) {
fast_uint_t i0 = I[0], i1 = I[1], i2 = I[2], i3 = I[3];
- libsais_unbwt_decode_4(U + offset, P, bucket2, fastbits, shift, (fast_uint_t)r, &i0, &i1,
- &i2, &i3, reminder >> 1);
- libsais_unbwt_decode_3(U + offset + 2 * (reminder >> 1), P, bucket2, fastbits, shift,
- (fast_uint_t)r, &i0, &i1, &i2,
- ((fast_uint_t)r >> 1) - (reminder >> 1));
+ libsais_unbwt_decode_4(U + offset, P, bucket2, fastbits, shift, (fast_uint_t)r, &i0, &i1, &i2, &i3,
+ reminder >> 1);
+ libsais_unbwt_decode_3(U + offset + 2 * (reminder >> 1), P, bucket2, fastbits, shift, (fast_uint_t)r, &i0, &i1,
+ &i2, ((fast_uint_t)r >> 1) - (reminder >> 1));
} else if (blocks == 5) {
fast_uint_t i0 = I[0], i1 = I[1], i2 = I[2], i3 = I[3], i4 = I[4];
- libsais_unbwt_decode_5(U + offset, P, bucket2, fastbits, shift, (fast_uint_t)r, &i0, &i1,
- &i2, &i3, &i4, reminder >> 1);
- libsais_unbwt_decode_4(U + offset + 2 * (reminder >> 1), P, bucket2, fastbits, shift,
- (fast_uint_t)r, &i0, &i1, &i2, &i3,
- ((fast_uint_t)r >> 1) - (reminder >> 1));
+ libsais_unbwt_decode_5(U + offset, P, bucket2, fastbits, shift, (fast_uint_t)r, &i0, &i1, &i2, &i3, &i4,
+ reminder >> 1);
+ libsais_unbwt_decode_4(U + offset + 2 * (reminder >> 1), P, bucket2, fastbits, shift, (fast_uint_t)r, &i0, &i1,
+ &i2, &i3, ((fast_uint_t)r >> 1) - (reminder >> 1));
} else if (blocks == 6) {
fast_uint_t i0 = I[0], i1 = I[1], i2 = I[2], i3 = I[3], i4 = I[4], i5 = I[5];
- libsais_unbwt_decode_6(U + offset, P, bucket2, fastbits, shift, (fast_uint_t)r, &i0, &i1,
- &i2, &i3, &i4, &i5, reminder >> 1);
- libsais_unbwt_decode_5(U + offset + 2 * (reminder >> 1), P, bucket2, fastbits, shift,
- (fast_uint_t)r, &i0, &i1, &i2, &i3, &i4,
- ((fast_uint_t)r >> 1) - (reminder >> 1));
+ libsais_unbwt_decode_6(U + offset, P, bucket2, fastbits, shift, (fast_uint_t)r, &i0, &i1, &i2, &i3, &i4, &i5,
+ reminder >> 1);
+ libsais_unbwt_decode_5(U + offset + 2 * (reminder >> 1), P, bucket2, fastbits, shift, (fast_uint_t)r, &i0, &i1,
+ &i2, &i3, &i4, ((fast_uint_t)r >> 1) - (reminder >> 1));
} else if (blocks == 7) {
fast_uint_t i0 = I[0], i1 = I[1], i2 = I[2], i3 = I[3], i4 = I[4], i5 = I[5], i6 = I[6];
- libsais_unbwt_decode_7(U + offset, P, bucket2, fastbits, shift, (fast_uint_t)r, &i0, &i1,
- &i2, &i3, &i4, &i5, &i6, reminder >> 1);
- libsais_unbwt_decode_6(U + offset + 2 * (reminder >> 1), P, bucket2, fastbits, shift,
- (fast_uint_t)r, &i0, &i1, &i2, &i3, &i4, &i5,
- ((fast_uint_t)r >> 1) - (reminder >> 1));
+ libsais_unbwt_decode_7(U + offset, P, bucket2, fastbits, shift, (fast_uint_t)r, &i0, &i1, &i2, &i3, &i4, &i5,
+ &i6, reminder >> 1);
+ libsais_unbwt_decode_6(U + offset + 2 * (reminder >> 1), P, bucket2, fastbits, shift, (fast_uint_t)r, &i0, &i1,
+ &i2, &i3, &i4, &i5, ((fast_uint_t)r >> 1) - (reminder >> 1));
} else {
- fast_uint_t i0 = I[0], i1 = I[1], i2 = I[2], i3 = I[3], i4 = I[4], i5 = I[5], i6 = I[6],
- i7 = I[7];
- libsais_unbwt_decode_8(U + offset, P, bucket2, fastbits, shift, (fast_uint_t)r, &i0, &i1,
- &i2, &i3, &i4, &i5, &i6, &i7, reminder >> 1);
- libsais_unbwt_decode_7(U + offset + 2 * (reminder >> 1), P, bucket2, fastbits, shift,
- (fast_uint_t)r, &i0, &i1, &i2, &i3, &i4, &i5, &i6,
- ((fast_uint_t)r >> 1) - (reminder >> 1));
+ fast_uint_t i0 = I[0], i1 = I[1], i2 = I[2], i3 = I[3], i4 = I[4], i5 = I[5], i6 = I[6], i7 = I[7];
+ libsais_unbwt_decode_8(U + offset, P, bucket2, fastbits, shift, (fast_uint_t)r, &i0, &i1, &i2, &i3, &i4, &i5,
+ &i6, &i7, reminder >> 1);
+ libsais_unbwt_decode_7(U + offset + 2 * (reminder >> 1), P, bucket2, fastbits, shift, (fast_uint_t)r, &i0, &i1,
+ &i2, &i3, &i4, &i5, &i6, ((fast_uint_t)r >> 1) - (reminder >> 1));
}
}
-static void libsais_unbwt_decode_omp(const u8 * RESTRICT T, u8 * RESTRICT U, sa_uint_t * RESTRICT P,
- sa_sint_t n, sa_sint_t r, const sa_uint_t * RESTRICT I,
- sa_uint_t * RESTRICT bucket2, u16 * RESTRICT fastbits,
- sa_sint_t threads) {
+static void libsais_unbwt_decode_omp(const u8 * RESTRICT T, u8 * RESTRICT U, sa_uint_t * RESTRICT P, sa_sint_t n,
+ sa_sint_t r, const sa_uint_t * RESTRICT I, sa_uint_t * RESTRICT bucket2,
+ u16 * RESTRICT fastbits, sa_sint_t threads) {
fast_uint_t lastc = T[0];
fast_sint_t blocks = 1 + (((fast_sint_t)n - 1) / (fast_sint_t)r);
fast_uint_t reminder = (fast_uint_t)n - ((fast_uint_t)r * ((fast_uint_t)blocks - 1));
@@ -5482,22 +5210,19 @@ static void libsais_unbwt_decode_omp(const u8 * RESTRICT T, u8 * RESTRICT U, sa_
fast_sint_t omp_block_stride = blocks / omp_num_threads;
fast_sint_t omp_block_reminder = blocks % omp_num_threads;
fast_sint_t omp_block_size = omp_block_stride + (omp_thread_num < omp_block_reminder);
- fast_sint_t omp_block_start =
- omp_block_stride * omp_thread_num +
- (omp_thread_num < omp_block_reminder ? omp_thread_num : omp_block_reminder);
+ fast_sint_t omp_block_start = omp_block_stride * omp_thread_num +
+ (omp_thread_num < omp_block_reminder ? omp_thread_num : omp_block_reminder);
- libsais_unbwt_decode(U + r * omp_block_start, P, n, r, I + omp_block_start, bucket2,
- fastbits, omp_block_size,
+ libsais_unbwt_decode(U + r * omp_block_start, P, n, r, I + omp_block_start, bucket2, fastbits, omp_block_size,
omp_thread_num < omp_num_threads - 1 ? (fast_uint_t)r : reminder);
}
U[n - 1] = (u8)lastc;
}
-static sa_sint_t libsais_unbwt_core(const u8 * RESTRICT T, u8 * RESTRICT U, sa_uint_t * RESTRICT P,
- sa_sint_t n, const sa_sint_t * freq, sa_sint_t r,
- const sa_uint_t * RESTRICT I, sa_uint_t * RESTRICT bucket2,
- u16 * RESTRICT fastbits, sa_uint_t * RESTRICT buckets,
+static sa_sint_t libsais_unbwt_core(const u8 * RESTRICT T, u8 * RESTRICT U, sa_uint_t * RESTRICT P, sa_sint_t n,
+ const sa_sint_t * freq, sa_sint_t r, const sa_uint_t * RESTRICT I,
+ sa_uint_t * RESTRICT bucket2, u16 * RESTRICT fastbits, sa_uint_t * RESTRICT buckets,
sa_sint_t threads) {
(void)(buckets);
@@ -5507,9 +5232,8 @@ static sa_sint_t libsais_unbwt_core(const u8 * RESTRICT T, u8 * RESTRICT U, sa_u
return 0;
}
-static sa_sint_t libsais_unbwt_main(const u8 * T, u8 * U, sa_uint_t * P, sa_sint_t n,
- const sa_sint_t * freq, sa_sint_t r, const sa_uint_t * I,
- sa_sint_t threads) {
+static sa_sint_t libsais_unbwt_main(const u8 * T, u8 * U, sa_uint_t * P, sa_sint_t n, const sa_sint_t * freq,
+ sa_sint_t r, const sa_uint_t * I, sa_sint_t threads) {
fast_uint_t shift = 0;
while ((n >> shift) > (1 << UNBWT_FASTBITS)) {
shift++;
@@ -5517,20 +5241,16 @@ static sa_sint_t libsais_unbwt_main(const u8 * T, u8 * U, sa_uint_t * P, sa_sint
sa_uint_t * RESTRICT bucket2 =
(sa_uint_t *)libsais_alloc_aligned(ALPHABET_SIZE * ALPHABET_SIZE * sizeof(sa_uint_t), 4096);
- u16 * RESTRICT fastbits =
- (u16 *)libsais_alloc_aligned(((size_t)1 + (size_t)(n >> shift)) * sizeof(u16), 4096);
+ u16 * RESTRICT fastbits = (u16 *)libsais_alloc_aligned(((size_t)1 + (size_t)(n >> shift)) * sizeof(u16), 4096);
sa_uint_t * RESTRICT buckets =
threads > 1 && n >= 262144
? (sa_uint_t *)libsais_alloc_aligned(
- (size_t)threads * (ALPHABET_SIZE + (ALPHABET_SIZE * ALPHABET_SIZE)) *
- sizeof(sa_uint_t),
- 4096)
+ (size_t)threads * (ALPHABET_SIZE + (ALPHABET_SIZE * ALPHABET_SIZE)) * sizeof(sa_uint_t), 4096)
: NULL;
- sa_sint_t index =
- bucket2 != NULL && fastbits != NULL && (buckets != NULL || threads == 1 || n < 262144)
- ? libsais_unbwt_core(T, U, P, n, freq, r, I, bucket2, fastbits, buckets, threads)
- : -2;
+ sa_sint_t index = bucket2 != NULL && fastbits != NULL && (buckets != NULL || threads == 1 || n < 262144)
+ ? libsais_unbwt_core(T, U, P, n, freq, r, I, bucket2, fastbits, buckets, threads)
+ : -2;
libsais_free_aligned(buckets);
libsais_free_aligned(fastbits);
@@ -5539,35 +5259,29 @@ static sa_sint_t libsais_unbwt_main(const u8 * T, u8 * U, sa_uint_t * P, sa_sint
return index;
}
-static sa_sint_t libsais_unbwt_main_ctx(const LIBSAIS_UNBWT_CONTEXT * ctx, const u8 * T, u8 * U,
- sa_uint_t * P, sa_sint_t n, const sa_sint_t * freq,
- sa_sint_t r, const sa_uint_t * I) {
- return ctx != NULL && ctx->bucket2 != NULL && ctx->fastbits != NULL &&
- (ctx->buckets != NULL || ctx->threads == 1)
- ? libsais_unbwt_core(T, U, P, n, freq, r, I, ctx->bucket2, ctx->fastbits,
- ctx->buckets, (sa_sint_t)ctx->threads)
+static sa_sint_t libsais_unbwt_main_ctx(const LIBSAIS_UNBWT_CONTEXT * ctx, const u8 * T, u8 * U, sa_uint_t * P,
+ sa_sint_t n, const sa_sint_t * freq, sa_sint_t r, const sa_uint_t * I) {
+ return ctx != NULL && ctx->bucket2 != NULL && ctx->fastbits != NULL && (ctx->buckets != NULL || ctx->threads == 1)
+ ? libsais_unbwt_core(T, U, P, n, freq, r, I, ctx->bucket2, ctx->fastbits, ctx->buckets,
+ (sa_sint_t)ctx->threads)
: -2;
}
void * libsais_unbwt_create_ctx(void) { return (void *)libsais_unbwt_create_ctx_main(1); }
-void libsais_unbwt_free_ctx(void * ctx) {
- libsais_unbwt_free_ctx_main((LIBSAIS_UNBWT_CONTEXT *)ctx);
-}
+void libsais_unbwt_free_ctx(void * ctx) { libsais_unbwt_free_ctx_main((LIBSAIS_UNBWT_CONTEXT *)ctx); }
s32 libsais_unbwt(const u8 * T, u8 * U, s32 * A, s32 n, const s32 * freq, s32 i) {
return libsais_unbwt_aux(T, U, A, n, freq, n, &i);
}
-s32 libsais_unbwt_ctx(const void * ctx, const u8 * T, u8 * U, s32 * A, s32 n, const s32 * freq,
- s32 i) {
+s32 libsais_unbwt_ctx(const void * ctx, const u8 * T, u8 * U, s32 * A, s32 n, const s32 * freq, s32 i) {
return libsais_unbwt_aux_ctx(ctx, T, U, A, n, freq, n, &i);
}
-s32 libsais_unbwt_aux(const u8 * T, u8 * U, s32 * A, s32 n, const s32 * freq, s32 r,
- const s32 * I) {
- if ((T == NULL) || (U == NULL) || (A == NULL) || (n < 0) ||
- ((r != n) && ((r < 2) || ((r & (r - 1)) != 0))) || (I == NULL)) {
+s32 libsais_unbwt_aux(const u8 * T, u8 * U, s32 * A, s32 n, const s32 * freq, s32 r, const s32 * I) {
+ if ((T == NULL) || (U == NULL) || (A == NULL) || (n < 0) || ((r != n) && ((r < 2) || ((r & (r - 1)) != 0))) ||
+ (I == NULL)) {
return -1;
} else if (n <= 1) {
if (I[0] != n) {
@@ -5589,10 +5303,10 @@ s32 libsais_unbwt_aux(const u8 * T, u8 * U, s32 * A, s32 n, const s32 * freq, s3
return libsais_unbwt_main(T, U, (sa_uint_t *)A, n, freq, r, (const sa_uint_t *)I, 1);
}
-s32 libsais_unbwt_aux_ctx(const void * ctx, const u8 * T, u8 * U, s32 * A, s32 n, const s32 * freq,
- s32 r, const s32 * I) {
- if ((T == NULL) || (U == NULL) || (A == NULL) || (n < 0) ||
- ((r != n) && ((r < 2) || ((r & (r - 1)) != 0))) || (I == NULL)) {
+s32 libsais_unbwt_aux_ctx(const void * ctx, const u8 * T, u8 * U, s32 * A, s32 n, const s32 * freq, s32 r,
+ const s32 * I) {
+ if ((T == NULL) || (U == NULL) || (A == NULL) || (n < 0) || ((r != n) && ((r < 2) || ((r & (r - 1)) != 0))) ||
+ (I == NULL)) {
return -1;
} else if (n <= 1) {
if (I[0] != n) {
@@ -5611,18 +5325,16 @@ s32 libsais_unbwt_aux_ctx(const void * ctx, const u8 * T, u8 * U, s32 * A, s32 n
}
}
- return libsais_unbwt_main_ctx((const LIBSAIS_UNBWT_CONTEXT *)ctx, T, U, (sa_uint_t *)A, n, freq,
- r, (const sa_uint_t *)I);
+ return libsais_unbwt_main_ctx((const LIBSAIS_UNBWT_CONTEXT *)ctx, T, U, (sa_uint_t *)A, n, freq, r,
+ (const sa_uint_t *)I);
}
-static void libsais_compute_phi(const sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT PLCP,
- sa_sint_t n, fast_sint_t omp_block_start,
- fast_sint_t omp_block_size) {
+static void libsais_compute_phi(const sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT PLCP, sa_sint_t n,
+ fast_sint_t omp_block_start, fast_sint_t omp_block_size) {
const fast_sint_t prefetch_distance = 32;
fast_sint_t i, j;
sa_sint_t k = omp_block_start > 0 ? SA[omp_block_start - 1] : n;
- for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 3; i < j;
- i += 4) {
+ for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 3; i < j; i += 4) {
libsais_prefetchw(&PLCP[SA[i + prefetch_distance + 0]]);
libsais_prefetchw(&PLCP[SA[i + prefetch_distance + 1]]);
@@ -5646,8 +5358,8 @@ static void libsais_compute_phi(const sa_sint_t * RESTRICT SA, sa_sint_t * RESTR
}
}
-static void libsais_compute_phi_omp(const sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT PLCP,
- sa_sint_t n, sa_sint_t threads) {
+static void libsais_compute_phi_omp(const sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT PLCP, sa_sint_t n,
+ sa_sint_t threads) {
{
(void)(threads);
@@ -5656,8 +5368,7 @@ static void libsais_compute_phi_omp(const sa_sint_t * RESTRICT SA, sa_sint_t * R
fast_sint_t omp_block_stride = (n / omp_num_threads) & (-16);
fast_sint_t omp_block_start = omp_thread_num * omp_block_stride;
- fast_sint_t omp_block_size =
- omp_thread_num < omp_num_threads - 1 ? omp_block_stride : n - omp_block_start;
+ fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? omp_block_stride : n - omp_block_start;
libsais_compute_phi(SA, PLCP, n, omp_block_start, omp_block_size);
}
@@ -5668,8 +5379,7 @@ static void libsais_compute_plcp(const u8 * RESTRICT T, sa_sint_t * RESTRICT PLC
const fast_sint_t prefetch_distance = 32;
fast_sint_t i, j, l = 0;
- for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance; i < j;
- i += 1) {
+ for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance; i < j; i += 1) {
libsais_prefetch(&T[PLCP[i + prefetch_distance] + l]);
fast_sint_t k = PLCP[i], m = n - (i > k ? i : k);
@@ -5692,8 +5402,7 @@ static void libsais_compute_plcp(const u8 * RESTRICT T, sa_sint_t * RESTRICT PLC
}
}
-static void libsais_compute_plcp_omp(const u8 * RESTRICT T, sa_sint_t * RESTRICT PLCP, sa_sint_t n,
- sa_sint_t threads) {
+static void libsais_compute_plcp_omp(const u8 * RESTRICT T, sa_sint_t * RESTRICT PLCP, sa_sint_t n, sa_sint_t threads) {
{
(void)(threads);
@@ -5702,21 +5411,18 @@ static void libsais_compute_plcp_omp(const u8 * RESTRICT T, sa_sint_t * RESTRICT
fast_sint_t omp_block_stride = (n / omp_num_threads) & (-16);
fast_sint_t omp_block_start = omp_thread_num * omp_block_stride;
- fast_sint_t omp_block_size =
- omp_thread_num < omp_num_threads - 1 ? omp_block_stride : n - omp_block_start;
+ fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? omp_block_stride : n - omp_block_start;
libsais_compute_plcp(T, PLCP, n, omp_block_start, omp_block_size);
}
}
static void libsais_compute_lcp(const sa_sint_t * RESTRICT PLCP, const sa_sint_t * RESTRICT SA,
- sa_sint_t * RESTRICT LCP, fast_sint_t omp_block_start,
- fast_sint_t omp_block_size) {
+ sa_sint_t * RESTRICT LCP, fast_sint_t omp_block_start, fast_sint_t omp_block_size) {
const fast_sint_t prefetch_distance = 32;
fast_sint_t i, j;
- for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 3; i < j;
- i += 4) {
+ for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 3; i < j; i += 4) {
libsais_prefetch(&PLCP[SA[i + prefetch_distance + 0]]);
libsais_prefetch(&PLCP[SA[i + prefetch_distance + 1]]);
@@ -5745,8 +5451,7 @@ static void libsais_compute_lcp_omp(const sa_sint_t * RESTRICT PLCP, const sa_si
fast_sint_t omp_block_stride = (n / omp_num_threads) & (-16);
fast_sint_t omp_block_start = omp_thread_num * omp_block_stride;
- fast_sint_t omp_block_size =
- omp_thread_num < omp_num_threads - 1 ? omp_block_stride : n - omp_block_start;
+ fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? omp_block_stride : n - omp_block_start;
libsais_compute_lcp(PLCP, SA, LCP, omp_block_start, omp_block_size);
}
diff --git a/src/lzp.c b/src/lzp.c
index 6813788..0ba6dd5 100644
--- a/src/lzp.c
+++ b/src/lzp.c
@@ -16,8 +16,8 @@ static inline s32 num_blocks(s32 n) {
return 4;
}
-static s32 lzp_encode_block(const u8 * restrict in, const u8 * in_end, u8 * restrict out,
- u8 * out_end, s32 * restrict lut, s32 mask, s32 m_len) {
+static s32 lzp_encode_block(const u8 * restrict in, const u8 * in_end, u8 * restrict out, u8 * out_end,
+ s32 * restrict lut, s32 mask, s32 m_len) {
const u8 *ins = in, *outs = out;
const u8 * out_eob = out_end - 8;
const u8 * heur = in;
@@ -34,8 +34,7 @@ static s32 lzp_encode_block(const u8 * restrict in, const u8 * in_end, u8 * rest
lut[idx] = in - ins;
if (val > 0) {
const u8 * restrict ref = ins + val;
- if (memcmp(in + m_len - 4, ref + m_len - 4, sizeof(u32)) == 0 &&
- memcmp(in, ref, sizeof(u32)) == 0) {
+ if (memcmp(in + m_len - 4, ref + m_len - 4, sizeof(u32)) == 0 && memcmp(in, ref, sizeof(u32)) == 0) {
if (heur > in && *(u32 *)heur != *(u32 *)(ref + (heur - in))) goto not_found;
s32 len = 4;
@@ -91,8 +90,7 @@ static s32 lzp_encode_block(const u8 * restrict in, const u8 * in_end, u8 * rest
return out >= out_eob ? -1 : (s32)(out - outs);
}
-static s32 lzp_decode_block(const u8 * restrict in, const u8 * in_end, u8 * restrict out, s32 hash,
- s32 m_len) {
+static s32 lzp_decode_block(const u8 * restrict in, const u8 * in_end, u8 * restrict out, s32 hash, s32 m_len) {
if (in_end - in < 4) return -1;
s32 * restrict lut = calloc(1 << hash, sizeof(s32));
@@ -148,8 +146,7 @@ s32 lzp_compress(const u8 * in, u8 * out, s32 n, s32 hash, s32 m_len) {
if (!lut) return -1;
- s32 r =
- lzp_encode_block(in, in + n, out + 1, out + n - 1, lut, (s32)(1 << hash) - 1, m_len);
+ s32 r = lzp_encode_block(in, in + n, out + 1, out + n - 1, lut, (s32)(1 << hash) - 1, m_len);
free(lut);
@@ -180,8 +177,8 @@ s32 lzp_compress(const u8 * in, u8 * out, s32 n, s32 hash, s32 m_len) {
if (!lut)
r = -1;
else
- r = lzp_encode_block(in + ins, in + ins + insz, out + out_ptr,
- out + out_ptr + outsz, lut, (s32)(1 << hash) - 1, m_len);
+ r = lzp_encode_block(in + ins, in + ins + insz, out + out_ptr, out + out_ptr + outsz, lut,
+ (s32)(1 << hash) - 1, m_len);
free(lut);
}
@@ -220,8 +217,7 @@ s32 lzp_decompress(const u8 * in, u8 * out, s32 n, s32 hash, s32 m_len) {
s32 outsz = *(s32 *)(in + 1 + 8 * b_id + 0);
if (insz != outsz) {
- dec[b_id] =
- lzp_decode_block(in + in_ptr, in + in_ptr + insz, out + out_ptr, hash, m_len);
+ dec[b_id] = lzp_decode_block(in + in_ptr, in + in_ptr + insz, out + out_ptr, hash, m_len);
} else {
dec[b_id] = insz;
memcpy(out + out_ptr, in + in_ptr, insz);
diff --git a/src/main.c b/src/main.c
index 23a4fd6..84ec081 100644
--- a/src/main.c
+++ b/src/main.c
@@ -22,6 +22,7 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+#include <arpa/inet.h>
#include "common.h"
#include "libbz3.h"
@@ -65,26 +66,26 @@ int main(int argc, char * argv[]) {
return 1;
}
- int input_des, output_des;
+ FILE * input_des, * output_des;
if (input != NULL) {
- input_des = open(input, O_RDONLY);
- if (input_des == -1) {
- perror("open");
+ input_des = fopen(input, "rb");
+ if (input_des == NULL) {
+ perror("fopen");
return 1;
}
} else {
- input_des = STDIN_FILENO;
+ input_des = stdin;
}
if (output != NULL) {
- output_des = open(output, O_WRONLY | O_CREAT | O_TRUNC, 0644);
- if (output_des == -1) {
+ output_des = fopen(output, "wb");
+ if (output_des == NULL) {
-            perror("open");
+            perror("fopen");
return 1;
}
} else {
- output_des = STDOUT_FILENO;
+ output_des = stdout;
}
if (block_size < KiB(65) || block_size > MiB(2047)) {
@@ -94,20 +95,25 @@ int main(int argc, char * argv[]) {
switch (mode) {
case 1:
- write(output_des, "BZ3v1", 5);
- write(output_des, &block_size, sizeof(u32));
+ fwrite("BZ3v1", 5, 1, output_des);
+
+ block_size = htonl(block_size);
+ fwrite(&block_size, sizeof(u32), 1, output_des);
+ block_size = ntohl(block_size);
break;
case -1:
case -2: {
char signature[5];
- read(input_des, signature, 5);
+            // A short read would leave `signature` partly uninitialised, so treat it as a bad signature.
+            if (fread(signature, 5, 1, input_des) != 1) {
+                fprintf(stderr, "Invalid signature.\n");
+                return 1;
+            }
if (strncmp(signature, "BZ3v1", 5) != 0) {
fprintf(stderr, "Invalid signature.\n");
return 1;
}
- read(input_des, &block_size, sizeof(u32));
+ fread(&block_size, sizeof(u32), 1, input_des);
+
+ block_size = ntohl(block_size);
if (block_size < KiB(65) || block_size > MiB(2047)) {
fprintf(stderr,
@@ -120,65 +126,77 @@ int main(int argc, char * argv[]) {
}
}
- struct block_encoder_state * block_encoder_state = new_block_encoder_state(block_size);
+ struct bz3_state * state = bz3_new(block_size);
- if (block_encoder_state == NULL) {
+ if (state == NULL) {
fprintf(stderr, "Failed to create a block encoder state.\n");
return 1;
}
+    // Scratch buffer shared by the encode and decode loops below.
+    u8 * buffer = malloc(block_size + block_size / 4);
+    if (buffer == NULL) {
+        fprintf(stderr, "Failed to allocate memory.\n");
+        return 1;
+    }
+
if (mode == 1) {
- while (commit_read(block_encoder_state, read(input_des, get_buffer(block_encoder_state), block_size)) > 0) {
- if (get_last_error(block_encoder_state) != BZ3_OK) {
- fprintf(stderr, "Failed to read data: %s\n", str_last_error(block_encoder_state));
- return 1;
- }
- struct encoding_result r = encode_block(block_encoder_state);
- if (get_last_error(block_encoder_state) != BZ3_OK) {
- fprintf(stderr, "Failed to encode the block: %s\n",
- str_last_error(block_encoder_state));
+        s32 read_count;
+        while (!feof(input_des)) {
+            read_count = fread(buffer, 1, block_size, input_des);
+            if (read_count == 0) {
+                // feof() only becomes true after a read has hit end-of-file, so an empty input or one
+                // whose length is an exact multiple of block_size produces a final zero-length read.
+                // Do not emit an empty block for it.
+                if (ferror(input_des)) {
+                    fprintf(stderr, "I/O error.\n");
+                    return 1;
+                }
+                break;
+            }
+
+            s32 new_size = bz3_encode_block(state, buffer, read_count);
+            if (new_size == -1) {
+                fprintf(stderr, "Failed to encode a block: %s\n", bz3_strerror(state));
return 1;
}
- write(output_des, r.buffer, r.size);
+
+            // Block header: compressed size, then original size, both in network byte order.
+            // Separate variables keep new_size in host order for the payload write.
+            u32 be_new_size = htonl((u32)new_size), be_read_count = htonl((u32)read_count);
+            if (fwrite(&be_new_size, 4, 1, output_des) != 1 || fwrite(&be_read_count, 4, 1, output_des) != 1 ||
+                fwrite(buffer, 1, (u32)new_size, output_des) != (u32)new_size) {
+                fprintf(stderr, "I/O error.\n");
+                return 1;
+            }
}
- }
- else if (mode == -1) {
- s32 read_size;
- while ((read_size = read_block(input_des, block_encoder_state)) > 0) {
- if (get_last_error(block_encoder_state) != BZ3_OK) {
- fprintf(stderr, "Failed to read data: %s\n", str_last_error(block_encoder_state));
+ } else if (mode == -1) {
+ s32 new_size, old_size;
+ while (!feof(input_des)) {
+ if(fread(&new_size, 1, 4, input_des) != 4) {
+ // Assume that the file has no more data.
+ break;
+ }
+ if(fread(&old_size, 1, 4, input_des) != 4) {
+ fprintf(stderr, "I/O error.\n");
return 1;
}
- struct encoding_result r = decode_block(block_encoder_state);
- if (get_last_error(block_encoder_state) != BZ3_OK) {
- fprintf(stderr, "Failed to decode the block: %s\n",
- str_last_error(block_encoder_state));
+            new_size = ntohl(new_size); old_size = ntohl(old_size);
+            // Both sizes come from the input file. Reject anything that would not fit in `buffer`,
+            // which was allocated with block_size + block_size / 4 bytes.
+            size_t capacity = (size_t)block_size + (size_t)block_size / 4;
+            if (new_size < 0 || old_size < 0 || (size_t)new_size > capacity || (size_t)old_size > capacity) {
+                fprintf(stderr, "Invalid block header.\n");
+                return 1;
+            }
+            if (fread(buffer, 1, new_size, input_des) != (size_t)new_size) {
+                fprintf(stderr, "I/O error.\n");
+                return 1;
+            }
+            if (bz3_decode_block(state, buffer, new_size, old_size) == -1) {
+                fprintf(stderr, "Failed to decode a block: %s\n", bz3_strerror(state));
return 1;
}
- write(output_des, r.buffer, r.size);
+ fwrite(buffer, old_size, 1, output_des);
}
} else if (mode == -2) {
- s32 read_size;
- while ((read_size = read_block(input_des, block_encoder_state)) > 0) {
- if (get_last_error(block_encoder_state) != BZ3_OK) {
- fprintf(stderr, "Failed to read data: %s\n", str_last_error(block_encoder_state));
- return 1;
+        s32 new_size, old_size;
+        while (!feof(input_des)) {
+            // fread() returns the number of complete items read, so read four 1-byte items and
+            // compare against 4.
+            if (fread(&new_size, 1, 4, input_des) != 4) {
+                // Assume that the file has no more data.
+                break;
+            }
+            if (fread(&old_size, 1, 4, input_des) != 4) {
+                fprintf(stderr, "I/O error.\n");
+                return 1;
}
- decode_block(block_encoder_state);
- if (get_last_error(block_encoder_state) != BZ3_OK) {
- fprintf(stderr, "Failed to decode data: %s\n", str_last_error(block_encoder_state));
+            new_size = ntohl(new_size); old_size = ntohl(old_size);
+            // Both sizes come from the input file. Reject anything that would not fit in `buffer`,
+            // which was allocated with block_size + block_size / 4 bytes.
+            size_t capacity = (size_t)block_size + (size_t)block_size / 4;
+            if (new_size < 0 || old_size < 0 || (size_t)new_size > capacity || (size_t)old_size > capacity) {
+                fprintf(stderr, "Invalid block header.\n");
+                return 1;
+            }
+            if (fread(buffer, 1, new_size, input_des) != (size_t)new_size) {
+                fprintf(stderr, "I/O error.\n");
+                return 1;
+            }
+            if (bz3_decode_block(state, buffer, new_size, old_size) == -1) {
+                fprintf(stderr, "Failed to decode a block: %s\n", bz3_strerror(state));
return 1;
}
}
}
- if (get_last_error(block_encoder_state) != BZ3_OK) {
- fprintf(stderr, "Failed to read data: %s\n", str_last_error(block_encoder_state));
+ if (bz3_last_error(state) != BZ3_OK) {
+ fprintf(stderr, "Failed to read data: %s\n", bz3_strerror(state));
return 1;
}
- delete_block_encoder_state(block_encoder_state);
+ free(buffer);
+
+ bz3_free(state);
- close(input_des);
- close(output_des);
+ fclose(input_des);
+ fclose(output_des);
}
diff --git a/src/srt.c b/src/srt.c
index d292abe..c5c1e38 100644
--- a/src/srt.c
+++ b/src/srt.c
@@ -32,8 +32,7 @@ static s32 preprocess(const u32 * freqs, u8 * symbols) {
for (u32 i = h; i < nb_symbols; i++) {
const s32 t = symbols[i];
s32 b = i - h;
- while ((b >= 0) && (freqs[symbols[b]] < freqs[t] ||
- (freqs[t] == freqs[symbols[b]] && t < symbols[b]))) {
+ while ((b >= 0) && (freqs[symbols[b]] < freqs[t] || (freqs[t] == freqs[symbols[b]] && t < symbols[b]))) {
symbols[b + h] = symbols[b];
b -= h;
}
