:: bzip3 / include / libbz3.h 9.3 KB raw

1
2
/*
3
 * BZip3 - A spiritual successor to BZip2.
4
 * Copyright (C) 2022-2024 Kamila Szewczyk
5
 *
6
 * This program is free software: you can redistribute it and/or modify it
7
 * under the terms of the GNU Lesser General Public License as published by the Free
8
 * Software Foundation, either version 3 of the License, or (at your option)
9
 * any later version.
10
 *
11
 * This program is distributed in the hope that it will be useful, but WITHOUT
12
 * ANY WARRANTY; without even the implied warranty of  MERCHANTABILITY or
13
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14
 * more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public License along with
17
 * this program.  If not, see <http://www.gnu.org/licenses/>.
18
 */
19
20
#ifndef LIBBZ3_H
21
#define LIBBZ3_H
22
23
#include <stddef.h>
24
#include <stdint.h>
25
26
/*
 * Symbol visibility control.
 *
 * BZIP3_VISIBLE can be predefined by the including build; the default below is
 * used only when it is not. With a compiler that defines __GNUC__ >= 4 (MinGW
 * excluded) it expands to the "default" visibility attribute; otherwise it
 * expands to nothing.
 */
27
#ifndef BZIP3_VISIBLE
28
    #if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
29
        #define BZIP3_VISIBLE __attribute__((visibility("default")))
30
    #else
31
        #define BZIP3_VISIBLE
32
    #endif
33
#endif
34
35
/*
 * BZIP3_API is the decoration placed on every public declaration in this header.
 *
 * - BZIP3_DLL_EXPORT == 1: adds __declspec(dllexport).
 * - BZIP3_DLL_IMPORT == 1: adds __declspec(dllimport).
 * - neither:               plain BZIP3_VISIBLE.
 *
 * The export check comes first, so it wins if both macros are set to 1.
 */
#if defined(BZIP3_DLL_EXPORT) && (BZIP3_DLL_EXPORT == 1)
36
    #define BZIP3_API __declspec(dllexport) BZIP3_VISIBLE
37
#elif defined(BZIP3_DLL_IMPORT) && (BZIP3_DLL_IMPORT == 1)
38
    #define BZIP3_API __declspec(dllimport) BZIP3_VISIBLE
39
#else
40
    #define BZIP3_API BZIP3_VISIBLE
41
#endif
42
43
#ifdef __cplusplus
44
extern "C" {
45
#endif
46
47
/*
 * Status codes. BZ3_OK (0) signals success; every BZ3_ERR_* code is negative.
 */
#define BZ3_OK 0
48
#define BZ3_ERR_OUT_OF_BOUNDS -1
49
#define BZ3_ERR_BWT -2
50
#define BZ3_ERR_CRC -3
51
#define BZ3_ERR_MALFORMED_HEADER -4
52
#define BZ3_ERR_TRUNCATED_DATA -5
53
#define BZ3_ERR_DATA_TOO_BIG -6
54
#define BZ3_ERR_INIT -7
55
/* Returned by bz3_decode_block() when the supplied `buffer_size` is too small. */
#define BZ3_ERR_DATA_SIZE_TOO_SMALL -8
56
57
struct bz3_state;
58
59
/**
60
 * @brief Get bzip3 version.
61
 */
62
BZIP3_API const char * bz3_version(void);
63
64
/**
65
 * @brief Get the last error number associated with a given state.
66
 */
67
BZIP3_API int8_t bz3_last_error(struct bz3_state * state);
68
69
/**
70
 * @brief Return a user-readable message explaining the cause of the last error.
71
 */
72
BZIP3_API const char * bz3_strerror(struct bz3_state * state);
73
74
/**
75
 * @brief Construct a new block encoder state, which will encode blocks as big as the given block size.
76
 * The decoder will be able to decode blocks at most as big as the given block size.
77
 * Returns NULL in case allocation fails or the block size is not between 65K and 511M.
78
 */
79
BZIP3_API struct bz3_state * bz3_new(int32_t block_size);
80
81
/**
82
 * @brief Free the memory occupied by a block encoder state.
83
 */
84
BZIP3_API void bz3_free(struct bz3_state * state);
85
86
/**
87
 * @brief Return the recommended size of the output buffer for the compression functions.
88
 */
89
BZIP3_API size_t bz3_bound(size_t input_size);
90
91
/* ** HIGH LEVEL APIs ** */
92
93
/**
94
 * @brief Compress a frame. This function does not support parallelism
95
 * by itself, consider using the low level `bz3_encode_blocks()` function instead.
96
 * Using the low level API might provide better performance.
97
 * Returns a bzip3 error code; BZ3_OK when the operation is successful.
98
 * Make sure to set out_size to the size of the output buffer before the operation;
99
 * out_size must be at least equal to `bz3_bound(in_size)'.
100
 */
101
BZIP3_API int bz3_compress(uint32_t block_size, const uint8_t * in, uint8_t * out, size_t in_size, size_t * out_size);
102
103
/**
104
 * @brief Decompress a frame. This function does not support parallelism
105
 * by itself, consider using the low level `bz3_decode_blocks()` function instead.
106
 * Using the low level API might provide better performance.
107
 * Returns a bzip3 error code; BZ3_OK when the operation is successful.
108
 * Make sure to set out_size to the size of the output buffer before the operation.
109
 */
110
BZIP3_API int bz3_decompress(const uint8_t * in, uint8_t * out, size_t in_size, size_t * out_size);
111
112
/**
113
 * @brief Calculate the minimal memory required for compression with the given block size.
114
 * This includes all internal buffers and state structures. This calculates the amount of bytes
115
 * that will be allocated by a call to `bz3_new()`.
116
 * 
117
 * @details Memory allocation and usage patterns:
118
 * 
119
 * bz3_new():
120
 *    - Allocates all memory upfront:
121
 *      - Core state structure (sizeof(struct bz3_state))
122
 *      - Swap buffer (bz3_bound(block_size) bytes)
123
 *      - SAIS array (BWT_BOUND(block_size) * sizeof(int32_t) bytes)
124
 *      - LZP lookup table ((1 << LZP_DICTIONARY) * sizeof(int32_t) bytes)
125
 *      - Compression state (sizeof(state))
126
 *    - All memory remains allocated until bz3_free()
127
 * 
128
 * Additional memory may be used depending on API used from here.
129
 * 
130
 * # Low Level APIs
131
 * 
132
 * 1. bz3_encode_block() / bz3_decode_block():
133
 *    - Uses pre-allocated memory from bz3_new()
134
 *    - No additional memory allocation except for libsais (usually ~16KiB)
135
 *    - Peak memory usage of physical RAM varies with compression stages:
136
 *      - LZP: Uses LZP lookup table + swap buffer
137
 *      - BWT: Uses SAIS array + swap buffer
138
 *      - Entropy coding: Uses compression state (cm_state) + swap buffer
139
 * 
140
 * Using the higher level API, `bz3_compress`, expect an additional allocation
141
 * of `bz3_bound(block_size)`.
142
 * 
143
 * In the parallel version `bz3_encode_blocks`, each thread gets its own state,
144
 * so memory usage is `n_threads * bz3_min_memory_needed()`.
145
 * 
146
 * # High Level APIs
147
 * 
148
 * 1. bz3_compress():
149
 *    - Allocates additional temporary compression buffer (bz3_bound(block_size) bytes)
150
 *      in addition to the memory amount returned by this method call and libsais.
151
 *    - Everything is freed after compression completes
152
 * 
153
 * 2. bz3_decompress():
154
 *    - Allocates additional temporary compression buffer (bz3_bound(block_size) bytes)
155
 *      in addition to the memory amount returned by this method call and libsais.
156
 *    - Everything is freed after decompression completes
157
 * 
158
 * Memory remains constant during operation, with the exception of some small allocations from libsais during
159
 * the BWT stage. That is not accounted for by this function, though it usually amounts to ~16KiB, which is negligible.
160
 * The worst case of BWT is 2*block_size technically speaking.
161
 * 
162
 * No dynamic (re)allocation occurs outside of that.
163
 * 
164
 * @param block_size The block size to be used for compression
165
 * @return The total number of bytes required for compression, or 0 if block_size is invalid
166
 */
167
BZIP3_API size_t bz3_min_memory_needed(int32_t block_size);
168
169
/* ** LOW LEVEL APIs ** */
170
171
/**
172
 * @brief Encode a single block. Returns the number of bytes written to `buffer'.
173
 * `buffer' must be able to hold at least `bz3_bound(size)' bytes. The size must not
174
 * exceed the block size associated with the state.
175
 */
176
BZIP3_API int32_t bz3_encode_block(struct bz3_state * state, uint8_t * buffer, int32_t size);
177
178
/**
179
 * @brief Decode a single block.
180
 * 
181
 * `buffer' must be able to hold at least `bz3_bound(orig_size)' bytes
182
 * in order to ensure decompression will succeed for all possible bzip3 blocks.
183
 * 
184
 * In most (but not all) cases, a buffer of `orig_size` bytes is sufficient.
185
 * If it is not sufficient, you must allocate a buffer of size `bz3_bound(orig_size)` temporarily. 
186
 * 
187
 * If `buffer_size` is too small, `BZ3_ERR_DATA_SIZE_TOO_SMALL` will be returned.
188
 * The size must not exceed the block size associated with the state.
189
 * 
190
 * @param buffer_size The size of the buffer at 'buffer'
191
 * @param compressed_size The size of the compressed data in 'buffer'
192
 * @param orig_size The original size of the data before compression.
193
 */
194
BZIP3_API int32_t bz3_decode_block(struct bz3_state * state, uint8_t * buffer, size_t buffer_size, int32_t compressed_size, int32_t orig_size);
195
196
/**
197
 * @brief Encode `n' blocks, all in parallel.
198
 * All specifics of the `bz3_encode_block' still hold. The function will launch a thread for each block.
199
 * The compressed sizes are written to the `sizes' array. Every buffer is overwritten and none of them can overlap.
200
 * Precisely `n' states, buffers and sizes must be supplied.
201
 *
202
 * Expects `n' between 2 and 16.
203
 *
204
 * Present in the shared library only if -lpthread was present during building.
205
 */
206
BZIP3_API void bz3_encode_blocks(struct bz3_state * states[], uint8_t * buffers[], int32_t sizes[], int32_t n);
207
208
/**
209
 * @brief Decode `n' blocks, all in parallel.
210
 * Same specifics as `bz3_encode_blocks', but doesn't overwrite `sizes'.
211
 */
212
BZIP3_API void bz3_decode_blocks(struct bz3_state * states[], uint8_t * buffers[], size_t buffer_sizes[], int32_t sizes[],
213
                                 int32_t orig_sizes[], int32_t n);
214
215
/**
216
 * @brief Check if using original file size as buffer size is sufficient for decompressing
217
 * a block at `block` pointer.
218
 * 
219
 * @param block Pointer to the compressed block data
220
 * @param block_size Size of the block buffer in bytes (must be at least 13 bytes for header)
221
 * @param orig_size Size of the original uncompressed data 
222
 * @return 1 if original size is sufficient, 0 if insufficient, -1 on header error (insufficient buffer size)
223
 * 
224
 * @remarks
225
 * 
226
 *      This function is useful for external APIs using the low level block encoding API,
227
 *      `bz3_encode_block`. You would normally call this directly after `bz3_encode_block`
228
 *      on the block that has been output.
229
 *      
230
 *      The purpose of this function is to prevent encoding blocks that would require an additional
231
 *      malloc at decompress time.
232
 *      The goal is to prevent erroring with `BZ3_ERR_DATA_SIZE_TOO_SMALL`, thus
233
 *      in turn avoiding the need for a temporary `bz3_bound(orig_size)`-sized buffer when decoding.
234
 */
235
BZIP3_API int bz3_orig_size_sufficient_for_decode(const uint8_t * block, size_t block_size, int32_t orig_size);
236
237
238
#ifdef __cplusplus
239
} /* extern "C" */
240
#endif
241
242
#endif
tab: 248 wrap: offon