/*
 * BZip3 - A spiritual successor to BZip2.
 * Copyright (C) 2022-2024 Kamila Szewczyk
 *
 * This program is free software: you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published by the Free
 * Software Foundation, either version 3 of the License, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU Lesser General Public License along with
 * this program. If not, see <https://www.gnu.org/licenses/>.
 */

#ifndef LIBBZ3_H
#define LIBBZ3_H

/* size_t comes from <stddef.h>; the fixed-width integer types come from <stdint.h>. */
#include <stddef.h>
#include <stdint.h>

/* Symbol visibility control. */
#ifndef BZIP3_VISIBLE
    #if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
        #define BZIP3_VISIBLE __attribute__((visibility("default")))
    #else
        #define BZIP3_VISIBLE
    #endif
#endif

/* Define BZIP3_DLL_EXPORT=1 or BZIP3_DLL_IMPORT=1 to add the matching __declspec to every public symbol. */
#if defined(BZIP3_DLL_EXPORT) && (BZIP3_DLL_EXPORT == 1)
    #define BZIP3_API __declspec(dllexport) BZIP3_VISIBLE
#elif defined(BZIP3_DLL_IMPORT) && (BZIP3_DLL_IMPORT == 1)
    #define BZIP3_API __declspec(dllimport) BZIP3_VISIBLE
#else
    #define BZIP3_API BZIP3_VISIBLE
#endif

#ifdef __cplusplus
extern "C" {
#endif

/* bzip3 error codes. BZ3_OK signals success; every error code is negative. */
#define BZ3_OK 0
#define BZ3_ERR_OUT_OF_BOUNDS -1
#define BZ3_ERR_BWT -2
#define BZ3_ERR_CRC -3
#define BZ3_ERR_MALFORMED_HEADER -4
#define BZ3_ERR_TRUNCATED_DATA -5
#define BZ3_ERR_DATA_TOO_BIG -6
#define BZ3_ERR_INIT -7
#define BZ3_ERR_DATA_SIZE_TOO_SMALL -8

/* Opaque block encoder/decoder state; create with bz3_new(), release with bz3_free(). */
struct bz3_state;

/**
 * @brief Get bzip3 version.
 */
BZIP3_API const char * bz3_version(void);

/**
 * @brief Get the last error number associated with a given state.
 */
BZIP3_API int8_t bz3_last_error(struct bz3_state * state);

/**
 * @brief Return a user-readable message explaining the cause of the last error.
 */
BZIP3_API const char * bz3_strerror(struct bz3_state * state);

/**
 * @brief Construct a new block encoder state, which will encode blocks as big as the given block size.
 * The decoder will be able to decode blocks at most as big as the given block size.
 * Returns NULL in case allocation fails or the block size is not between 65K and 511M.
 */
BZIP3_API struct bz3_state * bz3_new(int32_t block_size);

/**
 * @brief Free the memory occupied by a block encoder state.
 */
BZIP3_API void bz3_free(struct bz3_state * state);

/**
 * @brief Return the recommended size of the output buffer for the compression functions.
 */
BZIP3_API size_t bz3_bound(size_t input_size);

/* ** HIGH LEVEL APIs ** */

/**
 * @brief Compress a frame. This function does not support parallelism
 * by itself, consider using the low level `bz3_encode_blocks()` function instead.
 * Using the low level API might provide better performance.
 * Returns a bzip3 error code; BZ3_OK when the operation is successful.
 * Make sure to set out_size to the size of the output buffer before the operation;
 * out_size must be at least equal to `bz3_bound(in_size)'.
 */
BZIP3_API int bz3_compress(uint32_t block_size, const uint8_t * in, uint8_t * out, size_t in_size, size_t * out_size);

/**
 * @brief Decompress a frame. This function does not support parallelism
 * by itself, consider using the low level `bz3_decode_blocks()` function instead.
 * Using the low level API might provide better performance.
 * Returns a bzip3 error code; BZ3_OK when the operation is successful.
 * Make sure to set out_size to the size of the output buffer before the operation.
 */
BZIP3_API int bz3_decompress(const uint8_t * in, uint8_t * out, size_t in_size, size_t * out_size);

/**
 * @brief Calculate the minimal memory required for compression with the given block size.
 * This includes all internal buffers and state structures. This calculates the amount of bytes
 * that will be allocated by a call to `bz3_new()`.
 *
 * @details Memory allocation and usage patterns:
 *
 * bz3_new():
 * - Allocates all memory upfront:
 *   - Core state structure (sizeof(struct bz3_state))
 *   - Swap buffer (bz3_bound(block_size) bytes)
 *   - SAIS array (BWT_BOUND(block_size) * sizeof(int32_t) bytes)
 *   - LZP lookup table ((1 << LZP_DICTIONARY) * sizeof(int32_t) bytes)
 *   - Compression state (sizeof(state))
 * - All memory remains allocated until bz3_free()
 *
 * Additional memory may be used depending on API used from here.
 *
 * # Low Level APIs
 *
 * 1. bz3_encode_block() / bz3_decode_block():
 *    - Uses pre-allocated memory from bz3_new()
 *    - No additional memory allocation except for libsais (usually ~16KiB)
 *    - Peak memory usage of physical RAM varies with compression stages:
 *      - LZP: Uses LZP lookup table + swap buffer
 *      - BWT: Uses SAIS array + swap buffer
 *      - Entropy coding: Uses compression state (cm_state) + swap buffer
 *
 * Using the higher level API, `bz3_compress`, expect an additional allocation
 * of `bz3_bound(block_size)`.
 *
 * In the parallel version `bz3_encode_blocks`, each thread gets its own state,
 * so memory usage is `n_threads * bz3_min_memory_needed()`.
 *
 * # High Level APIs
 *
 * 1. bz3_compress():
 *    - Allocates additional temporary compression buffer (bz3_bound(block_size) bytes)
 *      in addition to the memory amount returned by this method call and libsais.
 *    - Everything is freed after compression completes
 *
 * 2. bz3_decompress():
 *    - Allocates additional temporary compression buffer (bz3_bound(block_size) bytes)
 *      in addition to the memory amount returned by this method call and libsais.
 *    - Everything is freed after decompression completes
 *
 * Memory remains constant during operation, with the exception of some small allocations from libsais
 * during the BWT stage. That is not accounted for by this function, though it usually amounts to ~16KiB,
 * which is negligible. The worst case of BWT is 2*block_size technically speaking.
 *
 * No dynamic (re)allocation occurs outside of that.
 *
 * @param block_size The block size to be used for compression
 * @return The total number of bytes required for compression, or 0 if block_size is invalid
 */
BZIP3_API size_t bz3_min_memory_needed(int32_t block_size);

/* ** LOW LEVEL APIs ** */

/**
 * @brief Encode a single block. Returns the amount of bytes written to `buffer'.
 * `buffer' must be able to hold at least `bz3_bound(size)' bytes. The size must not
 * exceed the block size associated with the state.
 */
BZIP3_API int32_t bz3_encode_block(struct bz3_state * state, uint8_t * buffer, int32_t size);

/**
 * @brief Decode a single block.
 *
 * `buffer' must be able to hold at least `bz3_bound(orig_size)' bytes
 * in order to ensure decompression will succeed for all possible bzip3 blocks.
 *
 * In most (but not all) cases, a buffer of `orig_size` bytes is sufficient.
 * If it is not sufficient, you must allocate a buffer of size `bz3_bound(orig_size)` temporarily.
 *
 * If `buffer_size` is too small, `BZ3_ERR_DATA_SIZE_TOO_SMALL` will be returned.
 * The size must not exceed the block size associated with the state.
 *
 * @param buffer_size The size of the buffer at 'buffer'
 * @param compressed_size The size of the compressed data in 'buffer'
 * @param orig_size The original size of the data before compression.
 */
BZIP3_API int32_t bz3_decode_block(struct bz3_state * state, uint8_t * buffer, size_t buffer_size,
                                   int32_t compressed_size, int32_t orig_size);

/**
 * @brief Encode `n' blocks, all in parallel.
 * All specifics of the `bz3_encode_block' still hold. The function will launch a thread for each block.
 * The compressed sizes are written to the `sizes' array. Every buffer is overwritten and none of them can overlap.
 * Precisely `n' states, buffers and sizes must be supplied.
 *
 * Expects `n' between 2 and 16.
 *
 * Present in the shared library only if -lpthread was present during building.
 */
BZIP3_API void bz3_encode_blocks(struct bz3_state * states[], uint8_t * buffers[], int32_t sizes[], int32_t n);

/**
 * @brief Decode `n' blocks, all in parallel.
 * Same specifics as `bz3_encode_blocks', but doesn't overwrite `sizes'.
 */
BZIP3_API void bz3_decode_blocks(struct bz3_state * states[], uint8_t * buffers[], size_t buffer_sizes[],
                                 int32_t sizes[], int32_t orig_sizes[], int32_t n);

/**
 * @brief Check if using original file size as buffer size is sufficient for decompressing
 * a block at `block` pointer.
 *
 * @param block Pointer to the compressed block data
 * @param block_size Size of the block buffer in bytes (must be at least 13 bytes for header)
 * @param orig_size Size of the original uncompressed data
 * @return 1 if original size is sufficient, 0 if insufficient, -1 on header error (insufficient buffer size)
 *
 * @remarks
 *
 * This function is useful for external APIs using the low level block encoding API,
 * `bz3_encode_block`. You would normally call this directly after `bz3_encode_block`
 * on the block that has been output.
 *
 * The purpose of this function is to prevent encoding blocks that would require an additional
 * malloc at decompress time.
 * The goal is to prevent erroring with `BZ3_ERR_DATA_SIZE_TOO_SMALL`, thus
 * in turn avoiding the need for a temporary `bz3_bound(orig_size)` buffer when decoding.
 */
BZIP3_API int bz3_orig_size_sufficient_for_decode(const uint8_t * block, size_t block_size, int32_t orig_size);

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif /* LIBBZ3_H */