From 38bc24e0321676024b54e891014965b313d5fad3 Mon Sep 17 00:00:00 2001 From: Ilya Grebnov Date: Mon, 8 May 2023 09:32:25 -0700 Subject: [PATCH] Fixed segmentation fault on Unix based systems. --- CHANGES | 3 ++ README.md | 4 +- VERSION | 2 +- bsc-m03.cpp | 91 +++++++++++++++++++++++++++++++------------- hutucker/hu-tucker.c | 6 +-- hutucker/hu-tucker.h | 4 +- m03_model.h | 2 +- m03_parser.h | 4 +- m03_tables.h | 2 +- 9 files changed, 81 insertions(+), 37 deletions(-) diff --git a/CHANGES b/CHANGES index 815bf7f..64f3bac 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,6 @@ +* 2023-05-08 : Version 0.5.5 + * Fixed segmentation fault on Unix based systems. + * 2022-11-27 : Version 0.5.0 * Compression ratio improvements. diff --git a/README.md b/README.md index f5367a5..6b50b7e 100644 --- a/README.md +++ b/README.md @@ -8,12 +8,14 @@ Moreover, the bsc-m03 compressor is a practical implementation of *Compression v * Danny Dube, Vincent Beaudoin *Lossless Data Compression via Substring Enumeration*, 2010 * Takahiro Ota, Hiroyoshi Morita, Akiko Manada *Compression by Substring Enumeration with a Finite Alphabet Using Sorting*, 2018 -Copyright (c) 2021-2022 Ilya Grebnov +Copyright (c) 2021-2023 Ilya Grebnov ## License The bsc-m03 is released under the [GNU General Public License](LICENSE "GNU General Public License") ## Changes +* 2023-05-08 : Version 0.5.5 + * Fixed segmentation fault on Unix based systems. * 2022-11-27 : Version 0.5.0 * Compression ratio improvements. * 2022-11-20 : Version 0.4.0 diff --git a/VERSION b/VERSION index 79a2734..389facc 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.5.0 \ No newline at end of file +0.5.5 \ No newline at end of file diff --git a/bsc-m03.cpp b/bsc-m03.cpp index de81949..77cb22a 100644 --- a/bsc-m03.cpp +++ b/bsc-m03.cpp @@ -2,7 +2,7 @@ This file is a part of bsc-m03 project. - Copyright (c) 2021-2022 Ilya Grebnov + Copyright (c) 2021-2023 Ilya Grebnov bsc-m03 is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -50,7 +50,7 @@ int32_t root_frequencies[MAX_ALPHABET_SIZE + 1]; template static int32_t compress_memory_block(uint8_t * buffer, int32_t block_size) { int32_t indexes[32] = { -1 }; - int32_t comressed_size = -1; + int32_t compressed_size = -1; int32_t symbol_size = (int32_t)sizeof(symbol_t); int32_t block_symbols = block_size / symbol_size; int32_t r = next_power_of_2(std::max(block_symbols / 16, 1048576)); @@ -98,7 +98,7 @@ template static int32_t compress_memory_block(uint8_t * buffer, parser->run(); parser->destroy(); - comressed_size = coder.FinishEncoder(); + compressed_size = coder.FinishEncoder(); } else { @@ -112,6 +112,40 @@ template static int32_t compress_memory_block(uint8_t * buffer, fprintf(stderr, "\nError: Not enough memory!\n"); } + if (compressed_size >= block_size) + { + compressed_size = -1; + + if (int32_t * libsais_temp = (int32_t *)malloc(((size_t)block_symbols + 1) * sizeof(int32_t))) + { + { + int32_t primary_index = indexes[0]; + + memcpy(&((symbol_t *)buffer)[0] , &L[0] , primary_index * sizeof(symbol_t)); + memcpy(&((symbol_t *)buffer)[primary_index], &L[primary_index + 1], ((size_t)block_symbols - (size_t)primary_index) * sizeof(symbol_t)); + } + + result = symbol_size == 1 + ? libsais_unbwt_aux((uint8_t *)buffer, (uint8_t *)buffer, libsais_temp, block_symbols, root_frequencies, r, indexes) + : libsais16_unbwt_aux((uint16_t *)buffer, (uint16_t *)buffer, libsais_temp, block_symbols, root_frequencies, r, indexes); + + if (result == 0) + { + compressed_size = block_size; + } + else + { + fprintf(stderr, "\nError: libsais_unbwt failed, please contact the author!\n"); + } + + free(libsais_temp); + } + else + { + fprintf(stderr, "\nError: Not enough memory!\n"); + } + } + free(L); } else @@ -129,7 +163,7 @@ template static int32_t compress_memory_block(uint8_t * buffer, fprintf(stderr, "\nError: Not enough memory!\n"); } - return comressed_size; + return compressed_size; } #endif @@ -178,13 +212,13 @@ template static int32_t decompress_burrows_wheeler_transform(Ra return result; } -static int32_t decompress_memory_block(uint8_t * buffer, int32_t comressed_size, int32_t block_size) +static int32_t decompress_memory_block(uint8_t * buffer, int32_t compressed_size, int32_t block_size) { RangeCoder coder; coder.InitDecoder(buffer); int32_t indexes[32] = { -1 }; - int32_t decomressed_size = -1; + int32_t decompressed_size = -1; int32_t symbol_size = coder.DecodeValue(1, 2); int32_t block_symbols = block_size / symbol_size; int32_t r = next_power_of_2(std::max(block_symbols / 16, 1048576)); @@ -208,7 +242,7 @@ static int32_t decompress_memory_block(uint8_t * buffer, int32_t comressed_size, if (result == 0) { - decomressed_size = block_size; + decompressed_size = block_size; } else { @@ -223,7 +257,7 @@ static int32_t decompress_memory_block(uint8_t * buffer, int32_t comressed_size, } } - return decomressed_size; + return decompressed_size; } #if !defined(BSC_DECOMPRESSION_ONLY) @@ -237,7 +271,9 @@ static int compress_file(const char * input_file_name, const char * output_file_ { fseeko(input_file, 0, SEEK_END); int64_t remaining_size = ftello(input_file); rewind(input_file); - if (uint8_t * buffer = (uint8_t *)malloc(std::min(remaining_size, (int64_t)max_block_size) * sizeof(uint8_t))) + int64_t buffer_size = std::min(remaining_size, (int64_t)max_block_size) + 16384; buffer_size += buffer_size / 16; + + if (uint8_t * buffer = (uint8_t *)malloc(buffer_size * sizeof(uint8_t))) { int64_t input_bytes = 0, output_bytes = 0; @@ -253,11 +289,11 @@ static int compress_file(const char * input_file_name, const char * output_file_ break; } - int32_t comressed_size = symbol_size == 1 + int32_t compressed_size = symbol_size == 1 ? compress_memory_block (buffer, block_size) : compress_memory_block(buffer, block_size); - if (comressed_size <= 0) { break; } + if (compressed_size <= 0) { break; } if (fwrite(&block_size, sizeof(uint8_t), sizeof(block_size), output_file) != sizeof(block_size)) { @@ -265,13 +301,13 @@ static int compress_file(const char * input_file_name, const char * output_file_ break; } - if (fwrite(&comressed_size, sizeof(uint8_t), sizeof(comressed_size), output_file) != sizeof(comressed_size)) + if (fwrite(&compressed_size, sizeof(uint8_t), sizeof(compressed_size), output_file) != sizeof(compressed_size)) { fprintf(stderr, "\nError: Unable to write output file!\n"); break; } - if (fwrite(buffer, sizeof(uint8_t), comressed_size, output_file) != comressed_size) + if (fwrite(buffer, sizeof(uint8_t), compressed_size, output_file) != compressed_size) { fprintf(stderr, "\nError: Unable to write output file\n"); break; @@ -279,7 +315,7 @@ static int compress_file(const char * input_file_name, const char * output_file_ remaining_size -= block_size; input_bytes += block_size; - output_bytes += sizeof(block_size) + sizeof(comressed_size) + comressed_size; + output_bytes += sizeof(block_size) + sizeof(compressed_size) + compressed_size; } if (remaining_size == 0) @@ -333,43 +369,46 @@ static int decompress_file(const char * input_file_name, const char * output_fil { fprintf(stdout, "\rDecompressing %.55s(%02d%%)", input_file_name, (int)((input_bytes * 100) / (input_bytes + remaining_size))); fflush(stdout); - int32_t block_size, comressed_size; + int32_t block_size, compressed_size; if (fread(&block_size, sizeof(uint8_t), sizeof(block_size), input_file) != sizeof(block_size)) { fprintf(stderr, "\nError: Unable to read input file!\n"); break; } - if (fread(&comressed_size, sizeof(uint8_t), sizeof(comressed_size), input_file) != sizeof(comressed_size)) + if (fread(&compressed_size, sizeof(uint8_t), sizeof(compressed_size), input_file) != sizeof(compressed_size)) { fprintf(stderr, "\nError: Unable to read input file!\n"); break; } - if (block_size > max_block_size || comressed_size > max_block_size) + if (block_size > max_block_size || compressed_size > block_size) { fprintf(stderr, "\nError: The compressed data is corrupted!\n"); break; } - if (fread(buffer, sizeof(uint8_t), comressed_size, input_file) != comressed_size) + if (fread(buffer, sizeof(uint8_t), compressed_size, input_file) != compressed_size) { fprintf(stderr, "\nError: Unable to read input file!\n"); break; } - int32_t decomressed_size = decompress_memory_block(buffer, comressed_size, block_size); - if (decomressed_size != block_size) { break; } + int32_t decompressed_size = compressed_size < block_size + ? decompress_memory_block(buffer, compressed_size, block_size) + : block_size; + + if (decompressed_size != block_size) { break; } - if (fwrite(buffer, sizeof(uint8_t), decomressed_size, output_file) != decomressed_size) + if (fwrite(buffer, sizeof(uint8_t), decompressed_size, output_file) != decompressed_size) { fprintf(stderr, "\nError: Unable to write output file\n"); break; } - remaining_size -= sizeof(block_size) + sizeof(comressed_size) + comressed_size; - input_bytes += sizeof(block_size) + sizeof(comressed_size) + comressed_size; - output_bytes += decomressed_size; + remaining_size -= sizeof(block_size) + sizeof(compressed_size) + compressed_size; + input_bytes += sizeof(block_size) + sizeof(compressed_size) + compressed_size; + output_bytes += decompressed_size; } if (remaining_size == 0) @@ -421,8 +460,8 @@ static int print_usage() int main(int argc, const char * argv[]) { - fprintf(stdout, "bsc-m03 is experimental block sorting compressor. Version 0.5.0 (27 November 2022).\n"); - fprintf(stdout, "Copyright (c) 2021-2022 Ilya Grebnov . ABSOLUTELY NO WARRANTY.\n"); + fprintf(stdout, "bsc-m03 is experimental block sorting compressor. Version 0.5.5 (8 May 2023).\n"); + fprintf(stdout, "Copyright (c) 2021-2023 Ilya Grebnov . ABSOLUTELY NO WARRANTY.\n"); fprintf(stdout, "This program is based on (at least) the work of Michael Maniscalco (see AUTHORS).\n\n"); int32_t max_block_size = 128 * 1024 * 1024; diff --git a/hutucker/hu-tucker.c b/hutucker/hu-tucker.c index 82fe712..6e337c3 100644 --- a/hutucker/hu-tucker.c +++ b/hutucker/hu-tucker.c @@ -37,7 +37,7 @@ typedef struct { size_t i, j; } segnode; -static void segupdate(segnode *pa, segnode *lc, segnode *rc, unsigned long *w) { +static void segupdate(segnode *pa, segnode *lc, segnode *rc, unsigned int *w) { if (!lc->n) { *pa = *rc; return; @@ -87,7 +87,7 @@ size_t hutucker_tmp_size(size_t n) { sizeof(size_t) * (n + (2 * n - 1) + (2 * n - 1)); } -void hutucker_get_lengths(size_t n, unsigned long *weight, void *tmp) { +void hutucker_get_lengths(size_t n, unsigned int *weight, void *tmp) { size_t m = raise_power_of_two(n); segnode *seg = (segnode *) tmp; size_t *cur = (size_t *) (seg + 2 * m - 1); @@ -124,5 +124,5 @@ void hutucker_get_lengths(size_t n, unsigned long *weight, void *tmp) { for (size_t i = 2 * n - 3; i != (size_t) -1; i--) level[i] = level[pa[i]] + 1; for (size_t i = 0; i < n; i++) - weight[i] = (unsigned long)level[i]; + weight[i] = (unsigned int)level[i]; } diff --git a/hutucker/hu-tucker.h b/hutucker/hu-tucker.h index b637f4f..500c0bb 100644 --- a/hutucker/hu-tucker.h +++ b/hutucker/hu-tucker.h @@ -43,13 +43,13 @@ size_t hutucker_tmp_size(size_t n); * Precondition: * n: number of symbols * weight[i] (0 <= i < n): the weight of symbol i - * weight[0] + ... + weight[n - 1] must not exceed ULONG_MAX + * weight[0] + ... + weight[n - 1] must not exceed UINT_MAX * tmp: buffer with size >= hutucker_tmp_size(n) * * Postcondition: * weight[i] (0 <= i < n): the length of hu-tucker code of symbol i */ -void hutucker_get_lengths(size_t n, unsigned long *weight, void *tmp); +void hutucker_get_lengths(size_t n, unsigned int *weight, void *tmp); #ifdef __cplusplus } diff --git a/m03_model.h b/m03_model.h index b6d8a1a..f16fb97 100644 --- a/m03_model.h +++ b/m03_model.h @@ -2,7 +2,7 @@ This file is a part of bsc-m03 project. - Copyright (c) 2021-2022 Ilya Grebnov + Copyright (c) 2021-2023 Ilya Grebnov bsc-m03 is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/m03_parser.h b/m03_parser.h index b28e46a..bd1bea8 100644 --- a/m03_parser.h +++ b/m03_parser.h @@ -2,7 +2,7 @@ This file is a part of bsc-m03 project. - Copyright (c) 2021-2022 Ilya Grebnov + Copyright (c) 2021-2023 Ilya Grebnov bsc-m03 is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -409,7 +409,7 @@ class m03_parser: m03_model this->left_frequencies[offsets_index] = 1 + segment_end - segment_start; segment_end = segment_start; } - hutucker_get_lengths(offsets_count, (unsigned long *)this->left_frequencies, this->hutucker_tmp); + hutucker_get_lengths(offsets_count, (unsigned int *)this->left_frequencies, this->hutucker_tmp); } { diff --git a/m03_tables.h b/m03_tables.h index 30506de..14ac3f1 100644 --- a/m03_tables.h +++ b/m03_tables.h @@ -2,7 +2,7 @@ This file is a part of bsc-m03 project. - Copyright (c) 2021-2022 Ilya Grebnov + Copyright (c) 2021-2023 Ilya Grebnov bsc-m03 is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by