-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fixed segmentation fault on Unix based systems.
- Loading branch information
1 parent
60afcd3
commit 38bc24e
Showing
9 changed files
with
81 additions
and
37 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,12 +8,14 @@ Moreover, the bsc-m03 compressor is a practical implementation of *Compression v | |
* Danny Dube, Vincent Beaudoin *Lossless Data Compression via Substring Enumeration*, 2010 | ||
* Takahiro Ota, Hiroyoshi Morita, Akiko Manada *Compression by Substring Enumeration with a Finite Alphabet Using Sorting*, 2018 | ||
|
||
Copyright (c) 2021-2022 Ilya Grebnov <[email protected]> | ||
Copyright (c) 2021-2023 Ilya Grebnov <[email protected]> | ||
|
||
## License | ||
The bsc-m03 is released under the [GNU General Public License](LICENSE "GNU General Public License") | ||
|
||
## Changes | ||
* 2023-05-08 : Version 0.5.5 | ||
* Fixed segmentation fault on Unix based systems. | ||
* 2022-11-27 : Version 0.5.0 | ||
* Compression ratio improvements. | ||
* 2022-11-20 : Version 0.4.0 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
0.5.0 | ||
0.5.5 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,7 +2,7 @@ | |
This file is a part of bsc-m03 project. | ||
Copyright (c) 2021-2022 Ilya Grebnov <[email protected]> | ||
Copyright (c) 2021-2023 Ilya Grebnov <[email protected]> | ||
bsc-m03 is free software: you can redistribute it and/or modify | ||
it under the terms of the GNU General Public License as published by | ||
|
@@ -50,7 +50,7 @@ int32_t root_frequencies[MAX_ALPHABET_SIZE + 1]; | |
template <class symbol_t> static int32_t compress_memory_block(uint8_t * buffer, int32_t block_size) | ||
{ | ||
int32_t indexes[32] = { -1 }; | ||
int32_t comressed_size = -1; | ||
int32_t compressed_size = -1; | ||
int32_t symbol_size = (int32_t)sizeof(symbol_t); | ||
int32_t block_symbols = block_size / symbol_size; | ||
int32_t r = next_power_of_2(std::max(block_symbols / 16, 1048576)); | ||
|
@@ -98,7 +98,7 @@ template <class symbol_t> static int32_t compress_memory_block(uint8_t * buffer, | |
parser->run(); | ||
parser->destroy(); | ||
|
||
comressed_size = coder.FinishEncoder(); | ||
compressed_size = coder.FinishEncoder(); | ||
} | ||
else | ||
{ | ||
|
@@ -112,6 +112,40 @@ template <class symbol_t> static int32_t compress_memory_block(uint8_t * buffer, | |
fprintf(stderr, "\nError: Not enough memory!\n"); | ||
} | ||
|
||
if (compressed_size >= block_size) | ||
{ | ||
compressed_size = -1; | ||
|
||
if (int32_t * libsais_temp = (int32_t *)malloc(((size_t)block_symbols + 1) * sizeof(int32_t))) | ||
{ | ||
{ | ||
int32_t primary_index = indexes[0]; | ||
|
||
memcpy(&((symbol_t *)buffer)[0] , &L[0] , primary_index * sizeof(symbol_t)); | ||
memcpy(&((symbol_t *)buffer)[primary_index], &L[primary_index + 1], ((size_t)block_symbols - (size_t)primary_index) * sizeof(symbol_t)); | ||
} | ||
|
||
result = symbol_size == 1 | ||
? libsais_unbwt_aux((uint8_t *)buffer, (uint8_t *)buffer, libsais_temp, block_symbols, root_frequencies, r, indexes) | ||
: libsais16_unbwt_aux((uint16_t *)buffer, (uint16_t *)buffer, libsais_temp, block_symbols, root_frequencies, r, indexes); | ||
|
||
if (result == 0) | ||
{ | ||
compressed_size = block_size; | ||
} | ||
else | ||
{ | ||
fprintf(stderr, "\nError: libsais_unbwt failed, please contact the author!\n"); | ||
} | ||
|
||
free(libsais_temp); | ||
} | ||
else | ||
{ | ||
fprintf(stderr, "\nError: Not enough memory!\n"); | ||
} | ||
} | ||
|
||
free(L); | ||
} | ||
else | ||
|
@@ -129,7 +163,7 @@ template <class symbol_t> static int32_t compress_memory_block(uint8_t * buffer, | |
fprintf(stderr, "\nError: Not enough memory!\n"); | ||
} | ||
|
||
return comressed_size; | ||
return compressed_size; | ||
} | ||
|
||
#endif | ||
|
@@ -178,13 +212,13 @@ template <class symbol_t> static int32_t decompress_burrows_wheeler_transform(Ra | |
return result; | ||
} | ||
|
||
static int32_t decompress_memory_block(uint8_t * buffer, int32_t comressed_size, int32_t block_size) | ||
static int32_t decompress_memory_block(uint8_t * buffer, int32_t compressed_size, int32_t block_size) | ||
{ | ||
RangeCoder coder; | ||
coder.InitDecoder(buffer); | ||
|
||
int32_t indexes[32] = { -1 }; | ||
int32_t decomressed_size = -1; | ||
int32_t decompressed_size = -1; | ||
int32_t symbol_size = coder.DecodeValue(1, 2); | ||
int32_t block_symbols = block_size / symbol_size; | ||
int32_t r = next_power_of_2(std::max(block_symbols / 16, 1048576)); | ||
|
@@ -208,7 +242,7 @@ static int32_t decompress_memory_block(uint8_t * buffer, int32_t comressed_size, | |
|
||
if (result == 0) | ||
{ | ||
decomressed_size = block_size; | ||
decompressed_size = block_size; | ||
} | ||
else | ||
{ | ||
|
@@ -223,7 +257,7 @@ static int32_t decompress_memory_block(uint8_t * buffer, int32_t comressed_size, | |
} | ||
} | ||
|
||
return decomressed_size; | ||
return decompressed_size; | ||
} | ||
|
||
#if !defined(BSC_DECOMPRESSION_ONLY) | ||
|
@@ -237,7 +271,9 @@ static int compress_file(const char * input_file_name, const char * output_file_ | |
{ | ||
fseeko(input_file, 0, SEEK_END); int64_t remaining_size = ftello(input_file); rewind(input_file); | ||
|
||
if (uint8_t * buffer = (uint8_t *)malloc(std::min(remaining_size, (int64_t)max_block_size) * sizeof(uint8_t))) | ||
int64_t buffer_size = std::min(remaining_size, (int64_t)max_block_size) + 16384; buffer_size += buffer_size / 16; | ||
|
||
if (uint8_t * buffer = (uint8_t *)malloc(buffer_size * sizeof(uint8_t))) | ||
{ | ||
int64_t input_bytes = 0, output_bytes = 0; | ||
|
||
|
@@ -253,33 +289,33 @@ static int compress_file(const char * input_file_name, const char * output_file_ | |
break; | ||
} | ||
|
||
int32_t comressed_size = symbol_size == 1 | ||
int32_t compressed_size = symbol_size == 1 | ||
? compress_memory_block<uint8_t> (buffer, block_size) | ||
: compress_memory_block<uint16_t>(buffer, block_size); | ||
|
||
if (comressed_size <= 0) { break; } | ||
if (compressed_size <= 0) { break; } | ||
|
||
if (fwrite(&block_size, sizeof(uint8_t), sizeof(block_size), output_file) != sizeof(block_size)) | ||
{ | ||
fprintf(stderr, "\nError: Unable to write output file!\n"); | ||
break; | ||
} | ||
|
||
if (fwrite(&comressed_size, sizeof(uint8_t), sizeof(comressed_size), output_file) != sizeof(comressed_size)) | ||
if (fwrite(&compressed_size, sizeof(uint8_t), sizeof(compressed_size), output_file) != sizeof(compressed_size)) | ||
{ | ||
fprintf(stderr, "\nError: Unable to write output file!\n"); | ||
break; | ||
} | ||
|
||
if (fwrite(buffer, sizeof(uint8_t), comressed_size, output_file) != comressed_size) | ||
if (fwrite(buffer, sizeof(uint8_t), compressed_size, output_file) != compressed_size) | ||
{ | ||
fprintf(stderr, "\nError: Unable to write output file\n"); | ||
break; | ||
} | ||
|
||
remaining_size -= block_size; | ||
input_bytes += block_size; | ||
output_bytes += sizeof(block_size) + sizeof(comressed_size) + comressed_size; | ||
output_bytes += sizeof(block_size) + sizeof(compressed_size) + compressed_size; | ||
} | ||
|
||
if (remaining_size == 0) | ||
|
@@ -333,43 +369,46 @@ static int decompress_file(const char * input_file_name, const char * output_fil | |
{ | ||
fprintf(stdout, "\rDecompressing %.55s(%02d%%)", input_file_name, (int)((input_bytes * 100) / (input_bytes + remaining_size))); fflush(stdout); | ||
|
||
int32_t block_size, comressed_size; | ||
int32_t block_size, compressed_size; | ||
if (fread(&block_size, sizeof(uint8_t), sizeof(block_size), input_file) != sizeof(block_size)) | ||
{ | ||
fprintf(stderr, "\nError: Unable to read input file!\n"); | ||
break; | ||
} | ||
|
||
if (fread(&comressed_size, sizeof(uint8_t), sizeof(comressed_size), input_file) != sizeof(comressed_size)) | ||
if (fread(&compressed_size, sizeof(uint8_t), sizeof(compressed_size), input_file) != sizeof(compressed_size)) | ||
{ | ||
fprintf(stderr, "\nError: Unable to read input file!\n"); | ||
break; | ||
} | ||
|
||
if (block_size > max_block_size || comressed_size > max_block_size) | ||
if (block_size > max_block_size || compressed_size > block_size) | ||
{ | ||
fprintf(stderr, "\nError: The compressed data is corrupted!\n"); | ||
break; | ||
} | ||
|
||
if (fread(buffer, sizeof(uint8_t), comressed_size, input_file) != comressed_size) | ||
if (fread(buffer, sizeof(uint8_t), compressed_size, input_file) != compressed_size) | ||
{ | ||
fprintf(stderr, "\nError: Unable to read input file!\n"); | ||
break; | ||
} | ||
|
||
int32_t decomressed_size = decompress_memory_block(buffer, comressed_size, block_size); | ||
if (decomressed_size != block_size) { break; } | ||
int32_t decompressed_size = compressed_size < block_size | ||
? decompress_memory_block(buffer, compressed_size, block_size) | ||
: block_size; | ||
|
||
if (decompressed_size != block_size) { break; } | ||
|
||
if (fwrite(buffer, sizeof(uint8_t), decomressed_size, output_file) != decomressed_size) | ||
if (fwrite(buffer, sizeof(uint8_t), decompressed_size, output_file) != decompressed_size) | ||
{ | ||
fprintf(stderr, "\nError: Unable to write output file\n"); | ||
break; | ||
} | ||
|
||
remaining_size -= sizeof(block_size) + sizeof(comressed_size) + comressed_size; | ||
input_bytes += sizeof(block_size) + sizeof(comressed_size) + comressed_size; | ||
output_bytes += decomressed_size; | ||
remaining_size -= sizeof(block_size) + sizeof(compressed_size) + compressed_size; | ||
input_bytes += sizeof(block_size) + sizeof(compressed_size) + compressed_size; | ||
output_bytes += decompressed_size; | ||
} | ||
|
||
if (remaining_size == 0) | ||
|
@@ -421,8 +460,8 @@ static int print_usage() | |
|
||
int main(int argc, const char * argv[]) | ||
{ | ||
fprintf(stdout, "bsc-m03 is experimental block sorting compressor. Version 0.5.0 (27 November 2022).\n"); | ||
fprintf(stdout, "Copyright (c) 2021-2022 Ilya Grebnov <[email protected]>. ABSOLUTELY NO WARRANTY.\n"); | ||
fprintf(stdout, "bsc-m03 is experimental block sorting compressor. Version 0.5.5 (8 May 2023).\n"); | ||
fprintf(stdout, "Copyright (c) 2021-2023 Ilya Grebnov <[email protected]>. ABSOLUTELY NO WARRANTY.\n"); | ||
fprintf(stdout, "This program is based on (at least) the work of Michael Maniscalco (see AUTHORS).\n\n"); | ||
|
||
int32_t max_block_size = 128 * 1024 * 1024; | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,7 +2,7 @@ | |
This file is a part of bsc-m03 project. | ||
Copyright (c) 2021-2022 Ilya Grebnov <[email protected]> | ||
Copyright (c) 2021-2023 Ilya Grebnov <[email protected]> | ||
bsc-m03 is free software: you can redistribute it and/or modify | ||
it under the terms of the GNU General Public License as published by | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,7 +2,7 @@ | |
This file is a part of bsc-m03 project. | ||
Copyright (c) 2021-2022 Ilya Grebnov <[email protected]> | ||
Copyright (c) 2021-2023 Ilya Grebnov <[email protected]> | ||
bsc-m03 is free software: you can redistribute it and/or modify | ||
it under the terms of the GNU General Public License as published by | ||
|
@@ -409,7 +409,7 @@ class m03_parser: m03_model<mode> | |
this->left_frequencies[offsets_index] = 1 + segment_end - segment_start; segment_end = segment_start; | ||
} | ||
|
||
hutucker_get_lengths(offsets_count, (unsigned long *)this->left_frequencies, this->hutucker_tmp); | ||
hutucker_get_lengths(offsets_count, (unsigned int *)this->left_frequencies, this->hutucker_tmp); | ||
} | ||
|
||
{ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,7 +2,7 @@ | |
|
||
This file is a part of bsc-m03 project. | ||
|
||
Copyright (c) 2021-2022 Ilya Grebnov <[email protected]> | ||
Copyright (c) 2021-2023 Ilya Grebnov <[email protected]> | ||
|
||
bsc-m03 is free software: you can redistribute it and/or modify | ||
it under the terms of the GNU General Public License as published by | ||
|