Skip to content

Commit

Permalink
Fixed segmentation fault on Unix-based systems.
Browse files Browse the repository at this point in the history
  • Loading branch information
IlyaGrebnov committed May 8, 2023
1 parent 60afcd3 commit 38bc24e
Show file tree
Hide file tree
Showing 9 changed files with 81 additions and 37 deletions.
3 changes: 3 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
* 2023-05-08 : Version 0.5.5
* Fixed segmentation fault on Unix-based systems.

* 2022-11-27 : Version 0.5.0
* Compression ratio improvements.

Expand Down
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,14 @@ Moreover, the bsc-m03 compressor is a practical implementation of *Compression v
* Danny Dube, Vincent Beaudoin *Lossless Data Compression via Substring Enumeration*, 2010
* Takahiro Ota, Hiroyoshi Morita, Akiko Manada *Compression by Substring Enumeration with a Finite Alphabet Using Sorting*, 2018

Copyright (c) 2021-2022 Ilya Grebnov <[email protected]>
Copyright (c) 2021-2023 Ilya Grebnov <[email protected]>

## License
The bsc-m03 is released under the [GNU General Public License](LICENSE "GNU General Public License")

## Changes
* 2023-05-08 : Version 0.5.5
* Fixed segmentation fault on Unix-based systems.
* 2022-11-27 : Version 0.5.0
* Compression ratio improvements.
* 2022-11-20 : Version 0.4.0
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.5.0
0.5.5
91 changes: 65 additions & 26 deletions bsc-m03.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
This file is a part of bsc-m03 project.
Copyright (c) 2021-2022 Ilya Grebnov <[email protected]>
Copyright (c) 2021-2023 Ilya Grebnov <[email protected]>
bsc-m03 is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -50,7 +50,7 @@ int32_t root_frequencies[MAX_ALPHABET_SIZE + 1];
template <class symbol_t> static int32_t compress_memory_block(uint8_t * buffer, int32_t block_size)
{
int32_t indexes[32] = { -1 };
int32_t comressed_size = -1;
int32_t compressed_size = -1;
int32_t symbol_size = (int32_t)sizeof(symbol_t);
int32_t block_symbols = block_size / symbol_size;
int32_t r = next_power_of_2(std::max(block_symbols / 16, 1048576));
Expand Down Expand Up @@ -98,7 +98,7 @@ template <class symbol_t> static int32_t compress_memory_block(uint8_t * buffer,
parser->run();
parser->destroy();

comressed_size = coder.FinishEncoder();
compressed_size = coder.FinishEncoder();
}
else
{
Expand All @@ -112,6 +112,40 @@ template <class symbol_t> static int32_t compress_memory_block(uint8_t * buffer,
fprintf(stderr, "\nError: Not enough memory!\n");
}

if (compressed_size >= block_size)
{
compressed_size = -1;

if (int32_t * libsais_temp = (int32_t *)malloc(((size_t)block_symbols + 1) * sizeof(int32_t)))
{
{
int32_t primary_index = indexes[0];

memcpy(&((symbol_t *)buffer)[0] , &L[0] , primary_index * sizeof(symbol_t));
memcpy(&((symbol_t *)buffer)[primary_index], &L[primary_index + 1], ((size_t)block_symbols - (size_t)primary_index) * sizeof(symbol_t));
}

result = symbol_size == 1
? libsais_unbwt_aux((uint8_t *)buffer, (uint8_t *)buffer, libsais_temp, block_symbols, root_frequencies, r, indexes)
: libsais16_unbwt_aux((uint16_t *)buffer, (uint16_t *)buffer, libsais_temp, block_symbols, root_frequencies, r, indexes);

if (result == 0)
{
compressed_size = block_size;
}
else
{
fprintf(stderr, "\nError: libsais_unbwt failed, please contact the author!\n");
}

free(libsais_temp);
}
else
{
fprintf(stderr, "\nError: Not enough memory!\n");
}
}

free(L);
}
else
Expand All @@ -129,7 +163,7 @@ template <class symbol_t> static int32_t compress_memory_block(uint8_t * buffer,
fprintf(stderr, "\nError: Not enough memory!\n");
}

return comressed_size;
return compressed_size;
}

#endif
Expand Down Expand Up @@ -178,13 +212,13 @@ template <class symbol_t> static int32_t decompress_burrows_wheeler_transform(Ra
return result;
}

static int32_t decompress_memory_block(uint8_t * buffer, int32_t comressed_size, int32_t block_size)
static int32_t decompress_memory_block(uint8_t * buffer, int32_t compressed_size, int32_t block_size)
{
RangeCoder coder;
coder.InitDecoder(buffer);

int32_t indexes[32] = { -1 };
int32_t decomressed_size = -1;
int32_t decompressed_size = -1;
int32_t symbol_size = coder.DecodeValue(1, 2);
int32_t block_symbols = block_size / symbol_size;
int32_t r = next_power_of_2(std::max(block_symbols / 16, 1048576));
Expand All @@ -208,7 +242,7 @@ static int32_t decompress_memory_block(uint8_t * buffer, int32_t comressed_size,

if (result == 0)
{
decomressed_size = block_size;
decompressed_size = block_size;
}
else
{
Expand All @@ -223,7 +257,7 @@ static int32_t decompress_memory_block(uint8_t * buffer, int32_t comressed_size,
}
}

return decomressed_size;
return decompressed_size;
}

#if !defined(BSC_DECOMPRESSION_ONLY)
Expand All @@ -237,7 +271,9 @@ static int compress_file(const char * input_file_name, const char * output_file_
{
fseeko(input_file, 0, SEEK_END); int64_t remaining_size = ftello(input_file); rewind(input_file);

if (uint8_t * buffer = (uint8_t *)malloc(std::min(remaining_size, (int64_t)max_block_size) * sizeof(uint8_t)))
int64_t buffer_size = std::min(remaining_size, (int64_t)max_block_size) + 16384; buffer_size += buffer_size / 16;

if (uint8_t * buffer = (uint8_t *)malloc(buffer_size * sizeof(uint8_t)))
{
int64_t input_bytes = 0, output_bytes = 0;

Expand All @@ -253,33 +289,33 @@ static int compress_file(const char * input_file_name, const char * output_file_
break;
}

int32_t comressed_size = symbol_size == 1
int32_t compressed_size = symbol_size == 1
? compress_memory_block<uint8_t> (buffer, block_size)
: compress_memory_block<uint16_t>(buffer, block_size);

if (comressed_size <= 0) { break; }
if (compressed_size <= 0) { break; }

if (fwrite(&block_size, sizeof(uint8_t), sizeof(block_size), output_file) != sizeof(block_size))
{
fprintf(stderr, "\nError: Unable to write output file!\n");
break;
}

if (fwrite(&comressed_size, sizeof(uint8_t), sizeof(comressed_size), output_file) != sizeof(comressed_size))
if (fwrite(&compressed_size, sizeof(uint8_t), sizeof(compressed_size), output_file) != sizeof(compressed_size))
{
fprintf(stderr, "\nError: Unable to write output file!\n");
break;
}

if (fwrite(buffer, sizeof(uint8_t), comressed_size, output_file) != comressed_size)
if (fwrite(buffer, sizeof(uint8_t), compressed_size, output_file) != compressed_size)
{
fprintf(stderr, "\nError: Unable to write output file\n");
break;
}

remaining_size -= block_size;
input_bytes += block_size;
output_bytes += sizeof(block_size) + sizeof(comressed_size) + comressed_size;
output_bytes += sizeof(block_size) + sizeof(compressed_size) + compressed_size;
}

if (remaining_size == 0)
Expand Down Expand Up @@ -333,43 +369,46 @@ static int decompress_file(const char * input_file_name, const char * output_fil
{
fprintf(stdout, "\rDecompressing %.55s(%02d%%)", input_file_name, (int)((input_bytes * 100) / (input_bytes + remaining_size))); fflush(stdout);

int32_t block_size, comressed_size;
int32_t block_size, compressed_size;
if (fread(&block_size, sizeof(uint8_t), sizeof(block_size), input_file) != sizeof(block_size))
{
fprintf(stderr, "\nError: Unable to read input file!\n");
break;
}

if (fread(&comressed_size, sizeof(uint8_t), sizeof(comressed_size), input_file) != sizeof(comressed_size))
if (fread(&compressed_size, sizeof(uint8_t), sizeof(compressed_size), input_file) != sizeof(compressed_size))
{
fprintf(stderr, "\nError: Unable to read input file!\n");
break;
}

if (block_size > max_block_size || comressed_size > max_block_size)
if (block_size > max_block_size || compressed_size > block_size)
{
fprintf(stderr, "\nError: The compressed data is corrupted!\n");
break;
}

if (fread(buffer, sizeof(uint8_t), comressed_size, input_file) != comressed_size)
if (fread(buffer, sizeof(uint8_t), compressed_size, input_file) != compressed_size)
{
fprintf(stderr, "\nError: Unable to read input file!\n");
break;
}

int32_t decomressed_size = decompress_memory_block(buffer, comressed_size, block_size);
if (decomressed_size != block_size) { break; }
int32_t decompressed_size = compressed_size < block_size
? decompress_memory_block(buffer, compressed_size, block_size)
: block_size;

if (decompressed_size != block_size) { break; }

if (fwrite(buffer, sizeof(uint8_t), decomressed_size, output_file) != decomressed_size)
if (fwrite(buffer, sizeof(uint8_t), decompressed_size, output_file) != decompressed_size)
{
fprintf(stderr, "\nError: Unable to write output file\n");
break;
}

remaining_size -= sizeof(block_size) + sizeof(comressed_size) + comressed_size;
input_bytes += sizeof(block_size) + sizeof(comressed_size) + comressed_size;
output_bytes += decomressed_size;
remaining_size -= sizeof(block_size) + sizeof(compressed_size) + compressed_size;
input_bytes += sizeof(block_size) + sizeof(compressed_size) + compressed_size;
output_bytes += decompressed_size;
}

if (remaining_size == 0)
Expand Down Expand Up @@ -421,8 +460,8 @@ static int print_usage()

int main(int argc, const char * argv[])
{
fprintf(stdout, "bsc-m03 is experimental block sorting compressor. Version 0.5.0 (27 November 2022).\n");
fprintf(stdout, "Copyright (c) 2021-2022 Ilya Grebnov <[email protected]>. ABSOLUTELY NO WARRANTY.\n");
fprintf(stdout, "bsc-m03 is experimental block sorting compressor. Version 0.5.5 (8 May 2023).\n");
fprintf(stdout, "Copyright (c) 2021-2023 Ilya Grebnov <[email protected]>. ABSOLUTELY NO WARRANTY.\n");
fprintf(stdout, "This program is based on (at least) the work of Michael Maniscalco (see AUTHORS).\n\n");

int32_t max_block_size = 128 * 1024 * 1024;
Expand Down
6 changes: 3 additions & 3 deletions hutucker/hu-tucker.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ typedef struct {
size_t i, j;
} segnode;

static void segupdate(segnode *pa, segnode *lc, segnode *rc, unsigned long *w) {
static void segupdate(segnode *pa, segnode *lc, segnode *rc, unsigned int *w) {
if (!lc->n) {
*pa = *rc;
return;
Expand Down Expand Up @@ -87,7 +87,7 @@ size_t hutucker_tmp_size(size_t n) {
sizeof(size_t) * (n + (2 * n - 1) + (2 * n - 1));
}

void hutucker_get_lengths(size_t n, unsigned long *weight, void *tmp) {
void hutucker_get_lengths(size_t n, unsigned int *weight, void *tmp) {
size_t m = raise_power_of_two(n);
segnode *seg = (segnode *) tmp;
size_t *cur = (size_t *) (seg + 2 * m - 1);
Expand Down Expand Up @@ -124,5 +124,5 @@ void hutucker_get_lengths(size_t n, unsigned long *weight, void *tmp) {
for (size_t i = 2 * n - 3; i != (size_t) -1; i--)
level[i] = level[pa[i]] + 1;
for (size_t i = 0; i < n; i++)
weight[i] = (unsigned long)level[i];
weight[i] = (unsigned int)level[i];
}
4 changes: 2 additions & 2 deletions hutucker/hu-tucker.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,13 @@ size_t hutucker_tmp_size(size_t n);
* Precondition:
* n: number of symbols
* weight[i] (0 <= i < n): the weight of symbol i
* weight[0] + ... + weight[n - 1] must not exceed ULONG_MAX
* weight[0] + ... + weight[n - 1] must not exceed UINT_MAX
* tmp: buffer with size >= hutucker_tmp_size(n)
*
* Postcondition:
* weight[i] (0 <= i < n): the length of hu-tucker code of symbol i
*/
void hutucker_get_lengths(size_t n, unsigned long *weight, void *tmp);
void hutucker_get_lengths(size_t n, unsigned int *weight, void *tmp);

#ifdef __cplusplus
}
Expand Down
2 changes: 1 addition & 1 deletion m03_model.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
This file is a part of bsc-m03 project.
Copyright (c) 2021-2022 Ilya Grebnov <[email protected]>
Copyright (c) 2021-2023 Ilya Grebnov <[email protected]>
bsc-m03 is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
Expand Down
4 changes: 2 additions & 2 deletions m03_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
This file is a part of bsc-m03 project.
Copyright (c) 2021-2022 Ilya Grebnov <[email protected]>
Copyright (c) 2021-2023 Ilya Grebnov <[email protected]>
bsc-m03 is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -409,7 +409,7 @@ class m03_parser: m03_model<mode>
this->left_frequencies[offsets_index] = 1 + segment_end - segment_start; segment_end = segment_start;
}

hutucker_get_lengths(offsets_count, (unsigned long *)this->left_frequencies, this->hutucker_tmp);
hutucker_get_lengths(offsets_count, (unsigned int *)this->left_frequencies, this->hutucker_tmp);
}

{
Expand Down
2 changes: 1 addition & 1 deletion m03_tables.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

This file is a part of bsc-m03 project.

Copyright (c) 2021-2022 Ilya Grebnov <[email protected]>
Copyright (c) 2021-2023 Ilya Grebnov <[email protected]>

bsc-m03 is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
Expand Down

0 comments on commit 38bc24e

Please sign in to comment.