diff --git a/CHANGES b/CHANGES index 199a085..0bb6703 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,6 @@ +Changes in 1.1.0 (November 30, 2023) +- New API to find matches within specified sliding window. + Changes in 1.0.1 (June 19, 2022) - Improved cache coherence for ARMv8 architecture. diff --git a/README.md b/README.md index f34330e..4a96723 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ The esa-matchfinder is a C99 library for efficient Lempel-Ziv factorization using enhanced suffix array (ESA). -Copyright (c) 2022 Ilya Grebnov +Copyright (c) 2022-2023 Ilya Grebnov > * The esa-matchfinder is block based algorithm with maximum supported block size of 512 megabytes finding matches in range of 2..64 bytes using 12x bytes of extra memory. ESA_MATCHFINDER_MATCH_BITS definition could be changed to support larger match finding range, but with reduction in maximum supported block size. > * The esa-matchfinder does not employ any heuristics or search depth limitations and always finds distance optimal matches even on highly repetitive sources. The only exception is matches at beginning at the block; due to implementation details the esa-matchfinder can not find any matches with offset 0. @@ -29,6 +29,8 @@ The esa-matchfinder finds all distance optimal matches (between min_match_length The esa-matchfinder released under the [Apache License Version 2.0](LICENSE "Apache license") and is considered suitable for production use. However, no warranty or fitness for a particular purpose is expressed or implied. ## Changes +* November 30, 2023 (1.1.0) + * New API to find matches within specified sliding window. * June 19, 2022 (1.0.1) * Improved cache coherence for ARMv8 architecture. 
* June 12, 2022 (1.0.0) diff --git a/VERSION b/VERSION index 7f20734..1cc5f65 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.0.1 \ No newline at end of file +1.1.0 \ No newline at end of file diff --git a/esa_matchfinder.c b/esa_matchfinder.c index 67506ee..12656d0 100644 --- a/esa_matchfinder.c +++ b/esa_matchfinder.c @@ -3,7 +3,7 @@ This file is a part of esa-matchfinder, a library for efficient Lempel-Ziv factorization using enhanced suffix array (ESA). - Copyright (c) 2022 Ilya Grebnov + Copyright (c) 2022-2023 Ilya Grebnov Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -714,7 +714,61 @@ ESA_MATCHFINDER_MATCH * esa_matchfinder_find_all_matches(void * mf, ESA_MATCHFIN const uint64_t min_match_length = (uint64_t)matchfinder_ctx->min_match_length_minus_1; const uint64_t new_offset = (uint64_t)position << ESA_MF_OFFSET_SHIFT; - uint64_t best_match = ESA_MATCHFINDER_MAX_MATCH_LENGTH; + uint64_t best_match = (uint64_t)(uint32_t)-1; + uint64_t reference = plcp_leaf_link[position]; + + while (reference != 0) + { + const uint64_t interval = sa_parent_link[reference]; + const uint64_t match = min_match_length + (interval >> ESA_MF_LCP_SHIFT) + ((interval & ESA_MF_OFFSET_MASK) << (32 - ESA_MF_OFFSET_SHIFT)); + +#if defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__) + if (offsetof(ESA_MATCHFINDER_MATCH, length) == 0 && offsetof(ESA_MATCHFINDER_MATCH, offset) == 4) + { + *(uint64_t *)(void *)next_match = match; + } + else +#endif + { + next_match->length = (int32_t)(match ); + next_match->offset = (int32_t)(match >> 32); + } + + next_match += match > best_match; + best_match = match; + + sa_parent_link[reference] = (interval & (~ESA_MF_OFFSET_MASK)) + new_offset; + reference = interval & ESA_MF_PARENT_MASK; + } + + return next_match; +} + +ESA_MATCHFINDER_MATCH * esa_matchfinder_find_all_matches_in_window(void * mf, ESA_MATCHFINDER_MATCH * matches, uint64_t window_size) +{ + 
ESA_MF_CONTEXT * ESA_MF_RESTRICT const matchfinder_ctx = (ESA_MF_CONTEXT *)mf; + + const ptrdiff_t prefetch_distance = 4; + const uint64_t position = matchfinder_ctx->position++; + + uint64_t * ESA_MF_RESTRICT const sa_parent_link = matchfinder_ctx->sa_parent_link; + uint32_t * ESA_MF_RESTRICT const plcp_leaf_link = matchfinder_ctx->plcp_leaf_link; + uint64_t * ESA_MF_RESTRICT const prefetch = &matchfinder_ctx->prefetch[position & (prefetch_distance - 1)][0]; + ESA_MATCHFINDER_MATCH * ESA_MF_RESTRICT next_match = matches; + + esa_matchfinder_prefetchw(&sa_parent_link[ (sa_parent_link[prefetch[0]] & ESA_MF_PARENT_MASK)]); + esa_matchfinder_prefetchw(&sa_parent_link[prefetch[0] = (sa_parent_link[prefetch[1]] & ESA_MF_PARENT_MASK)]); + esa_matchfinder_prefetchw(&sa_parent_link[prefetch[1] = (sa_parent_link[prefetch[2]] & ESA_MF_PARENT_MASK)]); + esa_matchfinder_prefetchw(&sa_parent_link[prefetch[2] = (sa_parent_link[prefetch[3]] & ESA_MF_PARENT_MASK)]); + esa_matchfinder_prefetchw(&sa_parent_link[prefetch[3] = (sa_parent_link[prefetch[4]] & ESA_MF_PARENT_MASK)]); + esa_matchfinder_prefetchw(&sa_parent_link[prefetch[4] = (sa_parent_link[prefetch[5]] & ESA_MF_PARENT_MASK)]); + esa_matchfinder_prefetchw(&sa_parent_link[prefetch[5] = (sa_parent_link[prefetch[6]] & ESA_MF_PARENT_MASK)]); + esa_matchfinder_prefetchw(&sa_parent_link[prefetch[6] = (plcp_leaf_link[position + 8 * prefetch_distance])]); + esa_matchfinder_prefetchr(&plcp_leaf_link[position + 9 * prefetch_distance]); + + const uint64_t min_match_length = (uint64_t)matchfinder_ctx->min_match_length_minus_1; + const uint64_t new_offset = (uint64_t)position << ESA_MF_OFFSET_SHIFT; + uint64_t best_match = (position > window_size ? 
(position - window_size) << 32 : 0) + (uint64_t)(uint32_t)-1; uint64_t reference = plcp_leaf_link[position]; while (reference != 0) @@ -801,6 +855,65 @@ ESA_MATCHFINDER_MATCH esa_matchfinder_find_best_match(void * mf) } } +ESA_MATCHFINDER_MATCH esa_matchfinder_find_best_match_in_window(void * mf, uint64_t window_size) +{ + /* Walks the parent chain that starts at plcp_leaf_link[position] and returns the first candidate whose packed value exceeds match_cutoff; a match with zero length and zero offset is returned when no candidate passes. mf is cast to the ESA_MF_CONTEXT it is expected to point to. */ ESA_MF_CONTEXT * ESA_MF_RESTRICT const matchfinder_ctx = (ESA_MF_CONTEXT *)mf; + + const ptrdiff_t prefetch_distance = 4; + /* The context position is post-incremented, so every call moves the match-finder forward by one position. */ const uint64_t position = matchfinder_ctx->position++; + + uint64_t * ESA_MF_RESTRICT const sa_parent_link = matchfinder_ctx->sa_parent_link; + uint32_t * ESA_MF_RESTRICT const plcp_leaf_link = matchfinder_ctx->plcp_leaf_link; + /* One row of saved links is selected by the low bits of position (position modulo prefetch_distance). */ uint64_t * ESA_MF_RESTRICT const prefetch = &matchfinder_ctx->prefetch[position & (prefetch_distance - 1)][0]; + + /* Advance the saved links of this row: entry k takes the parent link of entry k + 1, and every resulting sa_parent_link address is handed to esa_matchfinder_prefetchw. The last entry is reloaded from plcp_leaf_link at position + 8 * prefetch_distance, and the plcp_leaf_link entry one prefetch_distance further is handed to esa_matchfinder_prefetchr. */ esa_matchfinder_prefetchw(&sa_parent_link[ (sa_parent_link[prefetch[0]] & ESA_MF_PARENT_MASK)]); + esa_matchfinder_prefetchw(&sa_parent_link[prefetch[0] = (sa_parent_link[prefetch[1]] & ESA_MF_PARENT_MASK)]); + esa_matchfinder_prefetchw(&sa_parent_link[prefetch[1] = (sa_parent_link[prefetch[2]] & ESA_MF_PARENT_MASK)]); + esa_matchfinder_prefetchw(&sa_parent_link[prefetch[2] = (sa_parent_link[prefetch[3]] & ESA_MF_PARENT_MASK)]); + esa_matchfinder_prefetchw(&sa_parent_link[prefetch[3] = (sa_parent_link[prefetch[4]] & ESA_MF_PARENT_MASK)]); + esa_matchfinder_prefetchw(&sa_parent_link[prefetch[4] = (sa_parent_link[prefetch[5]] & ESA_MF_PARENT_MASK)]); + esa_matchfinder_prefetchw(&sa_parent_link[prefetch[5] = (sa_parent_link[prefetch[6]] & ESA_MF_PARENT_MASK)]); + esa_matchfinder_prefetchw(&sa_parent_link[prefetch[6] = (plcp_leaf_link[position + 8 * prefetch_distance])]); + esa_matchfinder_prefetchr(&plcp_leaf_link[position + 9 * prefetch_distance]); + + /* Despite its name, this local holds min_match_length_minus_1 from the context. */ const uint64_t min_match_length = (uint64_t)matchfinder_ctx->min_match_length_minus_1; + /* Current position pre-shifted into the offset field of an sa_parent_link entry. */ const uint64_t new_offset = (uint64_t)position << ESA_MF_OFFSET_SHIFT; + /* Threshold in the packed layout used below (offset in the high 32 bits, length in the low 32 bits): the high half is position - window_size, or 0 when position does not exceed window_size, and the low half is all ones. NOTE(review): the comparison against match_cutoff in the loop is strict, so a candidate whose offset equals position - window_size is rejected, i.e. accepted candidates satisfy position - offset < window_size - confirm this is the intended meaning of window_size. */ const uint64_t match_cutoff = (position > window_size ? 
(position - window_size) << 32 : 0) + (uint64_t)(uint32_t)-1; + + /* Zero doubles as the "nothing accepted yet" marker and as the value returned when no candidate passes. */ uint64_t best_match = 0; + uint64_t reference = plcp_leaf_link[position]; + + /* A zero reference terminates the parent chain. */ while (reference != 0) + { + const uint64_t interval = sa_parent_link[reference]; + /* Pack the candidate: min_match_length_minus_1 plus the LCP field in the low bits, and the stored offset field moved up to start at bit 32 (the same layout that is unpacked at the end of the function). */ uint64_t match = min_match_length + (interval >> ESA_MF_LCP_SHIFT) + ((interval & ESA_MF_OFFSET_MASK) << (32 - ESA_MF_OFFSET_SHIFT)); + + /* A candidate that does not exceed the cutoff is replaced by the current best_match, so it cannot change the result. */ match = match > match_cutoff ? match : best_match; + /* best_match is only written while it is still zero: the first accepted candidate on the chain wins, and the loop keeps running solely to update the chain. */ best_match = best_match == 0 ? match : best_match; + + /* Overwrite the offset field of the visited entry with the current position, then follow the parent link taken from the value read before the update. */ sa_parent_link[reference] = (interval & (~ESA_MF_OFFSET_MASK)) + new_offset; + reference = interval & ESA_MF_PARENT_MASK; + } + + { + ESA_MATCHFINDER_MATCH match; + +#if defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__) + /* When length sits at byte 0 and offset at byte 4, both fields are filled with a single 64-bit store of the packed value. */ if (offsetof(ESA_MATCHFINDER_MATCH, length) == 0 && offsetof(ESA_MATCHFINDER_MATCH, offset) == 4) + { + *(uint64_t *)(void *)&match = best_match; + } + else +#endif + { + /* Otherwise unpack field by field: low 32 bits are the length, high 32 bits are the offset. */ match.length = (int32_t)(best_match ); + match.offset = (int32_t)(best_match >> 32); + } + + return match; + } +} + void esa_matchfinder_advance(void * mf, int32_t count) { ESA_MF_CONTEXT * ESA_MF_RESTRICT const matchfinder_ctx = (ESA_MF_CONTEXT *)mf; diff --git a/esa_matchfinder.h b/esa_matchfinder.h index 0ec69ba..4e6a8d1 100644 --- a/esa_matchfinder.h +++ b/esa_matchfinder.h @@ -3,7 +3,7 @@ This file is a part of esa-matchfinder, a library for efficient Lempel-Ziv factorization using enhanced suffix array (ESA). - Copyright (c) 2022 Ilya Grebnov + Copyright (c) 2022-2023 Ilya Grebnov Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -32,6 +32,11 @@ Please see the file LICENSE for full copyright and license details. 
#define ESA_MATCHFINDER_NO_ERROR (0) #define ESA_MATCHFINDER_BAD_PARAMETER (-1) +#define ESA_MATCHFINDER_VERSION_MAJOR 1 +#define ESA_MATCHFINDER_VERSION_MINOR 1 +#define ESA_MATCHFINDER_VERSION_PATCH 0 +#define ESA_MATCHFINDER_VERSION_STRING "1.1.0" + #ifdef __cplusplus extern "C" { #endif @@ -96,8 +101,8 @@ extern "C" { int32_t esa_matchfinder_rewind(void * mf, int32_t position); /** - * Finds all distance optimal matches at the current match-finder position and advances position by one byte. The recorded - * matches will be sorted by strictly decreasing length and strictly increasing offset from the beginning of the block. + * Finds all distance-optimal matches at the current position of the match-finder, and then advances the position by one byte. + * The recorded matches will be sorted by strictly decreasing length and strictly increasing offset from the beginning of the block. * @param mf The enhanced suffix array (ESA) based match-finder. * @param matches The output array to record the matches (array must be of ESA_MATCHFINDER_MAX_MATCH_LENGTH size). * @return The pointer to the end of recorded matches array (if no matches were found, this will be the same as matches). @@ -105,12 +110,30 @@ extern "C" { ESA_MATCHFINDER_MATCH * esa_matchfinder_find_all_matches(void * mf, ESA_MATCHFINDER_MATCH * matches); /** - * Finds the best match at the current match-finder position and advances position by one byte. + * Finds all distance-optimal matches within a specified sliding window at the current position of the match-finder, and then advances the position by one byte. + * The recorded matches will be sorted by strictly decreasing length and strictly increasing offset from the beginning of the block. + * @param mf The enhanced suffix array (ESA) based match-finder. + * @param matches The output array to record the matches (array must be of ESA_MATCHFINDER_MAX_MATCH_LENGTH size). 
+ * @param window_size The maximum allowed distance between the current position and found matches. + * @return The pointer to the end of recorded matches array (if no matches were found, this will be the same as matches). + */ + ESA_MATCHFINDER_MATCH * esa_matchfinder_find_all_matches_in_window(void * mf, ESA_MATCHFINDER_MATCH * matches, uint64_t window_size); + + /** + * Finds the best match at the current position of the match-finder, and then advances the position by one byte. * @param mf The enhanced suffix array (ESA) based match-finder. * @return The best match found (match of zero length and zero offset is returned if no matches were found). */ ESA_MATCHFINDER_MATCH esa_matchfinder_find_best_match(void * mf); + /** + * Finds the best match within a specified sliding window at the current position of the match-finder, and then advances the position by one byte. + * @param mf The enhanced suffix array (ESA) based match-finder. + * @param window_size The maximum allowed distance between the current position and found match. + * @return The best match found (match of zero length and zero offset is returned if no matches were found). + */ + ESA_MATCHFINDER_MATCH esa_matchfinder_find_best_match_in_window(void * mf, uint64_t window_size); + /** * Advances the match-finder position forward by the specified number of bytes without recording matches. * @param mf The enhanced suffix array (ESA) based match-finder. diff --git a/libsais/CHANGES b/libsais/CHANGES index c41b978..ba816ad 100644 --- a/libsais/CHANGES +++ b/libsais/CHANGES @@ -1,3 +1,6 @@ +Changes in 2.7.2 (April 18, 2023) +- Fixed out-of-bound memory access issue for large inputs (libsais64). + Changes in 2.7.1 (June 19, 2022) - Improved cache coherence for ARMv8 architecture. 
diff --git a/libsais/VERSION b/libsais/VERSION index 5588ae8..fbafd6b 100644 --- a/libsais/VERSION +++ b/libsais/VERSION @@ -1 +1 @@ -2.7.1 \ No newline at end of file +2.7.2 \ No newline at end of file diff --git a/libsais/libsais.c b/libsais/libsais.c index fcbfe7e..6d4c0e1 100644 --- a/libsais/libsais.c +++ b/libsais/libsais.c @@ -1126,7 +1126,7 @@ static void libsais_count_and_gather_compacted_lms_suffixes_32s_2k_fs_omp(const else { fast_sint_t bucket_size = 2 * (fast_sint_t)k; - fast_sint_t bucket_stride = libsais_get_bucket_stride(buckets - &SA[n + n], bucket_size, omp_num_threads); + fast_sint_t bucket_stride = libsais_get_bucket_stride(buckets - &SA[(fast_sint_t)n + (fast_sint_t)n], bucket_size, omp_num_threads); { thread_state[omp_thread_num].state.position = omp_block_start + omp_block_size; @@ -1310,7 +1310,7 @@ static sa_sint_t libsais_count_and_gather_lms_suffixes_32s_2k_omp(const sa_sint_ static void libsais_count_and_gather_compacted_lms_suffixes_32s_2k_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) { #if defined(_OPENMP) - sa_sint_t max_threads = (sa_sint_t)((buckets - &SA[n + n]) / ((2 * (fast_sint_t)k + 15) & (-16))); if (max_threads > threads) { max_threads = threads; } + sa_sint_t max_threads = (sa_sint_t)((buckets - &SA[(fast_sint_t)n + (fast_sint_t)n]) / ((2 * (fast_sint_t)k + 15) & (-16))); if (max_threads > threads) { max_threads = threads; } if (max_threads > 1 && n >= 65536 && n / k >= 2) { if (max_threads > n / 8 / k) { max_threads = n / 8 / k; } @@ -1381,8 +1381,8 @@ static void libsais_initialize_buckets_start_and_end_8u(sa_sint_t * RESTRICT buc static void libsais_initialize_buckets_start_and_end_32s_6k(sa_sint_t k, sa_sint_t * RESTRICT buckets) { - sa_sint_t * RESTRICT bucket_start = &buckets[4 * k]; - sa_sint_t * RESTRICT bucket_end = &buckets[5 * k]; + sa_sint_t * RESTRICT bucket_start = &buckets[4 * 
(fast_sint_t)k]; + sa_sint_t * RESTRICT bucket_end = &buckets[5 * (fast_sint_t)k]; fast_sint_t i, j; sa_sint_t sum = 0; for (i = BUCKETS_INDEX4(0, 0), j = 0; i <= BUCKETS_INDEX4((fast_sint_t)k - 1, 0); i += BUCKETS_INDEX4(1, 0), j += 1) @@ -1395,8 +1395,8 @@ static void libsais_initialize_buckets_start_and_end_32s_6k(sa_sint_t k, sa_sint static void libsais_initialize_buckets_start_and_end_32s_4k(sa_sint_t k, sa_sint_t * RESTRICT buckets) { - sa_sint_t * RESTRICT bucket_start = &buckets[2 * k]; - sa_sint_t * RESTRICT bucket_end = &buckets[3 * k]; + sa_sint_t * RESTRICT bucket_start = &buckets[2 * (fast_sint_t)k]; + sa_sint_t * RESTRICT bucket_end = &buckets[3 * (fast_sint_t)k]; fast_sint_t i, j; sa_sint_t sum = 0; for (i = BUCKETS_INDEX2(0, 0), j = 0; i <= BUCKETS_INDEX2((fast_sint_t)k - 1, 0); i += BUCKETS_INDEX2(1, 0), j += 1) @@ -1501,7 +1501,7 @@ static sa_sint_t libsais_initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(c } { - sa_sint_t * RESTRICT temp_bucket = &buckets[4 * k]; + sa_sint_t * RESTRICT temp_bucket = &buckets[4 * (fast_sint_t)k]; fast_sint_t i, j; sa_sint_t sum = 0; for (i = BUCKETS_INDEX4(0, 0), j = 0; i <= BUCKETS_INDEX4((fast_sint_t)k - 1, 0); i += BUCKETS_INDEX4(1, 0), j += 1) @@ -1515,8 +1515,8 @@ static sa_sint_t libsais_initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(c static void libsais_initialize_buckets_for_radix_and_partial_sorting_32s_4k(const sa_sint_t * RESTRICT T, sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t first_lms_suffix) { - sa_sint_t * RESTRICT bucket_start = &buckets[2 * k]; - sa_sint_t * RESTRICT bucket_end = &buckets[3 * k]; + sa_sint_t * RESTRICT bucket_start = &buckets[2 * (fast_sint_t)k]; + sa_sint_t * RESTRICT bucket_end = &buckets[3 * (fast_sint_t)k]; buckets[BUCKETS_INDEX2(T[first_lms_suffix], 0)]++; buckets[BUCKETS_INDEX2(T[first_lms_suffix], 1)]--; @@ -2062,7 +2062,7 @@ static void libsais_initialize_buckets_for_partial_sorting_8u(const uint8_t * RE static void 
libsais_initialize_buckets_for_partial_sorting_32s_6k(const sa_sint_t * RESTRICT T, sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t first_lms_suffix, sa_sint_t left_suffixes_count) { - sa_sint_t * RESTRICT temp_bucket = &buckets[4 * k]; + sa_sint_t * RESTRICT temp_bucket = &buckets[4 * (fast_sint_t)k]; fast_sint_t i, j; sa_sint_t sum0 = left_suffixes_count + 1, sum1 = 0, sum2 = 0; for (first_lms_suffix = T[first_lms_suffix], i = BUCKETS_INDEX4(0, 0), j = BUCKETS_INDEX2(0, 0); i <= BUCKETS_INDEX4((fast_sint_t)first_lms_suffix - 1, 0); i += BUCKETS_INDEX4(1, 0), j += BUCKETS_INDEX2(1, 0)) @@ -2350,8 +2350,8 @@ static sa_sint_t libsais_partial_sorting_scan_left_to_right_32s_4k(const sa_sint { const fast_sint_t prefetch_distance = 32; - sa_sint_t * RESTRICT induction_bucket = &buckets[2 * k]; - sa_sint_t * RESTRICT distinct_names = &buckets[0 * k]; + sa_sint_t * RESTRICT induction_bucket = &buckets[2 * (fast_sint_t)k]; + sa_sint_t * RESTRICT distinct_names = &buckets[0 * (fast_sint_t)k]; fast_sint_t i, j; for (i = omp_block_start, j = omp_block_start + omp_block_size - 2 * prefetch_distance - 1; i < j; i += 2) @@ -2523,8 +2523,8 @@ static sa_sint_t libsais_partial_sorting_scan_left_to_right_32s_4k_block_sort(co { const fast_sint_t prefetch_distance = 32; - sa_sint_t * RESTRICT induction_bucket = &buckets[2 * k]; - sa_sint_t * RESTRICT distinct_names = &buckets[0 * k]; + sa_sint_t * RESTRICT induction_bucket = &buckets[2 * (fast_sint_t)k]; + sa_sint_t * RESTRICT distinct_names = &buckets[0 * (fast_sint_t)k]; fast_sint_t i, j, omp_block_end = omp_block_start + omp_block_size; for (i = omp_block_start, j = omp_block_end - prefetch_distance - 1; i < j; i += 2) @@ -2782,8 +2782,8 @@ static sa_sint_t libsais_partial_sorting_scan_left_to_right_32s_6k_omp(const sa_ static sa_sint_t libsais_partial_sorting_scan_left_to_right_32s_4k_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t d, sa_sint_t 
threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) { - sa_sint_t * RESTRICT induction_bucket = &buckets[2 * k]; - sa_sint_t * RESTRICT distinct_names = &buckets[0 * k]; + sa_sint_t * RESTRICT induction_bucket = &buckets[2 * (fast_sint_t)k]; + sa_sint_t * RESTRICT distinct_names = &buckets[0 * (fast_sint_t)k]; SA[induction_bucket[T[n - 1]]++] = (n - 1) | ((sa_sint_t)(T[n - 2] < T[n - 1]) << (SAINT_BIT - 1)) | SUFFIX_GROUP_MARKER; distinct_names[BUCKETS_INDEX2(T[n - 1], T[n - 2] < T[n - 1])] = ++d; @@ -2871,7 +2871,7 @@ static void libsais_partial_sorting_shift_markers_32s_6k_omp(sa_sint_t * RESTRIC { const fast_sint_t prefetch_distance = 32; - const sa_sint_t * RESTRICT temp_bucket = &buckets[4 * k]; + const sa_sint_t * RESTRICT temp_bucket = &buckets[4 * (fast_sint_t)k]; fast_sint_t c; @@ -2923,7 +2923,7 @@ static void libsais_partial_sorting_shift_markers_32s_4k(sa_sint_t * RESTRICT SA static void libsais_partial_sorting_shift_buckets_32s_6k(sa_sint_t k, sa_sint_t * RESTRICT buckets) { - sa_sint_t * RESTRICT temp_bucket = &buckets[4 * k]; + sa_sint_t * RESTRICT temp_bucket = &buckets[4 * (fast_sint_t)k]; fast_sint_t i; for (i = BUCKETS_INDEX2(0, 0); i <= BUCKETS_INDEX2((fast_sint_t)k - 1, 0); i += BUCKETS_INDEX2(1, 0)) @@ -3179,8 +3179,8 @@ static sa_sint_t libsais_partial_sorting_scan_right_to_left_32s_4k(const sa_sint { const fast_sint_t prefetch_distance = 32; - sa_sint_t * RESTRICT induction_bucket = &buckets[3 * k]; - sa_sint_t * RESTRICT distinct_names = &buckets[0 * k]; + sa_sint_t * RESTRICT induction_bucket = &buckets[3 * (fast_sint_t)k]; + sa_sint_t * RESTRICT distinct_names = &buckets[0 * (fast_sint_t)k]; fast_sint_t i, j; for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + 2 * prefetch_distance + 1; i >= j; i -= 2) @@ -3352,8 +3352,8 @@ static sa_sint_t libsais_partial_sorting_scan_right_to_left_32s_4k_block_sort(co { const fast_sint_t prefetch_distance = 32; - sa_sint_t * RESTRICT induction_bucket = &buckets[3 * k]; - sa_sint_t * 
RESTRICT distinct_names = &buckets[0 * k]; + sa_sint_t * RESTRICT induction_bucket = &buckets[3 * (fast_sint_t)k]; + sa_sint_t * RESTRICT distinct_names = &buckets[0 * (fast_sint_t)k]; fast_sint_t i, j; for (i = omp_block_start + omp_block_size - 1, j = omp_block_start + prefetch_distance + 1; i >= j; i -= 2) @@ -3832,8 +3832,8 @@ static void libsais_induce_partial_order_32s_4k_omp(const sa_sint_t * RESTRICT T static void libsais_induce_partial_order_32s_2k_omp(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) { - libsais_partial_sorting_scan_left_to_right_32s_1k_omp(T, SA, n, &buckets[1 * k], threads, thread_state); - libsais_partial_sorting_scan_right_to_left_32s_1k_omp(T, SA, n, &buckets[0 * k], threads, thread_state); + libsais_partial_sorting_scan_left_to_right_32s_1k_omp(T, SA, n, &buckets[1 * (fast_sint_t)k], threads, thread_state); + libsais_partial_sorting_scan_right_to_left_32s_1k_omp(T, SA, n, &buckets[0 * (fast_sint_t)k], threads, thread_state); libsais_partial_sorting_gather_lms_suffixes_32s_1k_omp(SA, n, threads, thread_state); } @@ -4371,7 +4371,7 @@ static void libsais_place_lms_suffixes_interval_8u(sa_sint_t * RESTRICT SA, sa_s static void libsais_place_lms_suffixes_interval_32s_4k(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t m, const sa_sint_t * RESTRICT buckets) { - const sa_sint_t * RESTRICT bucket_end = &buckets[3 * k]; + const sa_sint_t * RESTRICT bucket_end = &buckets[3 * (fast_sint_t)k]; fast_sint_t c, j = n; for (c = (fast_sint_t)k - 2; c >= 0; --c) @@ -4448,7 +4448,7 @@ static void libsais_place_lms_suffixes_interval_32s_1k(const sa_sint_t * RESTRIC static void libsais_place_lms_suffixes_histogram_32s_6k(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t m, const sa_sint_t * RESTRICT buckets) { - const sa_sint_t * RESTRICT bucket_end = &buckets[5 * k]; + const sa_sint_t * RESTRICT 
bucket_end = &buckets[5 * (fast_sint_t)k]; fast_sint_t c, j = n; for (c = (fast_sint_t)k - 2; c >= 0; --c) @@ -4471,7 +4471,7 @@ static void libsais_place_lms_suffixes_histogram_32s_6k(sa_sint_t * RESTRICT SA, static void libsais_place_lms_suffixes_histogram_32s_4k(sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t m, const sa_sint_t * RESTRICT buckets) { - const sa_sint_t * RESTRICT bucket_end = &buckets[3 * k]; + const sa_sint_t * RESTRICT bucket_end = &buckets[3 * (fast_sint_t)k]; fast_sint_t c, j = n; for (c = (fast_sint_t)k - 2; c >= 0; --c) @@ -5830,20 +5830,20 @@ static sa_sint_t libsais_induce_final_order_8u_omp(const uint8_t * RESTRICT T, s static void libsais_induce_final_order_32s_6k(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) { - libsais_final_sorting_scan_left_to_right_32s_omp(T, SA, n, &buckets[4 * k], threads, thread_state); - libsais_final_sorting_scan_right_to_left_32s_omp(T, SA, n, &buckets[5 * k], threads, thread_state); + libsais_final_sorting_scan_left_to_right_32s_omp(T, SA, n, &buckets[4 * (fast_sint_t)k], threads, thread_state); + libsais_final_sorting_scan_right_to_left_32s_omp(T, SA, n, &buckets[5 * (fast_sint_t)k], threads, thread_state); } static void libsais_induce_final_order_32s_4k(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) { - libsais_final_sorting_scan_left_to_right_32s_omp(T, SA, n, &buckets[2 * k], threads, thread_state); - libsais_final_sorting_scan_right_to_left_32s_omp(T, SA, n, &buckets[3 * k], threads, thread_state); + libsais_final_sorting_scan_left_to_right_32s_omp(T, SA, n, &buckets[2 * (fast_sint_t)k], threads, thread_state); + libsais_final_sorting_scan_right_to_left_32s_omp(T, SA, n, &buckets[3 * (fast_sint_t)k], threads, thread_state); } static void 
libsais_induce_final_order_32s_2k(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) { - libsais_final_sorting_scan_left_to_right_32s_omp(T, SA, n, &buckets[1 * k], threads, thread_state); - libsais_final_sorting_scan_right_to_left_32s_omp(T, SA, n, &buckets[0 * k], threads, thread_state); + libsais_final_sorting_scan_left_to_right_32s_omp(T, SA, n, &buckets[1 * (fast_sint_t)k], threads, thread_state); + libsais_final_sorting_scan_right_to_left_32s_omp(T, SA, n, &buckets[0 * (fast_sint_t)k], threads, thread_state); } static void libsais_induce_final_order_32s_1k(const sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT SA, sa_sint_t n, sa_sint_t k, sa_sint_t * RESTRICT buckets, sa_sint_t threads, LIBSAIS_THREAD_STATE * RESTRICT thread_state) @@ -6266,7 +6266,7 @@ static sa_sint_t libsais_main_32s(sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT S if (k > 0 && fs / k >= 6) { sa_sint_t alignment = (fs - 1024) / k >= 6 ? 1024 : 16; - sa_sint_t * RESTRICT buckets = (fs - alignment) / k >= 6 ? (sa_sint_t *)libsais_align_up(&SA[n + fs - 6 * k - alignment], (size_t)alignment * sizeof(sa_sint_t)) : &SA[n + fs - 6 * k]; + sa_sint_t * RESTRICT buckets = (fs - alignment) / k >= 6 ? 
(sa_sint_t *)libsais_align_up(&SA[n + fs - 6 * (fast_sint_t)k - alignment], (size_t)alignment * sizeof(sa_sint_t)) : &SA[n + fs - 6 * (fast_sint_t)k]; sa_sint_t m = libsais_count_and_gather_lms_suffixes_32s_4k_omp(T, SA, n, k, buckets, threads, thread_state); if (m > 1) @@ -6276,8 +6276,8 @@ static sa_sint_t libsais_main_32s(sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT S sa_sint_t first_lms_suffix = SA[n - m]; sa_sint_t left_suffixes_count = libsais_initialize_buckets_for_lms_suffixes_radix_sort_32s_6k(T, k, buckets, first_lms_suffix); - libsais_radix_sort_lms_suffixes_32s_6k_omp(T, SA, n, m, &buckets[4 * k], threads, thread_state); - libsais_radix_sort_set_markers_32s_6k_omp(SA, k, &buckets[4 * k], threads); + libsais_radix_sort_lms_suffixes_32s_6k_omp(T, SA, n, m, &buckets[4 * (fast_sint_t)k], threads, thread_state); + libsais_radix_sort_set_markers_32s_6k_omp(SA, k, &buckets[4 * (fast_sint_t)k], threads); if (threads > 1 && n >= 65536) { memset(&SA[(fast_sint_t)n - (fast_sint_t)m], 0, (size_t)m * sizeof(sa_sint_t)); } @@ -6319,7 +6319,7 @@ static sa_sint_t libsais_main_32s(sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT S else if (k > 0 && fs / k >= 4) { sa_sint_t alignment = (fs - 1024) / k >= 4 ? 1024 : 16; - sa_sint_t * RESTRICT buckets = (fs - alignment) / k >= 4 ? (sa_sint_t *)libsais_align_up(&SA[n + fs - 4 * k - alignment], (size_t)alignment * sizeof(sa_sint_t)) : &SA[n + fs - 4 * k]; + sa_sint_t * RESTRICT buckets = (fs - alignment) / k >= 4 ? (sa_sint_t *)libsais_align_up(&SA[n + fs - 4 * (fast_sint_t)k - alignment], (size_t)alignment * sizeof(sa_sint_t)) : &SA[n + fs - 4 * (fast_sint_t)k]; sa_sint_t m = libsais_count_and_gather_lms_suffixes_32s_2k_omp(T, SA, n, k, buckets, threads, thread_state); if (m > 1) @@ -6363,7 +6363,7 @@ static sa_sint_t libsais_main_32s(sa_sint_t * RESTRICT T, sa_sint_t * RESTRICT S else if (k > 0 && fs / k >= 2) { sa_sint_t alignment = (fs - 1024) / k >= 2 ? 1024 : 16; - sa_sint_t * RESTRICT buckets = (fs - alignment) / k >= 2 ? 
(sa_sint_t *)libsais_align_up(&SA[n + fs - 2 * k - alignment], (size_t)alignment * sizeof(sa_sint_t)) : &SA[n + fs - 2 * k]; + sa_sint_t * RESTRICT buckets = (fs - alignment) / k >= 2 ? (sa_sint_t *)libsais_align_up(&SA[n + fs - 2 * (fast_sint_t)k - alignment], (size_t)alignment * sizeof(sa_sint_t)) : &SA[n + fs - 2 * (fast_sint_t)k]; sa_sint_t m = libsais_count_and_gather_lms_suffixes_32s_2k_omp(T, SA, n, k, buckets, threads, thread_state); if (m > 1) diff --git a/libsais/libsais.h b/libsais/libsais.h index f0f6018..f259fd0 100644 --- a/libsais/libsais.h +++ b/libsais/libsais.h @@ -24,6 +24,11 @@ Please see the file LICENSE for full copyright information. #ifndef LIBSAIS_H #define LIBSAIS_H 1 +#define LIBSAIS_VERSION_MAJOR 2 +#define LIBSAIS_VERSION_MINOR 7 +#define LIBSAIS_VERSION_PATCH 2 +#define LIBSAIS_VERSION_STRING "2.7.2" + #ifdef __cplusplus extern "C" { #endif