From d4f940bd53cc9ca11345cef701602e5a9d5cd13d Mon Sep 17 00:00:00 2001
From: Ilya Grebnov
Date: Mon, 27 May 2024 16:22:52 -0700
Subject: [PATCH] Implemented suffix array construction of a long integer array (libsais64).

---
 CHANGES             |  3 +++
 CMakeLists.txt      |  2 +-
 README.md           |  2 ++
 VERSION             |  2 +-
 include/libsais.h   |  4 ++--
 include/libsais16.h |  4 ++--
 include/libsais64.h | 29 +++++++++++++++++++++++++++--
 src/libsais.c       |  2 +-
 src/libsais64.c     | 45 +++++++++++++++++++++++++++++++++++++++++++++
 9 files changed, 84 insertions(+), 9 deletions(-)

diff --git a/CHANGES b/CHANGES
index a174c5e..2436787 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,3 +1,6 @@
+Changes in 2.8.2 (May 27, 2024)
+- Implemented suffix array construction of a long integer array (libsais64).
+
 Changes in 2.8.1 (April 5, 2024)
 - Fixed out-of-bound memory access issue for large inputs (libsais64).
 
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 377bae1..b08fa32 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,6 @@
 cmake_minimum_required(VERSION 3.10)
 
-project(libsais VERSION 2.8.1 LANGUAGES C DESCRIPTION "libsais is a library for linear time suffix array, longest common prefix array and burrows wheeler transform construction based on induced sorting algorithm.")
+project(libsais VERSION 2.8.2 LANGUAGES C DESCRIPTION "libsais is a library for linear time suffix array, longest common prefix array and burrows wheeler transform construction based on induced sorting algorithm.")
 
 set(CMAKE_C_STANDARD 99)
 set(CMAKE_C_STANDARD_REQUIRED ON)
diff --git a/README.md b/README.md
index 51d6f34..edf3bfd 100644
--- a/README.md
+++ b/README.md
@@ -23,6 +23,8 @@ The libsais provides simple C99 API to construct suffix array and Burrows-Wheele
 The libsais is released under the [Apache License Version 2.0](LICENSE "Apache license")
 
 ## Changes
+* May 27, 2024 (2.8.2)
+  * Implemented suffix array construction of a long integer array (libsais64).
 * April 5, 2024 (2.8.1)
   * Fixed out-of-bound memory access issue for large inputs (libsais64).
 * March 3, 2024 (2.8.0)
diff --git a/VERSION b/VERSION
index dbe5900..1817afe 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.8.1
+2.8.2
diff --git a/include/libsais.h b/include/libsais.h
index 58189ef..960e181 100644
--- a/include/libsais.h
+++ b/include/libsais.h
@@ -26,8 +26,8 @@ Please see the file LICENSE for full copyright information.
 
 #define LIBSAIS_VERSION_MAJOR   2
 #define LIBSAIS_VERSION_MINOR   8
-#define LIBSAIS_VERSION_PATCH   1
-#define LIBSAIS_VERSION_STRING  "2.8.1"
+#define LIBSAIS_VERSION_PATCH   2
+#define LIBSAIS_VERSION_STRING  "2.8.2"
 
 #ifdef _WIN32
     #ifdef LIBSAIS_SHARED
diff --git a/include/libsais16.h b/include/libsais16.h
index 0791468..daff42f 100644
--- a/include/libsais16.h
+++ b/include/libsais16.h
@@ -26,8 +26,8 @@ Please see the file LICENSE for full copyright information.
 
 #define LIBSAIS16_VERSION_MAJOR   2
 #define LIBSAIS16_VERSION_MINOR   8
-#define LIBSAIS16_VERSION_PATCH   1
-#define LIBSAIS16_VERSION_STRING  "2.8.1"
+#define LIBSAIS16_VERSION_PATCH   2
+#define LIBSAIS16_VERSION_STRING  "2.8.2"
 
 #ifdef _WIN32
     #ifdef LIBSAIS_SHARED
diff --git a/include/libsais64.h b/include/libsais64.h
index 8f3ac89..37e7ec5 100644
--- a/include/libsais64.h
+++ b/include/libsais64.h
@@ -26,8 +26,8 @@ Please see the file LICENSE for full copyright information.
 
 #define LIBSAIS64_VERSION_MAJOR   2
 #define LIBSAIS64_VERSION_MINOR   8
-#define LIBSAIS64_VERSION_PATCH   1
-#define LIBSAIS64_VERSION_STRING  "2.8.1"
+#define LIBSAIS64_VERSION_PATCH   2
+#define LIBSAIS64_VERSION_STRING  "2.8.2"
 
 #ifdef _WIN32
     #ifdef LIBSAIS_SHARED
@@ -60,6 +60,18 @@ extern "C" {
     */
     LIBSAIS64_API int64_t libsais64(const uint8_t * T, int64_t * SA, int64_t n, int64_t fs, int64_t * freq);
 
+    /**
+    * Constructs the suffix array of a given integer array.
+    * Note, during construction input array will be modified, but restored at the end if no errors occurred.
+    * @param T [0..n-1] The input integer array.
+    * @param SA [0..n-1+fs] The output array of suffixes.
+    * @param n The length of the integer array.
+    * @param k The alphabet size of the input integer array.
+    * @param fs Extra space available at the end of SA array (can be 0, but 4k or better 6k is recommended for optimal performance).
+    * @return 0 if no error occurred, -1 or -2 otherwise.
+    */
+    LIBSAIS64_API int64_t libsais64_long(int64_t * T, int64_t * SA, int64_t n, int64_t k, int64_t fs);
+
 #if defined(LIBSAIS_OPENMP)
     /**
     * Constructs the suffix array of a given string in parallel using OpenMP.
@@ -72,6 +84,19 @@ extern "C" {
     * @return 0 if no error occurred, -1 or -2 otherwise.
     */
     LIBSAIS64_API int64_t libsais64_omp(const uint8_t * T, int64_t * SA, int64_t n, int64_t fs, int64_t * freq, int64_t threads);
+
+    /**
+    * Constructs the suffix array of a given integer array in parallel using OpenMP.
+    * Note, during construction input array will be modified, but restored at the end if no errors occurred.
+    * @param T [0..n-1] The input integer array.
+    * @param SA [0..n-1+fs] The output array of suffixes.
+    * @param n The length of the integer array.
+    * @param k The alphabet size of the input integer array.
+    * @param fs Extra space available at the end of SA array (can be 0, but 4k or better 6k is recommended for optimal performance).
+    * @param threads The number of OpenMP threads to use (can be 0 for OpenMP default).
+    * @return 0 if no error occurred, -1 or -2 otherwise.
+    */
+    LIBSAIS64_API int64_t libsais64_long_omp(int64_t * T, int64_t * SA, int64_t n, int64_t k, int64_t fs, int64_t threads);
 #endif
 
     /**
diff --git a/src/libsais.c b/src/libsais.c
index 7e5a270..4a8d297 100644
--- a/src/libsais.c
+++ b/src/libsais.c
@@ -6528,7 +6528,7 @@ static sa_sint_t libsais_main(const uint8_t * T, sa_sint_t * SA, sa_sint_t n, sa
     return index;
 }
 
-static int32_t libsais_main_int(sa_sint_t * T, sa_sint_t * SA, sa_sint_t n, sa_sint_t k, sa_sint_t fs, sa_sint_t threads)
+static sa_sint_t libsais_main_int(sa_sint_t * T, sa_sint_t * SA, sa_sint_t n, sa_sint_t k, sa_sint_t fs, sa_sint_t threads)
 {
     LIBSAIS_THREAD_STATE * RESTRICT thread_state = threads > 1 ? libsais_alloc_thread_state(threads) : NULL;
 
diff --git a/src/libsais64.c b/src/libsais64.c
index 0474478..54b1032 100644
--- a/src/libsais64.c
+++ b/src/libsais64.c
@@ -6584,6 +6584,19 @@ static sa_sint_t libsais64_main(const uint8_t * T, sa_sint_t * SA, sa_sint_t n,
     return index;
 }
 
+static sa_sint_t libsais64_main_long(sa_sint_t * T, sa_sint_t * SA, sa_sint_t n, sa_sint_t k, sa_sint_t fs, sa_sint_t threads)
+{ /* Shared driver called by both libsais64_long and libsais64_long_omp. */
+    LIBSAIS_THREAD_STATE * RESTRICT thread_state = threads > 1 ? libsais64_alloc_thread_state(threads) : NULL;
+    /* Sort only when threads == 1 or a thread state was obtained; in every other case the result is -2. */
+    sa_sint_t index = thread_state != NULL || threads == 1
+        ? libsais64_main_32s_entry(T, SA, n, k, fs, threads, thread_state)
+        : -2;
+    /* Called on every path; thread_state is NULL here when threads <= 1 or the allocator returned NULL. */
+    libsais64_free_thread_state(thread_state);
+
+    return index;
+}
+
 static void libsais64_bwt_copy_8u(uint8_t * RESTRICT U, sa_sint_t * RESTRICT A, sa_sint_t n)
 {
     const fast_sint_t prefetch_distance = 32;
@@ -6666,6 +6679,21 @@ int64_t libsais64(const uint8_t * T, int64_t * SA, int64_t n, int64_t fs, int64_
     return libsais64_main(T, SA, n, 0, 0, NULL, fs, freq, 1);
 }
 
+int64_t libsais64_long(int64_t * T, int64_t * SA, int64_t n, int64_t k, int64_t fs)
+{
+    if ((T == NULL) || (SA == NULL) || (n < 0) || (fs < 0))
+    {
+        return -1; /* rejected arguments: NULL T or SA, or negative n or fs */
+    }
+    else if (n < 2)
+    {
+        if (n == 1) { SA[0] = 0; }
+        return 0; /* n is 0 or 1: answered here, no sorting call is made */
+    }
+    /* Always runs with a thread count of 1; k is forwarded without being checked in this function. */
+    return libsais64_main_long(T, SA, n, k, fs, 1);
+}
+
 int64_t libsais64_bwt(const uint8_t * T, uint8_t * U, int64_t * A, int64_t n, int64_t fs, int64_t * freq)
 {
     if ((T == NULL) || (U == NULL) || (A == NULL) || (n < 0) || (fs < 0))
@@ -6779,6 +6807,23 @@ int64_t libsais64_omp(const uint8_t * T, int64_t * SA, int64_t n, int64_t fs, in
     return libsais64_main(T, SA, n, 0, 0, NULL, fs, freq, threads);
 }
 
+int64_t libsais64_long_omp(int64_t * T, int64_t * SA, int64_t n, int64_t k, int64_t fs, int64_t threads)
+{
+    if ((T == NULL) || (SA == NULL) || (n < 0) || (fs < 0) || (threads < 0))
+    {
+        return -1; /* rejected arguments: NULL T or SA, or negative n, fs or threads */
+    }
+    else if (n < 2)
+    {
+        if (n == 1) { SA[0] = 0; }
+        return 0; /* n is 0 or 1: answered here, no sorting call is made */
+    }
+    /* Negative counts were rejected above, so only threads == 0 falls back to omp_get_max_threads(). */
+    threads = threads > 0 ? threads : omp_get_max_threads();
+    /* k is forwarded without being checked in this function. */
+    return libsais64_main_long(T, SA, n, k, fs, threads);
+}
+
 int64_t libsais64_bwt_omp(const uint8_t * T, uint8_t * U, int64_t * A, int64_t n, int64_t fs, int64_t * freq, int64_t threads)
 {
     if ((T == NULL) || (U == NULL) || (A == NULL) || (n < 0) || (fs < 0) || (threads < 0))