Skip to content

Commit

Permalink
Implemented permuted longest common prefix array (PLCP) construction …
Browse files Browse the repository at this point in the history
…of an integer array.
  • Loading branch information
IlyaGrebnov committed Mar 3, 2024
1 parent 1913652 commit 340191b
Show file tree
Hide file tree
Showing 9 changed files with 123 additions and 14 deletions.
3 changes: 2 additions & 1 deletion CHANGES
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
Changes in 2.7.6 (March 3, 2024)
Changes in 2.8.0 (March 3, 2024)
- Implemented permuted longest common prefix array (PLCP) construction of an integer array.
- Fixed compilation error when compiling the library with OpenMP enabled.

Changes in 2.7.5 (February 26, 2024)
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.10)

project(libsais VERSION 2.7.6 LANGUAGES C DESCRIPTION "libsais is a library for linear time suffix array, longest common prefix array and burrows wheeler transform construction based on induced sorting algorithm.")
project(libsais VERSION 2.8.0 LANGUAGES C DESCRIPTION "libsais is a library for linear time suffix array, longest common prefix array and burrows wheeler transform construction based on induced sorting algorithm.")

set(CMAKE_C_STANDARD 99)
set(CMAKE_C_STANDARD_REQUIRED ON)
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ The libsais provides simple C99 API to construct suffix array and Burrows-Wheele
The libsais is released under the [Apache License Version 2.0](LICENSE "Apache license")

## Changes
* March 3, 2024 (2.7.6)
* March 3, 2024 (2.8.0)
* Implemented permuted longest common prefix array (PLCP) construction of an integer array.
* Fixed compilation error when compiling the library with OpenMP enabled.
* February 26, 2024 (2.7.5)
* Improved performance of suffix array and burrows wheeler transform construction on degenerate inputs.
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.7.6
2.8.0
27 changes: 24 additions & 3 deletions include/libsais.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ Please see the file LICENSE for full copyright information.
#define LIBSAIS_H 1

#define LIBSAIS_VERSION_MAJOR 2
#define LIBSAIS_VERSION_MINOR 7
#define LIBSAIS_VERSION_PATCH 5
#define LIBSAIS_VERSION_STRING "2.7.5"
#define LIBSAIS_VERSION_MINOR 8
#define LIBSAIS_VERSION_PATCH 0
#define LIBSAIS_VERSION_STRING "2.8.0"

#ifdef _WIN32
#ifdef LIBSAIS_SHARED
Expand Down Expand Up @@ -332,6 +332,16 @@ extern "C" {
*/
LIBSAIS_API int32_t libsais_plcp(const uint8_t * T, const int32_t * SA, int32_t * PLCP, int32_t n);

/**
* Constructs the permuted longest common prefix array (PLCP) of a given integer array and a suffix array.
* @param T [0..n-1] The input integer array.
* @param SA [0..n-1] The input suffix array.
* @param PLCP [0..n-1] The output permuted longest common prefix array.
* @param n The length of the integer array and the suffix array.
* @return 0 if no error occurred, -1 otherwise.
*/
LIBSAIS_API int32_t libsais_plcp_int(const int32_t * T, const int32_t * SA, int32_t * PLCP, int32_t n);

/**
* Constructs the longest common prefix array (LCP) of a given permuted longest common prefix array (PLCP) and a suffix array.
* @param PLCP [0..n-1] The input permuted longest common prefix array.
Expand All @@ -354,6 +364,17 @@ extern "C" {
*/
LIBSAIS_API int32_t libsais_plcp_omp(const uint8_t * T, const int32_t * SA, int32_t * PLCP, int32_t n, int32_t threads);

/**
* Constructs the permuted longest common prefix array (PLCP) of a given integer array and a suffix array in parallel using OpenMP.
* @param T [0..n-1] The input integer array.
* @param SA [0..n-1] The input suffix array.
* @param PLCP [0..n-1] The output permuted longest common prefix array.
* @param n The length of the integer array and the suffix array.
* @param threads The number of OpenMP threads to use (can be 0 for OpenMP default).
* @return 0 if no error occurred, -1 otherwise.
*/
LIBSAIS_API int32_t libsais_plcp_int_omp(const int32_t * T, const int32_t * SA, int32_t * PLCP, int32_t n, int32_t threads);

/**
* Constructs the longest common prefix array (LCP) of a given permuted longest common prefix array (PLCP) and a suffix array in parallel using OpenMP.
* @param PLCP [0..n-1] The input permuted longest common prefix array.
Expand Down
6 changes: 3 additions & 3 deletions include/libsais16.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ Please see the file LICENSE for full copyright information.
#define LIBSAIS16_H 1

#define LIBSAIS16_VERSION_MAJOR 2
#define LIBSAIS16_VERSION_MINOR 7
#define LIBSAIS16_VERSION_PATCH 5
#define LIBSAIS16_VERSION_STRING "2.7.5"
#define LIBSAIS16_VERSION_MINOR 8
#define LIBSAIS16_VERSION_PATCH 0
#define LIBSAIS16_VERSION_STRING "2.8.0"

#ifdef _WIN32
#ifdef LIBSAIS_SHARED
Expand Down
6 changes: 3 additions & 3 deletions include/libsais64.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ Please see the file LICENSE for full copyright information.
#define LIBSAIS64_H 1

#define LIBSAIS64_VERSION_MAJOR 2
#define LIBSAIS64_VERSION_MINOR 7
#define LIBSAIS64_VERSION_PATCH 5
#define LIBSAIS64_VERSION_STRING "2.7.5"
#define LIBSAIS64_VERSION_MINOR 8
#define LIBSAIS64_VERSION_PATCH 0
#define LIBSAIS64_VERSION_STRING "2.8.0"

#ifdef _WIN32
#ifdef LIBSAIS_SHARED
Expand Down
86 changes: 86 additions & 0 deletions src/libsais.c
Original file line number Diff line number Diff line change
Expand Up @@ -7761,6 +7761,54 @@ static void libsais_compute_plcp_omp(const uint8_t * RESTRICT T, sa_sint_t * RES
}
}

/*
 * Rewrites one block of PLCP in place for an integer text.
 *
 * On entry every PLCP[i] in the block is read as a position k in T
 * (presumably the Phi value written by libsais_compute_phi_omp, which the
 * public entry points call first -- that helper is not visible here).
 * On exit PLCP[i] holds the number of leading symbols shared by T[i..] and
 * T[k..], capped at n - max(i, k) so neither comparison runs past n.
 *
 * The match length is carried from one position to the next: after it is
 * stored it is reduced by one (never below zero) and used as the starting
 * length for the following position, so already matched symbols are not
 * compared again. Each call starts with a match length of zero.
 *
 * T                The integer text.
 * PLCP             In: positions to compare against. Out: common prefix lengths.
 * n                Length limit used to bound the comparisons.
 * omp_block_start  First position handled by this call.
 * omp_block_size   Number of positions handled by this call.
 */
static void libsais_compute_plcp_int(const int32_t * RESTRICT T, sa_sint_t * RESTRICT PLCP, fast_sint_t n, fast_sint_t omp_block_start, fast_sint_t omp_block_size)
{
    const fast_sint_t prefetch_distance = 32;

    const fast_sint_t block_end    = omp_block_start + omp_block_size;
    const fast_sint_t prefetch_end = block_end - prefetch_distance;

    fast_sint_t pos     = omp_block_start;
    fast_sint_t matched = 0;

    /* Main part: PLCP[pos + prefetch_distance] is still inside the block, so it can be read to prefetch T. */
    while (pos < prefetch_end)
    {
        libsais_prefetchw(&PLCP[pos + 2 * prefetch_distance]);
        libsais_prefetchr(&T[PLCP[pos + prefetch_distance] + matched]);

        const fast_sint_t other = PLCP[pos];
        const fast_sint_t limit = n - (pos > other ? pos : other);

        while (matched < limit && T[pos + matched] == T[other + matched])
        {
            matched += 1;
        }

        PLCP[pos] = (sa_sint_t)matched;
        if (matched != 0) { matched -= 1; }

        pos += 1;
    }

    /* Tail: the last prefetch_distance positions (or the whole block when it is shorter), without prefetching. */
    while (pos < block_end)
    {
        const fast_sint_t other = PLCP[pos];
        const fast_sint_t limit = n - (pos > other ? pos : other);

        while (matched < limit && T[pos + matched] == T[other + matched])
        {
            matched += 1;
        }

        PLCP[pos] = (sa_sint_t)matched;
        if (matched != 0) { matched -= 1; }

        pos += 1;
    }
}

/*
 * Splits the range [0, n) into one contiguous block per worker and runs
 * libsais_compute_plcp_int on each block.
 *
 * With LIBSAIS_OPENMP defined, a parallel region is opened only when
 * threads > 1 and n >= 65536; otherwise (and in builds without OpenMP)
 * a single worker processes the whole range.
 *
 * Every worker except the last gets n / worker_count positions rounded down
 * to a multiple of 16; the last worker takes everything that remains.
 *
 * T        The integer text.
 * PLCP     Array converted in place by libsais_compute_plcp_int.
 * n        Number of positions to process.
 * threads  Requested number of OpenMP threads (unused without OpenMP).
 */
static void libsais_compute_plcp_int_omp(const int32_t * RESTRICT T, sa_sint_t * RESTRICT PLCP, sa_sint_t n, sa_sint_t threads)
{
#if defined(LIBSAIS_OPENMP)
    #pragma omp parallel num_threads(threads) if(threads > 1 && n >= 65536)
#endif
    {
#if defined(LIBSAIS_OPENMP)
        const fast_sint_t worker_index = omp_get_thread_num();
        const fast_sint_t worker_count = omp_get_num_threads();
#else
        UNUSED(threads);

        const fast_sint_t worker_index = 0;
        const fast_sint_t worker_count = 1;
#endif
        const fast_sint_t stride = (n / worker_count) & (-16);
        const fast_sint_t first  = worker_index * stride;

        /* The last worker also covers the positions lost to rounding the stride down. */
        fast_sint_t count = stride;
        if (worker_index + 1 >= worker_count) { count = n - first; }

        libsais_compute_plcp_int(T, PLCP, n, first, count);
    }
}

static void libsais_compute_lcp(const sa_sint_t * RESTRICT PLCP, const sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT LCP, fast_sint_t omp_block_start, fast_sint_t omp_block_size)
{
const fast_sint_t prefetch_distance = 32;
Expand Down Expand Up @@ -7831,6 +7879,24 @@ int32_t libsais_plcp(const uint8_t * T, const int32_t * SA, int32_t * PLCP, int3
return 0;
}

/*
 * Single-threaded construction of the permuted longest common prefix array
 * (PLCP) of an integer array from its suffix array.
 *
 * Returns -1 when T, SA or PLCP is NULL or n is negative, and 0 otherwise.
 * For n == 0 nothing is written; for n == 1 PLCP[0] is set to 0. Larger
 * inputs are handled by libsais_compute_phi_omp followed by
 * libsais_compute_plcp_int_omp, both invoked with one thread.
 */
int32_t libsais_plcp_int(const int32_t * T, const int32_t * SA, int32_t * PLCP, int32_t n)
{
    if (T == NULL || SA == NULL || PLCP == NULL || n < 0)
    {
        return -1;
    }

    if (n == 0)
    {
        return 0;
    }

    if (n == 1)
    {
        PLCP[0] = 0;
        return 0;
    }

    libsais_compute_phi_omp(SA, PLCP, n, 1);
    libsais_compute_plcp_int_omp(T, PLCP, n, 1);

    return 0;
}

int32_t libsais_lcp(const int32_t * PLCP, const int32_t * SA, int32_t * LCP, int32_t n)
{
if ((PLCP == NULL) || (SA == NULL) || (LCP == NULL) || (n < 0))
Expand Down Expand Up @@ -7870,6 +7936,26 @@ int32_t libsais_plcp_omp(const uint8_t * T, const int32_t * SA, int32_t * PLCP,
return 0;
}

/*
 * OpenMP variant of the permuted longest common prefix array (PLCP)
 * construction for an integer array and its suffix array.
 *
 * Returns -1 when T, SA or PLCP is NULL or when n or threads is negative,
 * and 0 otherwise. For n == 0 nothing is written; for n == 1 PLCP[0] is set
 * to 0. A threads value of 0 is replaced by omp_get_max_threads() before
 * libsais_compute_phi_omp and libsais_compute_plcp_int_omp are run.
 */
int32_t libsais_plcp_int_omp(const int32_t * T, const int32_t * SA, int32_t * PLCP, int32_t n, int32_t threads)
{
    if (T == NULL || SA == NULL || PLCP == NULL || n < 0 || threads < 0)
    {
        return -1;
    }

    if (n == 0)
    {
        return 0;
    }

    if (n == 1)
    {
        PLCP[0] = 0;
        return 0;
    }

    /* Negative values were rejected above, so only 0 selects the OpenMP default. */
    if (threads == 0)
    {
        threads = omp_get_max_threads();
    }

    libsais_compute_phi_omp(SA, PLCP, n, threads);
    libsais_compute_plcp_int_omp(T, PLCP, n, threads);

    return 0;
}

int32_t libsais_lcp_omp(const int32_t * PLCP, const int32_t * SA, int32_t * LCP, int32_t n, int32_t threads)
{
if ((PLCP == NULL) || (SA == NULL) || (LCP == NULL) || (n < 0) || (threads < 0))
Expand Down
2 changes: 1 addition & 1 deletion src/libsais64.c
Original file line number Diff line number Diff line change
Expand Up @@ -6288,7 +6288,7 @@ static void libsais64_convert_inplace_32u_to_64u_omp(uint32_t * V, sa_sint_t n,
fast_sint_t omp_block_start = omp_thread_num * omp_block_stride;
fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? omp_block_stride : block_size - omp_block_start;

libsais64_convert_32u_to_64u((uint32_t *)V, (uint64_t *)V, n + omp_block_start, omp_block_size);
libsais64_convert_32u_to_64u(((uint32_t *)(void *)V) + n, ((uint64_t *)(void *)V) + n, omp_block_start, omp_block_size);
}
}

Expand Down

0 comments on commit 340191b

Please sign in to comment.