Skip to content

Commit

Permalink
Version 2.7.0 Support for longest common prefix array (LCP) construct…
Browse files Browse the repository at this point in the history
…ion.
  • Loading branch information
IlyaGrebnov committed Apr 13, 2022
1 parent 873f15d commit 223c7eb
Show file tree
Hide file tree
Showing 10 changed files with 862 additions and 55 deletions.
3 changes: 3 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
Changes in 2.7.0 (April 12, 2022)
- Support for longest common prefix array (LCP) construction.

Changes in 2.6.5 (January 1, 2022)
- Exposed functions to construct suffix array of a given integer array.
- Improved detection of various compiler intrinsics.
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
PROJECT=sais
PLIBNAME=lib$(PROJECT)
PVER=2.6.5
PVER=2.7.0
PSOVER=2
ifeq ($(OS),Windows_NT)
PLIBSTATIC=$(PROJECT).a
Expand Down
9 changes: 6 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# libsais

The libsais is a library for fast (see [Benchmarks](#benchmarks) below) linear time suffix array and Burrows-Wheeler transform construction based on induced sorting algorithm described in the following papers:
* Ge Nong, Sen Zhang and Wai Hong Chan *Two Efficient Algorithms for Linear Suffix Array Construction*, 2008
The libsais is a library for fast (see [Benchmarks](#benchmarks) below) linear time suffix array, longest common prefix array and Burrows-Wheeler transform construction based on induced sorting algorithm described in the following papers:
* Ge Nong, Sen Zhang, Wai Hong Chan *Two Efficient Algorithms for Linear Suffix Array Construction*, 2008
* Juha Karkkainen, Giovanni Manzini, Simon J. Puglisi *Permuted Longest-Common-Prefix Array*, 2009
* Nataliya Timoshevskaya, Wu-chun Feng *SAIS-OPT: On the characterization and optimization of the SA-IS algorithm for suffix array construction*, 2014
* Jing Yi Xie, Ge Nong, Bin Lao, Wentao Xu *Scalable Suffix Sorting on a Multicore Machine*, 2020

Expand All @@ -19,6 +20,8 @@ The libsais provides simple C99 API to construct suffix array and Burrows-Wheele
The libsais is released under the [Apache License Version 2.0](LICENSE "Apache license")

## Changes
* April 12, 2022 (2.7.0)
* Support for longest common prefix array (LCP) construction.
* January 1, 2022 (2.6.5)
* Exposed functions to construct suffix array of a given integer array.
* Improved detection of various compiler intrinsics.
Expand All @@ -41,7 +44,7 @@ The libsais is released under the [Apache License Version 2.0](LICENSE "Apache l
* Initial release.

## Versions of the libsais library
* [libsais.c](src/libsais.c) (and corresponding [libsais.h](src/libsais.h)) is for suffix array, forward BWT and reverse BWT construction over 8-bit inputs smaller than 2GB (2147483648 bytes).
* [libsais.c](src/libsais.c) (and corresponding [libsais.h](src/libsais.h)) is for suffix array, PLCP, LCP, forward BWT and reverse BWT construction over 8-bit inputs smaller than 2GB (2147483648 bytes).
* This version of the library could also be used to construct suffix array of an integer array (with a caveat that input array must be mutable).
* [libsais64.c](src/libsais64.c) (and corresponding [libsais64.h](src/libsais64.h)) is optional extension of the library for inputs larger or equlas to 2GB (2147483648 bytes).
* [libsais16.c](src/libsais16.c) (and corresponding [libsais16.h](src/libsais16.h)) is independent version of the library for 16-bit inputs.
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.6.5
2.7.0
231 changes: 227 additions & 4 deletions src/libsais.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*--
This file is a part of libsais, a library for linear time
suffix array and burrows wheeler transform construction.
This file is a part of libsais, a library for linear time suffix array,
longest common prefix array and burrows wheeler transform construction.
Copyright (c) 2021-2022 Ilya Grebnov <[email protected]>
Expand Down Expand Up @@ -105,15 +105,15 @@ typedef struct LIBSAIS_UNBWT_CONTEXT
#if __has_builtin(__builtin_prefetch)
#define HAS_BUILTIN_PREFECTCH
#endif
#elif defined(__GNUC__) && ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 2)) || (__GNUC__ >= 4)
#elif defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 2)) || (__GNUC__ >= 4))
#define HAS_BUILTIN_PREFECTCH
#endif

#if defined(__has_builtin)
#if __has_builtin(__builtin_bswap16)
#define HAS_BUILTIN_BSWAP16
#endif
#elif defined(__GNUC__) && ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 8)) || (__GNUC__ >= 5)
#elif defined(__GNUC__) && (((__GNUC__ == 4) && (__GNUC_MINOR__ >= 8)) || (__GNUC__ >= 5))
#define HAS_BUILTIN_BSWAP16
#endif

Expand Down Expand Up @@ -7645,3 +7645,226 @@ int32_t libsais_unbwt_aux_omp(const uint8_t * T, uint8_t * U, int32_t * A, int32
}

#endif

static void libsais_compute_phi(const sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT PLCP, sa_sint_t n, fast_sint_t omp_block_start, fast_sint_t omp_block_size)
{
const fast_sint_t prefetch_distance = 32;

fast_sint_t i, j; sa_sint_t k = omp_block_start > 0 ? SA[omp_block_start - 1] : n;
for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 3; i < j; i += 4)
{
libsais_prefetchw(&PLCP[SA[i + prefetch_distance + 0]]);
libsais_prefetchw(&PLCP[SA[i + prefetch_distance + 1]]);

PLCP[SA[i + 0]] = k; k = SA[i + 0];
PLCP[SA[i + 1]] = k; k = SA[i + 1];

libsais_prefetchw(&PLCP[SA[i + prefetch_distance + 2]]);
libsais_prefetchw(&PLCP[SA[i + prefetch_distance + 3]]);

PLCP[SA[i + 2]] = k; k = SA[i + 2];
PLCP[SA[i + 3]] = k; k = SA[i + 3];
}

for (j += prefetch_distance + 3; i < j; i += 1)
{
PLCP[SA[i]] = k; k = SA[i];
}
}

static void libsais_compute_phi_omp(const sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT PLCP, sa_sint_t n, sa_sint_t threads)
{
#if defined(_OPENMP)
#pragma omp parallel num_threads(threads) if(threads > 1 && n >= 65536)
#endif
{
#if defined(_OPENMP)
fast_sint_t omp_thread_num = omp_get_thread_num();
fast_sint_t omp_num_threads = omp_get_num_threads();
#else
UNUSED(threads);

fast_sint_t omp_thread_num = 0;
fast_sint_t omp_num_threads = 1;
#endif
fast_sint_t omp_block_stride = (n / omp_num_threads) & (-16);
fast_sint_t omp_block_start = omp_thread_num * omp_block_stride;
fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? omp_block_stride : n - omp_block_start;

libsais_compute_phi(SA, PLCP, n, omp_block_start, omp_block_size);
}
}

static void libsais_compute_plcp(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT PLCP, fast_sint_t n, fast_sint_t omp_block_start, fast_sint_t omp_block_size)
{
const fast_sint_t prefetch_distance = 32;

fast_sint_t i, j, l = 0;
for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance; i < j; i += 1)
{
libsais_prefetch(&T[PLCP[i + prefetch_distance] + l]);

fast_sint_t k = PLCP[i], m = n - (i > k ? i : k);
while (l < m && T[i + l] == T[k + l]) { l++; }

PLCP[i] = (sa_sint_t)l; l -= (l != 0);
}

for (j += prefetch_distance; i < j; i += 1)
{
fast_sint_t k = PLCP[i], m = n - (i > k ? i : k);
while (l < m && T[i + l] == T[k + l]) { l++; }

PLCP[i] = (sa_sint_t)l; l -= (l != 0);
}
}

static void libsais_compute_plcp_omp(const uint8_t * RESTRICT T, sa_sint_t * RESTRICT PLCP, sa_sint_t n, sa_sint_t threads)
{
#if defined(_OPENMP)
#pragma omp parallel num_threads(threads) if(threads > 1 && n >= 65536)
#endif
{
#if defined(_OPENMP)
fast_sint_t omp_thread_num = omp_get_thread_num();
fast_sint_t omp_num_threads = omp_get_num_threads();
#else
UNUSED(threads);

fast_sint_t omp_thread_num = 0;
fast_sint_t omp_num_threads = 1;
#endif
fast_sint_t omp_block_stride = (n / omp_num_threads) & (-16);
fast_sint_t omp_block_start = omp_thread_num * omp_block_stride;
fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? omp_block_stride : n - omp_block_start;

libsais_compute_plcp(T, PLCP, n, omp_block_start, omp_block_size);
}
}

static void libsais_compute_lcp(const sa_sint_t * RESTRICT PLCP, const sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT LCP, fast_sint_t omp_block_start, fast_sint_t omp_block_size)
{
const fast_sint_t prefetch_distance = 32;

fast_sint_t i, j;
for (i = omp_block_start, j = omp_block_start + omp_block_size - prefetch_distance - 3; i < j; i += 4)
{
libsais_prefetch(&PLCP[SA[i + prefetch_distance + 0]]);
libsais_prefetch(&PLCP[SA[i + prefetch_distance + 1]]);

LCP[i + 0] = PLCP[SA[i + 0]];
LCP[i + 1] = PLCP[SA[i + 1]];

libsais_prefetch(&PLCP[SA[i + prefetch_distance + 2]]);
libsais_prefetch(&PLCP[SA[i + prefetch_distance + 3]]);

LCP[i + 2] = PLCP[SA[i + 2]];
LCP[i + 3] = PLCP[SA[i + 3]];
}

for (j += prefetch_distance + 3; i < j; i += 1)
{
LCP[i] = PLCP[SA[i]];
}
}

static void libsais_compute_lcp_omp(const sa_sint_t * RESTRICT PLCP, const sa_sint_t * RESTRICT SA, sa_sint_t * RESTRICT LCP, sa_sint_t n, sa_sint_t threads)
{
#if defined(_OPENMP)
#pragma omp parallel num_threads(threads) if(threads > 1 && n >= 65536)
#endif
{
#if defined(_OPENMP)
fast_sint_t omp_thread_num = omp_get_thread_num();
fast_sint_t omp_num_threads = omp_get_num_threads();
#else
UNUSED(threads);

fast_sint_t omp_thread_num = 0;
fast_sint_t omp_num_threads = 1;
#endif
fast_sint_t omp_block_stride = (n / omp_num_threads) & (-16);
fast_sint_t omp_block_start = omp_thread_num * omp_block_stride;
fast_sint_t omp_block_size = omp_thread_num < omp_num_threads - 1 ? omp_block_stride : n - omp_block_start;

libsais_compute_lcp(PLCP, SA, LCP, omp_block_start, omp_block_size);
}
}

int32_t libsais_plcp(const uint8_t * T, const int32_t * SA, int32_t * PLCP, int32_t n)
{
if ((T == NULL) || (SA == NULL) || (PLCP == NULL) || (n < 0))
{
return -1;
}
else if (n <= 1)
{
if (n == 1) { PLCP[0] = 0; }
return 0;
}

libsais_compute_phi_omp(SA, PLCP, n, 1);
libsais_compute_plcp_omp(T, PLCP, n, 1);

return 0;
}

int32_t libsais_lcp(const int32_t * PLCP, const int32_t * SA, int32_t * LCP, int32_t n)
{
if ((PLCP == NULL) || (SA == NULL) || (LCP == NULL) || (n < 0))
{
return -1;
}
else if (n <= 1)
{
if (n == 1) { LCP[0] = PLCP[SA[0]]; }
return 0;
}

libsais_compute_lcp_omp(PLCP, SA, LCP, n, 1);

return 0;
}

#if defined(_OPENMP)

int32_t libsais_plcp_omp(const uint8_t * T, const int32_t * SA, int32_t * PLCP, int32_t n, int32_t threads)
{
if ((T == NULL) || (SA == NULL) || (PLCP == NULL) || (n < 0) || (threads < 0))
{
return -1;
}
else if (n <= 1)
{
if (n == 1) { PLCP[0] = 0; }
return 0;
}

threads = threads > 0 ? threads : omp_get_max_threads();

libsais_compute_phi_omp(SA, PLCP, n, threads);
libsais_compute_plcp_omp(T, PLCP, n, threads);

return 0;
}

int32_t libsais_lcp_omp(const int32_t * PLCP, const int32_t * SA, int32_t * LCP, int32_t n, int32_t threads)
{
if ((PLCP == NULL) || (SA == NULL) || (LCP == NULL) || (n < 0) || (threads < 0))
{
return -1;
}
else if (n <= 1)
{
if (n == 1) { LCP[0] = PLCP[SA[0]]; }
return 0;
}

threads = threads > 0 ? threads : omp_get_max_threads();

libsais_compute_lcp_omp(PLCP, SA, LCP, n, threads);

return 0;
}

#endif
Loading

0 comments on commit 223c7eb

Please sign in to comment.