From df54ffb86fd309a0a70ad0e76e815aca14893dfd Mon Sep 17 00:00:00 2001 From: Ilya Grebnov Date: Sat, 2 Dec 2023 17:39:55 -0800 Subject: [PATCH] Small performance optimization for esa_matchfinder_advance API. --- CHANGES | 3 ++ README.md | 2 ++ VERSION | 2 +- esa_matchfinder.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++ esa_matchfinder.h | 4 +-- 5 files changed, 89 insertions(+), 3 deletions(-) diff --git a/CHANGES b/CHANGES index 0bb6703..a43b1a1 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,6 @@ +Changes in 1.2.0 (December 2, 2023) +- Small performance optimization for esa_matchfinder_advance API. + Changes in 1.1.0 (November 30, 2023) - New API to find matches within specified sliding window. diff --git a/README.md b/README.md index 4a96723..65ed225 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,8 @@ The esa-matchfinder finds all distance optimal matches (between min_match_length The esa-matchfinder released under the [Apache License Version 2.0](LICENSE "Apache license") and is considered suitable for production use. However, no warranty or fitness for a particular purpose is expressed or implied. ## Changes +* December 2, 2023 (1.2.0) + * Small performance optimization for esa_matchfinder_advance API. * November 30, 2023 (1.1.0) * New API to find matches within specified sliding window. 
* June 19, 2022 (1.0.1) diff --git a/VERSION b/VERSION index 1cc5f65..867e524 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.1.0 \ No newline at end of file +1.2.0 \ No newline at end of file diff --git a/esa_matchfinder.c b/esa_matchfinder.c index 12656d0..5fcacb5 100644 --- a/esa_matchfinder.c +++ b/esa_matchfinder.c @@ -914,8 +914,89 @@ ESA_MATCHFINDER_MATCH esa_matchfinder_find_best_match_in_window(void * mf, uint6 } } +static void esa_matchfinder_advance_backwards(void * mf, int32_t count) /* Moves matchfinder_ctx->position forward by count and updates the sa_parent_link offset fields for the skipped positions, visiting them from highest to lowest. The only caller visible in this patch invokes it when count >= 64. */ +{ + ESA_MF_CONTEXT * ESA_MF_RESTRICT const matchfinder_ctx = (ESA_MF_CONTEXT *)mf; + /* current_position is the position before the advance; target_position is the position after it (the context field is updated right here). */ + const ptrdiff_t prefetch_distance = 4; + const uint64_t current_position = matchfinder_ctx->position; + const uint64_t target_position = matchfinder_ctx->position += (uint64_t)count; + + uint64_t * ESA_MF_RESTRICT const sa_parent_link = matchfinder_ctx->sa_parent_link; + uint32_t * ESA_MF_RESTRICT const plcp_leaf_link = matchfinder_ctx->plcp_leaf_link; + /* Warm-up for the descending pass: zero the prefetch slots, then visit position = target_position + 31 down to target_position so that plcp_leaf_link[position - 32], i.e. the 32 entries just below target_position, are fed into the slots. */ + memset(matchfinder_ctx->prefetch, 0, sizeof(matchfinder_ctx->prefetch)); + + for (uint64_t position = target_position + prefetch_distance * 8; position-- != target_position; ) + { + uint64_t * ESA_MF_RESTRICT const prefetch = &matchfinder_ctx->prefetch[position & (prefetch_distance - 1)][0]; + /* Row chosen by the low bits of position. Each statement below stores into slot k-1 the ESA_MF_PARENT_MASK field of sa_parent_link[slot k] and prefetches the sa_parent_link entry it names; slot 6 is refilled from plcp_leaf_link. */ + esa_matchfinder_prefetchw(&sa_parent_link[ (sa_parent_link[prefetch[0]] & ESA_MF_PARENT_MASK)]); + esa_matchfinder_prefetchw(&sa_parent_link[prefetch[0] = (sa_parent_link[prefetch[1]] & ESA_MF_PARENT_MASK)]); + esa_matchfinder_prefetchw(&sa_parent_link[prefetch[1] = (sa_parent_link[prefetch[2]] & ESA_MF_PARENT_MASK)]); + esa_matchfinder_prefetchw(&sa_parent_link[prefetch[2] = (sa_parent_link[prefetch[3]] & ESA_MF_PARENT_MASK)]); + esa_matchfinder_prefetchw(&sa_parent_link[prefetch[3] = (sa_parent_link[prefetch[4]] & ESA_MF_PARENT_MASK)]); + esa_matchfinder_prefetchw(&sa_parent_link[prefetch[4] = (sa_parent_link[prefetch[5]] & ESA_MF_PARENT_MASK)]); + esa_matchfinder_prefetchw(&sa_parent_link[prefetch[5] = (sa_parent_link[prefetch[6]] & 
ESA_MF_PARENT_MASK)]); + esa_matchfinder_prefetchw(&sa_parent_link[prefetch[6] = (plcp_leaf_link[position - 8 * prefetch_distance])]); + esa_matchfinder_prefetchr(&plcp_leaf_link[position - 9 * prefetch_distance]); + } + /* Main pass: process positions target_position - 1 down to current_position (inclusive). */ + for (uint64_t position = target_position; position-- != current_position; ) + { + if (position >= 8 * prefetch_distance) /* keeps the index position - 8 * prefetch_distance used below from wrapping below zero */ + { + uint64_t * ESA_MF_RESTRICT const prefetch = &matchfinder_ctx->prefetch[position & (prefetch_distance - 1)][0]; + + esa_matchfinder_prefetchw(&sa_parent_link[ (sa_parent_link[prefetch[0]] & ESA_MF_PARENT_MASK)]); + esa_matchfinder_prefetchw(&sa_parent_link[prefetch[0] = (sa_parent_link[prefetch[1]] & ESA_MF_PARENT_MASK)]); + esa_matchfinder_prefetchw(&sa_parent_link[prefetch[1] = (sa_parent_link[prefetch[2]] & ESA_MF_PARENT_MASK)]); + esa_matchfinder_prefetchw(&sa_parent_link[prefetch[2] = (sa_parent_link[prefetch[3]] & ESA_MF_PARENT_MASK)]); + esa_matchfinder_prefetchw(&sa_parent_link[prefetch[3] = (sa_parent_link[prefetch[4]] & ESA_MF_PARENT_MASK)]); + esa_matchfinder_prefetchw(&sa_parent_link[prefetch[4] = (sa_parent_link[prefetch[5]] & ESA_MF_PARENT_MASK)]); + esa_matchfinder_prefetchw(&sa_parent_link[prefetch[5] = (sa_parent_link[prefetch[6]] & ESA_MF_PARENT_MASK)]); + esa_matchfinder_prefetchw(&sa_parent_link[prefetch[6] = (plcp_leaf_link[position - 8 * prefetch_distance])]); + esa_matchfinder_prefetchr(&plcp_leaf_link[position - 9 * prefetch_distance]); /* NOTE(review): for position in [32, 36) this unsigned index wraps around; only the element's address is formed and handed to esa_matchfinder_prefetchr - confirm that is acceptable */ + } + /* Start at the reference stored in plcp_leaf_link[position] and follow the ESA_MF_PARENT_MASK links, overwriting the offset field with new_offset on every node whose stored offset is still smaller; stop at the first node that is not smaller. */ + const uint64_t new_offset = (uint64_t)position << ESA_MF_OFFSET_SHIFT; + uint64_t reference = plcp_leaf_link[position]; + uint64_t interval = sa_parent_link[reference]; + + while ((interval & ESA_MF_OFFSET_MASK) < new_offset) + { + sa_parent_link[reference] = (interval & (~ESA_MF_OFFSET_MASK)) + new_offset; + reference = interval & ESA_MF_PARENT_MASK; + interval = sa_parent_link[reference]; + } + } + /* Zero the prefetch slots again and warm them up in ascending order: positions target_position - 32 .. target_position - 1 feed plcp_leaf_link[position + 32], i.e. entries target_position .. target_position + 31. Presumably this restores the state that forward-running code expects - TODO confirm. NOTE(review): this reads plcp_leaf_link up to index target_position + 31; confirm the array extends that far. */ + memset(matchfinder_ctx->prefetch, 0, sizeof(matchfinder_ctx->prefetch)); + + for (uint64_t position = target_position - prefetch_distance * 8; position != 
target_position; position += 1) + { + uint64_t * ESA_MF_RESTRICT const prefetch = &matchfinder_ctx->prefetch[position & (prefetch_distance - 1)][0]; + + esa_matchfinder_prefetchw(&sa_parent_link[ (sa_parent_link[prefetch[0]] & ESA_MF_PARENT_MASK)]); + esa_matchfinder_prefetchw(&sa_parent_link[prefetch[0] = (sa_parent_link[prefetch[1]] & ESA_MF_PARENT_MASK)]); + esa_matchfinder_prefetchw(&sa_parent_link[prefetch[1] = (sa_parent_link[prefetch[2]] & ESA_MF_PARENT_MASK)]); + esa_matchfinder_prefetchw(&sa_parent_link[prefetch[2] = (sa_parent_link[prefetch[3]] & ESA_MF_PARENT_MASK)]); + esa_matchfinder_prefetchw(&sa_parent_link[prefetch[3] = (sa_parent_link[prefetch[4]] & ESA_MF_PARENT_MASK)]); + esa_matchfinder_prefetchw(&sa_parent_link[prefetch[4] = (sa_parent_link[prefetch[5]] & ESA_MF_PARENT_MASK)]); + esa_matchfinder_prefetchw(&sa_parent_link[prefetch[5] = (sa_parent_link[prefetch[6]] & ESA_MF_PARENT_MASK)]); + esa_matchfinder_prefetchw(&sa_parent_link[prefetch[6] = (plcp_leaf_link[position + 8 * prefetch_distance])]); + esa_matchfinder_prefetchr(&plcp_leaf_link[position + 9 * prefetch_distance]); + } +} + void esa_matchfinder_advance(void * mf, int32_t count) { + if (count >= /*ESA_MF_ADVANCE_BACKWARDS_THRESHOLD*/ 64) + { + esa_matchfinder_advance_backwards(mf, count); + return; + } + ESA_MF_CONTEXT * ESA_MF_RESTRICT const matchfinder_ctx = (ESA_MF_CONTEXT *)mf; const ptrdiff_t prefetch_distance = 4; diff --git a/esa_matchfinder.h b/esa_matchfinder.h index 4e6a8d1..c69f780 100644 --- a/esa_matchfinder.h +++ b/esa_matchfinder.h @@ -33,9 +33,9 @@ Please see the file LICENSE for full copyright and license details. #define ESA_MATCHFINDER_BAD_PARAMETER (-1) #define ESA_MATCHFINDER_VERSION_MAJOR 1 -#define ESA_MATCHFINDER_VERSION_MINOR 1 +#define ESA_MATCHFINDER_VERSION_MINOR 2 #define ESA_MATCHFINDER_VERSION_PATCH 0 -#define ESA_MATCHFINDER_VERSION_STRING "1.1.0" +#define ESA_MATCHFINDER_VERSION_STRING "1.2.0" #ifdef __cplusplus extern "C" {