Skip to content

Commit

Permalink
Small performance optimization for esa_matchfinder_advance API.
Browse files Browse the repository at this point in the history
  • Loading branch information
IlyaGrebnov committed Dec 3, 2023
1 parent bb75b1c commit df54ffb
Show file tree
Hide file tree
Showing 5 changed files with 89 additions and 3 deletions.
3 changes: 3 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
Changes in 1.2.0 (December 2, 2023)
- Small performance optimization for esa_matchfinder_advance API.

Changes in 1.1.0 (November 30, 2023)
- New API to find matches within specified sliding window.

Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ The esa-matchfinder finds all distance optimal matches (between min_match_length
The esa-matchfinder is released under the [Apache License Version 2.0](LICENSE "Apache license") and is considered suitable for production use. However, no warranty or fitness for a particular purpose is expressed or implied.

## Changes
* December 2, 2023 (1.2.0)
* Small performance optimization for esa_matchfinder_advance API.
* November 30, 2023 (1.1.0)
* New API to find matches within specified sliding window.
* June 19, 2022 (1.0.1)
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.1.0
1.2.0
81 changes: 81 additions & 0 deletions esa_matchfinder.c
Original file line number Diff line number Diff line change
Expand Up @@ -914,8 +914,89 @@ ESA_MATCHFINDER_MATCH esa_matchfinder_find_best_match_in_window(void * mf, uint6
}
}

/*
 * Advances the match finder position by `count`, visiting the skipped
 * positions from the last one down to the first one.
 *
 * For every skipped position the function starts at the entry referenced by
 * plcp_leaf_link[position] and follows the ESA_MF_PARENT_MASK links through
 * sa_parent_link, storing the position (shifted by ESA_MF_OFFSET_SHIFT) into
 * the ESA_MF_OFFSET_MASK field of each entry whose stored offset is smaller.
 * Because positions are visited in decreasing order, an entry that was already
 * updated during this call holds a larger offset and ends the walk early.
 *
 * mf    - match finder context; cast to ESA_MF_CONTEXT *.
 * count - number of positions to advance. The caller visible next to this
 *         function only dispatches here when count >= 64; the warm-up loop
 *         below reads plcp_leaf_link[target_position - 32], so smaller
 *         target positions are presumably not supported (confirm).
 */
static void esa_matchfinder_advance_backwards(void * mf, int32_t count)
{
ESA_MF_CONTEXT * ESA_MF_RESTRICT const matchfinder_ctx = (ESA_MF_CONTEXT *)mf;

const ptrdiff_t prefetch_distance = 4;
const uint64_t current_position = matchfinder_ctx->position;
// The context position is moved forward immediately; the loops below only use the two local copies.
const uint64_t target_position = matchfinder_ctx->position += (uint64_t)count;

uint64_t * ESA_MF_RESTRICT const sa_parent_link = matchfinder_ctx->sa_parent_link;
uint32_t * ESA_MF_RESTRICT const plcp_leaf_link = matchfinder_ctx->plcp_leaf_link;

// Reset the prefetch slots so the backward pipeline starts from all-zero references.
memset(matchfinder_ctx->prefetch, 0, sizeof(matchfinder_ctx->prefetch));

// Warm-up: runs the prefetch step for position = target_position + 31 down to target_position,
// which feeds the leaf references of positions target_position - 1 down to target_position - 32
// into the slots without updating any offsets.
for (uint64_t position = target_position + prefetch_distance * 8; position-- != target_position; )
{
// Slot row selected by the low bits of the position (position & 3).
uint64_t * ESA_MF_RESTRICT const prefetch = &matchfinder_ctx->prefetch[position & (prefetch_distance - 1)][0];

// Each slot entry is replaced by the parent reference of the following entry, and the new
// reference is prefetched; the last entry is refilled from plcp_leaf_link 32 positions below.
// NOTE: the slot assignments are side effects inside the prefetch arguments.
esa_matchfinder_prefetchw(&sa_parent_link[ (sa_parent_link[prefetch[0]] & ESA_MF_PARENT_MASK)]);
esa_matchfinder_prefetchw(&sa_parent_link[prefetch[0] = (sa_parent_link[prefetch[1]] & ESA_MF_PARENT_MASK)]);
esa_matchfinder_prefetchw(&sa_parent_link[prefetch[1] = (sa_parent_link[prefetch[2]] & ESA_MF_PARENT_MASK)]);
esa_matchfinder_prefetchw(&sa_parent_link[prefetch[2] = (sa_parent_link[prefetch[3]] & ESA_MF_PARENT_MASK)]);
esa_matchfinder_prefetchw(&sa_parent_link[prefetch[3] = (sa_parent_link[prefetch[4]] & ESA_MF_PARENT_MASK)]);
esa_matchfinder_prefetchw(&sa_parent_link[prefetch[4] = (sa_parent_link[prefetch[5]] & ESA_MF_PARENT_MASK)]);
esa_matchfinder_prefetchw(&sa_parent_link[prefetch[5] = (sa_parent_link[prefetch[6]] & ESA_MF_PARENT_MASK)]);
esa_matchfinder_prefetchw(&sa_parent_link[prefetch[6] = (plcp_leaf_link[position - 8 * prefetch_distance])]);
// Read-prefetch of the leaf link that will be loaded four iterations later.
esa_matchfinder_prefetchr(&plcp_leaf_link[position - 9 * prefetch_distance]);
}

// Main pass: position runs from target_position - 1 down to current_position.
for (uint64_t position = target_position; position-- != current_position; )
{
// Keep the prefetch pipeline running only while position - 32 is a valid index.
// NOTE(review): for position 32..35 the expression position - 9 * prefetch_distance wraps;
// the resulting address is only handed to esa_matchfinder_prefetchr - confirm this is intended.
if (position >= 8 * prefetch_distance)
{
uint64_t * ESA_MF_RESTRICT const prefetch = &matchfinder_ctx->prefetch[position & (prefetch_distance - 1)][0];

esa_matchfinder_prefetchw(&sa_parent_link[ (sa_parent_link[prefetch[0]] & ESA_MF_PARENT_MASK)]);
esa_matchfinder_prefetchw(&sa_parent_link[prefetch[0] = (sa_parent_link[prefetch[1]] & ESA_MF_PARENT_MASK)]);
esa_matchfinder_prefetchw(&sa_parent_link[prefetch[1] = (sa_parent_link[prefetch[2]] & ESA_MF_PARENT_MASK)]);
esa_matchfinder_prefetchw(&sa_parent_link[prefetch[2] = (sa_parent_link[prefetch[3]] & ESA_MF_PARENT_MASK)]);
esa_matchfinder_prefetchw(&sa_parent_link[prefetch[3] = (sa_parent_link[prefetch[4]] & ESA_MF_PARENT_MASK)]);
esa_matchfinder_prefetchw(&sa_parent_link[prefetch[4] = (sa_parent_link[prefetch[5]] & ESA_MF_PARENT_MASK)]);
esa_matchfinder_prefetchw(&sa_parent_link[prefetch[5] = (sa_parent_link[prefetch[6]] & ESA_MF_PARENT_MASK)]);
esa_matchfinder_prefetchw(&sa_parent_link[prefetch[6] = (plcp_leaf_link[position - 8 * prefetch_distance])]);
esa_matchfinder_prefetchr(&plcp_leaf_link[position - 9 * prefetch_distance]);
}

// Offset value for this position, placed in the field selected by ESA_MF_OFFSET_MASK.
const uint64_t new_offset = (uint64_t)position << ESA_MF_OFFSET_SHIFT;
uint64_t reference = plcp_leaf_link[position];
uint64_t interval = sa_parent_link[reference];

// Walk towards the parents, overwriting every stored offset that is smaller than new_offset;
// the bits outside ESA_MF_OFFSET_MASK are preserved. The walk stops at the first entry whose
// offset is already greater than or equal to new_offset.
while ((interval & ESA_MF_OFFSET_MASK) < new_offset)
{
sa_parent_link[reference] = (interval & (~ESA_MF_OFFSET_MASK)) + new_offset;
reference = interval & ESA_MF_PARENT_MASK;
interval = sa_parent_link[reference];
}
}

// Discard the backward pipeline state before refilling the slots in forward order.
memset(matchfinder_ctx->prefetch, 0, sizeof(matchfinder_ctx->prefetch));

// Forward warm-up: runs the prefetch step for position = target_position - 32 up to
// target_position - 1, feeding the leaf references of positions target_position up to
// target_position + 31 into the slots. Presumably this leaves the slots in the state the
// forward-moving code expects at target_position - TODO confirm against esa_matchfinder_advance.
// NOTE(review): reads plcp_leaf_link up to index target_position + 31; assumes those entries
// exist (e.g. padding past the end of the block) - confirm against the allocation code.
for (uint64_t position = target_position - prefetch_distance * 8; position != target_position; position += 1)
{
uint64_t * ESA_MF_RESTRICT const prefetch = &matchfinder_ctx->prefetch[position & (prefetch_distance - 1)][0];

esa_matchfinder_prefetchw(&sa_parent_link[ (sa_parent_link[prefetch[0]] & ESA_MF_PARENT_MASK)]);
esa_matchfinder_prefetchw(&sa_parent_link[prefetch[0] = (sa_parent_link[prefetch[1]] & ESA_MF_PARENT_MASK)]);
esa_matchfinder_prefetchw(&sa_parent_link[prefetch[1] = (sa_parent_link[prefetch[2]] & ESA_MF_PARENT_MASK)]);
esa_matchfinder_prefetchw(&sa_parent_link[prefetch[2] = (sa_parent_link[prefetch[3]] & ESA_MF_PARENT_MASK)]);
esa_matchfinder_prefetchw(&sa_parent_link[prefetch[3] = (sa_parent_link[prefetch[4]] & ESA_MF_PARENT_MASK)]);
esa_matchfinder_prefetchw(&sa_parent_link[prefetch[4] = (sa_parent_link[prefetch[5]] & ESA_MF_PARENT_MASK)]);
esa_matchfinder_prefetchw(&sa_parent_link[prefetch[5] = (sa_parent_link[prefetch[6]] & ESA_MF_PARENT_MASK)]);
esa_matchfinder_prefetchw(&sa_parent_link[prefetch[6] = (plcp_leaf_link[position + 8 * prefetch_distance])]);
esa_matchfinder_prefetchr(&plcp_leaf_link[position + 9 * prefetch_distance]);
}
}

void esa_matchfinder_advance(void * mf, int32_t count)
{
if (count >= /*ESA_MF_ADVANCE_BACKWARDS_THRESHOLD*/ 64)
{
esa_matchfinder_advance_backwards(mf, count);
return;
}

ESA_MF_CONTEXT * ESA_MF_RESTRICT const matchfinder_ctx = (ESA_MF_CONTEXT *)mf;

const ptrdiff_t prefetch_distance = 4;
Expand Down
4 changes: 2 additions & 2 deletions esa_matchfinder.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@ Please see the file LICENSE for full copyright and license details.
#define ESA_MATCHFINDER_BAD_PARAMETER (-1)

#define ESA_MATCHFINDER_VERSION_MAJOR 1
#define ESA_MATCHFINDER_VERSION_MINOR 1
#define ESA_MATCHFINDER_VERSION_MINOR 2
#define ESA_MATCHFINDER_VERSION_PATCH 0
#define ESA_MATCHFINDER_VERSION_STRING "1.1.0"
#define ESA_MATCHFINDER_VERSION_STRING "1.2.0"

#ifdef __cplusplus
extern "C" {
Expand Down

0 comments on commit df54ffb

Please sign in to comment.