Skip to content

Commit

Permalink
Optimize sort query, add some minimal unit tests
Browse files Browse the repository at this point in the history
  • Loading branch information
NickPhura committed May 10, 2024
1 parent 1d6972c commit b85c5a2
Show file tree
Hide file tree
Showing 3 changed files with 145 additions and 87 deletions.
4 changes: 2 additions & 2 deletions api/src/services/itis-service.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import axios from 'axios';
import { sortExactMatches } from '../utils/itis-sort';
import { sortTaxonSearchResults } from '../utils/itis-sort';
import { getLogger } from '../utils/logger';
import { TaxonSearchResult } from './taxonomy-service';

Expand Down Expand Up @@ -47,7 +47,7 @@ export class ItisService {
const sanitizedResponse = this._sanitizeItisData(response.data.response.docs);

Check warning on line 47 in api/src/services/itis-service.ts

View check run for this annotation

Codecov / codecov/patch

api/src/services/itis-service.ts#L47

Added line #L47 was not covered by tests

// Sort the results to place exact matches at the top
const sortedResponse = sortExactMatches(sanitizedResponse, searchTerms);
const sortedResponse = sortTaxonSearchResults(sanitizedResponse, searchTerms);

Check warning on line 50 in api/src/services/itis-service.ts

View check run for this annotation

Codecov / codecov/patch

api/src/services/itis-service.ts#L50

Added line #L50 was not covered by tests

// Return only a subset of the records
// More records than are returned here are requested from ITIS to help find and prioritize exact matches
Expand Down
80 changes: 80 additions & 0 deletions api/src/utils/itis-sort.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import { expect } from 'chai';
import { describe } from 'mocha';
import { TaxonSearchResult } from '../services/taxonomy-service';
import { sortTaxonSearchResults } from './itis-sort';

/**
 * Unit tests for the ITIS taxon search result sorting utility.
 */
describe('itis-sort', () => {
  describe('sortTaxonSearchResults', () => {
    it('Sorts the list when there is only 1 item', () => {
      const data: TaxonSearchResult[] = [
        {
          tsn: 1,
          commonNames: ['Moose', 'moose'],
          scientificName: 'Alces alces'
        }
      ];
      const searchTerms = ['Moose'];

      const result = sortTaxonSearchResults(data, searchTerms);

      expect(result.length).to.equal(data.length);
      expect(result[0].tsn).to.equal(1);
    });

    it('Sorts the list when there are exact matches', () => {
      const data: TaxonSearchResult[] = [
        {
          tsn: 1,
          commonNames: ['Goose', 'goose'],
          scientificName: 'Goose goose'
        },
        {
          tsn: 2,
          commonNames: ['Moose', 'moose'],
          scientificName: 'Moose moose'
        },
        {
          tsn: 3,
          commonNames: ['House'],
          scientificName: 'House'
        }
      ];
      const searchTerms = ['Moose'];

      const result = sortTaxonSearchResults(data, searchTerms);

      // The exact match (tsn 2) moves to the top; the remaining records keep their original relative order
      expect(result.length).to.equal(data.length);
      expect(result[0].tsn).to.equal(2);
      expect(result[1].tsn).to.equal(1);
      expect(result[2].tsn).to.equal(3);
    });

    it('Sorts the list when there are no exact matches', () => {
      const data: TaxonSearchResult[] = [
        {
          tsn: 1,
          commonNames: ['Goose', 'goose'],
          scientificName: 'Goose goose'
        },
        {
          tsn: 2,
          commonNames: ['Moose', 'moose'],
          scientificName: 'Moose moose'
        },
        {
          tsn: 3,
          commonNames: ['House'],
          scientificName: 'House'
        }
      ];
      const searchTerms = ['oose'];

      const result = sortTaxonSearchResults(data, searchTerms);

      // 'oose' is only a substring of the first two records, so they tie and stay in their original order
      expect(result.length).to.equal(data.length);
      expect(result[0].tsn).to.equal(1);
      expect(result[1].tsn).to.equal(2);
      expect(result[2].tsn).to.equal(3);
    });
  });
});
148 changes: 63 additions & 85 deletions api/src/utils/itis-sort.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,103 +3,81 @@ import { TaxonSearchResult } from '../services/taxonomy-service';
/**
 * Sorts taxon search results by how strongly each record matches the search terms.
 *
 * Each record is scored once, and records are returned from the highest score to the lowest:
 *  - +1 if any word in the scientific name or in a common name equals ANY of the search terms
 *       eg. ['Black', 'bear'] -> "Black" matches on "Black widow"
 *  - +2 if the scientific name or any common name CONTAINS the search terms joined by a space
 *       eg. ['Black', 'bear'] -> "Black bear" matches on "American black bear"
 *  - +3 if the scientific name or any common name is EXACTLY EQUAL to the search terms joined by a space
 *       eg. ['Wolf'] -> "Wolf" is prioritized over "Forest Wolf"
 *
 * Matching is case-insensitive and ignores leading/trailing whitespace in both names and search terms.
 * Records with equal scores keep their original relative order. Neither the input array nor its records
 * are modified; a new array is returned.
 *
 * @param {TaxonSearchResult[]} data
 * @param {string[]} searchTerms
 * @return {*}  {TaxonSearchResult[]}
 */
export const sortTaxonSearchResults = (data: TaxonSearchResult[], searchTerms: string[]): TaxonSearchResult[] => {
  // Score contributions for each kind of match, from weakest to strongest
  const WORD_MATCH_SCORE = 1;
  const CONTAINS_JOINED_SCORE = 2;
  const EQUALS_JOINED_SCORE = 3;

  const normalize = (value: string): string => value.toLowerCase().trim();

  // Blank search terms carry no information and would otherwise introduce double spaces in the joined term
  const searchTermsLower = searchTerms.map(normalize).filter((term) => term.length > 0);
  const searchTermSet = new Set(searchTermsLower);
  const searchTermJoined = searchTermsLower.join(' ');

  // With nothing to match against, every record is equally relevant: preserve the incoming order
  if (!searchTermJoined) {
    return [...data];
  }

  /**
   * Custom scoring function to determine how well a record matches the search terms
   *
   * @param {TaxonSearchResult} item
   * @return {*}  {number}
   */
  const calculateScore = (item: TaxonSearchResult): number => {
    // The scientific name and the common names are matched identically, so normalize them together
    const names = [item.scientificName, ...(item.commonNames ?? [])].map(normalize);

    let score = 0;

    // Check if any word in the scientific or common name matches ANY of the search terms
    if (names.some((name) => name.split(/\s+/).some((word) => searchTermSet.has(word)))) {
      score += WORD_MATCH_SCORE;
    }

    // Check if either the scientific name or any common name CONTAINS the search terms joined
    if (names.some((name) => name.includes(searchTermJoined))) {
      score += CONTAINS_JOINED_SCORE;
    }

    // Check if either the scientific name or any common name is EXACTLY EQUAL to the search terms joined
    if (names.includes(searchTermJoined)) {
      score += EQUALS_JOINED_SCORE;
    }

    return score;
  };

  // Score each record exactly once (rather than on every comparison), then sort a copy by descending score.
  // The original index breaks ties so the ordering is stable regardless of the runtime's sort implementation.
  return data
    .map((item, index) => ({ item, index, score: calculateScore(item) }))
    .sort((a, b) => b.score - a.score || a.index - b.index)
    .map(({ item }) => item);
};

0 comments on commit b85c5a2

Please sign in to comment.