diff --git a/api/src/openapi/schemas/csv.ts b/api/src/openapi/schemas/csv.ts
new file mode 100644
index 0000000000..8c18600e35
--- /dev/null
+++ b/api/src/openapi/schemas/csv.ts
@@ -0,0 +1,41 @@
+import { OpenAPIV3 } from 'openapi-types';
+
+/**
+ * OpenAPI schema describing a single CSV validation error; no additional properties are allowed.
+ * `error`, `solution` and `row` are required; `values`, `cell` and `header` are optional.
+ */
+export const CSVErrorSchema: OpenAPIV3.SchemaObject = {
+  title: 'CSV validation error object',
+  type: 'object',
+  additionalProperties: false,
+  required: ['error', 'solution', 'row'],
+  properties: {
+    error: {
+      description: 'The error message',
+      type: 'string'
+    },
+    solution: {
+      description: 'The error solution or instructions to resolve',
+      type: 'string'
+    },
+    values: {
+      description: 'The list of allowed values if applicable',
+      type: 'array',
+      items: {
+        oneOf: [{ type: 'string' }, { type: 'number' }]
+      }
+    },
+    cell: {
+      description: 'The CSV cell value',
+      oneOf: [{ type: 'string' }, { type: 'number' }]
+    },
+    header: {
+      description: 'The header name used in the CSV file',
+      type: 'string'
+    },
+    row: {
+      description: 'The row index the error occurred. Header row index 0. First data row index 1.',
+      type: 'number'
+    }
+  }
+};
diff --git a/api/src/paths/project/{projectId}/survey/{surveyId}/critters/import.test.ts b/api/src/paths/project/{projectId}/survey/{surveyId}/critters/import.test.ts index f31ba7d465..6c24e5b70f 100644 --- a/api/src/paths/project/{projectId}/survey/{surveyId}/critters/import.test.ts +++ b/api/src/paths/project/{projectId}/survey/{surveyId}/critters/import.test.ts @@ -1,19 +1,20 @@ import { expect } from 'chai'; import sinon from 'sinon'; import * as db from '../../../../../../database/db'; -import * as strategy from '../../../../../../services/import-services/import-csv'; +import { ImportCrittersService } from '../../../../../../services/import-services/critter/import-critters-service'; import { getMockDBConnection, getRequestHandlerMocks } from '../../../../../../__mocks__/db'; -import { importCsv } from './import'; +import { importCritterCSV } from './import'; describe('importCsv', () => { afterEach(() => { sinon.restore(); }); - it('returns imported critters', async () => { + it('status 200 when successful', async () => { const mockDBConnection = getMockDBConnection({ open: sinon.stub(), commit: sinon.stub(), release: sinon.stub() }); const getDBConnectionStub = sinon.stub(db, 'getDBConnection').returns(mockDBConnection); - const mockImportCSV = sinon.stub(strategy, 'importCSV').resolves([1, 2]); + + const importCSVWorksheetStub = sinon.stub(ImportCrittersService.prototype, 'importCSVWorksheet'); const mockFile = { originalname: 'test.csv', mimetype: 'test.csv', buffer: Buffer.alloc(1) } as Express.Multer.File; @@ -22,7 +23,7 @@ describe('importCsv', () => { mockReq.files = [mockFile]; mockReq.params.surveyId = '1'; - const requestHandler = importCsv(); + const requestHandler = importCritterCSV(); await requestHandler(mockReq, mockRes, mockNext); @@ -30,9 +31,10 @@ describe('importCsv', () => { expect(getDBConnectionStub).to.have.been.calledOnce; - expect(mockImportCSV).to.have.been.calledOnce; + 
expect(importCSVWorksheetStub).to.have.been.calledOnce; - expect(mockRes.json).to.have.been.calledOnceWithExactly({ survey_critter_ids: [1, 2] }); + expect(mockRes.status).to.have.been.calledOnceWithExactly(200); + expect(mockRes.send).to.have.been.calledOnceWithExactly(); expect(mockDBConnection.commit).to.have.been.calledOnce; expect(mockDBConnection.release).to.have.been.calledOnce; diff --git a/api/src/paths/project/{projectId}/survey/{surveyId}/critters/import.ts b/api/src/paths/project/{projectId}/survey/{surveyId}/critters/import.ts index 798665d078..f3b83117fa 100644 --- a/api/src/paths/project/{projectId}/survey/{surveyId}/critters/import.ts +++ b/api/src/paths/project/{projectId}/survey/{surveyId}/critters/import.ts @@ -4,11 +4,11 @@ import { PROJECT_PERMISSION, SYSTEM_ROLE } from '../../../../../../constants/rol import { getDBConnection } from '../../../../../../database/db'; import { csvFileSchema } from '../../../../../../openapi/schemas/file'; import { authorizeRequestHandler } from '../../../../../../request-handlers/security/authorization'; -import { ImportCrittersStrategy } from '../../../../../../services/import-services/critter/import-critters-strategy'; -import { importCSV } from '../../../../../../services/import-services/import-csv'; +import { ImportCrittersService } from '../../../../../../services/import-services/critter/import-critters-service'; import { getLogger } from '../../../../../../utils/logger'; import { parseMulterFile } from '../../../../../../utils/media/media-utils'; import { getFileFromRequest } from '../../../../../../utils/request'; +import { constructXLSXWorkbook, getDefaultWorksheet } from '../../../../../../utils/xlsx-utils/worksheet-utils'; const defaultLog = getLogger('/api/project/{projectId}/survey/{surveyId}/critters/import'); @@ -28,7 +28,7 @@ export const POST: Operation = [ ] }; }), - importCsv() + importCritterCSV() ]; POST.apiDoc = { @@ -83,25 +83,7 @@ POST.apiDoc = { }, responses: { 200: { - description: 
'Import OK', - content: { - 'application/json': { - schema: { - type: 'object', - additionalProperties: false, - required: ['survey_critter_ids'], - properties: { - survey_critter_ids: { - type: 'array', - items: { - type: 'integer', - minimum: 1 - } - } - } - } - } - } + description: 'Import OK' }, 400: { $ref: '#/components/responses/400' @@ -126,26 +108,26 @@ POST.apiDoc = { * * @return {*} {RequestHandler} */ -export function importCsv(): RequestHandler { +export function importCritterCSV(): RequestHandler { return async (req, res) => { const surveyId = Number(req.params.surveyId); const rawFile = getFileFromRequest(req); const connection = getDBConnection(req.keycloak_token); + const mediaFile = parseMulterFile(rawFile); + const worksheet = getDefaultWorksheet(constructXLSXWorkbook(mediaFile)); + try { await connection.open(); - // Critter CSV import strategy - child of CSVImportStrategy - const importCsvCritters = new ImportCrittersStrategy(connection, surveyId); - - const surveyCritterIds = await importCSV(parseMulterFile(rawFile), importCsvCritters); + const importService = new ImportCrittersService(connection, worksheet, surveyId); - defaultLog.info({ label: 'importCritterCsv', message: 'result', survey_critter_ids: surveyCritterIds }); + await importService.importCSVWorksheet(); await connection.commit(); - return res.status(200).json({ survey_critter_ids: surveyCritterIds }); + return res.status(200).send(); } catch (error) { defaultLog.error({ label: 'importCritterCsv', message: 'error', error }); await connection.rollback(); diff --git a/api/src/services/import-services/critter/critter-header-configs.test.ts b/api/src/services/import-services/critter/critter-header-configs.test.ts new file mode 100644 index 0000000000..d965a70a2f --- /dev/null +++ b/api/src/services/import-services/critter/critter-header-configs.test.ts @@ -0,0 +1,311 @@ +import { expect } from 'chai'; +import xlsx from 'xlsx'; +import { CSVConfigUtils } from 
'../../../utils/csv-utils/csv-config-utils'; +import { CSVConfig } from '../../../utils/csv-utils/csv-config-validation.interface'; +import { NestedRecord } from '../../../utils/nested-record'; +import { + getCritterAliasCellValidator, + getCritterCollectionUnitCellSetter, + getCritterCollectionUnitCellValidator, + getCritterSexCellSetter, + getCritterSexCellValidator, + getWlhIDCellValidator +} from './critter-header-configs'; + +const mockConfig: CSVConfig = { + staticHeadersConfig: { + ALIAS: { aliases: [] } + }, + ignoreDynamicHeaders: true +}; + +describe('critter-header-configs', () => { + describe('getCritterAliasCellValidator', () => { + it('should return a single error when cell value is invalid', () => { + const badCellValues = [null, undefined, '', ' ', {}]; + for (const badCellValue of badCellValues) { + const critterAliasValidator = getCritterAliasCellValidator( + new Set(), + new CSVConfigUtils(xlsx.utils.json_to_sheet([]), mockConfig) + ); + + const result = critterAliasValidator({ cell: badCellValue, row: {}, header: 'ALIAS', rowIndex: 0 }); + + expect(result.length).to.be.equal(1); + } + }); + + it('should return an empty array if the cell is valid', () => { + const mockWorksheet = xlsx.utils.json_to_sheet([{ ALIAS: 'alias1' }, { ALIAS: 'alias2' }, { ALIAS: 'alias3' }]); + const surveyAliases = new Set(['alias1', 'alias2']); + const configUtils = new CSVConfigUtils(mockWorksheet, mockConfig); + + const critterAliasValidator = getCritterAliasCellValidator(surveyAliases, configUtils); + + const result = critterAliasValidator({ cell: 'alias4', row: {}, header: 'ALIAS', rowIndex: 0 }); + + expect(result).to.be.deep.equal([]); + }); + + it('should return single error when cell value already exists in survey aliases', () => { + const mockWorksheet = xlsx.utils.json_to_sheet([{ ALIAS: 'alias1' }, { ALIAS: 'alias2' }, { ALIAS: 'alias3' }]); + const surveyAliases = new Set(['alias1', 'alias2']); + const configUtils = new CSVConfigUtils(mockWorksheet, 
mockConfig); + + const critterAliasValidator = getCritterAliasCellValidator(surveyAliases, configUtils); + + const result = critterAliasValidator({ cell: 'alias1', row: {}, header: 'ALIAS', rowIndex: 0 }); + + expect(result).to.be.deep.equal([ + { + error: 'Critter alias already exists in the Survey', + solution: 'Update the alias to be unique' + } + ]); + }); + + it('should return single error when cell value already exists in row aliases', () => { + const mockWorksheet = xlsx.utils.json_to_sheet([{ ALIAS: 'alias1' }, { ALIAS: 'alias3' }, { ALIAS: 'alias3' }]); + const surveyAliases = new Set(['alias1', 'alias2']); + const configUtils = new CSVConfigUtils(mockWorksheet, mockConfig); + + const critterAliasValidator = getCritterAliasCellValidator(surveyAliases, configUtils); + + const result = critterAliasValidator({ cell: 'alias3', row: {}, header: 'ALIAS', rowIndex: 0 }); + + expect(result).to.be.deep.equal([ + { + error: 'Critter alias already exists in the CSV', + solution: 'Update the alias to be unique' + } + ]); + }); + }); + + describe('getCritterCollectionUnitCellValidator', () => { + it('should return an empty array if the cell is valid', () => { + const cellValidator = getCritterCollectionUnitCellValidator( + new NestedRecord({ + 1: { + HEADER: { + unit: 'uuid' + } + } + }), + new CSVConfigUtils(xlsx.utils.json_to_sheet([]), mockConfig) + ); + + const cellValues = ['unit', undefined]; + + for (const cell of cellValues) { + const result = cellValidator({ cell: cell, row: { ITIS_TSN: 1 }, header: 'HEADER', rowIndex: 0 }); + + expect(result).to.be.deep.equal([]); + } + }); + + it('should return a single error when the tsn has no collection units', () => { + const cellValidator = getCritterCollectionUnitCellValidator( + new NestedRecord({ + 1: { + HEADER: { + unit: 'uuid' + } + } + }), + new CSVConfigUtils(xlsx.utils.json_to_sheet([]), mockConfig) + ); + + const result = cellValidator({ cell: 'unit', row: { ITIS_TSN: 2 }, header: 'HEADER', rowIndex: 0 }); + + 
expect(result[0].error).to.be.equal('Collection units not found for TSN: 2'); + }); + + it('should return a single error when collection unit header invalid', () => { + const cellValidator = getCritterCollectionUnitCellValidator( + new NestedRecord({ + 1: { + HEADER: { + unit: 'uuid' + } + } + }), + new CSVConfigUtils(xlsx.utils.json_to_sheet([]), mockConfig) + ); + + const result = cellValidator({ cell: 'unit', row: { ITIS_TSN: 1 }, header: 'HEADER2', rowIndex: 0 }); + + expect(result[0].error).to.be.equal('Invalid collection category header'); + }); + + it('should return a single error when collection unit value invalid', () => { + const cellValidator = getCritterCollectionUnitCellValidator( + new NestedRecord({ + 1: { + HEADER: { + unit: 'uuid' + } + } + }), + new CSVConfigUtils(xlsx.utils.json_to_sheet([]), mockConfig) + ); + + const result = cellValidator({ cell: 'unit2', row: { ITIS_TSN: 1 }, header: 'HEADER', rowIndex: 0 }); + + expect(result[0].error).to.be.equal('Invalid collection unit cell value'); + }); + }); + + describe('getCritterCollectionUnitSetter', () => { + it('should return undefined when cell value is falsy', () => { + const cellSetter = getCritterCollectionUnitCellSetter( + new NestedRecord(), + new CSVConfigUtils(xlsx.utils.json_to_sheet([]), mockConfig) + ); + + const result = cellSetter({ cell: '', row: {}, header: 'HEADER', rowIndex: 0 }); + + expect(result).to.be.equal(undefined); + }); + + it('should return the uuid', () => { + const cellSetter = getCritterCollectionUnitCellSetter( + new NestedRecord({ + 1: { + HEADER: { + unit: 'uuid' + } + } + }), + new CSVConfigUtils(xlsx.utils.json_to_sheet([]), mockConfig) + ); + + const result = cellSetter({ cell: 'unit', row: { ITIS_TSN: 1 }, header: 'HEADER', rowIndex: 0 }); + + expect(result).to.be.equal('uuid'); + }); + }); + + describe('getCritterSexCellValidator', () => { + it('should return an empty array if the cell is valid', () => { + const cellValidator = getCritterSexCellValidator( + 
new NestedRecord({ + 1: { + male: 'uuid' + } + }), + new CSVConfigUtils(xlsx.utils.json_to_sheet([]), mockConfig) + ); + + const cellValues = ['male', 'MALE', undefined]; + + for (const cell of cellValues) { + const result = cellValidator({ cell: cell, row: { ITIS_TSN: 1 }, header: 'HEADER', rowIndex: 0 }); + + expect(result).to.be.deep.equal([]); + } + }); + + it('should return a single error when the cell value is invalid', () => { + const cellValidator = getCritterSexCellValidator( + new NestedRecord({ + 1: { + male: 'uuid' + } + }), + new CSVConfigUtils(xlsx.utils.json_to_sheet([]), mockConfig) + ); + + const cellValues = ['', 0]; + + for (const cell of cellValues) { + const result = cellValidator({ cell: cell, row: { ITIS_TSN: 1 }, header: 'HEADER', rowIndex: 0 }); + + expect(result.length).to.be.equal(1); + } + }); + + it('should return a single error when rowDictionary has no reference to TSN', () => { + const cellValidator = getCritterSexCellValidator( + new NestedRecord({ + 1: { + male: 'uuid' + } + }), + new CSVConfigUtils(xlsx.utils.json_to_sheet([]), mockConfig) + ); + + const result = cellValidator({ cell: 'male', row: { ITIS_TSN: 2 }, header: 'HEADER', rowIndex: 0 }); + + expect(result[0].error).to.be.equal('Sex is not a supported attribute for TSN: 2'); + }); + + it('should return a single error when rowDictionary has no reference to sex value', () => { + const cellValidator = getCritterSexCellValidator( + new NestedRecord({ + 1: { + male: 'uuid' + } + }), + new CSVConfigUtils(xlsx.utils.json_to_sheet([]), mockConfig) + ); + + const result = cellValidator({ cell: 'maled', row: { ITIS_TSN: 1 }, header: 'HEADER', rowIndex: 0 }); + + expect(result[0].error).to.be.equal('Sex cell value is invalid'); + }); + }); + + describe('getCritterSexCellSetter', () => { + it('should return the uuid', () => { + const nestedRecord = new NestedRecord({ + 1: { + male: 'uuid' + } + }); + const cellSetter = getCritterSexCellSetter( + nestedRecord, + new 
CSVConfigUtils(xlsx.utils.json_to_sheet([]), mockConfig) + ); + + const result = cellSetter({ cell: 'MALE', row: { ITIS_TSN: 1 }, header: 'HEADER', rowIndex: 0 }); + + expect(result).to.be.equal('uuid'); + }); + }); + + describe('getWlhIDCellValidator', () => { + it('should return an empty array if the cell is valid', () => { + const wlhIDValidator = getWlhIDCellValidator(new CSVConfigUtils(xlsx.utils.json_to_sheet([]), mockConfig)); + + const result = wlhIDValidator({ cell: '10-01111', row: {}, header: 'HEADER', rowIndex: 0 }); + + expect(result).to.be.deep.equal([]); + }); + + it('should return no errors when cell is undefined', () => { + const wlhIDValidator = getWlhIDCellValidator(new CSVConfigUtils(xlsx.utils.json_to_sheet([]), mockConfig)); + + const result = wlhIDValidator({ cell: undefined, row: {}, header: 'HEADER', rowIndex: 0 }); + + expect(result).to.be.deep.equal([]); + }); + + it('should return single error when cell value does not pass regex', () => { + const wlhIDValidator = getWlhIDCellValidator(new CSVConfigUtils(xlsx.utils.json_to_sheet([]), mockConfig)); + + const badWlhIds = ['100111', '1-011111', '100-222', '21-']; + + badWlhIds.forEach((badWlhId) => { + const result = wlhIDValidator({ cell: badWlhId, row: {}, header: 'HEADER', rowIndex: 0 }); + + expect(result).to.be.deep.equal([ + { + error: `Invalid Wildlife Health ID format`, + solution: `Update the Wildlife Health ID to match the expected format 'XX-XXXX'` + } + ]); + }); + }); + }); +}); diff --git a/api/src/services/import-services/critter/critter-header-configs.ts b/api/src/services/import-services/critter/critter-header-configs.ts new file mode 100644 index 0000000000..2ab817c9bd --- /dev/null +++ b/api/src/services/import-services/critter/critter-header-configs.ts @@ -0,0 +1,259 @@ +import { z } from 'zod'; +import { CSVConfigUtils } from '../../../utils/csv-utils/csv-config-utils'; +import { + CSVCellSetter, + CSVCellValidator, + CSVError, + CSVParams +} from 
'../../../utils/csv-utils/csv-config-validation.interface';
+import { validateZodCell } from '../../../utils/csv-utils/csv-header-configs';
+import { NestedRecord } from '../../../utils/nested-record';
+import { CritterCSVStaticHeader } from './import-critters-service';
+
+/**
+ * Get the critter alias cell validator.
+ *
+ * Rules:
+ * 1. The cell can be a string with a length between 1 and 50
+ * 2. The cell can be a number with a min value of 0
+ * 3. The cell must be unique in the survey
+ * 4. The cell must be unique in the CSV
+ *
+ * Rules 1 and 2 are checked first. When either fails, only those errors are returned and the
+ * uniqueness checks (rules 3 and 4) are skipped.
+ *
+ * @param {Set<string>} surveyAliases The survey aliases.
+ * @param {CSVConfigUtils} configUtils The CSV config utils.
+ * @returns {*} {CSVCellValidator} The validate cell callback
+ */
+export const getCritterAliasCellValidator = (
+  surveyAliases: Set<string>,
+  configUtils: CSVConfigUtils
+): CSVCellValidator => {
+  return (params: CSVParams) => {
+    const cellErrors = validateZodCell(params, z.union([z.string().trim().min(1).max(50), z.number().min(0)]));
+
+    // A malformed cell cannot be meaningfully compared against other aliases, so stop here
+    if (cellErrors.length) {
+      return cellErrors;
+    }
+
+    // Check if the alias already exists in the survey
+    if (surveyAliases.has(String(params.cell))) {
+      cellErrors.push({
+        error: `Critter alias already exists in the Survey`,
+        solution: `Update the alias to be unique`
+      });
+    }
+
+    // Check if the alias already exists in the CSV.
+    // The lookup is deferred to this point so it only runs for well-formed cells.
+    if (!configUtils.isCellUnique('ALIAS', params.cell)) {
+      cellErrors.push({
+        error: `Critter alias already exists in the CSV`,
+        solution: `Update the alias to be unique`
+      });
+    }
+
+    return cellErrors;
+  };
+};
+
+/**
+ * Get the critter collection unit cell validator.
+ *
+ * Rules:
+ * 1. The header must be a valid collection category for the TSN
+ * 2. The cell value must be a valid collection unit for the collection category
+ *
+ * @param {NestedRecord} rowDictionary The row dictionary.
+ * @param {CSVConfigUtils} configUtils The CSV config utils.
+ * @returns {*} {CSVCellValidator} The validate cell callback
+ */
+export const getCritterCollectionUnitCellValidator = (
+  rowDictionary: NestedRecord,
+  configUtils: CSVConfigUtils
+): CSVCellValidator => {
+  return (params: CSVParams) => {
+    // Nothing to validate when the cell was left empty
+    if (params.cell === undefined) {
+      return [];
+    }
+
+    const tsn = Number(configUtils.getCellValue('ITIS_TSN', params.row));
+    const unit = String(params.cell);
+    const category = params.header; // the current header is used as the collection category
+
+    // Level 1: the row TSN must have an entry in the dictionary
+    const categoriesForTsn = rowDictionary.get(tsn);
+
+    if (!categoriesForTsn) {
+      return [
+        {
+          error: `Collection units not found for TSN: ${tsn}`,
+          solution: `Validate TSN is correct and has collection units`
+        }
+      ];
+    }
+
+    // Level 2: the header must be a known collection category under that TSN
+    const unitsForCategory = rowDictionary.get(tsn, category);
+
+    if (!unitsForCategory) {
+      return [
+        {
+          error: `Invalid collection category header`,
+          solution: `Use valid collection unit category header`,
+          values: Object.keys(categoriesForTsn)
+        }
+      ];
+    }
+
+    // Level 3: the cell value must be a known unit under that category
+    if (rowDictionary.get(tsn, category, unit)) {
+      return [];
+    }
+
+    return [
+      {
+        error: `Invalid collection unit cell value`,
+        solution: `Use valid collection unit cell value`,
+        values: Object.keys(unitsForCategory)
+      }
+    ];
+  };
+};
+
+/**
+ * Get the collection unit cell setter.
+ *
+ * @param {NestedRecord} rowDictionary The row dictionary.
+ * @param {CSVConfigUtils} configUtils The CSV config utils.
+ * @returns {*} {CSVCellSetter} The set cell value callback
+ */
+export const getCritterCollectionUnitCellSetter = (
+  rowDictionary: NestedRecord,
+  configUtils: CSVConfigUtils
+): CSVCellSetter => {
+  // An undefined cell resolves to undefined; otherwise look up TSN -> header (category) -> cell (unit)
+  return (params: CSVParams) =>
+    params.cell === undefined
+      ? undefined
+      : rowDictionary.get(
+          Number(configUtils.getCellValue('ITIS_TSN', params.row)),
+          params.header,
+          String(params.cell)
+        );
+};
+
+/**
+ * Build the cell validator for the critter sex column.
+ *
+ * An undefined cell is accepted as-is. Otherwise:
+ * 1. The row TSN must have an entry in the row dictionary
+ * 2. The cell value must resolve to an entry under that TSN
+ *
+ * @param {NestedRecord} rowDictionary The row dictionary.
+ * @param {CSVConfigUtils} configUtils The CSV config utils.
+ * @returns {*} {CSVCellValidator} The validate cell callback
+ */
+export const getCritterSexCellValidator = (
+  rowDictionary: NestedRecord,
+  configUtils: CSVConfigUtils
+): CSVCellValidator => {
+  return (params: CSVParams) => {
+    // Sex is optional: an empty cell passes validation
+    if (params.cell === undefined) {
+      return [];
+    }
+
+    const tsn = Number(configUtils.getCellValue('ITIS_TSN', params.row));
+    const sex = String(params.cell);
+
+    // The TSN must have sex options registered in the dictionary
+    const sexOptionsForTsn = rowDictionary.get(tsn);
+
+    if (!sexOptionsForTsn) {
+      return [
+        {
+          error: `Sex is not a supported attribute for TSN: ${tsn}`,
+          solution: `Use a valid TSN that supports sex, or contact a system administrator to add additional sex values.`
+        }
+      ];
+    }
+
+    // The cell value must match one of the options for this TSN
+    if (rowDictionary.get(tsn, sex)) {
+      return [];
+    }
+
+    return [
+      {
+        error: `Sex cell value is invalid`,
+        solution: `Use valid sex option`,
+        values: Object.keys(sexOptionsForTsn)
+      }
+    ];
+  };
+};
+
+/**
+
* Get the critter sex cell setter.
+ *
+ * Resolves the cell to the value stored in the row dictionary under the row TSN and the cell value.
+ * An undefined cell resolves to undefined.
+ *
+ * @param {NestedRecord} rowDictionary The row dictionary.
+ * @param {CSVConfigUtils} configUtils The CSV config utils.
+ * @returns {*} {CSVCellSetter} The set cell value callback
+ */
+export const getCritterSexCellSetter = (
+  rowDictionary: NestedRecord,
+  configUtils: CSVConfigUtils
+): CSVCellSetter => {
+  return (params: CSVParams) => {
+    if (params.cell === undefined) {
+      return undefined;
+    }
+
+    const rowTsn = Number(configUtils.getCellValue('ITIS_TSN', params.row));
+    const sexCellValue = String(params.cell);
+
+    return rowDictionary.get(rowTsn, sexCellValue);
+  };
+};
+
+/**
+ * Get the Wildlife Health ID header cell validator.
+ *
+ * Rules:
+ * 1. The Wildlife Health ID must be undefined, or start with two digits followed by a dash and at
+ *    least one more character (reported to the user as the format 'XX-XXXX')
+ * 2. The Wildlife Health ID must be unique in the CSV
+ *
+ * Both rules are evaluated for a defined cell, so a single cell can produce two errors.
+ *
+ * @param {CSVConfigUtils} configUtils The CSV config utils.
+ * @returns {*} {CSVCellValidator} The validate cell callback
+ */
+export const getWlhIDCellValidator = (configUtils: CSVConfigUtils): CSVCellValidator => {
+  return (params: CSVParams) => {
+    const cellErrors: CSVError[] = [];
+
+    // The Wildlife Health ID is optional
+    if (params.cell === undefined) {
+      return [];
+    }
+
+    const isWlhIdUnique = configUtils.isCellUnique('WLH_ID', params.cell);
+
+    // Only the prefix is enforced: two digits, a dash, then one or more characters of any kind
+    if (!/^\d{2}-.+/.test(String(params.cell))) {
+      cellErrors.push({
+        error: `Invalid Wildlife Health ID format`,
+        solution: `Update the Wildlife Health ID to match the expected format 'XX-XXXX'`
+      });
+    }
+
+    if (!isWlhIdUnique) {
+      cellErrors.push({
+        error: `Wildlife Health ID already exists in the CSV`,
+        solution: `Update the Wildlife Health ID to be unique`
+      });
+    }
+
+    return cellErrors;
+  };
+};
diff --git a/api/src/services/import-services/critter/import-critters-service.test.ts b/api/src/services/import-services/critter/import-critters-service.test.ts
new file mode 100644
index 0000000000..3a4015ba93
--- /dev/null
+++ b/api/src/services/import-services/critter/import-critters-service.test.ts
@@ -0,0 +1,280 @@
+import chai, { expect } from 'chai'; +import sinon from 'sinon'; +import sinonChai from 'sinon-chai'; +import xlsx from 'xlsx'; +import { CSVConfigUtils } from '../../../utils/csv-utils/csv-config-utils'; +import * as headerConfig from '../../../utils/csv-utils/csv-header-configs'; +import { NestedRecord } from '../../../utils/nested-record'; +import { getMockDBConnection } from '../../../__mocks__/db'; +import { CritterbaseService } from '../../critterbase-service'; +import { SurveyCritterService } from '../../survey-critter-service'; +import * as critterConfig from './critter-header-configs'; +import { ImportCrittersService } from './import-critters-service'; + +chai.use(sinonChai); + +describe('ImportCrittersService', () => { + beforeEach(() => { + sinon.restore(); + }); + + describe('constructor', () => { + it('should create a new instance of the service', () => { + const mockConnection = getMockDBConnection(); + const worksheet = xlsx.utils.json_to_sheet([]); + + const service = new ImportCrittersService(mockConnection, worksheet, 1); + + expect(service).to.be.instanceof(ImportCrittersService); + expect(service).to.have.property('connection', mockConnection); + expect(service).to.have.property('worksheet', worksheet); + expect(service).to.have.property('surveyId', 1); + + expect(service.configUtils).to.be.instanceof(CSVConfigUtils); + expect(service.surveyCritterService).to.be.instanceof(SurveyCritterService); + expect(service.critterbaseService).to.be.instanceof(CritterbaseService); + + expect(Object.keys(service._config.staticHeadersConfig)).to.deep.equal([ + 'ITIS_TSN', + 'ALIAS', + 'SEX', + 'WLH_ID', + 'DESCRIPTION' + ]); + }); + }); + + describe('_getCSVConfig', () => { + it('should return a valid CSVConfig object (no errors thrown)', async () => { + const mockConnection = getMockDBConnection(); + const worksheet = xlsx.utils.json_to_sheet([]); + + const service = new ImportCrittersService(mockConnection, worksheet, 1); + + sinon.stub(service, 
'_getTsnHeaderConfig').resolves({ validateCell: () => [] }); + sinon.stub(service, '_getAliasHeaderConfig').resolves({ validateCell: () => [] }); + sinon.stub(service, '_getSexHeaderConfig').resolves({ validateCell: () => [], setCellValue: () => 'A' }); + sinon + .stub(service, '_getCollectionUnitDynamicHeaderConfig') + .resolves({ validateCell: () => [], setCellValue: () => 'B' }); + + sinon.stub(headerConfig, 'getDescriptionCellValidator').returns(() => []); + sinon.stub(critterConfig, 'getWlhIDCellValidator').returns(() => []); + + const config = await service.getCSVConfig(); + + expect(config.staticHeadersConfig.ITIS_TSN.validateCell).to.be.a('function'); + expect(config.staticHeadersConfig.ALIAS.validateCell).to.be.a('function'); + expect(config.staticHeadersConfig.SEX.validateCell).to.be.a('function'); + expect(config.staticHeadersConfig.SEX.setCellValue).to.be.a('function'); + expect(config.staticHeadersConfig.WLH_ID.validateCell).to.be.a('function'); + expect(config.staticHeadersConfig.DESCRIPTION.validateCell).to.be.a('function'); + expect(config.dynamicHeadersConfig?.validateCell).to.be.a('function'); + expect(config.dynamicHeadersConfig?.setCellValue).to.be.a('function'); + + expect(config.ignoreDynamicHeaders).to.be.false; + }); + + it('should return a valid CSVConfig object (when errors thrown)', async () => { + const mockConnection = getMockDBConnection(); + const worksheet = xlsx.utils.json_to_sheet([]); + + const service = new ImportCrittersService(mockConnection, worksheet, 1); + + sinon.stub(service, '_getTsnHeaderConfig').resolves({ validateCell: () => [] }); + sinon.stub(service, '_getAliasHeaderConfig').resolves({ validateCell: () => [] }); + sinon.stub(service, '_getSexHeaderConfig').resolves({ validateCell: () => [], setCellValue: () => 'A' }); + sinon.stub(service, '_getCollectionUnitDynamicHeaderConfig').rejects(new Error('Dynamic header error')); + + sinon.stub(headerConfig, 'getDescriptionCellValidator').returns(() => []); + 
sinon.stub(critterConfig, 'getWlhIDCellValidator').returns(() => []); + + const config = await service.getCSVConfig(); + + expect(config.staticHeadersConfig.ITIS_TSN.validateCell).to.be.a('function'); + expect(config.staticHeadersConfig.ALIAS.validateCell).to.be.a('function'); + expect(config.staticHeadersConfig.SEX.validateCell).to.be.a('function'); + expect(config.staticHeadersConfig.SEX.setCellValue).to.be.a('function'); + expect(config.staticHeadersConfig.WLH_ID.validateCell).to.be.a('function'); + expect(config.staticHeadersConfig.DESCRIPTION.validateCell).to.be.a('function'); + + expect(config.dynamicHeadersConfig).to.be.undefined; + expect(config.ignoreDynamicHeaders).to.be.true; + }); + }); + + describe('_getTsnHeaderConfig', () => { + it('should return a valid header config object', async () => { + const mockConnection = getMockDBConnection(); + const worksheet = xlsx.utils.json_to_sheet([{ ITIS_TSN: '1234' }]); + + const service = new ImportCrittersService(mockConnection, worksheet, 1); + + const getTaxonomyByTsnsStub = sinon + .stub(service.platformService, 'getTaxonomyByTsns') + .resolves([{ tsn: 1234, scientificName: 'test' }]); + const getTsnCellValidatorStub = sinon.stub(headerConfig, 'getTsnCellValidator').returns(() => []); + + const tsnHeaderConfig = await service._getTsnHeaderConfig(); + + expect(getTaxonomyByTsnsStub).to.have.been.calledOnceWithExactly(['1234']); + expect(getTsnCellValidatorStub).to.have.been.calledOnceWithExactly(new Set([1234])); + + expect(tsnHeaderConfig.validateCell).to.be.a('function'); + expect(tsnHeaderConfig.setCellValue).to.be.a('function'); + }); + }); + + describe('_getAliasHeaderConfig', () => { + it('should return a valid header config object', async () => { + const mockConnection = getMockDBConnection(); + const worksheet = xlsx.utils.json_to_sheet([{ ALIAS: 'test' }]); + + const service = new ImportCrittersService(mockConnection, worksheet, 1); + + const getSurveyCritterAliasesStub = sinon + 
.stub(service.surveyCritterService, 'getUniqueSurveyCritterAliases') + .resolves(new Set(['test'])); + const getCritterAliasCellValidatorStub = sinon + .stub(critterConfig, 'getCritterAliasCellValidator') + .returns(() => []); + + const aliasHeaderConfig = await service._getAliasHeaderConfig(); + + expect(getSurveyCritterAliasesStub).to.have.been.calledOnceWithExactly(1); + expect(getCritterAliasCellValidatorStub).to.have.been.calledOnceWithExactly( + new Set(['test']), + service.configUtils + ); + + expect(aliasHeaderConfig.validateCell).to.be.a('function'); + expect(aliasHeaderConfig.setCellValue).to.be.a('function'); + }); + }); + + describe('_getSexHeaderConfig', () => { + it('should return a valid header config object', async () => { + const mockConnection = getMockDBConnection(); + const worksheet = xlsx.utils.json_to_sheet([{ ITIS_TSN: 1234 }]); + + const service = new ImportCrittersService(mockConnection, worksheet, 1); + + const getTaxonMeasurementsStub = sinon.stub(service.critterbaseService, 'getTaxonMeasurements').resolves({ + qualitative: [ + { + measurement_name: 'sex', + itis_tsn: 1234, + options: [ + { + option_label: 'male', + qualitative_option_id: 'maleUUID' + }, + { + option_label: 'female', + qualitative_option_id: 'femaleUUID' + } + ] + } + ] + } as any); + + const getSexCellValidatorStub = sinon.stub(critterConfig, 'getCritterSexCellValidator').returns(() => []); + const getSexCellSetterStub = sinon.stub(critterConfig, 'getCritterSexCellSetter').returns(() => 'A'); + + const sexHeaderConfig = await service._getSexHeaderConfig(); + + expect(getTaxonMeasurementsStub).to.have.been.calledWithExactly(1234); + expect(getSexCellValidatorStub).to.have.been.calledWithExactly( + new NestedRecord({ + 1234: { male: 'maleUUID', female: 'femaleUUID' } + }), + service.configUtils + ); + + expect(getSexCellSetterStub).to.have.been.calledWithExactly( + new NestedRecord({ + 1234: { male: 'maleUUID', female: 'femaleUUID' } + }), + service.configUtils + ); + + 
expect(sexHeaderConfig.validateCell).to.be.a('function'); + expect(sexHeaderConfig.setCellValue).to.be.a('function'); + }); + }); + + describe('_getCollectionUnitDynamicHeaderConfig', () => { + it('should return a valid header config object', async () => { + const mockConnection = getMockDBConnection(); + const worksheet = xlsx.utils.json_to_sheet([{ UNIT: 'unit', ITIS_TSN: 1234 }]); + + const service = new ImportCrittersService(mockConnection, worksheet, 1); + + const findTaxonCollectionUnitsStub = sinon + .stub(service.critterbaseService, 'findTaxonCollectionUnits') + .resolves([{ category_name: 'category', unit_name: 'unit', collection_unit_id: 'uuid' }] as any[]); + + const getCollectionUnitCellValidatorStub = sinon + .stub(critterConfig, 'getCritterCollectionUnitCellValidator') + .returns(() => []); + + const getCollectionUnitCellSetterStub = sinon + .stub(critterConfig, 'getCritterCollectionUnitCellSetter') + .returns(() => 'value'); + + const config = await service._getCollectionUnitDynamicHeaderConfig(); + + expect(findTaxonCollectionUnitsStub).to.have.been.calledOnceWithExactly(1234); + + expect(getCollectionUnitCellValidatorStub).to.have.been.calledWithExactly( + new NestedRecord({ 1234: { category: { unit: 'uuid' } } }), + service.configUtils + ); + + expect(getCollectionUnitCellSetterStub).to.have.been.calledWithExactly( + new NestedRecord({ 1234: { category: { unit: 'uuid' } } }), + service.configUtils + ); + + expect(config.validateCell).to.be.a('function'); + expect(config.setCellValue).to.be.a('function'); + }); + }); + + describe('_getImportPayloads', () => { + it('should return all import payloads', () => { + const mockConnection = getMockDBConnection(); + const rows = [ + { + ITIS_TSN: '1234', + ALIAS: 'test', + SEX: 'male', + WLH_ID: '12-2222', + DESCRIPTION: 'comment', + POPULATION_UNIT: 'unit', + COLLECTION_UNIT: 'collection' + } + ]; + const worksheet = xlsx.utils.json_to_sheet(rows); + + const service = new 
ImportCrittersService(mockConnection, worksheet, 1); + + const payloads = service._getImportPayloads(rows); + + expect(payloads.simsPayload[0]).to.be.a('string'); + + expect(payloads.critterbasePayload.critters?.[0].itis_tsn).to.be.equal('1234'); + expect(payloads.critterbasePayload.critters?.[0].animal_id).to.be.equal('test'); + expect(payloads.critterbasePayload.critters?.[0].sex_qualitative_option_id).to.be.equal('male'); + expect(payloads.critterbasePayload.critters?.[0].wlh_id).to.be.equal('12-2222'); + expect(payloads.critterbasePayload.critters?.[0].critter_comment).to.be.equal('comment'); + expect(payloads.critterbasePayload.critters?.[0].critter_id).to.be.a('string'); + + expect(payloads.critterbasePayload.collections?.[0].critter_id).to.be.a('string'); + expect(payloads.critterbasePayload.collections?.[0].collection_unit_id).to.be.equal('unit'); + + expect(payloads.critterbasePayload.collections?.[1].critter_id).to.be.a('string'); + expect(payloads.critterbasePayload.collections?.[1].collection_unit_id).to.be.equal('collection'); + }); + }); +}); diff --git a/api/src/services/import-services/critter/import-critters-service.ts b/api/src/services/import-services/critter/import-critters-service.ts new file mode 100644 index 0000000000..c714f56530 --- /dev/null +++ b/api/src/services/import-services/critter/import-critters-service.ts @@ -0,0 +1,302 @@ +import { merge } from 'lodash'; +import { v4 } from 'uuid'; +import { WorkSheet } from 'xlsx'; +import { IDBConnection } from '../../../database/db'; +import { ApiGeneralError } from '../../../errors/api-error'; +import { CSVConfigUtils } from '../../../utils/csv-utils/csv-config-utils'; +import { validateCSVWorksheet } from '../../../utils/csv-utils/csv-config-validation'; +import { CSVConfig, CSVHeaderConfig, CSVRowValidated } from '../../../utils/csv-utils/csv-config-validation.interface'; +import { getDescriptionCellValidator, getTsnCellValidator } from '../../../utils/csv-utils/csv-header-configs'; +import 
{ getLogger } from '../../../utils/logger'; +import { NestedRecord } from '../../../utils/nested-record'; +import { CritterbaseService, IBulkCreate } from '../../critterbase-service'; +import { DBService } from '../../db-service'; +import { PlatformService } from '../../platform-service'; +import { SurveyCritterService } from '../../survey-critter-service'; +import { + getCritterAliasCellValidator, + getCritterCollectionUnitCellSetter, + getCritterCollectionUnitCellValidator, + getCritterSexCellSetter, + getCritterSexCellValidator, + getWlhIDCellValidator +} from './critter-header-configs'; + +const defaultLog = getLogger('services/import/import-critters-service'); + +// Critter CSV static headers +export type CritterCSVStaticHeader = 'ITIS_TSN' | 'ALIAS' | 'SEX' | 'WLH_ID' | 'DESCRIPTION'; + +/** + * + * ImportCrittersService + * + * @class ImportCrittersService + * @extends DBService + * + */ +export class ImportCrittersService extends DBService { + _config: CSVConfig; + + surveyId: number; + worksheet: WorkSheet; + + configUtils: CSVConfigUtils; + + platformService: PlatformService; + critterbaseService: CritterbaseService; + surveyCritterService: SurveyCritterService; + + /** + * Instantiates an instance of ImportCrittersService + * + * @param {IDBConnection} connection - Database connection + * @param {number} surveyId - Survey identifier + */ + constructor(connection: IDBConnection, worksheet: WorkSheet, surveyId: number) { + super(connection); + + this._config = { + staticHeadersConfig: { + ITIS_TSN: { aliases: ['TAXON', 'SPECIES', 'TSN'] }, + ALIAS: { aliases: ['NICKNAME', 'NAME', 'ANIMAL_ID'] }, + SEX: { aliases: [], optional: true }, + WLH_ID: { aliases: ['WILDLIFE_HEALTH_ID', 'WILD LIFE HEALTH ID', 'WLHID'], optional: true }, + DESCRIPTION: { aliases: ['COMMENTS', 'COMMENT', 'NOTES'], optional: true } + }, + ignoreDynamicHeaders: false + }; + + this.surveyId = surveyId; + this.worksheet = worksheet; + + this.configUtils = new CSVConfigUtils(worksheet, 
this._config); + + this.platformService = new PlatformService(connection); + this.surveyCritterService = new SurveyCritterService(connection); + this.critterbaseService = new CritterbaseService({ + keycloak_guid: connection.systemUserGUID(), + username: connection.systemUserIdentifier() + }); + } + + /** + * Import a Critter CSV worksheet into Critterbase and SIMS. + * + * @async + * @throws {ApiGeneralError} - If unable to fully insert records into Critterbase + * @returns {*} {Promise} List of inserted survey critter ids + */ + async importCSVWorksheet(): Promise { + const config = await this.getCSVConfig(); + + const { errors, rows } = validateCSVWorksheet(this.worksheet, config); + + if (errors.length) { + throw new ApiGeneralError('Failed to validate CSV', errors); + } + + const payloads = this._getImportPayloads(rows); + + // Add critters to Critterbase + const bulkResponse = await this.critterbaseService.bulkCreate(payloads.critterbasePayload); + + // Check critterbase inserted the full list of critters + // In reality this error should not be triggered, safeguard to prevent floating critter ids in SIMS + if (bulkResponse.created.critters !== payloads.simsPayload.length) { + throw new ApiGeneralError('Unable to fully import critters from CSV', [ + 'importCrittersService->importCSVWorksheet', + 'critterbase bulk create response count !== critterIds.length' + ]); + } + + // Add Critters to SIMS survey + await this.surveyCritterService.addCrittersToSurvey(this.surveyId, payloads.simsPayload); + } + + /** + * Get the Critter CSV config - this will fetch all the header configs and merge them into the final config. + * + * Note: This will simulate a multi-step validation process if the TSNs are invalid. This is because the TSNs are + * dependencies for the other header configs, so all TSN related errors must be resolved first. 
+ * + * @returns {*} {Promise>} The Critter CSV config + */ + async getCSVConfig(): Promise> { + const [tsnHeaderConfig, aliasHeaderConfig, sexHeaderConfig, dynamicHeadersConfig] = await Promise.all([ + this._getTsnHeaderConfig(), + this._getAliasHeaderConfig(), + this._getSexHeaderConfig().catch(() => undefined), // If this throws due to invalid TSNs, we can ignore this header till TSNs are fixed + this._getCollectionUnitDynamicHeaderConfig().catch(() => undefined) // Same for the dynamic columns + ]); + + const newConfig = merge(this._config, { + staticHeadersConfig: { + ITIS_TSN: tsnHeaderConfig, + ALIAS: aliasHeaderConfig, + SEX: sexHeaderConfig, + WLH_ID: { validateCell: getWlhIDCellValidator(this.configUtils) }, + DESCRIPTION: { validateCell: getDescriptionCellValidator() } + }, + dynamicHeadersConfig: dynamicHeadersConfig, + ignoreDynamicHeaders: !dynamicHeadersConfig + }); + + return newConfig; + } + + /** + * Get the Critterbase and SIMS import payloads. + * + * @param {CSVRowValidated[]} rows - The validated CSV rows + * @returns {*} { simsPayload: string[]; critterbasePayload: IBulkCreate } The import payloads + */ + _getImportPayloads(rows: CSVRowValidated[]): { + simsPayload: string[]; + critterbasePayload: IBulkCreate; + } { + const simsPayload: string[] = []; + const critterbasePayload: IBulkCreate = { critters: [], collections: [] }; + + // Convert rows to Critterbase and SIMS payloads + for (const row of rows) { + const critterId = v4(); + + // SIMS payload + simsPayload.push(critterId); + + // Critterbase static headers payload + critterbasePayload.critters?.push({ + critter_id: critterId, + sex_qualitative_option_id: row.SEX, + itis_tsn: row.ITIS_TSN, + animal_id: row.ALIAS, + wlh_id: row.WLH_ID, + critter_comment: row.DESCRIPTION + }); + + // Critterbase dynamic headers payload + this.configUtils.worksheetDynamicHeaders.forEach((header) => { + if (row[header]) { + critterbasePayload.collections?.push({ + collection_unit_id: row[header], + 
critter_id: critterId + }); + } + }); + } + + defaultLog.debug({ label: 'critter import payloads', simsPayload, critterbasePayload }); + + return { simsPayload, critterbasePayload }; + } + + /** + * Get the TSN header config. + * + * Validation rules: + * 1. TSN must be a number + * 2. TSN must be a real ITIS TSN + * + * @returns {*} {Promise} The TSN header config + */ + async _getTsnHeaderConfig(): Promise { + const rowTsns = this.configUtils.getUniqueCellValues('ITIS_TSN'); + const taxonomy = await this.platformService.getTaxonomyByTsns(rowTsns); + const allowedTsns = new Set(taxonomy.map((taxon) => taxon.tsn)); + + return { + validateCell: getTsnCellValidator(allowedTsns), + setCellValue: (params) => Number(params.cell) + }; + } + + /** + * Get the CSV Alias header config. + * + * Validation rules: + * 1. Alias must be a string + * 2. Alias must be unique in the SIMS Survey + * 3. Alias must be unique in the CSV + * + * @returns {*} {Promise} The alias header config + */ + async _getAliasHeaderConfig(): Promise { + const surveyAliases = await this.surveyCritterService.getUniqueSurveyCritterAliases(this.surveyId); + + return { + validateCell: getCritterAliasCellValidator(surveyAliases, this.configUtils), + setCellValue: (params) => String(params.cell) + }; + } + + /** + * Get the CSV Sex header config. + * + * Validation rules: + * 1. Sex must be a string + * 2. 
Sex must be a valid option in Critterbase for the TSN + * + * @returns {*} {CSVHeaderConfig} The sex header config + */ + async _getSexHeaderConfig(): Promise { + const rowDictionary = new NestedRecord(); + + const rowTsns = this.configUtils.getUniqueCellValues('ITIS_TSN'); + const measurements = await Promise.all(rowTsns.map((tsn) => this.critterbaseService.getTaxonMeasurements(tsn))); + + measurements.forEach((measurement, index) => { + const sexMeasurement = measurement.qualitative.find( + (measurement) => measurement.measurement_name.toLowerCase() === 'sex' + ); + + if (sexMeasurement) { + sexMeasurement.options.forEach((option) => { + const tsn = Number(rowTsns[index]); + const sexLabel = option.option_label; + + rowDictionary.set({ path: [tsn, sexLabel], value: option.qualitative_option_id }); + }); + } + }); + + return { + validateCell: getCritterSexCellValidator(rowDictionary, this.configUtils), + setCellValue: getCritterSexCellSetter(rowDictionary, this.configUtils) + }; + } + + /** + * Get the CSV Collection Unit dynamic header config. 
+ * + * @returns {*} {Promise} The Collection Unit dynamic header config + */ + async _getCollectionUnitDynamicHeaderConfig(): Promise { + const rowDictionary = new NestedRecord(); + const rowTsns = this.configUtils.getUniqueCellValues('ITIS_TSN'); + // Get the collection units for all the tsns in the worksheet + const collectionUnits = await Promise.all( + rowTsns.map((tsn) => this.critterbaseService.findTaxonCollectionUnits(tsn)) + ); + + collectionUnits.forEach((collectionUnits, index) => { + collectionUnits.forEach((unit) => { + const category = unit.category_name; + const tsn = Number(rowTsns[index]); + const unitName = unit.unit_name; + + rowDictionary.set({ + path: [tsn, category, unitName], + value: unit.collection_unit_id + }); + + rowDictionary.set({ path: [tsn, category, unitName], value: unit.collection_unit_id }); + }); + }); + + return { + validateCell: getCritterCollectionUnitCellValidator(rowDictionary, this.configUtils), + setCellValue: getCritterCollectionUnitCellSetter(rowDictionary, this.configUtils) + }; + } +} diff --git a/api/src/services/import-services/critter/import-critters-strategy.interface.ts b/api/src/services/import-services/critter/import-critters-strategy.interface.ts deleted file mode 100644 index 31ad5300e2..0000000000 --- a/api/src/services/import-services/critter/import-critters-strategy.interface.ts +++ /dev/null @@ -1,20 +0,0 @@ -/** - * A validated CSV Critter object - * - */ -export type CsvCritter = { - critter_id: string; - sex?: string; - itis_tsn: number; - animal_id: string; - wlh_id?: string; - critter_comment?: string; -} & { - [collectionUnitColumn: string]: unknown; -}; - -/** - * Invalidated CSV Critter object - * - */ -export type PartialCsvCritter = Partial & { critter_id: string }; diff --git a/api/src/services/import-services/critter/import-critters-strategy.test.ts b/api/src/services/import-services/critter/import-critters-strategy.test.ts deleted file mode 100644 index b8dd5af7fd..0000000000 --- 
a/api/src/services/import-services/critter/import-critters-strategy.test.ts +++ /dev/null @@ -1,621 +0,0 @@ -import chai, { expect } from 'chai'; -import sinon from 'sinon'; -import sinonChai from 'sinon-chai'; -import { WorkSheet } from 'xlsx'; -import { getMockDBConnection } from '../../../__mocks__/db'; -import { CBQualitativeOption, IBulkCreateResponse } from '../../critterbase-service'; -import { ImportCrittersStrategy } from './import-critters-strategy'; -import { CsvCritter } from './import-critters-strategy.interface'; - -chai.use(sinonChai); - -const mockConnection = getMockDBConnection(); - -describe('ImportCrittersStrategy', () => { - describe('_getRowsToValidate', () => { - it('it should correctly format rows', () => { - const rows = [ - { - SEX: 'Male', - ITIS_TSN: 1, - WLH_ID: '10-1000', - ALIAS: 'Carl', - COMMENT: 'Test', - COLLECTION: 'Unit', - BAD_COLLECTION: 'Bad' - } - ]; - const service = new ImportCrittersStrategy(mockConnection, 1); - - const parsedRow = service._getRowsToValidate(rows, ['COLLECTION'])[0]; - - expect(parsedRow.sex).to.be.eq('Male'); - expect(parsedRow.itis_tsn).to.be.eq(1); - expect(parsedRow.wlh_id).to.be.eq('10-1000'); - expect(parsedRow.animal_id).to.be.eq('Carl'); - expect(parsedRow.critter_comment).to.be.eq('Test'); - expect(parsedRow.COLLECTION).to.be.eq('Unit'); - expect(parsedRow.TEST).to.be.undefined; - expect(parsedRow.BAD_COLLECTION).to.be.undefined; - }); - }); - - describe('_getCritterFromRow', () => { - it('should get all critter properties', () => { - const row: CsvCritter = { - critter_id: 'id', - sex: 'Male', - itis_tsn: 1, - animal_id: 'Carl', - wlh_id: '10-1000', - critter_comment: 'comment', - extra_property: 'test' - }; - const service = new ImportCrittersStrategy(mockConnection, 1); - - const critter = service._getCritterFromRow(row); - - expect(critter).to.be.eql({ - critter_id: 'id', - sex_qualitative_option_id: 'Male', - itis_tsn: 1, - animal_id: 'Carl', - wlh_id: '10-1000', - critter_comment: 
'comment' - }); - }); - }); - - describe('_getCollectionUnitsFromRow', () => { - it('should get all collection unit properties', () => { - const row: CsvCritter = { - critter_id: 'id', - sex: 'Male', - itis_tsn: 1, - animal_id: 'Carl', - wlh_id: '10-1000', - critter_comment: 'comment', - COLLECTION: 'ID1', - HERD: 'ID2' - }; - const service = new ImportCrittersStrategy(mockConnection, 1); - - const collectionUnits = service._getCollectionUnitsFromRow(row); - - expect(collectionUnits).to.be.deep.equal([ - { collection_unit_id: 'ID1', critter_id: 'id' }, - { collection_unit_id: 'ID2', critter_id: 'id' } - ]); - }); - }); - - describe('_getValidTsns', () => { - afterEach(() => { - sinon.restore(); - }); - - it('should return unique list of tsns', async () => { - const service = new ImportCrittersStrategy(mockConnection, 1); - - const getTaxonomyStub = sinon.stub(service.platformService, 'getTaxonomyByTsns').resolves([ - { tsn: 1, scientificName: 'a' }, - { tsn: 2, scientificName: 'b' } - ]); - - const tsns = await service._getValidTsns([ - { critter_id: 'a', itis_tsn: 1 }, - { critter_id: 'b', itis_tsn: 2 } - ]); - - expect(getTaxonomyStub).to.have.been.calledWith(['1', '2']); - expect(tsns).to.deep.equal(['1', '2']); - }); - }); - - describe('_getCollectionUnitMap', () => { - afterEach(() => { - sinon.restore(); - }); - - const collectionUnitsA = [ - { - collection_unit_id: '1', - collection_category_id: '2', - category_name: 'COLLECTION', - unit_name: 'UNIT_A', - description: 'description' - }, - { - collection_unit_id: '2', - collection_category_id: '3', - category_name: 'COLLECTION', - unit_name: 'UNIT_B', - description: 'description' - } - ]; - - const collectionUnitsB = [ - { - collection_unit_id: '1', - collection_category_id: '2', - category_name: 'HERD', - unit_name: 'UNIT_A', - description: 'description' - }, - { - collection_unit_id: '2', - collection_category_id: '3', - category_name: 'HERD', - unit_name: 'UNIT_B', - description: 'description' - } - ]; - - 
it('should return collection unit mapping', async () => { - const service = new ImportCrittersStrategy(mockConnection, 1); - - const getColumnsStub = sinon.stub(service, '_getNonStandardColumns'); - const mockWorksheet = {} as unknown as WorkSheet; - - const findCollectionUnitsStub = sinon.stub(service.critterbaseService, 'findTaxonCollectionUnits'); - - getColumnsStub.returns(['COLLECTION', 'HERD']); - findCollectionUnitsStub.onCall(0).resolves(collectionUnitsA); - findCollectionUnitsStub.onCall(1).resolves(collectionUnitsB); - - const mapping = await service._getCollectionUnitMap(mockWorksheet, ['1', '2']); - expect(getColumnsStub).to.have.been.calledWith(mockWorksheet); - expect(findCollectionUnitsStub).to.have.been.calledTwice; - - expect(mapping).to.be.instanceof(Map); - expect(mapping.get('COLLECTION')).to.be.deep.equal({ collectionUnits: collectionUnitsA, tsn: 1 }); - expect(mapping.get('HERD')).to.be.deep.equal({ collectionUnits: collectionUnitsB, tsn: 2 }); - }); - - it('should return empty map when no collection unit columns', async () => { - const service = new ImportCrittersStrategy(mockConnection, 1); - - const getColumnsStub = sinon.stub(service, '_getNonStandardColumns'); - const mockWorksheet = {} as unknown as WorkSheet; - - const findCollectionUnitsStub = sinon.stub(service.critterbaseService, 'findTaxonCollectionUnits'); - getColumnsStub.returns([]); - - const mapping = await service._getCollectionUnitMap(mockWorksheet, ['1', '2']); - expect(getColumnsStub).to.have.been.calledWith(mockWorksheet); - expect(findCollectionUnitsStub).to.have.not.been.called; - - expect(mapping).to.be.instanceof(Map); - }); - }); - - describe('insert', () => { - afterEach(() => { - sinon.restore(); - }); - - const critters: CsvCritter[] = [ - { - critter_id: '1', - sex: 'Male', - itis_tsn: 1, - animal_id: 'Carl', - wlh_id: '10-1000', - critter_comment: 'comment', - COLLECTION: 'Collection Unit' - }, - { - critter_id: '2', - sex: 'Female', - itis_tsn: 2, - animal_id: 
'Carl', - wlh_id: '10-1000', - critter_comment: 'comment', - HERD: 'Herd Unit' - } - ]; - - it('should correctly parse collection units and critters and insert into sims / critterbase', async () => { - const service = new ImportCrittersStrategy(mockConnection, 1); - - const critterbaseBulkCreateStub = sinon.stub(service.critterbaseService, 'bulkCreate'); - const simsAddSurveyCrittersStub = sinon.stub(service.surveyCritterService, 'addCrittersToSurvey'); - - critterbaseBulkCreateStub.resolves({ created: { critters: 2, collections: 1 } } as IBulkCreateResponse); - simsAddSurveyCrittersStub.resolves([1]); - - const ids = await service.insert(critters); - - expect(critterbaseBulkCreateStub).to.have.been.calledWithExactly({ - critters: [ - { - critter_id: '1', - sex_qualitative_option_id: 'Male', - itis_tsn: 1, - animal_id: 'Carl', - wlh_id: '10-1000', - critter_comment: 'comment' - }, - { - critter_id: '2', - sex_qualitative_option_id: 'Female', - itis_tsn: 2, - animal_id: 'Carl', - wlh_id: '10-1000', - critter_comment: 'comment' - } - ], - collections: [ - { collection_unit_id: 'Collection Unit', critter_id: '1' }, - { collection_unit_id: 'Herd Unit', critter_id: '2' } - ] - }); - - expect(ids).to.be.deep.equal([1]); - }); - - it('should throw error if response from critterbase is less than provided critters', async () => { - const service = new ImportCrittersStrategy(mockConnection, 1); - - const critterbaseBulkCreateStub = sinon.stub(service.critterbaseService, 'bulkCreate'); - const simsAddSurveyCrittersStub = sinon.stub(service.surveyCritterService, 'addCrittersToSurvey'); - - critterbaseBulkCreateStub.resolves({ created: { critters: 1, collections: 1 } } as IBulkCreateResponse); - simsAddSurveyCrittersStub.resolves([1]); - - try { - await service.insert(critters); - expect.fail(); - } catch (err: any) { - expect(err.message).to.be.equal('Unable to fully import critters from CSV'); - } - - expect(simsAddSurveyCrittersStub).to.not.have.been.called; - }); - }); - - 
describe('validateRows', () => { - afterEach(() => { - sinon.restore(); - }); - - const collectionUnitsA = [ - { - collection_unit_id: '1', - collection_category_id: '2', - category_name: 'COLLECTION', - unit_name: 'UNIT_A', - description: 'description' - }, - { - collection_unit_id: '2', - collection_category_id: '3', - category_name: 'COLLECTION', - unit_name: 'UNIT_B', - description: 'description' - } - ]; - - const collectionUnitsB = [ - { - collection_unit_id: '1', - collection_category_id: '2', - category_name: 'HERD', - unit_name: 'UNIT_A', - description: 'description' - }, - { - collection_unit_id: '2', - collection_category_id: '3', - category_name: 'HERD', - unit_name: 'UNIT_B', - description: 'description' - } - ]; - - const sexOptionsA: CBQualitativeOption[] = [ - { qualitative_option_id: 'A1', option_label: 'Male', option_value: 0, option_desc: 'description' }, - { qualitative_option_id: 'A2', option_label: 'Female', option_value: 1, option_desc: 'description' } - ]; - const sexOptionsB: CBQualitativeOption[] = [ - { qualitative_option_id: 'B1', option_label: 'Female', option_value: 0, option_desc: 'description' }, - { qualitative_option_id: 'B2', option_label: 'Hermaphroditic', option_value: 1, option_desc: 'description' } - ]; - - it('should return successful', async () => { - const service = new ImportCrittersStrategy(mockConnection, 1); - - const getColumnsStub = sinon.stub(service, '_getNonStandardColumns'); - const surveyAliasesStub = sinon.stub(service.surveyCritterService, 'getUniqueSurveyCritterAliases'); - const getValidTsnsStub = sinon.stub(service, '_getValidTsns'); - const collectionMapStub = sinon.stub(service, '_getCollectionUnitMap'); - const sexMapStub = sinon.stub(service, '_getSpeciesSexMap'); - - getColumnsStub.returns(['COLLECTION', 'HERD']); - surveyAliasesStub.resolves(new Set(['Not Carl', 'Carlita'])); - getValidTsnsStub.resolves(['1', '2']); - collectionMapStub.resolves( - new Map([ - ['COLLECTION', { collectionUnits: 
collectionUnitsA, tsn: 1 }], - ['HERD', { collectionUnits: collectionUnitsB, tsn: 2 }] - ]) - ); - sexMapStub.resolves( - new Map([ - [1, { sexes: sexOptionsA }], - [2, { sexes: sexOptionsB }] - ]) - ); - - const rows = [ - { - ITIS_TSN: 1, - SEX: sexOptionsA[0].option_label, - ALIAS: 'Carl', - WLH_ID: '10-1000', - DESCRIPTION: 'A', - COLLECTION: 'UNIT_A' - }, - { - ITIS_TSN: 2, - SEX: sexOptionsB[1].option_label, - ALIAS: 'Carl2', - WLH_ID: '10-1000', - DESCRIPTION: 'B', - HERD: 'UNIT_B' - } - ]; - - const validation = await service.validateRows(rows, {}); - - if (validation.success) { - // The sex property is renamed to sex_qualitative_option_id in _getCrittersFromRow, after validateRows() - expect(validation.data[0]).to.contain({ - sex: sexOptionsA[0].qualitative_option_id, - itis_tsn: 1, - animal_id: 'Carl', - wlh_id: '10-1000', - critter_comment: 'A', - COLLECTION: '1' - }); - - expect(validation.data[1]).to.contain({ - sex: sexOptionsB[1].qualitative_option_id, - itis_tsn: 2, - animal_id: 'Carl2', - wlh_id: '10-1000', - critter_comment: 'B', - HERD: '2' - }); - } else { - expect.fail(); - } - }); - - it('should allow optional columns to be excluded from the csv', async () => { - const service = new ImportCrittersStrategy(mockConnection, 1); - - const surveyAliasesStub = sinon.stub(service.surveyCritterService, 'getUniqueSurveyCritterAliases'); - const getValidTsnsStub = sinon.stub(service, '_getValidTsns'); - const sexMapStub = sinon.stub(service, '_getSpeciesSexMap'); - - surveyAliasesStub.resolves(new Set([])); - getValidTsnsStub.resolves(['1']); - sexMapStub.resolves(new Map([[1, { sexes: [] }]])); - - const rows = [ - { - ITIS_TSN: 1, - ALIAS: 'Carl1' - }, - { - ITIS_TSN: 1, - ALIAS: 'Carl2', - DESCRIPTION: 'A' - } - ]; - - const validation = await service.validateRows(rows, {}); - - if (validation.success) { - expect(validation.data[0]).to.contain({ - itis_tsn: 1, - animal_id: 'Carl1' - }); - expect(validation.data[1]).to.contain({ - itis_tsn: 1, - 
animal_id: 'Carl2', - critter_comment: 'A' - }); - } else { - expect.fail(); - } - }); - - it('should return error when wlh_id invalid regex', async () => { - const service = new ImportCrittersStrategy(mockConnection, 1); - - const surveyAliasesStub = sinon.stub(service.surveyCritterService, 'getUniqueSurveyCritterAliases'); - const getValidTsnsStub = sinon.stub(service, '_getValidTsns'); - const sexMapStub = sinon.stub(service, '_getSpeciesSexMap'); - - surveyAliasesStub.resolves(new Set([])); - getValidTsnsStub.resolves(['1']); - sexMapStub.resolves(new Map([[1, { sexes: [] }]])); - - const rows = [ - { - ITIS_TSN: 1, - ALIAS: 'Carl', - WLH_ID: '1-1000', - DESCRIPTION: 'A' - }, - { - ITIS_TSN: 1, - ALIAS: 'Carl2', - WLH_ID: '101000', - DESCRIPTION: 'A' - } - ]; - - const validation = await service.validateRows(rows, {}); - - if (validation.success) { - expect.fail(); - } else { - const errorMessages = validation.error.issues.map((issue) => issue.message); - // Define a regex pattern to match the general structure of the error message - const errorPattern = /incorrectly formatted\./; - // Check that all error messages contain the expected pattern. The full error message is dynamic. 
- expect(errorMessages).to.satisfy((messages: string[]) => messages.every((msg) => errorPattern.test(msg))); - } - }); - - it('should return error when itis_tsn invalid option or undefined', async () => { - const service = new ImportCrittersStrategy(mockConnection, 1); - - const surveyAliasesStub = sinon.stub(service.surveyCritterService, 'getUniqueSurveyCritterAliases'); - const getValidTsnsStub = sinon.stub(service, '_getValidTsns'); - const sexMapStub = sinon.stub(service, '_getSpeciesSexMap'); - - surveyAliasesStub.resolves(new Set([])); - getValidTsnsStub.resolves(['1']); - sexMapStub.resolves(new Map([[1, { sexes: [] }]])); - - const rows = [ - { - ITIS_TSN: undefined, - SEX: null, - ALIAS: 'Carl', - WLH_ID: '10-1000', - DESCRIPTION: 'A' - }, - { - ITIS_TSN: 3, - SEX: null, - ALIAS: 'Carl2', - WLH_ID: '10-1000', - DESCRIPTION: 'A' - } - ]; - - const validation = await service.validateRows(rows, {}); - - if (validation.success) { - expect.fail(); - } else { - const errorMessages = validation.error.issues.map((issue) => issue.message); - // Define a regex pattern to match the general structure of the error message - const errorPattern = /does not exist\./; - // Check that all error messages contain the expected pattern. The full error message is dynamic. 
- expect(errorMessages).to.satisfy((messages: string[]) => messages.every((msg) => errorPattern.test(msg))); - } - }); - - it('should return error if alias undefined, duplicate or exists in survey', async () => { - const service = new ImportCrittersStrategy(mockConnection, 1); - - const surveyAliasesStub = sinon.stub(service.surveyCritterService, 'getUniqueSurveyCritterAliases'); - const getValidTsnsStub = sinon.stub(service, '_getValidTsns'); - const sexMapStub = sinon.stub(service, '_getSpeciesSexMap'); - - surveyAliasesStub.resolves(new Set(['Carl3'])); - getValidTsnsStub.resolves(['1']); - sexMapStub.resolves(new Map([[1, { sexes: [] }]])); - - const rows = [ - { - ITIS_TSN: 1, - SEX: null, - ALIAS: undefined, - WLH_ID: '10-1000', - DESCRIPTION: 'A' - }, - { - ITIS_TSN: 1, - SEX: null, - ALIAS: 'Carl2', - WLH_ID: '10-1000', - DESCRIPTION: 'A' - }, - { - ITIS_TSN: 1, - SEX: null, - ALIAS: 'Carl2', - WLH_ID: '10-1000', - DESCRIPTION: 'A' - }, - { - ITIS_TSN: 1, - SEX: null, - ALIAS: 'Carl3', - WLH_ID: '10-1000', - DESCRIPTION: 'A' - } - ]; - - const validation = await service.validateRows(rows, {}); - - if (validation.success) { - expect.fail(); - } else { - const errorMessages = validation.error.issues.map((issue) => issue.message); - // Define a regex pattern to match the general structure of the error message - const errorPattern = /already exists in the Survey\./; - // Check that all error messages contain the expected pattern. The full error message is dynamic. 
- expect(errorMessages).to.satisfy((messages: string[]) => messages.every((msg) => errorPattern.test(msg))); - } - }); - - it('should return error if collection unit invalid value', async () => { - const service = new ImportCrittersStrategy(mockConnection, 1); - - const surveyAliasesStub = sinon.stub(service.surveyCritterService, 'getUniqueSurveyCritterAliases'); - const getValidTsnsStub = sinon.stub(service, '_getValidTsns'); - const collectionMapStub = sinon.stub(service, '_getCollectionUnitMap'); - const getColumnsStub = sinon.stub(service, '_getNonStandardColumns'); - const sexMapStub = sinon.stub(service, '_getSpeciesSexMap'); - - surveyAliasesStub.resolves(new Set([])); - getValidTsnsStub.resolves(['1', '2']); - getColumnsStub.returns(['COLLECTION', 'HERD']); - collectionMapStub.resolves( - new Map([ - ['COLLECTION', { collectionUnits: collectionUnitsA, tsn: 1 }], - ['HERD', { collectionUnits: collectionUnitsB, tsn: 2 }] - ]) - ); - sexMapStub.resolves(new Map([[1, { sexes: [] }]])); - - const rows = [ - { - ITIS_TSN: 1, - SEX: null, - ALIAS: 'Carl', - WLH_ID: '10-1000', - DESCRIPTION: 'A', - COLLECTION: 'UNIT_C' - }, - { - ITIS_TSN: 2, - SEX: null, - ALIAS: 'Carl2', - WLH_ID: '10-1000', - DESCRIPTION: 'A', - COLLECTION: 'UNIT_A' - } - ]; - - const validation = await service.validateRows(rows, {}); - - if (validation.success) { - expect.fail(); - } else { - expect(validation.error.issues).to.deep.equal([ - { row: 0, message: `Invalid COLLECTION. Cell value is not valid.` }, - { row: 1, message: `Invalid COLLECTION. 
Cell value not allowed for TSN.` } - ]); - } - }); - }); -}); diff --git a/api/src/services/import-services/critter/import-critters-strategy.ts b/api/src/services/import-services/critter/import-critters-strategy.ts deleted file mode 100644 index 08e2d04c2b..0000000000 --- a/api/src/services/import-services/critter/import-critters-strategy.ts +++ /dev/null @@ -1,428 +0,0 @@ -import { keys, omit, toUpper, uniq } from 'lodash'; -import { v4 as uuid } from 'uuid'; -import { WorkSheet } from 'xlsx'; -import { IDBConnection } from '../../../database/db'; -import { ApiGeneralError } from '../../../errors/api-error'; -import { getLogger } from '../../../utils/logger'; -import { getTsnMeasurementTypeDefinitionMap } from '../../../utils/observation-xlsx-utils/measurement-column-utils'; -import { CSV_COLUMN_ALIASES } from '../../../utils/xlsx-utils/column-aliases'; -import { generateColumnCellGetterFromColumnValidator } from '../../../utils/xlsx-utils/column-validator-utils'; -import { getNonStandardColumnNamesFromWorksheet, IXLSXCSVValidator } from '../../../utils/xlsx-utils/worksheet-utils'; -import { - CBQualitativeOption, - CritterbaseService, - IBulkCreate, - ICollection, - ICollectionUnitWithCategory, - ICreateCritter -} from '../../critterbase-service'; -import { DBService } from '../../db-service'; -import { PlatformService } from '../../platform-service'; -import { SurveyCritterService } from '../../survey-critter-service'; -import { CSVImportStrategy, Row, Validation, ValidationError } from '../import-csv.interface'; -import { CsvCritter, PartialCsvCritter } from './import-critters-strategy.interface'; - -const defaultLog = getLogger('services/import/import-critters-service'); - -/** - * - * ImportCrittersStrategy - Injected into CSVImportStrategy as the CSV import dependency - * - * @example new CSVImportStrategy(new ImportCrittersStrategy(connection, surveyId)).import(file); - * - * @class ImportCrittersStrategy - * @extends DBService - * - */ -export class 
ImportCrittersStrategy extends DBService implements CSVImportStrategy { - platformService: PlatformService; - critterbaseService: CritterbaseService; - surveyCritterService: SurveyCritterService; - - surveyId: number; - - /** - * An XLSX validation config for the standard columns of a Critter CSV. - * - * Note: `satisfies` allows `keyof` to correctly infer key types, while also - * enforcing uppercase object keys. - */ - columnValidator = { - ITIS_TSN: { type: 'number', aliases: CSV_COLUMN_ALIASES.ITIS_TSN }, - SEX: { type: 'string', optional: true }, - ALIAS: { type: 'string', aliases: CSV_COLUMN_ALIASES.ALIAS }, - WLH_ID: { type: 'string', optional: true }, - DESCRIPTION: { type: 'string', aliases: CSV_COLUMN_ALIASES.DESCRIPTION, optional: true } - } satisfies IXLSXCSVValidator; - - /** - * Instantiates an instance of ImportCrittersStrategy - * - * @param {IDBConnection} connection - Database connection - * @param {number} surveyId - Survey identifier - */ - constructor(connection: IDBConnection, surveyId: number) { - super(connection); - - this.surveyId = surveyId; - - this.platformService = new PlatformService(connection); - this.surveyCritterService = new SurveyCritterService(connection); - this.critterbaseService = new CritterbaseService({ - keycloak_guid: connection.systemUserGUID(), - username: connection.systemUserIdentifier() - }); - } - - /** - * Get non-standard columns (collection unit columns) from worksheet. - * - * @param {WorkSheet} worksheet - Xlsx worksheet - * @returns {string[]} Array of non-standard headers from CSV (worksheet) - */ - _getNonStandardColumns(worksheet: WorkSheet) { - return uniq(getNonStandardColumnNamesFromWorksheet(worksheet, this.columnValidator)); - } - - /** - * Get critter from properties from row. 
- * - * @param {CsvCritter} row - Row object as CsvCritter - * @returns {ICreateCritter} Create critter object - */ - _getCritterFromRow(row: CsvCritter): ICreateCritter { - return { - critter_id: row.critter_id, - sex_qualitative_option_id: row.sex, - itis_tsn: row.itis_tsn, - animal_id: row.animal_id, - wlh_id: row.wlh_id, - critter_comment: row.critter_comment - }; - } - - /** - * Get list of collection units from row. - * - * @param {CsvCritter} row - Row object as a CsvCritter - * @returns {ICollection[]} Array of collection units - */ - _getCollectionUnitsFromRow(row: CsvCritter): ICollection[] { - const critterId = row.critter_id; - - // Get portion of row object that is not a critter - const partialRow: { [key: keyof ICreateCritter | keyof CsvCritter]: any } = omit(row, [ - ...keys(this._getCritterFromRow(row)), - 'sex' as keyof CsvCritter - ]); - - // Keys of collection units - const collectionUnitKeys = keys(partialRow); - - // Return an array of formatted collection units for bulk create - return collectionUnitKeys - .filter((key) => partialRow[key]) - .map((key) => ({ collection_unit_id: partialRow[key], critter_id: critterId })); - } - - /** - * Get a Set of valid ITIS TSNS from xlsx worksheet rows. - * - * @async - * @returns {Promise} Unique Set of valid TSNS from worksheet. - */ - async _getValidTsns(rows: PartialCsvCritter[]): Promise { - // Get a unique list of tsns from worksheet - const critterTsns = uniq(rows.map((row) => String(row.itis_tsn))); - - // Query the platform service (taxonomy) for matching tsns - const taxonomy = await this.platformService.getTaxonomyByTsns(critterTsns); - - return taxonomy.map((taxon) => String(taxon.tsn)); - } - - /** - * Get a mapping of collection units for a list of tsns. - * Used in the zod validation. 
- * - * @example new Map([['Population Unit', new Set(['Atlin', 'Unit B'])]]); - * - * @async - * @param {WorkSheet} worksheet - Xlsx Worksheet - * @param {string[]} tsns - List of unique and valid TSNS - * @returns {Promise} Collection unit mapping - */ - async _getCollectionUnitMap(worksheet: WorkSheet, tsns: string[]) { - const collectionUnitMap = new Map(); - - const collectionUnitColumns = this._getNonStandardColumns(worksheet); - - // If no collection unit columns return empty Map - if (!collectionUnitColumns.length) { - return collectionUnitMap; - } - - // Get the collection units for all the tsns in the worksheet - const tsnCollectionUnits = await Promise.all( - tsns.map((tsn) => this.critterbaseService.findTaxonCollectionUnits(tsn)) - ); - - tsnCollectionUnits.forEach((collectionUnits, index) => { - if (collectionUnits.length) { - // TODO: Is this correct? - collectionUnitMap.set(toUpper(collectionUnits[0].category_name), { collectionUnits, tsn: Number(tsns[index]) }); - } - }); - - return collectionUnitMap; - } - - /** - * Get a mapping of sex values for a list of tsns. - * Used in the zod validation. 
- * - * @example new Map([['180844', new Set(['Male', 'Female'])]]); - * - * @async - * @param {string[]} tsns - List of unique and valid TSNS - * @returns {Promise} Sex mapping - */ - async _getSpeciesSexMap(tsns: string[]): Promise> { - // Initialize the sex map - const sexMap = new Map(); - - // Fetch the measurement type definitions - const tsnMeasurementTypeDefinitionMap = await getTsnMeasurementTypeDefinitionMap(tsns, this.critterbaseService); - - // Iterate over each TSN to populate the sexMap - tsns.forEach((tsn) => { - // Get the sex options for the current species - const measurements = tsnMeasurementTypeDefinitionMap[tsn]; - - // Look for a measurement called "sex" (case insensitive) - const sexMeasurement = measurements.qualitative.find((qual) => qual.measurement_name.toLowerCase() === 'sex'); - - // If there is a measurement called sex, add the options to the sexMap - sexMap.set(Number(tsn), { - sexes: sexMeasurement?.options ?? [] - }); - }); - - return sexMap; - } - - /** - * Parse the CSV rows into the Critterbase critter format. - * - * @param {Row[]} rows - CSV rows - * @param {string[]} collectionUnitColumns - Non standard columns - * @returns {PartialCsvCritter[]} CSV critters before validation - */ - _getRowsToValidate(rows: Row[], collectionUnitColumns: string[]): PartialCsvCritter[] { - const getColumnCell = generateColumnCellGetterFromColumnValidator(this.columnValidator); - - return rows.map((row) => { - // Standard critter properties from CSV - const standardCritterRow = { - critter_id: uuid(), // Generate a uuid for each critter for convienence - sex: getColumnCell(row, 'SEX').cell, - itis_tsn: getColumnCell(row, 'ITIS_TSN').cell, - wlh_id: getColumnCell(row, 'WLH_ID').cell, - animal_id: getColumnCell(row, 'ALIAS').cell, - critter_comment: getColumnCell(row, 'DESCRIPTION').cell - }; - - // All other properties must be collection units ie: `population unit` or `herd unit` etc... 
- collectionUnitColumns.forEach((categoryHeader) => { - standardCritterRow[categoryHeader] = row[categoryHeader]; - }); - - return standardCritterRow; - }); - } - - /** - * Validate CSV worksheet rows against reference data. - * - * @async - * @param {Row[]} rows - Invalidated CSV rows - * @param {WorkSheet} worksheet - Xlsx worksheet - * @returns {Promise>} Conditional validation object - */ - async validateRows(rows: Row[], worksheet: WorkSheet): Promise> { - const nonStandardColumns = this._getNonStandardColumns(worksheet); - const rowsToValidate = this._getRowsToValidate(rows, nonStandardColumns); - - // Retrieve the dynamic validation config - const [validRowTsns, surveyCritterAliases] = await Promise.all([ - this._getValidTsns(rowsToValidate), - this.surveyCritterService.getUniqueSurveyCritterAliases(this.surveyId) - ]); - const collectionUnitMap = await this._getCollectionUnitMap(worksheet, validRowTsns); - - // Get sex options for each species being imported - const sexMap = await this._getSpeciesSexMap(validRowTsns); - - // Parse reference data for validation - const tsnSet = new Set(validRowTsns.map((tsn) => Number(tsn))); - const csvCritterAliases = rowsToValidate.map((row) => row.animal_id); - - // Track the row validation errors - const errors: ValidationError[] = []; - - const csvCritters = rowsToValidate.map((row, index) => { - const tsn = row.itis_tsn; - - /** - * -------------------------------------------------------------------- - * STANDARD ROW VALIDATION - * -------------------------------------------------------------------- - */ - - // WLH_ID must follow regex pattern - const invalidWlhId = row.wlh_id && !/^\d{2}-.+/.exec(row.wlh_id); - // ITIS_TSN is required and be a valid TSN - const invalidTsn = !tsn || !tsnSet.has(tsn); - // ALIAS is required and must not already exist in Survey or CSV - const invalidAlias = - !row.animal_id || - surveyCritterAliases.has(row.animal_id) || - csvCritterAliases.filter((value) => value === 
row.animal_id).length > 1; - - if (invalidWlhId) { - errors.push({ - row: index, - message: `Wildlife health ID ${row.wlh_id} is incorrectly formatted. Expected a 2-digit hyphenated prefix like '18-98491'.` - }); - } - if (invalidTsn) { - errors.push({ row: index, message: `Species TSN ${tsn} does not exist.` }); - } - if (invalidAlias) { - errors.push({ - row: index, - message: `Animal ${row.animal_id} already exists in the Survey. Duplicate names are not allowed.` - }); - } - - /** - * -------------------------------------------------------------------- - * SEX VALIDATION - * -------------------------------------------------------------------- - */ - if (tsn) { - // Get the sex options from the sexMap - const sexOptionsForTsn = sexMap.get(tsn); - - // If no sex value is given, delete the sex column - if (!row.sex) { - delete row.sex; - } - - // If a sex value is given but sex is not allowed for the tsn, add an error message - if (!sexOptionsForTsn && row.sex) { - errors.push({ - row: index, - message: `Sex is not a supported attribute for TSN ${tsn}. Please contact a system administrator if it should be.` - }); - } - - // If sex is allowed and a value is given, look for a matching quantitative_option_id - if (sexOptionsForTsn && row.sex) { - const sexMatch = sexOptionsForTsn.sexes.find( - (sex) => sex.option_label.toLowerCase() === row.sex?.toLowerCase() - ); - - // If the given value is not valid, add an error message - if (!sexMatch) { - errors.push({ - row: index, - message: `${sexMatch} is not a valid sex option for TSN ${tsn}. 
Did you mean one of ${sexOptionsForTsn.sexes.join( - ',' - )}` - }); - } else { - // If the value is valid, update the cell with the qualitative_option_id - row.sex = sexMatch.qualitative_option_id; - } - } - } - - /** - * -------------------------------------------------------------------- - * NON-STANDARD ROW VALIDATION - * -------------------------------------------------------------------- - */ - - nonStandardColumns.forEach((column) => { - const collectionUnitColumn = collectionUnitMap.get(column); - // Remove property if undefined or not a collection unit - if (!collectionUnitColumn || !row[column]) { - delete row[column]; - return; - } - // Attempt to find the collection unit with the cell value from the mapping - const collectionUnitMatch = collectionUnitColumn.collectionUnits.find( - (unit) => unit.unit_name.toLowerCase() === String(row[column]).toLowerCase() - ); - // Collection unit must be a valid value - if (!collectionUnitMatch) { - errors.push({ row: index, message: `Invalid ${column}. Cell value is not valid.` }); - } - // Collection unit must have correct TSN mapping - else if (row.itis_tsn !== collectionUnitColumn.tsn) { - errors.push({ row: index, message: `Invalid ${column}. Cell value not allowed for TSN.` }); - } else { - // Update the cell to be the collection unit id - row[column] = collectionUnitMatch.collection_unit_id; - } - }); - - return row; - }); - - // If validation successful the rows should all be CsvCritters - if (!errors.length) { - return { success: true, data: csvCritters as CsvCritter[] }; - } - - return { success: false, error: { issues: errors } }; - } - - /** - * Insert CSV critters into Critterbase and SIMS. 
- * - * @async - * @param {CsvCritter[]} critterRows - CSV row critters - * @throws {ApiGeneralError} - If unable to fully insert records into Critterbase - * @returns {Promise} List of inserted survey critter ids - */ - async insert(critterRows: CsvCritter[]): Promise { - const simsPayload: string[] = []; - const critterbasePayload: IBulkCreate = { critters: [], collections: [] }; - - // Convert rows to Critterbase and SIMS payloads - for (const row of critterRows) { - simsPayload.push(row.critter_id); - critterbasePayload.critters?.push(this._getCritterFromRow(row)); - critterbasePayload.collections = critterbasePayload.collections?.concat(this._getCollectionUnitsFromRow(row)); - } - - defaultLog.debug({ label: 'critter import payloads', simsPayload, critterbasePayload }); - - // Add critters to Critterbase - const bulkResponse = await this.critterbaseService.bulkCreate(critterbasePayload); - - // Check critterbase inserted the full list of critters - // In reality this error should not be triggered, safeguard to prevent floating critter ids in SIMS - if (bulkResponse.created.critters !== simsPayload.length) { - throw new ApiGeneralError('Unable to fully import critters from CSV', [ - 'importCrittersStrategy -> insertCsvCrittersIntoSimsAndCritterbase', - 'critterbase bulk create response count !== critterIds.length' - ]); - } - - // Add Critters to SIMS survey - return this.surveyCritterService.addCrittersToSurvey(this.surveyId, simsPayload); - } -} diff --git a/api/src/utils/csv-utils/csv-config-utils.test.ts b/api/src/utils/csv-utils/csv-config-utils.test.ts new file mode 100644 index 0000000000..c5622b51e1 --- /dev/null +++ b/api/src/utils/csv-utils/csv-config-utils.test.ts @@ -0,0 +1,202 @@ +import { expect } from 'chai'; +import xlsx, { WorkSheet } from 'xlsx'; +import { CSVConfigUtils } from './csv-config-utils'; +import { CSVConfig } from './csv-config-validation.interface'; + +describe('CSVConfigUtils', () => { + describe('init', () => { + it('should 
initialize the CSVConfigUtils', () => { + const worksheet: WorkSheet = xlsx.utils.json_to_sheet([ + { TEST: 'cellValue', ALIASED_HEADER: 'cellValue2', DYNAMIC_HEADER: 'dynamicValue' } + ]); + const mockConfig: CSVConfig = { + staticHeadersConfig: { + TEST: { aliases: [] }, + TEST_ALIAS: { aliases: ['ALIASED_HEADER'] } + }, + ignoreDynamicHeaders: false + }; + + const utils = new CSVConfigUtils(worksheet, mockConfig); + + expect(utils).to.be.instanceOf(CSVConfigUtils); + expect(utils._config).to.be.equal(mockConfig); + expect(utils.worksheet).to.be.equal(worksheet); + expect(utils.worksheetRows).to.be.deep.equal([ + { TEST: 'cellValue', ALIASED_HEADER: 'cellValue2', DYNAMIC_HEADER: 'dynamicValue' } + ]); + expect(utils.worksheetHeaders).to.be.deep.equal(['TEST', 'ALIASED_HEADER', 'DYNAMIC_HEADER']); + expect(utils.worksheetAliasedStaticHeaders).to.be.deep.equal(['TEST', 'ALIASED_HEADER']); + expect(utils.worksheetStaticHeaders).to.be.deep.equal(['TEST', 'TEST_ALIAS']); + expect(utils.worksheetDynamicHeaders).to.be.deep.equal(['DYNAMIC_HEADER']); + }); + }); + + describe('getCellValue', () => { + it('should get the cell value from a CSV row', () => { + const worksheet: WorkSheet = xlsx.utils.json_to_sheet([{ TEST: 'cellValue', DYNAMIC_HEADER: 'dynamicValue' }]); + const mockConfig = { + staticHeadersConfig: { + TEST: { aliases: [] } + }, + ignoreDynamicHeaders: false + }; + + const utils = new CSVConfigUtils(worksheet, mockConfig); + + const cellValue = utils.getCellValue('TEST', { TEST: 'cellValue' }); + + expect(cellValue).to.be.equal('cellValue'); + }); + + it('should return undefined if the header does not exist', () => { + const worksheet: WorkSheet = xlsx.utils.json_to_sheet([{ TEST: 'cellValue', DYNAMIC_HEADER: 'dynamicValue' }]); + const mockConfig = { + staticHeadersConfig: { + TEST: { aliases: [] } + }, + ignoreDynamicHeaders: false + }; + + const utils = new CSVConfigUtils(worksheet, mockConfig); + + const cellValue = utils.getCellValue('UNKNOWN' as any, { 
TEST: 'cellValue' }); + + expect(cellValue).to.be.equal(undefined); + }); + + it('should get the cell value by the alias', () => { + const worksheet: WorkSheet = xlsx.utils.json_to_sheet([{ TEST_ALIAS: 'cellValue' }]); + const mockConfig: CSVConfig = { + staticHeadersConfig: { + TEST: { aliases: ['OTHER_ALIAS', 'TEST_ALIAS'] } + }, + ignoreDynamicHeaders: false + }; + + const utils = new CSVConfigUtils(worksheet, mockConfig); + + const cellValue = utils.getCellValue('TEST', { TEST_ALIAS: 'cellValue' }); + + expect(cellValue).to.be.equal('cellValue'); + }); + + it('should return undefined for a bad header / alias', () => { + const worksheet: WorkSheet = xlsx.utils.json_to_sheet([{ TEST_ALIAS: 'cellValue' }]); + const mockConfig: CSVConfig = { + staticHeadersConfig: { + TEST: { aliases: ['OTHER_ALIAS'] } + }, + ignoreDynamicHeaders: false + }; + + const utils = new CSVConfigUtils(worksheet, mockConfig); + + const cellValue = utils.getCellValue('NOT_FOUND', { TEST_ALIAS: 'cellValue' }); + + expect(cellValue).to.be.equal(undefined); + }); + }); + + describe('getCellValues', () => { + it('should get the cell values from a CSV row', () => { + const worksheet: WorkSheet = xlsx.utils.json_to_sheet([{ TEST: 'cellValue', DYNAMIC_HEADER: 'dynamicValue' }]); + const mockConfig = { + staticHeadersConfig: { + TEST: { aliases: [] } + }, + ignoreDynamicHeaders: false + }; + + const utils = new CSVConfigUtils(worksheet, mockConfig); + + const cellValues = utils.getCellValues('TEST'); + + expect(cellValues).to.be.deep.equal(['cellValue']); + }); + + it('should get the cell values from a CSV row when using alias', () => { + const worksheet: WorkSheet = xlsx.utils.json_to_sheet([ + { TEST_ALIAS: 'cellValue', DYNAMIC_HEADER: 'dynamicValue' } + ]); + + const mockConfig: CSVConfig = { + staticHeadersConfig: { + TEST: { aliases: ['TEST_ALIAS'] } + }, + ignoreDynamicHeaders: false + }; + + const utils = new CSVConfigUtils(worksheet, mockConfig); + + const cellValues = 
utils.getCellValues('TEST'); + + expect(cellValues).to.be.deep.equal(['cellValue']); + }); + }); + + describe('getUniqueCellValues', () => { + it('should get the unique cell values from a CSV row', () => { + const worksheet: WorkSheet = xlsx.utils.json_to_sheet([ + { TEST: 'cellValue', DYNAMIC_HEADER: 'dynamicValue' }, + { TEST: 'cellValue', DYNAMIC_HEADER: 'dynamicValue' }, + { TEST: 'cellValue2', DYNAMIC_HEADER: 'dynamicValue' } + ]); + + const mockConfig = { + staticHeadersConfig: { + TEST: { aliases: [] } + }, + ignoreDynamicHeaders: false + }; + + const utils = new CSVConfigUtils(worksheet, mockConfig); + + const cellValues = utils.getUniqueCellValues('TEST'); + + expect(cellValues).to.be.deep.equal(['cellValue', 'cellValue2']); + }); + }); + + describe('isCellUnique', () => { + it('should return true if the cell is unique', () => { + const worksheet: WorkSheet = xlsx.utils.json_to_sheet([ + { TEST: 'cellValue', DYNAMIC_HEADER: 'dynamicValue' }, + { TEST: 'cellValue2', DYNAMIC_HEADER: 'dynamicValue' } + ]); + + const mockConfig = { + staticHeadersConfig: { + TEST: { aliases: [] } + }, + ignoreDynamicHeaders: false + }; + + const utils = new CSVConfigUtils(worksheet, mockConfig); + + const isUnique = utils.isCellUnique('TEST', 'cellValue'); + + expect(isUnique).to.be.true; + }); + + it('should return false if the cell is not unique', () => { + const worksheet: WorkSheet = xlsx.utils.json_to_sheet([ + { TEST: 'cellValue', DYNAMIC_HEADER: 'dynamicValue' }, + { TEST: 'cellValue', DYNAMIC_HEADER: 'dynamicValue' } + ]); + + const mockConfig = { + staticHeadersConfig: { + TEST: { aliases: [] } + }, + ignoreDynamicHeaders: false + }; + + const utils = new CSVConfigUtils(worksheet, mockConfig); + + const isUnique = utils.isCellUnique('TEST', 'cellValue'); + + expect(isUnique).to.be.false; + }); + }); +}); diff --git a/api/src/utils/csv-utils/csv-config-utils.ts b/api/src/utils/csv-utils/csv-config-utils.ts new file mode 100644 index 0000000000..555a75af67 --- /dev/null 
+++ b/api/src/utils/csv-utils/csv-config-utils.ts @@ -0,0 +1,174 @@ +import { countBy, difference } from 'lodash'; +import { WorkSheet } from 'xlsx'; +import { getHeadersUpperCase, getWorksheetRowObjects } from '../xlsx-utils/worksheet-utils'; +import { CSVConfig, CSVRow } from './csv-config-validation.interface'; + +/** + * CSV Config Utils - A collection of methods useful when building CSVConfigs + * + * @exports + * @template StaticHeaderType - The static header type + * @class CSVConfigUtils + */ +export class CSVConfigUtils> { + _config: CSVConfig; + worksheet: WorkSheet; + worksheetRows: CSVRow[]; + + constructor(worksheet: WorkSheet, config: CSVConfig) { + this._config = config; + this.worksheet = worksheet; + this.worksheetRows = getWorksheetRowObjects(worksheet); + } + + /** + * The CSV config static headers. + * + * @returns {Uppercase[]} - The config headers + */ + get configStaticHeaders(): Uppercase[] { + return Object.keys(this._config.staticHeadersConfig) as Uppercase[]; + } + + /** + * The CSV worksheet headers. Raw incomming headers from the worksheet. + * + * @example + * worksheetHeaders: ['STATIC1', 'STATIC2_ALIAS', 'DYNAMIC1'] + * this: ['STATIC1', 'STATIC2_ALIAS', 'DYNAMIC1'] + * + * @returns {Uppercase[]} - The headers + */ + get worksheetHeaders(): Uppercase[] { + return getHeadersUpperCase(this.worksheet) as Uppercase[]; + } + + /** + * The CSV worksheet aliased static headers (leaves aliased headers as is). 
+ * + * @example + * worksheetHeaders: ['STATIC1', 'STATIC2_ALIAS', 'DYNAMIC1'] + * this: ['STATIC1', 'STATIC2_ALIAS'] + * + * @returns {Uppercase[]} - The static headers + */ + get worksheetAliasedStaticHeaders(): Uppercase[] { + const staticHeaders: Uppercase[] = []; + const worksheetHeaders = new Set(this.worksheetHeaders); + + for (const header of this.configStaticHeaders) { + if (worksheetHeaders.has(header)) { + staticHeaders.push(header); + } + + const aliases = this._config.staticHeadersConfig[header].aliases; + + for (const alias of aliases) { + if (worksheetHeaders.has(alias)) { + // Pushing the alias instead of the static header + staticHeaders.push(alias); + } + } + } + + return staticHeaders; + } + + /** + * The CSV worksheet static headers (converts aliased headers to static headers). + * + * @example + * worksheetHeaders: ['STATIC1', 'STATIC2_ALIAS', 'DYNAMIC'] // STATIC2_ALIAS is an alias for STATIC2 + * this: ['STATIC1', 'STATIC2'] + * + * @returns {Uppercase[]} - The static headers + */ + get worksheetStaticHeaders(): Uppercase[] { + const staticHeaders: Uppercase[] = []; + const worksheetHeaders = new Set(this.worksheetHeaders); + + for (const header of this.configStaticHeaders) { + if (worksheetHeaders.has(header)) { + staticHeaders.push(header); + } + + const aliases = this._config.staticHeadersConfig[header].aliases; + + for (const alias of aliases) { + if (worksheetHeaders.has(alias)) { + // Pushing the static header instead of the alias + staticHeaders.push(header); + } + } + } + + return staticHeaders; + } + + /** + * The CSV worksheet dynamic headers. + * + * @example + * worksheetHeaders: ['STATIC1', 'STATIC2_ALIAS', 'DYNAMIC1'] + * this: ['DYNAMIC1'] + * + * @returns {Uppercase[]} - The dynamic headers + */ + get worksheetDynamicHeaders(): Uppercase[] { + return difference(this.worksheetHeaders, this.worksheetAliasedStaticHeaders); + } + + /** + * Get the cell value from a CSV row. 
+ * + * @param {StaticHeaderType} header - The header name + * @param {CSVRow} row - The CSV row + * @returns {unknown} - The cell value + */ + getCellValue(header: StaticHeaderType, row: CSVRow) { + // Static header or dynamic header exact match + if (header in row) { + return row[header]; + } + + // Attempt to find the cell value from the header aliases + for (const alias of this._config.staticHeadersConfig[header]?.aliases ?? []) { + if (alias in row) { + return row[alias]; + } + } + } + + /** + * Get all the cell values from a static header. + * + * @param {StaticHeaderType} header - The header name + * @returns {unknown[]} - The cell values + */ + getCellValues(header: StaticHeaderType) { + return this.worksheetRows.map((row) => this.getCellValue(header, row)); + } + + /** + * Get all the unique cell values from a static header. + * + * @param {StaticHeaderType} header - The header name + * @returns {unknown[]} - The unique cell values + */ + getUniqueCellValues(header: StaticHeaderType) { + return [...new Set(this.getCellValues(header))]; + } + + /** + * Check if all the cell values from a static header are unique. 
+ * + * @param {StaticHeaderType} header - The header name + * @param {unknown} cell - The cell value + * @returns {boolean} - Whether all the cell values are unique + */ + isCellUnique(header: StaticHeaderType, cell: unknown) { + const uniqueDictionary = countBy(this.getCellValues(header), (value) => String(value).toLowerCase()); + const dictionaryKey = String(cell).toLowerCase(); + return uniqueDictionary[dictionaryKey] === 1 || uniqueDictionary[dictionaryKey] === undefined; + } +} diff --git a/api/src/utils/csv-utils/csv-config-validation.interface.ts b/api/src/utils/csv-utils/csv-config-validation.interface.ts new file mode 100644 index 0000000000..07cad6ee57 --- /dev/null +++ b/api/src/utils/csv-utils/csv-config-validation.interface.ts @@ -0,0 +1,193 @@ +/** + * The CSV configuration interface + * + */ +export interface CSVConfig = Uppercase> { + /** + * Record containing the static headers, their aliases, and the `validateCell` and `setCellValue` callbacks + * to be called for each static cell. + * + * Note: A static header is a header that is known and defined in the configuration. + * + * @type {Record} + */ + staticHeadersConfig: Record; + /** + * Contains the `validateCell` and `setCellValue` callbacks to be called for each dynamic cell. + * + * Note: A dynamic header is a header that is not known and defined in the configuration. + * The actual header name is `dynamic` meaning it is defined by the user. + * + * ie: Additional headers like measurements, markings, collection units etc. + * + * @type {CSVHeaderConfig | undefined} + */ + dynamicHeadersConfig?: CSVHeaderConfig; + /** + * Boolean to ignore dynamic headers. + * + * ie: If true, the dynamic headers will not be processed. + * + * @type {boolean} + */ + ignoreDynamicHeaders: boolean; +} + +interface CSVStaticHeaderConfig { + /** + * A list of aliases for the header. + * + * @type {Uppercase[]} + */ + aliases: Uppercase[]; + /** + * Indicates if the header is optional. 
Set this to true if you want to be able to omit the header from the CSV. + * + * Note: This is not related to the cell validation. It is used to check if the header is present in the CSV. + * + * @type {true} + */ + optional?: true; +} + +/** + * The CSV header config cell validator function + * + * @param {CSVParams} params - The CSV parameters + * @returns {CSVError[]} - The list of CSV errors + */ +export type CSVCellValidator = (params: CSVParams) => CSVError[]; + +/** + * The CSV header config cell setter function + * + * @param {CSVParams} params - The CSV parameters + * @returns {*} {any} - The new cell value + */ +export type CSVCellSetter = (params: CSVParams) => any; + +/** + * The CSV header configuration interface + * + */ +export interface CSVHeaderConfig { + /** + * Callback to fire when validating the cell. Returns a list of CSVErrors. + * + * @type {CSVCellValidator | undefined} The cell validator function + */ + validateCell?: CSVCellValidator; + /** + * Callback to fire when setting the cell (after validation). Returns the new cell value. + * + * ie: Convert a string to a number, or find a the matching UUID for the cell value. + * + * @type {CSVCellSetter | undefined} The cell setter function + */ + setCellValue?: CSVCellSetter; +} + +/** + * The CSV parameters interface - passed to the cell validation/setter callbacks. + * + */ +export interface CSVParams { + /** + * The cell value. + * + * @type {unknown} + */ + cell: unknown; + /** + * The row header name. The initial row key. + * + * @type {string} + */ + header: string; + /** + * The data row object. + * + * @type {CSVRow} + */ + row: CSVRow; + /** + * The row index. + * + * Note: First data row index 0. + * + * @type {number} + */ + rowIndex: number; + /** + * The config static header name. The final row key. 
+ * + * @type {string | undefined} + */ + staticHeader?: string; +} + +/** + * The CSV error interface + * + * @example + * { + * error: `Invalid collection unit`, // No need to include the header name / cell + * solution: `Use a valid collection unit`, // Solution includes the instructions to resolve + * values: ['unit1', 'unit2'], // Optional list of allowed values + * header: 'POPULATION_UNIT', + * cell: 'unit3', + * row: 1, // Header row index 0. First data row index 1 + * } + */ +export interface CSVError { + /** + * The error message. + * + * @type {string} + */ + error: string; + /** + * The solution message. + * + * @type {string} + */ + solution: string; + /** + * The list of allowed values if applicable. + * + * @type {(string[] | number[]) | undefined} + */ + values?: string[] | number[]; + /** + * The cell value. + * + * @type {unknown | undefined} + */ + cell?: unknown; + /** + * The header name. + * + * @type {string | undefined} + */ + header?: string; + /** + * The row index the error occurred. + * + * Note: Header row index 0. First data row index 1. 
+ * + * @type {number} + */ + row?: number; +} + +/** + * The raw unvalidated CSV row + * + */ +export type CSVRow = Record, any>; + +/** + * The validated CSV row keyed by the static headers + * + */ +export type CSVRowValidated> = Record; diff --git a/api/src/utils/csv-utils/csv-config-validation.test.ts b/api/src/utils/csv-utils/csv-config-validation.test.ts new file mode 100644 index 0000000000..99e881955e --- /dev/null +++ b/api/src/utils/csv-utils/csv-config-validation.test.ts @@ -0,0 +1,410 @@ +import chai, { expect } from 'chai'; +import sinon from 'sinon'; +import sinonChai from 'sinon-chai'; +import xlsx, { WorkSheet } from 'xlsx'; +import { + executeSetCellValue, + executeValidateCell, + forEachCSVCell, + validateCSVHeaders, + validateCSVWorksheet +} from './csv-config-validation'; +import { CSVConfig } from './csv-config-validation.interface'; +chai.use(sinonChai); + +describe('csv-config-validation', () => { + afterEach(() => { + sinon.restore(); + }); + + describe('validateCSVWorksheet', () => { + it('should return rows when CSV is valid', () => { + const validateCellStub = sinon.stub().returns([]); + const setCellValueStub = sinon.stub().returns('newValue'); + + const validateDynamicCellStub = sinon.stub().returns([]); + const setCellValueDynamicStub = sinon.stub().returns('newDynamicValue'); + + const mockConfig: CSVConfig = { + staticHeadersConfig: { + ALIAS: { + aliases: ['ALIAS_2'], + validateCell: validateCellStub, + setCellValue: setCellValueStub + } + }, + dynamicHeadersConfig: { + validateCell: validateDynamicCellStub, + setCellValue: setCellValueDynamicStub + }, + ignoreDynamicHeaders: false + }; + + const worksheet: WorkSheet = xlsx.utils.json_to_sheet([ + { ALIAS_2: 'value', DYNAMIC_HEADER: 'dynamicValue', OTHER_DYNAMIC_HEADER: 'otherDynamicValue' } + ]); + + const result = validateCSVWorksheet(worksheet, mockConfig); + + expect(validateCellStub).to.have.been.calledOnce; + expect(setCellValueStub).to.have.been.calledOnce; + + 
expect(validateDynamicCellStub).to.have.been.calledTwice; + expect(setCellValueDynamicStub).to.have.been.calledTwice; + + expect(result).to.deep.equal({ + errors: [], + rows: [ + { + ALIAS: 'newValue', + DYNAMIC_HEADER: 'newDynamicValue', + OTHER_DYNAMIC_HEADER: 'newDynamicValue' + } + ] + }); + }); + + it('should only call execute handlers when headers have no errors', () => { + const validateCellStub = sinon.stub().returns([]); + const setCellValueStub = sinon.stub().returns('newValue'); + + const mockConfig: CSVConfig = { + staticHeadersConfig: { + ALIAS: { + aliases: ['ALIAS_2'], + validateCell: validateCellStub, + setCellValue: setCellValueStub + } + }, + ignoreDynamicHeaders: true + }; + + const worksheet: WorkSheet = xlsx.utils.json_to_sheet([{ BAD: 'value' }]); + + const result = validateCSVWorksheet(worksheet, mockConfig); + + expect(validateCellStub).to.have.been.not.calledOnce; + expect(setCellValueStub).to.have.been.not.calledOnce; + + expect(result).to.deep.equal({ + errors: [ + { + error: 'A required column is missing', + solution: `Add all required columns to the file.`, + header: 'ALIAS', + values: ['ALIAS', 'ALIAS_2'], + row: 0 + } + ], + rows: [] + }); + }); + }); + + describe('validateCSVHeaders', () => { + it('should return an empty array if the headers are valid', () => { + const mockConfig: CSVConfig = { staticHeadersConfig: { ALIAS: { aliases: [] } }, ignoreDynamicHeaders: true }; + const worksheet: WorkSheet = xlsx.utils.json_to_sheet([{ ALIAS: 'value' }]); + + const result = validateCSVHeaders(worksheet, mockConfig); + + expect(result).to.deep.equal([]); + }); + + it('should return an error if the worksheet is empty', () => { + const mockConfig: CSVConfig = { staticHeadersConfig: { ALIAS: { aliases: [] } }, ignoreDynamicHeaders: true }; + const worksheet: WorkSheet = xlsx.utils.json_to_sheet([]); + + const result = validateCSVHeaders(worksheet, mockConfig); + + expect(result).to.deep.equal([ + { + row: 0, + error: 'No columns in the file', 
+ solution: 'Add column names. Did you accidentally include an empty first row above the columns?', + values: ['ALIAS'] + } + ]); + }); + + it('should return an error if CSV missing row data', () => { + const mockConfig: CSVConfig = { staticHeadersConfig: { ALIAS: { aliases: [] } }, ignoreDynamicHeaders: true }; + const worksheet: WorkSheet = { A1: { t: 's', v: 'ALIAS' }, '!ref': 'A1' }; + + const result = validateCSVHeaders(worksheet, mockConfig); + + expect(result).to.deep.equal([ + { + row: 1, + error: 'No rows in the file', + solution: 'Add rows. Did you accidentally import the wrong file?' + } + ]); + }); + + it('should return an error if the worksheet is missing a required header', () => { + const mockConfig: CSVConfig = { staticHeadersConfig: { ALIAS: { aliases: [] } }, ignoreDynamicHeaders: true }; + const worksheet: WorkSheet = xlsx.utils.json_to_sheet([{ NOT_ALIAS: 'value' }]); + + const result = validateCSVHeaders(worksheet, mockConfig); + + expect(result).to.deep.equal([ + { + row: 0, + error: 'A required column is missing', + solution: `Add all required columns to the file.`, + header: 'ALIAS', + values: ['ALIAS'] + } + ]); + }); + + it('should NOT return an error if the worksheet is missing a optional header', () => { + const mockConfig: CSVConfig = { + staticHeadersConfig: { ALIAS: { aliases: [], optional: true } }, + ignoreDynamicHeaders: true + }; + const worksheet: WorkSheet = xlsx.utils.json_to_sheet([{ NOT_ALIAS: 'value' }]); + + const result = validateCSVHeaders(worksheet, mockConfig); + + expect(result).to.deep.equal([]); + }); + + it('should return an error if the worksheet has an unknown header and dynamic headers are not ignored', () => { + const mockConfig: CSVConfig = { staticHeadersConfig: { ALIAS: { aliases: [] } }, ignoreDynamicHeaders: false }; + const worksheet: WorkSheet = xlsx.utils.json_to_sheet([{ ALIAS: 'alias', UNKNOWN_HEADER: 'value' }]); + + const result = validateCSVHeaders(worksheet, mockConfig); + + 
expect(result).to.deep.equal([ + { + row: 0, + error: 'An unknown column is included in the file', + solution: `Remove extra columns from the file.`, + header: 'UNKNOWN_HEADER' + } + ]); + }); + }); + + describe('forEachCSVCell', () => { + it('should iterate over each cell in the worksheet', () => { + const worksheet: WorkSheet = xlsx.utils.json_to_sheet([{ TEST: 'cellValue' }]); + + const validateCellStub = sinon.stub(); + const setCellValueStub = sinon.stub(); + + const config: CSVConfig = { + staticHeadersConfig: { + TEST: { + aliases: [], + validateCell: validateCellStub, + setCellValue: setCellValueStub + } + }, + ignoreDynamicHeaders: true + }; + + const callbackStub = sinon.stub(); + + forEachCSVCell(worksheet, config, callbackStub); + + expect(callbackStub).to.have.been.calledOnceWithExactly( + { + cell: 'cellValue', + header: 'TEST', + rowIndex: 0, + row: { TEST: 'cellValue' }, + staticHeader: 'TEST' + }, + { + validateCell: validateCellStub, + setCellValue: setCellValueStub + } + ); + }); + + it('should iterate over each cell in the worksheet when alias is used', () => { + const worksheet: WorkSheet = xlsx.utils.json_to_sheet([{ TEST_ALIAS: 'cellValue' }]); + + const validateCellStub = sinon.stub(); + const setCellValueStub = sinon.stub(); + + const config: CSVConfig = { + staticHeadersConfig: { + TEST: { + aliases: ['TEST_ALIAS'], + validateCell: validateCellStub, + setCellValue: setCellValueStub + } + }, + ignoreDynamicHeaders: true + }; + + const callbackStub = sinon.stub(); + + forEachCSVCell(worksheet, config, callbackStub); + + expect(callbackStub).to.have.been.calledOnceWithExactly( + { + cell: 'cellValue', + header: 'TEST_ALIAS', + rowIndex: 0, + row: { TEST_ALIAS: 'cellValue' }, + staticHeader: 'TEST' + }, + { + validateCell: validateCellStub, + setCellValue: setCellValueStub + } + ); + }); + + it('should iterate over dynamic cell values', () => { + const worksheet: WorkSheet = xlsx.utils.json_to_sheet([ + { TEST_ALIAS: 'cellValue', 
DYNAMIC_HEADER: 'dynamicValue' } + ]); + + const staticValidateCellStub = sinon.stub(); + const staticSetCellValueStub = sinon.stub(); + + const validateDynamicCellStub = sinon.stub(); + const setCellValueDynamicStub = sinon.stub(); + + const config: CSVConfig = { + staticHeadersConfig: { + TEST: { + aliases: ['TEST_ALIAS'], + validateCell: staticValidateCellStub, + setCellValue: staticSetCellValueStub + } + }, + dynamicHeadersConfig: { + validateCell: validateDynamicCellStub, + setCellValue: setCellValueDynamicStub + }, + ignoreDynamicHeaders: false + }; + + const callbackStub = sinon.stub(); + + forEachCSVCell(worksheet, config, callbackStub); + + expect(callbackStub).to.have.been.calledTwice; + + expect(callbackStub.getCall(0).args).to.deep.equal([ + { + cell: 'cellValue', + header: 'TEST_ALIAS', + rowIndex: 0, + row: { TEST_ALIAS: 'cellValue', DYNAMIC_HEADER: 'dynamicValue' }, + staticHeader: 'TEST' + }, + { + validateCell: staticValidateCellStub, + setCellValue: staticSetCellValueStub + } + ]); + + expect(callbackStub.getCall(1).args).to.deep.equal([ + { + cell: 'dynamicValue', + header: 'DYNAMIC_HEADER', + rowIndex: 0, + row: { TEST_ALIAS: 'cellValue', DYNAMIC_HEADER: 'dynamicValue' }, + staticHeader: undefined // Dynamic headers have no static header mapping + }, + { + validateCell: validateDynamicCellStub, + setCellValue: setCellValueDynamicStub + } + ]); + }); + }); + + describe('executeValidateCell', () => { + it('should call the validateCell callback and mutate errors array', () => { + const errors: any[] = []; + + const validateCellStub = sinon.stub().returns([{ error: 'error', solution: 'solution' }]); + + const params = { + cell: 'cellValue', + header: 'TEST', + rowIndex: 0, + row: { TEST: 'cellValue' }, + staticHeader: 'TEST' + }; + + const headerConfig = { + validateCell: validateCellStub + }; + + executeValidateCell(params, headerConfig, errors); + expect(validateCellStub).to.have.been.calledOnceWithExactly(params); + expect(errors).to.deep.equal([ 
+ { + error: 'error', + solution: 'solution', + cell: 'cellValue', + header: 'TEST', + row: 1, + values: undefined + } + ]); + }); + }); + + describe('executeSetCellValue', () => { + it('should call the setCellValue callback and mutate the row', () => { + const row = { TEST: 'cellValue' }; + + const setCellValueStub = sinon.stub().returns('newValue'); + + const params = { + cell: 'cellValue', + header: 'TEST', + rowIndex: 0, + row, + staticHeader: 'TEST' + }; + + const headerConfig = { + setCellValue: setCellValueStub + }; + + const mutableRows = [row]; + + executeSetCellValue(params, headerConfig, mutableRows); + + expect(setCellValueStub).to.have.been.calledOnceWithExactly(params); + expect(mutableRows).to.deep.equal([{ TEST: 'newValue' }]); + }); + + it('should remap the key for a static header alias', () => { + const row = { TEST: 'cellValue' }; + + const setCellValueStub = sinon.stub().returns('newValue'); + + const params = { + cell: 'cellValue', + header: 'TEST', + rowIndex: 0, + row, + staticHeader: 'NEW_KEY' + }; + + const headerConfig = { + setCellValue: setCellValueStub + }; + + const mutableRows = [row]; + + executeSetCellValue(params, headerConfig, mutableRows); + + expect(setCellValueStub).to.have.been.calledOnceWithExactly(params); + expect(mutableRows).to.deep.equal([{ NEW_KEY: 'newValue' }]); + }); + }); +}); diff --git a/api/src/utils/csv-utils/csv-config-validation.ts b/api/src/utils/csv-utils/csv-config-validation.ts new file mode 100644 index 0000000000..8b5347b862 --- /dev/null +++ b/api/src/utils/csv-utils/csv-config-validation.ts @@ -0,0 +1,242 @@ +import { WorkSheet } from 'xlsx'; +import { getWorksheetRowObjects } from '../xlsx-utils/worksheet-utils'; +import { CSVConfigUtils } from './csv-config-utils'; +import { + CSVConfig, + CSVError, + CSVHeaderConfig, + CSVParams, + CSVRow, + CSVRowValidated +} from './csv-config-validation.interface'; + +/** + * Validate the CSV worksheet with the CSV config. 
/**
 * Validate a CSV worksheet against a CSV config and produce the transformed rows.
 *
 * Headers are validated first. Cells are only visited when the headers are valid.
 * Rows are returned only when no error of any kind was collected.
 *
 * NOTE(review): the generic parameter list was unreadable in the reviewed copy. The
 * `Uppercase<string>` constraint is reconstructed - confirm against `CSVConfig`.
 *
 * @template StaticHeaderType - The CSV static headers
 * @param {WorkSheet} worksheet - The worksheet
 * @param {CSVConfig} config - The CSV configuration
 * @returns {*} {{ errors: CSVError[]; rows: CSVRowValidated[] }} - Either the errors (with no rows) or the rows (with no errors)
 */
export const validateCSVWorksheet = <StaticHeaderType extends Uppercase<string> = Uppercase<string>>(
  worksheet: WorkSheet,
  config: CSVConfig<StaticHeaderType>
): { errors: CSVError[]; rows: CSVRowValidated<StaticHeaderType>[] } => {
  const csvErrors = validateCSVHeaders(worksheet, config);

  // Header problems make cell validation meaningless, stop here
  if (csvErrors.length) {
    return { errors: csvErrors, rows: [] };
  }

  const validatedRows: CSVRowValidated<StaticHeaderType>[] = [];

  forEachCSVCell(worksheet, config, (cellParams, cellConfig) => {
    // Appends any cell errors to `csvErrors`
    executeValidateCell(cellParams, cellConfig, csvErrors);

    // Once any error has been collected no further cell values are written
    if (!csvErrors.length) {
      // Writes the (possibly transformed) cell into `validatedRows`
      executeSetCellValue(cellParams, cellConfig, validatedRows);
    }
  });

  return csvErrors.length ? { errors: csvErrors, rows: [] } : { errors: [], rows: validatedRows };
};

/**
 * Validate the worksheet headers against the CSV config.
 *
 * Checks, in order:
 *  1. The worksheet has at least one header (otherwise a single error is returned)
 *  2. The worksheet has at least one row (otherwise a single error is returned)
 *  3. Every non-optional static header from the config is present
 *  4. No unknown headers are present, unless they are ignored or a dynamic header config exists
 *
 * @param {WorkSheet} worksheet - The worksheet
 * @param {CSVConfig} config - The CSV configuration
 * @returns {*} {CSVError[]} - The header errors, empty when the headers are valid
 */
export const validateCSVHeaders = (worksheet: WorkSheet, config: CSVConfig): CSVError[] => {
  const utils = new CSVConfigUtils(worksheet, config);

  if (!utils.worksheetHeaders.length) {
    return [
      {
        error: 'No columns in the file',
        solution: 'Add column names. Did you accidentally include an empty first row above the columns?',
        values: utils.configStaticHeaders,
        row: 0
      }
    ];
  }

  if (!utils.worksheetRows.length) {
    return [
      {
        error: 'No rows in the file',
        solution: 'Add rows. Did you accidentally import the wrong file?',
        row: 1
      }
    ];
  }

  const headerErrors: CSVError[] = [];
  const presentStaticHeaders = new Set(utils.worksheetStaticHeaders);

  for (const staticHeader of utils.configStaticHeaders) {
    const { optional, aliases } = config.staticHeadersConfig[staticHeader];

    // Optional or present headers need no error
    if (optional || presentStaticHeaders.has(staticHeader)) {
      continue;
    }

    headerErrors.push({
      error: 'A required column is missing',
      solution: `Add all required columns to the file.`,
      header: staticHeader,
      // The static header name and every alias are acceptable column names
      values: [staticHeader, ...aliases],
      row: 0
    });
  }

  // Unknown headers are tolerated when ignored, or when a dynamic header config handles them
  const unknownHeadersAllowed = config.ignoreDynamicHeaders || Boolean(config.dynamicHeadersConfig);

  if (!unknownHeadersAllowed) {
    for (const unknownHeader of utils.worksheetDynamicHeaders) {
      headerErrors.push({
        error: 'An unknown column is included in the file',
        solution: `Remove extra columns from the file.`,
        header: unknownHeader,
        row: 0
      });
    }
  }

  return headerErrors;
};
/**
 * Iterate over each cell in the CSV worksheet.
 *
 * For every cell the callback receives the cell parameters and the `validateCell` /
 * `setCellValue` handlers of the matching header config: the static header config when the
 * header (or one of its aliases) is configured, otherwise the dynamic header config,
 * otherwise no handlers.
 *
 * @param {WorkSheet} worksheet - The worksheet
 * @param {CSVConfig} config - The CSV configuration
 * @param {(params: CSVParams, headerConfig: CSVHeaderConfig) => void} callback - Invoked once per cell
 * @returns {*} {void}
 */
export const forEachCSVCell = (
  worksheet: WorkSheet,
  config: CSVConfig,
  callback: (params: CSVParams, headerConfig: CSVHeaderConfig) => void
): void => {
  const staticHeaderConfigMap = _getCSVStaticHeaderMap(config);
  const worksheetRows = getWorksheetRowObjects(worksheet);

  worksheetRows.forEach((worksheetRow, rowIndex) => {
    // Iterate a snapshot of the entries: callbacks may rename or delete keys on the row
    // (see `executeSetCellValue`), which must not disturb the iteration.
    for (const [header, cell] of Object.entries(worksheetRow)) {
      // The map is keyed by upper-cased header / alias names, so the lookup must be upper-cased too
      const staticHeaderConfig = staticHeaderConfigMap.get(header.toUpperCase());
      const headerConfig = staticHeaderConfig ?? config.dynamicHeadersConfig ?? {};

      const params: CSVParams = {
        cell,
        header,
        row: worksheetRow,
        rowIndex,
        // Undefined for dynamic headers, which have no static header mapping
        staticHeader: staticHeaderConfig?.staticHeader
      };

      callback(params, {
        validateCell: headerConfig.validateCell,
        setCellValue: headerConfig.setCellValue
      });
    }
  });
};

/**
 * Execute the CSVConfig `setCellValue` callback for the cell.
 *
 * The value is stored under the upper-cased static header when the cell has one, otherwise
 * under the upper-cased worksheet header. If that key differs from the worksheet header
 * (an alias, or a header that was not upper-case) the original key is removed so the row
 * never carries the same cell twice.
 *
 * Note: This mutates `params.row` and the CSV row objects `mutableRows`.
 *
 * @param {CSVParams} params - The CSV parameters
 * @param {CSVHeaderConfig} headerConfig - The header configuration
 * @param {CSVRow[]} mutableRows - The mutable rows array, written at index `params.rowIndex`
 * @returns {*} {void}
 */
export const executeSetCellValue = (
  params: CSVParams,
  headerConfig: CSVHeaderConfig,
  mutableRows: CSVRow[]
): void => {
  const headerKey = (params.staticHeader ?? params.header).toUpperCase();

  // Falls back to the raw cell when there is no callback or it returns null / undefined
  const cellValue = headerConfig?.setCellValue?.(params) ?? params.cell;

  // Drop the worksheet key whenever the value is about to be stored under a different key
  if (params.header !== headerKey) {
    delete params.row[params.header];
  }

  params.row[headerKey] = cellValue;

  mutableRows[params.rowIndex] = params.row;
};

/**
 * Execute the CSVConfig `validateCell` callback for the cell.
 *
 * Each error returned by the callback is completed with the cell, header and row of the
 * current cell unless the callback already provided them.
 *
 * Note: This mutates the CSV errors array `mutableErrors`.
 *
 * @param {CSVParams} params - The CSV parameters
 * @param {CSVHeaderConfig} headerConfig - The header configuration
 * @param {CSVError[]} mutableErrors - The mutable errors array
 * @returns {*} {void}
 */
export const executeValidateCell = (
  params: CSVParams,
  headerConfig: CSVHeaderConfig,
  mutableErrors: CSVError[]
): void => {
  if (!headerConfig.validateCell) {
    return;
  }

  for (const cellError of headerConfig.validateCell(params)) {
    mutableErrors.push({
      error: cellError.error,
      solution: cellError.solution,
      values: cellError.values,
      cell: cellError.cell ?? params.cell,
      header: cellError.header ?? params.header,
      // Reported rows are offset by one: the header row is 0, the first data row is 1
      row: cellError.row ?? params.rowIndex + 1
    });
  }
};
/**
 * Build a lookup from header name to header config for the static headers of a CSV config.
 *
 * Every static header and each of its aliases is registered under its upper-cased name.
 * Each entry is a copy of the header config with the owning `staticHeader` added.
 *
 * @param {CSVConfig} config - The CSV configuration
 * @returns {*} {Map} - The header config Map, keyed by upper-cased header / alias name
 * @throws {Error} - If two headers or aliases resolve to the same upper-cased name
 */
export const _getCSVStaticHeaderMap = (config: CSVConfig) => {
  const configByHeaderName = new Map();

  Object.entries(config.staticHeadersConfig).forEach(([staticHeader, headerConfig]) => {
    const acceptedNames = [staticHeader].concat(headerConfig.aliases);

    acceptedNames.forEach((name) => {
      const uppercasedHeader = name.toUpperCase();

      // A name may only belong to one static header, otherwise the lookup is ambiguous
      if (configByHeaderName.has(uppercasedHeader)) {
        throw new Error(`Duplicate header in CSV config: ${uppercasedHeader}`);
      }

      configByHeaderName.set(uppercasedHeader, { ...headerConfig, staticHeader });
    });
  });

  return configByHeaderName;
};
}); + + it('should return single error when cell value not included in TSNs', () => { + const tsns = new Set([1, 2]); + const tsnValidator = getTsnCellValidator(tsns); + + const result = tsnValidator({ cell: 3, row: {}, header: 'HEADER', rowIndex: 0 }); + + expect(result).to.be.deep.equal([ + { + error: `Did not receive a Taxonomic Serial Number (TSN) for the species`, + solution: `Use a valid Taxonomic Serial Number (TSN) instead of a name to reference species.` + } + ]); + }); + }); + + describe('getDescriptionCellValidator', () => { + it('should return an empty array if the cell is valid', () => { + const descriptionValidator = getDescriptionCellValidator(); + + const result = descriptionValidator({ cell: 'description', row: {}, header: 'HEADER', rowIndex: 0 }); + + expect(result).to.be.deep.equal([]); + }); + + it('should return a single error when invalid', () => { + const badDescriptions = ['', 2, null, ' ']; + + for (const badDescription of badDescriptions) { + const descriptionValidator = getDescriptionCellValidator(); + + const result = descriptionValidator({ cell: badDescription, row: {}, header: 'HEADER', rowIndex: 0 }); + + expect(result.length).to.be.equal(1); + } + }); + }); +}); diff --git a/api/src/utils/csv-utils/csv-header-configs.ts b/api/src/utils/csv-utils/csv-header-configs.ts new file mode 100644 index 0000000000..a2f8c5bb5b --- /dev/null +++ b/api/src/utils/csv-utils/csv-header-configs.ts @@ -0,0 +1,66 @@ +import { z } from 'zod'; +import { CSVCellValidator, CSVError, CSVParams } from './csv-config-validation.interface'; + +/** + * Utility function to validate a CSV cell using a Zod schema. 
/**
 * Utility function to validate a CSV cell using a Zod schema.
 *
 * Every issue reported by the schema becomes one CSV error whose message is the issue
 * message and whose solution is the provided `solution` (or a generic default).
 *
 * @param {CSVParams} params - The cell parameters
 * @param {z.ZodSchema} schema - The Zod schema the cell value is parsed with
 * @param {string} [solution] - The solution message
 * @returns {*} {CSVError[]} - The cell validation errors, empty when the cell is valid
 */
export const validateZodCell = (params: CSVParams, schema: z.ZodSchema, solution?: string): CSVError[] => {
  const parsed = schema.safeParse(params.cell);

  if (parsed.success) {
    return [];
  }

  return parsed.error.errors.map((issue) => ({
    error: issue.message,
    solution: solution ?? 'Update the cell value to match the expected type'
  }));
};

/**
 * Get the TSN header cell validator.
 *
 * Rules:
 * 1. The cell must be a number, or a non-blank string that converts to a number
 * 2. That number must be one of the provided ITIS TSNs
 *
 * Other cell types (booleans, null, arrays, ...) are rejected outright rather than being
 * coerced, because `Number(true)` is 1 and `Number(null)` / `Number('')` are 0, which would
 * otherwise let non-TSN cells match a TSN in the set.
 *
 * @param {Set<number>} tsns Set of allowed ITIS TSNs
 * @returns {*} {CSVCellValidator} The validate cell callback
 */
export const getTsnCellValidator = (tsns: Set<number>): CSVCellValidator => {
  // Convert the raw cell to a number without the surprising coercions of a bare `Number(cell)`
  const toTsn = (cell: unknown): number => {
    if (typeof cell === 'number') {
      return cell;
    }

    if (typeof cell === 'string' && cell.trim() !== '') {
      return Number(cell);
    }

    return NaN;
  };

  return (params: CSVParams) => {
    if (tsns.has(toTsn(params.cell))) {
      return [];
    }

    return [
      {
        error: `Did not receive a Taxonomic Serial Number (TSN) for the species`,
        solution: `Use a valid Taxonomic Serial Number (TSN) instead of a name to reference species.`
      }
    ];
  };
};
/**
 * Get the description header cell validator.
 *
 * Rules:
 * 1. The cell may be undefined
 * 2. Otherwise the cell must be a string that, once trimmed, is between 1 and 250 characters long
 *
 * @returns {*} {CSVCellValidator} The validate cell callback
 */
export const getDescriptionCellValidator = (): CSVCellValidator => {
  const descriptionSchema = z.string().trim().min(1).max(250).optional();

  return (params: CSVParams) => validateZodCell(params, descriptionSchema);
};
type IKey = string | number;

/**
 * INestedRecord - A recursive nested record interface
 *
 * @template TValue - The final (leaf) value type
 */
interface INestedRecord<TValue> {
  [key: IKey]: TValue | INestedRecord<TValue>;
}

/**
 * NestedRecord - A class to handle nested records with case-insensitive keys
 *
 * Keys are stored lower-cased, and every lookup / update lower-cases the requested path.
 *
 * @example
 * const record = new NestedRecord({ a: { b: 'c' } });
 * record.get('A', 'B'); // 'c'
 * record.has('A', 'B'); // true
 * record.set({ path: ['A', 'B'], value: 'd' });
 *
 * @class
 * @exports
 * @template TValue - The final value type
 */
export class NestedRecord<TValue> {
  record: INestedRecord<TValue>;

  constructor(record?: INestedRecord<TValue>) {
    this.record = record ? this._convertRecordToLowerCase(record) : {};
  }

  /**
   * Convert keys to lowercase
   *
   * Note: Number keys are converted to their string form.
   *
   * @param {IKey[]} keys - The keys to convert
   * @returns {IKey[]} The keys in lowercase
   */
  _keysToLowercase(keys: IKey[]): IKey[] {
    return keys.map((key) => key.toString().toLowerCase());
  }

  /**
   * Convert a record to lowercase
   * Note: This function is recursive
   *
   * Only plain objects are treated as nested records. Any other value (including `null`,
   * arrays and class instances) is a leaf and is copied over untouched.
   *
   * @param {INestedRecord<TValue>} record - The record to convert
   * @returns {INestedRecord<TValue>} A new record with lowercase keys
   */
  _convertRecordToLowerCase(record: INestedRecord<TValue>): INestedRecord<TValue> {
    const newRecord: INestedRecord<TValue> = {};

    for (const [key, value] of Object.entries(record)) {
      newRecord[key.toLowerCase()] = this._isNestedRecord(value) ? this._convertRecordToLowerCase(value) : value;
    }

    return newRecord;
  }

  /**
   * Check whether a value is a nested record (a plain object) rather than a leaf value
   *
   * `typeof value === 'object'` alone is not enough: it is also true for `null` (which
   * cannot be iterated) and for arrays / class instances (which must not be rebuilt as
   * plain objects).
   *
   * @param {unknown} value - The value to check
   * @returns True if the value is a plain object
   */
  private _isNestedRecord(value: unknown): value is INestedRecord<TValue> {
    if (value === null || typeof value !== 'object') {
      return false;
    }

    const prototype = Object.getPrototypeOf(value);

    return prototype === Object.prototype || prototype === null;
  }

  /**
   * Get a value from the nested record
   *
   * @param {...IKey[]} keys - The record keys in order
   * @returns {*} {TValue | INestedRecord | undefined} The record or value or undefined
   */
  get(...keys: IKey[]): INestedRecord<TValue> | TValue | undefined {
    return get(this.record, this._keysToLowercase(keys));
  }

  /**
   * Check if the nested record has a value
   *
   * Note: A path whose stored value is `undefined` is reported as missing.
   *
   * @param {...IKey[]} keys - The record keys in order
   * @returns True if the value or record exists
   */
  has(...keys: IKey[]): boolean {
    // `get` already lower-cases the keys
    return this.get(...keys) !== undefined;
  }

  /**
   * Set a value in the nested record
   *
   * @param {{path: IKey[], value: TValue}} { path, value } - The new nested record value and path (record keys)
   * @returns {*} {void}
   */
  set({ path, value }: { path: IKey[]; value: TValue }): void {
    // `Object` as customizer: missing path segments are created as plain objects, even for number keys
    setWith(this.record, this._keysToLowercase(path), value, Object);
  }
}