Skip to content

Commit

Permalink
feat: Added MySQL destination.
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesiarmes committed Oct 31, 2024
1 parent 349e218 commit 8218ec3
Show file tree
Hide file tree
Showing 15 changed files with 226 additions and 116 deletions.
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
# The ibm_db driver currently only supports x86_64 architecture, so we'll
# support that as the only option for now.
FROM --platform=linux/amd64 senzing/senzingapi-runtime:${SENZING_VERSION:-3.10.3} AS configs
FROM --platform=linux/amd64 senzing/senzingapi-runtime:${SENZING_VERSION:-3.12.0} AS configs

FROM --platform=linux/amd64 ruby:${RUBY_VERSION:-3.3}

# Required in order to bypass the license prompt.
ENV SENZING_ACCEPT_EULA="I_ACCEPT_THE_SENZING_EULA"
ENV TERM=xterm
ENV SENZING_VERSION=${SENZING_VERSION:-3.10.3}
ENV SENZING_VERSION=${SENZING_VERSION:-3.12.0}

# Update packages and install additional dependencies.
RUN apt-get update && \
Expand Down
92 changes: 33 additions & 59 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,31 @@
x-senzing-options: &senzing-options
SENZING_API_SERVER_ALLOWED_ORIGINS: '*'
SENZING_API_SERVER_ENABLE_ADMIN: 'true'
SENZING_ENGINE_CONFIGURATION_JSON: >-
{
"PIPELINE": {
"CONFIGPATH": "/etc/opt/senzing",
"LICENSESTRINGBASE64": "${SENZING_LICENSE_BASE64_ENCODED}",
"RESOURCEPATH": "/opt/senzing/g2/resources",
"SUPPORTPATH": "/opt/senzing/data/current"
},
"SQL": {
"BACKEND": "SQL",
"CONNECTION": "postgresql://${POSTGRES_USERNAME:-postgres}:${POSTGRES_PASSWORD:-postgres}@postgres:5432:G2/"
}
}
x-service-defaults: &service-defaults
build: .
depends_on:
- api
environment:
<<: *senzing-options
networks:
- senzing
volumes:
- .:/opt/cmr

services:
rabbitmq:
profiles:
Expand Down Expand Up @@ -89,8 +117,7 @@ services:
depends_on:
- postgres
environment:
SENZING_API_SERVER_ALLOWED_ORIGINS: '*'
SENZING_API_SERVER_ENABLE_ADMIN: 'true'
<<: *senzing-options
SENZING_ENGINE_CONFIGURATION_JSON: >-
{
"PIPELINE": {
Expand All @@ -104,7 +131,7 @@ services:
"CONNECTION": "postgresql://${POSTGRES_USERNAME:-postgres}:${POSTGRES_PASSWORD:-postgres}@postgres:5432:G2/"
}
}
image: senzing/senzing-api-server:${SENZING_DOCKER_IMAGE_VERSION_SENZING_API_SERVER:-latest}
image: senzing/senzing-api-server:${SENZING_DOCKER_IMAGE_VERSION_SENZING_API_SERVER:-3.5.15}
networks:
- senzing
ports:
Expand All @@ -115,79 +142,26 @@ services:
- '/tmp'

tools:
build: .
environment:
SENZING_ENGINE_CONFIGURATION_JSON: >-
{
"PIPELINE": {
"CONFIGPATH": "/etc/opt/senzing",
"LICENSESTRINGBASE64": "${SENZING_LICENSE_BASE64_ENCODED}",
"RESOURCEPATH": "/opt/senzing/g2/resources",
"SUPPORTPATH": "/opt/senzing/data/current"
},
"SQL": {
"BACKEND": "SQL",
"CONNECTION": "postgresql://${POSTGRES_USERNAME:-senzing}:${POSTGRES_PASSWORD:-senzing}@postgres:5432:G2"
}
}
networks:
- senzing
<<: *service-defaults
command: run
volumes:
- ./config:/etc/cmr/config
- ./data/import:/etc/cmr/import
- ./data/export:/etc/cmr/export
- ./lib:/opt/cmr/lib

importer:
<<: *service-defaults
profiles:
- load
depends_on:
- api
build: .
environment:
SENZING_ENGINE_CONFIGURATION_JSON: >-
{
"PIPELINE": {
"CONFIGPATH": "/etc/opt/senzing",
"LICENSESTRINGBASE64": "${SENZING_LICENSE_BASE64_ENCODED}",
"RESOURCEPATH": "/opt/senzing/g2/resources",
"SUPPORTPATH": "/opt/senzing/data/current"
},
"SQL": {
"BACKEND": "SQL",
"CONNECTION": "postgresql://${POSTGRES_USERNAME:-senzing}:${POSTGRES_PASSWORD:-senzing}@postgres:5432:G2"
}
}
networks:
- senzing
command: load
volumes:
- ${IMPORTER_CONFIG_FILE:-./config/config.yml}:/etc/cmr/config.yml
- ./data/import:/etc/cmr/import

exporter:
<<: *service-defaults
profiles:
- export
depends_on:
- api
build: .
environment:
SENZING_ENGINE_CONFIGURATION_JSON: >-
{
"PIPELINE": {
"CONFIGPATH": "/etc/opt/senzing",
"LICENSESTRINGBASE64": "${SENZING_LICENSE_BASE64_ENCODED}",
"RESOURCEPATH": "/opt/senzing/g2/resources",
"SUPPORTPATH": "/opt/senzing/data/current"
},
"SQL": {
"BACKEND": "SQL",
"CONNECTION": "postgresql://${POSTGRES_USERNAME:-senzing}:${POSTGRES_PASSWORD:-senzing}@postgres:5432:G2"
}
}
networks:
- senzing
command: export
volumes:
- ${EXPORTER_CONFIG_FILE:-./config/config.yml}:/etc/cmr/config.yml
Expand Down
85 changes: 65 additions & 20 deletions docs/destinations.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ before it is sent to the destination.
| Option | Default | Required | Description |
|-----------------|---------|----------|--------------------------------------------------------------------------------------|
| export_file[^1] | | YES | Path to the JSON export from Senzing. |
| field_map | | YES | A mapping of fields from Senzing to their counterparts in the destination. |
| type | | YES | The type of destination to use. Should be the name of one of the destinations below. |

## CSV
Expand Down Expand Up @@ -49,6 +50,37 @@ destination:
export_file: /home/senzing/export.json
```
## JSONL
Write records to a [JSON Lines][jsonl] formatted file. Each record will be
written as a single JSON object each on their own line.
### Configuration
The following options are available for this destination.
| Option | Default | Required | Description |
|-----------|---------|----------|---------------------------------------------------------|
| overwrite | false | NO | Overwrite the existing file instead of appending to it. |
| path | | YES | The path to write the JSONL file. |
### Example
```yaml
destination:
type: JSONL
path: /home/senzing/export.jsonl
overwrite: false
field_map:
ENTITY_ID: person_id
DATABASE: database
PARTY_ID: party_id
MATCH_SCORE: match_score
RELATED_RECORD_ID: potential_person_id
RELATED_MATCH_SCORE: potential_match_score
export_file: /home/senzing/export.json
```
## Mongo
Write records to a [MongoDB][mongo] collection as individual JSON documents.
Expand Down Expand Up @@ -88,40 +120,53 @@ destination:
Check out the [Export to Mongo][mongo-example] to see this in action.
## JSONL
## MySQL
Write records to a [JSON Lines][jsonl] formatted file. Each record will be
written as a single JSON object each on their own line.
Insert entities into a [MySQL] or compatible (such as [MariaDB]) database.
### Configuration
The following options are available for this destination.
The following options are available for this destination.
| Option | Default | Required | Description |
|-----------|---------|----------|---------------------------------------------------------|
| overwrite | false | NO | Overwrite the existing file instead of appending to it. |
| path | | YES | The path to write the JSONL file. |
| Option | Default | Required | Description |
|----------|-----------|----------|--------------------------------------------|
| database | | YES | Database to write to. |
| host | | YES | Database host to connect to. |
| password | | YES | Password for the database user. |
| port | 3306 | NO | Port to connect to on the database server. |
| security | nil | NO | Set to "SSL" in order to utilize TLS[^2]. |
| table | | YES | Table to write entities to. |
| username | | YES | User with access to the database. |
### Example
```yaml
destination:
type: JSONL
path: /home/senzing/export.csv
overwrite: false
field_map:
ENTITY_ID: person_id
DATABASE: database
PARTY_ID: party_id
MATCH_SCORE: match_score
RELATED_RECORD_ID: potential_person_id
RELATED_MATCH_SCORE: potential_match_score
export_file: /home/senzing/export.json
destination:
type: MySQL
host: localhost
database: people
table: entity_resolution
username: mysql
password: password
field_map:
ENTITY_ID: person_id
DATABASE: database
PARTY_ID: party_id
MATCH_SCORE: match_score
RELATED_RECORD_ID: potential_person_id
RELATED_MATCH_SCORE: potential_match_score
export_file: /etc/cmr/export/export.json
```
Check out the [Export to MySQL][mysql-example] example to see this in action.
[jsonl]: https://jsonlines.org/
[mariadb]: https://mariadb.org/
[mongo]: https://www.mongodb.com/
[mongo-example]: examples/export-to-mongo.md
[mysql]: https://www.mysql.com/
[mysql-example]: examples/mysql.md
[transformations]: transformations.md
[^1]: Use of an export file is temporary until records can be exported directly
using the API.
[^2]: Transport Layer Security
5 changes: 3 additions & 2 deletions docs/examples.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@ functionality of this entity resolution system.
## Destinations

* [Export to Mongo][export-to-mongo]
* [Export to MySQL][mysql]

## Sources

* [Import from Informix][import-from-informix]
* [Import from MySQL][import-from-mysql]
* [Import from MySQL][mysql]

[export-to-mongo]: examples/export-to-mongo.md
[import-from-informix]: examples/import-from-informix.md
[import-from-mysql]: examples/import-from-mysql.md
[mysql]: examples/mysql.md
18 changes: 7 additions & 11 deletions docs/examples/assets/config.mysql.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ sources:
username: root
password: password
field_map:
party_id: OTHER_ID_PARTY
party_id: RECORD_ID
last_name: PRIMARY_NAME_LAST
first_name: PRIMARY_NAME_FIRST
gender: GENDER
Expand All @@ -43,16 +43,12 @@ sources:
party_code: TYPE

destination:
type: CSV
path: /etc/cmr/export/export.csv
overwrite: true
headers:
- person_id
- database
- party_id
- match_score
- potential_person_id
- potential_match_score
type: MySQL
host: mariadb
database: people
table: entity_resolution
username: root
password: password
field_map:
ENTITY_ID: person_id
DATABASE: database
Expand Down
12 changes: 11 additions & 1 deletion docs/examples/assets/mysql-schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,14 @@ LOAD DATA LOCAL INFILE "/docker-entrypoint-initdb.d/import.csv"
INTO TABLE people
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
IGNORE 1 ROWS;
IGNORE 1 ROWS;

-- Destination table for entities exported from Senzing. The column names are
-- the destination-side names used in the example configuration's field_map
-- (person_id, database, party_id, ...).
--
-- DATABASE is a reserved word in both MySQL and MariaDB, so it must be quoted
-- with backticks wherever it is used as an identifier; left unquoted, this
-- statement fails with a syntax error.
CREATE TABLE entity_resolution(
  person_id VARCHAR(255) NOT NULL,
  `database` VARCHAR(255) NOT NULL,
  party_id VARCHAR(255) NOT NULL,
  match_score INTEGER NULL,
  potential_person_id VARCHAR(255) NULL,
  potential_match_score INTEGER NULL,
  -- One row per (entity, source record, source database) combination.
  PRIMARY KEY (person_id, party_id, `database`)
);
30 changes: 23 additions & 7 deletions docs/examples/import-from-mysql.md → docs/examples/mysql.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@
> The MySQL source and destination types can be used for any MySQL-compatible
> database, as shown in this example using MariaDB.
This example demonstrates importing data from a [MySQL][mysql] database.
Following the steps below will launch a [MariaDB] container locally, and load a
sample dataset to be imported into Senzing. You can also use your own
MySQL compatible database, but you must also provide an appropriate
configuration file rather than the one specified here.
This example demonstrates importing data from a [MySQL][mysql] database, and
exporting the results to another table in that same database. Following the
steps below will launch a [MariaDB] container locally, and load a sample dataset
to be imported into Senzing. You can also use your own MySQL compatible
database, but you must also provide an appropriate configuration file rather
than the one specified here.

> [!NOTE]
> All commands listed in this document are run from the root directory of this
Expand Down Expand Up @@ -62,8 +63,23 @@ Once the importer container exits, your data is now in Senzing!

## Exporting

To verify that the import succeeded, we can export the results from Senzing
to a CSV file. Our config file already has this setup.
With our records imported into Senzing, we can export the resulting entities.
For this example, we'll export the entities to a new table in the same database.

> [!NOTE]
> The export process assumes that the table already exists. For this example,
> we've used the following to create the table:
> ```sql
> CREATE TABLE entity_resolution(
>   person_id VARCHAR(255) NOT NULL,
>   `database` VARCHAR(255) NOT NULL,
>   party_id VARCHAR(255) NOT NULL,
>   match_score INTEGER NULL,
>   potential_person_id VARCHAR(255) NULL,
>   potential_match_score INTEGER NULL,
>   PRIMARY KEY (person_id, party_id, `database`)
> );
> ```
```bash
export EXPORTER_CONFIG_FILE="$(pwd)/docs/examples/assets/config.mysql.yml"
Expand Down
Loading

0 comments on commit 8218ec3

Please sign in to comment.