diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml new file mode 100644 index 0000000..ecf46e7 --- /dev/null +++ b/.github/workflows/docs.yaml @@ -0,0 +1,36 @@ +on: + push: + branches: + - main + - mariadb + +jobs: + deploy: + name: Deploy Documentation + environment: 'docs-dev' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + submodules: true + - name: Set up AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: us-east-1 + - uses: actions/setup-python@v5 + with: + python-version: 3.x + - name: Install plantuml + run: sudo apt-get update && sudo apt-get install -y plantuml + - run: echo "cache_id=$(date --utc '+%V')" >> "$GITHUB_ENV" + - uses: actions/cache@v4 + with: + key: mkdocs-material-${{ env.cache_id }} + path: .cache + restore-keys: | + mkdocs-material- + - run: pip install mkdocs-material pymdown-extensions markdown-callouts plantuml_markdown # markdown-callouts provides the github-callouts extension + - run: mkdocs build + - run: aws s3 sync ./site "s3://${{ env.BUCKET_NAME || 'dev.docs.cfa.codes' }}/${{ env.PREFIX || 'cmr-entity-resolution' }}" diff --git a/README.md b/README.md index 1083dce..16749be 100644 --- a/README.md +++ b/README.md @@ -13,15 +13,16 @@ An entity resolution solution for automated record clearance. This solution is provided as a [docker compose][docker-compose] file and can be launched locally by running the following command: -*Note: If you would like to launch the webapp to search for entities using a web -interface, you will need to add `-p webapp` to include the `webapp` profile.* +> [!NOTE] +> If you would like to launch the webapp to search for entities using a web +> interface, you will need to add `--profile webapp` to include the `webapp` profile. 
```bash docker compose up -d ``` The configuration uses a persistent volume for the database and message queue -containers to ensure data is persisted through updates to the image or +container to ensure data is persisted through updates to the image or configuration. ### Applying changes @@ -70,7 +71,25 @@ information on how to get data in and out of Senzing. See our [collection of examples][examples] to see a demonstration of the system in action. +## Documentation + +Necessary documentation to operate, use, maintain, and contribute to this +solution is included in this repository. The majority of these documents are +written in Markdown and can be rendered directly in GitHub or your favorite IDE. +However, the documentation as a whole is meant to be converted to a static site +using [MkDocs]. + +In order to view the documentation in its intended form locally, you can use the +included docker container. Simply run the following: + +```bash +docker compose --profile docs up -d +``` + +The documentation should then be available at [http://localhost:8000](http://localhost:8000). + [docker-compose]: https://docs.docker.com/compose/ [entity-spec]: https://senzing.zendesk.com/hc/en-us/articles/231925448-Generic-Entity-Specification-Data-Mapping [examples]: docs/examples.md +[mkdocs]: https://www.mkdocs.org/ [import-export]: docs/importing-exporting.md diff --git a/docker-compose.yml b/docker-compose.yml index c670088..8013355 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -260,6 +260,16 @@ services: read_only: true restart: always + docs: + profiles: + - docs + build: + dockerfile: docs.dockerfile + ports: + - "8000:8000" + volumes: + - .:/docs + networks: senzing: name: ${SENZING_DOCKER_NETWORK:-senzing-network} diff --git a/docs.dockerfile b/docs.dockerfile new file mode 100644 index 0000000..ad2746a --- /dev/null +++ b/docs.dockerfile @@ -0,0 +1,6 @@ +FROM squidfunk/mkdocs-material:latest + +# Install PlantUML so we can render UML diagrams. 
+RUN pip install markdown-callouts plantuml_markdown +RUN apk add --no-cache plantuml --repository=https://dl-cdn.alpinelinux.org/alpine/edge/community \ + && rm -rf /var/cache/apk/* diff --git a/docs/api.md b/docs/api.md index d21ae29..bd70627 100644 --- a/docs/api.md +++ b/docs/api.md @@ -105,5 +105,5 @@ Example response: If the request succeeds, the API will return a 201 status code. -[compose]: ../docker-compose.yml +[compose]: https://github.com/codeforamerica/cmr-entity-resolution/blob/main/docker-compose.yml [config]: importing-exporting.md diff --git a/docs/examples.md b/docs/examples.md index ce38158..68bcb2f 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -10,6 +10,8 @@ functionality of this entity resolution system. ## Sources * [Import from Informix][import-from-informix] +* [Import from MySQL][import-from-mysql] [export-to-mongo]: examples/export-to-mongo.md [import-from-informix]: examples/import-from-informix.md +[import-from-mysql]: examples/import-from-mysql.md diff --git a/docs/examples/assets/config.mariadb.yml b/docs/examples/assets/config.mysql.yml similarity index 98% rename from docs/examples/assets/config.mariadb.yml rename to docs/examples/assets/config.mysql.yml index cc7cc35..d34e51d 100644 --- a/docs/examples/assets/config.mariadb.yml +++ b/docs/examples/assets/config.mysql.yml @@ -18,7 +18,6 @@ sources: mariadb: type: MySQL host: maraidb -# host: 127.0.0.1 database: people table: people username: root diff --git a/docs/examples/assets/docker-compose.mariadb.yml b/docs/examples/assets/docker-compose.mysql.yml similarity index 90% rename from docs/examples/assets/docker-compose.mariadb.yml rename to docs/examples/assets/docker-compose.mysql.yml index 1297dde..37f64fe 100644 --- a/docs/examples/assets/docker-compose.mariadb.yml +++ b/docs/examples/assets/docker-compose.mysql.yml @@ -3,7 +3,7 @@ services: image: mariadb environment: # See https://hub.docker.com/_/mariadb - MARIADB_ROOT_PASSWORD: ${INFORMIX_PASSWORD:-password} + 
MARIADB_ROOT_PASSWORD: ${MARIADB_ROOT_PASSWORD:-password} MARIADB_DATABASE: people networks: - senzing diff --git a/docs/examples/export-to-mongo.md b/docs/examples/export-to-mongo.md index c8c2f99..e93e2d6 100644 --- a/docs/examples/export-to-mongo.md +++ b/docs/examples/export-to-mongo.md @@ -6,8 +6,9 @@ exporting the resulting data. If you choose to import your own CSV file, ensure that you also provide an appropriate configuration file rather than the one specified here. -> _Note: All commands listed in this document are run from the root directory of -> this [repository][repo]._ +> [!NOTE] +> All commands listed in this document are run from the root directory of this +> [repository][repo]. ## Launching @@ -45,7 +46,8 @@ Now that we have the system up and running, we can import our data. We'll do so by copying the file into the appropriate directory, setting the configuration file path, and running the importer. -See [README.md][readme:import] for more information on importing and exporting. +See [Importing & Exporting][import-export] for more information on importing +and exporting. ```bash cp docs/examples/assets/import.csv data/import/import.csv @@ -74,6 +76,6 @@ see your data in MongoDB. The default username and password are `admin` and [mongo]: https://www.mongodb.com/ [mongo-express]: https://github.com/mongo-express/mongo-express -[readme:import]: ../../README.md#importing--exporting -[readme:launching]: ../../README.md#launching +[import-export]: ../importing-exporting.md +[readme:launching]: ../index.md#launching [repo]: https://github.com/codeforamerica/cmr-entity-resolution diff --git a/docs/examples/import-from-informix.md b/docs/examples/import-from-informix.md index 53f9429..32c39d2 100644 --- a/docs/examples/import-from-informix.md +++ b/docs/examples/import-from-informix.md @@ -6,8 +6,9 @@ and load a sample dataset to be imported into Senzing. 
You can also use your own Informix database, but you must also provide an appropriate configuration file rather than the one specified here. -> _Note: All commands listed in this document are run from the root directory of -> this [repository][repo]._ +> [!NOTE] +> All commands listed in this document are run from the root directory of this +> [repository][repo]. ## Launching @@ -45,7 +46,8 @@ already been loaded into the Informix database, so we just need to point the importer at it. We do so using a configuration file with the source set to the database, and passing that to the importer. -See [README.md][readme:import] for more information on importing and exporting. +See [Importing & Exporting][import-export] for more information on importing and +exporting. ```bash export IMPORTER_CONFIG_FILE="$(pwd)/docs/examples/assets/config.informix.yml" @@ -64,7 +66,7 @@ export EXPORTER_CONFIG_FILE="$(pwd)/docs/examples/assets/config.informix.yml" docker compose up exporter ``` +[import-export]: ../importing-exporting.md [informix]: https://www.ibm.com/products/informix -[readme:import]: ../../README.md#importing--exporting -[readme:launching]: ../../README.md#launching +[readme:launching]: ../index.md#launching [repo]: https://github.com/codeforamerica/cmr-entity-resolution diff --git a/docs/examples/import-from-mysql.md b/docs/examples/import-from-mysql.md new file mode 100644 index 0000000..a083f82 --- /dev/null +++ b/docs/examples/import-from-mysql.md @@ -0,0 +1,77 @@ +# Example: Import from MySQL + +> [!TIP] +> The MySQL source type can be used for any MySQL-compatible database, as shown +> in this example using MariaDB. + +This example demonstrates importing data from a [MySQL][mysql] database. +Following the steps below will launch a [MariaDB] container locally, and load a +sample dataset to be imported into Senzing. You can also use your own +MySQL-compatible database, but you must also provide an appropriate +configuration file rather than the one specified here. 
+ +> [!NOTE] +> All commands listed in this document are run from the root directory of this +> [repository][repo]. + +## Launching + +If you've launched the entity resolution system by following +[README.md][readme:launching], then you're already part way there! If you +haven't, don't worry! It's worth taking a look at that documentation, but you +don't have to run any of those commands. We'll launch the system along with our +MariaDB container. + +Before launching, the following environment variables can be set to configure +the container: + +| Variable | Default | Description | +|-----------------------|----------|-----------------------------| +| MARIADB_ROOT_PASSWORD | password | Password for the root user. | + +Whether you already have the entity resolution system up and you just want to +add the MariaDB container, or you need to launch the full stack, you can do so +with the following: + +```bash +docker compose \ + -f docker-compose.yml \ + -f docs/examples/assets/docker-compose.mysql.yml \ + up -d +``` + +This will launch and configure all the required containers and load the sample +dataset into MariaDB. + +## Importing + +Now that we have the system up and running, we can import our data. The data's +already been loaded into the database, so we just need to point the importer at +it. We do so using a configuration file with the source set to the database, and +passing that to the importer. + +See [Importing & Exporting][import-export] for more information on importing and +exporting. + +```bash +export IMPORTER_CONFIG_FILE="$(pwd)/docs/examples/assets/config.mysql.yml" +docker compose up importer +``` + +Once the importer container exits, your data is now in Senzing! + +## Exporting + +To verify that the import succeeded, we can export the results from Senzing +to a CSV file. Our config file already has this set up. 
+ +```bash +export EXPORTER_CONFIG_FILE="$(pwd)/docs/examples/assets/config.mysql.yml" +docker compose up exporter +``` + +[import-export]: ../importing-exporting.md +[mariadb]: https://mariadb.org/ +[mysql]: https://www.mysql.com/ +[readme:launching]: ../index.md#launching +[repo]: https://github.com/codeforamerica/cmr-entity-resolution diff --git a/docs/filters.md b/docs/filters.md index bbd70f6..f348d8b 100644 --- a/docs/filters.md +++ b/docs/filters.md @@ -53,6 +53,6 @@ The following options are available for this filter. inverse: false ``` -[non_human]: ../lib/filter/non_human.yml +[non_human]: https://github.com/codeforamerica/cmr-entity-resolution/blob/main/lib/filter/non_human.yml [source]: sources.md [transformations]: transformations.md diff --git a/docs/importing-exporting.md b/docs/importing-exporting.md index e0948a0..d88b82f 100644 --- a/docs/importing-exporting.md +++ b/docs/importing-exporting.md @@ -4,8 +4,9 @@ Data can be imported from a number of [sources] and exported to several [destinations]. Before you begin, you will need a configuration file with settings for your source, destination, [filters], and [transformations]. -> _Note: All commands listed in this document are run from the root directory of -> this project._ +> [!NOTE] +> All commands listed in this document are run from the root directory of +> this project. ## Using the CLI @@ -36,7 +37,7 @@ correct path of your configuration file to `--config`. 
```bash ./bin/exporter export --config config/config.yml -```` +``` ## Using docker diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..575e4da --- /dev/null +++ b/docs/index.md @@ -0,0 +1,4 @@ +--8<-- "README.md" + +[examples]: examples.md +[import-export]: importing-exporting.md diff --git a/docs/license.md b/docs/license.md new file mode 100644 index 0000000..f409d45 --- /dev/null +++ b/docs/license.md @@ -0,0 +1 @@ +--8<-- "LICENSE" diff --git a/docs/sources.md b/docs/sources.md index 40d84b2..ac7b97e 100644 --- a/docs/sources.md +++ b/docs/sources.md @@ -62,6 +62,7 @@ The following options are available for this source. | database | | YES | Database to read from. | | host | | YES | Informix host to connect to. | | password | | YES | Password for the database user. | +| port | 9089 | NO | Port to connect to on the database server. | | security | nil | NO | Set to "SSL" in order to utilize TLS[^1]. | | schema | $username | NO | Schema that the database is attached to. Defaults to the value of `username`. | | table | | YES | Table that contains the records to be imported | @@ -91,10 +92,56 @@ sources: Check out the [Import from Informix][informix-example] to see this in action. +## MySQL + +Query a [MySQL] or compatible (such as [MariaDB]) database for records to +import. + +### Configuration + +The following options are available for this source. + +| Option | Default | Required | Description | +|----------|-----------|----------|-------------------------------------------------------------------------------| +| database | | YES | Database to read from. | +| host | | YES | MySQL host to connect to. | +| password | | YES | Password for the database user. | +| port | 3306 | NO | Port to connect to on the database server. | +| security | nil | NO | Set to "SSL" in order to utilize TLS[^1]. 
| +| table | | YES | Table that contains the records to be imported | +| username | | YES | User with access to the database | + +### Example + +```yaml +sources: + mysql: + type: MySQL + host: localhost + database: people + table: people + username: mysql + password: password + field_map: + party_id: RECORD_ID + last_name: PRIMARY_NAME_LAST + first_name: PRIMARY_NAME_FIRST + gender: GENDER + birth_date: DATE_OF_BIRTH + dr_lic_num: DRIVERS_LICENSE_NUMBER + dr_lic_state: DRIVERS_LICENSE_STATE + ssn: SSN_NUMBER +``` + +Check out the [Import from MySQL][mysql-example] to see this in action. + [entity-spec]: https://senzing.zendesk.com/hc/en-us/articles/231925448-Generic-Entity-Specification-Data-Mapping [filters]: filters.md [informix]: https://www.ibm.com/products/informix [informix-example]: examples/import-from-informix.md +[mariadb]: https://mariadb.org/ +[mysql]: https://www.mysql.com/ +[mysql-example]: examples/import-from-mysql.md [senzing-config]: configuring-senzing.md [transformations]: transformations.md [^1]: Transport Layer Security diff --git a/lib/source/database.rb b/lib/source/database.rb new file mode 100644 index 0000000..b512906 --- /dev/null +++ b/lib/source/database.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +require 'sequel' +require_relative 'file' + +module Source + # Basic database source for data imports. Should be extended by specific + # database implementations. + class Database < Base + def each + table = db[@source_config[:table].to_sym] + table.each do |record| + record.transform_keys! { |key| field_mapper(key) } + yield record + end + end + + private + + # Establishes a database connection and proxies calls to the database. 
+ # + # @return [Sequel::Database] + def db + @db ||= Sequel.connect( + adapter: @source_config[:adapter], + host: @source_config[:host], + database: @source_config[:database], + port: @source_config[:port], + user: @source_config[:username], + password: @source_config[:password], + schema: @source_config[:schema] || @source_config[:username], + security: @source_config[:security] + ) + end + end +end diff --git a/lib/source/informix.rb b/lib/source/informix.rb index ded8af9..12e767d 100644 --- a/lib/source/informix.rb +++ b/lib/source/informix.rb @@ -1,40 +1,14 @@ # frozen_string_literal: true -require 'csv' -require 'sequel' -require_relative 'file' +require_relative 'database' module Source # Informix source for data imports. - class Informix < Base - def each - table = db[@source_config[:table].to_sym] - table.each do |record| - record.transform_keys! { |key| field_mapper(key) } - yield record - end - end - + class Informix < Database private - # Establishes a database connection and proxies calls to the database. - # - # @return [Sequel::Database] - def db - @db ||= Sequel.connect( - adapter: 'ibmdb', - host: @source_config[:host], - database: @source_config[:database], - port: @source_config[:port], - user: @source_config[:username], - password: @source_config[:password], - schema: @source_config[:schema] || @source_config[:username], - security: @source_config[:security] - ) - end - def defaults - super.merge({ port: 9089, security: nil }) + super.merge({ adapter: :ibmdb, port: 9089, security: nil }) end end end diff --git a/lib/source/mysql.rb b/lib/source/mysql.rb index 07c234e..3801f61 100644 --- a/lib/source/mysql.rb +++ b/lib/source/mysql.rb @@ -1,40 +1,14 @@ # frozen_string_literal: true -require 'csv' -require 'sequel' -require_relative 'file' +require_relative 'database' module Source # MySQL source for data imports. - class MySQL < Base - def each - table = db[@source_config[:table].to_sym] - table.each do |record| - record.transform_keys! 
{ |key| field_mapper(key) } - yield record - end - end - + class MySQL < Database private - # Establishes a database connection and proxies calls to the database. - # - # @return [Sequel::Database] - def db - @db ||= Sequel.connect( - adapter: 'mysql2', - host: @source_config[:host], - database: @source_config[:database], - port: @source_config[:port], - user: @source_config[:username], - password: @source_config[:password], - schema: @source_config[:schema] || @source_config[:username], - security: @source_config[:security] - ) - end - def defaults - super.merge({ port: 3306, security: nil }) + super.merge({ adapter: :mysql2, port: 3306, security: nil }) end end end diff --git a/mkdocs.yaml b/mkdocs.yaml new file mode 100644 index 0000000..ecba0ed --- /dev/null +++ b/mkdocs.yaml @@ -0,0 +1,80 @@ +site_name: CMR Entity Resolution +repo_url: https://github.com/codeforamerica/cmr-entity-resolution +edit_uri: blob/main/docs/ + +plugins: + - search + +markdown_extensions: + - admonition + - attr_list + - github-callouts + - footnotes + - pymdownx.emoji: + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:material.extensions.emoji.to_svg + - pymdownx.highlight: + anchor_linenums: true + line_spans: __span + pygments_lang_class: true + - pymdownx.inlinehilite + - pymdownx.snippets + - pymdownx.superfences: + custom_fences: + - name: mermaid + class: mermaid + format: !!python/name:pymdownx.superfences.fence_code_format + - pymdownx.tabbed: + alternate_style: true + +theme: + name: material + icon: + logo: material/puzzle + palette: + scheme: slate + features: + - content.code.copy + - content.code.select + - content.action.edit + - navigation.path + - navigation.top + - toc.follow + - toc.integrate + - search.highlight + - search.suggest + +extra: + generator: false + social: + - icon: material/web + link: https://codeforamerica.org/ + name: Code for America website + - icon: fontawesome/brands/github + link: 
https://github.com/codeforamerica + name: Code for America on GitHub + - icon: fontawesome/brands/threads + link: https://www.threads.net/@codeforamerica + name: Code for America on Threads + - icon: fontawesome/brands/x-twitter + link: https://www.twitter.com/codeforamerica + name: Code for America on X (formerly Twitter) + +nav: + - Home: index.md + - Usage: + - API: api.md + - Filters: filters.md + - Destinations: destinations.md + - Importing & Exporting: importing-exporting.md + - Senzing: configuring-senzing.md + - Sources: sources.md + - Transformations: transformations.md + - Examples: + - Export to MongoDB: examples/export-to-mongo.md + - Import from Informix: examples/import-from-informix.md + - Import from MySQL: examples/import-from-mysql.md + - About: + - License: license.md + +copyright: Produced by Code for America under the MIT license.