Skip to content
This repository has been archived by the owner on Dec 4, 2024. It is now read-only.

Commit

Permalink
Merge pull request #17 from Limess/additional_event_info
Browse files Browse the repository at this point in the history
Add additional columns
  • Loading branch information
Claire Carroll authored Feb 26, 2020
2 parents a32b0b0 + 30f3dcc commit ca3faed
Show file tree
Hide file tree
Showing 10 changed files with 187 additions and 26 deletions.
17 changes: 16 additions & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

version: 2
version: 2.1

jobs:
build:
Expand Down Expand Up @@ -45,6 +45,11 @@ jobs:
dbt run --target postgres --full-refresh
dbt run --target postgres
dbt run-operation drop_audit_schema --target postgres
dbt run-operation create_legacy_audit_table --target postgres
dbt run --target postgres --full-refresh
dbt run --target postgres
- run:
name: "Run Tests - Redshift"
command: |
Expand All @@ -56,6 +61,11 @@ jobs:
dbt run --target redshift --full-refresh
dbt run --target redshift
dbt run-operation drop_audit_schema --target redshift
dbt run-operation create_legacy_audit_table --target redshift
dbt run --target redshift --full-refresh
dbt run --target redshift
- run:
name: "Run Tests - Snowflake"
command: |
Expand All @@ -67,6 +77,11 @@ jobs:
dbt run --target snowflake --full-refresh
dbt run --target snowflake
dbt run-operation drop_audit_schema --target snowflake
dbt run-operation create_legacy_audit_table --target snowflake
dbt run --target snowflake --full-refresh
dbt run --target snowflake
- save_cache:
key: deps1-{{ .Branch }}
paths:
Expand Down
27 changes: 20 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
## dbt Event Logging
> :warning: **ADDING THIS PACKAGE TO YOUR DBT PROJECT CAN SIGNIFICANTLY SLOW
DOWN YOUR DBT RUNS**. This is due to the number of insert statements executed by
this package, especially as a post-hook. Please consider if this package is
appropriate for your use case before using it.

> :warning: **ADDING THIS PACKAGE TO YOUR DBT PROJECT CAN SIGNIFICANTLY SLOW
> DOWN YOUR DBT RUNS**. This is due to the number of insert statements executed by
> this package, especially as a post-hook. Please consider if this package is
> appropriate for your use case before using it.
Requires dbt >= 0.12.2
Requires dbt >= 0.14.0

This package provides out-of-the-box functionality to log events for all dbt
invocations, including run start, run end, model start, and model end. It
Expand All @@ -15,9 +15,9 @@ convenience models to make it easier to parse the event log data.
### Setup

1. Include this package in your `packages.yml` -- check [here](https://hub.getdbt.com/fishtown-analytics/logging/latest/)
for installation instructions.
for installation instructions.
2. Include the following in your `dbt_project.yml` directly within your
`models:` block (making sure to handle indenting appropriately):
`models:` block (making sure to handle indenting appropriately):

```YAML
# dbt_project.yml
Expand All @@ -33,5 +33,18 @@ That's it! You'll now have a stream of events for all dbt invocations in your
warehouse.
### Adapter support
This package is currently compatible with dbt's Snowflake, Redshift, and
Postgres integrations.
### Migration guide
#### v0.1.17 -> v0.2.0
New columns were added in v0.2.0:
- **event_user as user** - `varchar(512)`the user who ran the model
- **event_target as target** - `varchar(512)` the target used when running DBT
- **event_is_full_refresh as is_full_refresh** - `boolean` whether the DBT run was a full refresh

These will be added to your existing audit table automatically in the `on-run-start` DBT hook, and added to the staging tables deployed by this table when they are ran. The existing `event_schema` column will also be propagated into to `stg_dbt_model_deployments` as `schema`.
17 changes: 17 additions & 0 deletions integration_tests/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,33 @@ test-postgres:
dbt run --target postgres --full-refresh
dbt run --target postgres

dbt run-operation drop_audit_schema --target postgres
dbt run-operation create_legacy_audit_table --target postgres
dbt run --target postgres --full-refresh
dbt run --target postgres

test-redshift:
dbt run-operation drop_audit_schema --target redshift
dbt run --target redshift --full-refresh
dbt run --target redshift

dbt run-operation drop_audit_schema --target redshift
dbt run-operation create_legacy_audit_table --target redshift
dbt run --target redshift --full-refresh
dbt run --target redshift


test-snowflake:
dbt run-operation drop_audit_schema --target snowflake
dbt run --target snowflake --full-refresh
dbt run --target snowflake


dbt run-operation drop_audit_schema --target snowflake
dbt run-operation create_legacy_audit_table --target snowflake
dbt run --target snowflake --full-refresh
dbt run --target snowflake

test-all: test-postgres test-redshift test-snowflake
echo "Completed successfully"

Expand Down
16 changes: 16 additions & 0 deletions integration_tests/macros/create_old_audit_table.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@

{# create_legacy_audit_table creates the audit table with the columns defined in version 0.1.7 #}
{% macro create_legacy_audit_table() %}

{{ logging.create_audit_schema() }}

create table if not exists {{ logging.get_audit_relation() }}
(
event_name varchar(512),
event_timestamp {{dbt_utils.type_timestamp()}},
event_schema varchar(512),
event_model varchar(512),
invocation_id varchar(512)
)
{{ dbt_utils.log_info("Created legacy audit table") }}
{% endmacro %}
15 changes: 15 additions & 0 deletions lookml/dbt_audit_log.view.lkml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,21 @@ view: dbt_audit_log {
sql: ${TABLE}.EVENT_SCHEMA ;;
}

dimension: event_user {
type: string
sql: ${TABLE}.EVENT_USER ;;
}

dimension: event_target {
type: string
sql: ${TABLE}.EVENT_TARGET ;;
}

dimension: event_is_full_refresh {
type: boolean
sql: ${TABLE}.EVENT_IS_FULL_REFRESH ;;
}

dimension_group: event_timestamp {
type: time
timeframes: [
Expand Down
15 changes: 15 additions & 0 deletions lookml/dbt_deployments.view.lkml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,21 @@ view: dbt_deployments {
sql: ${TABLE}.models_deployed ;;
}

dimension: user {
type: string
sql: ${TABLE}.USER ;;
}

dimension: target {
type: string
sql: ${TABLE}.TARGET ;;
}

dimension: is_full_refresh {
type: boolean
sql: ${TABLE}.IS_FULL_REFRESH ;;
}

dimension: duration {
type: duration
dimension_group: dimension_group_name {
Expand Down
20 changes: 20 additions & 0 deletions lookml/dbt_model_deployments.view.lkml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,26 @@ view: dbt_model_deployments {
sql: ${TABLE}.MODEL ;;
}

dimension: schema {
type: string
sql: ${TABLE}.SCHEMA ;;
}

dimension: user {
type: string
sql: ${TABLE}.USER ;;
}

dimension: target {
type: string
sql: ${TABLE}.TARGET ;;
}

dimension: is_full_refresh {
type: boolean
sql: ${TABLE}.IS_FULL_REFRESH ;;
}

dimension: duration {
type: duration
dimension_group: dimension_group_name {
Expand Down
75 changes: 59 additions & 16 deletions macros/audit.sql
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,16 @@

{% endmacro %}

{% macro log_audit_event(event_name, schema, relation) %}
{% macro log_audit_event(event_name, schema, relation, user, target_name, is_full_refresh) %}

insert into {{ logging.get_audit_relation() }} (
event_name,
event_timestamp,
event_schema,
event_model,
event_user,
event_target,
event_is_full_refresh,
invocation_id
)

Expand All @@ -35,8 +38,11 @@
{{ dbt_utils.current_timestamp_in_utc() }},
{% if variable != None %}'{{ schema }}'{% else %}null::varchar(512){% endif %},
{% if variable != None %}'{{ relation }}'{% else %}null::varchar(512){% endif %},
{% if variable != None %}'{{ user }}'{% else %}null::varchar(512){% endif %},
{% if variable != None %}'{{ target_name }}'{% else %}null::varchar(512){% endif %},
{% if variable != None %}{% if is_full_refresh %}TRUE{% else %}FALSE{% endif %}{% else %}null::boolean{% endif %},
'{{ invocation_id }}'
)
)

{% endmacro %}

Expand All @@ -46,39 +52,76 @@
{% endmacro %}


{% macro create_audit_log_table() %}
{% macro create_audit_log_table() -%}

create table if not exists {{ logging.get_audit_relation() }}
(
event_name varchar(512),
event_timestamp {{dbt_utils.type_timestamp()}},
event_schema varchar(512),
event_model varchar(512),
invocation_id varchar(512)
)
{% set required_columns = [
["event_name", "varchar(512)"],
["event_timestamp", dbt_utils.type_timestamp()],
["event_schema", "varchar(512)"],
["event_model", "varchar(512)"],
["event_user", "varchar(512)"],
["event_target", "varchar(512)"],
["event_is_full_refresh", "boolean"],
["invocation_id", "varchar(512)"],
] -%}

{% endmacro %}
{% set audit_table = logging.get_audit_relation() -%}

{% set audit_table_exists = adapter.get_relation(audit_table.database, audit_table.schema, audit_table.name) -%}


{% if audit_table_exists -%}

{%- set columns_to_create = [] -%}

{# map to lower to cater for snowflake returning column names as upper case #}
{%- set existing_columns = adapter.get_columns_in_relation(audit_table)|map(attribute='column')|map('lower')|list -%}

{%- for required_column in required_columns -%}
{%- if required_column[0] not in existing_columns -%}
{%- do columns_to_create.append(required_column) -%}

{%- endif -%}
{%- endfor -%}


{%- for column in columns_to_create -%}
alter table {{ audit_table }}
add column {{ column[0] }} {{ column[1] }}
default null;
{% endfor -%}

{%- else -%}
create table if not exists {{ audit_table }}
(
{% for column in required_columns %}
{{ column[0] }} {{ column[1] }}{% if not loop.last %},{% endif %}
{% endfor %}
)
{%- endif -%}

{%- endmacro %}


{% macro log_run_start_event() %}
{{ logging.log_audit_event('run started') }}
{{ logging.log_audit_event('run started', user=target.user, target_name=target.name, is_full_refresh=flags.FULL_REFRESH) }}
{% endmacro %}


{% macro log_run_end_event() %}
{{ logging.log_audit_event('run completed') }}; commit;
{{ logging.log_audit_event('run completed', user=target.user, target_name=target.name, is_full_refresh=flags.FULL_REFRESH) }}; commit;
{% endmacro %}


{% macro log_model_start_event() %}
{{logging.log_audit_event(
'model deployment started', this.schema, this.name
'model deployment started', schema=this.schema, relation=this.name, user=target.user, target_name=target.name, is_full_refresh=flags.FULL_REFRESH
) }}
{% endmacro %}


{% macro log_model_end_event() %}
{{ logging.log_audit_event(
'model deployment completed', this.schema, this.name
'model deployment completed', schema=this.schema, relation=this.name, user=target.user, target_name=target.name, is_full_refresh=flags.FULL_REFRESH
) }}
{% endmacro %}
5 changes: 4 additions & 1 deletion models/stg_dbt_deployments.sql
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ aggregated as (
select

invocation_id,
event_user as user,
event_target as target,
event_is_full_refresh as is_full_refresh,

min(case
when event_name = 'run started' then event_timestamp
Expand All @@ -24,7 +27,7 @@ aggregated as (

from events

group by 1
{{ dbt_utils.group_by(n=4) }}

)

Expand Down
6 changes: 5 additions & 1 deletion models/stg_dbt_model_deployments.sql
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ aggregated as (

invocation_id,
event_model as model,
event_schema as schema,
event_user as user,
event_target as target,
event_is_full_refresh as is_full_refresh,

min(case
when event_name = 'model deployment started' then event_timestamp
Expand All @@ -28,7 +32,7 @@ aggregated as (

where event_name ilike '%model%'

group by 1, 2, 3
{{ dbt_utils.group_by(n=7) }}

)

Expand Down

0 comments on commit ca3faed

Please sign in to comment.