"""Airflow DAG: replicate the RAR data extract to an S3 bucket so it can be
accessed via BCBox.

Runs daily at 04:00 and launches a single Kubernetes pod
(``nr-dap-ods-ora2s3``) that performs the Oracle-to-S3 replication.
"""
from datetime import timedelta

from airflow import DAG
from airflow.providers.cncf.kubernetes.operators.pod import KubernetesPodOperator
from airflow.providers.cncf.kubernetes.secret import Secret
from kubernetes import client
from pendulum import datetime  # NOTE: shadows stdlib datetime.datetime; used only for start_date

# Expose every key of the "rar-data-extract" k8s Secret to the pod as
# environment variables (deploy_type="env", deploy_target=None => all keys).
rar_secrets = Secret("env", None, "rar-data-extract")

default_args = {
    "owner": "Data Foundations",
    "email": ["NRM.DataFoundations@gov.bc.ca"],
    "retries": 1,
    "retry_delay": timedelta(minutes=5),
    "email_on_failure": True,
    "email_on_retry": True,
}

with DAG(
    dag_id="housing-pipeline-rar",
    description=(
        "DAG to replicate RAR data extract to S3 bucket so that it can be "
        "accessed via BCBox"
    ),
    start_date=datetime(2024, 7, 23),
    schedule="0 4 * * *",  # daily at 04:00 UTC -- TODO confirm timezone expectation
    catchup=False,
    default_args=default_args,
) as dag:
    run_replication = KubernetesPodOperator(
        task_id="run_replication",
        # was f"run_rar_s3_replication": f-string had no placeholders (ruff F541)
        name="run_rar_s3_replication",
        image="ghcr.io/bcgov/nr-dap-ods-ora2s3:main",
        image_pull_policy="Always",
        in_cluster=True,
        service_account_name="airflow-admin",
        labels={
            "DataClass": "Medium",
            "ConnectionType": "database",
            "Release": "airflow",
        },
        # Keep finished pods around so their logs can be inspected after runs.
        is_delete_operator_pod=False,
        secrets=[rar_secrets],
        container_resources=client.V1ResourceRequirements(
            requests={"cpu": "50m", "memory": "512Mi"},
            limits={"cpu": "100m", "memory": "1024Mi"},
        ),
    )