Tagging api (#15)

* tagging .py files added * name change * new file * create removing tags file * made some changes * removing_tags successful * user add/remove functions added * remove redundant files * irs tagging file generated * Update config.py * generated table of columns and respective tags * Update sandbox.ipynb * Merge with main branch should be ready soon * final cleanup before merging to main
bcgov · Nov 6, 2024 · 9342eb7 · 9342eb7
1 parent 5621d92
commit 9342eb7
Show file tree

Hide file tree

Showing 21 changed files with 294,473 additions and 33 deletions.
diff --git a/scripts/Dockerfile b/scripts/Dockerfile
diff --git a/scripts/__pycache__/config.cpython-310.pyc b/scripts/__pycache__/config.cpython-310.pyc
diff --git a/scripts/__pycache__/config.cpython-312.pyc b/scripts/__pycache__/config.cpython-312.pyc
diff --git a/scripts/apply_user.py b/scripts/apply_user.py
@@ -0,0 +1,81 @@
+from config import *
+
+# Schema method: list all tables in a schema, then set the same owner on each.
+
+# Endpoint for retrieving tables filtered by database schema, set a high limit to get them all
+endpoint1 = f"/tables?databaseSchema={database_schema}&limit=9999"
+
+# Full URL
+url = base_url + endpoint1
+
+# Default to an empty list so the code below never hits an undefined `tables`
+# (NameError) when the request fails or the response body is not valid JSON.
+tables = []
+
+# Make the GET request to retrieve all tables in the given schema
+response = requests.get(url, headers=headers_get)
+
+# Check the response status
+if response.status_code == 200:
+    print("Raw Response Content:", response.text)  # Print the raw content of the response
+    try:
+        # `or []` also covers a payload whose "data" key is present but null
+        tables = response.json().get('data', []) or []
+    except ValueError as e:
+        print("Failed to parse JSON response:", e)
+    else:
+        print("Tables retrieved successfully!")
+        for table in tables:
+            print(f"Table Name: {table['name']}, ID: {table['id']}")
+else:
+    print(f"Failed to retrieve tables: {response.status_code}")
+    print(response.text)
+
+# IDs of every table that was retrieved (empty when retrieval failed)
+table_id_list = [table['id'] for table in tables]
+
+
+# Function to apply user to a table
+def apply_user_to_table(table_id):
+    """Set `selected_user_id` as the owner of one table.
+
+    Sends a JSON Patch "replace" on `/owner` to `/tables/{table_id}` and prints
+    whether the server answered 200. Nothing is returned or raised for a
+    non-200 answer; the status code and response body are printed instead.
+
+    Args:
+        table_id: ID of the table to update, as returned by the tables listing.
+    """
+    # Endpoint for updating a table
+    table_endpoint = f"/tables/{table_id}"
+
+    # Full URL
+    table_url = base_url + table_endpoint
+
+    # Data payload for applying the user
+    data = [
+        {
+            "op": "replace",
+            "path": "/owner",
+            "value": {
+                "id": selected_user_id,
+                "type": "user"
+            }
+        }
+    ]
+
+    # Make the PATCH request
+    response = requests.patch(table_url, headers=headers_patch, json=data)
+
+    # Check the response status
+    if response.status_code == 200:
+        print(f"User applied to table {table_id} successfully!")
+    else:
+        print(f"Failed to apply user to table {table_id}: {response.status_code}")
+        print(response.text)
+
+
+# Apply user to all tables in the schema
+for table_id in table_id_list:
+    apply_user_to_table(table_id)
+    time.sleep(1)
diff --git a/scripts/automated-tagging.py b/scripts/automated-tagging.py
diff --git a/scripts/batch_upload_tags.py b/scripts/batch_upload_tags.py
@@ -0,0 +1,34 @@
+from config import *
+
+# Read the CSV whose rows each describe one tag to create
+df = pd.read_csv('reference_csvs/irs.csv')
+
+# Tag-creation endpoint, appended to the API base URL
+endpoint = "/tags"
+url = base_url + endpoint
+
+# One POST per CSV row
+for i, row in df.iterrows():
+    app_name = row['APPLICATION_NAME']
+
+    # Request body assembled from the row's columns
+    data = dict(
+        classification="Application System",  # This must be one of the classifications that exist in OpenMetaData
+        name=f"{app_name}",
+        description=f"{row['TEXT']} \r\n\r\n\r\nData Model URL: {row['DATA_MODEL_URL']}",
+        displayName=f"{row['FULL_NAME']}",
+    )
+
+    # Create the tag
+    response = requests.post(url, headers=headers_get, json=data)
+
+    # Report the outcome of this row's request
+    created = response.status_code in (200, 201)
+    if not created:
+        print(f"Failed to create tag for row {i} with status code {response.status_code}")
+        print(response.text)
+    else:
+        print(f"Tag created successfully for row {i} - {app_name}!")
+
+    # Pause for a second between requests to avoid overwhelming the server
+    time.sleep(1)
diff --git a/scripts/config.py b/scripts/config.py
@@ -0,0 +1,37 @@
+# Configuration File
+import pandas as pd
+import json
+import os
+import requests
+import time
+import logging
+from datetime import datetime
+
+# API base URL TEST ENV
+base_url = "https://nr-data-catalogue-test.apps.emerald.devops.gov.bc.ca/api/v1"
+
+# Database schema to filter by
+database_schema = "ODS.odsdev.ats_replication"
+
+# Selected user ID (replace with actual user ID)
+selected_user_id = "your_user_id_here"
+
+# API key for authentication.
+# Read from the OPENMETADATA_API_KEY environment variable so that no credential
+# lives in version control. When the variable is unset the key is an empty
+# string and requests are sent without a usable bearer token.
+# NOTE(review): any token previously committed in this file should be treated
+# as compromised and revoked.
+api_key = os.environ.get("OPENMETADATA_API_KEY", "")
+
+# API request headers for get requests
+headers_get = {
+    "Content-Type": "application/json",
+    "Authorization": f"Bearer {api_key}"
+}
+
+# API request headers for patch requests
+headers_patch = {
+    "Content-Type": "application/json-patch+json",
+    "Authorization": f"Bearer {api_key}"
+}