Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding pre-commit hooks #26

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/PULL_REQUEST_TEMPLATE.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ _Summary of changes in this PR or what it accomplishes._

<!--

Please title your PR as follows: `feature: fix foo bar`.
Please title your PR as follows: `feature: fix foo bar`.
Always start with the thing you are fixing, then describe the fix.
Don't use past tense (e.g. "fixed foo bar").

Expand Down
6 changes: 6 additions & 0 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
name: Lint Code Base

on:
push:
branches:
Expand All @@ -18,6 +19,11 @@ jobs:
# Full git history is needed to get a proper list of changed files within `super-linter`
fetch-depth: 0

- name: Install Dependencies
run: |
pip install pipenv
pipenv install --dev
matiasz8 marked this conversation as resolved.
Show resolved Hide resolved

- name: Run Super-Linter
uses: super-linter/super-linter/slim@v5
env:
Expand Down
29 changes: 24 additions & 5 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,13 +1,32 @@
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.3.0
hooks:
# - id: check-yaml
- id: end-of-file-fixer
- id: trailing-whitespace
- id: mixed-line-ending
- id: check-added-large-files
- repo: https://github.com/pycqa/flake8
rev: 6.1.0
hooks:
- id: flake8
additional_dependencies: [flake8-black, flake8-bugbear, flake8-isort]
- repo: https://github.com/pycqa/isort
rev: 5.13.2
hooks:
- id: isort
name: isort (python)
- id: isort
name: isort (cython)
types: [cython]
- id: isort
name: isort (pyi)
types: [pyi]
- repo: https://github.com/ambv/black
rev: 23.1.0
hooks:
- id: black
language_version: python3
- repo: https://github.com/pycqa/flake8
rev: 6.0.0
hooks:
- id: flake8
additional_dependencies: [flake8-black, flake8-bugbear]
2 changes: 1 addition & 1 deletion .python-version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.10.10
3.10.13
1 change: 1 addition & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ flake8-bugbear = "*"
black = "*"
localstack = "*"
awscli-local = "*"
pre-commit = "*"

[requires]
python_version = "3.10"
777 changes: 426 additions & 351 deletions Pipfile.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion jobs/etl/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .extract import extract
from .load_postgresql import load_to_postgresql_db
from .load_documentdb import load_to_document_db
from .load_postgresql import load_to_postgresql_db
from .load_s3 import load_to_s3
3 changes: 2 additions & 1 deletion jobs/etl/extract.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from typing import List

from awsglue.context import GlueContext

from jobs.io import read_from_options
from libs.config import Config
from libs.aws import AwsS3Client
from libs.config import Config


def extract(glueContext: GlueContext, config: Config):
Expand Down
3 changes: 2 additions & 1 deletion jobs/etl/load_s3.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from awsglue.context import GlueContext, DynamicFrame
from awsglue.context import DynamicFrame, GlueContext

from jobs.io.writer import write_from_options
from libs.config import Config

Expand Down
2 changes: 1 addition & 1 deletion jobs/io/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from .helpers import get_connection_options
from .reader import read_from_options
from .writer import write_from_options
from .helpers import get_connection_options
47 changes: 24 additions & 23 deletions jobs/io/helpers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import Dict, Tuple, Union
from typing import Dict, Optional, Tuple, Union
from urllib.parse import quote

from libs.common import load_tls_ca_bundle


Expand All @@ -8,8 +9,8 @@ def get_url_for_engine(
port: int,
database: str,
engine: str,
user: str = None,
password: str = None,
user: Optional[str] = None,
password: Optional[str] = None,
ssl: bool = False,
) -> str:
"""
Expand Down Expand Up @@ -44,8 +45,8 @@ def _build_mongodb_connection_string(
port: int,
database: str,
engine: str,
user: str = None,
password: str = None,
user: Optional[str] = None,
password: Optional[str] = None,
ssl: Union[bool, str] = False,
):
encoded_user = _encode_mongodb_auth_special_chars(user)
Expand All @@ -61,7 +62,7 @@ def _build_mongodb_connection_string(
return url


def _encode_mongodb_auth_special_chars(authchars: str):
def _encode_mongodb_auth_special_chars(authchars: Optional[str] = None):
"""
This is a helper function to encode special characters
that might be present in the user, password, tokens, etc:
Expand Down Expand Up @@ -108,7 +109,7 @@ def get_connection_type(engine: str, ssl: bool = False) -> str:
return connection_type


def get_driver_for_engine(engine: str = None) -> str:
def get_driver_for_engine(engine: Optional[str] = None) -> str:
"""
Returns the driver for the engine

Expand All @@ -128,7 +129,7 @@ def get_driver_for_engine(engine: str = None) -> str:
return drivers[engine]


def get_format_for_engine(engine: str = None) -> str:
def get_format_for_engine(engine: Optional[str] = None) -> str:
"""
Returns the format for the engine

Expand All @@ -150,21 +151,21 @@ def get_format_for_engine(engine: str = None) -> str:

def get_connection_options(
engine: str,
host: str = None,
port: int = None,
database: str = None,
user: str = None,
password: str = None,
host: str,
port: int,
database: str,
user: Optional[str],
password: Optional[str],
dbtable: Optional[str],
collection: Optional[str],
aws_access_key_id: Optional[str],
aws_secret_access_key: Optional[str],
aws_region_name: Optional[str],
aws_session_token: Optional[str],
aws_endpoint_url: Optional[str],
paths: Optional[str],
path: Optional[str],
ssl: bool = False,
dbtable: str = None,
collection: str = None,
aws_access_key_id: str = None,
aws_secret_access_key: str = None,
aws_region_name: str = None,
aws_session_token: str = None,
aws_endpoint_url: str = None,
paths: str = None,
path: str = None,
) -> Tuple[str, str, Dict[str, str]]:
"""
Returns the format and options for the engine. Will use the AWS credentials
Expand All @@ -176,7 +177,6 @@ def get_connection_options(
:param engine: The engine to get the format and options for
:param user: The user to connect with
:param password: The password to connect with
:param ssl: Whether to use ssl or not
:param dbtable: The table to connect to
:param collection: The collection to connect to
:param aws_access_key_id: The aws access key id to connect with
Expand All @@ -185,6 +185,7 @@ def get_connection_options(
:param aws_session_token: The aws session token to connect with
:param aws_endpoint_url: The aws endpoint url to connect with
:param path: The path to connect to
:param ssl: Whether to use ssl or not
:return: The format and options for the engine
"""
if engine is None:
Expand Down
3 changes: 2 additions & 1 deletion jobs/io/reader.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from awsglue.context import GlueContext, DynamicFrame
from awsglue.context import DynamicFrame, GlueContext

from .helpers import get_connection_options


Expand Down
3 changes: 2 additions & 1 deletion jobs/io/writer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from awsglue.context import GlueContext, DynamicFrame
from awsglue.context import DynamicFrame, GlueContext

from .helpers import get_connection_options


Expand Down
8 changes: 5 additions & 3 deletions jobs/pyspark_hello_world.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import sys

from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext

from jobs.etl import extract, load_to_postgresql_db, load_to_s3

from jobs.etl import extract, load_to_s3, load_to_postgresql_db, load_to_document_db
# from jobs.etl import load_to_document_db # isort: skip_file
from libs.config import get_config

args = getResolvedOptions(sys.argv, ["JOB_NAME"])
Expand Down
2 changes: 2 additions & 0 deletions libs/aws/s3.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from typing import Union

import boto3

from libs.config import get_config


Expand Down
2 changes: 1 addition & 1 deletion libs/common/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from .util import cached_property
from .perm import load_tls_ca_bundle
from .util import cached_property
2 changes: 1 addition & 1 deletion libs/config/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from .config import get_config, Config
from .config import Config, get_config
6 changes: 3 additions & 3 deletions libs/config/env.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import os
from typing import Any
from typing import Any, Dict, Optional


class EnvironmentVariable:
Expand All @@ -10,7 +10,7 @@ class EnvironmentVariable:
args = {}
cache = {}

def __init__(self, args: dict = None) -> None:
def __init__(self, args: Optional[Dict[str, Any]] = None) -> None:
if args is None:
args = dict()
self.args = args
Expand Down Expand Up @@ -38,7 +38,7 @@ def get_var(self, key, default: Any = None, throw_error: bool = False) -> Any:
envs_instance = None


def get_envs(args: dict = None) -> EnvironmentVariable:
def get_envs(args: Optional[Dict[str, Any]] = None) -> EnvironmentVariable:
"""
Get the envs instance. If it doesn't exist, create it.

Expand Down
4 changes: 3 additions & 1 deletion libs/config/secrets.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from __future__ import annotations
import boto3

import base64
import json

import boto3
from botocore.exceptions import ClientError

secrets_resolver_instance = None
Expand Down
4 changes: 3 additions & 1 deletion libs/db/docdb_mongo.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from libs.common import load_tls_ca_bundle
from pymongo import MongoClient as PyMongoClient

from libs.common import load_tls_ca_bundle

from .mongo import MongoClient


Expand Down
26 changes: 12 additions & 14 deletions libs/db/mongo.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from typing import Any, Dict, List, Tuple, Union
from typing import Any, Dict, List, Optional, Tuple, Union

from pymongo import MongoClient as PyMongoClient
from libs.common.logconfig import LogConfig
from pymongo import errors as pymongo_exceptions

from libs.common.logconfig import LogConfig # type: ignore

matiasz8 marked this conversation as resolved.
Show resolved Hide resolved

class MongoClient:
Expand All @@ -14,7 +17,7 @@ def __init__(
port: int,
database: str,
protocol: str = "mongodb",
collection: str = None,
collection: str = None, # type: ignore
throw_error: bool = True,
matiasz8 marked this conversation as resolved.
Show resolved Hide resolved
) -> None:
connection_uri = (
Expand All @@ -31,9 +34,9 @@ def __init__(
)

if self._conn is None and throw_error:
raise Exception(
"Could not connect to " + host + ":" + port + " was not found."
)
msg = f"Could not connect to {host}:{port} with user {user} and password {password}"
self.logger.error(msg)
raise pymongo_exceptions.ConnectionFailure(msg)

self._db = self._conn[database]
self.logger.info("Connected to database: " + database)
Expand Down Expand Up @@ -71,7 +74,7 @@ def find_sorted_limit(
query: Dict[str, Any],
sort: List[Tuple[str, int]],
limit: int,
projection: Dict[str, Any] = None,
projection: Optional[Dict[str, Any]] = None,
) -> List[Dict[str, Any]]:
return list(self._collection.find(query, projection).sort(sort).limit(limit))

Expand All @@ -81,20 +84,15 @@ def find_sorted_limit_skip(
sort: List[Tuple[str, int]],
limit: int,
skip: int,
projection: Dict[str, Any] = None,
projection: Optional[Dict[str, Any]] = None,
) -> List[Dict[str, Any]]:
return list(
self._collection.find(query, projection).sort(sort).limit(limit).skip(skip)
)

def find_one(self, query: Dict[str, Any]) -> Dict[str, Any]:
def find_one(self, query: Dict[str, Any]) -> Optional[Dict[str, Any]]:
return self._collection.find_one(query)

def find_one_and_update(
self, query: Dict[str, Any], update: Dict[str, Any]
) -> Dict[str, Any]:
return self._collection.find_one_and_update(query, update)

def find_one_and_delete(self, query: Dict[str, Any]) -> Dict[str, Any]:
return self._collection.find_one_and_delete(query)

Expand Down
Loading