Skip to content

Commit

Permalink
Datagatherer (#22)
Browse files Browse the repository at this point in the history
* #15 inserted real code test

* sqllite3 added

* #15 datagathererinput

* #15 datagathererinput

* #18 test updated

* #21, will revert post commit

* reverted

* reverted

* #18 unit tested

* #18 fixed failing test_read_sql_from_empty_table

* #18 fixed test_read_sql_from_populated_table

* #18 fixed test_read_sql_from_populated_table
  • Loading branch information
ZNevzz authored Feb 8, 2020
1 parent 8a47030 commit 69bb6bf
Show file tree
Hide file tree
Showing 9 changed files with 438 additions and 69 deletions.
5 changes: 4 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
#Travis CI build configuration
language: python
python:
  - "3.8"
#help: https://stackoverflow.com/questions/35972415/python-import-fails-on-travisci-but-not-locally
before_install:
  - "pip install -U pip"
  - "export PYTHONPATH=$PYTHONPATH:$(pwd)"
#The command "sonar-scanner" exited with 1: https://travis-ci.org/ZNClub-PA-ML-AI/OctoPy-Predictor/builds/635588777#L594
# BUG FIX: the key "script:" appeared twice; duplicate mapping keys are
# invalid YAML (later key overrides the earlier one in lenient parsers).
script:
  # - sonar-scanner
  - pytest
addons:
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ Generic platform for Prediction using basic machine learning models
```bash
conda env list # show current environments
conda create --name OctoPy # if OctoPy is NOT listed
conda activate OctoPy
conda list # show all libraries
conda install pip # if pip is NOT listed
conda list > versions.txt # store all versions post install
Expand Down
81 changes: 64 additions & 17 deletions octopy_predictor/src/analyser.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,40 +4,87 @@
"""
from sklearn.metrics import explained_variance_score

class Analyser(object):
"""docstring for Analyser"""
def __init__(self, arg = None):
super(Analyser, self).__init__()
self.arg = arg
self.regression_metrics = {
'Explained Variance' : explained_variance_score
}
self.metrics = {
'regression' : self.regression_metrics
}
# Model-type constants shared by AnalyserMetric / AnalyserMetricsRegistry.
REGRESSION_MODEL = 'REGRESSION'
CLASSIFICATION_MODEL = 'CLASSIFICATION'


class AnalyserMetric(object):
    """AnalyserMetric: state to represent one metric for Analyser.

    Attributes
    ----------
    name : str
        display name of the metric
    model_type : str
        REGRESSION_MODEL or CLASSIFICATION_MODEL
    function : callable
        metric implementation, invoked as function(expected, actual)
    """

    def __init__(self,
                 name='Analyser Metric',
                 model_type=REGRESSION_MODEL,
                 function=lambda x: x):
        """Store the metric configuration fields."""
        # NOTE(review): the default `function` takes one argument while real
        # metrics (e.g. explained_variance_score) take two -- kept as-is for
        # backward compatibility.
        self.name = name
        self.model_type = model_type
        self.function = function

class AnalyserMetricsRegistry(object):
    """AnalyserMetricsRegistry: registry of AnalyserMetric instances.

    Holds the known metrics and applies the subset matching a model type.
    """

    # Registered metrics; extend this list to add new metrics.
    metrics = [
        AnalyserMetric('Explained Variance', REGRESSION_MODEL,
                       explained_variance_score)
    ]

    @staticmethod
    def apply_metrics(model_type=REGRESSION_MODEL, data=None):
        """Apply every registered metric of `model_type` to `data`.

        Parameters
        ----------
        model_type : str
            REGRESSION_MODEL or CLASSIFICATION_MODEL.
        data : tuple
            (expected, actual) pair of value sequences.

        Returns
        -------
        dict
            metric name -> computed metric value.

        Raises
        ------
        ValueError
            if `data` is not an (expected, actual) pair.
        """
        # BUG FIX: the previous default was a mutable `{}`, which could never
        # be unpacked into (expected, actual) -- every default call raised an
        # obscure ValueError.  Fail fast with a clear message instead (still
        # a ValueError, so callers catching it are unaffected).
        if data is None:
            raise ValueError('data must be an (expected, actual) pair')
        expected, actual = data

        matching = (metric for metric in AnalyserMetricsRegistry.metrics
                    if metric.model_type == model_type)
        return {metric.name: metric.function(expected, actual)
                for metric in matching}

def get_column_data_types(self, df):

class Analyser(object):
    """Analyser: logic to analyse data."""

    def columns(self, df):
        """
        input: DataFrame
        output: array of names of columns of DataFrame
        """
        return df.columns.values

    def types(self, df):
        """
        input: DataFrame
        output: list of str describing data types of columns of DataFrame
        """
        # str(df.dtypes) ends with a "dtype: object" footer line; drop it.
        return str(df.dtypes).split('\n')[:-1]

    def describe(self, df):
        """
        input: DataFrame
        output: DataFrame containing descriptive statistics of DataFrame
        """
        # BUG FIX: DataFrame.describe() has no `how` keyword (TypeError at
        # runtime); `include='all'` is the way to describe every column.
        return df.describe(include='all')

    def model_metrics(self, y_, mode):
        """
        input: y_ -- (expected, actual) pair; mode -- model-type constant
        output: dict of metric name -> metric value
        """
        # BUG FIX: the old body read `self.metrics[mode]`, an attribute this
        # class no longer defines (AttributeError); delegate to the registry.
        return AnalyserMetricsRegistry.apply_metrics(mode, (y_[0], y_[1]))

    # TODO
    def _is_categorical(self, label):
        """Return True when `label` is a small set (<10) of string values."""
        # BUG FIX: map() arguments were reversed -- map(label, lambda ...)
        # treats the data as the callable and crashes on first iteration.
        values = list(label)
        return len(set(values)) < 10 and all(isinstance(x, str)
                                             for x in values)

    # TODO
    def get_model_type_by_label(self, label=[]):
        # NOTE(review): mutable default kept for interface compatibility;
        # the list is never mutated here.
        return ('Classification' if self._is_categorical(label)
                else 'Regression')
166 changes: 134 additions & 32 deletions octopy_predictor/src/datagatherer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,83 +5,163 @@
import pandas as pd
from io import StringIO
from collections import namedtuple
import sqlite3


# TODO
# from util import logit
# import util


# CONSTANTS

FILE = 'FILE'
SQL = 'SQL'
FILE_PATH = 'FILEPATH'
CONNECTION = 'CONN'
# NOTE(review): QUERY shares the value 'SQL' with the gatherer-type constant
# SQL above, and QUERY_PARAMERTERS is misspelled; both are kept verbatim for
# backward compatibility with existing callers.
QUERY = 'SQL'
QUERY_PARAMERTERS = 'SQLPARAMS'


class DataGathererInput(object):
    """
    Input holder for DataGatherer.

    Stores only the keys that are valid for the chosen gatherer type.

    Usage
    ----------
    DataGatherer
    """
    FILE_CONSTRAINTS = [FILE_PATH]
    SQL_CONSTRAINTS = [CONNECTION, QUERY, QUERY_PARAMERTERS]

    # Valid value keys per gatherer type.
    CONSTRAINTS = {
        FILE: FILE_CONSTRAINTS,
        SQL: SQL_CONSTRAINTS
    }

    def __init__(self, type: str):
        """
        Parameters
        ----------
        type : str
            TYPE of DataGatherer; must be a key of CONSTRAINTS.

        Raises
        ------
        ValueError
            if `type` is not a supported gatherer type.

        Returns
        -------
        None.
        """
        # Resolves the old "TODO Throw error": an unknown type previously
        # produced an instance whose add() would silently store nothing.
        if type not in DataGathererInput.CONSTRAINTS:
            raise ValueError('unsupported gatherer type: {!r}'.format(type))
        self.type = type
        self.values = {}

    def add(self, key: str, value):
        """
        Store `value` under `key` when the key is valid for this type.

        Parameters
        ----------
        key : str
            valid keys present in CONSTRAINTS for this gatherer type.
        value : any
            value corresponding to key.

        Returns
        -------
        None.
        """
        # Invalid keys are deliberately ignored (original best-effort
        # behaviour preserved).
        if key in DataGathererInput.CONSTRAINTS[self.type]:
            self.values[key] = value

#from util import logit
#import util

class DataGatherer(object):
    """
    DataGatherer is responsible to fetch data from multiple sources
    and convert it to a specific type using provided Adapters.
    The default Adapter is DataFrame.
    """

    def __init__(self, arg=None):
        # `arg` is an optional opaque configuration value; it is only
        # stored, never interpreted here.
        super(DataGatherer, self).__init__()
        self.arg = arg

#@logit
# @logit
@staticmethod
def _read_from_file(file):
_file_content = None
try:
_file_content = file.read()
#util.debug_store['file_content at datagatherer'] = _file_content
# util.debug_store['file_content'] = _file_content
except IOError as io_error:
#util.debug_store['io_error at datagatherer'] = io_error.__traceback__
# util.debug_store['io_error'] = io_error.__traceback__
raise io_error
else:
return _file_content
#@logit

# @logit
@staticmethod
def _determine_resource(path):
def determine_resource(path):
resource_type, file_type = None, None

# resource type
resource_type = 'web' if path.startswith('http') else 'local'

# file type
try:
file_extension_index = path.rindex('.')
except ValueError as val_error:
# TODO: message = invalid path
raise val_error
else:
file_type = path[file_extension_index + 1 :]
file_type = path[file_extension_index + 1:]
finally:
FileResource = namedtuple('FileResource', 'resource_type file_type')
return FileResource(resource_type = resource_type, file_type = file_type)


#@logit
FileResource = namedtuple('FileResource',
'resource_type file_type')
return FileResource(resource_type=resource_type,
file_type=file_type)

# @logit
@staticmethod
def _read_from_path(path):
'''
read data from a file available at given path
'''
df = pd.DataFrame()
metadata = _determine_resource(path)
metadata = DataGatherer.determine_resource(path)

if metadata.resource_type == 'local':

if metadata.file_type == 'csv':
df = pd.read_csv(path)

elif metadata.resource_type == 'web':

if metadata.file_type == 'csv':
df = pd.read_csv(path)

return df
#@logit
def read(self, path = None, file = None):

# @logit
def read(self, path=None, file=None, sql=None):
'''
read receives either path or file. If received both, file is given priority
read receives either path or file.
If received both, file is given priority
'''
try:
try:
df = None
if path is None:
file_content = self._read_from_file(file)
#util.debug_store['StringIO(file_content) at datagatherer'] = StringIO(file_content)
# util.debug_store['S'] = StringIO(file_content)
df = pd.read_csv(StringIO(file_content))

elif file is None:
df = pd.read_csv(path)
else:
Expand All @@ -93,5 +173,27 @@ def read(self, path = None, file = None):
print('Exception occured while loading data')
raise exception
finally:
#util.debug_store['df at datagatherer'] = df.to_json(orient='columns')
# util.debug_store['df'] = df.to_json(orient='columns')
return df

def read_sql(self, gatherer_input: DataGathererInput):
"""
Parameters
----------
input : DataGathererInput
Contains _values required to execute SQL QUERY.
Returns
-------
df : DataFrame
Result of SQL QUERY.
"""
df = pd.DataFrame()
# TODO Move all connections to application start-up
conn = sqlite3.connect(gatherer_input.values[CONNECTION], uri=True)
df = pd.read_sql_query(gatherer_input.values[QUERY], con=conn)
return df
1 change: 1 addition & 0 deletions octopy_predictor/src/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,4 @@ def is_allowed_file(filename):
return True
#TODO
#return '.' in filename and filename.rsplit('.', 1)[1] in context.ALLOWED_EXTENSIONS

25 changes: 18 additions & 7 deletions octopy_predictor/tests/test_analyser.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,22 @@
# test for visualizer.py
"""Tests for analyser.py"""

#import sys
#sys.path.insert(0, '../main/')

from octopy_predictor.src.analyser import Analyser
from octopy_predictor.src.analyser import Analyser, AnalyserMetricsRegistry, REGRESSION_MODEL
import unittest

class AnalyserMetricsRegistryTest(unittest.TestCase):
    """Test cases for AnalyserMetricsRegistry"""

    def setUp(self):
        # A fresh registry instance per test.
        self.registry = AnalyserMetricsRegistry()

    def test_regression_model(self):
        expected = [.0, 1.5, 3.0]
        actual = [-3.0, -1.5, .0]

        result = self.registry.apply_metrics(REGRESSION_MODEL,
                                             (expected, actual))

        self.assertIsNotNone(result, "apply_metrics returned null")

class AnalyserTest(unittest.TestCase):
"""Test cases for Analyser"""

Expand All @@ -18,9 +29,9 @@ def test_is_regression_model_type(self):

actual_result = self.analyser.get_model_type_by_label(array)

self.assertEquals(actual_result, expected_result)
self.assertEqual(actual_result, expected_result, "expected does not match actual")

if __name__ == '__main__':
    # BUG FIX: diff residue left two competing `suite =` assignments, so only
    # AnalyserMetricsRegistryTest ran and AnalyserTest was silently dropped;
    # run both test cases.
    loader = unittest.defaultTestLoader
    suite = unittest.TestSuite([
        loader.loadTestsFromTestCase(AnalyserMetricsRegistryTest),
        loader.loadTestsFromTestCase(AnalyserTest),
    ])
    unittest.TextTestRunner().run(suite)
Loading

0 comments on commit 69bb6bf

Please sign in to comment.