Skip to content

Commit

Permalink
Better frequency testing
Browse files (browse the repository at this point in the history)
  • Loading branch information
zblz committed Nov 30, 2018
1 parent c9a98b6 commit 1a38ca2
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 7 deletions.
2 changes: 1 addition & 1 deletion lens/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,7 @@ def frequencies(series, column_props):

if column_props[name]["is_categorical"]:
logger.debug("frequencies - " + series.name)
- freqs = _compute_frequencies(series.values)
+ freqs = _compute_frequencies(series.dropna().values)
return {name: freqs, "_columns": [name]}
else:
return None
Expand Down
17 changes: 11 additions & 6 deletions tests/test_summarise.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,13 +213,16 @@ def test_dask_outliers(df, column_summary):

@pytest.fixture(scope="module")
def frequencies(df, column_properties):
- return {
- col: dask.compute(metrics.frequencies(df[col], column_properties[col]))
- for col in df.columns
- }
+ [freqs] = dask.compute(
+ {
+ col: metrics.frequencies(df[col], column_properties[col])
+ for col in df.columns
+ }
+ )
+ return freqs


- def test_dask_frequencies(df, frequencies):
+ def test_dask_frequencies(df, column_properties, frequencies):
for col in frequencies.keys():
freq_report = frequencies[col]
if freq_report is None:
Expand All @@ -229,11 +232,13 @@ def test_dask_frequencies(df, frequencies):

freqs = df[col].compute().value_counts().to_dict()

+ assert len(freq_report) == column_properties[col][col]["unique"]

for k in freqs.keys():
assert freqs[k] == freq_report[k]

# test serialization
- joined = _join_dask_results(frequencies.values())
+ joined = _join_dask_results(frequencies.values()).compute()
json.dumps({"freqs": joined}, cls=NumpyEncoder)


Expand Down

0 comments on commit 1a38ca2

Please sign in to comment.