Skip to content

Commit

Permalink
adding copula tests
Browse files Browse the repository at this point in the history
Took 1 minute
  • Loading branch information
tfm000 committed Oct 27, 2023
1 parent 00188d3 commit 2740f23
Show file tree
Hide file tree
Showing 3 changed files with 307 additions and 13 deletions.
18 changes: 13 additions & 5 deletions sklarpy/tests/copulas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,9 @@ def pd_mvt_uniform_data():


@pytest.fixture(scope="session", autouse=True)
def all_mv_data():
def all_mvt_data():
return {
'mv_continuous': mvt_continuous_data(),
'mvt_continuous': mvt_continuous_data(),
'mvt_discrete': mvt_discrete_data(),
'pd_mvt_continuous': pd_mvt_continuous_data(),
'pd_mvt_discrete': pd_mvt_discrete_data(),
Expand All @@ -68,6 +68,14 @@ def all_mv_data():
}


@pytest.fixture(scope="session", autouse=True)
def all_mvt_uniform_data():
    """Collect the uniform multivariate datasets into a single dictionary.

    The returned dict has the keys 'np_uniform' and 'pd_uniform', holding the
    values returned by mvt_uniform_data() and pd_mvt_uniform_data()
    respectively.
    """
    # NOTE(review): both providers are invoked as plain functions here. If
    # they are themselves registered pytest fixtures, confirm that calling
    # them directly is supported by the pytest version in use.
    uniform_datasets: dict = {}
    uniform_datasets['np_uniform'] = mvt_uniform_data()
    uniform_datasets['pd_uniform'] = pd_mvt_uniform_data()
    return uniform_datasets


@pytest.fixture(scope="session", autouse=True)
def copula_params_2d():
return {
Expand Down Expand Up @@ -242,7 +250,7 @@ def copula_params_3d():
@pytest.fixture(scope="session", autouse=True)
def all_mdists_2d():
return {
'mv_continuous': {
'mvt_continuous': {
0: lognorm.fit(params=(0.01, -120.05, 119.9)),
1: lognorm.fit(params=(0.01, -150.77, 150.67))},

Expand All @@ -251,7 +259,7 @@ def all_mdists_2d():
1: poisson.fit(params=(7.83,))},

'pd_mvt_continuous': {
0: lognorm.fit(params=(0.0, -202.7, 202.72)),
0: lognorm.fit(params=(0.01, -120.05, 119.9)),
1: cauchy.fit(params=(-0.0, 0.53))},

'pd_mvt_discrete': {
Expand All @@ -270,7 +278,7 @@ def all_mdists_2d():
@pytest.fixture(scope="session", autouse=True)
def all_mdists_3d():
return {
'mv_continuous': {
'mvt_continuous': {
0: lognorm.fit(params=(0.02, -51.43, 51.52)),
1: cauchy.fit(params=(0.08, 0.51)),
2: cauchy.fit(params=(0.09, 0.59))},
Expand Down
298 changes: 292 additions & 6 deletions sklarpy/tests/copulas/test_prefit_dists.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
# Contains tests for Pre-Fit SklarPy copula models
import numpy as np
from typing import Callable
import pytest
import scipy.stats
import matplotlib.pyplot as plt

from sklarpy.copulas import *
from sklarpy.copulas._prefit_dists import PreFitCopula
from sklarpy.copulas._fitted_dists import FittedCopula
from sklarpy._utils import Params, FitError
from sklarpy.tests.copulas.helpers import get_dist


def test_correct_type():
Expand All @@ -16,14 +21,10 @@ def test_correct_type():
f"{name} is not a child class of PreFitCopula."


def test_fit(mvt_continuous_data, mvt_discrete_data,
pd_mvt_continuous_data, pd_mvt_discrete_data,
mvt_mixed_data, pd_mvt_mixed_data):
def test_fit(all_mvt_data):
"""Testing we can fit copula distributions to data."""
print("\nTesting fit")
for data in (mvt_continuous_data, pd_mvt_continuous_data,
mvt_discrete_data, pd_mvt_discrete_data, mvt_mixed_data,
pd_mvt_mixed_data):
for data in all_mvt_data.values():
mfitter: MarginalFitter = MarginalFitter(data)
mfitter.fit()

Expand Down Expand Up @@ -84,3 +85,288 @@ def test_fit(mvt_continuous_data, mvt_discrete_data,
raise
except RuntimeError:
pass


def test_prefit_logpdf_pdf_cdf_mc_cdfs(all_mvt_data, copula_params_2d,
                                       all_mdists_2d):
    """Testing the logpdf, pdf, cdf and mc-cdf functions of pre-fit copula
    models.

    For every dataset in all_mvt_data and every copula named in
    distributions_map['all'], each function is evaluated and its output is
    checked for: matching datatype, correct size, absence of nans and a
    valid range. Passing a dataset with one extra column must raise a
    ValueError.

    Parameters
    ----------
    all_mvt_data:
        Fixture mapping dataset names to datasets.
    copula_params_2d:
        Fixture forwarded to get_dist to obtain the copula parameters.
    all_mdists_2d:
        Fixture mapping dataset names to their marginal distributions.
    """
    print("\nTesting logpdf, pdf, cdf and mc-cdf functions")
    eps: float = 10 ** -5
    num_generate: int = 10
    cdf_num: int = 10

    for dataset_name, data in all_mvt_data.items():
        mdists = all_mdists_2d[dataset_name]
        for name in distributions_map['all']:
            copula, _, copula_params = get_dist(name, copula_params_2d,
                                                mdists, data)

            # 'cdf' is currently not part of this tuple, so it is not
            # exercised. The cdf-specific branches below are kept so that it
            # can be added back without further changes.
            for func_str in ('logpdf', 'pdf', 'mc_cdf'):
                func: Callable = getattr(copula, func_str)

                # the cdf is only evaluated on the first cdf_num rows
                if func_str == 'cdf':
                    func_data = data[:cdf_num, :].copy() \
                        if 'pd' not in dataset_name \
                        else data.iloc[:cdf_num, :]
                else:
                    func_data = data

                # getting values to test
                output = func(x=func_data, copula_params=copula_params,
                              mdists=mdists, match_datatype=True,
                              num_generate=num_generate,
                              show_progress=False)
                np_output: np.ndarray = np.asarray(output)
                n, d = np.asarray(func_data).shape

                # checking same datatype
                assert isinstance(output, type(func_data)), \
                    f"{func_str} values for {name} do not match the " \
                    f"datatype: {type(func_data)}."

                # checking the correct size
                assert np_output.size == n, \
                    f"{func_str} values for {name} are not the correct size."

                # checking for nan-values
                assert np.isnan(np_output).sum() == 0, \
                    f'nans present in {name} {func_str} values.'

                # function specific tests
                if func_str == 'pdf':
                    assert np.all(np_output >= -eps), \
                        f"pdf values in {name} are negative."
                elif func_str in ('cdf', 'mc_cdf'):
                    # both bounds are checked on the numpy view of the
                    # output, so the comparison does not depend on the
                    # container type returned by the function.
                    in_range = (-eps <= np_output) & (np_output <= 1 + eps)
                    assert np.all(in_range), \
                        f"{func_str} values in {name} outside [0, 1]."

                # checking error if wrong dimension
                new_dataset: np.ndarray = np.zeros((n, d + 1))
                with pytest.raises(ValueError,
                                   match='mdists number of distributions and '
                                         'the number of variables are not '
                                         'equal.'):
                    func(x=new_dataset, copula_params=copula_params,
                         mdists=mdists, match_datatype=True,
                         num_generate=num_generate, show_progress=False)


def test_prefit_copula_logpdf_pdf_cdf_mc_cdfs(all_mvt_uniform_data,
                                              copula_params_2d,
                                              all_mdists_2d):
    """Testing the copula-logpdf, copula-pdf, copula-cdf and copula-mc-cdf
    functions of pre-fit copula models.

    For every dataset in all_mvt_uniform_data and every copula named in
    distributions_map['all'], each function is evaluated and its output is
    checked for: matching datatype, correct size, absence of nans and a
    valid range. Passing a dataset with one extra column must raise a
    ValueError.

    Parameters
    ----------
    all_mvt_uniform_data:
        Fixture mapping dataset names to uniform datasets.
    copula_params_2d:
        Fixture forwarded to get_dist to obtain the copula parameters.
    all_mdists_2d:
        Fixture mapping dataset names to marginal distributions; only the
        'mvt_mixed' entry is used here.
    """
    print("\nTesting copula logpdf, pdf, cdf and mc-cdf functions")
    eps: float = 10 ** -5
    num_generate: int = 10
    cdf_num: int = 10

    mdists = all_mdists_2d['mvt_mixed']
    for dataset_name, data in all_mvt_uniform_data.items():
        for name in distributions_map['all']:
            copula, _, copula_params = get_dist(name, copula_params_2d,
                                                mdists, data)

            # 'copula_cdf' is currently not part of this tuple, so it is not
            # exercised. The cdf-specific branches below are kept so that it
            # can be added back without further changes.
            for func_str in ('copula_logpdf', 'copula_pdf',
                             'copula_mc_cdf'):
                func: Callable = getattr(copula, func_str)

                # the cdf is only evaluated on the first cdf_num rows
                if func_str == 'copula_cdf':
                    func_data = data[:cdf_num, :].copy() \
                        if 'pd' not in dataset_name \
                        else data.iloc[:cdf_num, :]
                else:
                    func_data = data

                # getting values to test
                output = func(u=func_data, copula_params=copula_params,
                              mdists=mdists, match_datatype=True,
                              num_generate=num_generate,
                              show_progress=False)
                np_output: np.ndarray = np.asarray(output)
                n, d = np.asarray(func_data).shape

                # checking same datatype
                assert isinstance(output, type(func_data)), \
                    f"{func_str} values for {name} do not match the " \
                    f"datatype: {type(func_data)}."

                # checking the correct size
                assert np_output.size == n, \
                    f"{func_str} values for {name} are not the correct size."

                # checking for nan-values
                assert np.isnan(np_output).sum() == 0, \
                    f'nans present in {name} {func_str} values.'

                # function specific tests. The names compared against must
                # carry the 'copula_' prefix used by the loop above,
                # otherwise these range checks are silently skipped.
                if func_str == 'copula_pdf':
                    assert np.all(np_output >= -eps), \
                        f"pdf values in {name} are negative."
                elif func_str in ('copula_cdf', 'copula_mc_cdf'):
                    in_range = (-eps <= np_output) & (np_output <= 1 + eps)
                    assert np.all(in_range), \
                        f"{func_str} values in {name} outside [0, 1]."

                # checking error if wrong dimension
                new_dataset: np.ndarray = np.zeros((n, d + 1))
                with pytest.raises(ValueError):
                    func(u=new_dataset, copula_params=copula_params,
                         mdists=mdists, match_datatype=True,
                         num_generate=num_generate, show_progress=False)


def test_prefit_rvs(all_mvt_data, copula_params_2d, all_mdists_2d):
    """Testing the rvs and copula-rvs functions of pre-fit copula models.

    Using the 'mvt_mixed' dataset and marginals, samples of several sizes
    are generated for every copula named in distributions_map['all']. Each
    sample must be a numpy array with the requested number of rows and no
    nan values. copula_rvs samples (for sizes above 1) must additionally lie
    in [0, 1], up to a small tolerance.

    Parameters
    ----------
    all_mvt_data:
        Fixture mapping dataset names to datasets.
    copula_params_2d:
        Fixture forwarded to get_dist to obtain the copula parameters.
    all_mdists_2d:
        Fixture mapping dataset names to their marginal distributions.
    """
    print("\nTesting rvs and copula-rvs")

    eps: float = 10 ** -5
    dataset_name: str = 'mvt_mixed'
    data: np.ndarray = all_mvt_data[dataset_name]
    mdists: dict = all_mdists_2d[dataset_name]

    for name in distributions_map['all']:
        copula, _, copula_params = get_dist(name, copula_params_2d,
                                            mdists, data)
        for func_str in ('rvs', 'copula_rvs'):
            # plain attribute lookup; no need to evaluate a code string
            func: Callable = getattr(copula, func_str)
            for size in (1, 2, 5, 101):
                rvs = func(size=size, copula_params=copula_params,
                           mdists=mdists)

                # checking correct type
                assert isinstance(rvs, np.ndarray), \
                    f"pre-fit {func_str} values for {name} are not " \
                    f"contained in an array."

                # checking correct shape
                assert rvs.shape[0] == size, \
                    f"pre-fit {func_str} for {name} did not generate the " \
                    f"correct number of pseudo-samples."

                # checking for nan values
                assert np.isnan(rvs).sum() == 0, \
                    f"nan values present in {name} pre-fit {func_str}."

                # function specific checks
                if func_str == 'copula_rvs' and size > 1:
                    assert np.all((1 - rvs > -eps) & (rvs > -eps)), \
                        "pre-fit copula-rvs are not in the [0, 1] cdf space."


def test_prefit_scalars(all_mvt_data, copula_params_2d, all_mdists_2d):
    """Testing the likelihood, loglikelihood, AIC and BIC functions of
    pre-fit copula models.

    For every dataset in all_mvt_data and every copula named in
    distributions_map['all'], each scalar function must return a non-nan
    float; the likelihood must additionally be non-negative. Passing a
    dataset with one extra column must raise a ValueError.

    Parameters
    ----------
    all_mvt_data:
        Fixture mapping dataset names to datasets.
    copula_params_2d:
        Fixture forwarded to get_dist to obtain the copula parameters.
    all_mdists_2d:
        Fixture mapping dataset names to their marginal distributions.
    """
    print("\nTesting scalars")

    for dataset_name, data in all_mvt_data.items():
        mdists = all_mdists_2d[dataset_name]
        # the shape only depends on the dataset, so it is read once here
        n, d = data.shape

        for name in distributions_map['all']:
            copula, _, copula_params = get_dist(name, copula_params_2d,
                                                mdists, data)
            for func_str in ('likelihood', 'loglikelihood', 'aic', 'bic'):
                # plain attribute lookup; no need to evaluate a code string
                func: Callable = getattr(copula, func_str)
                value = func(data=data, copula_params=copula_params,
                             mdists=mdists)

                # checking correct type
                assert isinstance(value, float), \
                    f"{func_str} for {name} is not a float when datatype " \
                    f"is {type(data)}"

                # checking valid number
                assert not np.isnan(value), \
                    f"{func_str} for {name} is nan when datatype is " \
                    f"{type(data)}"

                if func_str == "likelihood":
                    # checking positive
                    assert value >= 0, \
                        f"{func_str} for {name} is negative when datatype " \
                        f"is {type(data)}."

                # checking error if wrong dimension
                new_dataset: np.ndarray = np.zeros((n, d + 1))
                with pytest.raises(
                        ValueError,
                        match='mdists number of distributions and the '
                              'number of variables are not equal.'):
                    func(data=new_dataset, copula_params=copula_params,
                         mdists=mdists)


def test_prefit_integers(all_mvt_data, copula_params_2d, all_mdists_2d):
    """Testing the num_marginal_params, num_copula_params,
    num_scalar_params and num_params functions of pre-fit copula models.

    Using the 'mvt_mixed' dataset and marginals, each function must return a
    non-negative int for every copula named in distributions_map['all'].

    Parameters
    ----------
    all_mvt_data:
        Fixture mapping dataset names to datasets.
    copula_params_2d:
        Fixture forwarded to get_dist to obtain the copula parameters.
    all_mdists_2d:
        Fixture mapping dataset names to their marginal distributions.
    """
    print("\nTesting integers")

    dataset_name: str = 'mvt_mixed'
    data: np.ndarray = all_mvt_data[dataset_name]
    mdists = all_mdists_2d[dataset_name]

    for name in distributions_map['all']:
        copula, _, copula_params = get_dist(name, copula_params_2d, mdists,
                                            data)
        for func_str in ("num_scalar_params", "num_copula_params",
                         "num_marginal_params", "num_params"):
            # plain attribute lookup; no need to evaluate a code string
            func: Callable = getattr(copula, func_str)
            value = func(copula_params=copula_params, mdists=mdists)

            assert isinstance(value, int), \
                f"{func_str} of {name} is not an integer."
            assert value >= 0, f"{func_str} of {name} is negative."


def test_prefit_plots(all_mvt_data, copula_params_2d, copula_params_3d,
                      all_mdists_2d, all_mdists_3d):
    """Testing the marginal_pairplot, pdf_plot, cdf_plot, mc_cdf_plot,
    copula_pdf_plot, copula_cdf_plot and copula_mc_cdf_plot methods of
    pre-fit copula models.

    With 2d parameters every plot method must run without raising. With 3d
    parameters marginal_pairplot must run, while every other plot method
    must raise a NotImplementedError. The 3d checks are skipped for
    'frank_copula'.

    Parameters
    ----------
    all_mvt_data:
        Fixture mapping dataset names to datasets.
    copula_params_2d, copula_params_3d:
        Fixtures forwarded to get_dist to obtain the copula parameters.
    all_mdists_2d, all_mdists_3d:
        Fixtures mapping dataset names to their marginal distributions.
    """
    print("\nTesting plots")

    num_generate: int = 10
    mc_num_generate: int = num_generate
    num_points = 2

    mvt_data_2d: np.ndarray = all_mvt_data['mvt_mixed']
    mdists_2d: dict = all_mdists_2d['mvt_mixed']

    mvt_data_3d: np.ndarray = scipy.stats.multivariate_normal.rvs(
        size=(mvt_data_2d.shape[0], 3))
    mdists_3d: dict = all_mdists_3d['mvt_continuous']

    for name in distributions_map['all']:
        copula, _, cparams_2d = get_dist(name, copula_params_2d, mdists_2d,
                                         mvt_data_2d)

        # NOTE(review): the 3d case is skipped for frank_copula, presumably
        # because it is only supported for 2 variables - confirm.
        test_3d: bool = name != 'frank_copula'
        # reset on every iteration so parameters from a previous copula can
        # never leak into the current one
        cparams_3d = None
        if test_3d:
            _, _, cparams_3d = get_dist(name, copula_params_3d, mdists_3d,
                                        mvt_data_3d)

        for func_str in ('marginal_pairplot', 'pdf_plot', 'cdf_plot',
                         'mc_cdf_plot', 'copula_pdf_plot', 'copula_cdf_plot',
                         'copula_mc_cdf_plot'):
            # plain attribute lookup; no need to evaluate a code string
            func: Callable = getattr(copula, func_str)

            # testing 3d plots
            if test_3d:
                if func_str == 'marginal_pairplot':
                    func(copula_params=cparams_3d, mdists=mdists_3d,
                         show=False, num_generate=num_generate)
                    plt.close()
                else:
                    with pytest.raises(NotImplementedError,
                                       match=f"{func_str} is not "
                                             f"implemented when the number "
                                             f"of variables is not 2."):
                        func(copula_params=cparams_3d, mdists=mdists_3d,
                             show=False, show_progress=False,
                             num_generate=num_generate,
                             num_points=num_points)

            # testing 2d plots
            func(copula_params=cparams_2d, mdists=mdists_2d, show=False,
                 show_progress=False, num_generate=num_generate,
                 mc_num_generate=mc_num_generate, num_points=num_points)
            plt.close()


def test_prefit_names():
    """Testing that the name of each pre-fit copula model is a string."""
    print("\nTesting name")
    for dist_name in distributions_map['all']:
        # the entry is resolved to the module-level object of the same name
        model = eval(dist_name)
        name_is_str: bool = isinstance(model.name, str)
        assert name_is_str, f"name of {dist_name} is not a string."
4 changes: 2 additions & 2 deletions sklarpy/tests/multivariate/test_prefit_dists.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,12 +148,12 @@ def test_prefit_logpdf_pdf_cdf_mc_cdfs(
print("\nTesting logpdf, pdf, cdf and mc-cdf functions")
eps: float = 10 ** -5
num_generate: int = 10
cdf_num: int = 10

for name in mv_dists_to_test:
dist, _, params = get_dist(name, params_2d, mvt_continuous_data)
for func_str in ('logpdf', 'pdf', 'mc_cdf'): #, 'cdf'):
func: Callable = eval(f'dist.{func_str}')
cdf_num: int = 10
datasets = (mvt_continuous_data[:cdf_num, :],
mvt_discrete_data[:cdf_num, :],
pd_mvt_continuous_data.iloc[:cdf_num, :],
Expand Down Expand Up @@ -298,7 +298,7 @@ def test_prefit_plots(mv_dists_to_test, params_2d, params_3d,
plt.close()


def test_prefit_name(mv_dists_to_test):
def test_prefit_names(mv_dists_to_test):
"""Testing that name of pre-fit multivariate distributions is a string."""
print("\nTesting name")
for name in mv_dists_to_test:
Expand Down

0 comments on commit 2740f23

Please sign in to comment.