From 2740f23f5aacaeeeab6db638fec8fb121dc52786 Mon Sep 17 00:00:00 2001 From: tfm000 Date: Fri, 27 Oct 2023 19:23:27 +0100 Subject: [PATCH] adding copula tests Took 1 minute --- sklarpy/tests/copulas/conftest.py | 18 +- sklarpy/tests/copulas/test_prefit_dists.py | 298 +++++++++++++++++- .../tests/multivariate/test_prefit_dists.py | 4 +- 3 files changed, 307 insertions(+), 13 deletions(-) diff --git a/sklarpy/tests/copulas/conftest.py b/sklarpy/tests/copulas/conftest.py index 1274fff..9e0fbc0 100644 --- a/sklarpy/tests/copulas/conftest.py +++ b/sklarpy/tests/copulas/conftest.py @@ -57,9 +57,9 @@ def pd_mvt_uniform_data(): @pytest.fixture(scope="session", autouse=True) -def all_mv_data(): +def all_mvt_data(): return { - 'mv_continuous': mvt_continuous_data(), + 'mvt_continuous': mvt_continuous_data(), 'mvt_discrete': mvt_discrete_data(), 'pd_mvt_continuous': pd_mvt_continuous_data(), 'pd_mvt_discrete': pd_mvt_discrete_data(), @@ -68,6 +68,14 @@ def all_mv_data(): } +@pytest.fixture(scope="session", autouse=True) +def all_mvt_uniform_data(): + return { + 'np_uniform': mvt_uniform_data(), + 'pd_uniform': pd_mvt_uniform_data(), + } + + @pytest.fixture(scope="session", autouse=True) def copula_params_2d(): return { @@ -242,7 +250,7 @@ def copula_params_3d(): @pytest.fixture(scope="session", autouse=True) def all_mdists_2d(): return { - 'mv_continuous': { + 'mvt_continuous': { 0: lognorm.fit(params=(0.01, -120.05, 119.9)), 1: lognorm.fit(params=(0.01, -150.77, 150.67))}, @@ -251,7 +259,7 @@ def all_mdists_2d(): 1: poisson.fit(params=(7.83,))}, 'pd_mvt_continuous': { - 0: lognorm.fit(params=(0.0, -202.7, 202.72)), + 0: lognorm.fit(params=(0.01, -120.05, 119.9)), 1: cauchy.fit(params=(-0.0, 0.53))}, 'pd_mvt_discrete': { @@ -270,7 +278,7 @@ def all_mdists_2d(): @pytest.fixture(scope="session", autouse=True) def all_mdists_3d(): return { - 'mv_continuous': { + 'mvt_continuous': { 0: lognorm.fit(params=(0.02, -51.43, 51.52)), 1: cauchy.fit(params=(0.08, 0.51)), 2: cauchy.fit(params=(0.09, 0.59))}, diff --git a/sklarpy/tests/copulas/test_prefit_dists.py b/sklarpy/tests/copulas/test_prefit_dists.py index 14f67a9..5e191b5 100644 --- a/sklarpy/tests/copulas/test_prefit_dists.py +++ b/sklarpy/tests/copulas/test_prefit_dists.py @@ -1,10 +1,15 @@ # Contains tests for Pre-Fit SklarPy copula models import numpy as np +from typing import Callable +import pytest +import scipy.stats +import matplotlib.pyplot as plt from sklarpy.copulas import * from sklarpy.copulas._prefit_dists import PreFitCopula from sklarpy.copulas._fitted_dists import FittedCopula from sklarpy._utils import Params, FitError +from sklarpy.tests.copulas.helpers import get_dist def test_correct_type(): @@ -16,14 +21,10 @@ def test_correct_type(): f"{name} is not a child class of PreFitCopula." -def test_fit(mvt_continuous_data, mvt_discrete_data, - pd_mvt_continuous_data, pd_mvt_discrete_data, - mvt_mixed_data, pd_mvt_mixed_data): +def test_fit(all_mvt_data): """Testing we can fit copula distributions to data.""" print("\nTesting fit") - for data in (mvt_continuous_data, pd_mvt_continuous_data, - mvt_discrete_data, pd_mvt_discrete_data, mvt_mixed_data, - pd_mvt_mixed_data): + for data in all_mvt_data.values(): mfitter: MarginalFitter = MarginalFitter(data) mfitter.fit() @@ -84,3 +85,288 @@ def test_fit(mvt_continuous_data, mvt_discrete_data, raise except RuntimeError: pass + + +def test_prefit_logpdf_pdf_cdf_mc_cdfs(all_mvt_data, copula_params_2d, + all_mdists_2d): + """Testing the logpdf, pdf, cdf and mc-cdf functions of pre-fit copula + models.""" + print("\nTesting logpdf, pdf, cdf and mc-cdf functions") + eps: float = 10 ** -5 + num_generate: int = 10 + cdf_num: int = 10 + + for dataset_name, data in all_mvt_data.items(): + mdists = all_mdists_2d[dataset_name] + for name in distributions_map['all']: + copula, _, copula_params = get_dist(name, copula_params_2d, + mdists, data) + + for func_str in ('logpdf', 'pdf', 'mc_cdf'): #, 'cdf'): + func: Callable = eval(f"copula.{func_str}") + if func_str == 'cdf': + func_data = data[:cdf_num, :].copy() \ + if 'pd' not in dataset_name else data.iloc[:cdf_num, :] + else: + func_data = data + + # getting values to test + output = func(x=data, copula_params=copula_params, + mdists=mdists, match_datatype=True, + num_generate=num_generate, show_progress=False) + np_output: np.ndarray = np.asarray(output) + n, d = np.asarray(data).shape + + # checking same datatype + assert isinstance(output, type(data)), \ + f"{func_str} values for {name} do not match the " \ + f"datatype: {type(data)}." + + # checking the correct size + assert np_output.size == n, \ + f"{func_str} values for {name} are not the correct size." + + # checking for nan-values + assert np.isnan(np_output).sum() == 0, \ + f'nans present in {name} {func_str} values.' + + # function specific tests + if func_str == 'pdf': + assert np.all(np_output >= -eps), \ + f"pdf values in {name} are negative." + elif func_str in ('cdf', 'mc_cdf'): + assert np.all((-eps <= np_output) & (output <= 1 + eps)), \ + f"{func_str} values in {name} outside [0, 1]." + + # checking error if wrong dimension + new_dataset: np.ndarray = np.zeros((n, d + 1)) + with pytest.raises(ValueError, + match='mdists number of distributions and ' + 'the number of variables are not ' + 'equal.'): + func(x=new_dataset, copula_params=copula_params, + mdists=mdists, match_datatype=True, + num_generate=num_generate, show_progress=False) + + +def test_prefit_copula_logpdf_pdf_cdf_mc_cdfs(all_mvt_uniform_data, + copula_params_2d, all_mdists_2d): + """Testing the copula-logpdf, copula-pdf, copula-cdf and copula-mc-cdf + functions of pre-fit copula models.""" + print("\nTesting copula logpdf, pdf, cdf and mc-cdf functions") + eps: float = 10 ** -5 + num_generate: int = 10 + cdf_num: int = 10 + + mdists = all_mdists_2d['mvt_mixed'] + for dataset_name, data in all_mvt_uniform_data.items(): + for name in distributions_map['all']: + copula, _, copula_params = get_dist(name, copula_params_2d, + mdists, data) + + for func_str in ('copula_logpdf', 'copula_pdf', + 'copula_mc_cdf'): # , 'copula_cdf'): + func: Callable = eval(f"copula.{func_str}") + if func_str == 'copula_cdf': + func_data = data[:cdf_num, :].copy() \ + if 'pd' not in dataset_name else data.iloc[:cdf_num, :] + else: + func_data = data + + # getting values to test + output = func(u=data, copula_params=copula_params, + mdists=mdists, match_datatype=True, + num_generate=num_generate, show_progress=False) + np_output: np.ndarray = np.asarray(output) + n, d = np.asarray(data).shape + + # checking same datatype + assert isinstance(output, type(data)), \ + f"{func_str} values for {name} do not match the " \ + f"datatype: {type(data)}." + + # checking the correct size + assert np_output.size == n, \ + f"{func_str} values for {name} are not the correct size." + + # checking for nan-values + assert np.isnan(np_output).sum() == 0, \ + f'nans present in {name} {func_str} values.' + + # function specific tests + if func_str == 'pdf': + assert np.all(np_output >= -eps), \ + f"pdf values in {name} are negative." + elif func_str in ('cdf', 'mc_cdf'): + assert np.all((-eps <= np_output) & (output <= 1 + eps)), \ + f"{func_str} values in {name} outside [0, 1]." + + # checking error if wrong dimension + new_dataset: np.ndarray = np.zeros((n, d + 1)) + with pytest.raises(ValueError): + func(u=new_dataset, copula_params=copula_params, + mdists=mdists, match_datatype=True, + num_generate=num_generate, show_progress=False) + + +def test_prefit_rvs(all_mvt_data, copula_params_2d, all_mdists_2d): + """Testing the rvs and copula-rvs functions of pre-fit copula models.""" + print("\nTesting rvs and copula-rvs") + + eps: float = 10 ** -5 + dataset_name: str = 'mvt_mixed' + data: np.ndarray = all_mvt_data[dataset_name] + mdists: dict = all_mdists_2d[dataset_name] + + for name in distributions_map['all']: + copula, _, copula_params = get_dist(name, copula_params_2d, + mdists, data) + for func_str in ('rvs', 'copula_rvs'): + func: Callable = eval(f"copula.{func_str}") + for size in (1, 2, 5, 101): + rvs = func(size=size, copula_params=copula_params, + mdists=mdists) + + # checking correct type + assert isinstance(rvs, np.ndarray), \ + f"pre-fit {func_str} values for {name} are not contained" \ + f" in an array." + + # checking correct shape + assert rvs.shape[0] == size, \ + f"pre-fit {func_str} for {name} did not generate the " \ + f"correct number of pseudo-samples." + + # checking for nan values + assert np.isnan(rvs).sum() == 0, \ + f"nan values present in {name} pre-fit {func_str}." + + # function specific checks + if func_str == 'copula_rvs' and size > 1: + assert np.all((1 - rvs > -eps) & (rvs > -eps)), \ + f"pre-fit copula-rvs are not in the [0, 1] cdf space." + + +def test_prefit_scalars(all_mvt_data, copula_params_2d, all_mdists_2d): + """Testing the likelihood, loglikelihood, AIC and BIC functions of + pre-fit copula models.""" + print("\nTesting scalars") + + for dataset_name, data in all_mvt_data.items(): + mdists = all_mdists_2d[dataset_name] + for name in distributions_map['all']: + copula, _, copula_params = get_dist(name, copula_params_2d, + mdists, data) + for func_str in ('likelihood', 'loglikelihood', 'aic', 'bic'): + func: Callable = eval(f"copula.{func_str}") + value = func(data=data, copula_params=copula_params, + mdists=mdists) + + # checking correct type + assert isinstance(value, float), \ + f"{func_str} for {name} is not a float when datatype is " \ + f"{type(data)}" + + # checking valid number + assert not np.isnan(value), \ + f"{func_str} for {name} is is nan when datatype is " \ + f"{type(data)}" + + if func_str == "likelihood": + # checking positive + assert value >= 0, \ + f"{func_str} for {name} is negative when datatype " \ + f"is {type(data)}." + + # checking error if wrong dimension + n, d = data.shape + new_dataset: np.ndarray = np.zeros((n, d + 1)) + with pytest.raises( + ValueError, + match='mdists number of distributions and the ' + 'number of variables are not equal.'): + func(data=new_dataset, copula_params=copula_params, + mdists=mdists) + + +def test_prefit_integers(all_mvt_data, copula_params_2d, all_mdists_2d): + """Testing the num_marginal_params, num_copula_params, + num_scalar_params and num_params functions of pre-fit copula models.""" + print("\nTesting integers") + + dataset_name: str = 'mvt_mixed' + data: np.ndarray = all_mvt_data[dataset_name] + mdists = all_mdists_2d[dataset_name] + + for name in distributions_map['all']: + copula, _, copula_params = get_dist(name, copula_params_2d, mdists, + data) + for func_str in ("num_scalar_params", "num_copula_params", + "num_marginal_params", "num_params"): + func: Callable = eval(f"copula.{func_str}") + value = func(copula_params=copula_params, mdists=mdists) + + assert isinstance(value, int), \ + f"{func_str} of {name} is not an integer." + assert value >= 0, f"{func_str} of {name} is negative." + + +def test_prefit_plots(all_mvt_data, copula_params_2d, copula_params_3d, + all_mdists_2d, all_mdists_3d): + """Testing the marginal_pairplot, pdf_plot, cdf_plot, mc_cdf_plot, + copula_pdf_plot, copula_cdf_plot and copula_mc_cdf_plot methods of + pre-fit copula models.""" + print("\nTesting plots") + + num_generate: int = 10 + mc_num_generate: int = num_generate + num_points = 2 + + mvt_data_2d: np.ndarray = all_mvt_data['mvt_mixed'] + mdists_2d: dict = all_mdists_2d['mvt_mixed'] + + mvt_data_3d: np.ndarray = scipy.stats.multivariate_normal.rvs( + size=(mvt_data_2d.shape[0], 3)) + mdists_3d: dict = all_mdists_3d['mvt_continuous'] + + for name in distributions_map['all']: + copula, _, cparams_2d = get_dist(name, copula_params_2d, mdists_2d, + mvt_data_2d) + + if name != 'frank_copula': + _, _, cparams_3d = get_dist(name, copula_params_3d, mdists_3d, + mvt_data_3d) + + for func_str in ('marginal_pairplot', 'pdf_plot', 'cdf_plot', + 'mc_cdf_plot', 'copula_pdf_plot', 'copula_cdf_plot', + 'copula_mc_cdf_plot'): + func: Callable = eval(f"copula.{func_str}") + + # testing 3d plots + if name == 'frank_copula': + pass + elif func_str == 'marginal_pairplot': + func(copula_params=cparams_3d, mdists=mdists_3d, show=False, + num_generate=num_generate) + plt.close() + else: + with pytest.raises(NotImplementedError, + match=f"{func_str} is not " + f"implemented when the number of " + f"variables is not 2."): + func(copula_params=cparams_3d, mdists=mdists_3d, + show=False, show_progress=False, + num_generate=num_generate, num_points=num_points) + + # testing 2d plots + func(copula_params=cparams_2d, mdists=mdists_2d, show=False, + show_progress=False, num_generate=num_generate, + mc_num_generate=mc_num_generate, num_points=num_points) + plt.close() + + +def test_prefit_names(): + print("\nTesting name") + for name in distributions_map['all']: + copula = eval(name) + assert isinstance(copula.name, str), f"name of {name} is not a string." diff --git a/sklarpy/tests/multivariate/test_prefit_dists.py b/sklarpy/tests/multivariate/test_prefit_dists.py index 5f36ba1..d6b9f3e 100644 --- a/sklarpy/tests/multivariate/test_prefit_dists.py +++ b/sklarpy/tests/multivariate/test_prefit_dists.py @@ -148,12 +148,12 @@ def test_prefit_logpdf_pdf_cdf_mc_cdfs( print("\nTesting logpdf, pdf, cdf and mc-cdf functions") eps: float = 10 ** -5 num_generate: int = 10 + cdf_num: int = 10 for name in mv_dists_to_test: dist, _, params = get_dist(name, params_2d, mvt_continuous_data) for func_str in ('logpdf', 'pdf', 'mc_cdf'): #, 'cdf'): func: Callable = eval(f'dist.{func_str}') - cdf_num: int = 10 datasets = (mvt_continuous_data[:cdf_num, :], mvt_discrete_data[:cdf_num, :], pd_mvt_continuous_data.iloc[:cdf_num, :], @@ -298,7 +298,7 @@ def test_prefit_plots(mv_dists_to_test, params_2d, params_3d, plt.close() -def test_prefit_name(mv_dists_to_test): +def test_prefit_names(mv_dists_to_test): """Testing that name of pre-fit multivariate distributions is a string.""" print("\nTesting name") for name in mv_dists_to_test: