Skip to content

Commit

Permalink
Merge pull request #339 from guillermo-navas-palencia/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
guillermo-navas-palencia authored Oct 28, 2024
2 parents aebe16b + b3e512e commit 67e766f
Show file tree
Hide file tree
Showing 17 changed files with 124 additions and 82 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
python-version: ['3.7', '3.8', '3.9', '3.10']

python-version: ['3.9', '3.10', "3.11", "3.12"]
os: [ubuntu-latest, windows-latest, macos-12]

steps:
Expand All @@ -29,10 +30,9 @@ jobs:
python -m pip install --upgrade pip
pip install -r test_requirements.txt
pip install -r requirements.txt
pip install pympler tdigest
- name: Install package
run: |
pip install -e .[distributed,test]
pip install -e .[distributed,test,ecos]
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
Expand Down
6 changes: 6 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,12 @@ To include batch and stream binning algorithms (this option is not required for
pip install optbinning[distributed]
To include support for the `ecos <https://github.com/embotech/ecos>`_ solver:

.. code-block:: text
pip install optbinning[ecos]
To install from source, download or clone the git repository

.. code-block:: text
Expand Down
4 changes: 2 additions & 2 deletions doc/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@
author = 'Guillermo Navas-Palencia'

# The short X.Y version
version = '0.19.0'
version = '0.20.0'
# The full version, including alpha/beta/rc tags
release = '0.19.0'
release = '0.20.0'


# -- General configuration ---------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion optbinning/_version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""Version information."""

__version__ = "0.19.0"
__version__ = "0.20.0"
14 changes: 12 additions & 2 deletions optbinning/binning/binning_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -608,7 +608,8 @@ def build(self, show_digits=2, add_totals=True):
return df

def plot(self, metric="woe", add_special=True, add_missing=True,
style="bin", show_bin_labels=False, savefig=None, figsize=None):
style="bin", show_bin_labels=False, savefig=None, figsize=None,
save_kwargs=None):
"""Plot the binning table.
Visualize the non-event and event count, and the Weight of Evidence or
Expand Down Expand Up @@ -642,6 +643,9 @@ def plot(self, metric="woe", add_special=True, add_missing=True,
figsize : tuple or None (default=None)
Size of the plot.
save_kwargs : dict or None (default=None)
Additional keyword arguments to be passed to `plt.savefig`.
"""
_check_is_built(self)

Expand Down Expand Up @@ -863,7 +867,13 @@ def plot(self, metric="woe", add_special=True, add_missing=True,
if not isinstance(savefig, str):
raise TypeError("savefig must be a string path; got {}."
.format(savefig))
plt.savefig(savefig)
if save_kwargs is None:
save_kwargs = {}
else:
if not isinstance(save_kwargs, dict):
raise TypeError("save_kwargs must be a dictionary; got {}."
.format(save_kwargs))
plt.savefig(savefig, **save_kwargs)
plt.close()

def analysis(self, pvalue_test="chi2", n_samples=100, print_output=True):
Expand Down
2 changes: 1 addition & 1 deletion optbinning/binning/distributed/binning_sketch.py
Original file line number Diff line number Diff line change
Expand Up @@ -956,7 +956,7 @@ def _update_streaming_stats(self):
self._solve_stats[self._n_solve] = {
"n_add": self._n_add,
"n_records": self._bsketch.n,
"divergence".format(self.divergence): dv
"divergence": dv
}

@property
Expand Down
21 changes: 15 additions & 6 deletions optbinning/binning/multidimensional/binning_statistics_2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ def build(self, show_digits=2, show_bin_xy=False, add_totals=True):

return df

def plot(self, metric="woe", savefig=None):
def plot(self, metric="woe", savefig=None, save_kwargs=None):
"""Plot the binning table.
Visualize the Weight of Evidence or the event rate for each bin as a
Expand All @@ -352,6 +352,9 @@ def plot(self, metric="woe", savefig=None):
savefig : str or None (default=None)
Path to save the plot figure.
save_kwargs : dict or None (default=None)
Additional keyword arguments to be passed to `plt.savefig`.
"""
_check_is_built(self)

Expand Down Expand Up @@ -384,7 +387,7 @@ def plot(self, metric="woe", savefig=None):

er = er + [er[-1]]
axtop.step(np.arange(self.n + 1) - 0.5, er,
label=path, where="post")
label=str(path), where="post")

for i in range(self.n):
axtop.axvline(i + 0.5, color="grey", linestyle="--", alpha=0.5)
Expand Down Expand Up @@ -414,7 +417,7 @@ def plot(self, metric="woe", savefig=None):
self.P == p, axis=0).max()) for p in path], [])

er = er + [er[-1]]
axright.step(er, np.arange(self.m + 1) - 0.5, label=path,
axright.step(er, np.arange(self.m + 1) - 0.5, label=str(path),
where="pre")

for j in range(self.m):
Expand All @@ -437,7 +440,13 @@ def plot(self, metric="woe", savefig=None):
if not isinstance(savefig, str):
raise TypeError("savefig must be a string path; got {}."
.format(savefig))
plt.savefig(savefig)
if save_kwargs is None:
save_kwargs = {}
else:
if not isinstance(save_kwargs, dict):
raise TypeError("save_kwargs must be a dictionary; got {}."
.format(save_kwargs))
plt.savefig(savefig, **save_kwargs)
plt.close()

def analysis(self, pvalue_test="chi2", n_samples=100, print_output=True):
Expand Down Expand Up @@ -763,7 +772,7 @@ def plot(self, savefig=None):

er = er + [er[-1]]
axtop.step(np.arange(self.n + 1) - 0.5, er,
label=path, where="post")
label=str(path), where="post")

for i in range(self.n):
axtop.axvline(i + 0.5, color="grey", linestyle="--", alpha=0.5)
Expand Down Expand Up @@ -793,7 +802,7 @@ def plot(self, savefig=None):
self.P == p, axis=0).max()) for p in path], [])

er = er + [er[-1]]
axright.step(er, np.arange(self.m + 1) - 0.5, label=path,
axright.step(er, np.arange(self.m + 1) - 0.5, label=str(path),
where="pre")

for j in range(self.m):
Expand Down
14 changes: 12 additions & 2 deletions optbinning/binning/piecewise/binning_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,8 @@ def build(self, show_digits=2, add_totals=True):

return df

def plot(self, metric="woe", n_samples=10000, savefig=None):
def plot(self, metric="woe", n_samples=10000, savefig=None,
save_kwargs=None):
"""Plot the binning table.
Visualize the non-event and event count, and the predicted Weight of
Expand All @@ -194,6 +195,9 @@ def plot(self, metric="woe", n_samples=10000, savefig=None):
savefig : str or None (default=None)
Path to save the plot figure.
save_kwargs : dict or None (default=None)
Additional keyword arguments to be passed to `plt.savefig`.
"""
_check_is_built(self)

Expand Down Expand Up @@ -258,7 +262,13 @@ def plot(self, metric="woe", n_samples=10000, savefig=None):
if not isinstance(savefig, str):
raise TypeError("savefig must be a string path; got {}."
.format(savefig))
plt.savefig(savefig)
if save_kwargs is None:
save_kwargs = {}
else:
if not isinstance(save_kwargs, dict):
raise TypeError("save_kwargs must be a dictionary; got {}."
.format(save_kwargs))
plt.savefig(savefig, **save_kwargs)
plt.close()

def analysis(self, pvalue_test="chi2", n_samples=100, print_output=True):
Expand Down
2 changes: 1 addition & 1 deletion optbinning/binning/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def categorical_transform(x, y):

def categorical_cutoff(x, y, cutoff=0.01):
cutoff_count = np.ceil(cutoff * len(x))
cat_count = pd.value_counts(x)
cat_count = pd.Series(x).value_counts()
cat_others = cat_count[cat_count < cutoff_count].index.values
mask_others = pd.Series(x).isin(cat_others).values

Expand Down
8 changes: 7 additions & 1 deletion optbinning/scorecard/counterfactual/problem_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,13 @@ def problem_data(scorecard, X):
sc["Points"] = sc["Mean"] * sc["Coefficient"]

# Linear model coefficients
intercept = float(scorecard.estimator_.intercept_)

# Index into the intercept only when it is an array; otherwise it is already a scalar
if isinstance(scorecard.estimator_.intercept_, np.ndarray):
intercept = float(scorecard.estimator_.intercept_[0])
else:
intercept = float(scorecard.estimator_.intercept_)

coef = scorecard.estimator_.coef_.ravel()

# Big-M parameters (min, max) points.
Expand Down
2 changes: 2 additions & 0 deletions optbinning/scorecard/monitoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,8 @@ def psi_plot(self, savefig=None):
plt.legend(handles, labels, loc="upper center",
bbox_to_anchor=(0.5, -0.2), ncol=2, fontsize=12)

plt.tight_layout()

if savefig is None:
plt.show()
else:
Expand Down
46 changes: 17 additions & 29 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
#!/usr/bin/env python

import os
import sys

from setuptools import find_packages, setup, Command
from setuptools.command.test import test as TestCommand


long_description = '''
The optimal binning is the optimal discretization of a variable into bins
Expand Down Expand Up @@ -34,40 +31,30 @@ def run(self):
os.system('rm -vrf ./build ./dist ./*.pyc ./*.tgz ./*.egg-info')


# test suites
class PyTest(TestCommand):
def finalize_options(self):
TestCommand.finalize_options(self)
self.test_args = []
self.test_suite = []

def run_tests(self):
# import here, because outside the eggs aren't loaded
import pytest
errcode = pytest.main(self.test_args)
sys.exit(errcode)


# install requirements
install_requires = [
'matplotlib',
'numpy>=1.16.1,<2',
'numpy>=1.16.1',
'ortools>=9.4',
'pandas',
'ropwr>=1.0.0',
'scikit-learn>=1.0.2',
'scipy>=1.6.0',
]

# test requirements
tests_require = [
'pytest',
'coverage'
]

# extra requirements
extras_require = {
'distributed': ['pympler', 'tdigest'],
'test': [
'coverage',
'flake8',
'pytest',
'pyarrow',
'pympler',
'tdigest',
],
# For ecos support: https://github.com/embotech/ecos
'ecos': ['ecos']
}


Expand All @@ -89,10 +76,9 @@ def run_tests(self):
include_package_data=True,
license="Apache Licence 2.0",
url="https://github.com/guillermo-navas-palencia/optbinning",
cmdclass={'clean': CleanCommand, 'test': PyTest},
cmdclass={'clean': CleanCommand},
python_requires='>=3.7',
install_requires=install_requires,
tests_require=tests_require,
extras_require=extras_require,
classifiers=[
'Topic :: Scientific/Engineering :: Mathematics',
Expand All @@ -103,7 +89,9 @@ def run_tests(self):
'Intended Audience :: Science/Research',
'License :: OSI Approved :: Apache Software License',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9']
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
'Programming Language :: Python :: 3.11',
'Programming Language :: Python :: 3.12',
]
)
4 changes: 3 additions & 1 deletion test_requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
coverage
flake8
pytest
pyarrow
pyarrow
pympler
tdigest
16 changes: 7 additions & 9 deletions tests/test_binning_piecewise.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def test_default():
optb.fit(x, y)

optb.binning_table.build()
assert optb.binning_table.iv == approx(5.87152846, rel=1e-6)
assert optb.binning_table.iv == approx(5.87474602, rel=1e-6)

with raises(ValueError):
optb.binning_table.plot(metric="new_metric")
Expand All @@ -188,7 +188,7 @@ def test_default_discontinuous():
optb.fit(x, y)

optb.binning_table.build()
assert optb.binning_table.iv == approx(5.84252707, rel=1e-6)
assert optb.binning_table.iv == approx(5.84465825, rel=1e-6)


def test_bounds_transform():
Expand All @@ -197,11 +197,11 @@ def test_bounds_transform():

x_transform_woe = optb.transform(x, metric="woe")
assert x_transform_woe[:4] == approx(
[3.9899792, 4.2806587, 4.17226985, -3.25509338], rel=1e-6)
[3.99180564, 4.28245092, 4.17407503, -3.2565373], rel=1e-6)

x_transform_event_rate = optb.transform(x, metric="event_rate")
assert x_transform_event_rate[:4] == approx(
[0.03021225, 0.02276486, 0.02530506, 0.97760445], rel=1e-6)
[0.03015878, 0.02272502, 0.02526056, 0.97763604], rel=1e-6)


def test_bounds_fit_transform():
Expand All @@ -211,13 +211,11 @@ def test_bounds_fit_transform():
x, y, lb=0.001, ub=0.999, metric="woe")

assert x_transform_woe[:4] == approx(
[3.9899792, 4.2806587, 4.17226985, -3.25509338], rel=1e-6)

[3.9918056, 4.2824509, 4.17407503, -3.25653732], rel=1e-6)
x_transform_event_rate = optb.fit_transform(
x, y, lb=0.001, ub=0.999, metric="event_rate")

assert x_transform_event_rate[:4] == approx(
[0.03021225, 0.02276486, 0.02530506, 0.97760445], rel=1e-6)
[0.03015878, 0.02272502, 0.02526056, 0.97763604], rel=1e-6)


def test_solvers():
Expand All @@ -226,7 +224,7 @@ def test_solvers():
optb.fit(x, y)

optb.binning_table.build()
assert optb.binning_table.iv == approx(5.87152846, rel=1e-6)
assert optb.binning_table.iv == approx(5.87474602, rel=1e-6)


def test_user_splits():
Expand Down
2 changes: 1 addition & 1 deletion tests/test_continuous_binning_piecewise.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def test_special_codes():
name=variable, monotonic_trend="convex", special_codes=special_codes)
optb.fit(x, y)

x_transform = optb.transform([np.NaN], metric_missing='empirical')
x_transform = optb.transform([np.nan], metric_missing='empirical')
assert x_transform == approx([17.94], rel=1e-6)


Expand Down
Loading

0 comments on commit 67e766f

Please sign in to comment.