Merge pull request #339 from guillermo-navas-palencia/develop

Develop
guillermo-navas-palencia · Oct 28, 2024 · 67e766f
2 parents aebe16b + b3e512e
commit 67e766f
Show file tree

Hide file tree

Showing 17 changed files with 124 additions and 82 deletions.
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
@@ -15,7 +15,8 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        python-version: ['3.7', '3.8', '3.9', '3.10']
+
+        python-version: ['3.9', '3.10', "3.11", "3.12"]
         os: [ubuntu-latest, windows-latest, macos-12]
 
     steps:
@@ -29,10 +30,9 @@ jobs:
         python -m pip install --upgrade pip
         pip install -r test_requirements.txt
         pip install -r requirements.txt
-        pip install pympler tdigest
     - name: Install package
       run: |
-        pip install -e .[distributed,test]
+        pip install -e .[distributed,test,ecos]
     - name: Lint with flake8
       run: |
         # stop the build if there are Python syntax errors or undefined names

diff --git a/README.rst b/README.rst
@@ -58,6 +58,12 @@ To include batch and stream binning algorithms (this option is not required for
 
   pip install optbinning[distributed]
 
+To include support for the `ecos <https://github.com/embotech/ecos>`_ solver:
+
+.. code-block:: text
+
+  pip install optbinning[ecos]
+
 To install from source, download or clone the git repository
 
 .. code-block:: text

diff --git a/doc/source/conf.py b/doc/source/conf.py
@@ -22,9 +22,9 @@
 author = 'Guillermo Navas-Palencia'
 
 # The short X.Y version
-version = '0.19.0'
+version = '0.20.0'
 # The full version, including alpha/beta/rc tags
-release = '0.19.0'
+release = '0.20.0'
 
 
 # -- General configuration ---------------------------------------------------

diff --git a/optbinning/_version.py b/optbinning/_version.py
@@ -1,3 +1,3 @@
 """Version information."""
 
-__version__ = "0.19.0"
+__version__ = "0.20.0"
diff --git a/optbinning/binning/binning_statistics.py b/optbinning/binning/binning_statistics.py
@@ -608,7 +608,8 @@ def build(self, show_digits=2, add_totals=True):
         return df
 
     def plot(self, metric="woe", add_special=True, add_missing=True,
-             style="bin", show_bin_labels=False, savefig=None, figsize=None):
+             style="bin", show_bin_labels=False, savefig=None, figsize=None,
+             save_kwargs=None):
         """Plot the binning table.
 
         Visualize the non-event and event count, and the Weight of Evidence or
@@ -642,6 +643,9 @@ def plot(self, metric="woe", add_special=True, add_missing=True,
 
         figsize : tuple or None (default=None)
             Size of the plot.
+
+        save_kwargs : dict or None (default=None)
+            Additional keyword arguments to be passed to `plt.savefig`.
         """
         _check_is_built(self)
 
@@ -863,7 +867,13 @@ def plot(self, metric="woe", add_special=True, add_missing=True,
             if not isinstance(savefig, str):
                 raise TypeError("savefig must be a string path; got {}."
                                 .format(savefig))
-            plt.savefig(savefig)
+            if save_kwargs is None:
+                save_kwargs = {}
+            else:
+                if not isinstance(save_kwargs, dict):
+                    raise TypeError("save_kwargs must be a dictionary; got {}."
+                                    .format(save_kwargs))
+            plt.savefig(savefig, **save_kwargs)
             plt.close()
 
     def analysis(self, pvalue_test="chi2", n_samples=100, print_output=True):

diff --git a/optbinning/binning/distributed/binning_sketch.py b/optbinning/binning/distributed/binning_sketch.py
@@ -956,7 +956,7 @@ def _update_streaming_stats(self):
         self._solve_stats[self._n_solve] = {
             "n_add": self._n_add,
             "n_records": self._bsketch.n,
-            "divergence".format(self.divergence): dv
+            "divergence": dv
         }
 
     @property

diff --git a/optbinning/binning/multidimensional/binning_statistics_2d.py b/optbinning/binning/multidimensional/binning_statistics_2d.py
@@ -338,7 +338,7 @@ def build(self, show_digits=2, show_bin_xy=False, add_totals=True):
 
         return df
 
-    def plot(self, metric="woe", savefig=None):
+    def plot(self, metric="woe", savefig=None, save_kwargs=None):
         """Plot the binning table.
 
         Visualize the Weight of Evidence or the event rate for each bin as a
@@ -352,6 +352,9 @@ def plot(self, metric="woe", savefig=None):
 
         savefig : str or None (default=None)
             Path to save the plot figure.
+
+        save_kwargs : dict or None (default=None)
+            Additional keyword arguments to be passed to `plt.savefig`.
         """
         _check_is_built(self)
 
@@ -384,7 +387,7 @@ def plot(self, metric="woe", savefig=None):
 
             er = er + [er[-1]]
             axtop.step(np.arange(self.n + 1) - 0.5, er,
-                       label=path, where="post")
+                       label=str(path), where="post")
 
         for i in range(self.n):
             axtop.axvline(i + 0.5, color="grey", linestyle="--", alpha=0.5)
@@ -414,7 +417,7 @@ def plot(self, metric="woe", savefig=None):
                     self.P == p, axis=0).max()) for p in path], [])
 
             er = er + [er[-1]]
-            axright.step(er, np.arange(self.m + 1) - 0.5, label=path,
+            axright.step(er, np.arange(self.m + 1) - 0.5, label=str(path),
                          where="pre")
 
         for j in range(self.m):
@@ -437,7 +440,13 @@ def plot(self, metric="woe", savefig=None):
             if not isinstance(savefig, str):
                 raise TypeError("savefig must be a string path; got {}."
                                 .format(savefig))
-            plt.savefig(savefig)
+            if save_kwargs is None:
+                save_kwargs = {}
+            else:
+                if not isinstance(save_kwargs, dict):
+                    raise TypeError("save_kwargs must be a dictionary; got {}."
+                                    .format(save_kwargs))
+            plt.savefig(savefig, **save_kwargs)
             plt.close()
 
     def analysis(self, pvalue_test="chi2", n_samples=100, print_output=True):
@@ -763,7 +772,7 @@ def plot(self, savefig=None):
 
             er = er + [er[-1]]
             axtop.step(np.arange(self.n + 1) - 0.5, er,
-                       label=path, where="post")
+                       label=str(path), where="post")
 
         for i in range(self.n):
             axtop.axvline(i + 0.5, color="grey", linestyle="--", alpha=0.5)
@@ -793,7 +802,7 @@ def plot(self, savefig=None):
                     self.P == p, axis=0).max()) for p in path], [])
 
             er = er + [er[-1]]
-            axright.step(er, np.arange(self.m + 1) - 0.5, label=path,
+            axright.step(er, np.arange(self.m + 1) - 0.5, label=str(path),
                          where="pre")
 
         for j in range(self.m):

diff --git a/optbinning/binning/piecewise/binning_statistics.py b/optbinning/binning/piecewise/binning_statistics.py
@@ -177,7 +177,8 @@ def build(self, show_digits=2, add_totals=True):
 
         return df
 
-    def plot(self, metric="woe", n_samples=10000, savefig=None):
+    def plot(self, metric="woe", n_samples=10000, savefig=None,
+             save_kwargs=None):
         """Plot the binning table.
 
         Visualize the non-event and event count, and the predicted Weight of
@@ -194,6 +195,9 @@ def plot(self, metric="woe", n_samples=10000, savefig=None):
 
         savefig : str or None (default=None)
             Path to save the plot figure.
+
+        save_kwargs : dict or None (default=None)
+            Additional keyword arguments to be passed to `plt.savefig`.
         """
         _check_is_built(self)
 
@@ -258,7 +262,13 @@ def plot(self, metric="woe", n_samples=10000, savefig=None):
             if not isinstance(savefig, str):
                 raise TypeError("savefig must be a string path; got {}."
                                 .format(savefig))
-            plt.savefig(savefig)
+            if save_kwargs is None:
+                save_kwargs = {}
+            else:
+                if not isinstance(save_kwargs, dict):
+                    raise TypeError("save_kwargs must be a dictionary; got {}."
+                                    .format(save_kwargs))
+            plt.savefig(savefig, **save_kwargs)
             plt.close()
 
     def analysis(self, pvalue_test="chi2", n_samples=100, print_output=True):

diff --git a/optbinning/binning/preprocessing.py b/optbinning/binning/preprocessing.py
@@ -31,7 +31,7 @@ def categorical_transform(x, y):
 
 def categorical_cutoff(x, y, cutoff=0.01):
     cutoff_count = np.ceil(cutoff * len(x))
-    cat_count = pd.value_counts(x)
+    cat_count = pd.Series(x).value_counts()
     cat_others = cat_count[cat_count < cutoff_count].index.values
     mask_others = pd.Series(x).isin(cat_others).values
 

diff --git a/optbinning/scorecard/counterfactual/problem_data.py b/optbinning/scorecard/counterfactual/problem_data.py
@@ -21,7 +21,13 @@ def problem_data(scorecard, X):
         sc["Points"] = sc["Mean"] * sc["Coefficient"]
 
     # Linear model coefficients
-    intercept = float(scorecard.estimator_.intercept_)
+
+    # Only index into the intercept if it is an array; otherwise it is a scalar
+    if isinstance(scorecard.estimator_.intercept_, np.ndarray):
+        intercept = float(scorecard.estimator_.intercept_[0])
+    else:
+        intercept = float(scorecard.estimator_.intercept_)
+
     coef = scorecard.estimator_.coef_.ravel()
 
     # Big-M parameters (min, max) points.

diff --git a/optbinning/scorecard/monitoring.py b/optbinning/scorecard/monitoring.py
@@ -469,6 +469,8 @@ def psi_plot(self, savefig=None):
         plt.legend(handles, labels, loc="upper center",
                    bbox_to_anchor=(0.5, -0.2), ncol=2, fontsize=12)
 
+        plt.tight_layout()
+
         if savefig is None:
             plt.show()
         else:

diff --git a/setup.py b/setup.py
@@ -1,11 +1,8 @@
 #!/usr/bin/env python
 
 import os
-import sys
 
 from setuptools import find_packages, setup, Command
-from setuptools.command.test import test as TestCommand
-
 
 long_description = '''
 The optimal binning is the optimal discretization of a variable into bins
@@ -34,40 +31,30 @@ def run(self):
         os.system('rm -vrf ./build ./dist ./*.pyc ./*.tgz ./*.egg-info')
 
 
-# test suites
-class PyTest(TestCommand):
-    def finalize_options(self):
-        TestCommand.finalize_options(self)
-        self.test_args = []
-        self.test_suite = []
-
-    def run_tests(self):
-        # import here, because outside the eggs aren't loaded
-        import pytest
-        errcode = pytest.main(self.test_args)
-        sys.exit(errcode)
-
-
 # install requirements
 install_requires = [
     'matplotlib',
-    'numpy>=1.16.1,<2',
+    'numpy>=1.16.1',
     'ortools>=9.4',
     'pandas',
     'ropwr>=1.0.0',
     'scikit-learn>=1.0.2',
     'scipy>=1.6.0',
 ]
 
-# test requirements
-tests_require = [
-    'pytest',
-    'coverage'
-]
-
 # extra requirements
 extras_require = {
     'distributed': ['pympler', 'tdigest'],
+    'test': [
+        'coverage', 
+        'flake8',
+        'pytest',
+        'pyarrow',
+        'pympler',
+        'tdigest',
+    ],
+    # Optional ECOS solver support: https://github.com/embotech/ecos
+    'ecos': ['ecos']
 }
 
 
@@ -89,10 +76,9 @@ def run_tests(self):
     include_package_data=True,
     license="Apache Licence 2.0",
     url="https://github.com/guillermo-navas-palencia/optbinning",
-    cmdclass={'clean': CleanCommand, 'test': PyTest},
+    cmdclass={'clean': CleanCommand},
     python_requires='>=3.7',
     install_requires=install_requires,
-    tests_require=tests_require,
     extras_require=extras_require,
     classifiers=[
         'Topic :: Scientific/Engineering :: Mathematics',
@@ -103,7 +89,9 @@ def run_tests(self):
         'Intended Audience :: Science/Research',
         'License :: OSI Approved :: Apache Software License',
         'Programming Language :: Python :: 3',
-        'Programming Language :: Python :: 3.7',
-        'Programming Language :: Python :: 3.8',
-        'Programming Language :: Python :: 3.9']
+        'Programming Language :: Python :: 3.9',
+        'Programming Language :: Python :: 3.10',
+        'Programming Language :: Python :: 3.11',
+        'Programming Language :: Python :: 3.12',
+        ]
     )
diff --git a/test_requirements.txt b/test_requirements.txt
@@ -1,4 +1,6 @@
 coverage
 flake8
 pytest
-pyarrow
+pyarrow
+pympler
+tdigest
diff --git a/tests/test_binning_piecewise.py b/tests/test_binning_piecewise.py
@@ -174,7 +174,7 @@ def test_default():
     optb.fit(x, y)
 
     optb.binning_table.build()
-    assert optb.binning_table.iv == approx(5.87152846, rel=1e-6)
+    assert optb.binning_table.iv == approx(5.87474602, rel=1e-6)
 
     with raises(ValueError):
         optb.binning_table.plot(metric="new_metric")
@@ -188,7 +188,7 @@ def test_default_discontinuous():
     optb.fit(x, y)
 
     optb.binning_table.build()
-    assert optb.binning_table.iv == approx(5.84252707, rel=1e-6)
+    assert optb.binning_table.iv == approx(5.84465825, rel=1e-6)
 
 
 def test_bounds_transform():
@@ -197,11 +197,11 @@ def test_bounds_transform():
 
     x_transform_woe = optb.transform(x, metric="woe")
     assert x_transform_woe[:4] == approx(
-        [3.9899792, 4.2806587, 4.17226985, -3.25509338], rel=1e-6)
+        [3.99180564, 4.28245092, 4.17407503, -3.2565373], rel=1e-6)
 
     x_transform_event_rate = optb.transform(x, metric="event_rate")
     assert x_transform_event_rate[:4] == approx(
-        [0.03021225, 0.02276486, 0.02530506, 0.97760445], rel=1e-6)
+        [0.03015878, 0.02272502, 0.02526056, 0.97763604], rel=1e-6)
 
 
 def test_bounds_fit_transform():
@@ -211,13 +211,11 @@ def test_bounds_fit_transform():
         x, y, lb=0.001, ub=0.999, metric="woe")
 
     assert x_transform_woe[:4] == approx(
-        [3.9899792, 4.2806587, 4.17226985, -3.25509338], rel=1e-6)
-
+        [3.9918056, 4.2824509, 4.17407503, -3.25653732], rel=1e-6)
     x_transform_event_rate = optb.fit_transform(
         x, y, lb=0.001, ub=0.999, metric="event_rate")
-
     assert x_transform_event_rate[:4] == approx(
-        [0.03021225, 0.02276486, 0.02530506, 0.97760445], rel=1e-6)
+        [0.03015878, 0.02272502, 0.02526056, 0.97763604], rel=1e-6)
 
 
 def test_solvers():
@@ -226,7 +224,7 @@ def test_solvers():
         optb.fit(x, y)
 
         optb.binning_table.build()
-        assert optb.binning_table.iv == approx(5.87152846, rel=1e-6)
+        assert optb.binning_table.iv == approx(5.87474602, rel=1e-6)
 
 
 def test_user_splits():

diff --git a/tests/test_continuous_binning_piecewise.py b/tests/test_continuous_binning_piecewise.py
@@ -80,7 +80,7 @@ def test_special_codes():
         name=variable, monotonic_trend="convex", special_codes=special_codes)
     optb.fit(x, y)
 
-    x_transform = optb.transform([np.NaN], metric_missing='empirical')
+    x_transform = optb.transform([np.nan], metric_missing='empirical')
     assert x_transform == approx([17.94], rel=1e-6)