From 10197936a5c7de23f0df79de15b8bcb6e419b316 Mon Sep 17 00:00:00 2001 From: will collins Date: Mon, 2 Dec 2024 21:47:14 -0500 Subject: [PATCH 1/8] test: Add simple benchmark test --- benchmarks/core.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/benchmarks/core.py b/benchmarks/core.py index 062c2533..7cb4d2a8 100644 --- a/benchmarks/core.py +++ b/benchmarks/core.py @@ -3,7 +3,13 @@ rounds = 10 fname = "benchmarks/email-enron.json" - +# Below is an example so I can better understand the control flow +def test_simple_benchmark(): + """A simple benchmark to test the workflow.""" + x = 0 + for i in range(1000): + x += i + return x def test_construct_from_edgelist(benchmark): def setup(): H = xgi.read_hif(fname) From cd5fdd17a75afbf192847291180e52954557fdc2 Mon Sep 17 00:00:00 2001 From: will collins Date: Mon, 2 Dec 2024 22:15:08 -0500 Subject: [PATCH 2/8] test: Add pedantic benchmark test to understand workflow --- benchmarks/core.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/benchmarks/core.py b/benchmarks/core.py index 062c2533..e1d28fb9 100644 --- a/benchmarks/core.py +++ b/benchmarks/core.py @@ -4,6 +4,21 @@ fname = "benchmarks/email-enron.json" +# Below is an example so I can better understand the control flow +def test_simple_benchmark(benchmark): + """A simple benchmark to test the workflow.""" + def setup(): + # No setup needed for this simple test + return (), {} + + def run_benchmark(): + x = 0 + for i in range(1000): + x += i + return x + + benchmark.pedantic(run_benchmark, setup=setup, rounds=rounds) + def test_construct_from_edgelist(benchmark): def setup(): H = xgi.read_hif(fname) From 5df4b753a210db7608b5110c40ed3e1401a41e9f Mon Sep 17 00:00:00 2001 From: will collins Date: Mon, 2 Dec 2024 22:54:28 -0500 Subject: [PATCH 3/8] Attempting to change results from data.js --- .github/workflows/benchmark.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 64e4c433..a18a51cc 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -37,4 +37,7 @@ jobs: # Show alert with commit comment on detecting possible performance regression alert-threshold: '200%' comment-on-alert: false - fail-on-alert: true \ No newline at end of file + fail-on-alert: true + output-metric: 'mean' + metric-unit: 'ms' + metric-scale: '1000' \ No newline at end of file From b5261b26c01fd8f6967b2f8650a42a4e773b2def Mon Sep 17 00:00:00 2001 From: will collins Date: Tue, 3 Dec 2024 00:02:11 -0500 Subject: [PATCH 4/8] Reverted benchmarks.yml back to normal --- .github/workflows/benchmark.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index a18a51cc..0e139006 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -38,6 +38,3 @@ jobs: alert-threshold: '200%' comment-on-alert: false fail-on-alert: true - output-metric: 'mean' - metric-unit: 'ms' - metric-scale: '1000' \ No newline at end of file From 2b77a7c19ce8dfe1254da9f5ed7cf18f89c41b9f Mon Sep 17 00:00:00 2001 From: will collins Date: Tue, 3 Dec 2024 11:46:11 -0500 Subject: [PATCH 5/8] Added pull_request to benchmark.yml workflow to understand what happens when a pull request is made --- .github/workflows/benchmark.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 0e139006..6b0ed8b9 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -3,6 +3,9 @@ on: push: branches: - main + pull_request: + branches: + - main workflow_dispatch: permissions: From b31374ec61563419ac14953bafe5ae5b6c53e54c Mon Sep 17 00:00:00 2001 From: will collins Date: Sun, 8 Dec 2024 14:41:43 -0500 Subject: [PATCH 6/8] Added unittests for ashist function to add-plotting-capabilities branch --- tests/stats/test_core_stats_functions.py | 97 ++++++++++++++++++++++++ 1 file changed, 97 insertions(+) diff --git a/tests/stats/test_core_stats_functions.py b/tests/stats/test_core_stats_functions.py index a5581f77..1cb35537 100644 --- a/tests/stats/test_core_stats_functions.py +++ b/tests/stats/test_core_stats_functions.py @@ -575,6 +575,103 @@ def test_issue_468(): assert H.edges.size.ashist().equals(df) + +def test_ashist_plotting_basic(): + """Test basic plotting functionality.""" + H = xgi.sunflower(3, 1, 20) + + # Test that plot=False still returns correct DataFrame + df_no_plot = H.edges.size.ashist(plot=False) + expected_df = pd.DataFrame([[20.0, 3]], columns=["bin_center", "value"]) + assert df_no_plot.equals(expected_df) + + # Test that plot=True returns correct DataFrame + df_with_plot = H.edges.size.ashist(plot=True) + assert df_with_plot.equals(expected_df) + + +def test_ashist_plot_types(): + """Test different plot types.""" + H = xgi.sunflower(3, 1, 20) + + # Test valid plot types + for plot_type in ['bar', 'line', 'step']: + df = H.edges.size.ashist(plot=plot_type) + expected_df = pd.DataFrame([[20.0, 3]], columns=["bin_center", "value"]) + assert df.equals(expected_df) + + +def test_ashist_plot_kwargs(): + """Test plot kwargs functionality.""" + H = xgi.sunflower(3, 1, 20) + + # Test with valid kwargs + df = H.edges.size.ashist( + plot='bar', + plot_kwargs={'color': 'red', 'alpha': 0.5} + ) + expected_df = pd.DataFrame([[20.0, 3]], columns=["bin_center", "value"]) + assert df.equals(expected_df) + +def test_ashist_plot_errors(): + """Test error handling in plotting.""" + H = xgi.sunflower(3, 1, 20) + + # Test invalid plot type + with pytest.raises(ValueError, match="Unknown plot type:"): + H.edges.size.ashist(plot='invalid_type') + + # Test with valid matplotlib parameters + df = H.edges.size.ashist( + plot='bar', + plot_kwargs={'color': 'red', 'alpha': 0.5} # Known valid parameters + ) + assert isinstance(df, pd.DataFrame) + +def test_ashist_density_plotting(): + """Test plotting with density parameter.""" + H = xgi.sunflower(3, 1, 20) + + # Test with density=True + df = H.edges.size.ashist(plot=True, density=True) + # Note: actual values will be different with density=True + assert 'bin_center' in df.columns + assert 'value' in df.columns + +def test_ashist_bin_edges_plotting(): + """Test plotting with bin_edges parameter.""" + H = xgi.sunflower(3, 1, 20) + + # Test with bin_edges=True + df = H.edges.size.ashist(plot=True, bin_edges=True) + assert 'bin_lo' in df.columns + assert 'bin_hi' in df.columns + + + +def test_ashist_single_unique_value(): + """Test ashist when there is only one unique value and multiple bins.""" + # Create a hypergraph with edges all of the same size + H = xgi.Hypergraph() + H.add_nodes_from(range(5)) + # All edges have size 2 + H.add_edges_from([[0, 1], [2, 3], [4, 0]]) + + # Call ashist with multiple bins + df = H.edges.size.ashist(bins=10, plot=False) # plot=False to avoid plotting + + # Assert that only one bin is created + assert len(df) == 1, "There should be only one bin when all values are identical." + + # Assert that the bin center is equal to the unique value + assert df['bin_center'].iloc[0] == 2, "The bin center should be the unique value." + + # Assert that the count is equal to the number of edges + assert df['value'].iloc[0] == 3, "The count should match the number of identical values." + + + + ### Attribute statistics From 65e59437539a8d58af358c8efd1638e1dbf962d3 Mon Sep 17 00:00:00 2001 From: will collins Date: Sun, 8 Dec 2024 14:43:51 -0500 Subject: [PATCH 7/8] Added ashist function to add-plotting-capabilities --- xgi/stats/__init__.py | 81 +++++++++++++++++++++++++++++++++---------- 1 file changed, 63 insertions(+), 18 deletions(-) diff --git a/xgi/stats/__init__.py b/xgi/stats/__init__.py index 5e0e0648..a86fa0a8 100644 --- a/xgi/stats/__init__.py +++ b/xgi/stats/__init__.py @@ -162,8 +162,16 @@ def aspandas(self): """ return pd.Series(self._val, name=self.name) - def ashist(self, bins=10, bin_edges=False, density=False, log_binning=False): - """Return the distribution of a numpy array. + def ashist( + self, + bins=10, + bin_edges=False, + density=False, + log_binning=False, + plot=False, + plot_kwargs=None, + ): + """Return the distribution of a numpy array and optionally plot it. Parameters ---------- @@ -172,34 +180,71 @@ def ashist(self, bins=10, bin_edges=False, density=False, log_binning=False): bins : int, list, or Numpy array The number of bins or the bin edges. bin_edges : bool - Whether to also output the min and max of each bin, - by default, False. + Whether to also output the min and max of each bin. density : bool - Whether to normalize the resulting distribution. + Whether to normalize the distribution. log_binning : bool Whether to bin the values with log-sized bins. - By default, False. - + plot : bool or str + If True, plots histogram using matplotlib. + Can also be 'bar', 'line', or 'step' to specify plot type. + plot_kwargs : dict + Additional keyword arguments for plotting function. Returns ------- Pandas DataFrame - A two-column table with "bin_center" and "value" columns, - where "value" is a count or a probability. If `bin_edges` - is True, outputs two additional columns, `bin_lo` and `bin_hi`, - which outputs the left and right bin edges respectively. + DataFrame with histogram data and optional plot. - Notes - ----- - Originally from https://github.com/jkbren/networks-and-dataviz + Examples + -------- + >>> H.nodes.degree.ashist(plot='bar', plot_kwargs={'color': 'red'}) """ - - # if there is one unique value and more than one bin is specified, - # sets the number of bins to 1. + # Handle single value case if isinstance(bins, int) and len(set(self.aslist())) == 1: bins = 1 - return hist(self.asnumpy(), bins, bin_edges, density, log_binning) + # Get histogram data + df = hist(self.asnumpy(), bins, bin_edges, density, log_binning) + + # Only execute plotting code if plot is True or a string + if plot: + try: + import matplotlib.pyplot as plt + except ImportError: + raise ImportError("Matplotlib is required for plotting.") + + # Set default plotting parameters + plot_kwargs = plot_kwargs or {} + plot_type = "bar" if plot is True else plot + + # Create plot + fig, ax = plt.subplots() + + # Plot based on type + if plot_type == "bar": + ax.bar(df["bin_center"], df["value"], **plot_kwargs) + elif plot_type == "line": + ax.plot(df["bin_center"], df["value"], **plot_kwargs) + elif plot_type == "step": + ax.step(df["bin_center"], df["value"], where="mid", **plot_kwargs) + else: + raise ValueError(f"Unknown plot type: {plot_type}") + + # Set labels + ax.set_xlabel("Value") + ax.set_ylabel("Count" if not density else "Probability") + ax.set_title("Histogram") + + # Add bin edges if requested + if bin_edges: + for _, row in df.iterrows(): + ax.axvline(row["bin_lo"], color="gray", linestyle="--", alpha=0.5) + ax.axvline(row["bin_hi"], color="gray", linestyle="--", alpha=0.5) + + plt.show() + + return df def max(self): """The maximum value of this stat.""" From 3cab05e0465aceb8e420f9cc289052140d6990c4 Mon Sep 17 00:00:00 2001 From: will collins Date: Sun, 8 Dec 2024 15:12:46 -0500 Subject: [PATCH 8/8] Changing benchmark.yml and core.py back to normal --- .github/workflows/benchmark.yml | 5 +---- benchmarks/core.py | 17 +---------------- 2 files changed, 2 insertions(+), 20 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 6b0ed8b9..64e4c433 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -3,9 +3,6 @@ on: push: branches: - main - pull_request: - branches: - - main workflow_dispatch: permissions: @@ -40,4 +37,4 @@ jobs: # Show alert with commit comment on detecting possible performance regression alert-threshold: '200%' comment-on-alert: false - fail-on-alert: true + fail-on-alert: true \ No newline at end of file diff --git a/benchmarks/core.py b/benchmarks/core.py index e1d28fb9..2aca9eb8 100644 --- a/benchmarks/core.py +++ b/benchmarks/core.py @@ -4,21 +4,6 @@ fname = "benchmarks/email-enron.json" -# Below is an example so I can better understand the control flow -def test_simple_benchmark(benchmark): - """A simple benchmark to test the workflow.""" - def setup(): - # No setup needed for this simple test - return (), {} - - def run_benchmark(): - x = 0 - for i in range(1000): - x += i - return x - - benchmark.pedantic(run_benchmark, setup=setup, rounds=rounds) - def test_construct_from_edgelist(benchmark): def setup(): H = xgi.read_hif(fname) @@ -172,4 +157,4 @@ def setup(): def dual(H): H.dual() - benchmark.pedantic(dual, setup=setup, rounds=rounds) + benchmark.pedantic(dual, setup=setup, rounds=rounds) \ No newline at end of file