diff --git a/tests/stats/test_core_stats_functions.py b/tests/stats/test_core_stats_functions.py
index a5581f77..1cb35537 100644
--- a/tests/stats/test_core_stats_functions.py
+++ b/tests/stats/test_core_stats_functions.py
@@ -575,6 +575,91 @@ def test_issue_468():
     assert H.edges.size.ashist().equals(df)
 
 
+@pytest.fixture
+def headless_plots(monkeypatch):
+    """Stub out ``plt.show`` and close all figures after the test."""
+    import matplotlib.pyplot as plt
+
+    monkeypatch.setattr(plt, "show", lambda *args, **kwargs: None)
+    yield
+    plt.close("all")
+
+
+def test_ashist_plotting_basic(headless_plots):
+    """The returned DataFrame does not depend on whether a plot is drawn."""
+    H = xgi.sunflower(3, 1, 20)
+    expected_df = pd.DataFrame([[20.0, 3]], columns=["bin_center", "value"])
+
+    # plot=False must leave the histogram data untouched.
+    assert H.edges.size.ashist(plot=False).equals(expected_df)
+
+    # plot=True draws a bar chart but returns the same data.
+    assert H.edges.size.ashist(plot=True).equals(expected_df)
+
+
+@pytest.mark.parametrize("plot_type", ["bar", "line", "step"])
+def test_ashist_plot_types(headless_plots, plot_type):
+    """Every supported plot type returns the same histogram data."""
+    H = xgi.sunflower(3, 1, 20)
+    expected_df = pd.DataFrame([[20.0, 3]], columns=["bin_center", "value"])
+
+    assert H.edges.size.ashist(plot=plot_type).equals(expected_df)
+
+
+def test_ashist_plot_kwargs(headless_plots):
+    """Plot kwargs are accepted and do not alter the returned data."""
+    H = xgi.sunflower(3, 1, 20)
+    expected_df = pd.DataFrame([[20.0, 3]], columns=["bin_center", "value"])
+
+    df = H.edges.size.ashist(plot="bar", plot_kwargs={"color": "red", "alpha": 0.5})
+    assert df.equals(expected_df)
+
+
+def test_ashist_plot_errors(headless_plots):
+    """An unsupported plot type raises ValueError."""
+    H = xgi.sunflower(3, 1, 20)
+
+    with pytest.raises(ValueError, match="Unknown plot type:"):
+        H.edges.size.ashist(plot="invalid_type")
+
+
+def test_ashist_density_plotting(headless_plots):
+    """Plotting with density=True still returns the histogram columns."""
+    H = xgi.sunflower(3, 1, 20)
+
+    # Values differ from raw counts under density=True, so only check columns.
+    df = H.edges.size.ashist(plot=True, density=True)
+    assert "bin_center" in df.columns
+    assert "value" in df.columns
+
+
+def test_ashist_bin_edges_plotting(headless_plots):
+    """Plotting with bin_edges=True returns the bin edge columns."""
+    H = xgi.sunflower(3, 1, 20)
+
+    df = H.edges.size.ashist(plot=True, bin_edges=True)
+    assert "bin_lo" in df.columns
+    assert "bin_hi" in df.columns
+
+
+def test_ashist_single_unique_value():
+    """A single unique value collapses the histogram to one bin."""
+    # Three edges, all of size 2, so every value is identical.
+    H = xgi.Hypergraph()
+    H.add_nodes_from(range(5))
+    H.add_edges_from([[0, 1], [2, 3], [4, 0]])
+
+    # Ask for more bins than there are unique values; no plot is drawn.
+    df = H.edges.size.ashist(bins=10, plot=False)
+
+    # One bin, centred on the unique value, holding all three edges.
+    assert len(df) == 1
+    assert df["bin_center"].iloc[0] == 2
+    assert df["value"].iloc[0] == 3
+
+
+
+
 ### Attribute statistics
 
 
diff --git a/xgi/stats/__init__.py b/xgi/stats/__init__.py
index 5e0e0648..a86fa0a8 100644
--- a/xgi/stats/__init__.py
+++ b/xgi/stats/__init__.py
@@ -162,8 +162,16 @@ def aspandas(self):
         """
         return pd.Series(self._val, name=self.name)
 
-    def ashist(self, bins=10, bin_edges=False, density=False, log_binning=False):
-        """Return the distribution of a numpy array.
+    def ashist(
+        self,
+        bins=10,
+        bin_edges=False,
+        density=False,
+        log_binning=False,
+        plot=False,
+        plot_kwargs=None,
+    ):
+        """Return the distribution of a numpy array and optionally plot it.
 
         Parameters
         ----------
@@ -172,34 +180,90 @@ def ashist(self, bins=10, bin_edges=False, density=False, log_binning=False):
         bins : int, list, or Numpy array
             The number of bins or the bin edges.
         bin_edges : bool
             Whether to also output the min and max of each bin,
             by default, False.
         density : bool
             Whether to normalize the resulting distribution.
         log_binning : bool
             Whether to bin the values with log-sized bins.
             By default, False.
+        plot : bool or str
+            Whether to also draw the histogram with matplotlib. True draws
+            a bar chart; "bar", "line", or "step" selects the plot type.
+            By default, False.
+        plot_kwargs : dict or None
+            Extra keyword arguments forwarded to the matplotlib plotting
+            call. By default, None.
 
         Returns
         -------
         Pandas DataFrame
             A two-column table with "bin_center" and "value" columns,
             where "value" is a count or a probability. If `bin_edges`
             is True, outputs two additional columns, `bin_lo` and `bin_hi`,
             which outputs the left and right bin edges respectively.
 
+        Raises
+        ------
+        ImportError
+            If a plot is requested but matplotlib is not installed.
+        ValueError
+            If `plot` is a string other than "bar", "line", or "step".
+
         Notes
         -----
         Originally from https://github.com/jkbren/networks-and-dataviz
 
+        Examples
+        --------
+        >>> df = H.nodes.degree.ashist(plot="bar")  # doctest: +SKIP
+
         """
 
         # if there is one unique value and more than one bin is specified,
         # sets the number of bins to 1.
         if isinstance(bins, int) and len(set(self.aslist())) == 1:
             bins = 1
 
-        return hist(self.asnumpy(), bins, bin_edges, density, log_binning)
+        df = hist(self.asnumpy(), bins, bin_edges, density, log_binning)
+        if not plot:
+            return df
+
+        # Validate the plot type before touching matplotlib so that a bad
+        # value cannot leave an empty figure behind.
+        plot_type = plot if isinstance(plot, str) else "bar"
+        if plot_type not in ("bar", "line", "step"):
+            raise ValueError(f"Unknown plot type: {plot_type}")
+
+        # Imported lazily so matplotlib is only needed when plotting.
+        try:
+            import matplotlib.pyplot as plt
+        except ImportError as err:
+            raise ImportError("Matplotlib is required for plotting.") from err
+
+        kwargs = {} if plot_kwargs is None else dict(plot_kwargs)
+        x, y = df["bin_center"], df["value"]
+
+        _, ax = plt.subplots()
+        if plot_type == "bar":
+            ax.bar(x, y, **kwargs)
+        elif plot_type == "line":
+            ax.plot(x, y, **kwargs)
+        else:
+            # "mid" centres each step on its bin; plot_kwargs may override it.
+            ax.step(x, y, **{"where": "mid", **kwargs})
+
+        ax.set_xlabel("Value")
+        ax.set_ylabel("Probability" if density else "Count")
+        ax.set_title("Histogram")
+
+        if bin_edges:
+            # Neighbouring bins share an edge, so draw each distinct edge once.
+            for edge in sorted(set(df["bin_lo"]).union(df["bin_hi"])):
+                ax.axvline(edge, color="gray", linestyle="--", alpha=0.5)
+
+        plt.show()
+        return df
 
     def max(self):
         """The maximum value of this stat."""