Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integrate optional plotting capabilities into ashist() #636

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
2 changes: 1 addition & 1 deletion benchmarks/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,4 +157,4 @@ def setup():
def dual(H):
H.dual()

benchmark.pedantic(dual, setup=setup, rounds=rounds)
benchmark.pedantic(dual, setup=setup, rounds=rounds)
97 changes: 97 additions & 0 deletions tests/stats/test_core_stats_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -575,6 +575,103 @@ def test_issue_468():
assert H.edges.size.ashist().equals(df)



def test_ashist_plotting_basic():
    """Check that `ashist` returns the same table with and without plotting."""
    hypergraph = xgi.sunflower(3, 1, 20)

    # Reference histogram: a single row with bin center 20.0 and value 3.
    reference = pd.DataFrame([[20.0, 3]], columns=["bin_center", "value"])

    # Plotting switched off: the returned DataFrame must match the reference.
    table_plain = hypergraph.edges.size.ashist(plot=False)
    assert table_plain.equals(reference)

    # Plotting switched on: the returned DataFrame must be unchanged.
    table_plotted = hypergraph.edges.size.ashist(plot=True)
    assert table_plotted.equals(reference)


def test_ashist_plot_types():
    """Check that every supported plot type returns the same histogram table."""
    hypergraph = xgi.sunflower(3, 1, 20)

    # The expected table does not depend on the plot type, so build it once.
    reference = pd.DataFrame([[20.0, 3]], columns=["bin_center", "value"])

    # Each accepted string value of `plot` must leave the output untouched.
    for kind in ("bar", "line", "step"):
        table = hypergraph.edges.size.ashist(plot=kind)
        assert table.equals(reference)


def test_ashist_plot_kwargs():
    """Check that passing `plot_kwargs` does not alter the returned table."""
    hypergraph = xgi.sunflower(3, 1, 20)
    reference = pd.DataFrame([[20.0, 3]], columns=["bin_center", "value"])

    # Styling options forwarded to the plotting call.
    styling = {"color": "red", "alpha": 0.5}

    table = hypergraph.edges.size.ashist(plot="bar", plot_kwargs=styling)
    assert table.equals(reference)

def test_ashist_plot_errors():
    """Check the error raised for an unknown plot type, then a valid call."""
    hypergraph = xgi.sunflower(3, 1, 20)
    size_stat = hypergraph.edges.size

    # An unrecognised plot type must raise a ValueError with this message.
    with pytest.raises(ValueError, match="Unknown plot type:"):
        size_stat.ashist(plot="invalid_type")

    # Known valid parameters: the call succeeds and yields a DataFrame.
    styling = {"color": "red", "alpha": 0.5}
    table = size_stat.ashist(plot="bar", plot_kwargs=styling)
    assert isinstance(table, pd.DataFrame)

def test_ashist_density_plotting():
    """Check the output columns when plotting with `density=True`."""
    hypergraph = xgi.sunflower(3, 1, 20)

    table = hypergraph.edges.size.ashist(plot=True, density=True)

    # Values differ under density=True, so only the column names are checked.
    for column in ("bin_center", "value"):
        assert column in table.columns

def test_ashist_bin_edges_plotting():
    """Check the bin-edge columns are present when plotting with `bin_edges=True`."""
    hypergraph = xgi.sunflower(3, 1, 20)

    table = hypergraph.edges.size.ashist(plot=True, bin_edges=True)

    # Requesting bin edges must add both the lower and upper edge columns.
    for column in ("bin_lo", "bin_hi"):
        assert column in table.columns



def test_ashist_single_unique_value():
    """Check `ashist` with several bins requested but only one distinct value."""
    # Build a hypergraph on five nodes whose three edges all have size 2.
    hypergraph = xgi.Hypergraph()
    hypergraph.add_nodes_from(range(5))
    same_size_edges = [[0, 1], [2, 3], [4, 0]]
    hypergraph.add_edges_from(same_size_edges)

    # Ask for ten bins; plot=False keeps the test free of plotting.
    table = hypergraph.edges.size.ashist(bins=10, plot=False)

    # The table should have a single row.
    assert len(table) == 1, "There should be only one bin when all values are identical."

    # That row's bin center is the unique edge size.
    bin_center = table["bin_center"].iloc[0]
    assert bin_center == 2, "The bin center should be the unique value."

    # That row's value is the number of edges.
    count = table["value"].iloc[0]
    assert count == 3, "The count should match the number of identical values."




### Attribute statistics


Expand Down
81 changes: 63 additions & 18 deletions xgi/stats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,16 @@ def aspandas(self):
"""
return pd.Series(self._val, name=self.name)

def ashist(self, bins=10, bin_edges=False, density=False, log_binning=False):
"""Return the distribution of a numpy array.
def ashist(
self,
bins=10,
bin_edges=False,
density=False,
log_binning=False,
plot=False,
plot_kwargs=None,
):
"""Return the distribution of a numpy array and optionally plot it.

Parameters
----------
Expand All @@ -172,34 +180,71 @@ def ashist(self, bins=10, bin_edges=False, density=False, log_binning=False):
bins : int, list, or Numpy array
The number of bins or the bin edges.
bin_edges : bool
Whether to also output the min and max of each bin,
by default, False.
Whether to also output the min and max of each bin.
density : bool
Whether to normalize the resulting distribution.
Whether to normalize the distribution.
log_binning : bool
Whether to bin the values with log-sized bins.
By default, False.

plot : bool or str
If True, plots histogram using matplotlib.
Can also be 'bar', 'line', or 'step' to specify plot type.
plot_kwargs : dict
Additional keyword arguments for plotting function.

Returns
-------
Pandas DataFrame
A two-column table with "bin_center" and "value" columns,
where "value" is a count or a probability. If `bin_edges`
is True, outputs two additional columns, `bin_lo` and `bin_hi`,
which outputs the left and right bin edges respectively.
DataFrame with histogram data and optional plot.

Notes
-----
Originally from https://github.com/jkbren/networks-and-dataviz
Examples
--------
>>> H.nodes.degree.ashist(plot='bar', plot_kwargs={'color': 'red'})
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Define what H is. Maybe add a small example, like:

H = xgi.Hypergraph([[1, 2, 3], [3, 4], [1, 3, 6]])
H.nodes.degree.ashist(plot='bar', plot_kwargs={'color': 'red'})

"""

# if there is one unique value and more than one bin is specified,
# sets the number of bins to 1.
# Handle single value case
if isinstance(bins, int) and len(set(self.aslist())) == 1:
bins = 1

return hist(self.asnumpy(), bins, bin_edges, density, log_binning)
# Get histogram data
df = hist(self.asnumpy(), bins, bin_edges, density, log_binning)

# Only execute plotting code if plot is True or a string
if plot:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you break this out into a modular helper function with an underscore prefix like _plot_hist or similar?

try:
import matplotlib.pyplot as plt
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If users have installed xgi, I believe that they will also have matplotlib by default, so I'm not sure that you need to check this.

except ImportError:
raise ImportError("Matplotlib is required for plotting.")

# Set default plotting parameters
plot_kwargs = plot_kwargs or {}
plot_type = "bar" if plot is True else plot

# Create plot
fig, ax = plt.subplots()

# Plot based on type
if plot_type == "bar":
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a "scatter" option, or replace the "line" option with a scatter plot that plots only the points?

ax.bar(df["bin_center"], df["value"], **plot_kwargs)
elif plot_type == "line":
ax.plot(df["bin_center"], df["value"], **plot_kwargs)
elif plot_type == "step":
ax.step(df["bin_center"], df["value"], where="mid", **plot_kwargs)
else:
raise ValueError(f"Unknown plot type: {plot_type}")

# Set labels
ax.set_xlabel("Value")
ax.set_ylabel("Count" if not density else "Probability")
ax.set_title("Histogram")

# Add bin edges if requested
if bin_edges:
for _, row in df.iterrows():
ax.axvline(row["bin_lo"], color="gray", linestyle="--", alpha=0.5)
nwlandry marked this conversation as resolved.
Show resolved Hide resolved
ax.axvline(row["bin_hi"], color="gray", linestyle="--", alpha=0.5)

plt.show()

return df

def max(self):
"""The maximum value of this stat."""
Expand Down
Loading