Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve changelog generator #639

Merged
merged 2 commits into from
Dec 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion benchmarks/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,4 +157,4 @@ def setup():
def dual(H):
H.dual()

benchmark.pedantic(dual, setup=setup, rounds=rounds)
benchmark.pedantic(dual, setup=setup, rounds=rounds)
1 change: 0 additions & 1 deletion tests/readwrite/test_xgi_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ def test_load_xgi_data(capfd):
assert H.num_edges == 77733



@pytest.mark.skipif(
sys.version_info != (3, 12) and not platform.system() == "Linux",
reason="only need one test",
Expand Down
40 changes: 18 additions & 22 deletions tests/stats/test_core_stats_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -575,44 +575,41 @@ def test_issue_468():
assert H.edges.size.ashist().equals(df)




def test_ashist_attrs_exist():
    """Test that ashist returns a DataFrame with expected attributes.

    The ``attrs`` dict must carry 'xlabel', 'ylabel', and 'title' so that
    plotting helpers can label axes without extra arguments.
    """
    H = xgi.sunflower(3, 1, 20)
    df = H.edges.size.ashist()

    # Check that all expected attributes exist.
    # (The diff rendering duplicated these asserts in old single-quoted and
    # new double-quoted form; only one set is needed.)
    assert "xlabel" in df.attrs
    assert "ylabel" in df.attrs
    assert "title" in df.attrs


def test_ashist_density_labels():
    """Test that ylabel changes based on the density parameter."""
    H = xgi.sunflower(3, 1, 20)

    # Default (density=False) reports raw counts.
    df_count = H.edges.size.ashist(density=False)
    assert df_count.attrs["ylabel"] == "Count"

    # density=True normalizes the histogram, so the label changes.
    df_density = H.edges.size.ashist(density=True)
    assert df_density.attrs["ylabel"] == "Probability"


def test_ashist_original_functionality():
    """Test that adding attributes doesn't break original functionality."""
    H = xgi.sunflower(3, 1, 20)
    df = H.edges.size.ashist()

    # Original test case should still pass: one bin centered on the
    # single edge size (20) containing all 3 edges.
    expected_df = pd.DataFrame([[20.0, 3]], columns=["bin_center", "value"])
    assert df.equals(expected_df)  # Original functionality

    # And should have attributes
    assert "xlabel" in df.attrs


def test_ashist_single_unique_value():
    """Test ashist when the stat has a single unique value.

    NOTE(review): the scraped diff cut the first lines of this body; the
    hypergraph construction below is reconstructed — confirm against the
    repository version.
    """
    H = xgi.Hypergraph()
    H.add_nodes_from(range(5))
    # All edges have the same size
    H.add_edges_from([[0, 1], [2, 3], [4, 0]])

    # The edge sizes will all be 2
    df = H.edges.size.ashist(bins=10)

    # Since there's only one unique value, bins should be set to 1
    assert len(df) == 1  # Only one bin should be present
    assert df["bin_center"].iloc[0] == 2  # The bin center should be the unique value
    assert df["value"].iloc[0] == 3  # There are three edges of size 2

    # Check that attributes are present
    assert "xlabel" in df.attrs
    assert "ylabel" in df.attrs
    assert "title" in df.attrs


### Attribute statistics
Expand Down
102 changes: 78 additions & 24 deletions tools/generate_changelog.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@
import os
import re
from collections import namedtuple
from datetime import datetime, timedelta

import requests

Expand All @@ -140,7 +141,9 @@
GitHubConfig = namedtuple("GitHubConfig", ["base_url", "api_url", "headers"])

Commit = namedtuple("Commit", ["sha", "message"])
PullRequest = namedtuple("PullRequest", ["number", "title", "author"])
PullRequest = namedtuple(
"PullRequest", ["number", "title", "author", "closed_at", "associated_issues"]
)

# Merge commits use a double linebreak between the branch name and the title
MERGE_PR_RE = re.compile(r"^Merge pull request #([0-9]+)*")
Expand Down Expand Up @@ -188,9 +191,7 @@ def get_commit_for_tag(github_config, owner, repo, tag):
tag_json = tag_response.json()

if tag_response.status_code != 200:
raise GitHubError(
"Unable to get tag {}. {}".format(tag, tag_json["message"])
)
raise GitHubError(f"Unable to get tag {tag}. {tag_json["message"]}")

# If we're given a tag object we have to look up the commit
if tag_json["object"]["type"] == "tag":
Expand All @@ -207,7 +208,7 @@ def get_last_commit(github_config, owner, repo, branch=DEFAULT_BRANCH):
)
commits_json = commits_response.json()
if commits_response.status_code != 200:
raise GitHubError("Unable to get commits. {}".format(commits_json["message"]))
raise GitHubError(f"Unable to get commits. {commits_json["message"]}")

return commits_json[0]["sha"]

Expand Down Expand Up @@ -237,14 +238,12 @@ def get_commits_between(github_config, owner, repo, first_commit, last_commit):
commits_json = commits_response.json()
if commits_response.status_code != 200:
raise GitHubError(
"Unable to get commits between {} and {}. {}".format(
first_commit, last_commit, commits_json["message"]
)
f"Unable to get commits between {first_commit} and {last_commit}. {commits_json["message"]}"
)

if "commits" not in commits_json:
raise GitHubError(
"Commits not found between {} and {}.".format(first_commit, last_commit)
f"Commits not found between {first_commit} and {last_commit}."
)

commits = [
Expand All @@ -271,7 +270,7 @@ def extract_pr_number(message):

return numbers[-1] # PullRequest(number=number, title=title, author=author)

raise Exception("Commit isn't a PR merge, {}".format(message))
raise Exception(f"Commit isn't a PR merge, {message}")


def prs_from_numbers(github_config, owner, repo, pr_numbers):
Expand All @@ -291,10 +290,40 @@ def prs_from_numbers(github_config, owner, repo, pr_numbers):
pull_json = pull_response.json()
title = pull_json["title"]
author = pull_json["user"]["login"]
pr_list.append(PullRequest(number=number, title=title, author=author))
closed_at = pull_json["closed_at"]
pr_list.append(
PullRequest(
number=number,
title=title,
author=author,
closed_at=closed_at,
associated_issues=[],
)
)
return pr_list


def get_associated_issues(github_config, owner, repo, prs):
    """Attach closed-issue numbers to each PR in *prs*, in place.

    GitHub's REST API does not directly expose which issues a merge closed,
    so this uses a heuristic: a closed issue whose ``closed_at`` timestamp is
    within two seconds of the PR's close time is assumed to have been
    auto-closed by that PR.  Matches are appended to ``pr.associated_issues``.

    Parameters
    ----------
    github_config : GitHubConfig
        Provides ``api_url`` and auth ``headers``.
    owner, repo : str
        Repository coordinates.
    prs : list of PullRequest
        Each must have ``closed_at`` (ISO timestamp or None) and a mutable
        ``associated_issues`` list.

    Raises
    ------
    GitHubError
        If the issues request fails.
    """
    issues_url = "/".join(
        [
            github_config.api_url,
            "repos",
            owner,
            repo,
            "issues?state=closed",
        ]
    )
    issues_response = requests.get(issues_url, headers=github_config.headers)
    issues_json = issues_response.json()
    # Fail loudly on API errors, consistent with the other fetch helpers;
    # previously an error payload would crash the comprehension below.
    if issues_response.status_code != 200:
        raise GitHubError(f"Unable to get issues. {issues_json['message']}")

    # NOTE(review): only the first page of closed issues is fetched; issues
    # closed further in the past will not be matched.  Consider pagination.
    closed_issues = {
        entry["number"]: entry["closed_at"]
        for entry in issues_json
        if entry.get("closed_at")  # defensive: skip entries with no timestamp
    }

    for pr in prs:
        if not pr.closed_at:
            # PR was never closed (still open); nothing to match against.
            continue
        t_pr = datetime.fromisoformat(pr.closed_at)
        for number, closed_at in closed_issues.items():
            t_issue = datetime.fromisoformat(closed_at)
            # The issues endpoint also returns PRs, so exclude the PR itself.
            if (
                abs((t_issue - t_pr).total_seconds()) <= 2
                and int(number) != int(pr.number)
            ):
                pr.associated_issues.append(number)


def fetch_changes(
github_config,
owner,
Expand Down Expand Up @@ -327,7 +356,7 @@ def fetch_changes(
]

if len(pr_numbers) == 0 and len(commits_between) > 0:
raise Exception("Lots of commits and no PRs on branch {}".format(branch))
raise Exception("Lots of commits and no PRs on branch {branch}")
else:
prs = prs_from_numbers(github_config, owner, repo, pr_numbers)

Expand All @@ -339,21 +368,45 @@ def format_changes(github_config, owner, repo, prs, markdown=True):
"""Format the list of prs in either text or markdown"""
lines = []
for pr in prs:
number = "#{number}".format(number=pr.number)
pr_number = f"#{pr.number}"
if markdown:
link = "{github_url}/{owner}/{repo}/pull/{number}".format(
github_url=github_config.base_url,
owner=owner,
repo=repo,
number=pr.number,
pr_link = f"{github_config.base_url}/{owner}/{repo}/pull/{pr.number}"
pr_number = f"[{pr_number}]({pr_link})"

issues = pr.associated_issues

if len(issues) == 0:
issues_string = ""

elif len(issues) == 1:
issue_number = issues[0]
issue_link = (
f"{github_config.base_url}/{owner}/{repo}/issues/{issue_number}"
)
number = "[{number}]({link})".format(number=number, link=link)
print(number)
lines.append(
"* {title}. {number} (@{author})".format(
title=pr.title, number=number, author=pr.author
issues_string = f"(Closes Issue [#{issue_number}]({issue_link})) "

elif len(issues) == 2:
issue_number1, issue_number2 = issues
issue_link1 = (
f"{github_config.base_url}/{owner}/{repo}/issues/{issue_number1}"
)
)
issue_link2 = (
f"{github_config.base_url}/{owner}/{repo}/issues/{issue_number2}"
)
issues_string = f"(Closes Issues [#{issue_number1}]({issue_link1}) and [#{issue_number2}]({issue_link2})) "
else:
issues_string = "(Closes Issues "
for i, issue_number in enumerate(issues):
issue_link = (
f"{github_config.base_url}/{owner}/{repo}/issues/{issue_number}"
)

if i < len(issues):
issues_string += f"[#{issue_number}]({issue_link}), "
else:
issues_string += f"and [#{issue_number}]({issue_link})) "

lines.append(f"* {pr.title}. {issues_string}{pr_number} (@{pr.author})")

return lines

Expand All @@ -374,6 +427,7 @@ def generate_changelog(
github_config = get_github_config(github_base_url, github_api_url, github_token)

prs = fetch_changes(github_config, owner, repo, previous_tag, current_tag, branch)
get_associated_issues(github_config, owner, repo, prs)
lines = format_changes(github_config, owner, repo, prs, markdown=markdown)

separator = "\\n" if single_line else "\n"
Expand Down
3 changes: 0 additions & 3 deletions xgi/stats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,6 @@ def aspandas(self):
"""
return pd.Series(self._val, name=self.name)


def ashist(self, bins=10, bin_edges=False, density=False, log_binning=False):
"""Return the distribution of a numpy array.

Expand Down Expand Up @@ -216,8 +215,6 @@ def ashist(self, bins=10, bin_edges=False, density=False, log_binning=False):

return df



def max(self):
"""The maximum value of this stat."""
return self.asnumpy().max(axis=0).item()
Expand Down
Loading