From ddc5cbb73ac1da6e9f164b46434f21fb43a326e0 Mon Sep 17 00:00:00 2001 From: Nicholas Landry Date: Fri, 20 Dec 2024 13:18:55 -0500 Subject: [PATCH] Improve changelog generator (#639) * format with isort and black * associate issues with PRs --- benchmarks/core.py | 2 +- tests/readwrite/test_xgi_data.py | 1 - tests/stats/test_core_stats_functions.py | 40 ++++----- tools/generate_changelog.py | 102 +++++++++++++++++------ xgi/stats/__init__.py | 3 - 5 files changed, 97 insertions(+), 51 deletions(-) diff --git a/benchmarks/core.py b/benchmarks/core.py index 2aca9eb8..062c2533 100644 --- a/benchmarks/core.py +++ b/benchmarks/core.py @@ -157,4 +157,4 @@ def setup(): def dual(H): H.dual() - benchmark.pedantic(dual, setup=setup, rounds=rounds) \ No newline at end of file + benchmark.pedantic(dual, setup=setup, rounds=rounds) diff --git a/tests/readwrite/test_xgi_data.py b/tests/readwrite/test_xgi_data.py index 84b6e721..ab37a897 100644 --- a/tests/readwrite/test_xgi_data.py +++ b/tests/readwrite/test_xgi_data.py @@ -66,7 +66,6 @@ def test_load_xgi_data(capfd): assert H.num_edges == 77733 - @pytest.mark.skipif( sys.version_info != (3, 12) and not platform.system() == "Linux", reason="only need one test", diff --git a/tests/stats/test_core_stats_functions.py b/tests/stats/test_core_stats_functions.py index f0c5a36d..4db1132d 100644 --- a/tests/stats/test_core_stats_functions.py +++ b/tests/stats/test_core_stats_functions.py @@ -575,44 +575,41 @@ def test_issue_468(): assert H.edges.size.ashist().equals(df) - - def test_ashist_attrs_exist(): """Test that ashist returns DataFrame with expected attributes.""" H = xgi.sunflower(3, 1, 20) df = H.edges.size.ashist() - + # Check that all expected attributes exist - assert 'xlabel' in df.attrs - assert 'ylabel' in df.attrs - assert 'title' in df.attrs + assert "xlabel" in df.attrs + assert "ylabel" in df.attrs + assert "title" in df.attrs def test_ashist_density_labels(): """Test that ylabel changes based on density parameter.""" 
H = xgi.sunflower(3, 1, 20) - + # Test default (density=False) df_count = H.edges.size.ashist(density=False) - assert df_count.attrs['ylabel'] == 'Count' - + assert df_count.attrs["ylabel"] == "Count" + # Test with density=True df_density = H.edges.size.ashist(density=True) - assert df_density.attrs['ylabel'] == 'Probability' + assert df_density.attrs["ylabel"] == "Probability" def test_ashist_original_functionality(): """Test that adding attributes doesn't break original functionality.""" H = xgi.sunflower(3, 1, 20) df = H.edges.size.ashist() - + # Original test case should still pass expected_df = pd.DataFrame([[20.0, 3]], columns=["bin_center", "value"]) assert df.equals(expected_df) # Original functionality - - # And should have attributes - assert 'xlabel' in df.attrs + # And should have attributes + assert "xlabel" in df.attrs def test_ashist_single_unique_value(): @@ -621,20 +618,19 @@ def test_ashist_single_unique_value(): H.add_nodes_from(range(5)) # All edges have the same size H.add_edges_from([[0, 1], [2, 3], [4, 0]]) - + # The edge sizes will all be 2 df = H.edges.size.ashist(bins=10) - + # Since there's only one unique value, bins should be set to 1 assert len(df) == 1 # Only one bin should be present - assert df['bin_center'].iloc[0] == 2 # The bin center should be the unique value - assert df['value'].iloc[0] == 3 # There are three edges of size 2 + assert df["bin_center"].iloc[0] == 2 # The bin center should be the unique value + assert df["value"].iloc[0] == 3 # There are three edges of size 2 # Check that attributes are present - assert 'xlabel' in df.attrs - assert 'ylabel' in df.attrs - assert 'title' in df.attrs - + assert "xlabel" in df.attrs + assert "ylabel" in df.attrs + assert "title" in df.attrs ### Attribute statistics diff --git a/tools/generate_changelog.py b/tools/generate_changelog.py index ddbd2377..effe500c 100644 --- a/tools/generate_changelog.py +++ b/tools/generate_changelog.py @@ -131,6 +131,7 @@ import os import re from 
collections import namedtuple
+from datetime import datetime, timedelta
 
 import requests
 
@@ -140,7 +141,9 @@
 GitHubConfig = namedtuple("GitHubConfig", ["base_url", "api_url", "headers"])
 
 Commit = namedtuple("Commit", ["sha", "message"])
-PullRequest = namedtuple("PullRequest", ["number", "title", "author"])
+PullRequest = namedtuple(
+    "PullRequest", ["number", "title", "author", "closed_at", "associated_issues"]
+)
 
 # Merge commits use a double linebreak between the branch name and the title
 MERGE_PR_RE = re.compile(r"^Merge pull request #([0-9]+)*")
@@ -188,9 +191,7 @@ def get_commit_for_tag(github_config, owner, repo, tag):
         tag_json = tag_response.json()
 
         if tag_response.status_code != 200:
-            raise GitHubError(
-                "Unable to get tag {}. {}".format(tag, tag_json["message"])
-            )
+            raise GitHubError(f"Unable to get tag {tag}. {tag_json['message']}")
 
         # If we're given a tag object we have to look up the commit
         if tag_json["object"]["type"] == "tag":
@@ -207,7 +208,7 @@ def get_last_commit(github_config, owner, repo, branch=DEFAULT_BRANCH):
     )
     commits_json = commits_response.json()
     if commits_response.status_code != 200:
-        raise GitHubError("Unable to get commits. {}".format(commits_json["message"]))
+        raise GitHubError(f"Unable to get commits. {commits_json['message']}")
 
     return commits_json[0]["sha"]
 
@@ -237,14 +238,12 @@ def get_commits_between(github_config, owner, repo, first_commit, last_commit):
     commits_json = commits_response.json()
     if commits_response.status_code != 200:
         raise GitHubError(
-            "Unable to get commits between {} and {}. {}".format(
-                first_commit, last_commit, commits_json["message"]
-            )
+            f"Unable to get commits between {first_commit} and {last_commit}. {commits_json['message']}"
         )
 
     if "commits" not in commits_json:
         raise GitHubError(
-            "Commits not found between {} and {}.".format(first_commit, last_commit)
+            f"Commits not found between {first_commit} and {last_commit}."
         
)
 
     commits = [
@@ -271,7 +270,7 @@ def extract_pr_number(message):
         return numbers[-1]  # PullRequest(number=number, title=title, author=author)
 
-    raise Exception("Commit isn't a PR merge, {}".format(message))
+    raise Exception(f"Commit isn't a PR merge, {message}")
 
 
 def prs_from_numbers(github_config, owner, repo, pr_numbers):
@@ -291,10 +290,40 @@ def prs_from_numbers(github_config, owner, repo, pr_numbers):
         pull_json = pull_response.json()
         title = pull_json["title"]
         author = pull_json["user"]["login"]
-        pr_list.append(PullRequest(number=number, title=title, author=author))
+        closed_at = pull_json["closed_at"]
+        pr_list.append(
+            PullRequest(
+                number=number,
+                title=title,
+                author=author,
+                closed_at=closed_at,
+                associated_issues=[],
+            )
+        )
 
     return pr_list
 
 
+def get_associated_issues(github_config, owner, repo, prs):
+    issues_url = "/".join(
+        [
+            github_config.api_url,
+            "repos",
+            owner,
+            repo,
+            "issues?state=closed",
+        ]
+    )
+    issues_response = requests.get(issues_url, headers=github_config.headers)
+    issues_json = issues_response.json()
+    issues = {entry["number"]: entry["closed_at"] for entry in issues_json}
+    for pr in prs:
+        t_pr = datetime.fromisoformat(pr.closed_at)
+        for i in issues:
+            t_issue = datetime.fromisoformat(issues[i])
+            if abs((t_issue - t_pr).total_seconds()) <= 2 and int(i) != int(pr.number):
+                pr.associated_issues.append(i)
+
+
 def fetch_changes(
     github_config,
     owner,
@@ -327,7 +356,7 @@ def fetch_changes(
     ]
 
     if len(pr_numbers) == 0 and len(commits_between) > 0:
-        raise Exception("Lots of commits and no PRs on branch {}".format(branch))
+        raise Exception(f"Lots of commits and no PRs on branch {branch}")
     else:
         prs = prs_from_numbers(github_config, owner, repo, pr_numbers)
 
@@ -339,21 +368,45 @@ def format_changes(github_config, owner, repo, prs, markdown=True):
     """Format the list of prs in either text or markdown"""
     lines = []
     for pr in prs:
-        number = "#{number}".format(number=pr.number)
+        pr_number = f"#{pr.number}"
        if markdown:
-            link = 
"{github_url}/{owner}/{repo}/pull/{number}".format(
-                github_url=github_config.base_url,
-                owner=owner,
-                repo=repo,
-                number=pr.number,
+            pr_link = f"{github_config.base_url}/{owner}/{repo}/pull/{pr.number}"
+            pr_number = f"[{pr_number}]({pr_link})"
+
+        issues = pr.associated_issues
+
+        if len(issues) == 0:
+            issues_string = ""
+
+        elif len(issues) == 1:
+            issue_number = issues[0]
+            issue_link = (
+                f"{github_config.base_url}/{owner}/{repo}/issues/{issue_number}"
             )
-            number = "[{number}]({link})".format(number=number, link=link)
-        print(number)
-        lines.append(
-            "* {title}. {number} (@{author})".format(
-                title=pr.title, number=number, author=pr.author
+            issues_string = f"(Closes Issue [#{issue_number}]({issue_link})) "
+
+        elif len(issues) == 2:
+            issue_number1, issue_number2 = issues
+            issue_link1 = (
+                f"{github_config.base_url}/{owner}/{repo}/issues/{issue_number1}"
             )
-            )
+            issue_link2 = (
+                f"{github_config.base_url}/{owner}/{repo}/issues/{issue_number2}"
+            )
+            issues_string = f"(Closes Issues [#{issue_number1}]({issue_link1}) and [#{issue_number2}]({issue_link2})) "
+        else:
+            issues_string = "(Closes Issues "
+            for i, issue_number in enumerate(issues):
+                issue_link = (
+                    f"{github_config.base_url}/{owner}/{repo}/issues/{issue_number}"
+                )
+
+                if i < len(issues) - 1:
+                    issues_string += f"[#{issue_number}]({issue_link}), "
+                else:
+                    issues_string += f"and [#{issue_number}]({issue_link})) "
+
+        lines.append(f"* {pr.title}. 
{issues_string}{pr_number} (@{pr.author})") return lines @@ -374,6 +427,7 @@ def generate_changelog( github_config = get_github_config(github_base_url, github_api_url, github_token) prs = fetch_changes(github_config, owner, repo, previous_tag, current_tag, branch) + get_associated_issues(github_config, owner, repo, prs) lines = format_changes(github_config, owner, repo, prs, markdown=markdown) separator = "\\n" if single_line else "\n" diff --git a/xgi/stats/__init__.py b/xgi/stats/__init__.py index bc82dd84..c327ee94 100644 --- a/xgi/stats/__init__.py +++ b/xgi/stats/__init__.py @@ -162,7 +162,6 @@ def aspandas(self): """ return pd.Series(self._val, name=self.name) - def ashist(self, bins=10, bin_edges=False, density=False, log_binning=False): """Return the distribution of a numpy array. @@ -216,8 +215,6 @@ def ashist(self, bins=10, bin_edges=False, density=False, log_binning=False): return df - - def max(self): """The maximum value of this stat.""" return self.asnumpy().max(axis=0).item()