From 9d047103d52ad0757486a93fc266d80de25849f2 Mon Sep 17 00:00:00 2001 From: gagb Date: Thu, 12 Dec 2024 13:41:31 -0800 Subject: [PATCH 1/8] Add method to convert GitHub issue to markdown Add support for converting GitHub issues to markdown. * Add `convert_github_issue` method in `src/markitdown/_markitdown.py` to handle GitHub issue conversion. * Use `PyGithub` to fetch issue details using the provided token. * Convert the issue details to markdown format and return as `DocumentConverterResult`. * Add optional GitHub issue support with `IS_GITHUB_ISSUE_CAPABLE` flag. --- src/markitdown/_markitdown.py | 42 +++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/src/markitdown/_markitdown.py b/src/markitdown/_markitdown.py index 96997cf..8078793 100644 --- a/src/markitdown/_markitdown.py +++ b/src/markitdown/_markitdown.py @@ -44,6 +44,14 @@ except ModuleNotFoundError: pass +# Optional GitHub issue support +try: + from github import Github + + IS_GITHUB_ISSUE_CAPABLE = True +except ModuleNotFoundError: + pass + class _CustomMarkdownify(markdownify.MarkdownConverter): """ @@ -1099,3 +1107,37 @@ def _guess_ext_magic(self, path): def register_page_converter(self, converter: DocumentConverter) -> None: """Register a page text converter.""" self._page_converters.insert(0, converter) + + def convert_github_issue( + self, issue_url: str, github_token: str + ) -> DocumentConverterResult: + if not IS_GITHUB_ISSUE_CAPABLE: + raise ImportError("PyGithub is not installed. 
Please install it to use this feature.") + + # Parse the issue URL + parsed_url = urlparse(issue_url) + path_parts = parsed_url.path.strip("/").split("/") + if len(path_parts) < 4 or path_parts[2] != "issues": + raise ValueError("Invalid GitHub issue URL") + + owner, repo, _, issue_number = path_parts[:4] + + # Authenticate with GitHub + g = Github(github_token) + repo = g.get_repo(f"{owner}/{repo}") + issue = repo.get_issue(int(issue_number)) + + # Convert issue details to markdown + markdown_content = f"# {issue.title}\n\n{issue.body}\n\n" + markdown_content += f"**State:** {issue.state}\n" + markdown_content += f"**Created at:** {issue.created_at}\n" + markdown_content += f"**Updated at:** {issue.updated_at}\n" + markdown_content += f"**Comments:**\n" + + for comment in issue.get_comments(): + markdown_content += f"- {comment.user.login} ({comment.created_at}): {comment.body}\n" + + return DocumentConverterResult( + title=issue.title, + text_content=markdown_content, + ) From 28af7ad34195f1d51361079072eb4bfec431366b Mon Sep 17 00:00:00 2001 From: gagb Date: Thu, 12 Dec 2024 22:39:03 +0000 Subject: [PATCH 2/8] Run pre-commit --- src/markitdown/_markitdown.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/markitdown/_markitdown.py b/src/markitdown/_markitdown.py index 8078793..a7a2891 100644 --- a/src/markitdown/_markitdown.py +++ b/src/markitdown/_markitdown.py @@ -1112,7 +1112,9 @@ def convert_github_issue( self, issue_url: str, github_token: str ) -> DocumentConverterResult: if not IS_GITHUB_ISSUE_CAPABLE: - raise ImportError("PyGithub is not installed. Please install it to use this feature.") + raise ImportError( + "PyGithub is not installed. Please install it to use this feature." 
+ ) # Parse the issue URL parsed_url = urlparse(issue_url) @@ -1135,7 +1137,9 @@ def convert_github_issue( markdown_content += f"**Comments:**\n" for comment in issue.get_comments(): - markdown_content += f"- {comment.user.login} ({comment.created_at}): {comment.body}\n" + markdown_content += ( + f"- {comment.user.login} ({comment.created_at}): {comment.body}\n" + ) return DocumentConverterResult( title=issue.title, From 8f16f32d530da4cfe28777671d9818c30434f9bf Mon Sep 17 00:00:00 2001 From: gagb Date: Thu, 12 Dec 2024 23:10:23 +0000 Subject: [PATCH 3/8] Add tests --- pyproject.toml | 1 + src/markitdown/_markitdown.py | 29 ++++++++++++++++++++++++++++- tests/test_markitdown.py | 17 +++++++++++++++++ 3 files changed, 46 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d1dd737..b6a87c7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,6 +39,7 @@ dependencies = [ "youtube-transcript-api", "SpeechRecognition", "pathvalidate", + "pygithub" ] [project.urls] diff --git a/src/markitdown/_markitdown.py b/src/markitdown/_markitdown.py index a7a2891..141caa9 100644 --- a/src/markitdown/_markitdown.py +++ b/src/markitdown/_markitdown.py @@ -50,7 +50,7 @@ IS_GITHUB_ISSUE_CAPABLE = True except ModuleNotFoundError: - pass + IS_GITHUB_ISSUE_CAPABLE = False class _CustomMarkdownify(markdownify.MarkdownConverter): @@ -1111,6 +1111,33 @@ def register_page_converter(self, converter: DocumentConverter) -> None: def convert_github_issue( self, issue_url: str, github_token: str ) -> DocumentConverterResult: + """ + Convert a GitHub issue to a markdown document. + + Args: + issue_url (str): The URL of the GitHub issue to convert. + github_token (str): A GitHub token with access to the repository. + + Returns: + DocumentConverterResult: The result containing the issue title and markdown content. + + Raises: + ImportError: If the PyGithub library is not installed. + ValueError: If the provided URL is not a valid GitHub issue URL. 
+ + Example: + # Example markdown format + # Issue Title + + Issue body content... + + **State:** open + **Created at:** 2023-10-01 12:34:56 + **Updated at:** 2023-10-02 12:34:56 + **Comments:** + - user1 (2023-10-01 13:00:00): Comment content... + - user2 (2023-10-01 14:00:00): Another comment... + """ if not IS_GITHUB_ISSUE_CAPABLE: raise ImportError( "PyGithub is not installed. Please install it to use this feature." diff --git a/tests/test_markitdown.py b/tests/test_markitdown.py index 94fd886..ee63fa2 100644 --- a/tests/test_markitdown.py +++ b/tests/test_markitdown.py @@ -87,6 +87,9 @@ "data:image/svg+xml,%3Csvg%20width%3D", ] +GITHUB_ISSUE_URL = "https://github.com/microsoft/autogen/issues/1421" +GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", "") + @pytest.mark.skipif( skip_remote, @@ -179,8 +182,22 @@ def test_markitdown_exiftool() -> None: assert target in result.text_content +@pytest.mark.skipif( + not GITHUB_TOKEN, + reason="GitHub token not provided", +) +def test_markitdown_github_issue() -> None: + markitdown = MarkItDown() + result = markitdown.convert_github_issue(GITHUB_ISSUE_URL, GITHUB_TOKEN) + print(result.text_content) + assert "User-Defined Functions" in result.text_content + assert "closed" in result.text_content + assert "Comments:" in result.text_content + + if __name__ == "__main__": """Runs this file's tests from the command line.""" test_markitdown_remote() test_markitdown_local() test_markitdown_exiftool() + test_markitdown_github_issue() From 7979eecfef2a91faedd145de4abdca0f52f40882 Mon Sep 17 00:00:00 2001 From: gagb Date: Fri, 13 Dec 2024 13:52:37 -0800 Subject: [PATCH 4/8] Shift to DocumentConverter class --- src/markitdown/_markitdown.py | 140 ++++++++++++++++++---------------- tests/test_markitdown.py | 2 +- 2 files changed, 76 insertions(+), 66 deletions(-) diff --git a/src/markitdown/_markitdown.py b/src/markitdown/_markitdown.py index 141caa9..ca569c4 100644 --- a/src/markitdown/_markitdown.py +++ b/src/markitdown/_markitdown.py
@@ -845,6 +845,75 @@ def _get_mlm_description(self, local_path, extension, client, model, prompt=None return response.choices[0].message.content +class GitHubIssueConverter(DocumentConverter): + """Converts GitHub issues to Markdown.""" + + def convert(self, issue_url, github_token) -> Union[None, DocumentConverterResult]: + + # Bail if not a valid GitHub issue URL + if issue_url: + parsed_url = urlparse(issue_url) + path_parts = parsed_url.path.strip("/").split("/") + if len(path_parts) < 4 or path_parts[2] != "issues": + return None + + if not github_token: + raise ValueError("GitHub token is not set. Cannot convert GitHub issue.") + + return self._convert_github_issue(issue_url, github_token) + + return None + + def _convert_github_issue( + self, issue_url: str, github_token: str + ) -> DocumentConverterResult: + """ + Convert a GitHub issue to a markdown document. + Args: + issue_url (str): The URL of the GitHub issue to convert. + github_token (str): A GitHub token with access to the repository. + Returns: + DocumentConverterResult: The result containing the issue title and markdown content. + Raises: + ImportError: If the PyGithub library is not installed. + ValueError: If the provided URL is not a valid GitHub issue URL. + """ + if not IS_GITHUB_ISSUE_CAPABLE: + raise ImportError( + "PyGithub is not installed. Please install it to use this feature." 
+ ) + + # Parse the issue URL + parsed_url = urlparse(issue_url) + path_parts = parsed_url.path.strip("/").split("/") + if len(path_parts) < 4 or path_parts[2] != "issues": + raise ValueError("Invalid GitHub issue URL") + + owner, repo, _, issue_number = path_parts[:4] + + # Authenticate with GitHub + g = Github(github_token) + repo = g.get_repo(f"{owner}/{repo}") + issue = repo.get_issue(int(issue_number)) + + # Convert issue details to markdown + markdown_content = f"# {issue.title}\n\n{issue.body}\n\n" + markdown_content += f"**State:** {issue.state}\n" + markdown_content += f"**Created at:** {issue.created_at}\n" + markdown_content += f"**Updated at:** {issue.updated_at}\n" + markdown_content += f"**Comments:**\n" + + for comment in issue.get_comments(): + markdown_content += ( + f"- {comment.user.login} ({comment.created_at}): {comment.body}\n" + ) + + return DocumentConverterResult( + title=issue.title, + text_content=markdown_content, + ) + + class FileConversionException(BaseException): pass @@ -897,6 +966,12 @@ def convert( - source: can be a string representing a path or url, or a requests.response object - extension: specifies the file extension to use when interpreting the file. If None, infer from source (path, uri, content-type, etc.) 
""" + # Handle GitHub issue URLs directly + if isinstance(source, str) and "github.com" in source and "/issues/" in source: + github_token = kwargs.get("github_token", os.getenv("GITHUB_TOKEN")) + if not github_token: + raise ValueError("GitHub token is required for GitHub issue conversion.") + return GitHubIssueConverter().convert(issue_url=source, github_token=github_token) # Local path or url if isinstance(source, str): @@ -1107,68 +1182,3 @@ def _guess_ext_magic(self, path): def register_page_converter(self, converter: DocumentConverter) -> None: """Register a page text converter.""" self._page_converters.insert(0, converter) - - def convert_github_issue( - self, issue_url: str, github_token: str - ) -> DocumentConverterResult: - """ - Convert a GitHub issue to a markdown document. - - Args: - issue_url (str): The URL of the GitHub issue to convert. - github_token (str): A GitHub token with access to the repository. - - Returns: - DocumentConverterResult: The result containing the issue title and markdown content. - - Raises: - ImportError: If the PyGithub library is not installed. - ValueError: If the provided URL is not a valid GitHub issue URL. - - Example: - # Example markdown format - # Issue Title - - Issue body content... - - **State:** open - **Created at:** 2023-10-01 12:34:56 - **Updated at:** 2023-10-02 12:34:56 - **Comments:** - - user1 (2023-10-01 13:00:00): Comment content... - - user2 (2023-10-01 14:00:00): Another comment... - """ - if not IS_GITHUB_ISSUE_CAPABLE: - raise ImportError( - "PyGithub is not installed. Please install it to use this feature." 
- ) - - # Parse the issue URL - parsed_url = urlparse(issue_url) - path_parts = parsed_url.path.strip("/").split("/") - if len(path_parts) < 4 or path_parts[2] != "issues": - raise ValueError("Invalid GitHub issue URL") - - owner, repo, _, issue_number = path_parts[:4] - - # Authenticate with GitHub - g = Github(github_token) - repo = g.get_repo(f"{owner}/{repo}") - issue = repo.get_issue(int(issue_number)) - - # Convert issue details to markdown - markdown_content = f"# {issue.title}\n\n{issue.body}\n\n" - markdown_content += f"**State:** {issue.state}\n" - markdown_content += f"**Created at:** {issue.created_at}\n" - markdown_content += f"**Updated at:** {issue.updated_at}\n" - markdown_content += f"**Comments:**\n" - - for comment in issue.get_comments(): - markdown_content += ( - f"- {comment.user.login} ({comment.created_at}): {comment.body}\n" - ) - - return DocumentConverterResult( - title=issue.title, - text_content=markdown_content, - ) diff --git a/tests/test_markitdown.py b/tests/test_markitdown.py index ee63fa2..fa64738 100644 --- a/tests/test_markitdown.py +++ b/tests/test_markitdown.py @@ -188,7 +188,7 @@ def test_markitdown_exiftool() -> None: ) def test_markitdown_github_issue() -> None: markitdown = MarkItDown() - result = markitdown.convert_github_issue(GITHUB_ISSUE_URL, GITHUB_TOKEN) + result = markitdown.convert(GITHUB_ISSUE_URL, github_token=GITHUB_TOKEN) print(result.text_content) assert "User-Defined Functions" in result.text_content assert "closed" in result.text_content From 778fca3f701d089e8fbe497434274bf9b41310c3 Mon Sep 17 00:00:00 2001 From: gagb Date: Fri, 13 Dec 2024 13:57:03 -0800 Subject: [PATCH 5/8] Fix code scanning alert no. 
1: Incomplete URL substring sanitization Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> --- src/markitdown/_markitdown.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/markitdown/_markitdown.py b/src/markitdown/_markitdown.py index ca569c4..225fe99 100644 --- a/src/markitdown/_markitdown.py +++ b/src/markitdown/_markitdown.py @@ -967,11 +967,13 @@ def convert( - extension: specifies the file extension to use when interpreting the file. If None, infer from source (path, uri, content-type, etc.) """ # Handle GitHub issue URLs directly - if isinstance(source, str) and "github.com" in source and "/issues/" in source: - github_token = kwargs.get("github_token", os.getenv("GITHUB_TOKEN")) - if not github_token: - raise ValueError("GitHub token is required for GitHub issue conversion.") - return GitHubIssueConverter().convert(issue_url=source, github_token=github_token) + if isinstance(source, str): + parsed_url = urlparse(source) + if parsed_url.hostname == "github.com" and "/issues/" in parsed_url.path: + github_token = kwargs.get("github_token", os.getenv("GITHUB_TOKEN")) + if not github_token: + raise ValueError("GitHub token is required for GitHub issue conversion.") + return GitHubIssueConverter().convert(issue_url=source, github_token=github_token) # Local path or url if isinstance(source, str): From f1274dca87ef536f62c392dd44be59af0ef5e2d4 Mon Sep 17 00:00:00 2001 From: gagb Date: Fri, 13 Dec 2024 13:58:24 -0800 Subject: [PATCH 6/8] Run pre-commit --- src/markitdown/_markitdown.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/markitdown/_markitdown.py b/src/markitdown/_markitdown.py index 225fe99..d7672fa 100644 --- a/src/markitdown/_markitdown.py +++ b/src/markitdown/_markitdown.py @@ -849,7 +849,6 @@ class GitHubIssueConverter(DocumentConverter): """Converts GitHub issues to Markdown.""" def convert(self, issue_url, 
github_token) -> Union[None, DocumentConverterResult]: - # Bail if not a valid GitHub issue URL if issue_url: parsed_url = urlparse(issue_url) @@ -858,7 +857,9 @@ def convert(self, issue_url, github_token) -> Union[None, DocumentConverterResul return None if not github_token: - raise ValueError("GitHub token is not set. Cannot convert GitHub issue.") + raise ValueError( + "GitHub token is not set. Cannot convert GitHub issue." + ) return self._convert_github_issue(issue_url, github_token) @@ -972,8 +973,12 @@ def convert( if parsed_url.hostname == "github.com" and "/issues/" in parsed_url.path: github_token = kwargs.get("github_token", os.getenv("GITHUB_TOKEN")) if not github_token: - raise ValueError("GitHub token is required for GitHub issue conversion.") - return GitHubIssueConverter().convert(issue_url=source, github_token=github_token) + raise ValueError( + "GitHub token is required for GitHub issue conversion." + ) + return GitHubIssueConverter().convert( + issue_url=source, github_token=github_token + ) # Local path or url if isinstance(source, str): From 0b6554738cee06c77da6437f77d628ef689ec691 Mon Sep 17 00:00:00 2001 From: gagb Date: Fri, 13 Dec 2024 14:16:56 -0800 Subject: [PATCH 7/8] Move github handling from convert to convert_url --- src/markitdown/_markitdown.py | 41 +++++++++++++++++------------------ 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/src/markitdown/_markitdown.py b/src/markitdown/_markitdown.py index d7672fa..9602300 100644 --- a/src/markitdown/_markitdown.py +++ b/src/markitdown/_markitdown.py @@ -967,19 +967,6 @@ def convert( - source: can be a string representing a path or url, or a requests.response object - extension: specifies the file extension to use when interpreting the file. If None, infer from source (path, uri, content-type, etc.) 
""" - # Handle GitHub issue URLs directly - if isinstance(source, str): - parsed_url = urlparse(source) - if parsed_url.hostname == "github.com" and "/issues/" in parsed_url.path: - github_token = kwargs.get("github_token", os.getenv("GITHUB_TOKEN")) - if not github_token: - raise ValueError( - "GitHub token is required for GitHub issue conversion." - ) - return GitHubIssueConverter().convert( - issue_url=source, github_token=github_token - ) - # Local path or url if isinstance(source, str): if ( @@ -994,6 +981,26 @@ def convert( elif isinstance(source, requests.Response): return self.convert_response(source, **kwargs) + def convert_url( + self, url: str, **kwargs: Any + ) -> DocumentConverterResult: # TODO: fix kwargs type + # Handle GitHub issue URLs directly + parsed_url = urlparse(url) + if parsed_url.hostname == "github.com" and "/issues/" in parsed_url.path: + github_token = kwargs.get("github_token", os.getenv("GITHUB_TOKEN")) + if not github_token: + raise ValueError( + "GitHub token is required for GitHub issue conversion." 
+ ) + return GitHubIssueConverter().convert( + issue_url=url, github_token=github_token + ) + + # Send a HTTP request to the URL + response = self._requests_session.get(url, stream=True) + response.raise_for_status() + return self.convert_response(response, **kwargs) + def convert_local( self, path: str, **kwargs: Any ) -> DocumentConverterResult: # TODO: deal with kwargs @@ -1048,14 +1055,6 @@ def convert_stream( return result - def convert_url( - self, url: str, **kwargs: Any - ) -> DocumentConverterResult: # TODO: fix kwargs type - # Send a HTTP request to the URL - response = self._requests_session.get(url, stream=True) - response.raise_for_status() - return self.convert_response(response, **kwargs) - def convert_response( self, response: requests.Response, **kwargs: Any ) -> DocumentConverterResult: # TODO fix kwargs type From 8a30fca7328179144d7b8de4a66b6480a119a16e Mon Sep 17 00:00:00 2001 From: gagb Date: Fri, 13 Dec 2024 14:57:39 -0800 Subject: [PATCH 8/8] Add support for GH prs as well --- src/markitdown/_markitdown.py | 78 +++++++++++++++++++++++++++++------ tests/test_markitdown.py | 13 ++++++ 2 files changed, 79 insertions(+), 12 deletions(-) diff --git a/src/markitdown/_markitdown.py b/src/markitdown/_markitdown.py index 9602300..043d68d 100644 --- a/src/markitdown/_markitdown.py +++ b/src/markitdown/_markitdown.py @@ -846,22 +846,25 @@ def _get_mlm_description(self, local_path, extension, client, model, prompt=None class GitHubIssueConverter(DocumentConverter): - """Converts GitHub issues to Markdown.""" + """Converts GitHub issues and pull requests to Markdown.""" - def convert(self, issue_url, github_token) -> Union[None, DocumentConverterResult]: - # Bail if not a valid GitHub issue URL - if issue_url: - parsed_url = urlparse(issue_url) + def convert(self, github_url, github_token) -> Union[None, DocumentConverterResult]: + # Bail if not a valid GitHub issue or pull request URL + if github_url: + parsed_url = urlparse(github_url) path_parts = 
parsed_url.path.strip("/").split("/") - if len(path_parts) < 4 or path_parts[2] != "issues": + if len(path_parts) < 4 or path_parts[2] not in ["issues", "pull"]: return None if not github_token: raise ValueError( - "GitHub token is not set. Cannot convert GitHub issue." + "GitHub token is not set. Cannot convert GitHub issue or pull request." ) - return self._convert_github_issue(issue_url, github_token) + if path_parts[2] == "issues": + return self._convert_github_issue(github_url, github_token) + elif path_parts[2] == "pull": + return self._convert_github_pr(github_url, github_token) return None @@ -914,6 +917,55 @@ def _convert_github_issue( text_content=markdown_content, ) + def _convert_github_pr( + self, pr_url: str, github_token: str + ) -> DocumentConverterResult: + """ + Convert a GitHub pull request to a markdown document. + Args: + pr_url (str): The URL of the GitHub pull request to convert. + github_token (str): A GitHub token with access to the repository. + Returns: + DocumentConverterResult: The result containing the pull request title and markdown content. + Raises: + ImportError: If the PyGithub library is not installed. + ValueError: If the provided URL is not a valid GitHub pull request URL. + """ + if not IS_GITHUB_ISSUE_CAPABLE: + raise ImportError( + "PyGithub is not installed. Please install it to use this feature." 
+ ) + + # Parse the pull request URL + parsed_url = urlparse(pr_url) + path_parts = parsed_url.path.strip("/").split("/") + if len(path_parts) < 4 or path_parts[2] != "pull": + raise ValueError("Invalid GitHub pull request URL") + + owner, repo, _, pr_number = path_parts[:4] + + # Authenticate with GitHub + g = Github(github_token) + repo = g.get_repo(f"{owner}/{repo}") + pr = repo.get_pull(int(pr_number)) + + # Convert pull request details to markdown + markdown_content = f"# {pr.title}\n\n{pr.body}\n\n" + markdown_content += f"**State:** {pr.state}\n" + markdown_content += f"**Created at:** {pr.created_at}\n" + markdown_content += f"**Updated at:** {pr.updated_at}\n" + markdown_content += f"**Comments:**\n" + + for comment in pr.get_issue_comments(): + markdown_content += ( + f"- {comment.user.login} ({comment.created_at}): {comment.body}\n" + ) + + return DocumentConverterResult( + title=pr.title, + text_content=markdown_content, + ) + class FileConversionException(BaseException): pass @@ -984,16 +1036,18 @@ def convert( def convert_url( self, url: str, **kwargs: Any ) -> DocumentConverterResult: # TODO: fix kwargs type - # Handle GitHub issue URLs directly + # Handle GitHub issue and pull request URLs directly parsed_url = urlparse(url) - if parsed_url.hostname == "github.com" and "/issues/" in parsed_url.path: + if parsed_url.hostname == "github.com" and any( + x in parsed_url.path for x in ["/issues/", "/pull/"] + ): github_token = kwargs.get("github_token", os.getenv("GITHUB_TOKEN")) if not github_token: raise ValueError( - "GitHub token is required for GitHub issue conversion." + "GitHub token is required for GitHub issue or pull request conversion." 
) return GitHubIssueConverter().convert( - issue_url=url, github_token=github_token + github_url=url, github_token=github_token ) # Send a HTTP request to the URL diff --git a/tests/test_markitdown.py b/tests/test_markitdown.py index fa64738..9a35e27 100644 --- a/tests/test_markitdown.py +++ b/tests/test_markitdown.py @@ -88,6 +88,7 @@ ] GITHUB_ISSUE_URL = "https://github.com/microsoft/autogen/issues/1421" +GITHUB_PR_URL = "https://github.com/microsoft/autogen/pull/194" GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", "") @@ -195,9 +196,21 @@ def test_markitdown_github_issue() -> None: assert "Comments:" in result.text_content +@pytest.mark.skipif( + not GITHUB_TOKEN, + reason="GitHub token not provided", +) +def test_markitdown_github_pr() -> None: + markitdown = MarkItDown() + result = markitdown.convert(GITHUB_PR_URL, github_token=GITHUB_TOKEN) + print(result.text_content) + assert "faq" in result.text_content + + if __name__ == "__main__": """Runs this file's tests from the command line.""" test_markitdown_remote() test_markitdown_local() test_markitdown_exiftool() test_markitdown_github_issue() + test_markitdown_github_pr()