From 0b6554738cee06c77da6437f77d628ef689ec691 Mon Sep 17 00:00:00 2001
From: gagb
Date: Fri, 13 Dec 2024 14:16:56 -0800
Subject: [PATCH] Move github handling from convert to convert_url

---
 src/markitdown/_markitdown.py | 41 +++++++++++++++++------------------
 1 file changed, 20 insertions(+), 21 deletions(-)

diff --git a/src/markitdown/_markitdown.py b/src/markitdown/_markitdown.py
index d7672fa..9602300 100644
--- a/src/markitdown/_markitdown.py
+++ b/src/markitdown/_markitdown.py
@@ -967,19 +967,6 @@ def convert(
             - source: can be a string representing a path or url, or a requests.response object
             - extension: specifies the file extension to use when interpreting the file. If None, infer from source (path, uri, content-type, etc.)
         """
-        # Handle GitHub issue URLs directly
-        if isinstance(source, str):
-            parsed_url = urlparse(source)
-            if parsed_url.hostname == "github.com" and "/issues/" in parsed_url.path:
-                github_token = kwargs.get("github_token", os.getenv("GITHUB_TOKEN"))
-                if not github_token:
-                    raise ValueError(
-                        "GitHub token is required for GitHub issue conversion."
-                    )
-                return GitHubIssueConverter().convert(
-                    issue_url=source, github_token=github_token
-                )
-
         # Local path or url
         if isinstance(source, str):
             if (
@@ -994,6 +981,26 @@ def convert(
         elif isinstance(source, requests.Response):
             return self.convert_response(source, **kwargs)
 
+    def convert_url(
+        self, url: str, **kwargs: Any
+    ) -> DocumentConverterResult:  # TODO: fix kwargs type
+        # Handle GitHub issue URLs directly
+        parsed_url = urlparse(url)
+        if parsed_url.hostname == "github.com" and "/issues/" in parsed_url.path:
+            github_token = kwargs.get("github_token", os.getenv("GITHUB_TOKEN"))
+            if not github_token:
+                raise ValueError(
+                    "GitHub token is required for GitHub issue conversion."
+                )
+            return GitHubIssueConverter().convert(
+                issue_url=url, github_token=github_token
+            )
+
+        # Send a HTTP request to the URL
+        response = self._requests_session.get(url, stream=True)
+        response.raise_for_status()
+        return self.convert_response(response, **kwargs)
+
     def convert_local(
         self, path: str, **kwargs: Any
     ) -> DocumentConverterResult:  # TODO: deal with kwargs
@@ -1048,14 +1055,6 @@ def convert_stream(
 
         return result
 
-    def convert_url(
-        self, url: str, **kwargs: Any
-    ) -> DocumentConverterResult:  # TODO: fix kwargs type
-        # Send a HTTP request to the URL
-        response = self._requests_session.get(url, stream=True)
-        response.raise_for_status()
-        return self.convert_response(response, **kwargs)
-
     def convert_response(
         self, response: requests.Response, **kwargs: Any
     ) -> DocumentConverterResult:  # TODO fix kwargs type