Skip to content

Commit

Permalink
Move github handling from convert to convert_url
Browse files Browse the repository at this point in the history
  • Loading branch information
gagb committed Dec 13, 2024
1 parent f1274dc commit 0b65547
Showing 1 changed file with 20 additions and 21 deletions.
41 changes: 20 additions & 21 deletions src/markitdown/_markitdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -967,19 +967,6 @@ def convert(
- source: can be a string representing a path or url, or a requests.response object
- extension: specifies the file extension to use when interpreting the file. If None, infer from source (path, uri, content-type, etc.)
"""
# Handle GitHub issue URLs directly
if isinstance(source, str):
parsed_url = urlparse(source)
if parsed_url.hostname == "github.com" and "/issues/" in parsed_url.path:
github_token = kwargs.get("github_token", os.getenv("GITHUB_TOKEN"))
if not github_token:
raise ValueError(
"GitHub token is required for GitHub issue conversion."
)
return GitHubIssueConverter().convert(
issue_url=source, github_token=github_token
)

# Local path or url
if isinstance(source, str):
if (
Expand All @@ -994,6 +981,26 @@ def convert(
elif isinstance(source, requests.Response):
return self.convert_response(source, **kwargs)

def convert_url(
    self, url: str, **kwargs: Any
) -> DocumentConverterResult:  # TODO: fix kwargs type
    """Convert the resource at ``url``.

    URLs whose host is ``github.com`` and whose path contains ``/issues/``
    are handed to ``GitHubIssueConverter`` instead of being downloaded.
    Every other URL is fetched through ``self._requests_session`` and the
    response is passed to ``self.convert_response`` together with
    ``kwargs``.

    Args:
        url: The URL to convert.
        **kwargs: Forwarded to ``convert_response``. ``github_token`` is
            read for GitHub issue URLs; when it is missing or falsy the
            ``GITHUB_TOKEN`` environment variable is used instead.

    Raises:
        ValueError: If ``url`` is a GitHub issue URL and no token is
            available from either ``kwargs`` or the environment.
    """
    # Handle GitHub issue URLs directly
    parsed_url = urlparse(url)
    if parsed_url.hostname == "github.com" and "/issues/" in parsed_url.path:
        # Use ``or`` rather than a ``dict.get`` default so that an explicit
        # ``github_token=None`` (e.g. forwarded from an unset option) still
        # falls back to the environment variable.
        github_token = kwargs.get("github_token") or os.getenv("GITHUB_TOKEN")
        if not github_token:
            raise ValueError(
                "GitHub token is required for GitHub issue conversion."
            )
        return GitHubIssueConverter().convert(
            issue_url=url, github_token=github_token
        )

    # Send an HTTP request to the URL
    response = self._requests_session.get(url, stream=True)
    response.raise_for_status()
    return self.convert_response(response, **kwargs)

def convert_local(
self, path: str, **kwargs: Any
) -> DocumentConverterResult: # TODO: deal with kwargs
Expand Down Expand Up @@ -1048,14 +1055,6 @@ def convert_stream(

return result

def convert_url(
    self, url: str, **kwargs: Any
) -> DocumentConverterResult:  # TODO: fix kwargs type
    """Fetch ``url`` and convert the resulting response.

    The request goes through ``self._requests_session`` with ``stream=True``.
    ``raise_for_status()`` is called on the response before it is handed,
    along with ``kwargs``, to ``self.convert_response``.
    """
    http_response = self._requests_session.get(url, stream=True)
    # Check the status before attempting any conversion.
    http_response.raise_for_status()
    return self.convert_response(http_response, **kwargs)

def convert_response(
self, response: requests.Response, **kwargs: Any
) -> DocumentConverterResult: # TODO fix kwargs type
Expand Down

0 comments on commit 0b65547

Please sign in to comment.