Skip to content

Commit

Permalink
Added support to use Pathlib (#93)
Browse files Browse the repository at this point in the history
* Add support for Path objects in MarkItDown conversion methods

* Remove unnecessary blank line in test_markitdown_exiftool function

* Remove unnecessary blank line in test_markitdown_exiftool function

* remove pathlib path in test file

---------

Co-authored-by: afourney <[email protected]>
Co-authored-by: gagb <[email protected]>
  • Loading branch information
3 people authored Dec 20, 2024
1 parent 7e6c36c commit 5276616
Showing 1 changed file with 8 additions and 3 deletions.
11 changes: 8 additions & 3 deletions src/markitdown/_markitdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import zipfile
from xml.dom import minidom
from typing import Any, Dict, List, Optional, Union
from pathlib import Path
from urllib.parse import parse_qs, quote, unquote, urlparse, urlunparse
from warnings import warn, resetwarnings, catch_warnings

Expand Down Expand Up @@ -1286,11 +1287,11 @@ def __init__(
self.register_page_converter(ZipConverter())

def convert(
self, source: Union[str, requests.Response], **kwargs: Any
self, source: Union[str, requests.Response, Path], **kwargs: Any
) -> DocumentConverterResult: # TODO: deal with kwargs
"""
Args:
- source: can be a string representing a path or url, or a requests.response object
- source: can be a local file path (given either as a string or as a pathlib.Path object), a URL string, or a requests.Response object
- extension: specifies the file extension to use when interpreting the file. If None, infer from source (path, uri, content-type, etc.)
"""

Expand All @@ -1307,10 +1308,14 @@ def convert(
# Request response
elif isinstance(source, requests.Response):
return self.convert_response(source, **kwargs)
elif isinstance(source, Path):
return self.convert_local(source, **kwargs)

def convert_local(
self, path: str, **kwargs: Any
self, path: Union[str, Path], **kwargs: Any
) -> DocumentConverterResult: # TODO: deal with kwargs
if isinstance(path, Path):
path = str(path)
# Prepare a list of extensions to try (in order of priority)
ext = kwargs.get("file_extension")
extensions = [ext] if ext is not None else []
Expand Down

0 comments on commit 5276616

Please sign in to comment.