Skip to content

Commit

Permalink
JsonConverter for Converting JSON Files into Structured Markdown Files
Browse files Browse the repository at this point in the history
Converts JSON files to Markdown. The output preserves the structure of the JSON file as closely as possible, while using Markdown syntax for readability.
  • Loading branch information
ZeyuTeng96 authored Jan 3, 2025
1 parent 125e206 commit 0f948ad
Showing 1 changed file with 37 additions and 0 deletions.
37 changes: 37 additions & 0 deletions src/markitdown/_markitdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -1204,6 +1204,42 @@ def convert(
text_content=f"[ERROR] Failed to process zip file {local_path}: {str(e)}",
)

class JsonConverter(DocumentConverter):
    """
    Converts JSON files to Markdown.

    The output preserves the structure of the JSON file as closely as
    possible, while using Markdown syntax for readability:

    * a top-level object is emitted as 4-space-indented JSON text;
    * a top-level array becomes one ``# <list_heading> <n>`` section per
      element (numbered from 1), each followed by the rendered element;
    * any other top-level value is emitted on its own.
    """

    @staticmethod
    def _render_value(value) -> str:
        """Return the text used to display one decoded JSON value."""
        # Strings are shown verbatim (no surrounding quotes or escapes).
        if isinstance(value, str):
            return value
        # Everything else is serialised as JSON so that booleans and null
        # keep their JSON spelling (true/false/null) rather than Python's
        # (True/False/None); containers are pretty-printed.
        return json.dumps(value, indent=4, ensure_ascii=False)

    def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
        """
        Convert the JSON file at ``local_path`` to Markdown text.

        Args:
            local_path: Path of the file to read.
            **kwargs: ``file_extension`` selects this converter (must be
                ``.json``, case-insensitive); ``list_heading`` is the heading
                prefix used for elements of a top-level array (default
                ``"Elem"``).

        Returns:
            ``None`` when the extension is not ``.json``; otherwise a
            ``DocumentConverterResult`` with ``title=None`` and the Markdown
            in ``text_content``.

        Raises:
            Whatever ``open`` and ``json.load`` raise for an unreadable file
            or invalid JSON; those errors are not caught here.
        """
        # Bail if not a JSON file
        extension = kwargs.get("file_extension", "")
        if extension.lower() != ".json":
            return None

        list_heading = kwargs.get("list_heading", "Elem")

        # Read as bytes: json.load detects UTF-8/16/32 (and tolerates a BOM)
        # itself, so decoding no longer depends on the platform's default
        # text encoding.
        with open(local_path, "rb") as f:
            json_data = json.load(f)

        if isinstance(json_data, list):
            sections = [
                f"# {list_heading} {idx}\n{self._render_value(item)}"
                for idx, item in enumerate(json_data, start=1)
            ]
            md_content = "\n\n".join(sections)
        else:
            md_content = self._render_value(json_data)

        # Remove leading/trailing whitespace
        md_content = md_content.strip()

        return DocumentConverterResult(
            title=None,
            text_content=md_content,
        )

class FileConversionException(BaseException):
    """
    Exception type for file-conversion failures (going by its name; the
    raise sites are not visible in this part of the file).

    NOTE(review): this derives from ``BaseException`` rather than
    ``Exception``, so a plain ``except Exception:`` handler will not catch
    it. Confirm that is intended before relying on such handlers.
    """
Expand Down Expand Up @@ -1276,6 +1312,7 @@ def __init__(
self.register_page_converter(WikipediaConverter())
self.register_page_converter(YouTubeConverter())
self.register_page_converter(BingSerpConverter())
self.register_page_converter(JsonConverter())
self.register_page_converter(DocxConverter())
self.register_page_converter(XlsxConverter())
self.register_page_converter(PptxConverter())
Expand Down

0 comments on commit 0f948ad

Please sign in to comment.