-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
This PR introduces a script to automate the process of updating the README.md file. The script ensures that tables in the README are refreshed with new metadata extracted from new samples. • Added a Python script to scan and process README.md files. • Implemented features to clean existing tables and replace them with updated content. • Linked the script functionality to Linear ticket DOC-16 for tracking. --------- Co-authored-by: Daniel Abraham <[email protected]>
- Loading branch information
1 parent
f604b2c
commit 43e9afc
Showing
3 changed files
with
225 additions
and
25 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
"""Extract metadata from project README files to update table in repo's README file.""" | ||
|
||
from pathlib import Path | ||
import re | ||
|
||
|
||
# Directory containing this script. It is the root that is scanned for
# project README files and the base that table links are made relative to.
ROOT_PATH = Path(__file__).parent
|
||
|
||
def generate_readme_table(folder_path: Path) -> list[str]:
    """Build the markdown table rows for the projects under ``folder_path``.

    Every ``README.md`` found recursively below ``folder_path`` is visited in
    sorted path order; its metadata is extracted and rendered as a table row.
    READMEs that render to an empty row are left out of the result.
    """
    rendered = (
        to_table_row(readme.parent, extract_metadata(readme))
        for readme in sorted(folder_path.rglob("README.md"))
    )
    # Keep only the READMEs that actually produced a row.
    return [row for row in rendered if row]
|
||
|
||
def extract_metadata(readme_file: Path) -> dict:
    """Extract ``key: value`` metadata fields from a project's README file.

    A field is a line that starts with a lowercase ASCII key, followed by a
    colon, at least one space or tab, and a non-empty value on the same line.
    Fields without a value are skipped. If a key appears more than once, the
    last occurrence wins.

    Args:
        readme_file: Path of the README file to parse (read as UTF-8).

    Returns:
        A dict mapping each key to its value. The value of "integrations" is
        the list of double-quoted strings found in the field; every other
        value is the raw text after the separator.
    """
    # Only horizontal whitespace may separate the colon from the value. With
    # "\s+" the separator could span a line break, so an empty field such as
    # "title:" would swallow the whole next line as its value (and hide the
    # field defined on that line).
    field_pattern = r"^([a-z]+):[ \t]+(.+)"  # "key: value"
    text = readme_file.read_text(encoding="utf-8")

    return {
        # Integrations value is a list of strings, others are just strings.
        key: re.findall(r'"(.+?)"', value) if key == "integrations" else value
        for key, value in re.findall(field_pattern, text, re.MULTILINE)
    }
|
||
|
||
def to_table_row(project_dir: Path, metadata: dict) -> str:
    """Convert a project's metadata into a markdown table row.

    Args:
        project_dir: Directory of the project; must be located under
            ``ROOT_PATH`` so that a relative link can be built.
        metadata: Metadata fields of the project. The "title", "description"
            and "integrations" (list of strings) keys are used; all are
            optional.

    Returns:
        A newline-terminated table row linking to the project directory, or
        an empty string when the metadata has no (non-empty) title.

    Raises:
        ValueError: If ``project_dir`` is not under ``ROOT_PATH``.
    """
    title = metadata.get("title", "")
    if not title:
        return ""

    description = metadata.get("description", "")
    integrations = ", ".join(metadata.get("integrations", []))
    # Markdown links need forward slashes; str(Path) would emit backslashes
    # when the script runs on Windows.
    link = project_dir.relative_to(ROOT_PATH).as_posix()

    return f"| [{title}](./{link}/) | {description} | {integrations} |\n"
|
||
|
||
def insert_rows_to_table(readme_file: Path, new_rows: list[str]) -> None:
    """Replace the table section of the README file with a fresh table.

    The span from the first ``-->`` to the last ``<!--`` in the file is
    replaced by the table header followed by ``new_rows``; the surrounding
    comment markers are preserved. The file is rewritten in place as UTF-8.
    If the file contains no such span, its content is written back unchanged.

    Args:
        readme_file: Path of the README file to update.
        new_rows: Newline-terminated markdown table rows, in display order.
    """
    md = readme_file.read_text(encoding="utf-8")
    header = "-->\n| Name | Description | Integration |\n| :--- | :---------- | :---------- |\n"
    replacement = header + "".join(new_rows) + "<!--"

    # Use a callable so the replacement is inserted verbatim. A plain string
    # is treated by re.sub as a template, so a backslash inside a row (e.g.
    # "\d" in a description) would raise re.error or be rewritten as an
    # escape sequence / group reference.
    md = re.sub("-->.+<!--", lambda _match: replacement, md, flags=re.DOTALL)
    readme_file.write_text(md, encoding="utf-8")
|
||
|
||
if __name__ == "__main__":
    # Regenerate the project table of the README that sits next to this script
    # from the metadata of every README found below ROOT_PATH.
    insert_rows_to_table(ROOT_PATH / "README.md", generate_readme_table(ROOT_PATH))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
from pathlib import Path | ||
import tempfile | ||
import unittest | ||
|
||
import update_projects_table | ||
|
||
|
||
class TestCreateProjectTable(unittest.TestCase):
    """Unit tests for the "update_projects_table" module."""

    def generic_test_extract_metadata(self, input, expected):
        """Check the metadata extracted from a temporary file holding ``input``.

        ``input`` is the file content as bytes, or None for an empty file.
        """
        with tempfile.NamedTemporaryFile(delete_on_close=False) as f:
            if input:
                f.write(input)
            # Close to flush the content; delete_on_close=False keeps the file
            # on disk until the "with" block exits.
            f.close()
            actual = update_projects_table.extract_metadata(Path(f.name))
        self.assertEqual(expected, actual)

    def test_extract_metadata(self):
        cases = [
            # Empty metadata.
            (None, {}),
            # Single basic field.
            (b"foo: bar", {"foo": "bar"}),
            # Single basic field with extra whitespaces (a recoverable typo).
            (b"foo: bar\n\n\n", {"foo": "bar"}),
            # Multiple basic fields.
            (
                b"aaa: 111\nbbb: 222\nccc: 333\n",
                {"aaa": "111", "bbb": "222", "ccc": "333"},
            ),
            # Empty integrations field.
            (b"integrations:", {}),
            (b"integrations: ", {}),
            (b"integrations: []", {"integrations": []}),
            # Non-empty integrations field.
            (b'integrations: ["a"]', {"integrations": ["a"]}),
            (b'integrations: ["a",]', {"integrations": ["a"]}),
            (b'integrations: ["a","b"]', {"integrations": ["a", "b"]}),
            (b'integrations: ["a", "b"]', {"integrations": ["a", "b"]}),
            # Combination of basic and integrations fields.
            (
                b'a: 1\nb: 2\nintegrations: ["c", "d"]\ne: 3',
                {"a": "1", "b": "2", "integrations": ["c", "d"], "e": "3"},
            ),
        ]
        for raw, expected in cases:
            # subTest reports every failing case instead of stopping at the first.
            with self.subTest(raw=raw):
                self.generic_test_extract_metadata(raw, expected)

    def test_to_table_row(self):
        # Anchor the project directory on the module's root rather than on the
        # current working directory: to_table_row() makes the path relative to
        # ROOT_PATH, which fails when the tests run from any other directory.
        project_dir = update_projects_table.ROOT_PATH / "dir"

        # Metadata without a title - ignore it.
        metadata = {"description": "d", "integrations": ["1"]}
        actual = update_projects_table.to_table_row(project_dir, metadata)
        self.assertEqual("", actual)

        # Metadata with nothing but the title.
        metadata = {"title": "blah blah blah"}
        expected = "| [blah blah blah](./dir/) |  |  |\n"
        actual = update_projects_table.to_table_row(project_dir, metadata)
        self.assertEqual(expected, actual)

        # Regular project with a single integration.
        metadata = {"title": "t", "description": "d", "integrations": ["1"]}
        expected = "| [t](./dir/) | d | 1 |\n"
        actual = update_projects_table.to_table_row(project_dir, metadata)
        self.assertEqual(expected, actual)

        # Regular project with multiple integrations.
        metadata = {"title": "t", "description": "d", "integrations": ["1", "2", "3"]}
        expected = "| [t](./dir/) | d | 1, 2, 3 |\n"
        actual = update_projects_table.to_table_row(project_dir, metadata)
        self.assertEqual(expected, actual)

    def generic_test_insert_rows_to_table(self, num_rows, expected):
        """Insert ``num_rows`` identical rows into a sample README and check the result."""
        with tempfile.NamedTemporaryFile(delete_on_close=False) as f:
            f.write(b"prefix\n<!--start-table-->\ngarbage\n<!--end-table-->\nsuffix\n")
            f.close()
            rows = ["| row |\n"] * num_rows
            update_projects_table.insert_rows_to_table(Path(f.name), rows)
            # Read back with the same encoding the module writes with.
            actual = Path(f.name).read_text(encoding="utf-8")
        self.assertEqual(expected, actual)

    def test_insert_rows_to_table_empty(self):
        expected = (
            "prefix\n<!--start-table-->\n"
            "| Name | Description | Integration |\n"
            "| :--- | :---------- | :---------- |\n"
            "<!--end-table-->\nsuffix\n"
        )
        self.generic_test_insert_rows_to_table(0, expected)

    def test_insert_rows_to_table_one_row(self):
        expected = (
            "prefix\n<!--start-table-->\n"
            "| Name | Description | Integration |\n"
            "| :--- | :---------- | :---------- |\n"
            "| row |\n"
            "<!--end-table-->\nsuffix\n"
        )
        self.generic_test_insert_rows_to_table(1, expected)

    def test_insert_rows_to_table_multiple_rows(self):
        expected = (
            "prefix\n<!--start-table-->\n"
            "| Name | Description | Integration |\n"
            "| :--- | :---------- | :---------- |\n"
            "| row |\n"
            "| row |\n"
            "| row |\n"
            "<!--end-table-->\nsuffix\n"
        )
        self.generic_test_insert_rows_to_table(3, expected)
|
||
|
||
# Run the test suite when this file is executed directly.
if __name__ == "__main__":
    unittest.main()