-
Notifications
You must be signed in to change notification settings - Fork 363
/
generate_toc.py
118 lines (96 loc) · 3.29 KB
/
generate_toc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import dataclasses
import re
import sys
from typing import List
@dataclasses.dataclass()
class Header:
    """A Markdown heading together with the anchor slug derived from it.

    Attributes:
        name: Heading text without the leading ``#`` markers.
        level: Heading depth (the number of leading ``#`` characters).
    """

    name: str
    level: int

    @property
    def slug(self):
        """Return the anchor fragment used to link to this heading.

        Spaces are turned into hyphens first, then ASCII punctuation and
        CJK punctuation are removed. Letter case is left untouched.
        """
        text = self.name.replace(' ', '-')
        # single chars that are removed
        text = re.sub(r'[`~!@#$%^&*()+=<>?,./:;"\'|{}\[\]\\–—]', '', text)
        # CJK punctuations that are removed.
        # The class is spelled with explicit escapes on purpose: a bare ``]``
        # inside the brackets would close the character class early (turning
        # the rest into a literal that never matches), and a bare ASCII ``-``
        # would strip the hyphens that replaced the spaces above. The ASCII
        # look-alikes of the fullwidth forms are already handled by the
        # previous substitution.
        text = re.sub(
            r'['
            r'\u3000\u3002\u3001'                    # ideographic space, 。 、
            r'\uff1f\uff01\uff0c\uff1b\uff1a'        # fullwidth ? ! , ; :
            r'\u201c\u201d\u2018\u2019'              # curly double/single quotes
            r'\u3010\u3011\u3014\u3015'              # 【 】 〔 〕
            r'\uff08\uff09\uff3b\uff3d'              # fullwidth ( ) [ ]
            r'\ufe41\ufe42\ufe43\ufe44'              # ﹁ ﹂ ﹃ ﹄
            r'\u2014\u2026'                          # — …
            r'\uff0d\uff5e'                          # fullwidth - ~
            r'\u300a\u300b\u3008\u3009\u300c\u300d'  # 《 》 〈 〉 「 」
            r']',
            '',
            text,
        )
        return text
class TOCMaker:
    """Build a nested Markdown bullet list that links to a document's headings.

    All options are keyword-only:

    Args:
        max_depth: Headings with a level greater than this are left out.
        link_prefix: Text placed in front of ``#slug`` in every generated link.
        indentation_size: Number of spaces added per nesting level.
        list_bullets: Bullet character for each level, starting at level 1;
            the last entry is reused for any deeper level.
        header_class: Callable invoked as ``header_class(name=..., level=...)``
            to create each heading object; the result must expose ``name``,
            ``level`` and ``slug``.
    """

    def __init__(
        self,
        *,
        max_depth=6,
        link_prefix='',
        indentation_size=2,
        list_bullets=('-', '*', '+', '-'),
        header_class=Header,
    ):
        self.max_depth = max_depth
        self.link_prefix = link_prefix
        self.indentation_size = indentation_size
        self.list_bullets = list_bullets
        self.header_class = header_class

    def make(self, text):
        """Return the table of contents for the Markdown source ``text``."""
        headers = self._collect_headers(text)
        return self._make_toc(headers)

    def make_from_file(self, fp):
        """Return the table of contents for the readable file object ``fp``."""
        return self.make(fp.read())

    def _collect_headers(self, text):
        """Return the headings found in ``text`` outside fenced code blocks."""
        headers = []
        inside_fence = False
        for line in text.splitlines():
            line = line.strip()
            # An odd number of ``` markers on a line opens or closes a fence.
            if line.count('```') % 2:
                inside_fence = not inside_fence
            if inside_fence or not line.startswith('#'):
                continue
            header = self._parse_header_from_line(line)
            if header.level <= self.max_depth:
                # Reuse the object parsed above instead of parsing twice.
                headers.append(header)
        return headers

    def _make_toc(self, headers: List[Header]):
        """Render ``headers`` as newline-joined, indented Markdown list items."""
        toc = []
        for header in headers:
            indentation = ' ' * ((header.level - 1) * self.indentation_size)
            bullet = self._get_bullet(header.level)
            toc.append(f'{indentation}{bullet} [{header.name}]({self.link_prefix}#{header.slug})')
        return '\n'.join(toc)

    def _get_bullet(self, level):
        """Return the bullet for ``level``; deeper levels reuse the last one."""
        if level > len(self.list_bullets):
            return self.list_bullets[-1]
        return self.list_bullets[level - 1]

    def _parse_header_from_line(self, line):
        """Split a stripped heading line into its level and title.

        The level is the length of the leading run of ``#`` characters and
        the name is everything after that run with surrounding whitespace
        removed. A line made only of ``#`` characters yields an empty name.
        """
        level = len(line) - len(line.lstrip('#'))
        # Slice right after the ``#`` run and let strip() drop the separator,
        # so a title written without a space (``#Title``) keeps its first
        # character instead of losing it to a fixed one-character skip.
        name = line[level:].strip()
        return self.header_class(
            name=name,
            level=level
        )
def paste_after(delimiter, content, text):
    """Return ``text`` cut at the ``delimiter`` line with ``content`` appended.

    Lines are copied until one equals ``delimiter`` once stripped of
    surrounding whitespace. That line is emitted as the bare delimiter,
    followed by ``content``; any text that came after it is dropped.

    Raises:
        ValueError: If no line of ``text`` matches ``delimiter``.
    """
    kept = []
    for raw_line in text.splitlines():
        if raw_line.strip() == delimiter:
            # Each piece carries its own trailing newline in addition to the
            # join separator, which leaves a blank line after the delimiter
            # and a newline at the very end.
            kept += [f'{delimiter}\n', f'{content}\n']
            return '\n'.join(kept)
        kept.append(raw_line)
    raise ValueError(f"Can't find delimiter '{delimiter}'")
if __name__ == '__main__':
    # Build the table of contents from questions.md and place it after the
    # '<!-- toc -->' marker in README.md. With '--check' on the command line
    # nothing is written: the script exits with status 1 if README.md would
    # change.
    #
    # Files are opened as UTF-8 explicitly so the result does not depend on
    # the platform's default encoding when the Markdown holds non-ASCII text.
    with open('questions.md', encoding='utf-8') as fp:
        maker = TOCMaker(link_prefix='questions.md/')
        toc = maker.make_from_file(fp)

    with open('README.md', 'r', encoding='utf-8') as fp:
        original = fp.read()
    changed = paste_after('<!-- toc -->', toc, original)

    if '--check' in sys.argv:
        if original != changed:
            print('Error')
            sys.exit(1)
    else:
        with open('README.md', 'w', encoding='utf-8') as fp:
            fp.write(changed)
    print('Done')