Skip to content

Commit

Permalink
fix: chop documentation markdown at tables due to ugly formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
Mangern committed Nov 22, 2024
1 parent 09380fc commit f2616d6
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 40 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -48,23 +48,21 @@ public static void fetchSchemaDocs(Path targetPath) throws IOException {
Map<String, String> schemaMarkdownContent = new SchemaDocumentationFetcher(SCHEMA_URL).getMarkdownContent();

for (var entry : schemaMarkdownContent.entrySet()) {
String fileName = convertToToken(entry.getKey());
String tokenName = convertToToken(entry.getKey());
String content = entry.getValue();

if (REPLACE_FILENAME_MAP.containsKey(fileName)) {
for (String replacedFileName : REPLACE_FILENAME_MAP.get(fileName)) {
Files.write(writePath.resolve(replacedFileName + ".md"), content.getBytes(), StandardOpenOption.CREATE);
}
} else {
Files.write(writePath.resolve(fileName + ".md"), content.getBytes(), StandardOpenOption.CREATE);
List<String> fileNamesToWrite = REPLACE_FILENAME_MAP.getOrDefault(tokenName, List.of(tokenName));

for (String fileName : fileNamesToWrite) {
writeMarkdown(writePath.resolve(fileName + ".md"), content);
}
}

Map<String, String> rankFeatureMarkdownContent = new RankFeatureDocumentationFetcher(RANK_FEATURE_URL).getMarkdownContent();

writePath = targetPath.resolve("rankExpression");
for (var entry : rankFeatureMarkdownContent.entrySet()) {
Files.write(writePath.resolve(entry.getKey() + ".md"), entry.getValue().getBytes(), StandardOpenOption.CREATE);
writeMarkdown(writePath.resolve(entry.getKey() + ".md"), entry.getValue());
}
}

Expand All @@ -81,11 +79,15 @@ public static void fetchServicesDocs(Path targetPath) throws IOException {
for (var entry : markdownContent.entrySet()) {
if (entry.getKey().contains("/")) continue;
String fileName = entry.getKey().toLowerCase();
Files.write(writePath.resolve(fileName + ".md"), entry.getValue().getBytes(), StandardOpenOption.CREATE);
writeMarkdown(writePath.resolve(fileName + ".md"), entry.getValue());
}
}
}

/**
 * Writes a markdown document to the given file, replacing any previous content.
 *
 * @param writePath path of the file to write; the file is created if it does not exist
 * @param markdown the markdown text to store, encoded as UTF-8
 * @throws IOException if the file cannot be created or written
 */
private static void writeMarkdown(Path writePath, String markdown) throws IOException {
    // CREATE on its own opens an existing file without truncating it, so writing a shorter
    // document over a longer one would leave the old tail behind. Truncate explicitly.
    // The charset is pinned so the output does not depend on the platform default.
    Files.write(
            writePath,
            markdown.getBytes(java.nio.charset.StandardCharsets.UTF_8),
            StandardOpenOption.CREATE,
            StandardOpenOption.TRUNCATE_EXISTING);
}

/**
 * Converts a heading id into a token-style name by upper-casing it and replacing every
 * hyphen with an underscore, e.g. {@code "rank-profile"} becomes {@code "RANK_PROFILE"}.
 *
 * @param h2Id the id to convert (presumably the id attribute of an h2 heading; confirm against callers)
 * @return the upper-cased id with {@code '-'} replaced by {@code '_'}
 */
private static String convertToToken(String h2Id) {
    // Locale.ROOT keeps the result independent of the JVM default locale (a Turkish locale
    // would otherwise map 'i' to a dotted capital I). replace(char, char) avoids compiling a
    // regex for a plain single-character substitution.
    return h2Id.toUpperCase(java.util.Locale.ROOT).replace('-', '_');
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,13 +75,10 @@ Map<String, String> getMarkdownContent() throws IOException {
}

if (element.tag().equals(Tag.valueOf("table"))) {
Element tbody = element.selectFirst("tbody");
// replace all <th> in tbody with <td>
tbody.select("th").tagName("td");

// some tables have very big texts in td. For our purposes, only keep the first sentence.
if (prevH2.id().equals("field"))
manuallyFixFieldTable(tbody);
// The tables in the docs are inherently problematic
// so we just replace the first table and everything after with "read more"
prevH2 = null;
continue;
}

currentBuilder.append(element.outerHtml());
Expand All @@ -107,28 +104,4 @@ Map<String, String> getMarkdownContent() throws IOException {
}
return result;
}

/**
 * Shortens every cell matched by the selector {@code tr td:nth-child(2)} inside the given
 * element to the first sentence of its HTML.
 *
 * <p>The cell's HTML is cut at the first sentence boundary found outside parentheses and
 * outside {@code <...>} tag delimiters, and a period is appended to the kept text. If no
 * boundary is found the whole HTML is kept and a period is still appended.
 *
 * @param tbodyElement the element whose matching cells are rewritten in place
 */
private static void manuallyFixFieldTable(Element tbodyElement) {
    for (Element td : tbodyElement.select("tr td:nth-child(2)")) {
        String html = td.html();
        td.html(html.substring(0, firstSentenceLength(html)) + ".");
    }
}

/** Returns the index of the first top-level sentence boundary in {@code html}, or its length if there is none. */
private static int firstSentenceLength(String html) {
    int level = 0; // nesting depth of '(' ... ')' and '<' ... '>' seen so far
    for (int i = 0; i < html.length(); ++i) {
        // The boundary test runs before the depth is updated for the current character,
        // so an opening "<code>" or "<pre>" at depth 0 is still recognised.
        if (level == 0 && isSentenceBoundary(html, i)) {
            return i;
        }
        switch (html.charAt(i)) {
            case '(':
            case '<':
                ++level;
                break;
            case ')':
            case '>':
                --level;
                break;
            default:
                break;
        }
    }
    return html.length();
}

/** Tells whether position {@code i} of {@code html} ends the first sentence. */
private static boolean isSentenceBoundary(String html, int i) {
    char c = html.charAt(i);
    if (c == ':') {
        return true;
    }
    if (c == '.') {
        // A period belonging to the abbreviation "i.e." does not end the sentence.
        // regionMatches returns false for out-of-range offsets, so a period within the
        // first three characters no longer triggers a StringIndexOutOfBoundsException.
        boolean firstDotOfIe = html.regionMatches(i - 1, "i.e.", 0, 4);
        boolean secondDotOfIe = html.regionMatches(i - 3, "i.e.", 0, 4);
        return !firstDotOfIe && !secondDotOfIe;
    }
    // startsWith with an offset avoids allocating a substring for every character.
    return html.startsWith("<code>", i) || html.startsWith("<pre>", i);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,11 @@ Map<String, String> getMarkdownContent() throws IOException {
currentBuilder.append(nodeIterator.toString());
continue;
}
if (element.tag().equals(Tag.valueOf("table"))) {
// tables are inherently problematic so we just replace everything after the first table with "read more"
prevH2 = null;
continue;
}

currentBuilder.append(getElementHTML(element));
}
Expand Down

0 comments on commit f2616d6

Please sign in to comment.