fix: chop documentation markdown at tables due to ugly formatting

vespa-engine · Nov 22, 2024 · f2616d6 · f2616d6
1 parent 09380fc
commit f2616d6
Show file tree

Hide file tree

Showing 3 changed files with 20 additions and 40 deletions.
diff --git a/...ver/language-server/src/main/java/ai/vespa/schemals/documentation/FetchDocumentation.java b/...ver/language-server/src/main/java/ai/vespa/schemals/documentation/FetchDocumentation.java
@@ -48,23 +48,21 @@ public static void fetchSchemaDocs(Path targetPath) throws IOException {
         Map<String, String> schemaMarkdownContent = new SchemaDocumentationFetcher(SCHEMA_URL).getMarkdownContent();
 
         for (var entry : schemaMarkdownContent.entrySet()) {
-            String fileName = convertToToken(entry.getKey());
+            String tokenName = convertToToken(entry.getKey());
             String content = entry.getValue();
 
-            if (REPLACE_FILENAME_MAP.containsKey(fileName)) {
-                for (String replacedFileName : REPLACE_FILENAME_MAP.get(fileName)) {
-                    Files.write(writePath.resolve(replacedFileName + ".md"), content.getBytes(), StandardOpenOption.CREATE);
-                }
-            } else {
-                Files.write(writePath.resolve(fileName + ".md"), content.getBytes(), StandardOpenOption.CREATE);
+            List<String> fileNamesToWrite = REPLACE_FILENAME_MAP.getOrDefault(tokenName, List.of(tokenName));
+
+            for (String fileName : fileNamesToWrite) {
+                writeMarkdown(writePath.resolve(fileName + ".md"), content);
             }
         }
 
         Map<String, String> rankFeatureMarkdownContent = new RankFeatureDocumentationFetcher(RANK_FEATURE_URL).getMarkdownContent();
 
         writePath = targetPath.resolve("rankExpression");
         for (var entry : rankFeatureMarkdownContent.entrySet()) {
-            Files.write(writePath.resolve(entry.getKey() + ".md"), entry.getValue().getBytes(), StandardOpenOption.CREATE);
+            writeMarkdown(writePath.resolve(entry.getKey() + ".md"), entry.getValue());
         }
     }
 
@@ -81,11 +79,15 @@ public static void fetchServicesDocs(Path targetPath) throws IOException {
             for (var entry : markdownContent.entrySet()) {
                 if (entry.getKey().contains("/")) continue;
                 String fileName = entry.getKey().toLowerCase();
-                Files.write(writePath.resolve(fileName + ".md"), entry.getValue().getBytes(), StandardOpenOption.CREATE);
+                writeMarkdown(writePath.resolve(fileName + ".md"), entry.getValue());
             }
         }
     }
 
+    /**
+     * Writes one generated markdown document to disk, replacing any previous version.
+     *
+     * @param writePath destination file; created if it does not exist
+     * @param markdown  markdown text to store, encoded as UTF-8
+     * @throws IOException if the file cannot be created or written
+     */
+    private static void writeMarkdown(Path writePath, String markdown) throws IOException {
+        // TRUNCATE_EXISTING matters: with CREATE alone an existing, longer file keeps its
+        // old tail after the new (shorter) content has been written over its beginning.
+        Files.write(writePath,
+                markdown.getBytes(java.nio.charset.StandardCharsets.UTF_8),
+                StandardOpenOption.CREATE,
+                StandardOpenOption.TRUNCATE_EXISTING);
+    }
+
     /**
      * Converts a heading id into its token form: upper-cased, with every
      * {@code '-'} replaced by {@code '_'}.
      *
      * @param h2Id id of the heading, e.g. {@code "rank-profile"}
      * @return the token form, e.g. {@code "RANK_PROFILE"}
      */
     private static String convertToToken(String h2Id) {
         // Locale.ROOT keeps the mapping stable regardless of the JVM's default locale
         // (a Turkish default would otherwise turn 'i' into a dotted capital 'İ').
         return h2Id.toUpperCase(java.util.Locale.ROOT).replace('-', '_');
     }

diff --git a/...uage-server/src/main/java/ai/vespa/schemals/documentation/SchemaDocumentationFetcher.java b/...uage-server/src/main/java/ai/vespa/schemals/documentation/SchemaDocumentationFetcher.java
@@ -75,13 +75,10 @@ Map<String, String> getMarkdownContent() throws IOException {
             }
 
             if (element.tag().equals(Tag.valueOf("table"))) {
-                Element tbody = element.selectFirst("tbody");
-                // replace all <th> in tbody with <td>
-                tbody.select("th").tagName("td");
-
-                // some tables have very big texts in td. For our purposes, only keep the first sentence.
-                if (prevH2.id().equals("field"))
-                    manuallyFixFieldTable(tbody);
+                // The tables in the docs are inherently problematic 
+                // so we just replace the first table and everything after with "read more"
+                prevH2 = null;
+                continue;
             }
 
             currentBuilder.append(element.outerHtml());
@@ -107,28 +104,4 @@ Map<String, String> getMarkdownContent() throws IOException {
         }
         return result;
 	}
-
-    private static void manuallyFixFieldTable(Element tbodyElement) {
-        for (Element td : tbodyElement.select("tr td:nth-child(2)")) {
-            String curr = td.html();
-            int level = 0;
-            int i;
-            for (i = 0; i < curr.length(); ++i) {
-                if ((
-                    (curr.charAt(i) == '.' && !curr.substring(i-1, Math.min(curr.length(), i+3)).equals("i.e.") && !curr.substring(i-3,i+1).equals("i.e."))
-                    || curr.substring(i).startsWith("<code>") 
-                    || curr.substring(i).startsWith("<pre>") 
-                    || curr.charAt(i) == ':') && level == 0) {
-                    break;
-                }
-                if (curr.charAt(i) == '(')++level;
-                if (curr.charAt(i) == ')')--level;
-                if (curr.charAt(i) == '<')++level;
-                if (curr.charAt(i) == '>')--level;
-            }
-            String firstSentence = curr.substring(0, i) + ".";
-            td.html(firstSentence);
-        }
-    }
-
 }
diff --git a/...ge-server/src/main/java/ai/vespa/schemals/documentation/ServicesDocumentationFetcher.java b/...ge-server/src/main/java/ai/vespa/schemals/documentation/ServicesDocumentationFetcher.java
@@ -97,6 +97,11 @@ Map<String, String> getMarkdownContent() throws IOException {
                     currentBuilder.append(nodeIterator.toString());
                 continue;
             }
+            if (element.tag().equals(Tag.valueOf("table"))) {
+                // tables are inherently problematic so we just replace everything after the first table with "read more"
+                prevH2 = null;
+                continue;
+            }
 
             currentBuilder.append(getElementHTML(element));
         }