From 2f690b4b7d36615180069943fe4049e1c45a7476 Mon Sep 17 00:00:00 2001 From: Hans-Peter Stoerr Date: Tue, 17 Oct 2023 18:43:25 +0200 Subject: [PATCH] extend table markdown according to what's used in AEM and add link titles --- .../impl/AemApproximateMarkdownServicePlugin.java | 1 + .../AemApproximateMarkdownServicePluginTest.java | 2 +- .../test/composum-ai-testpages/.content.xml | 10 ++++++++-- .../chat/impl/HtmlToMarkdownConverter.java | 15 ++++++++++++++- .../chat/impl/HtmlToMarkdownConverterTest.java | 9 ++++++--- .../ComposumApproximateMarkdownServicePlugin.java | 1 + ...posumApproximateMarkdownServicePluginTest.java | 2 +- 7 files changed, 32 insertions(+), 8 deletions(-) diff --git a/aem/core/src/main/java/com/composum/ai/aem/core/impl/AemApproximateMarkdownServicePlugin.java b/aem/core/src/main/java/com/composum/ai/aem/core/impl/AemApproximateMarkdownServicePlugin.java index 863394fcb..e96c57e9b 100644 --- a/aem/core/src/main/java/com/composum/ai/aem/core/impl/AemApproximateMarkdownServicePlugin.java +++ b/aem/core/src/main/java/com/composum/ai/aem/core/impl/AemApproximateMarkdownServicePlugin.java @@ -88,6 +88,7 @@ protected boolean pageHandling(Resource resource, PrintWriter out, @Nonnull Appr } outputIfNotBlank(out, vm, "shortDescription", service); outputIfNotBlank(out, vm, JCR_DESCRIPTION, service); + out.println(); } return isPage; } diff --git a/aem/core/src/test/java/com/composum/ai/aem/core/impl/AemApproximateMarkdownServicePluginTest.java b/aem/core/src/test/java/com/composum/ai/aem/core/impl/AemApproximateMarkdownServicePluginTest.java index 78733c422..c42230ffe 100644 --- a/aem/core/src/test/java/com/composum/ai/aem/core/impl/AemApproximateMarkdownServicePluginTest.java +++ b/aem/core/src/test/java/com/composum/ai/aem/core/impl/AemApproximateMarkdownServicePluginTest.java @@ -86,7 +86,7 @@ public void testPageHandlingWithPageResource() { service.approximateMarkdown(component, printWriter, request, response); String expectedOutput = "# 
myPage\n\n" + - "The best page!\n"; + "The best page!\n\n"; assertThat(writer.toString(), is(expectedOutput)); } diff --git a/aem/ui.content/src/test/content/jcr_root/content/wknd/language-masters/test/composum-ai-testpages/.content.xml b/aem/ui.content/src/test/content/jcr_root/content/wknd/language-masters/test/composum-ai-testpages/.content.xml index 46795bb50..e452fe034 100644 --- a/aem/ui.content/src/test/content/jcr_root/content/wknd/language-masters/test/composum-ai-testpages/.content.xml +++ b/aem/ui.content/src/test/content/jcr_root/content/wknd/language-masters/test/composum-ai-testpages/.content.xml @@ -25,11 +25,16 @@ + <testdialog jcr:primaryType="nt:unstructured" sling:resourceType="composum-ai/test/components/testdialog" + text="Thisisatext" textIsRich="true" therichText="<p>Give a little bit of <b>rich</b> <i>text</i></p><ul><li>he</li><li>re</li></ul>" thetext="This is, not surprising, some plain text which we have here. Another line of plain text." @@ -56,7 +61,8 @@ <experiencefragment jcr:primaryType="nt:unstructured" sling:resourceType="wknd/components/experiencefragment" - fragmentVariationPath="/content/experience-fragments/wknd/us/en/adventures/adventures-2021/master"/> + fragmentVariationPath="/content/experience-fragments/wknd/us/en/adventures/adventures-2021/master" + text="New Adventures for 2021 Go somewhere incredible next year. This past year was challenging on a number of levels but we've got your back. We've made several changes and improvements to all the adventures to make them safer, more flexible and as stress-free as possible. All adventures offer a no-hassle cancellation, fully refundable, no questions asked. New! Bali Surf Camp Surfing in Bali is on the bucket list of every surfer - whether you're a beginner or someone who's been surfing for decades, there will be a break to cater to your ability. Bali offers warm water, tropical vibes, awesome breaks and low cost expenses. 
Bali Surf Camp"/> </container> </root> </jcr:content> diff --git a/backend/base/src/main/java/com/composum/ai/backend/base/service/chat/impl/HtmlToMarkdownConverter.java b/backend/base/src/main/java/com/composum/ai/backend/base/service/chat/impl/HtmlToMarkdownConverter.java index 4a392951a..4ad534792 100644 --- a/backend/base/src/main/java/com/composum/ai/backend/base/service/chat/impl/HtmlToMarkdownConverter.java +++ b/backend/base/src/main/java/com/composum/ai/backend/base/service/chat/impl/HtmlToMarkdownConverter.java @@ -10,6 +10,7 @@ import javax.annotation.Nullable; import org.jsoup.Jsoup; +import org.jsoup.internal.StringUtil; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.nodes.Node; @@ -38,7 +39,8 @@ public class HtmlToMarkdownConverter { /** * Important table attributes we need to keep. */ - private static final List<String> TABLE_ATTRIBUTES = List.of("border", "colspan", "rowspan", "align", "valign", "scope"); + private static final List<String> TABLE_ATTRIBUTES = List.of("border", "colspan", "rowspan", + "align", "valign", "scope", "cellpadding", "cellspacing", "width", "height", "bgcolor"); // continued indentation. 
Two spaces since four would be code block private final String indentStep = " "; @@ -50,6 +52,8 @@ public class HtmlToMarkdownConverter { @Nonnull public String convert(@Nullable String html) { + sb.setLength(0); + continuedIndentation = ""; if (html != null) { Document doc = Jsoup.parseBodyFragment(html); convertElement(doc.body()); @@ -102,6 +106,15 @@ private void convertElement(Element element) { convertChildren(element); sb.append("]("); sb.append(element.attr("href")); + String title = element.attr("title"); + if (StringUtil.isBlank(title)) { + title = element.attr("alt"); + } + if (!StringUtil.isBlank(title)) { + sb.append(" \""); + sb.append(title.replace("\"", "\\\"")); + sb.append("\""); + } sb.append(")"); break; diff --git a/backend/base/src/test/java/com/composum/ai/backend/base/service/chat/impl/HtmlToMarkdownConverterTest.java b/backend/base/src/test/java/com/composum/ai/backend/base/service/chat/impl/HtmlToMarkdownConverterTest.java index ad4c861be..300e2e414 100644 --- a/backend/base/src/test/java/com/composum/ai/backend/base/service/chat/impl/HtmlToMarkdownConverterTest.java +++ b/backend/base/src/test/java/com/composum/ai/backend/base/service/chat/impl/HtmlToMarkdownConverterTest.java @@ -18,9 +18,12 @@ public class HtmlToMarkdownConverterTest { @Test public void testConvertTagA() { - String html = "<a href=\"http://example.com\">click here</a>"; - String markdown = converter.convert(html); - ec.checkThat(markdown, is("[click here](http://example.com)")); + ec.checkThat(converter.convert("<a href=\"http://example.com\">click here</a>"), + is("[click here](http://example.com)")); + + // alt text + ec.checkThat(converter.convert("<a href=\"http://example.com\" alt=\"An example link\">click here</a>"), + is("[click here](http://example.com \"An example link\")")); } @Test diff --git a/composum/bundle/src/main/java/com/composum/ai/composum/bundle/service/ComposumApproximateMarkdownServicePlugin.java 
b/composum/bundle/src/main/java/com/composum/ai/composum/bundle/service/ComposumApproximateMarkdownServicePlugin.java index 46b768635..1d19ea2cb 100644 --- a/composum/bundle/src/main/java/com/composum/ai/composum/bundle/service/ComposumApproximateMarkdownServicePlugin.java +++ b/composum/bundle/src/main/java/com/composum/ai/composum/bundle/service/ComposumApproximateMarkdownServicePlugin.java @@ -69,6 +69,7 @@ protected boolean pageHandling(Resource resource, PrintWriter out, @Nonnull Appr if (StringUtils.isNotBlank(description)) { out.println(helper.getMarkdown(description)); } + out.println(); } return isPage; } diff --git a/composum/bundle/src/test/java/com/composum/ai/composum/bundle/service/ComposumApproximateMarkdownServicePluginTest.java b/composum/bundle/src/test/java/com/composum/ai/composum/bundle/service/ComposumApproximateMarkdownServicePluginTest.java index 2f8e8203b..9e71c2617 100644 --- a/composum/bundle/src/test/java/com/composum/ai/composum/bundle/service/ComposumApproximateMarkdownServicePluginTest.java +++ b/composum/bundle/src/test/java/com/composum/ai/composum/bundle/service/ComposumApproximateMarkdownServicePluginTest.java @@ -105,7 +105,7 @@ public void testPageHandlingWithPageResource() { service.approximateMarkdown(component, printWriter, request, response); String expectedOutput = "# myPage\n\n" + - "The best page!\n"; + "The best page!\n\n"; assertEquals(expectedOutput, writer.toString()); }