Skip to content

Commit

Permalink
feat: impl html element node
Browse files Browse the repository at this point in the history
  • Loading branch information
boojack committed Jul 13, 2024
1 parent 99d0ab9 commit e1871ea
Show file tree
Hide file tree
Showing 7 changed files with 136 additions and 1 deletion.
1 change: 1 addition & 0 deletions ast/ast.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ const (
SuperscriptNode NodeType = "SUPERSCRIPT"
ReferencedContentNode NodeType = "REFERENCED_CONTENT"
SpoilerNode NodeType = "SPOILER"
HTMLElementNode NodeType = "HTML_ELEMENT"
)

type Node interface {
Expand Down
28 changes: 27 additions & 1 deletion ast/inline.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package ast

import "fmt"
import (
	"fmt"
	"sort"
	"strings"
)

type BaseInline struct {
BaseNode
Expand Down Expand Up @@ -267,3 +270,26 @@ func (*Spoiler) Type() NodeType {
// Restore renders the spoiler back to its source form, wrapping the
// content in double pipes, e.g. `||hidden||`.
func (n *Spoiler) Restore() string {
	return "||" + n.Content + "||"
}

// HTMLElement is an inline node representing a self-closing HTML element
// such as `<br />`.
type HTMLElement struct {
	BaseInline

	// TagName is the element name without brackets, e.g. "br".
	TagName string
	// Attributes maps attribute names to values; Restore renders each
	// entry as `key="value"`.
	Attributes map[string]string
}

// Type returns the node type identifier for HTML element nodes.
func (*HTMLElement) Type() NodeType {
	return HTMLElementNode
}

// Restore renders the element back to its self-closing source form,
// e.g. `<br />` or `<img src="x" />`.
//
// Attribute keys are emitted in sorted order so the output is
// deterministic; ranging over the map directly would yield a random
// order on each call when more than one attribute is present.
func (n *HTMLElement) Restore() string {
	keys := make([]string, 0, len(n.Attributes))
	for key := range n.Attributes {
		keys = append(keys, key)
	}
	sort.Strings(keys)

	attributes := make([]string, 0, len(keys))
	for _, key := range keys {
		attributes = append(attributes, fmt.Sprintf(`%s="%s"`, key, n.Attributes[key]))
	}
	attrStr := ""
	if len(attributes) > 0 {
		attrStr = " " + strings.Join(attributes, " ")
	}
	return fmt.Sprintf("<%s%s />", n.TagName, attrStr)
}
50 changes: 50 additions & 0 deletions parser/html_element.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package parser

import (
"slices"

"github.com/usememos/gomark/ast"
"github.com/usememos/gomark/parser/tokenizer"
)

// HTMLElementParser matches a small whitelist of inline, self-closing
// HTML elements (see availableHTMLElements), e.g. `<br />`.
type HTMLElementParser struct{}

// NewHTMLElementParser returns a new HTMLElementParser.
func NewHTMLElementParser() *HTMLElementParser {
	return &HTMLElementParser{}
}

// availableHTMLElements lists the tag names the parser accepts.
var availableHTMLElements = []string{
	"br",
}

// Match attempts to parse a self-closing HTML element (`<tag ... />`)
// from the start of tokens. It returns the node and the number of
// tokens consumed, or (nil, 0) when the tokens do not form a supported
// element.
func (*HTMLElementParser) Match(tokens []*tokenizer.Token) (ast.Node, int) {
	// The shortest valid element is `<x />`: `<`, name, space, `/`, `>`.
	if len(tokens) < 5 {
		return nil, 0
	}
	if tokens[0].Type != tokenizer.LessThan {
		return nil, 0
	}
	tagName := tokenizer.Stringify([]*tokenizer.Token{tokens[1]})
	if !slices.Contains(availableHTMLElements, tagName) {
		return nil, 0
	}

	// FindUnescaped returns -1 when absent, which the first check below
	// rejects; the closing `>` must sit at index >= 4 and be preceded by
	// a space and a slash (` /`).
	greaterThanIndex := tokenizer.FindUnescaped(tokens, tokenizer.GreaterThan)
	if greaterThanIndex+1 < 5 || tokens[greaterThanIndex-1].Type != tokenizer.Slash || tokens[greaterThanIndex-2].Type != tokenizer.Space {
		return nil, 0
	}

	// Consume through the closing `>` so the reported match length covers
	// the whole element; otherwise the `>` token is left behind and shows
	// up as a stray ">" text node after the element.
	matchedTokens := tokens[:greaterThanIndex+1]
	attributeTokens := matchedTokens[2 : greaterThanIndex-2]
	// TODO: Implement attribute parser.
	if len(attributeTokens) != 0 {
		return nil, 0
	}

	return &ast.HTMLElement{
		TagName:    tagName,
		Attributes: make(map[string]string),
	}, len(matchedTokens)
}
31 changes: 31 additions & 0 deletions parser/html_element_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package parser

import (
"testing"

"github.com/stretchr/testify/require"

"github.com/usememos/gomark/ast"
"github.com/usememos/gomark/parser/tokenizer"
"github.com/usememos/gomark/restore"
)

// TestHTMLElementParser verifies that a matched HTML element restores to
// the same source text as the expected node.
func TestHTMLElementParser(t *testing.T) {
	tests := []struct {
		text        string
		htmlElement ast.Node
	}{
		{
			text: "<br />",
			htmlElement: &ast.HTMLElement{
				TagName: "br",
			},
		},
	}

	for _, test := range tests {
		tokens := tokenizer.Tokenize(test.text)
		node, _ := NewHTMLElementParser().Match(tokens)
		// NOTE(review): only the restored text is compared; the consumed
		// token count returned by Match is discarded and never asserted.
		require.Equal(t, restore.Restore([]ast.Node{test.htmlElement}), restore.Restore([]ast.Node{node}))
	}
}
1 change: 1 addition & 0 deletions parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ var defaultInlineParsers = []InlineParser{
NewReferencedContentParser(),
NewTagParser(),
NewStrikethroughParser(),
NewHTMLElementParser(),
NewLineBreakParser(),
NewTextParser(),
}
Expand Down
23 changes: 23 additions & 0 deletions parser/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,29 @@ func TestParser(t *testing.T) {
},
},
},
{
text: "Hello\nworld<br />",
nodes: []ast.Node{
&ast.Paragraph{
Children: []ast.Node{
&ast.Text{
Content: "Hello",
},
},
},
&ast.LineBreak{},
&ast.Paragraph{
Children: []ast.Node{
&ast.Text{
Content: "world",
},
&ast.HTMLElement{
TagName: "br",
},
},
},
},
},
}

for _, test := range tests {
Expand Down
3 changes: 3 additions & 0 deletions parser/tokenizer/tokenizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ const (
Colon TokenType = ":"
Caret TokenType = "^"
Backslash TokenType = "\\"
Slash TokenType = "/"
NewLine TokenType = "\n"
Space TokenType = " "
)
Expand Down Expand Up @@ -96,6 +97,8 @@ func Tokenize(text string) []*Token {
tokens = append(tokens, NewToken(Caret, "^"))
case '\\':
tokens = append(tokens, NewToken(Backslash, `\`))
case '/':
tokens = append(tokens, NewToken(Slash, "/"))
case '\n':
tokens = append(tokens, NewToken(NewLine, "\n"))
case ' ':
Expand Down

0 comments on commit e1871ea

Please sign in to comment.