Skip to content

Commit

Permalink
fix: parsing CDATA that has embedded XML doc (#11)
Browse files (browse the repository at this point in the history)
  • Loading branch information
muktihari authored Jun 16, 2024
1 parent fc0d0cc commit 9a55b7a
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 9 deletions.
16 changes: 13 additions & 3 deletions testdata/cdata.xml
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<data>
<![CDATA[ text ]]>
</data>
<content>
<data>
<![CDATA[ text ]]>
</data>
<data>
<![CDATA[<element>text</element>]]>
</data>
<data>
<![CDATA[
<element>text</element>
]]>
</data>
</content>
14 changes: 14 additions & 0 deletions testdata/cdata_clrf.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<content>
<data>
<![CDATA[ text ]]>
</data>
<data>
<![CDATA[<element>text</element>]]>
</data>
<data>
<![CDATA[
<element>text</element>
]]>
</data>
</content>
10 changes: 6 additions & 4 deletions tokenizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,6 @@ func (t *Tokenizer) RawToken() (b []byte, err error) {
}
if k < len(prefix) {
if t.buf[i] != prefix[k] {
k = 0
break
}
k++
Expand Down Expand Up @@ -216,7 +215,7 @@ func (t *Tokenizer) RawToken() (b []byte, err error) {
if t.buf[i] == '<' {
pos = i - 1
// Might be in the form of <![CDATA[ CharData ]]>
const prefix = "<![CDATA["
const prefix, suffix = "<![CDATA[", "]]>"
var k int = 1
for j := i + 1; ; j++ {
if j >= t.last {
Expand All @@ -235,7 +234,9 @@ func (t *Tokenizer) RawToken() (b []byte, err error) {
k++
continue
}
if t.buf[j] == '>' {
xx := string(t.buf[off : j+1])
_ = xx
if t.buf[j] == '>' && string(t.buf[j-2:j+1]) == suffix {
pos = j
break
}
Expand Down Expand Up @@ -384,11 +385,12 @@ func trim(b []byte) []byte {

func trimPrefix(b []byte) []byte {
var start int
for i := range b {
for i := 0; i < len(b); i++ {
switch b[i] {
case '\r':
if i+1 < len(b) && b[i+1] == '\n' {
start += 2
i++
}
case '\n', ' ':
start++
Expand Down
36 changes: 34 additions & 2 deletions tokenizer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,44 @@ func TestSmallXML(t *testing.T) {
}{
{filename: "cdata.xml", expecteds: []xmltokenizer.Token{
tokenHeader,
{Name: xmltokenizer.Name{Local: []byte("content"), Full: []byte("content")}},
{
Name: xmltokenizer.Name{Local: []byte("data"), Full: []byte("data")},
Data: []byte("text"),
},
{Name: xmltokenizer.Name{Local: []byte("/data"), Full: []byte("/data")}}},
},
{Name: xmltokenizer.Name{Local: []byte("/data"), Full: []byte("/data")}},
{
Name: xmltokenizer.Name{Local: []byte("data"), Full: []byte("data")},
Data: []byte("<element>text</element>"),
},
{Name: xmltokenizer.Name{Local: []byte("/data"), Full: []byte("/data")}},
{
Name: xmltokenizer.Name{Local: []byte("data"), Full: []byte("data")},
Data: []byte("<element>text</element>"),
},
{Name: xmltokenizer.Name{Local: []byte("/data"), Full: []byte("/data")}},
{Name: xmltokenizer.Name{Local: []byte("/content"), Full: []byte("/content")}},
}},
{filename: "cdata_clrf.xml", expecteds: []xmltokenizer.Token{
tokenHeader,
{Name: xmltokenizer.Name{Local: []byte("content"), Full: []byte("content")}},
{
Name: xmltokenizer.Name{Local: []byte("data"), Full: []byte("data")},
Data: []byte("text"),
},
{Name: xmltokenizer.Name{Local: []byte("/data"), Full: []byte("/data")}},
{
Name: xmltokenizer.Name{Local: []byte("data"), Full: []byte("data")},
Data: []byte("<element>text</element>"),
},
{Name: xmltokenizer.Name{Local: []byte("/data"), Full: []byte("/data")}},
{
Name: xmltokenizer.Name{Local: []byte("data"), Full: []byte("data")},
Data: []byte("<element>text</element>"),
},
{Name: xmltokenizer.Name{Local: []byte("/data"), Full: []byte("/data")}},
{Name: xmltokenizer.Name{Local: []byte("/content"), Full: []byte("/content")}},
}},
{filename: "self_closing.xml", expecteds: []xmltokenizer.Token{
tokenHeader,
{Name: xmltokenizer.Name{Local: []byte("a"), Full: []byte("a")}, SelfClosing: true},
Expand Down

0 comments on commit 9a55b7a

Please sign in to comment.