-
Notifications
You must be signed in to change notification settings - Fork 34
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
(feat) Next iteration of all xml-support #929
Changes from all commits
fe903bb
254da37
6ed5710
a5bf9ad
7f386b2
38d8b13
396e675
86f5f3c
14ec10f
39ed055
57af4ad
8a34709
f11cf5a
47b66a9
fb921cc
8b4fcaa
bf330f0
c924e77
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
import unittest | ||
from vespa.configuration.vt import compare_xml | ||
|
||
|
||
class TestXMLComparison(unittest.TestCase):
    """Checks which pairs of XML strings `compare_xml` treats as equal."""

    def _assert_same(self, left, right):
        # Helper: the two documents must be reported as equal.
        self.assertTrue(compare_xml(left, right))

    def _assert_differ(self, left, right):
        # Helper: the two documents must be reported as different.
        self.assertFalse(compare_xml(left, right))

    def test_equal_simple(self):
        # Two identical documents.
        doc = "<root><child>Text</child></root>"
        self._assert_same(doc, doc)

    def test_whitespace_differences(self):
        # Indentation/newlines between elements must not matter.
        compact = "<root><child>Text</child></root>"
        indented = "<root>\n  <child>Text</child>\n</root>"
        self._assert_same(compact, indented)

    def test_attribute_order(self):
        # The order in which attributes are written must not matter.
        b_first = '<root><child b="2" a="1">Text</child></root>'
        a_first = '<root><child a="1" b="2">Text</child></root>'
        self._assert_same(b_first, a_first)

    def test_text_whitespace(self):
        # Leading/trailing whitespace inside element text must not matter.
        padded = "<root><child>  Text  </child></root>"
        trimmed = "<root><child>Text</child></root>"
        self._assert_same(padded, trimmed)

    def test_different_text(self):
        # Different text content makes the documents differ.
        first = "<root><child>Text1</child></root>"
        second = "<root><child>Text2</child></root>"
        self._assert_differ(first, second)

    def test_different_structure(self):
        # An extra nesting level makes the documents differ.
        flat = "<root><child>Text</child></root>"
        nested = "<root><child><subchild>Text</subchild></child></root>"
        self._assert_differ(flat, nested)

    def test_namespace_handling(self):
        # Namespaces are considered in the tag comparison
        namespaced = '<root xmlns="namespace"><child>Text</child></root>'
        plain = "<root><child>Text</child></root>"
        self._assert_differ(namespaced, plain)

    def test_comments_ignored(self):
        # Comments are not part of the element tree; they are ignored
        commented = "<root><!-- A comment --><child>Text</child></root>"
        plain = "<root><child>Text</child></root>"
        self._assert_same(commented, plain)

    def test_processing_instructions(self):
        # A leading `<?xml ...?>` line must not affect equality.
        with_prolog = "<?xml version='1.0'?><root><child>Text</child></root>"
        without_prolog = "<root><child>Text</child></root>"
        self._assert_same(with_prolog, without_prolog)

    def test_different_attributes(self):
        # Same attribute name with a different value makes the documents differ.
        first = '<root><child a="1">Text</child></root>'
        second = '<root><child a="2">Text</child></root>'
        self._assert_differ(first, second)

    def test_additional_attributes(self):
        # An attribute present on only one side makes the documents differ.
        two_attrs = '<root><child a="1" b="2">Text</child></root>'
        one_attr = '<root><child a="1">Text</child></root>'
        self._assert_differ(two_attrs, one_attr)

    def test_multiple_children_order(self):
        # Sibling order must not matter.
        ascending = "<root><child>1</child><child>2</child></root>"
        descending = "<root><child>2</child><child>1</child></root>"
        self._assert_same(ascending, descending)

    def test_namespace_prefixes(self):
        # Different namespaces make the tags different
        prefixed = '<root xmlns:ns="namespace"><ns:child>Text</ns:child></root>'
        plain = "<root><child>Text</child></root>"
        self._assert_differ(prefixed, plain)

    def test_cdata_handling(self):
        # CDATA-wrapped text must equal the same text written plainly.
        cdata = "<root><child><![CDATA[Text]]></child></root>"
        plain = "<root><child>Text</child></root>"
        self._assert_same(cdata, plain)
|
||
|
||
if __name__ == "__main__": | ||
unittest.main() |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ | |
import types | ||
from xml.sax.saxutils import escape | ||
from fastcore.utils import patch | ||
import xml.etree.ElementTree as ET | ||
|
||
# If the vespa tags correspond to reserved Python keywords, they are replaced with the following: | ||
replace_reserved = { | ||
|
@@ -64,6 +65,7 @@ def __iter__(self): | |
|
||
|
||
def attrmap(o): | ||
o = dict(_global="global").get(o, o) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What is the purpose of this? |
||
return o.lstrip("_").replace("_", "-") | ||
|
||
|
||
|
@@ -98,16 +100,21 @@ def vt( | |
**kw, | ||
): | ||
"Create an `VT` structure for `to_xml()`" | ||
return VT( | ||
tag.lower(), *_preproc(c, kw, attrmap=attrmap, valmap=valmap), void_=void_ | ||
) | ||
# NB! fastcore.xml uses tag.lower() for tag names. This is not done here. | ||
return VT(tag, *_preproc(c, kw, attrmap=attrmap, valmap=valmap), void_=void_) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What does preproc do? |
||
|
||
|
||
# XML void tags (self-closing) | ||
# TODO: Add self-closing tags for Vespa configuration | ||
voids = set("".split()) | ||
|
||
|
||
def Xml(*c, version="1.0", encoding="UTF-8", **kwargs) -> VT:
    "A top-level XML tag, with `version`, `encoding` and children `c`"
    # `void_` is passed as the string "?" rather than a bool.
    # NOTE(review): `**kwargs` is accepted but not forwarded to `vt` -- confirm this is intended.
    return vt("?xml", *c, version=version, encoding=encoding, void_="?")
|
||
|
||
# Replace the 'partial' based tag creation | ||
def create_tag_function(tag, void_): | ||
def tag_function(*c, **kwargs): | ||
|
@@ -157,6 +164,8 @@ def _to_xml(elm, lvl, indent, do_escape): | |
|
||
# Handle void (self-closing) tags | ||
if elm.void_: | ||
if isinstance(elm.void_, str): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. For context, void_ is a bool? |
||
return f"{sp}<{stag}{attr_str} {elm.void_}>{nl}" | ||
return f"{sp}<{stag}{attr_str} />{nl}" | ||
|
||
# Handle non-void tags with children or no children | ||
|
@@ -214,3 +223,49 @@ def __call__(self: VT, *c, **kw): | |
if kw: | ||
self.attrs = {**self.attrs, **kw} | ||
return self | ||
|
||
|
||
def _canonical_key(element):
    """Return a sortable tuple describing `element` and its whole subtree."""
    return (
        element.tag,
        (element.text or "").strip(),
        tuple(sorted(element.attrib.items())),
        tuple(_canonical_key(child) for child in element),
        (element.tail or "").strip(),
    )


def canonicalize(element):
    """Recursively sort attributes and children to canonicalize the element.

    The element is modified in place and nothing is returned:

    * attributes are re-inserted in sorted key order,
    * every child is canonicalized first, then the children are reordered,
    * surrounding whitespace is stripped from non-empty `text` and `tail`.

    Children are ordered by tag and stripped text first. Ties are then broken
    by attributes, by the (already canonical) child subtrees and by the tail,
    so siblings that differ only in attributes or descendants end up in the
    same order regardless of their order in the input.
    """
    # Sort attributes
    if element.attrib:
        element.attrib = dict(sorted(element.attrib.items()))
    # Canonicalize the children before sorting: the sort key looks into each
    # child's subtree, which must already be in canonical order.
    children = list(element)
    for child in children:
        canonicalize(child)
    element[:] = sorted(children, key=_canonical_key)
    # Strip whitespace from text and tail
    if element.text:
        element.text = element.text.strip()
    if element.tail:
        element.tail = element.tail.strip()
|
||
|
||
def elements_equal(e1, e2):
    """Return True when two elements match node by node, in document order.

    Two nodes match when they have the same tag, the same attributes
    (regardless of attribute order), the same text and tail after stripping
    surrounding whitespace (missing text counts as empty), and the same number
    of children. Children are then compared pairwise in their current order.
    """

    def _clean(value):
        # Treat a missing text/tail (None) the same as an empty string.
        return (value or "").strip()

    same_node = (
        e1.tag == e2.tag
        and e1.attrib == e2.attrib
        and _clean(e1.text) == _clean(e2.text)
        and _clean(e1.tail) == _clean(e2.tail)
        and len(e1) == len(e2)
    )
    if not same_node:
        return False
    for left, right in zip(e1, e2):
        if not elements_equal(left, right):
            return False
    return True
|
||
|
||
def compare_xml(xml_str1, xml_str2):
    """Return True when two XML strings are equal after canonicalization.

    Both strings are parsed, each tree is passed through `canonicalize`, and
    the resulting roots are compared with `elements_equal`. If either string
    fails to parse, the result is False.
    """
    try:
        roots = [ET.fromstring(document) for document in (xml_str1, xml_str2)]
    except ET.ParseError:
        # Unparseable input is never considered equal to anything.
        return False
    for root in roots:
        canonicalize(root)
    first, second = roots
    return elements_equal(first, second)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The intention is for "" to equal None, why?