Skip to content

Commit

Permalink
Merge pull request #955 from vespa-engine/thomasht86/fix-vt-tag-under…
Browse files Browse the repository at this point in the history
…score-replacement

(bugfix) fix vt tag underscore replacement
  • Loading branch information
thomasht86 authored Oct 17, 2024
2 parents a6525ca + 0177095 commit 244d5e6
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 13 deletions.
125 changes: 121 additions & 4 deletions tests/unit/test_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,19 @@
class TestVT(unittest.TestCase):
def test_sanitize_tag_name(self):
self.assertEqual(VT.sanitize_tag_name("content-node"), "content_node")
self.assertEqual(VT.sanitize_tag_name("search-engine"), "search_engine")
self.assertEqual(VT.sanitize_tag_name("from"), "from_")

def test_restore_tag_name(self):
self.assertEqual(VT.restore_tag_name("content_node"), "content-node")
self.assertEqual(VT.restore_tag_name("search_engine"), "search-engine")
self.assertEqual(vt("content_node").restore_tag_name(), "content-node")
self.assertEqual(vt("search_engine").restore_tag_name(), "search-engine")

def test_restore_with_underscores(self):
self.assertEqual(
vt("content_node", replace_underscores=False).tag, "content_node"
)
self.assertEqual(
vt("search_engine", replace_underscores=False).tag, "search_engine"
)

def test_attrmap(self):
self.assertEqual(attrmap("_max_memory"), "max-memory")
Expand All @@ -39,7 +47,7 @@ def test_single_tag(self):
self.assertEqual(str(xml_output), '<content attr="value"></content>')

def test_nested_tags(self):
nested_tag = VT("content", (VT("document", ()),), {"attr": "value"})
nested_tag = vt("content", attr="value")(vt("document"))
xml_output = to_xml(nested_tag, indent=False)
# Expecting nested tags with proper newlines and indentation
expected_output = '<content attr="value"><document></document></content>'
Expand Down Expand Up @@ -705,5 +713,114 @@ def test_document_expiry(self):
self.assertTrue(compare_xml(self.xml_schema, str(generated_xml)))


class TestUnderscoreAttributes(unittest.TestCase):
def setUp(self):
self.xml_schema = """<services version="1.0">
<container id="colpalidemo_container" version="1.0">
<search></search>
<document-api></document-api>
<document-processing></document-processing>
<clients>
<client id="mtls" permissions="read,write">
<certificate file="security/clients.pem" />
</client>
<client id="token_write" permissions="read,write">
<token id="colpalidemo_write" />
</client>
<client id="token_read" permissions="read">
<token id="colpalidemo_read" />
</client>
</clients>
<config name="container.qr-searchers">
<tag>
<bold>
<open>&lt;strong&gt;</open>
<close>&lt;/strong&gt;</close>
</bold>
<separator>...</separator>
</tag>
</config>
</container>
<content id="colpalidemo_content" version="1.0">
<redundancy>1</redundancy>
<documents>
<document type="pdf_page" mode="index"></document>
</documents>
<nodes>
<node distribution-key="0" hostalias="node1"></node>
</nodes>
<config name="vespa.config.search.summary.juniperrc">
<max_matches>2</max_matches>
<length>1000</length>
<surround_max>500</surround_max>
<min_length>300</min_length>
</config>
</content>
</services>
"""

def test_valid_config_from_string(self):
self.assertTrue(validate_services(self.xml_schema))

def test_generate_schema(self):
generated = services(
container(
search(),
document_api(),
document_processing(),
clients(
client(
certificate(file="security/clients.pem"),
id="mtls",
permissions="read,write",
),
client(
token(id="colpalidemo_write"),
id="token_write",
permissions="read,write",
),
client(
token(id="colpalidemo_read"),
id="token_read",
permissions="read",
),
),
config(
vt("tag")(
vt("bold")(
vt("open", "<strong>"),
vt("close", "</strong>"),
),
vt("separator", "..."),
),
name="container.qr-searchers",
),
id="colpalidemo_container",
version="1.0",
),
content(
redundancy("1"),
documents(document(type="pdf_page", mode="index")),
nodes(node(distribution_key="0", hostalias="node1")),
config(
vt("max_matches", "2", replace_underscores=False),
vt("length", "1000"),
vt("surround_max", "500", replace_underscores=False),
vt("min_length", "300", replace_underscores=False),
name="vespa.config.search.summary.juniperrc",
),
id="colpalidemo_content",
version="1.0",
),
version="1.0",
)
generated_xml = generated.to_xml()
# Validate against relaxng
print(self.xml_schema)
print(generated_xml)
self.assertTrue(validate_services(str(generated_xml)))
self.assertTrue(compare_xml(self.xml_schema, str(generated_xml)))


if __name__ == "__main__":
unittest.main()
2 changes: 2 additions & 0 deletions vespa/configuration/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,8 @@
"maxsize",
"clients",
"client",
"certificate",
"token",
]

# Fail if any tag is duplicated. Provide feedback of which tags are duplicated.
Expand Down
45 changes: 36 additions & 9 deletions vespa/configuration/vt.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
"for": "for_",
"time": "time_",
"io": "io_",
"from": "from_",
}
restore_reserved = {v: k for k, v in replace_reserved.items()}

Expand All @@ -25,19 +26,29 @@ def sanitize_tag_name(tag: str) -> str:
replaced = tag.replace("-", "_")
return replace_reserved.get(replaced, replaced)

@staticmethod
def restore_tag_name(tag: str) -> str:
"Restore sanitized tag names back to the original names for XML generation"
return restore_reserved.get(tag, tag).replace("_", "-")

def __init__(self, tag: str, cs: tuple, attrs: dict = None, void_=False, **kwargs):
def __init__(
self,
tag: str,
cs: tuple,
attrs: dict = None,
void_=False,
replace_underscores: bool = True,
**kwargs,
):
assert isinstance(cs, tuple)
self.tag = self.sanitize_tag_name(tag) # Sanitize tag name
self.children, self.attrs = cs, attrs or {}
self.void_ = void_
self.replace_underscores = replace_underscores

def __setattr__(self, k, v):
if k.startswith("__") or k in ("tag", "children", "attrs", "void_"):
if k.startswith("__") or k in (
"tag",
"children",
"attrs",
"void_",
"replace_underscores",
):
return super().__setattr__(k, v)
self.attrs[k.lstrip("_").replace("_", "-")] = v

Expand All @@ -53,6 +64,15 @@ def list(self):
def get(self, k, default=None):
return self.attrs.get(k.lstrip("_").replace("_", "-"), default)

def restore_tag_name(
self,
) -> str:
"Restore sanitized tag names back to the original names for XML generation"
restored = restore_reserved.get(self.tag, self.tag)
if self.replace_underscores:
return restored.replace("_", "-")
return restored

def __repr__(self):
return f"{self.tag}({self.children},{self.attrs})"

Expand Down Expand Up @@ -120,11 +140,17 @@ def vt(
void_: bool = False,
attrmap: callable = attrmap,
valmap: callable = valmap,
replace_underscores: bool = True,
**kw,
):
"Create a VT structure with `tag`, `children` and `attrs`"
# NB! fastcore.xml uses tag.lower() for tag names. This is not done here.
return VT(tag, *_preproc(c, kw, attrmap=attrmap, valmap=valmap), void_=void_)
return VT(
tag,
*_preproc(c, kw, attrmap=attrmap, valmap=valmap),
void_=void_,
replace_underscores=replace_underscores,
)


# XML void tags (self-closing)
Expand Down Expand Up @@ -178,7 +204,8 @@ def _to_xml(elm, lvl, indent, do_escape):
return f"{esc_fn(str(elm).strip())}{nl if indent else ''}"

tag, cs, attrs = elm.list
stag = VT.restore_tag_name(tag)

stag = elm.restore_tag_name()

# Prepare the attribute string only once
attr_str = ""
Expand Down

0 comments on commit 244d5e6

Please sign in to comment.