Skip to content

Commit

Permalink
gh-128302: Fix bugs in xml.dom.xmlbuilder (GH-128284)
Browse files Browse the repository at this point in the history
* Allow DOMParser.parse() to correctly handle DOMInputSource instances
  that only have a systemId attribute set.
* Fix DOMEntityResolver.resolveEntity(), which was broken by the
  Python 3.0 transition.
* Add Lib/test/test_xml_dom_xmlbuilder.py with a few tests.
  • Loading branch information
tungol authored Jan 7, 2025
1 parent 145276a commit 6ea04da
Show file tree
Hide file tree
Showing 4 changed files with 100 additions and 5 deletions.
88 changes: 88 additions & 0 deletions Lib/test/test_xml_dom_xmlbuilder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import io
import unittest
from http import client
from test.test_httplib import FakeSocket
from unittest import mock
from xml.dom import getDOMImplementation, minidom, xmlbuilder

# Small namespaced XHTML document (as bytes) that the tests in this file feed
# to the parser, either directly or as the body of a fake HTTP response.
SMALL_SAMPLE = b"""<?xml version="1.0"?>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:xdc="http://www.xml.com/books">
<!-- A comment -->
<title>Introduction to XSL</title>
<hr/>
<p><xdc:author xdc:attrib="prefixed attribute" attrib="other attrib">A. Namespace</xdc:author></p>
</html>"""


class XMLBuilderTest(unittest.TestCase):
    """Tests for the load/save helpers in xml.dom.xmlbuilder."""

    def _make_http_response(self):
        """Return a started HTTPResponse whose body is SMALL_SAMPLE.

        The response is built from in-memory bytes via FakeSocket and is
        closed automatically when the calling test finishes.
        """
        body = (
            b"HTTP/1.1 200 OK\r\nContent-Type: text/xml; charset=utf-8\r\n\r\n"
            + SMALL_SAMPLE
        )
        response = client.HTTPResponse(FakeSocket(body))
        # Register the cleanup before begin() so the response is closed
        # even if parsing the status line or headers raises.
        self.addCleanup(response.close)
        response.begin()
        return response

    @staticmethod
    def _make_opener(response):
        """Return a mock opener whose open() always returns *response*."""
        return mock.Mock(**{"open.return_value": response})

    def test_entity_resolver(self):
        # resolveEntity() should wrap the opened URL in a DOMInputSource
        # and fill in the base URI and the encoding from the response.
        response = self._make_http_response()
        opener = self._make_opener(response)
        resolver = xmlbuilder.DOMEntityResolver()

        with mock.patch("urllib.request.build_opener", return_value=opener):
            source = resolver.resolveEntity(None, "http://example.com/2000/svg")

        self.assertIsInstance(source, xmlbuilder.DOMInputSource)
        self.assertIsNone(source.publicId)
        self.assertEqual(source.systemId, "http://example.com/2000/svg")
        self.assertEqual(source.baseURI, "http://example.com/2000/")
        # The fake response declares "charset=utf-8" in its Content-Type.
        self.assertEqual(source.encoding, "utf-8")
        self.assertIs(source.byteStream, response)

        self.assertIsNone(source.characterStream)
        self.assertIsNone(source.stringData)

    def test_builder(self):
        # The default DOM implementation must provide the load/save
        # interface and be able to create a synchronous builder.
        imp = getDOMImplementation()
        self.assertIsInstance(imp, xmlbuilder.DOMImplementationLS)

        builder = imp.createDOMBuilder(imp.MODE_SYNCHRONOUS, None)
        self.assertIsInstance(builder, xmlbuilder.DOMBuilder)

    def test_parse_uri(self):
        # parseURI() should fetch the document through the (mocked)
        # opener and return a parsed minidom Document.
        response = self._make_http_response()
        opener = self._make_opener(response)

        with mock.patch("urllib.request.build_opener", return_value=opener):
            imp = getDOMImplementation()
            builder = imp.createDOMBuilder(imp.MODE_SYNCHRONOUS, None)
            document = builder.parseURI("http://example.com/2000/svg")

        self.assertIsInstance(document, minidom.Document)
        self.assertEqual(len(document.childNodes), 1)

    def test_parse_with_systemId(self):
        # A DOMInputSource with only systemId set (no byteStream) must
        # be loaded through urllib.request.urlopen().
        response = io.BytesIO(SMALL_SAMPLE)

        with mock.patch("urllib.request.urlopen", return_value=response):
            imp = getDOMImplementation()
            source = imp.createDOMInputSource()
            builder = imp.createDOMBuilder(imp.MODE_SYNCHRONOUS, None)
            source.systemId = "http://example.com/2000/svg"
            document = builder.parse(source)

        self.assertIsInstance(document, minidom.Document)
        self.assertEqual(len(document.childNodes), 1)
12 changes: 7 additions & 5 deletions Lib/xml/dom/xmlbuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ def parse(self, input):
options.filter = self.filter
options.errorHandler = self.errorHandler
fp = input.byteStream
if fp is None and options.systemId:
if fp is None and input.systemId:
import urllib.request
fp = urllib.request.urlopen(input.systemId)
return self._parse_bytestream(fp, options)
Expand Down Expand Up @@ -247,10 +247,12 @@ def _create_opener(self):

def _guess_media_encoding(self, source):
info = source.byteStream.info()
if "Content-Type" in info:
for param in info.getplist():
if param.startswith("charset="):
return param.split("=", 1)[1].lower()
# import email.message
# assert isinstance(info, email.message.Message)
charset = info.get_param('charset')
if charset is not None:
return charset.lower()
return None


class DOMInputSource(object):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Allow :meth:`!xml.dom.xmlbuilder.DOMParser.parse` to correctly handle
:class:`!xml.dom.xmlbuilder.DOMInputSource` instances that only have a
:attr:`!systemId` attribute set.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fix :meth:`!xml.dom.xmlbuilder.DOMEntityResolver.resolveEntity`, which was
broken by the Python 3.0 transition.

0 comments on commit 6ea04da

Please sign in to comment.