From 07a65cdcc576a84aaf272201866b14b4af5698e4 Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Sat, 11 Jan 2025 12:30:29 +0100 Subject: [PATCH] [3.12] gh-128302: Fix bugs in xml.dom.xmlbuilder (GH-128284) (#128583) gh-128302: Fix bugs in xml.dom.xmlbuilder (GH-128284) * Allow DOMParser.parse() to correctly handle DOMInputSource instances that only have a systemId attribute set. * Fix DOMEntityResolver.resolveEntity(), which was broken by the Python 3.0 transition. * Add Lib/test/test_xml_dom_xmlbuilder.py with a few tests. (cherry picked from commit 6ea04da27036eaa69d65150148bb8c537d9beacf) Co-authored-by: Stephen Morton --- Lib/test/test_xml_dom_xmlbuilder.py | 88 +++++++++++++++++++ Lib/xml/dom/xmlbuilder.py | 12 +-- ...-12-27-16-28-57.gh-issue-128302.2GMvyl.rst | 3 + ...-12-29-13-49-46.gh-issue-128302.psRpPN.rst | 2 + 4 files changed, 100 insertions(+), 5 deletions(-) create mode 100644 Lib/test/test_xml_dom_xmlbuilder.py create mode 100644 Misc/NEWS.d/next/Library/2024-12-27-16-28-57.gh-issue-128302.2GMvyl.rst create mode 100644 Misc/NEWS.d/next/Library/2024-12-29-13-49-46.gh-issue-128302.psRpPN.rst diff --git a/Lib/test/test_xml_dom_xmlbuilder.py b/Lib/test/test_xml_dom_xmlbuilder.py new file mode 100644 index 00000000000000..5f5f2eb328df9f --- /dev/null +++ b/Lib/test/test_xml_dom_xmlbuilder.py @@ -0,0 +1,88 @@ +import io +import unittest +from http import client +from test.test_httplib import FakeSocket +from unittest import mock +from xml.dom import getDOMImplementation, minidom, xmlbuilder + +SMALL_SAMPLE = b""" + + +Introduction to XSL +
+

A. Namespace

+""" + + +class XMLBuilderTest(unittest.TestCase): + def test_entity_resolver(self): + body = ( + b"HTTP/1.1 200 OK\r\nContent-Type: text/xml; charset=utf-8\r\n\r\n" + + SMALL_SAMPLE + ) + + sock = FakeSocket(body) + response = client.HTTPResponse(sock) + response.begin() + attrs = {"open.return_value": response} + opener = mock.Mock(**attrs) + + resolver = xmlbuilder.DOMEntityResolver() + + with mock.patch("urllib.request.build_opener") as mock_build: + mock_build.return_value = opener + source = resolver.resolveEntity(None, "http://example.com/2000/svg") + + self.assertIsInstance(source, xmlbuilder.DOMInputSource) + self.assertIsNone(source.publicId) + self.assertEqual(source.systemId, "http://example.com/2000/svg") + self.assertEqual(source.baseURI, "http://example.com/2000/") + self.assertEqual(source.encoding, "utf-8") + self.assertIs(source.byteStream, response) + + self.assertIsNone(source.characterStream) + self.assertIsNone(source.stringData) + + def test_builder(self): + imp = getDOMImplementation() + self.assertIsInstance(imp, xmlbuilder.DOMImplementationLS) + + builder = imp.createDOMBuilder(imp.MODE_SYNCHRONOUS, None) + self.assertIsInstance(builder, xmlbuilder.DOMBuilder) + + def test_parse_uri(self): + body = ( + b"HTTP/1.1 200 OK\r\nContent-Type: text/xml; charset=utf-8\r\n\r\n" + + SMALL_SAMPLE + ) + + sock = FakeSocket(body) + response = client.HTTPResponse(sock) + response.begin() + attrs = {"open.return_value": response} + opener = mock.Mock(**attrs) + + with mock.patch("urllib.request.build_opener") as mock_build: + mock_build.return_value = opener + + imp = getDOMImplementation() + builder = imp.createDOMBuilder(imp.MODE_SYNCHRONOUS, None) + document = builder.parseURI("http://example.com/2000/svg") + + self.assertIsInstance(document, minidom.Document) + self.assertEqual(len(document.childNodes), 1) + + def test_parse_with_systemId(self): + response = io.BytesIO(SMALL_SAMPLE) + + with mock.patch("urllib.request.urlopen") as mock_open: + 
mock_open.return_value = response + + imp = getDOMImplementation() + source = imp.createDOMInputSource() + builder = imp.createDOMBuilder(imp.MODE_SYNCHRONOUS, None) + source.systemId = "http://example.com/2000/svg" + document = builder.parse(source) + + self.assertIsInstance(document, minidom.Document) + self.assertEqual(len(document.childNodes), 1) diff --git a/Lib/xml/dom/xmlbuilder.py b/Lib/xml/dom/xmlbuilder.py index 8a200263497b89..a8852625a2f9a2 100644 --- a/Lib/xml/dom/xmlbuilder.py +++ b/Lib/xml/dom/xmlbuilder.py @@ -189,7 +189,7 @@ def parse(self, input): options.filter = self.filter options.errorHandler = self.errorHandler fp = input.byteStream - if fp is None and options.systemId: + if fp is None and input.systemId: import urllib.request fp = urllib.request.urlopen(input.systemId) return self._parse_bytestream(fp, options) @@ -247,10 +247,12 @@ def _create_opener(self): def _guess_media_encoding(self, source): info = source.byteStream.info() - if "Content-Type" in info: - for param in info.getplist(): - if param.startswith("charset="): - return param.split("=", 1)[1].lower() + # import email.message + # assert isinstance(info, email.message.Message) + charset = info.get_param('charset') + if charset is not None: + return charset.lower() + return None class DOMInputSource(object): diff --git a/Misc/NEWS.d/next/Library/2024-12-27-16-28-57.gh-issue-128302.2GMvyl.rst b/Misc/NEWS.d/next/Library/2024-12-27-16-28-57.gh-issue-128302.2GMvyl.rst new file mode 100644 index 00000000000000..56e2fe6f85f4bf --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-12-27-16-28-57.gh-issue-128302.2GMvyl.rst @@ -0,0 +1,3 @@ +Allow :meth:`!xml.dom.xmlbuilder.DOMParser.parse` to correctly handle +:class:`!xml.dom.xmlbuilder.DOMInputSource` instances that only have a +:attr:`!systemId` attribute set. 
diff --git a/Misc/NEWS.d/next/Library/2024-12-29-13-49-46.gh-issue-128302.psRpPN.rst b/Misc/NEWS.d/next/Library/2024-12-29-13-49-46.gh-issue-128302.psRpPN.rst new file mode 100644 index 00000000000000..98c07297b06f8a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-12-29-13-49-46.gh-issue-128302.psRpPN.rst @@ -0,0 +1,2 @@ +Fix :meth:`!xml.dom.xmlbuilder.DOMEntityResolver.resolveEntity`, which was +broken by the Python 3.0 transition.