diff --git a/.bandit.yml b/.bandit.yml index bb9aab2..4f60a02 100644 --- a/.bandit.yml +++ b/.bandit.yml @@ -1,4 +1,6 @@ skips: - B101 +- B311 - B320 - B410 +exclude_dirs: ['tests'] diff --git a/.flake8 b/.flake8 index d086822..7e5efc6 100644 --- a/.flake8 +++ b/.flake8 @@ -9,6 +9,7 @@ per-file-ignores = setup.py:E501 tests/test_selector.py:E501 tests/test_selector_csstranslator.py:E501 + tests/test_selector_jmespath.py:E501 tests/test_utils.py:E501 tests/test_xpathfuncs.py:E501 tests/typing/*.py:E,F diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 0000000..00d5546 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,2 @@ +# applying pre-commit hooks to the project +a57c23e3b7be0f001595bd8767fe05e40a66e730 \ No newline at end of file diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 22576e0..0aa7558 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -10,10 +10,7 @@ jobs: include: - python-version: "3.12" env: - TOXENV: security - - python-version: "3.12" - env: - TOXENV: flake8 + TOXENV: pre-commit - python-version: "3.12" env: TOXENV: pylint @@ -23,9 +20,6 @@ jobs: - python-version: "3.12" env: TOXENV: typing - - python-version: "3.12" - env: - TOXENV: black - python-version: "3.12" env: TOXENV: twinecheck diff --git a/.isort.cfg b/.isort.cfg new file mode 100644 index 0000000..6860bdb --- /dev/null +++ b/.isort.cfg @@ -0,0 +1,2 @@ +[settings] +profile = black \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..9f1a2f1 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,18 @@ +repos: +- repo: https://github.com/PyCQA/bandit + rev: 1.7.7 + hooks: + - id: bandit + args: [-r, -c, .bandit.yml] +- repo: https://github.com/PyCQA/flake8 + rev: 7.0.0 + hooks: + - id: flake8 +- repo: https://github.com/psf/black.git + rev: 24.1.1 + hooks: + - id: black +- repo: https://github.com/pycqa/isort + rev: 5.13.2 + hooks: + - id: isort \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index 689af48..4d7b0d6 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -3,7 +3,6 @@ import os import sys - # Get the project root dir, which is the parent dir of this cwd = os.getcwd() project_root = os.path.dirname(cwd) @@ -13,8 +12,7 @@ # version is used. sys.path.insert(0, project_root) -import parsel - +import parsel # noqa: E402 # -- General configuration --------------------------------------------- @@ -98,10 +96,9 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ - ("index", "parsel", "Parsel Documentation", ["Scrapy Project"], 1) + ("index", "parsel", "Parsel Documentation", ["Scrapy Project"], 1), ] - # -- Options for Texinfo output ---------------------------------------- # Grouping the document tree into Texinfo files. List of tuples diff --git a/parsel/__init__.py b/parsel/__init__.py index 955f3df..cd00fe6 100644 --- a/parsel/__init__.py +++ b/parsel/__init__.py @@ -13,8 +13,8 @@ "xpathfuncs", ] -from parsel.selector import Selector, SelectorList # NOQA -from parsel.csstranslator import css2xpath # NOQA from parsel import xpathfuncs # NOQA +from parsel.csstranslator import css2xpath # NOQA +from parsel.selector import Selector, SelectorList # NOQA xpathfuncs.setup() diff --git a/parsel/csstranslator.py b/parsel/csstranslator.py index e1adc0a..ac6af32 100644 --- a/parsel/csstranslator.py +++ b/parsel/csstranslator.py @@ -3,10 +3,9 @@ from cssselect import GenericTranslator as OriginalGenericTranslator from cssselect import HTMLTranslator as OriginalHTMLTranslator -from cssselect.xpath import XPathExpr as OriginalXPathExpr -from cssselect.xpath import ExpressionError from cssselect.parser import Element, FunctionalPseudoElement, PseudoElement - +from cssselect.xpath import ExpressionError +from cssselect.xpath import XPathExpr as OriginalXPathExpr if TYPE_CHECKING: # typing.Self requires Python 3.11 @@ -25,9 +24,7 @@ def from_xpath( textnode: bool = False, attribute: Optional[str] = None, ) -> "Self": - x = cls( - path=xpath.path, element=xpath.element, condition=xpath.condition - ) + x = cls(path=xpath.path, element=xpath.element, condition=xpath.condition) x.textnode = textnode x.attribute = attribute return x @@ -82,9 +79,7 @@ class TranslatorMixin: Currently supported pseudo-elements are ``::text`` and ``::attr(ATTR_NAME)``. """ - def xpath_element( - self: TranslatorProtocol, selector: Element - ) -> XPathExpr: + def xpath_element(self: TranslatorProtocol, selector: Element) -> XPathExpr: # https://github.com/python/mypy/issues/12344 xpath = super().xpath_element(selector) # type: ignore[safe-super] return XPathExpr.from_xpath(xpath) @@ -104,7 +99,9 @@ def xpath_pseudo_element( ) xpath = method(xpath, pseudo_element) else: - method_name = f"xpath_{pseudo_element.replace('-', '_')}_simple_pseudo_element" + method_name = ( + f"xpath_{pseudo_element.replace('-', '_')}_simple_pseudo_element" + ) method = getattr(self, method_name, None) if not method: raise ExpressionError( @@ -121,30 +118,22 @@ def xpath_attr_functional_pseudo_element( raise ExpressionError( f"Expected a single string or ident for ::attr(), got {function.arguments!r}" # noqa: E231 ) - return XPathExpr.from_xpath( - xpath, attribute=function.arguments[0].value - ) + return XPathExpr.from_xpath(xpath, attribute=function.arguments[0].value) - def xpath_text_simple_pseudo_element( - self, xpath: OriginalXPathExpr - ) -> XPathExpr: + def xpath_text_simple_pseudo_element(self, xpath: OriginalXPathExpr) -> XPathExpr: """Support selecting text nodes using ::text pseudo-element""" return XPathExpr.from_xpath(xpath, textnode=True) class GenericTranslator(TranslatorMixin, OriginalGenericTranslator): @lru_cache(maxsize=256) - def css_to_xpath( - self, css: str, prefix: str = "descendant-or-self::" - ) -> str: + def css_to_xpath(self, css: str, prefix: str = "descendant-or-self::") -> str: return super().css_to_xpath(css, prefix) class HTMLTranslator(TranslatorMixin, OriginalHTMLTranslator): @lru_cache(maxsize=256) - def css_to_xpath( - self, css: str, prefix: str = "descendant-or-self::" - ) -> str: + def css_to_xpath(self, css: str, prefix: str = "descendant-or-self::") -> str: return super().css_to_xpath(css, prefix) diff --git a/parsel/selector.py b/parsel/selector.py index 11d7979..dd9c936 100644 --- a/parsel/selector.py +++ b/parsel/selector.py @@ -29,7 +29,6 @@ from .csstranslator import GenericTranslator, HTMLTranslator from .utils import extract_regex, flatten, iflatten, shorten - _SelectorType = TypeVar("_SelectorType", bound="Selector") _ParserType = Union[etree.XMLParser, etree.HTMLParser] # simplified _OutputMethodArg from types-lxml @@ -135,18 +134,14 @@ def __getitem__( ) -> Union[_SelectorType, "SelectorList[_SelectorType]"]: o = super().__getitem__(pos) if isinstance(pos, slice): - return self.__class__( - typing.cast("SelectorList[_SelectorType]", o) - ) + return self.__class__(typing.cast("SelectorList[_SelectorType]", o)) else: return typing.cast(_SelectorType, o) def __getstate__(self) -> None: raise TypeError("can't pickle SelectorList objects") - def jmespath( - self, query: str, **kwargs: Any - ) -> "SelectorList[_SelectorType]": + def jmespath(self, query: str, **kwargs: Any) -> "SelectorList[_SelectorType]": """ Call the ``.jmespath()`` method for each element in this list and return their results flattened as another :class:`SelectorList`. @@ -158,9 +153,7 @@ def jmespath( selector.jmespath('author.name', options=jmespath.Options(dict_cls=collections.OrderedDict)) """ - return self.__class__( - flatten([x.jmespath(query, **kwargs) for x in self]) - ) + return self.__class__(flatten([x.jmespath(query, **kwargs) for x in self])) def xpath( self, @@ -185,9 +178,7 @@ def xpath( selector.xpath('//a[href=$url]', url="http://www.example.com") """ return self.__class__( - flatten( - [x.xpath(xpath, namespaces=namespaces, **kwargs) for x in self] - ) + flatten([x.xpath(xpath, namespaces=namespaces, **kwargs) for x in self]) ) def css(self, query: str) -> "SelectorList[_SelectorType]": @@ -211,9 +202,7 @@ def re( Passing ``replace_entities`` as ``False`` switches off these replacements. """ - return flatten( - [x.re(regex, replace_entities=replace_entities) for x in self] - ) + return flatten([x.re(regex, replace_entities=replace_entities) for x in self]) @typing.overload def re_first( @@ -316,9 +305,7 @@ def drop(self) -> None: _NOT_SET = object() -def _get_root_from_text( - text: str, *, type: str, **lxml_kwargs: Any -) -> etree._Element: +def _get_root_from_text(text: str, *, type: str, **lxml_kwargs: Any) -> etree._Element: return create_root_node(text, _ctgroup[type]["_parser"], **lxml_kwargs) @@ -583,9 +570,7 @@ def make_selector(x: Any) -> _SelectorType: # closure function return self.__class__(root=x, _expr=query) result = [make_selector(x) for x in result] - return typing.cast( - SelectorList[_SelectorType], self.selectorlist_cls(result) - ) + return typing.cast(SelectorList[_SelectorType], self.selectorlist_cls(result)) def xpath( self: _SelectorType, @@ -611,9 +596,7 @@ def xpath( selector.xpath('//a[href=$url]', url="http://www.example.com") """ if self.type not in ("html", "xml", "text"): - raise ValueError( - f"Cannot use xpath on a Selector of type {self.type!r}" - ) + raise ValueError(f"Cannot use xpath on a Selector of type {self.type!r}") if self.type in ("html", "xml"): try: xpathev = self.root.xpath @@ -654,9 +637,7 @@ def xpath( ) for x in result ] - return typing.cast( - SelectorList[_SelectorType], self.selectorlist_cls(result) - ) + return typing.cast(SelectorList[_SelectorType], self.selectorlist_cls(result)) def css(self: _SelectorType, query: str) -> SelectorList[_SelectorType]: """ @@ -670,9 +651,7 @@ def css(self: _SelectorType, query: str) -> SelectorList[_SelectorType]: .. _cssselect: https://pypi.python.org/pypi/cssselect/ """ if self.type not in ("html", "xml", "text"): - raise ValueError( - f"Cannot use css on a Selector of type {self.type!r}" - ) + raise ValueError(f"Cannot use css on a Selector of type {self.type!r}") return self.xpath(self._css2xpath(query)) def _css2xpath(self, query: str) -> str: diff --git a/parsel/utils.py b/parsel/utils.py index 2677f47..ec77d74 100644 --- a/parsel/utils.py +++ b/parsel/utils.py @@ -1,5 +1,6 @@ import re from typing import Any, Iterable, Iterator, List, Match, Pattern, Union, cast + from w3lib.html import replace_entities as w3lib_replace_entities diff --git a/parsel/xpathfuncs.py b/parsel/xpathfuncs.py index e8cea0a..7b984c5 100644 --- a/parsel/xpathfuncs.py +++ b/parsel/xpathfuncs.py @@ -2,10 +2,8 @@ from typing import Any, Callable, Optional from lxml import etree - from w3lib.html import HTML5_WHITESPACE - regex = f"[{HTML5_WHITESPACE}]+" replace_html5_whitespaces = re.compile(regex).sub @@ -43,14 +41,10 @@ def has_class(context: Any, *classes: str) -> bool: """ if not context.eval_context.get("args_checked"): if not classes: - raise ValueError( - "XPath error: has-class must have at least 1 argument" - ) + raise ValueError("XPath error: has-class must have at least 1 argument") for c in classes: if not isinstance(c, str): - raise ValueError( - "XPath error: has-class arguments must be strings" - ) + raise ValueError("XPath error: has-class arguments must be strings") context.eval_context["args_checked"] = True node_cls = context.context_node.get("class") diff --git a/setup.py b/setup.py index 8ba4b0c..1be8413 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,6 @@ from setuptools import setup - with open("README.rst", encoding="utf-8") as readme_file: readme = readme_file.read() diff --git a/tests/test_selector.py b/tests/test_selector.py index 645b999..96713f9 100644 --- a/tests/test_selector.py +++ b/tests/test_selector.py @@ -1,11 +1,10 @@ +import pickle import re +import typing +import unittest import warnings import weakref -import unittest -import pickle - -import typing -from typing import cast, Any, Optional, Mapping +from typing import Any, Mapping, Optional, cast from lxml import etree from lxml.html import HtmlElement @@ -13,10 +12,10 @@ from parsel import Selector, SelectorList from parsel.selector import ( - CannotRemoveElementWithoutRoot, - CannotRemoveElementWithoutParent, - LXML_SUPPORTS_HUGE_TREE, _NOT_SET, + LXML_SUPPORTS_HUGE_TREE, + CannotRemoveElementWithoutParent, + CannotRemoveElementWithoutRoot, ) @@ -32,9 +31,7 @@ def assertIsSelectorList(self, value: Any) -> None: def test_pickle_selector(self) -> None: sel = self.sscls(text="

some text

") - self.assertRaises( - TypeError, lambda s: pickle.dumps(s, protocol=2), sel - ) + self.assertRaises(TypeError, lambda s: pickle.dumps(s, protocol=2), sel) def test_pickle_selector_list(self) -> None: sel = self.sscls( @@ -44,9 +41,7 @@ def test_pickle_selector_list(self) -> None: empty_sel_list = sel.css("p") self.assertIsSelectorList(sel_list) self.assertIsSelectorList(empty_sel_list) - self.assertRaises( - TypeError, lambda s: pickle.dumps(s, protocol=2), sel_list - ) + self.assertRaises(TypeError, lambda s: pickle.dumps(s, protocol=2), sel_list) self.assertRaises( TypeError, lambda s: pickle.dumps(s, protocol=2), empty_sel_list ) @@ -99,10 +94,7 @@ def test_simple_selection_with_variables(self) -> None: sel = self.sscls(text=body) self.assertEqual( - [ - x.extract() - for x in sel.xpath("//input[@value=$number]/@name", number=1) - ], + [x.extract() for x in sel.xpath("//input[@value=$number]/@name", number=1)], ["a"], ) self.assertEqual( @@ -124,15 +116,11 @@ def test_simple_selection_with_variables(self) -> None: # you can also pass booleans self.assertEqual( - sel.xpath( - "boolean(count(//input)=$cnt)=$test", cnt=2, test=True - ).extract(), + sel.xpath("boolean(count(//input)=$cnt)=$test", cnt=2, test=True).extract(), ["1"], ) self.assertEqual( - sel.xpath( - "boolean(count(//input)=$cnt)=$test", cnt=4, test=True - ).extract(), + sel.xpath("boolean(count(//input)=$cnt)=$test", cnt=4, test=True).extract(), ["0"], ) self.assertEqual( @@ -162,16 +150,11 @@ def test_simple_selection_with_variables_escape_friendly(self) -> None: t = 'I say "Yeah!"' # naive string formatting with give something like: # ValueError: XPath error: Invalid predicate in //input[@value="I say "Yeah!""]/@name - self.assertRaises( - ValueError, sel.xpath, f'//input[@value="{t}"]/@name' - ) + self.assertRaises(ValueError, sel.xpath, f'//input[@value="{t}"]/@name') # with XPath variables, escaping is done for you self.assertEqual( - [ - x.extract() - for x in sel.xpath("//input[@value=$text]/@name", text=t) - ], + [x.extract() for x in sel.xpath("//input[@value=$text]/@name", text=t)], ["a"], ) lt = """I'm mixing single and "double quotes" and I don't care :)""" @@ -184,9 +167,7 @@ def test_simple_selection_with_variables_escape_friendly(self) -> None: self.assertEqual( [ x.extract() - for x in sel.xpath( - "//p[normalize-space()=$lng]//@name", lng=lt - ) + for x in sel.xpath("//p[normalize-space()=$lng]//@name", lng=lt) ], ["a"], ) @@ -210,9 +191,7 @@ def test_accessing_attributes(self) -> None: ) # for a SelectorList, bring the attributes of first-element only - self.assertEqual( - {"id": "some-list", "class": "list-cls"}, sel.css("ul").attrib - ) + self.assertEqual({"id": "some-list", "class": "list-cls"}, sel.css("ul").attrib) self.assertEqual( {"class": "item-cls", "id": "list-item-1"}, sel.css("li").attrib ) @@ -232,9 +211,7 @@ def test_representation_slice(self) -> None: body = f"

" sel = self.sscls(text=body) - representation = ( - f"" - ) + representation = f"" self.assertEqual( [repr(it) for it in sel.xpath("//input/@name")], [representation] @@ -243,9 +220,7 @@ def test_representation_slice(self) -> None: def test_representation_unicode_query(self) -> None: body = f"

" - representation = ( - "" - ) + representation = "" sel = self.sscls(text=body) self.assertEqual( @@ -304,9 +279,7 @@ def test_selector_get_alias(self) -> None: self.assertEqual( sel.xpath("//ul/li[position()>1]")[0].get(), '
  • 2
  • ' ) - self.assertEqual( - sel.xpath("//ul/li[position()>1]/text()")[0].get(), "2" - ) + self.assertEqual(sel.xpath("//ul/li[position()>1]/text()")[0].get(), "2") def test_selector_getall_alias(self) -> None: """Test if get() returns extracted value on a Selector""" @@ -376,9 +349,7 @@ def test_extract_first_re_default(self) -> None: def test_select_unicode_query(self) -> None: body = "

    " sel = self.sscls(text=body) - self.assertEqual( - sel.xpath('//input[@name="\xa9"]/@value').extract(), ["1"] - ) + self.assertEqual(sel.xpath('//input[@name="\xa9"]/@value').extract(), ["1"]) def test_list_elements_type(self) -> None: """Test Selector returning the same type in selection methods""" @@ -395,12 +366,8 @@ def test_list_elements_type(self) -> None: def test_boolean_result(self) -> None: body = "

    " xs = self.sscls(text=body) - self.assertEqual( - xs.xpath("//input[@name='a']/@name='a'").extract(), ["1"] - ) - self.assertEqual( - xs.xpath("//input[@name='a']/@name='n'").extract(), ["0"] - ) + self.assertEqual(xs.xpath("//input[@name='a']/@name='a'").extract(), ["1"]) + self.assertEqual(xs.xpath("//input[@name='a']/@name='n'").extract(), ["0"]) def test_differences_parsing_xml_vs_html(self) -> None: """Test that XML and HTML Selector's behave differently""" @@ -530,9 +497,7 @@ def test_mixed_nested_selectors(self) -> None: self.assertEqual( sel.xpath('//div[@id="1"]').css("span::text").extract(), ["me"] ) - self.assertEqual( - sel.css("#1").xpath("./span/text()").extract(), ["me"] - ) + self.assertEqual(sel.css("#1").xpath("./span/text()").extract(), ["me"]) def test_dont_strip(self) -> None: sel = self.sscls(text='
    fff: zzz
    ') @@ -607,16 +572,12 @@ def test_namespaces_multiple(self) -> None: x.register_namespace("b", "http://somens.com") self.assertEqual(len(x.xpath("//xmlns:TestTag")), 1) self.assertEqual(x.xpath("//b:Operation/text()").extract()[0], "hello") - self.assertEqual( - x.xpath("//xmlns:TestTag/@b:att").extract()[0], "value" - ) + self.assertEqual(x.xpath("//xmlns:TestTag/@b:att").extract()[0], "value") self.assertEqual( x.xpath("//p:SecondTestTag/xmlns:price/text()").extract()[0], "90" ) self.assertEqual( - x.xpath("//p:SecondTestTag") - .xpath("./xmlns:price/text()")[0] - .extract(), + x.xpath("//p:SecondTestTag").xpath("./xmlns:price/text()")[0].extract(), "90", ) self.assertEqual( @@ -713,9 +674,7 @@ def test_namespaces_multiple_adhoc(self) -> None: ) # "p" prefix is not cached from previous calls - self.assertRaises( - ValueError, x.xpath, "//p:SecondTestTag/xmlns:price/text()" - ) + self.assertRaises(ValueError, x.xpath, "//p:SecondTestTag/xmlns:price/text()") x.register_namespace("p", "http://www.scrapy.org/product") self.assertEqual( @@ -787,9 +746,7 @@ def test_re_replace_entities(self) -> None: ) self.assertEqual( - x.xpath("//script/text()").re_first( - name_re, replace_entities=False - ), + x.xpath("//script/text()").re_first(name_re, replace_entities=False), expected, ) self.assertEqual( @@ -800,15 +757,11 @@ def test_re_replace_entities(self) -> None: def test_re_intl(self) -> None: body = "
    Evento: cumplea\xf1os
    " x = self.sscls(text=body) - self.assertEqual( - x.xpath("//div").re(r"Evento: (\w+)"), ["cumplea\xf1os"] - ) + self.assertEqual(x.xpath("//div").re(r"Evento: (\w+)"), ["cumplea\xf1os"]) def test_selector_over_text(self) -> None: hs = self.sscls(text="lala") - self.assertEqual( - hs.extract(), "lala" - ) + self.assertEqual(hs.extract(), "lala") xs = self.sscls(text="lala", type="xml") self.assertEqual(xs.extract(), "lala") self.assertEqual(xs.xpath(".").extract(), ["lala"]) @@ -834,17 +787,13 @@ def test_http_header_encoding_precedence(self) -> None: \xa3""" x = self.sscls(text=text) - self.assertEqual( - x.xpath("//span[@id='blank']/text()").extract(), ["\xa3"] - ) + self.assertEqual(x.xpath("//span[@id='blank']/text()").extract(), ["\xa3"]) def test_empty_bodies_shouldnt_raise_errors(self) -> None: self.sscls(text="").xpath("//text()").extract() def test_bodies_with_comments_only(self) -> None: - sel = self.sscls( - text="", base_url="http://example.com" - ) + sel = self.sscls(text="", base_url="http://example.com") self.assertEqual("http://example.com", sel.root.base) def test_null_bytes_shouldnt_raise_errors(self) -> None: @@ -870,9 +819,7 @@ def test_select_on_unevaluable_nodes(self) -> None: self.assertEqual(x1.xpath(".//text()").extract(), []) def test_select_on_text_nodes(self) -> None: - r = self.sscls( - text="
    Options:opt1
    Otheropt2
    " - ) + r = self.sscls(text="
    Options:opt1
    Otheropt2
    ") x1 = r.xpath( "//div/descendant::text()[preceding-sibling::b[contains(text(), 'Options')]]" ) @@ -886,9 +833,7 @@ def test_select_on_text_nodes(self) -> None: @unittest.skip("Text nodes lost parent node reference in lxml") def test_nested_select_on_text_nodes(self) -> None: # FIXME: does not work with lxml backend [upstream] - r = self.sscls( - text="
    Options:opt1
    Otheropt2
    " - ) + r = self.sscls(text="
    Options:opt1
    Otheropt2
    ") x1 = r.xpath("//div/descendant::text()") x2 = x1.xpath("./preceding-sibling::b[contains(text(), 'Options')]") self.assertEqual(x2.extract(), ["Options:"]) @@ -948,11 +893,7 @@ def test_remove_namespaces_embedded(self) -> None: 2, ) self.assertEqual( - len( - sel.xpath( - "//s:stop", namespaces={"s": "http://www.w3.org/2000/svg"} - ) - ), + len(sel.xpath("//s:stop", namespaces={"s": "http://www.w3.org/2000/svg"})), 2, ) sel.remove_namespaces() @@ -1000,19 +941,13 @@ class SmartStringsSelector(Selector): li_text = x.xpath("//li/text()") self.assertFalse(any([hasattr(e.root, "getparent") for e in li_text])) div_class = x.xpath("//div/@class") - self.assertFalse( - any([hasattr(e.root, "getparent") for e in div_class]) - ) + self.assertFalse(any([hasattr(e.root, "getparent") for e in div_class])) smart_x = SmartStringsSelector(text=body) smart_li_text = smart_x.xpath("//li/text()") - self.assertTrue( - all([hasattr(e.root, "getparent") for e in smart_li_text]) - ) + self.assertTrue(all([hasattr(e.root, "getparent") for e in smart_li_text])) smart_div_class = smart_x.xpath("//div/@class") - self.assertTrue( - all([hasattr(e.root, "getparent") for e in smart_div_class]) - ) + self.assertTrue(all([hasattr(e.root, "getparent") for e in smart_div_class])) def test_xml_entity_expansion(self) -> None: malicious_xml = ( @@ -1280,30 +1215,18 @@ def test_regexp(self) -> None: # re:test() self.assertEqual( sel.xpath('//input[re:test(@name, "[A-Z]+", "i")]').extract(), - [ - x.extract() - for x in sel.xpath('//input[re:test(@name, "[A-Z]+", "i")]') - ], + [x.extract() for x in sel.xpath('//input[re:test(@name, "[A-Z]+", "i")]')], ) self.assertEqual( - [ - x.extract() - for x in sel.xpath(r'//a[re:test(@href, "\.html$")]/text()') - ], + [x.extract() for x in sel.xpath(r'//a[re:test(@href, "\.html$")]/text()')], ["first link", "second link"], ) self.assertEqual( - [ - x.extract() - for x in sel.xpath('//a[re:test(@href, "first")]/text()') - ], + [x.extract() for x in sel.xpath('//a[re:test(@href, "first")]/text()')], ["first link"], ) self.assertEqual( - [ - x.extract() - for x in sel.xpath('//a[re:test(@href, "second")]/text()') - ], + [x.extract() for x in sel.xpath('//a[re:test(@href, "second")]/text()')], ["second link"], ) @@ -1333,9 +1256,7 @@ def test_regexp(self) -> None: r're:replace(//a[re:test(@href, "\.xml$")]/@href,' r'"(\w+)://(.+)(\.xml)", "","https://\2.html")' ).extract(), - [ - "https://www.bayes.co.uk/xml/index.xml?/xml/utils/rechecker.html" - ], + ["https://www.bayes.co.uk/xml/index.xml?/xml/utils/rechecker.html"], ) def test_set(self) -> None: diff --git a/tests/test_selector_csstranslator.py b/tests/test_selector_csstranslator.py index b3cdfa9..2adc2f5 100644 --- a/tests/test_selector_csstranslator.py +++ b/tests/test_selector_csstranslator.py @@ -1,22 +1,18 @@ """ Selector tests for cssselect backend """ + import unittest -from typing import Any, Callable, List, Type, Protocol, Tuple, Union +from typing import Any, Callable, List, Protocol, Tuple, Type, Union import cssselect import pytest -from packaging.version import Version - -from parsel.csstranslator import ( - GenericTranslator, - HTMLTranslator, - TranslatorProtocol, -) -from parsel import Selector from cssselect.parser import SelectorSyntaxError from cssselect.xpath import ExpressionError +from packaging.version import Version +from parsel import Selector +from parsel.csstranslator import GenericTranslator, HTMLTranslator, TranslatorProtocol HTMLBODY = """ @@ -66,9 +62,7 @@ def assertEqual(self, first: Any, second: Any, msg: Any = ...) -> None: def assertRaises( self, - expected_exception: Union[ - Type[BaseException], Tuple[Type[BaseException], ...] - ], + expected_exception: Union[Type[BaseException], Tuple[Type[BaseException], ...]], callable: Callable[..., object], *args: Any, **kwargs: Any, @@ -184,9 +178,7 @@ def setUp(self) -> None: self.sel = self.sscls(text=HTMLBODY) def x(self, *a: Any, **kw: Any) -> List[str]: - return [ - v.strip() for v in self.sel.css(*a, **kw).extract() if v.strip() - ] + return [v.strip() for v in self.sel.css(*a, **kw).extract() if v.strip()] def test_selector_simple(self) -> None: for x in self.sel.css("input"): @@ -206,9 +198,7 @@ def test_text_pseudo_element(self) -> None: ["lorem ipsum text", "hi", "there", "guy"], ) self.assertEqual(self.x("p::text"), ["lorem ipsum text"]) - self.assertEqual( - self.x("p ::text"), ["lorem ipsum text", "hi", "there", "guy"] - ) + self.assertEqual(self.x("p ::text"), ["lorem ipsum text", "hi", "there", "guy"]) def test_attribute_function(self) -> None: self.assertEqual(self.x("#p-b2::attr(id)"), ["p-b2"]) @@ -221,9 +211,7 @@ def test_attribute_function(self) -> None: ) def test_nested_selector(self) -> None: - self.assertEqual( - self.sel.css("p").css("b::text").extract(), ["hi", "guy"] - ) + self.assertEqual(self.sel.css("p").css("b::text").extract(), ["hi", "guy"]) self.assertEqual( self.sel.css("div").css("area:last-child").extract(), [''], diff --git a/tests/test_selector_jmespath.py b/tests/test_selector_jmespath.py index fa607fa..5afbd6d 100644 --- a/tests/test_selector_jmespath.py +++ b/tests/test_selector_jmespath.py @@ -43,12 +43,8 @@ def test_json_has_html(self) -> None: sel.jmespath("html").xpath("//div/a/text()").getall(), ["a", "b", "d"], ) - self.assertEqual( - sel.jmespath("html").css("div > b").getall(), ["f"] - ) - self.assertEqual( - sel.jmespath("content").jmespath("name.age").get(), 18 - ) + self.assertEqual(sel.jmespath("html").css("div > b").getall(), ["f"]) + self.assertEqual(sel.jmespath("content").jmespath("name.age").get(), 18) def test_html_has_json(self) -> None: html_text = """ @@ -82,9 +78,7 @@ def test_html_has_json(self) -> None: """ sel = Selector(text=html_text) self.assertEqual( - sel.xpath("//div/content/text()") - .jmespath("user[*].name") - .getall(), + sel.xpath("//div/content/text()").jmespath("user[*].name").getall(), ["A", "B", "C", "D"], ) self.assertEqual( @@ -125,9 +119,7 @@ def test_jmestpath_with_re(self) -> None: """ sel = Selector(text=html_text) self.assertEqual( - sel.xpath("//div/content/text()") - .jmespath("user[*].name") - .re(r"(\w+)"), + sel.xpath("//div/content/text()").jmespath("user[*].name").re(r"(\w+)"), ["A", "B", "C", "D"], ) self.assertEqual( @@ -143,9 +135,7 @@ def test_jmestpath_with_re(self) -> None: ) self.assertEqual( - sel.xpath("//div/content") - .jmespath("unavailable") - .re_first(r"(\d+)"), + sel.xpath("//div/content").jmespath("unavailable").re_first(r"(\d+)"), None, ) @@ -171,14 +161,10 @@ def test_json_types(self) -> None: ): selector = Selector(text=text, root=_NOT_SET) self.assertEqual(selector.type, "json") - self.assertEqual( - selector._text, text # pylint: disable=protected-access - ) + self.assertEqual(selector._text, text) # pylint: disable=protected-access self.assertEqual(selector.root, root) selector = Selector(text=None, root=root) self.assertEqual(selector.type, "json") - self.assertEqual( - selector._text, None # pylint: disable=protected-access - ) + self.assertEqual(selector._text, None) # pylint: disable=protected-access self.assertEqual(selector.root, root) diff --git a/tests/test_utils.py b/tests/test_utils.py index b82540e..ee3e112 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,9 +1,9 @@ -from typing import Pattern, List, Type, Union - -from parsel.utils import shorten, extract_regex +from typing import List, Pattern, Type, Union from pytest import mark, raises +from parsel.utils import extract_regex, shorten + @mark.parametrize( "width,expected", diff --git a/tests/test_xml_attacks.py b/tests/test_xml_attacks.py index e38a983..e7b5a48 100644 --- a/tests/test_xml_attacks.py +++ b/tests/test_xml_attacks.py @@ -7,7 +7,6 @@ from parsel import Selector - MiB_1 = 1024**2 diff --git a/tests/test_xpathfuncs.py b/tests/test_xpathfuncs.py index 3adad0d..7739982 100644 --- a/tests/test_xpathfuncs.py +++ b/tests/test_xpathfuncs.py @@ -1,5 +1,5 @@ -from typing import Any import unittest +from typing import Any from parsel import Selector from parsel.xpathfuncs import set_xpathfunc @@ -23,17 +23,11 @@ def test_has_class_simple(self) -> None: ["Third"], ) self.assertEqual( - [ - x.extract() - for x in sel.xpath('//p[has-class("foo","bar")]/text()') - ], + [x.extract() for x in sel.xpath('//p[has-class("foo","bar")]/text()')], [], ) self.assertEqual( - [ - x.extract() - for x in sel.xpath('//p[has-class("foo","bar-baz")]/text()') - ], + [x.extract() for x in sel.xpath('//p[has-class("foo","bar-baz")]/text()')], ["First"], ) diff --git a/tox.ini b/tox.ini index 8fe3f79..ce003c1 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = security,flake8,typing,pylint,black,docs,twinecheck,py38,py39,py310,py311,py312,pypy3.9,pypy3.10 +envlist = typing,pylint,docs,twinecheck,pre-commit,py38,py39,py310,py311,py312,pypy3.9,pypy3.10 [testenv] usedevelop = True @@ -7,19 +7,6 @@ deps = -r{toxinidir}/tests/requirements.txt commands = py.test --cov=parsel --cov-report=xml {posargs:docs parsel tests} -[testenv:security] -deps = - bandit -commands = - bandit -r -c .bandit.yml {posargs:parsel} - -[testenv:flake8] -deps = - {[testenv]deps} - flake8==5.0.4 -commands = - flake8 {posargs: parsel tests setup.py} - [testenv:typing] deps = {[testenv]deps} @@ -39,12 +26,6 @@ deps = commands = pylint docs parsel tests setup.py -[testenv:black] -deps = - black==22.10.0 -commands = - black --line-length=79 {posargs:--check parsel tests setup.py} - [docs] changedir = docs deps = -rdocs/requirements.txt @@ -67,3 +48,8 @@ deps = commands = python -m build --sdist twine check dist/* + +[testenv:pre-commit] +deps = pre-commit +commands = pre-commit run --all-files --show-diff-on-failure +skip_install = true