From 6a1c0b790039e223ad36a0e5773878bd6df52a9d Mon Sep 17 00:00:00 2001 From: Luke Plant Date: Fri, 27 Dec 2024 19:39:44 +0000 Subject: [PATCH 1/3] Added search_python_files(python_file_processor=) parameter --- docs/history.rst | 8 +++-- docs/library.rst | 70 ++++++++++++++++++++++++++++++++++------- src/pyastgrep/api.py | 11 ++++++- src/pyastgrep/files.py | 6 +++- src/pyastgrep/search.py | 18 +++++++++-- tests/test_library.py | 20 ++++++++++++ 6 files changed, 115 insertions(+), 18 deletions(-) diff --git a/docs/history.rst b/docs/history.rst index d22ce0f..83d604a 100644 --- a/docs/history.rst +++ b/docs/history.rst @@ -2,8 +2,12 @@ History ======= -Version 1.3.3 - unreleased --------------------------- +Version 1.4 - unreleased +------------------------ + +* Added ``python_file_processor`` parameter to :func:`pyastgrep.api.search_python_files`, + which particularly serves the needs of people using pyastgrep as a library who + want to apply caching. Version 1.3.2 - 2024-01-10 -------------------------- diff --git a/docs/library.rst b/docs/library.rst index b3026be..c20a65e 100644 --- a/docs/library.rst +++ b/docs/library.rst @@ -5,11 +5,18 @@ Use as a library pyastgrep is structured internally to make it easy to use as a library as well as a CLI, with a clear separation of the different layers. For now, the following API is documented as public and we will strive to maintain backwards -compatibility with it: +compatibility with it. + +For other things, while we will try not to break things without good reason, +at this point we are not documenting or guaranteeing API stability for these +functions. Please contribute to `the discussion +`_ if you have needs +here. + .. currentmodule:: pyastgrep.api -.. function:: search_python_files(paths, expression) +.. function:: search_python_files(paths, expression, python_file_processor=process_python_file) Searches for files with AST matching the given XPath ``expression``, in the given ``paths``. 
@@ -19,18 +26,32 @@ compatibility with it: Returns an iterable of :class:`Match` object, plus other objects. - The other objects are used to indicate errors, usually things like a failure to parse a file that had a ``.py`` extension. The details of these other objects are not being documented yet, so use at own risk, and ensure that you filter the results by doing an ``isinstance`` check for the ``Match`` objects. + The other objects are used to indicate errors, usually things like a failure + to parse a file that had a ``.py`` extension. The details of these other + objects are not being documented yet, so use at own risk, and ensure that you + filter the results by doing an ``isinstance`` check for the ``Match`` + objects. + + By default, ``search_python_files`` does no caching of the conversion of + Python to XML, which is appropriate for the normal command line usage. + However, this conversion is relatively expensive, and for various use cases + as a library, you might want to cache this operation. + + To achieve this, you can pass the ``python_file_processor`` argument. This value must be a callable that takes a :class:`pathlib.Path` object and returns a :class:`ProcessedPython` object or a :class:`ReadError` object. + + By default this is :func:`process_python_file` but an alternative can be + provided, such as :func:`process_python_file_cached`, or your own callable + that typically will wrap :func:`process_python_file` in some other way. :param paths: List of paths to search, which can be files or directories, of type :class:`pathlib.Path` :type paths: list[pathlib.Path] :param expression: XPath expression - :type expression: str - :return: Iterable[Match | Any] - + :param python_file_processor: callable that takes a :class:`pathlib.Path` object and returns a :class:`ProcessedPython` object or a :class:`ReadError` object. + :return: Iterable[Match | Any] .. class:: Match @@ -75,12 +96,39 @@ compatibility with it: :type: int +.. 
function:: process_python_file(path) + + Default value of ``python_file_processor`` parameter above: a function that + parses a Python file to create the AST and the XML version. This does no + caching. You should not need to call this yourself. + + +.. function:: process_python_file_cached(path) + + Wrapper for :func:`process_python_file` that caches infinitely in memory, based + on the input filename only. + + This can be an appropriate caching strategy: + + - if you are operating on a fairly limited number of Python files (or, if + available memory is not a problem) + + - if you have a fairly short-lived process + + - if you don’t need to respond to on-disk changes to file contents + for the life-time of the process. + +.. class:: ProcessedPython + + Return type of :func:`process_python_file`. For now, this is an opaque type, + as you should not need to construct this yourself – you should be wrapping + :func:`process_python_file` which will construct this for you. + +.. class:: ReadError + + Return type of :func:`process_python_file` for the case of error reading the + file. This is again an opaque type for now. -For other things, we while we will try not to break things without good reason, -at this point we are not documenting or guaranteeing API stability for these -functions. Please contribute to `the discussion -`_ if you have needs -here. 
Example ======= diff --git a/src/pyastgrep/api.py b/src/pyastgrep/api.py index c4bc97b..9c03d30 100644 --- a/src/pyastgrep/api.py +++ b/src/pyastgrep/api.py @@ -1,3 +1,12 @@ +from .files import ProcessedPython, ReadError, process_python_file, process_python_file_cached from .search import Match, Position, search_python_files -__all__ = ["search_python_files", "Match", "Position"] +__all__ = [ + "search_python_files", + "Match", + "Position", + "process_python_file", + "process_python_file_cached", + "ProcessedPython", + "ReadError", +] diff --git a/src/pyastgrep/files.py b/src/pyastgrep/files.py index ca666d2..aaafdbc 100644 --- a/src/pyastgrep/files.py +++ b/src/pyastgrep/files.py @@ -4,8 +4,9 @@ import os import re from dataclasses import dataclass +from functools import cache from pathlib import Path -from typing import BinaryIO, Iterable, Literal, Sequence, Union +from typing import BinaryIO, Callable, Iterable, Literal, Sequence, Union from lxml.etree import _Element from typing_extensions import TypeAlias @@ -162,6 +163,9 @@ def process_python_file(path: Path) -> ProcessedPython | ReadError: return process_python_source(filename=path, contents=contents, auto_dedent=False) +process_python_file_cached: Callable[[Path], ProcessedPython | ReadError] = cache(process_python_file) + + def process_python_source( *, filename: Pathlike, diff --git a/src/pyastgrep/search.py b/src/pyastgrep/search.py index 5809b3c..3317530 100644 --- a/src/pyastgrep/search.py +++ b/src/pyastgrep/search.py @@ -12,7 +12,15 @@ from pyastgrep.ignores import WalkError from . 
import xml -from .files import MissingPath, Pathlike, ReadError, get_files_to_search, process_python_file, process_python_source +from .files import ( + MissingPath, + Pathlike, + ProcessedPython, + ReadError, + get_files_to_search, + process_python_file, + process_python_source, +) @dataclass(frozen=True) @@ -72,10 +80,12 @@ def get_query_func(*, xpath2: bool) -> XMLQueryFunc: def search_python_files( paths: Sequence[Path | BinaryIO], expression: str, + *, xpath2: bool = False, include_hidden: bool = False, respect_global_ignores: bool = True, respect_vcs_ignores: bool = True, + python_file_processor: Callable[[Path], ProcessedPython | ReadError] = process_python_file, ) -> Iterable[Match | MissingPath | ReadError | WalkError | NonElementReturned | FileFinished]: """ Perform a recursive search through Python files. @@ -97,7 +107,7 @@ def search_python_files( elif isinstance(path, WalkError): yield path else: - yield from search_python_file(path, query_func, expression) + yield from search_python_file(path, query_func, expression, python_file_processor=python_file_processor) yield FileFinished(path) @@ -105,9 +115,11 @@ def search_python_file( path: Path | BinaryIO, query_func: XMLQueryFunc, expression: str, + *, + python_file_processor: Callable[[Path], ProcessedPython | ReadError] = process_python_file, ) -> Iterable[Match | ReadError | NonElementReturned]: if isinstance(path, Path): - processed_python = process_python_file(path) + processed_python = python_file_processor(path) else: processed_python = process_python_source(filename="", contents=path.read(), auto_dedent=True) diff --git a/tests/test_library.py b/tests/test_library.py index 751e4db..0da2101 100644 --- a/tests/test_library.py +++ b/tests/test_library.py @@ -7,7 +7,9 @@ import ast from pathlib import Path +from lxml import etree from pyastgrep.api import Match, Position, search_python_files +from pyastgrep.files import ProcessedPython, process_python_file_cached DIR = Path(__file__).parent / 
"examples" / "test_library" @@ -21,3 +23,21 @@ def test_search_python_files(): assert match.position == Position(lineno=2, col_offset=4) assert isinstance(match.ast_node, ast.For) assert match.matching_line == " for item in [1, 2, 3]:" + + +def test_search_python_files_with_cached_python_processor(): + results = list(search_python_files([DIR], ".//Name", python_file_processor=process_python_file_cached)) + filtered_results = [result for result in results if isinstance(result, Match)] + assert len(filtered_results) > 0 + + +def null_python_processor(path): + # Replacement for process_python_file that treats all files as if they + # were empty. + return ProcessedPython(path=path, contents="", ast=ast.parse(""), xml=etree.fromstring(""), node_mappings={}) + + +def test_search_python_files_with_custom_python_processor(): + results = list(search_python_files([DIR], ".//Name", python_file_processor=null_python_processor)) + filtered_results = [result for result in results if isinstance(result, Match)] + assert len(filtered_results) == 0 From 89c30ba42fce827a32f6a9a23a57931a8e474a2f Mon Sep 17 00:00:00 2001 From: Luke Plant Date: Fri, 27 Dec 2024 19:40:14 +0000 Subject: [PATCH 2/3] Docs fixes for dev env --- docs/conf.py | 2 +- pyproject.toml | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index 4160952..3895bf2 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -27,7 +27,7 @@ # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output html_theme = "sphinx_rtd_theme" -html_static_path = ["_static"] +html_static_path = [] pygments_style = "sphinx" diff --git a/pyproject.toml b/pyproject.toml index b13f07b..d803ef4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -105,4 +105,6 @@ dev-dependencies = [ "tox-uv>=1.13.1", "tox>=4.21.2", "types-lxml>=2024.12.13", + "sphinx>=7.1.2", + "sphinx-rtd-theme>=3.0.2", ] From 6d68ffa0875d3dde425ae8839540b2f002a936a4 Mon Sep 17 00:00:00 2001 From: Luke Plant Date: 
Fri, 27 Dec 2024 19:50:27 +0000 Subject: [PATCH 3/3] Dropped support for Python 3.8 which is past EOL --- .github/workflows/tests.yml | 2 +- docs/history.rst | 1 + docs/install.rst | 2 +- pyproject.toml | 4 ++-- tests/test_xml.py | 2 -- tox.ini | 2 +- 6 files changed, 6 insertions(+), 7 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index aef79e0..9eeb5ac 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.8, 3.9, "3.10", "3.11", "3.12", "3.13"] + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] steps: - uses: actions/checkout@v4 diff --git a/docs/history.rst b/docs/history.rst index 83d604a..501311a 100644 --- a/docs/history.rst +++ b/docs/history.rst @@ -8,6 +8,7 @@ Version 1.4 - unreleased * Added ``python_file_processor`` parameter to :func:`pyastgrep.api.search_python_files`, which particularly serves the needs of people using pyastgrep as a library who want to apply caching. +* Dropped support for Python 3.8. Version 1.3.2 - 2024-01-10 -------------------------- diff --git a/docs/install.rst b/docs/install.rst index 3d680c6..7df213e 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -2,7 +2,7 @@ Installation ============ -Python 3.8+ required. +Python 3.9+ required. 
We recommend `pipx `_ to install it conveniently in an isolated environment: diff --git a/pyproject.toml b/pyproject.toml index d803ef4..83e7d8e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,18 +8,18 @@ classifiers = [ "Development Status :: 4 - Beta", "Programming Language :: Python", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Intended Audience :: Developers", "Operating System :: OS Independent", "License :: OSI Approved :: MIT License", "Natural Language :: English", ] urls = {Homepage = "https://github.com/spookylukey/pyastgrep"} -requires-python = ">=3.8" +requires-python = ">=3.9" dependencies = [ "lxml>=3.3.5", "elementpath", diff --git a/tests/test_xml.py b/tests/test_xml.py index f6973b5..e4171ff 100644 --- a/tests/test_xml.py +++ b/tests/test_xml.py @@ -3,7 +3,6 @@ from pathlib import Path import lxml.etree -import pytest from pyastgrep.asts import ast_to_xml from pyastgrep.files import parse_python_file @@ -102,7 +101,6 @@ def _file_to_xml(path: Path): return lxml.etree.tostring(doc, pretty_print=True).decode("utf-8") -@pytest.mark.skipif(sys.version_info < (3, 9), reason="AST different on Python 3.8") def test_xml_everything(): # Smoke test to check we didn't break anything. EXPECTED = """ diff --git a/tox.ini b/tox.ini index d0235f5..206eea4 100644 --- a/tox.ini +++ b/tox.ini @@ -1,6 +1,6 @@ [tox] # Remember to add to .github/workflows/tests.yml if this is added to. -envlist = py38, py39, py310, py311, py312, py313, pyright +envlist = py39, py310, py311, py312, py313, pyright [testenv] commands = pytest {posargs}