diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2d53e4d6..be329d2c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,7 +17,7 @@ jobs: - name: Install dependencies run: | - python -m pip install --upgrade pip + python -m pip install -U pip pip install flake8 - name: Lint with flake8 @@ -25,7 +25,7 @@ jobs: # stop the build if there are Python syntax errors or undefined names flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + flake8 . --count --statistics pre-commit: @@ -46,7 +46,7 @@ jobs: - name: Install dependencies run: | - python -m pip install --upgrade pip + python -m pip install -U pip pip install -U setuptools pip install pre-commit @@ -72,7 +72,7 @@ jobs: - name: Install dependencies run: | - python -m pip install --upgrade pip + python -m pip install -U pip pip install -U setuptools pip install -e .[testing] diff --git a/optimade_client/informational.py b/optimade_client/informational.py index d0998790..94556382 100644 --- a/optimade_client/informational.py +++ b/optimade_client/informational.py @@ -1,13 +1,14 @@ import logging import os from pathlib import Path +import shutil from typing import Union from urllib.parse import urlencode import ipywidgets as ipw -from optimade_client.logger import LOGGER, WIDGET_HANDLER, REPORT_HANDLER -from optimade_client.utils import __optimade_version__, ButtonStyle +from optimade_client.logger import LOG_DIR, LOGGER, REPORT_HANDLER, WIDGET_HANDLER +from optimade_client.utils import __optimade_version__, ButtonStyle, CACHE_DIR IMG_DIR = Path(__file__).parent.joinpath("img") @@ -244,16 +245,54 @@ def __init__(self, **kwargs): description="Show DEBUG messages", disabled=False, indent=False, + width="auto", + height="auto", + ) + self.clear_cache = ipw.Button( + description="Clear cache", + 
disabled=False, + tooltip="Clear cached responses (not logs)", + icon="cube", + layout={ + "visibility": "visible" if self._debug else "hidden", + "width": "auto", + }, + ) + self.clear_logs = ipw.Button( + description="Clear logs", + disabled=False, + tooltip="Clear all log history", + icon="edit", + layout={ + "visibility": "visible" if self._debug else "hidden", + "width": "auto", + }, ) self.log_output = WIDGET_HANDLER.get_widget() super().__init__( - children=(ipw.VBox(children=(self.toggle_debug, self.log_output)),), + children=( + ipw.VBox( + children=( + ipw.HBox( + children=( + self.toggle_debug, + self.clear_cache, + self.clear_logs, + ), + layout={"height": "auto", "width": "auto"}, + ), + self.log_output, + ) + ), + ), **kwargs, ) self.set_title(0, "Log") - self.selected_index = None + self.selected_index = 0 if self._debug else None self.toggle_debug.observe(self._toggle_debug_logging, names="value") + self.clear_cache.on_click(self._clear_cache) + self.clear_logs.on_click(self._clear_logs) def freeze(self): """Disable widget""" @@ -272,16 +311,68 @@ def reset(self): self.toggle_debug.disabled = False self.log_output.reset() - @staticmethod - def _toggle_debug_logging(change: dict): + def _toggle_debug_logging(self, change: dict): """Set logging level depending on toggle button""" if change["new"]: # Set logging level DEBUG WIDGET_HANDLER.setLevel(logging.DEBUG) LOGGER.info("Set log output in widget to level DEBUG") LOGGER.debug("This should now be shown") + + # Show debug buttons + self.clear_cache.layout.visibility = "visible" + self.clear_logs.layout.visibility = "visible" else: # Set logging level to INFO WIDGET_HANDLER.setLevel(logging.INFO) LOGGER.info("Set log output in widget to level INFO") LOGGER.debug("This should now NOT be shown") + + # Hide debug buttons + self.clear_cache.layout.visibility = "hidden" + self.clear_logs.layout.visibility = "hidden" + + @staticmethod + def _clear_cache(_): + """Clear cached responses (not logs)""" + if 
str(LOG_DIR).startswith(str(CACHE_DIR)): + log_sub_dir = list(Path(str(LOG_DIR)[len(f"{CACHE_DIR}/") :]).parts) + + LOGGER.debug( + "Cache dir: %s - Log dir: %s - Log sub dir parts: %s", + CACHE_DIR, + LOG_DIR, + log_sub_dir, + ) + + for dirpath, dirnames, filenames in os.walk(CACHE_DIR): + log_dir_part = log_sub_dir.pop(0) if log_sub_dir else "" + if not log_sub_dir: + LOGGER.debug( + "No more log sub directory parts. Removing %r from dirnames list.", + log_dir_part, + ) + dirnames.remove(log_dir_part) + + for directory in list(dirnames): + if directory == log_dir_part: + continue + LOGGER.debug( + "Removing folder: %s", Path(dirpath).joinpath(directory).resolve() + ) + shutil.rmtree( + Path(dirpath).joinpath(directory).resolve(), ignore_errors=True + ) + dirnames.remove(directory) + for filename in filenames: + LOGGER.debug( + "Removing file: %s", Path(dirpath).joinpath(filename).resolve() + ) + os.remove(Path(dirpath).joinpath(filename).resolve()) + CACHE_DIR.mkdir(parents=True, exist_ok=True) + + @staticmethod + def _clear_logs(_): + """Clear all logs""" + shutil.rmtree(LOG_DIR, ignore_errors=True) + LOG_DIR.mkdir(parents=True, exist_ok=True) diff --git a/optimade_client/query_filter.py b/optimade_client/query_filter.py index eab14537..b17205bc 100644 --- a/optimade_client/query_filter.py +++ b/optimade_client/query_filter.py @@ -1,7 +1,7 @@ from typing import Union -import requests import traitlets import ipywidgets as ipw +import requests try: from simplejson import JSONDecodeError @@ -15,16 +15,18 @@ from optimade_client.exceptions import BadResource, QueryError from optimade_client.logger import LOGGER from optimade_client.subwidgets import ( - StructureDropdown, FilterTabs, ResultsPageChooser, + StructureDropdown, ) from optimade_client.utils import ( ButtonStyle, - perform_optimade_query, + check_entry_properties, handle_errors, + ordered_query_url, + perform_optimade_query, + SESSION, TIMEOUT_SECONDS, - check_entry_properties, ) @@ -372,7 +374,11 @@ def 
_query(self, link: str = None) -> dict: # If a complete link is provided, use it straight up if link is not None: try: - response = requests.get(link, timeout=TIMEOUT_SECONDS).json() + link = ordered_query_url(link) + response = SESSION.get(link, timeout=TIMEOUT_SECONDS) + if response.from_cache: + LOGGER.debug("Request to %s was taken from cache !", link) + response = response.json() except ( requests.exceptions.ConnectTimeout, requests.exceptions.ConnectionError, diff --git a/optimade_client/subwidgets/__init__.py b/optimade_client/subwidgets/__init__.py index 91a36067..07b373a1 100644 --- a/optimade_client/subwidgets/__init__.py +++ b/optimade_client/subwidgets/__init__.py @@ -1,17 +1,17 @@ # pylint: disable=undefined-variable -from .filter_inputs import * -from .multi_checkbox import * -from .output_summary import * -from .periodic_table import * -from .provider_database import * -from .results import * +from .filter_inputs import * # noqa: F403 +from .multi_checkbox import * # noqa: F403 +from .output_summary import * # noqa: F403 +from .periodic_table import * # noqa: F403 +from .provider_database import * # noqa: F403 +from .results import * # noqa: F403 __all__ = ( - filter_inputs.__all__ # noqa - + multi_checkbox.__all__ # noqa - + output_summary.__all__ # noqa - + periodic_table.__all__ # noqa - + provider_database.__all__ # noqa - + results.__all__ # noqa + filter_inputs.__all__ # noqa: F405 + + multi_checkbox.__all__ # noqa: F405 + + output_summary.__all__ # noqa: F405 + + periodic_table.__all__ # noqa: F405 + + provider_database.__all__ # noqa: F405 + + results.__all__ # noqa: F405 ) diff --git a/optimade_client/subwidgets/provider_database.py b/optimade_client/subwidgets/provider_database.py index 9b65d060..ce9ca655 100644 --- a/optimade_client/subwidgets/provider_database.py +++ b/optimade_client/subwidgets/provider_database.py @@ -11,20 +11,22 @@ import requests import traitlets -from optimade.models import LinksResourceAttributes, LinksResource 
+from optimade.models import LinksResource, LinksResourceAttributes from optimade.models.links import LinkType -from optimade_client.exceptions import QueryError, OptimadeClientError +from optimade_client.exceptions import OptimadeClientError, QueryError from optimade_client.logger import LOGGER from optimade_client.subwidgets.results import ResultsPageChooser from optimade_client.utils import ( get_list_of_valid_providers, get_versioned_base_url, handle_errors, + ordered_query_url, perform_optimade_query, - validate_api_version, + SESSION, TIMEOUT_SECONDS, update_old_links_resources, + validate_api_version, ) @@ -453,7 +455,11 @@ def _query( # pylint: disable=too-many-locals,too-many-branches,too-many-statem f"?{parsed_query}" ) - response = requests.get(link, timeout=TIMEOUT_SECONDS).json() + link = ordered_query_url(link) + response = SESSION.get(link, timeout=TIMEOUT_SECONDS) + if response.from_cache: + LOGGER.debug("Request to %s was taken from cache !", link) + response = response.json() except ( requests.exceptions.ConnectTimeout, requests.exceptions.ConnectionError, @@ -641,13 +647,13 @@ def _on_database_change(self, change): def _update_provider(self): """Update provider summary""" html_text = f"""{getattr(self.provider, 'name', 'Provider')} -

{getattr(self.provider, 'description', '')}

""" +

{getattr(self.provider, 'description', '')}

""" self.provider_summary.value = html_text def _update_database(self): """Update database summary""" html_text = f"""{getattr(self.database, 'name', 'Database')} -

{getattr(self.database, 'description', '')}

""" +

{getattr(self.database, 'description', '')}

""" self.database_summary.value = html_text def freeze(self): diff --git a/optimade_client/utils.py b/optimade_client/utils.py index 5b4322a8..b61561d2 100644 --- a/optimade_client/utils.py +++ b/optimade_client/utils.py @@ -1,8 +1,9 @@ +from collections import OrderedDict from enum import Enum from pathlib import Path import re from typing import Tuple, List, Union, Iterable -from urllib.parse import urlencode +from urllib.parse import urlencode, urlparse, urlunparse, parse_qs try: import simplejson as json @@ -12,6 +13,9 @@ from json import JSONDecodeError import appdirs +from cachecontrol import CacheControlAdapter +from cachecontrol.caches.file_cache import FileCache +from cachecontrol.heuristics import ExpiresAfter from pydantic import ValidationError, AnyUrl # pylint: disable=no-name-in-module import requests @@ -40,6 +44,16 @@ CACHE_DIR.mkdir(parents=True, exist_ok=True) CACHED_PROVIDERS = CACHE_DIR / "cached_providers.json" +SESSION = requests.Session() +SESSION_ADAPTER = CacheControlAdapter( + cache=FileCache(CACHE_DIR / ".requests_cache"), heuristic=ExpiresAfter(days=1) +) +SESSION_ADAPTER_DEBUG = CacheControlAdapter() +SESSION.mount("http://", SESSION_ADAPTER) +SESSION.mount("https://", SESSION_ADAPTER) +SESSION.mount("http://localhost", SESSION_ADAPTER_DEBUG) +SESSION.mount("http://127.0.0.1", SESSION_ADAPTER_DEBUG) + class ButtonStyle(Enum): """Enumeration of button styles""" @@ -65,7 +79,7 @@ def perform_optimade_query( # pylint: disable=too-many-arguments,too-many-branc page_number: int = None, ) -> dict: """Perform query of database""" - queries = {} + queries = OrderedDict() if endpoint is None: endpoint = "/structures" @@ -115,7 +129,9 @@ def perform_optimade_query( # pylint: disable=too-many-arguments,too-many-branc complete_url = f"{url_path}?{url_query}" LOGGER.debug("Performing OPTIMADE query:\n%s", complete_url) try: - response = requests.get(complete_url, timeout=TIMEOUT_SECONDS) + response = SESSION.get(complete_url, 
timeout=TIMEOUT_SECONDS) + if response.from_cache: + LOGGER.debug("Request to %s was taken from cache !", complete_url) except ( requests.exceptions.ConnectTimeout, requests.exceptions.ConnectionError, @@ -273,7 +289,9 @@ def get_versioned_base_url( # pylint: disable=too-many-branches f"{base_url}versions" if base_url.endswith("/") else f"{base_url}/versions" ) try: - response = requests.get(versions_endpoint, timeout=TIMEOUT_SECONDS) + response = SESSION.get(versions_endpoint, timeout=TIMEOUT_SECONDS) + if response.from_cache: + LOGGER.debug("Request to %s was taken from cache !", versions_endpoint) except ( requests.exceptions.ConnectTimeout, requests.exceptions.ConnectionError, @@ -312,9 +330,13 @@ def get_versioned_base_url( # pylint: disable=too-many-branches base_url + version[1:] if base_url.endswith("/") else base_url + version ) try: - response = requests.get( + response = SESSION.get( f"{versioned_base_url}/info", timeout=timeout_seconds ) + if response.from_cache: + LOGGER.debug( + "Request to %s/info was taken from cache !", versioned_base_url + ) except ( requests.exceptions.ConnectTimeout, requests.exceptions.ConnectionError, @@ -416,7 +438,9 @@ def get_structures_schema(base_url: str) -> dict: ) try: - response = requests.get(url_path, timeout=TIMEOUT_SECONDS) + response = SESSION.get(url_path, timeout=TIMEOUT_SECONDS) + if response.from_cache: + LOGGER.debug("Request to %s was taken from cache !", url_path) except ( requests.exceptions.ConnectTimeout, requests.exceptions.ConnectionError, @@ -609,3 +633,23 @@ def update_old_links_resources(resource: dict) -> Union[LinksResource, None]: return res else: return res + + +def ordered_query_url(url: str) -> str: + """Decode URL, sort queries, re-encode URL""" + parsed_url = urlparse(url) + queries = parse_qs(parsed_url.query) + + sorted_keys = sorted(queries.keys()) + + res = OrderedDict() + for key in sorted_keys: + # Since the values are all lists, we also sort these + res[key] = sorted(queries[key]) + 
+ res = urlencode(res) + res = ( + f"{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path};{parsed_url.params}?{res}" + f"#{parsed_url.fragment}" + ) + return urlunparse(urlparse(res)) diff --git a/requirements.txt b/requirements.txt index dc38b49b..adba9bd9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ appdirs~=1.4.4 appmode~=0.8.0 ase~=3.20 +cachecontrol[filecache]~=0.12.6 click~=7.1 ipywidgets~=7.5 jupyterlab~=2.2 diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 00000000..2f584c74 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,8 @@ +[flake8] +ignore = + # Line too long. Handled by black. + E501 + # Line break before binary operator. This is preferred formatting for black. + W503 + # Whitespace before ':' + E203 diff --git a/tasks.py b/tasks.py index 67c6c907..952defea 100644 --- a/tasks.py +++ b/tasks.py @@ -13,7 +13,9 @@ def update_file(filename: str, sub_line: Tuple[str, str], strip: str = None): """Utility function for tasks to read, update, and write files""" with open(filename, "r") as handle: - lines = [re.sub(sub_line[0], sub_line[1], l.rstrip(strip)) for l in handle] + lines = [ + re.sub(sub_line[0], sub_line[1], line.rstrip(strip)) for line in handle + ] with open(filename, "w") as handle: handle.write("\n".join(lines))