From 5132c0dd2fd6eb8da53b471004311d5b34b65beb Mon Sep 17 00:00:00 2001
From: Zdenek Styblik
Date: Sun, 30 Jun 2024 21:51:55 +0200
Subject: [PATCH 1/6] Move CachedData and HTTPSource classes into separate files

---
 cache_stats.py                              |   5 +-
 gh2slack.py                                 |  16 ++-
 git_commits2slack.py                        |   6 +-
 lib/__init__.py                             |   8 ++
 lib/cached_data.py                          |  45 ++++++
 lib/config_options.py                       |   9 ++
 lib/http_source.py                          |  41 ++++++
 lib/tests/test_cached_data.py               |  60 ++++++++
 lib/tests/test_http_source.py               |  62 ++++++++
 migrations/convert_cache_to_dataclass_v1.py |  11 +-
 migrations/convert_cache_to_dataclass_v2.py |  14 +-
 .../test_convert_cache_to_dataclass_v1.py   |   5 +-
 phpbb2slack.py                              |  27 ++--
 rss2irc.py                                  |  86 ++---------
 rss2slack.py                                |  11 +-
 tests/test_cache_stats.py                   |   7 +-
 tests/test_gh2slack.py                      |   9 +-
 tests/test_phpbb2slack.py                   |  18 ++-
 tests/test_rss2irc.py                       | 133 ++----------------
 tests/test_rss2slack.py                     |  14 +-
 20 files changed, 328 insertions(+), 259 deletions(-)
 create mode 100644 lib/__init__.py
 create mode 100644 lib/cached_data.py
 create mode 100644 lib/config_options.py
 create mode 100644 lib/http_source.py
 create mode 100644 lib/tests/test_cached_data.py
 create mode 100644 lib/tests/test_http_source.py

diff --git a/cache_stats.py b/cache_stats.py
index 2772c27..a835313 100755
--- a/cache_stats.py
+++ b/cache_stats.py
@@ -10,6 +10,7 @@ from dataclasses import dataclass
 
 import rss2irc
+from lib import CachedData
 
 BUCKET_COUNT = 10
 
@@ -24,7 +25,7 @@ class Bucket:
 
 
 def calc_distribution(
-    logger: logging.Logger, cache: rss2irc.CachedData, buckets
+    logger: logging.Logger, cache: CachedData, buckets
 ) -> int:
     """Calculate item distribution inside cache."""
     keys = list(buckets.keys())
@@ -70,7 +71,7 @@ def get_timestamp(data) -> int:
 
 
 def get_timestamp_minmax(
-    logger: logging.Logger, cache: rss2irc.CachedData
+    logger: logging.Logger, cache: CachedData
 ) -> (int, int, int):
     """Return timestamp min, max and no. of errors."""
     ts_min = 99999999999
diff --git a/gh2slack.py b/gh2slack.py
index 8037c23..f555d5d 100755
--- a/gh2slack.py
+++ b/gh2slack.py
@@ -17,7 +17,9 @@ import requests
 
 import rss2irc  # noqa: I202
-import rss2slack
+import rss2slack  # noqa: I202
+from lib import CachedData  # noqa: I202
+from lib import config_options  # noqa: I202
 
 ALIASES = {
     "issues": "issue",
@@ -101,7 +103,7 @@ def gh_parse_next_page(link_header: str) -> str:
 
 
 def gh_request(
-    logger: logging.Logger, url: str, timeout: int = rss2irc.HTTP_TIMEOUT
+    logger: logging.Logger, url: str, timeout: int = config_options.HTTP_TIMEOUT
 ) -> List:
     """Return list of responses from GitHub.
@@ -223,7 +225,7 @@ def parse_args() -> argparse.Namespace:
         "--cache-expiration",
         dest="cache_expiration",
         type=int,
-        default=rss2irc.CACHE_EXPIRATION,
+        default=config_options.CACHE_EXPIRATION,
         help="Time, in seconds, for how long to keep items " "in cache.",
     )
     parser.add_argument(
@@ -275,9 +277,9 @@ def parse_args() -> argparse.Namespace:
         "--slack-timeout",
         dest="slack_timeout",
         type=int,
-        default=rss2irc.HTTP_TIMEOUT,
+        default=config_options.HTTP_TIMEOUT,
         help="Slack API Timeout. Defaults to {:d} seconds.".format(
-            rss2irc.HTTP_TIMEOUT
+            config_options.HTTP_TIMEOUT
         ),
     )
     parser.add_argument(
@@ -303,7 +305,7 @@ def parse_args() -> argparse.Namespace:
 
 def process_page_items(
     logger: logging.Logger,
-    cache: rss2irc.CachedData,
+    cache: CachedData,
     pages: List,
     expiration: int,
     repository_url: str,
@@ -347,7 +349,7 @@ def process_page_items(
     return to_publish
 
 
-def scrub_items(logger: logging.Logger, cache: rss2irc.CachedData) -> None:
+def scrub_items(logger: logging.Logger, cache: CachedData) -> None:
     """Scrub cache and remove expired items."""
     time_now = int(time.time())
     for key in list(cache.items.keys()):
diff --git a/git_commits2slack.py b/git_commits2slack.py
index b3196fe..1fad68c 100755
--- a/git_commits2slack.py
+++ b/git_commits2slack.py
@@ -16,8 +16,8 @@ from typing import Dict
 from typing import List
 
-import rss2irc
 import rss2slack
+from lib import config_options
 
 RE_GIT_AUTD = re.compile(r"^Already up-to-date.$")
 RE_GIT_UPDATING = re.compile(r"^Updating [a-z0-9]+", re.I)
@@ -254,9 +254,9 @@ def parse_args() -> argparse.Namespace:
         "--slack-timeout",
         dest="slack_timeout",
         type=int,
-        default=rss2irc.HTTP_TIMEOUT,
+        default=config_options.HTTP_TIMEOUT,
         help="Slack API Timeout. Defaults to {:d} seconds.".format(
-            rss2irc.HTTP_TIMEOUT
+            config_options.HTTP_TIMEOUT
         ),
     )
     parser.add_argument(
diff --git a/lib/__init__.py b/lib/__init__.py
new file mode 100644
index 0000000..3ce316d
--- /dev/null
+++ b/lib/__init__.py
@@ -0,0 +1,8 @@
+#!/usr/bin/env python3
+"""Just init.
+
+I love how black and reorder-python-imports play nicely together and no
+workarounds are needed whatsoever.
+"""
+from .cached_data import CachedData  # noqa: F401
+from .http_source import HTTPSource  # noqa: F401
diff --git a/lib/cached_data.py b/lib/cached_data.py
new file mode 100644
index 0000000..419c8ef
--- /dev/null
+++ b/lib/cached_data.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python3
+"""Code related to Cache.
+
+I love how black and reorder-python-imports play nicely together and no
+workarounds are needed whatsoever.
+"""
+import time
+from dataclasses import dataclass
+from dataclasses import field
+
+from .config_options import DATA_SOURCE_EXPIRATION
+from .http_source import HTTPSource
+
+
+@dataclass
+class CachedData:
+    """CachedData represents locally cached data and state."""
+
+    data_sources: dict = field(default_factory=dict)
+    items: dict = field(default_factory=dict)
+
+    def get_source_by_url(self, url: str) -> HTTPSource:
+        """Return source by URL.
+
+        If source doesn't exist, it will be created.
+        """
+        source = self.data_sources.get(url, None)
+        if source:
+            source.last_used_ts = int(time.time())
+            return source
+
+        self.data_sources[url] = HTTPSource(
+            last_used_ts=int(time.time()), url=url
+        )
+        return self.get_source_by_url(url)
+
+    def scrub_data_sources(
+        self, expiration: int = DATA_SOURCE_EXPIRATION
+    ) -> None:
+        """Delete expired data sources."""
+        now = int(time.time())
+        for key in list(self.data_sources.keys()):
+            diff = now - self.data_sources[key].last_used_ts
+            if int(diff) > expiration:
+                self.data_sources.pop(key)
diff --git a/lib/config_options.py b/lib/config_options.py
new file mode 100644
index 0000000..33b1fb7
--- /dev/null
+++ b/lib/config_options.py
@@ -0,0 +1,9 @@
+#!/usr/bin/env python3
+"""Common configuration options.
+
+I love how black and reorder-python-imports play nicely together and no
+workarounds are needed whatsoever.
+""" +CACHE_EXPIRATION = 86400 # seconds +DATA_SOURCE_EXPIRATION = 30 * 86400 # seconds +HTTP_TIMEOUT = 30 # seconds diff --git a/lib/http_source.py b/lib/http_source.py new file mode 100644 index 0000000..470c4ef --- /dev/null +++ b/lib/http_source.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python3 +"""Code related to HTTP Source. + +I love how black and reorder-python-imports play nicely together and no +workarounds are needed what so ever. +""" +from dataclasses import dataclass +from dataclasses import field +from typing import Dict + + +@dataclass +class HTTPSource: + """Class represents HTTP data source.""" + + http_etag: str = field(default_factory=str) + http_last_modified: str = field(default_factory=str) + last_used_ts: int = 0 + url: str = field(default_factory=str) + + def extract_caching_headers(self, headers: Dict[str, str]) -> None: + """Extract cache related headers from given dict.""" + self.http_etag = "" + self.http_last_modified = "" + for key, value in headers.items(): + key = key.lower() + if key == "etag": + self.http_etag = value + elif key == "last-modified": + self.http_last_modified = value + + def make_caching_headers(self) -> Dict[str, str]: + """Return cache related headers as a dict.""" + headers = {} + if self.http_etag: + headers["if-none-match"] = self.http_etag + + if self.http_last_modified: + headers["if-modified-since"] = self.http_last_modified + + return headers diff --git a/lib/tests/test_cached_data.py b/lib/tests/test_cached_data.py new file mode 100644 index 0000000..ca2ef8f --- /dev/null +++ b/lib/tests/test_cached_data.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 +"""Unit tests for cached_data.py.""" +import time +from unittest.mock import patch + +from lib import CachedData +from lib import config_options +from lib import HTTPSource # noqa: I100 + + +@patch("lib.cached_data.time.time") +def test_cache_get_source_by_url(mock_time): + """Test that CachedData.get_source_by_url() sets last_used_ts attr.""" + mock_time.return_value = 1717428213 + url = "http://example.com" + source = HTTPSource( + last_used_ts=0, + url=url, + ) + cache = CachedData( + data_sources={ + url: source, + } + ) + result = cache.get_source_by_url(url) + assert result == source + assert result.last_used_ts == 1717428213 + + +def test_cache_scrub_data_sources_empty(cache): + """Test that CachedData.scrub_data_sources() when there are no sources.""" + cache = CachedData() + assert not cache.data_sources + cache.scrub_data_sources() + assert not cache.data_sources + + +def test_cache_scrub_data_sources(cache): + """Test that CachedData.scrub_data_sources() expired source is removed.""" + source1_url = "http://ww1.example.com" + source2_url = "http://ww2.example.com" + cache = CachedData() + source1 = cache.get_source_by_url(source1_url) + assert source1.last_used_ts > 0 + source1.last_used_ts = ( + int(time.time()) - 2 * config_options.DATA_SOURCE_EXPIRATION + ) + + source2 = cache.get_source_by_url(source2_url) + assert source2.last_used_ts > 0 + + assert "http://ww1.example.com" in cache.data_sources + assert source1.url == source1_url + assert "http://ww2.example.com" in cache.data_sources + assert source2.url == source2_url + + cache.scrub_data_sources() + + assert "http://ww1.example.com" not in cache.data_sources + assert "http://ww2.example.com" in cache.data_sources diff --git a/lib/tests/test_http_source.py b/lib/tests/test_http_source.py new file mode 100644 index 0000000..adfa261 --- /dev/null +++ b/lib/tests/test_http_source.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 
+"""Unit tests for http_source.py.""" +import pytest + +from lib import HTTPSource # noqa: I202 + + +@pytest.mark.parametrize( + "source,input_data,expected", + [ + # No attrs should bet set + ( + HTTPSource(), + {}, + {"etag": "", "last_modified": ""}, + ), + # Reset attrs + ( + HTTPSource(http_etag="et_test", http_last_modified="lm_test"), + {"header1": "firt", "header2": "second"}, + {"etag": "", "last_modified": ""}, + ), + # Set attrs + ( + HTTPSource(http_etag="et_test", http_last_modified="lm_test"), + {"ETag": "test123", "Last-Modified": "abc123", "some": "header"}, + {"etag": "test123", "last_modified": "abc123"}, + ), + ], +) +def test_http_source_extract_caching_headers(source, input_data, expected): + """Test that HTTPSource.extract_caching_headers() works as expected.""" + source.extract_caching_headers(input_data) + assert source.http_etag == expected["etag"] + assert source.http_last_modified == expected["last_modified"] + + +@pytest.mark.parametrize( + "source,expected", + [ + ( + HTTPSource(), + {}, + ), + ( + HTTPSource(http_etag="et_test"), + {"if-none-match": "et_test"}, + ), + ( + HTTPSource(http_last_modified="lm_test"), + {"if-modified-since": "lm_test"}, + ), + ( + HTTPSource(http_etag="et_test", http_last_modified="lm_test"), + {"if-modified-since": "lm_test", "if-none-match": "et_test"}, + ), + ], +) +def test_http_source_make_caching_headers(source, expected): + """Test that HTTPSource.make_caching_headers() works as expected.""" + result = source.make_caching_headers() + assert result == expected diff --git a/migrations/convert_cache_to_dataclass_v1.py b/migrations/convert_cache_to_dataclass_v1.py index 33baa1f..d51bbe8 100755 --- a/migrations/convert_cache_to_dataclass_v1.py +++ b/migrations/convert_cache_to_dataclass_v1.py @@ -15,13 +15,14 @@ import sys from importlib.machinery import SourceFileLoader -# NOTICE: An ugly hack in order to be able to import CachedData class from -# rss2irc. I'm real sorry about this, son. +# NOTICE: An ugly hack in order to be able to import CachedData class. +# I'm real sorry about this, son. # NOTE: Sadly, importlib.util and spec didn't cut it. Also, I'm out of time on # this. Therefore, see you again in the future once this ceases to work. SCRIPT_PATH = os.path.dirname(os.path.realpath(__file__)) -rss2irc_module_path = os.path.join(SCRIPT_PATH, "..", "rss2irc.py") -rss2irc = SourceFileLoader("rss2irc", rss2irc_module_path).load_module() +lib_module_path = os.path.join(SCRIPT_PATH, "..", "lib", "__init__.py") +lib = SourceFileLoader("lib", lib_module_path).load_module() +CachedData = lib.cached_data.CachedData def main(): @@ -50,7 +51,7 @@ def main(): logger.info("Create backup file '%s' from '%s'.", bak_file, args.cache) shutil.copy2(args.cache, bak_file) - new_cache = rss2irc.CachedData() + new_cache = CachedData() for key, value in cache.items(): new_cache.items[key] = value diff --git a/migrations/convert_cache_to_dataclass_v2.py b/migrations/convert_cache_to_dataclass_v2.py index 3c70887..6de9594 100644 --- a/migrations/convert_cache_to_dataclass_v2.py +++ b/migrations/convert_cache_to_dataclass_v2.py @@ -14,14 +14,16 @@ import sys from importlib.machinery import SourceFileLoader -# NOTICE: An ugly hack in order to be able to import CachedData class from -# rss2irc. I'm real sorry about this, son. -# NOTE: Sadly, importlib.util and spec didn't cut it. As usual, I'm out of time -# on this. Therefore, see you again in the future once this ceases to work. +# NOTICE: An ugly hack in order to be able to import CachedData class. 
+# I'm real sorry about this, son.
+# NOTE: Sadly, importlib.util and spec didn't cut it. Also, I'm out of time on
+# this. Therefore, see you again in the future once this ceases to work.
 SCRIPT_PATH = os.path.dirname(os.path.realpath(__file__))
+lib_module_path = os.path.join(SCRIPT_PATH, "..", "lib", "__init__.py")
+lib = SourceFileLoader("lib", lib_module_path).load_module()
 rss2irc_module_path = os.path.join(SCRIPT_PATH, "..", "rss2irc.py")
 rss2irc = SourceFileLoader("rss2irc", rss2irc_module_path).load_module()
-CachedData = rss2irc.CachedData
+CachedData = lib.cached_data.CachedData
 
 
 def main():
@@ -43,7 +45,7 @@ def main():
     logger.info("Create backup file '%s' from '%s'.", bak_file, args.cache)
     shutil.copy2(args.cache, bak_file)
 
-    new_cache = rss2irc.CachedData()
+    new_cache = CachedData()
     for key, value in cache.items.items():
         new_cache.items[key] = value
diff --git a/migrations/tests/test_convert_cache_to_dataclass_v1.py b/migrations/tests/test_convert_cache_to_dataclass_v1.py
index 53546bb..86606b3 100644
--- a/migrations/tests/test_convert_cache_to_dataclass_v1.py
+++ b/migrations/tests/test_convert_cache_to_dataclass_v1.py
@@ -8,7 +8,8 @@
 
 import pytest
 
-import rss2irc  # noqa:I202
+import rss2irc  # noqa: I202
+from lib import CachedData  # noqa: I202
 
 SCRIPT_PATH = os.path.dirname(os.path.realpath(__file__))
 
@@ -57,7 +58,7 @@ def test_migration(fixture_cache_file, fixture_bak_cleanup):
     with open(fixture_cache_file, "wb") as fhandle:
         pickle.dump(test_data, fhandle, pickle.HIGHEST_PROTOCOL)
 
-    expected_cache = rss2irc.CachedData(
+    expected_cache = CachedData(
         items={
             "test1": 1234,
             "test2": 0,
diff --git a/phpbb2slack.py b/phpbb2slack.py
index 9452e20..07f26ae 100755
--- a/phpbb2slack.py
+++ b/phpbb2slack.py
@@ -14,10 +14,9 @@
 import feedparser
 
 import rss2irc  # noqa: I202
-import rss2slack
-
-CACHE_EXPIRATION = 86400  # seconds
-HTTP_TIMEOUT = 30  # seconds
+import rss2slack  # noqa: I202
+from lib import CachedData  # noqa: I202
+from lib import config_options  # noqa: I202
 
 
 def format_message(
@@ -163,7 +162,7 @@ def parse_args() -> argparse.Namespace:
         "--cache-expiration",
         dest="cache_expiration",
         type=int,
-        default=CACHE_EXPIRATION,
+        default=config_options.CACHE_EXPIRATION,
         help="Time, in seconds, for how long to keep items in cache.",
     )
     parser.add_argument(
@@ -194,8 +193,10 @@ def parse_args() -> argparse.Namespace:
         "--rss-http-timeout",
         dest="rss_http_timeout",
         type=int,
-        default=HTTP_TIMEOUT,
-        help="HTTP Timeout. Defaults to {:d} seconds.".format(HTTP_TIMEOUT),
+        default=config_options.HTTP_TIMEOUT,
+        help="HTTP Timeout. Defaults to {:d} seconds.".format(
+            config_options.HTTP_TIMEOUT
+        ),
     )
     parser.add_argument(
         "--slack-base-url",
@@ -215,9 +216,9 @@ def parse_args() -> argparse.Namespace:
         "--slack-timeout",
         dest="slack_timeout",
         type=int,
-        default=HTTP_TIMEOUT,
+        default=config_options.HTTP_TIMEOUT,
         help="Slack API Timeout. Defaults to {:d} seconds.".format(
-            HTTP_TIMEOUT
+            config_options.HTTP_TIMEOUT
         ),
     )
     parser.add_argument(
@@ -274,9 +275,9 @@ def parse_news(data: str, authors: List[str]) -> Dict:
 
 def prune_news(
     logger: logging.Logger,
-    cache: rss2irc.CachedData,
+    cache: CachedData,
     news: Dict[str, Dict],
-    expiration: int = CACHE_EXPIRATION,
+    expiration: int = config_options.CACHE_EXPIRATION,
 ) -> None:
     """Prune news which already are in cache."""
     item_expiration = int(time.time()) + expiration
@@ -292,7 +293,7 @@ def prune_news(
         news.pop(key)
 
 
-def scrub_items(logger: logging.Logger, cache: rss2irc.CachedData) -> None:
+def scrub_items(logger: logging.Logger, cache: CachedData) -> None:
     """Scrub cache and remove expired items."""
     time_now = int(time.time())
     for key in list(cache.items.keys()):
@@ -312,7 +313,7 @@ def scrub_items(logger: logging.Logger, cache: rss2irc.CachedData) -> None:
 
 
 def update_items_expiration(
-    cache: rss2irc.CachedData, news: Dict, expiration: int
+    cache: CachedData, news: Dict, expiration: int
 ) -> None:
     """Update cache contents."""
     item_expiration = int(time.time()) + expiration
diff --git a/rss2irc.py b/rss2irc.py
index 5471ca4..a3c527b 100755
--- a/rss2irc.py
+++ b/rss2irc.py
@@ -12,8 +12,6 @@ import sys
 import time
 import traceback
-from dataclasses import dataclass
-from dataclasses import field
 from typing import BinaryIO
 from typing import Dict
 from typing import Tuple
@@ -21,74 +19,8 @@
 import feedparser
 import requests
 
-CACHE_EXPIRATION = 86400  # seconds
-DATA_SOURCE_EXPIRATION = 30 * 86400  # seconds
-HTTP_TIMEOUT = 30  # seconds
-
-
-@dataclass
-class HTTPSource:
-    """Class represents HTTP data source."""
-
-    http_etag: str = field(default_factory=str)
-    http_last_modified: str = field(default_factory=str)
-    last_used_ts: int = 0
-    url: str = field(default_factory=str)
-
-    def extract_caching_headers(self, headers: Dict[str, str]) -> None:
-        """Extract cache related headers from given dict."""
-        self.http_etag = ""
-        self.http_last_modified = ""
-        for key, value in headers.items():
-            key = key.lower()
-            if key == "etag":
-                self.http_etag = value
-            elif key == "last-modified":
-                self.http_last_modified = value
-
-    def make_caching_headers(self) -> Dict[str, str]:
-        """Return cache related headers as a dict."""
-        headers = {}
-        if self.http_etag:
-            headers["if-none-match"] = self.http_etag
-
-        if self.http_last_modified:
-            headers["if-modified-since"] = self.http_last_modified
-
-        return headers
-
-
-@dataclass
-class CachedData:
-    """CachedData represents locally cached data and state."""
-
-    data_sources: dict = field(default_factory=dict)
-    items: dict = field(default_factory=dict)
-
-    def get_source_by_url(self, url: str) -> HTTPSource:
-        """Return source by URL.
-
-        If source doesn't exist, it will be created.
-        """
-        source = self.data_sources.get(url, None)
-        if source:
-            source.last_used_ts = int(time.time())
-            return source
-
-        self.data_sources[url] = HTTPSource(
-            last_used_ts=int(time.time()), url=url
-        )
-        return self.get_source_by_url(url)
-
-    def scrub_data_sources(
-        self, expiration: int = DATA_SOURCE_EXPIRATION
-    ) -> None:
-        """Delete expired data sources."""
-        now = int(time.time())
-        for key in list(self.data_sources.keys()):
-            diff = now - self.data_sources[key].last_used_ts
-            if int(diff) > expiration:
-                self.data_sources.pop(key)
+from lib import CachedData  # noqa: I202
+from lib import config_options  # noqa: I202
 
 
 def format_message(
@@ -114,7 +46,7 @@ def format_message(
 def get_rss(
     logger: logging.Logger,
     url: str,
-    timeout: int = HTTP_TIMEOUT,
+    timeout: int = config_options.HTTP_TIMEOUT,
     extra_headers: Dict = None,
 ) -> requests.models.Response:
     """Return body of given URL as a string."""
@@ -215,8 +147,10 @@ def parse_args() -> argparse.Namespace:
         "--rss-http-timeout",
         dest="rss_http_timeout",
         type=int,
-        default=HTTP_TIMEOUT,
-        help="HTTP Timeout. Defaults to {:d} seconds.".format(HTTP_TIMEOUT),
+        default=config_options.HTTP_TIMEOUT,
+        help="HTTP Timeout. Defaults to {:d} seconds.".format(
+            config_options.HTTP_TIMEOUT
+        ),
     )
     parser.add_argument(
         "--handle",
@@ -243,7 +177,7 @@ def parse_args() -> argparse.Namespace:
         "--cache-expiration",
         dest="cache_expiration",
         type=int,
-        default=CACHE_EXPIRATION,
+        default=config_options.CACHE_EXPIRATION,
         help="Time, in seconds, for how long to keep items in cache.",
     )
     parser.add_argument(
@@ -287,7 +221,7 @@ def prune_news(
     logger: logging.Logger,
     cache: CachedData,
     news: Dict[str, Tuple[str, str]],
-    expiration: int = CACHE_EXPIRATION,
+    expiration: int = config_options.CACHE_EXPIRATION,
 ) -> None:
     """Prune news which already are in cache."""
     item_expiration = int(time.time()) + expiration
@@ -349,7 +283,7 @@ def scrub_items(logger: logging.Logger, cache: CachedData) -> None:
 
 def update_items_expiration(
     cache: CachedData,
     news: Dict[str, Tuple[str, str]],
-    expiration: int = CACHE_EXPIRATION,
+    expiration: int = config_options.CACHE_EXPIRATION,
 ) -> None:
     """Update expiration of items in cache based on news dict."""
     item_expiration = int(time.time()) + expiration
diff --git a/rss2slack.py b/rss2slack.py
index 4bbebdd..4033c08 100755
--- a/rss2slack.py
+++ b/rss2slack.py
@@ -16,6 +16,7 @@
 from slack import WebClient
 
 import rss2irc  # noqa: I100, I202
+from lib import config_options  # noqa: I100, I202
 
 SLACK_BASE_URL = WebClient.BASE_URL
 
@@ -152,7 +153,7 @@ def parse_args() -> argparse.Namespace:
         "--cache-expiration",
         dest="cache_expiration",
         type=int,
-        default=rss2irc.CACHE_EXPIRATION,
+        default=config_options.CACHE_EXPIRATION,
         help="Time, in seconds, for how long to keep items in cache.",
     )
     parser.add_argument(
@@ -183,9 +184,9 @@ def parse_args() -> argparse.Namespace:
         "--rss-http-timeout",
         dest="rss_http_timeout",
         type=int,
-        default=rss2irc.HTTP_TIMEOUT,
+        default=config_options.HTTP_TIMEOUT,
         help="HTTP Timeout. Defaults to {:d} seconds.".format(
-            rss2irc.HTTP_TIMEOUT
+            config_options.HTTP_TIMEOUT
         ),
     )
     parser.add_argument(
@@ -206,9 +207,9 @@ def parse_args() -> argparse.Namespace:
         "--slack-timeout",
         dest="slack_timeout",
         type=int,
-        default=rss2irc.HTTP_TIMEOUT,
+        default=config_options.HTTP_TIMEOUT,
         help="Slack API Timeout. Defaults to {:d} seconds.".format(
-            rss2irc.HTTP_TIMEOUT
+            config_options.HTTP_TIMEOUT
         ),
     )
     parser.add_argument(
diff --git a/tests/test_cache_stats.py b/tests/test_cache_stats.py
index 53874a1..41d68fe 100644
--- a/tests/test_cache_stats.py
+++ b/tests/test_cache_stats.py
@@ -6,8 +6,9 @@
 import time
 from unittest.mock import patch
 
-import cache_stats  # noqa:I202
-import rss2irc  # noqa:I202
+import cache_stats  # noqa: I202
+import rss2irc  # noqa: I202
+from lib import CachedData  # noqa: I202
 
 SCRIPT_PATH = os.path.dirname(os.path.realpath(__file__))
 
@@ -16,7 +17,7 @@ def test_main_ideal(fixture_cache_file):
     """Simple run-through test."""
     rss_url = "https://example.com/rss"
 
-    cache = rss2irc.CachedData()
+    cache = CachedData()
     cache.items = {
         "a": int(time.time()),
         "b": int(time.time()),
diff --git a/tests/test_gh2slack.py b/tests/test_gh2slack.py
index ce27125..bcf051e 100644
--- a/tests/test_gh2slack.py
+++ b/tests/test_gh2slack.py
@@ -12,8 +12,9 @@
 
 import pytest
 
-import gh2slack  # noqa:I100,I202
-import rss2irc  # noqa:I100,I202
+import gh2slack  # noqa: I100, I202
+import rss2irc  # noqa: I100, I202
+from lib import CachedData  # noqa: I100, I202
 
 SCRIPT_PATH = os.path.dirname(os.path.realpath(__file__))
 
@@ -351,7 +352,7 @@ def test_process_page_items():
         ],
     ]
     repository_url = "http://example.com"
-    cache = rss2irc.CachedData(
+    cache = CachedData(
         items={
             "http://example.com/bar": {
                 "expiration": 0,
@@ -391,7 +392,7 @@ def test_process_page_items():
 def test_scrub_items():
     """Test scrub_items()."""
     item_expiration = int(time.time()) + 60
-    test_cache = rss2irc.CachedData(
+    test_cache = CachedData(
         items={
             "foo": {
                 "expiration": item_expiration,
diff --git a/tests/test_phpbb2slack.py b/tests/test_phpbb2slack.py
index b3e2b73..d7ed26c 100644
--- a/tests/test_phpbb2slack.py
+++ b/tests/test_phpbb2slack.py
@@ -10,8 +10,10 @@
 
 import pytest
 
-import phpbb2slack  # noqa:I100,I202
-import rss2irc
+import phpbb2slack  # noqa: I100, I202
+import rss2irc  # noqa: I100, I202
+from lib import CachedData  # noqa: I100, I202
+from lib import config_options  # noqa: I100, I202
 
 ITEM_EXPIRATION = int(time.time())
 SCRIPT_PATH = os.path.dirname(os.path.realpath(__file__))
@@ -156,13 +158,15 @@ def test_main_ideal(
     )
     fixture_http_server.capture_requests = True
 
-    cache = rss2irc.CachedData()
+    cache = CachedData()
     source1 = cache.get_source_by_url(rss_url)
     source1.http_etag = ""
     source1.http_last_modified = ""
     source1.last_used_ts = int(time.time()) - 2 * 86400
     source2 = cache.get_source_by_url("http://delete.example.com")
-    source2.last_used_ts = int(time.time()) - 2 * rss2irc.DATA_SOURCE_EXPIRATION
+    source2.last_used_ts = (
+        int(time.time()) - 2 * config_options.DATA_SOURCE_EXPIRATION
+    )
     rss2irc.write_cache(cache, fixture_cache_file)
     #
     authors_file = os.path.join(SCRIPT_PATH, "files", "authors.txt")
@@ -261,7 +265,7 @@ def test_main_cache_hit(
     )
     fixture_http_server.capture_requests = True
 
-    cache = rss2irc.CachedData()
+    cache = CachedData()
     source1 = cache.get_source_by_url(rss_url)
     source1.http_etag = "pytest_etag"
     source1.http_last_modified = "pytest_lm"
@@ -358,7 +362,7 @@ def test_parse_news():
     "cache,expected_cache",
     [
         (
-            rss2irc.CachedData(
+            CachedData(
                 items={
                     "foo": {
                         "expiration": get_item_expiration() + 60,
@@ -399,7 +403,7 @@ def test_scrub_items(cache, expected_cache):
                     "comments_cnt": 20,
                 },
             },
-            rss2irc.CachedData(
+            CachedData(
                 items={
                     "http://example.com": {
                         "expiration": 0,
diff --git a/tests/test_rss2irc.py b/tests/test_rss2irc.py
index 98d772a..873420b 100644
--- a/tests/test_rss2irc.py
+++ b/tests/test_rss2irc.py
@@ -9,124 +9,13 @@
 
 import pytest
 
-import rss2irc  # noqa:I202
+import rss2irc  # noqa: I202
+from lib import CachedData  # noqa: I202
+from lib import config_options  # noqa: I202
 
 SCRIPT_PATH = os.path.dirname(os.path.realpath(__file__))
 
 
-@pytest.mark.parametrize(
-    "source,input_data,expected",
-    [
-        # No attrs should bet set
-        (
-            rss2irc.HTTPSource(),
-            {},
-            {"etag": "", "last_modified": ""},
-        ),
-        # Reset aatrs
-        (
-            rss2irc.HTTPSource(
-                http_etag="et_test", http_last_modified="lm_test"
-            ),
-            {"header1": "firt", "header2": "second"},
-            {"etag": "", "last_modified": ""},
-        ),
-        # Set attrs
-        (
-            rss2irc.HTTPSource(
-                http_etag="et_test", http_last_modified="lm_test"
-            ),
-            {"ETag": "test123", "Last-Modified": "abc123", "some": "header"},
-            {"etag": "test123", "last_modified": "abc123"},
-        ),
-    ],
-)
-def test_http_source_extract_caching_headers(source, input_data, expected):
-    """Test that HTTPSource.extract_caching_headers() works as expected."""
-    source.extract_caching_headers(input_data)
-    assert source.http_etag == expected["etag"]
-    assert source.http_last_modified == expected["last_modified"]
-
-
-@pytest.mark.parametrize(
-    "source,expected",
-    [
-        (
-            rss2irc.HTTPSource(),
-            {},
-        ),
-        (
-            rss2irc.HTTPSource(http_etag="et_test"),
-            {"if-none-match": "et_test"},
-        ),
-        (
-            rss2irc.HTTPSource(http_last_modified="lm_test"),
-            {"if-modified-since": "lm_test"},
-        ),
-        (
-            rss2irc.HTTPSource(
-                http_etag="et_test", http_last_modified="lm_test"
-            ),
-            {"if-modified-since": "lm_test", "if-none-match": "et_test"},
-        ),
-    ],
-)
-def test_http_source_make_caching_headers(source, expected):
-    """Test that HTTPSource.make_caching_headers() works as expected."""
-    result = source.make_caching_headers()
-    assert result == expected
-
-
-@patch("rss2irc.time.time")
-def test_cache_get_source_by_url(mock_time):
-    """Test that CachedData.get_source_by_url() sets last_used_ts attr."""
-    mock_time.return_value = 1717428213
-    url = "http://example.com"
-    source = rss2irc.HTTPSource(
-        last_used_ts=0,
-        url=url,
-    )
-    cache = rss2irc.CachedData(
-        data_sources={
-            url: source,
-        }
-    )
-    result = cache.get_source_by_url(url)
-    assert result == source
-    assert result.last_used_ts == 1717428213
-
-
-def test_cache_scrub_data_sources_empty(cache):
-    """Test that CachedData.scrub_data_sources() when there are no sources."""
-    cache = rss2irc.CachedData()
-    assert not cache.data_sources
-    cache.scrub_data_sources()
-    assert not cache.data_sources
-
-
-def test_cache_scrub_data_sources(cache):
-    """Test that CachedData.scrub_data_sources() expired source is removed."""
-    source1_url = "http://ww1.example.com"
-    source2_url = "http://ww2.example.com"
-    cache = rss2irc.CachedData()
-    source1 = cache.get_source_by_url(source1_url)
-    assert source1.last_used_ts > 0
-    source1.last_used_ts = int(time.time()) - 2 * rss2irc.DATA_SOURCE_EXPIRATION
-
-    source2 = cache.get_source_by_url(source2_url)
-    assert source2.last_used_ts > 0
-
-    assert "http://ww1.example.com" in cache.data_sources
-    assert source1.url == source1_url
-    assert "http://ww2.example.com" in cache.data_sources
-    assert source2.url == source2_url
-
-    cache.scrub_data_sources()
-
-    assert "http://ww1.example.com" not in cache.data_sources
-    assert "http://ww2.example.com" in cache.data_sources
-
-
 @pytest.mark.parametrize(
     "url,msg_attrs,handle,expected",
     [
@@ -183,13 +72,15 @@ def test_main_ideal(
         {"ETag": "pytest_etag", "Last-Modified": "pytest_lm"},
     )
 
-    cache = rss2irc.CachedData()
+    cache = CachedData()
     source1 = cache.get_source_by_url(rss_url)
     source1.http_etag = ""
     source1.http_last_modified = ""
     source1.last_used_ts = int(time.time()) - 2 * 86400
     source2 = cache.get_source_by_url("http://delete.example.com")
-    source2.last_used_ts = int(time.time()) - 2 * rss2irc.DATA_SOURCE_EXPIRATION
+    source2.last_used_ts = (
+        int(time.time()) - 2 * config_options.DATA_SOURCE_EXPIRATION
+    )
     rss2irc.write_cache(cache, fixture_cache_file)
 
     logger = logging.getLogger("test")
@@ -283,7 +174,7 @@ def test_main_cache_operations(
         {"ETag": "pytest_etag", "Last-Modified": "pytest_lm"},
    )
 
-    cache = rss2irc.CachedData()
+    cache = CachedData()
     cache.items[cache_key] = frozen_ts + 60
     cache.items["https://expired.example.com"] = 123456
     source1 = cache.get_source_by_url(rss_url)
@@ -291,7 +182,7 @@ def test_main_cache_operations(
     source1.http_last_modified = ""
     source1.last_used_ts = frozen_ts - 2 * 86400
     source2 = cache.get_source_by_url("http://delete.example.com")
-    source2.last_used_ts = frozen_ts - 2 * rss2irc.DATA_SOURCE_EXPIRATION
+    source2.last_used_ts = frozen_ts - 2 * config_options.DATA_SOURCE_EXPIRATION
     rss2irc.write_cache(cache, fixture_cache_file)
 
     logger = logging.getLogger("test")
@@ -340,7 +231,7 @@ def test_main_cache_operations(
     print("Cache: {}".format(cache))
     assert list(cache.items.keys()) == expected_cache_keys
     # Verify item expiration is updated
-    assert cache.items[cache_key] == frozen_ts + rss2irc.CACHE_EXPIRATION
+    assert cache.items[cache_key] == frozen_ts + config_options.CACHE_EXPIRATION
     # Verify data sources
     assert rss_url in cache.data_sources.keys()
     source = cache.get_source_by_url(rss_url)
@@ -378,7 +269,7 @@ def test_main_cache_hit(
         },
     )
 
-    cache = rss2irc.CachedData()
+    cache = CachedData()
     source1 = cache.get_source_by_url(rss_url)
     source1.http_etag = "pytest_etag"
     source1.http_last_modified = "pytest_last_modified"
@@ -471,7 +362,7 @@ def test_scrub_items():
     logger.disabled = True
 
     item_expiration = int(time.time()) + 60
-    test_cache = rss2irc.CachedData(
+    test_cache = CachedData(
         items={
             "foo": item_expiration,
             "bar": int(time.time()) - 3600,
diff --git a/tests/test_rss2slack.py b/tests/test_rss2slack.py
index da3f146..cb59835 100644
--- a/tests/test_rss2slack.py
+++ b/tests/test_rss2slack.py
@@ -10,8 +10,10 @@
 
 import pytest
 
-import rss2irc  # noqa:I100,I202
-import rss2slack  # noqa:I100,I202
+import rss2irc  # noqa: I100, I202
+import rss2slack  # noqa: I100, I202
+from lib import CachedData  # noqa: I100, I202
+from lib import config_options  # noqa: I100, I202
 
 SCRIPT_PATH = os.path.dirname(os.path.realpath(__file__))
 
@@ -150,13 +152,15 @@ def test_main_ideal(
     )
     fixture_http_server.capture_requests = True
 
-    cache = rss2irc.CachedData()
+    cache = CachedData()
     source1 = cache.get_source_by_url(rss_url)
     source1.http_etag = ""
     source1.http_last_modified = ""
     source1.last_used_ts = int(time.time()) - 2 * 86400
     source2 = cache.get_source_by_url("http://delete.example.com")
-    source2.last_used_ts = int(time.time()) - 2 * rss2irc.DATA_SOURCE_EXPIRATION
+    source2.last_used_ts = (
+        int(time.time()) - 2 * config_options.DATA_SOURCE_EXPIRATION
+    )
     rss2irc.write_cache(cache, fixture_cache_file)
     #
@@ -260,7 +264,7 @@ def test_main_cache_hit(
     )
     fixture_http_server.capture_requests = True
 
-    cache = rss2irc.CachedData()
+    cache = CachedData()
     source1 = cache.get_source_by_url(rss_url)
     source1.http_etag = "pytest_etag"
     source1.http_last_modified = "pytest_lm"

From d03a16d0f5eee95b0ad618efd5e98b4829cc8657 Mon Sep 17 00:00:00 2001
From: Zdenek Styblik
Date: Sun, 30 Jun 2024 22:00:58 +0200
Subject: [PATCH 2/6] Run reorder-python-imports with py311-plus

---
 ci/run-reorder-python-imports.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/run-reorder-python-imports.sh b/ci/run-reorder-python-imports.sh
index 4fad265..48dfe35 100755
--- a/ci/run-reorder-python-imports.sh
+++ b/ci/run-reorder-python-imports.sh
@@ -2,4 +2,4 @@
 set -e
 set -u
 
-reorder-python-imports `find . ! -path '*/\.*' -name '*.py'`
+reorder-python-imports --py311-plus `find . ! -path '*/\.*' -name '*.py'`

From 9738c8dac1bbf5bf7c7e38ee28cc0bad7ea64a78 Mon Sep 17 00:00:00 2001
From: Zdenek Styblik
Date: Mon, 1 Jul 2024 22:46:49 +0200
Subject: [PATCH 3/6] Add +x to migrations/convert_cache_to_dataclass_v2.py

---
 migrations/convert_cache_to_dataclass_v2.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 mode change 100644 => 100755 migrations/convert_cache_to_dataclass_v2.py

diff --git a/migrations/convert_cache_to_dataclass_v2.py b/migrations/convert_cache_to_dataclass_v2.py
old mode 100644
new mode 100755

From 21210d879174580d777f3a2a191ebb3bf6090527 Mon Sep 17 00:00:00 2001
From: Zdenek Styblik
Date: Mon, 1 Jul 2024 22:57:27 +0200
Subject: [PATCH 4/6] Add at least one test for migrations/convert_cache_to_dataclass_v2.py

---
 .../test_convert_cache_to_dataclass_v2.py | 27 +++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 migrations/tests/test_convert_cache_to_dataclass_v2.py

diff --git a/migrations/tests/test_convert_cache_to_dataclass_v2.py b/migrations/tests/test_convert_cache_to_dataclass_v2.py
new file mode 100644
index 0000000..905c2e5
--- /dev/null
+++ b/migrations/tests/test_convert_cache_to_dataclass_v2.py
@@ -0,0 +1,27 @@
+#!/usr/bin/env python3
+"""Unit tests for convert_cache_to_dataclass_v2.py."""
+import os
+import subprocess
+
+SCRIPT_PATH = os.path.dirname(os.path.realpath(__file__))
+
+
+def test_convert_v2_help_arg():
+    """Run convert v2 with --help arg.
+
+    As a matter of fact, this is just a dry run to catch import and syntax
+    errors. In other words to have at least some "test".
+    """
+    migration_script_fpath = os.path.join(
+        SCRIPT_PATH, "..", "convert_cache_to_dataclass_v2.py"
+    )
+    migration_proc = subprocess.Popen(
+        [migration_script_fpath, "--help"],
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        cwd=SCRIPT_PATH,
+    )
+    out, err = migration_proc.communicate()
+    assert migration_proc.returncode == 0
+    assert "usage: convert_cache_to_dataclass_v2.py" in out.decode("utf-8")
+    assert err.decode("utf-8") == ""

From e20f948e28d49abf4fdd89f31d5bf4bb278f8b15 Mon Sep 17 00:00:00 2001
From: Zdenek Styblik
Date: Tue, 2 Jul 2024 13:30:24 +0200
Subject: [PATCH 5/6] Set User-Agent HTTP header in requests from gh2slack

---
 gh2slack.py            |  6 +++++-
 tests/test_gh2slack.py | 14 +++++++++++---
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/gh2slack.py b/gh2slack.py
index f555d5d..fb91f33 100755
--- a/gh2slack.py
+++ b/gh2slack.py
@@ -111,9 +111,13 @@ def gh_request(
     responses.
""" logger.debug("Requesting %s", url) + user_agent = "gh2slack_{:d}".format(int(time.time())) rsp = requests.get( url, - headers={"Accept": "application/vnd.github.v3+json"}, + headers={ + "Accept": "application/vnd.github.v3+json", + "User-Agent": user_agent, + }, params={"state": "open", "sort": "created"}, timeout=timeout, ) diff --git a/tests/test_gh2slack.py b/tests/test_gh2slack.py index bcf051e..edccd61 100644 --- a/tests/test_gh2slack.py +++ b/tests/test_gh2slack.py @@ -100,10 +100,12 @@ def test_gh_request(mock_get): assert mocked_response._raise_for_status_called is True +@patch("gh2slack.time.time") @patch("requests.get") -def test_gh_request_follows_link_header(mock_get): +def test_gh_request_follows_link_header(mock_get, mock_time): """Test gh_request() follows up on 'Link' header.""" url = "https://api.github.com/repos/foo/bar" + mock_time.return_value = 123 mocked_response1 = MockedResponse( "foo", {"link": '; rel="next"'} ) @@ -112,13 +114,19 @@ def test_gh_request_follows_link_header(mock_get): expected_mock_calls = [ call( "https://api.github.com/repos/foo/bar", - headers={"Accept": "application/vnd.github.v3+json"}, + headers={ + "Accept": "application/vnd.github.v3+json", + "User-Agent": "gh2slack_123", + }, params={"sort": "created", "state": "open"}, timeout=30, ), call( "http://example.com", - headers={"Accept": "application/vnd.github.v3+json"}, + headers={ + "Accept": "application/vnd.github.v3+json", + "User-Agent": "gh2slack_123", + }, params={"sort": "created", "state": "open"}, timeout=30, ), From 672cdda8aa442ed152480d93fa1c67b6bce9e9cc Mon Sep 17 00:00:00 2001 From: Zdenek Styblik Date: Tue, 2 Jul 2024 15:18:21 +0200 Subject: [PATCH 6/6] Use capture_requests arg in MyContentServer --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 01e25a9..05265e3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -19,7 +19,7 @@ class MyContentServer(ContentServer): def __init__(self, capture_requests=False, *args, **kwargs): """Init.""" self.captured_requests = [] - self.capture_requests = False + self.capture_requests = capture_requests super(MyContentServer, self).__init__(*args, **kwargs) def __call__(self, environ, start_response):