Merge pull request #29 from zstyblik/refactoring_into_lib
Move CachedData and HTTPSource classes into separate files
zstyblik authored Jul 4, 2024
2 parents 4fbbce1 + 672cdda commit 085a31a
Showing 23 changed files with 373 additions and 265 deletions.
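
The practical effect of the refactoring is that scripts import the cache and HTTP-source classes from the new lib package instead of reaching into rss2irc. A minimal sketch of the new import style, using only names introduced in the diffs below (the feed URL is illustrative):

from lib import CachedData
from lib import HTTPSource
from lib import config_options

cache = CachedData()
source = cache.get_source_by_url("https://example.com/feed.xml")
assert isinstance(source, HTTPSource)
print(source.last_used_ts, config_options.HTTP_TIMEOUT)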
5 changes: 3 additions & 2 deletions cache_stats.py
@@ -10,6 +10,7 @@
from dataclasses import dataclass

import rss2irc
from lib import CachedData

BUCKET_COUNT = 10

@@ -24,7 +25,7 @@ class Bucket:


def calc_distribution(
logger: logging.Logger, cache: rss2irc.CachedData, buckets
logger: logging.Logger, cache: CachedData, buckets
) -> int:
"""Calculate item distribution inside cache."""
keys = list(buckets.keys())
@@ -70,7 +71,7 @@ def get_timestamp(data) -> int:


def get_timestamp_minmax(
logger: logging.Logger, cache: rss2irc.CachedData
logger: logging.Logger, cache: CachedData
) -> (int, int, int):
"""Return timestamp min, max and no. of errors."""
ts_min = 99999999999
2 changes: 1 addition & 1 deletion ci/run-reorder-python-imports.sh
@@ -2,4 +2,4 @@
set -e
set -u

reorder-python-imports `find . ! -path '*/\.*' -name '*.py'`
reorder-python-imports --py311-plus `find . ! -path '*/\.*' -name '*.py'`
22 changes: 14 additions & 8 deletions gh2slack.py
@@ -17,7 +17,9 @@
import requests

import rss2irc # noqa: I202
import rss2slack
import rss2slack # noqa: I202
from lib import CachedData # noqa: I202
from lib import config_options # noqa: I202

ALIASES = {
"issues": "issue",
@@ -101,17 +103,21 @@ def gh_parse_next_page(link_header: str) -> str:


def gh_request(
logger: logging.Logger, url: str, timeout: int = rss2irc.HTTP_TIMEOUT
logger: logging.Logger, url: str, timeout: int = config_options.HTTP_TIMEOUT
) -> List:
"""Return list of responses from GitHub.
Makes a request to GH, follows the 'Link' header if present, and returns
a list of responses.
"""
logger.debug("Requesting %s", url)
user_agent = "gh2slack_{:d}".format(int(time.time()))
rsp = requests.get(
url,
headers={"Accept": "application/vnd.github.v3+json"},
headers={
"Accept": "application/vnd.github.v3+json",
"User-Agent": user_agent,
},
params={"state": "open", "sort": "created"},
timeout=timeout,
)
@@ -223,7 +229,7 @@ def parse_args() -> argparse.Namespace:
"--cache-expiration",
dest="cache_expiration",
type=int,
default=rss2irc.CACHE_EXPIRATION,
default=config_options.CACHE_EXPIRATION,
help="Time, in seconds, for how long to keep items " "in cache.",
)
parser.add_argument(
@@ -275,9 +281,9 @@ def parse_args() -> argparse.Namespace:
"--slack-timeout",
dest="slack_timeout",
type=int,
default=rss2irc.HTTP_TIMEOUT,
default=config_options.HTTP_TIMEOUT,
help="Slack API Timeout. Defaults to {:d} seconds.".format(
rss2irc.HTTP_TIMEOUT
config_options.HTTP_TIMEOUT
),
)
parser.add_argument(
@@ -303,7 +309,7 @@

def process_page_items(
logger: logging.Logger,
cache: rss2irc.CachedData,
cache: CachedData,
pages: List,
expiration: int,
repository_url: str,
Expand Down Expand Up @@ -347,7 +353,7 @@ def process_page_items(
return to_publish


def scrub_items(logger: logging.Logger, cache: rss2irc.CachedData) -> None:
def scrub_items(logger: logging.Logger, cache: CachedData) -> None:
"""Scrub cache and remove expired items."""
time_now = int(time.time())
for key in list(cache.items.keys()):
6 changes: 3 additions & 3 deletions git_commits2slack.py
@@ -16,8 +16,8 @@
from typing import Dict
from typing import List

import rss2irc
import rss2slack
from lib import config_options

RE_GIT_AUTD = re.compile(r"^Already up-to-date.$")
RE_GIT_UPDATING = re.compile(r"^Updating [a-z0-9]+", re.I)
@@ -254,9 +254,9 @@ def parse_args() -> argparse.Namespace:
"--slack-timeout",
dest="slack_timeout",
type=int,
default=rss2irc.HTTP_TIMEOUT,
default=config_options.HTTP_TIMEOUT,
help="Slack API Timeout. Defaults to {:d} seconds.".format(
rss2irc.HTTP_TIMEOUT
config_options.HTTP_TIMEOUT
),
)
parser.add_argument(
8 changes: 8 additions & 0 deletions lib/__init__.py
@@ -0,0 +1,8 @@
#!/usr/bin/env python3
"""Just init.
I love how black and reorder-python-imports play nicely together and no
workarounds are needed whatsoever.
"""
from .cached_data import CachedData # noqa: F401
from .http_source import HTTPSource # noqa: F401
45 changes: 45 additions & 0 deletions lib/cached_data.py
@@ -0,0 +1,45 @@
#!/usr/bin/env python3
"""Code related to Cache.
I love how black and reorder-python-imports play nicely together and no
workarounds are needed whatsoever.
"""
import time
from dataclasses import dataclass
from dataclasses import field

from .config_options import DATA_SOURCE_EXPIRATION
from .http_source import HTTPSource


@dataclass
class CachedData:
"""CachedData represents locally cached data and state."""

data_sources: dict = field(default_factory=dict)
items: dict = field(default_factory=dict)

def get_source_by_url(self, url: str) -> HTTPSource:
"""Return source by URL.
If source doesn't exist, it will be created.
"""
source = self.data_sources.get(url, None)
if source:
source.last_used_ts = int(time.time())
return source

self.data_sources[url] = HTTPSource(
last_used_ts=int(time.time()), url=url
)
return self.get_source_by_url(url)

def scrub_data_sources(
self, expiration: int = DATA_SOURCE_EXPIRATION
) -> None:
"""Delete expired data sources."""
now = int(time.time())
for key in list(self.data_sources.keys()):
diff = now - self.data_sources[key].last_used_ts
if int(diff) > expiration:
self.data_sources.pop(key)
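
A short usage sketch of the class above, assuming the lib package is importable and using an illustrative URL: get_source_by_url() creates the source on first access and refreshes last_used_ts on later calls, while scrub_data_sources() drops sources that have gone unused longer than the expiration window.

import time

from lib import CachedData

cache = CachedData()
# First access creates the HTTPSource and stamps last_used_ts.
source = cache.get_source_by_url("https://example.com/rss")
assert source.last_used_ts > 0

# Pretend the source has been idle longer than DATA_SOURCE_EXPIRATION (30 days).
source.last_used_ts = int(time.time()) - 31 * 86400
cache.scrub_data_sources()
assert "https://example.com/rss" not in cache.data_sources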
9 changes: 9 additions & 0 deletions lib/config_options.py
@@ -0,0 +1,9 @@
#!/usr/bin/env python3
"""Common configuration options.
I love how black and reorder-python-imports play nicely together and no
workarounds are needed whatsoever.
"""
CACHE_EXPIRATION = 86400 # seconds
DATA_SOURCE_EXPIRATION = 30 * 86400 # seconds
HTTP_TIMEOUT = 30 # seconds
41 changes: 41 additions & 0 deletions lib/http_source.py
@@ -0,0 +1,41 @@
#!/usr/bin/env python3
"""Code related to HTTP Source.
I love how black and reorder-python-imports play nicely together and no
workarounds are needed whatsoever.
"""
from dataclasses import dataclass
from dataclasses import field
from typing import Dict


@dataclass
class HTTPSource:
"""Class represents HTTP data source."""

http_etag: str = field(default_factory=str)
http_last_modified: str = field(default_factory=str)
last_used_ts: int = 0
url: str = field(default_factory=str)

def extract_caching_headers(self, headers: Dict[str, str]) -> None:
"""Extract cache related headers from given dict."""
self.http_etag = ""
self.http_last_modified = ""
for key, value in headers.items():
key = key.lower()
if key == "etag":
self.http_etag = value
elif key == "last-modified":
self.http_last_modified = value

def make_caching_headers(self) -> Dict[str, str]:
"""Return cache related headers as a dict."""
headers = {}
if self.http_etag:
headers["if-none-match"] = self.http_etag

if self.http_last_modified:
headers["if-modified-since"] = self.http_last_modified

return headers
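
A hedged sketch of how the two methods above are meant to round-trip conditional GET headers with requests; the URL and the requests calls are illustrative, not part of this commit.

import requests

from lib import HTTPSource

source = HTTPSource(url="https://example.com/feed.xml")
# First fetch: no ETag/Last-Modified yet, so no conditional headers are sent.
rsp = requests.get(source.url, headers=source.make_caching_headers(), timeout=30)
source.extract_caching_headers(rsp.headers)
# Second fetch: send If-None-Match / If-Modified-Since back to the server.
rsp = requests.get(source.url, headers=source.make_caching_headers(), timeout=30)
if rsp.status_code == 304:
    print("Not modified since the last fetch.")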
60 changes: 60 additions & 0 deletions lib/tests/test_cached_data.py
@@ -0,0 +1,60 @@
#!/usr/bin/env python3
"""Unit tests for cached_data.py."""
import time
from unittest.mock import patch

from lib import CachedData
from lib import config_options
from lib import HTTPSource # noqa: I100


@patch("lib.cached_data.time.time")
def test_cache_get_source_by_url(mock_time):
"""Test that CachedData.get_source_by_url() sets last_used_ts attr."""
mock_time.return_value = 1717428213
url = "http://example.com"
source = HTTPSource(
last_used_ts=0,
url=url,
)
cache = CachedData(
data_sources={
url: source,
}
)
result = cache.get_source_by_url(url)
assert result == source
assert result.last_used_ts == 1717428213


def test_cache_scrub_data_sources_empty(cache):
"""Test that CachedData.scrub_data_sources() when there are no sources."""
cache = CachedData()
assert not cache.data_sources
cache.scrub_data_sources()
assert not cache.data_sources


def test_cache_scrub_data_sources(cache):
"""Test that CachedData.scrub_data_sources() expired source is removed."""
source1_url = "http://ww1.example.com"
source2_url = "http://ww2.example.com"
cache = CachedData()
source1 = cache.get_source_by_url(source1_url)
assert source1.last_used_ts > 0
source1.last_used_ts = (
int(time.time()) - 2 * config_options.DATA_SOURCE_EXPIRATION
)

source2 = cache.get_source_by_url(source2_url)
assert source2.last_used_ts > 0

assert "http://ww1.example.com" in cache.data_sources
assert source1.url == source1_url
assert "http://ww2.example.com" in cache.data_sources
assert source2.url == source2_url

cache.scrub_data_sources()

assert "http://ww1.example.com" not in cache.data_sources
assert "http://ww2.example.com" in cache.data_sources
62 changes: 62 additions & 0 deletions lib/tests/test_http_source.py
@@ -0,0 +1,62 @@
#!/usr/bin/env python3
"""Unit tests for http_source.py."""
import pytest

from lib import HTTPSource # noqa: I202


@pytest.mark.parametrize(
"source,input_data,expected",
[
# No attrs should be set
(
HTTPSource(),
{},
{"etag": "", "last_modified": ""},
),
# Reset attrs
(
HTTPSource(http_etag="et_test", http_last_modified="lm_test"),
{"header1": "firt", "header2": "second"},
{"etag": "", "last_modified": ""},
),
# Set attrs
(
HTTPSource(http_etag="et_test", http_last_modified="lm_test"),
{"ETag": "test123", "Last-Modified": "abc123", "some": "header"},
{"etag": "test123", "last_modified": "abc123"},
),
],
)
def test_http_source_extract_caching_headers(source, input_data, expected):
"""Test that HTTPSource.extract_caching_headers() works as expected."""
source.extract_caching_headers(input_data)
assert source.http_etag == expected["etag"]
assert source.http_last_modified == expected["last_modified"]


@pytest.mark.parametrize(
"source,expected",
[
(
HTTPSource(),
{},
),
(
HTTPSource(http_etag="et_test"),
{"if-none-match": "et_test"},
),
(
HTTPSource(http_last_modified="lm_test"),
{"if-modified-since": "lm_test"},
),
(
HTTPSource(http_etag="et_test", http_last_modified="lm_test"),
{"if-modified-since": "lm_test", "if-none-match": "et_test"},
),
],
)
def test_http_source_make_caching_headers(source, expected):
"""Test that HTTPSource.make_caching_headers() works as expected."""
result = source.make_caching_headers()
assert result == expected
11 changes: 6 additions & 5 deletions migrations/convert_cache_to_dataclass_v1.py
@@ -15,13 +15,14 @@
import sys
from importlib.machinery import SourceFileLoader

# NOTICE: An ugly hack in order to be able to import CachedData class from
# rss2irc. I'm real sorry about this, son.
# NOTICE: An ugly hack in order to be able to import CachedData class.
# I'm real sorry about this, son.
# NOTE: Sadly, importlib.util and spec didn't cut it. Also, I'm out of time on
# this. Therefore, see you again in the future once this ceases to work.
SCRIPT_PATH = os.path.dirname(os.path.realpath(__file__))
rss2irc_module_path = os.path.join(SCRIPT_PATH, "..", "rss2irc.py")
rss2irc = SourceFileLoader("rss2irc", rss2irc_module_path).load_module()
lib_module_path = os.path.join(SCRIPT_PATH, "..", "lib", "__init__.py")
lib = SourceFileLoader("lib", lib_module_path).load_module()
CachedData = lib.cached_data.CachedData


def main():
@@ -50,7 +51,7 @@ def main():
logger.info("Create backup file '%s' from '%s'.", bak_file, args.cache)
shutil.copy2(args.cache, bak_file)

new_cache = rss2irc.CachedData()
new_cache = CachedData()
for key, value in cache.items():
new_cache.items[key] = value

