Move CachedData and HTTPSource classes into separate files #29

Merged: 6 commits, Jul 4, 2024
5 changes: 3 additions & 2 deletions cache_stats.py
@@ -10,6 +10,7 @@
 from dataclasses import dataclass

 import rss2irc
+from lib import CachedData

 BUCKET_COUNT = 10

@@ -24,7 +25,7 @@ class Bucket:


 def calc_distribution(
-    logger: logging.Logger, cache: rss2irc.CachedData, buckets
+    logger: logging.Logger, cache: CachedData, buckets
 ) -> int:
     """Calculate item distribution inside cache."""
     keys = list(buckets.keys())
@@ -70,7 +71,7 @@ def get_timestamp(data) -> int:


 def get_timestamp_minmax(
-    logger: logging.Logger, cache: rss2irc.CachedData
+    logger: logging.Logger, cache: CachedData
 ) -> (int, int, int):
     """Return timestamp min, max and no. of errors."""
     ts_min = 99999999999
2 changes: 1 addition & 1 deletion ci/run-reorder-python-imports.sh
@@ -2,4 +2,4 @@
 set -e
 set -u

-reorder-python-imports `find . ! -path '*/\.*' -name '*.py'`
+reorder-python-imports --py311-plus `find . ! -path '*/\.*' -name '*.py'`
22 changes: 14 additions & 8 deletions gh2slack.py
@@ -17,7 +17,9 @@
 import requests

 import rss2irc # noqa: I202
-import rss2slack
+import rss2slack # noqa: I202
+from lib import CachedData # noqa: I202
+from lib import config_options # noqa: I202

 ALIASES = {
     "issues": "issue",
@@ -101,17 +103,21 @@ def gh_parse_next_page(link_header: str) -> str:


 def gh_request(
-    logger: logging.Logger, url: str, timeout: int = rss2irc.HTTP_TIMEOUT
+    logger: logging.Logger, url: str, timeout: int = config_options.HTTP_TIMEOUT
 ) -> List:
     """Return list of responses from GitHub.

     Makes a request to GH, follows the 'Link' header if present, and returns
     a list of responses.
     """
     logger.debug("Requesting %s", url)
+    user_agent = "gh2slack_{:d}".format(int(time.time()))
     rsp = requests.get(
         url,
-        headers={"Accept": "application/vnd.github.v3+json"},
+        headers={
+            "Accept": "application/vnd.github.v3+json",
+            "User-Agent": user_agent,
+        },
         params={"state": "open", "sort": "created"},
         timeout=timeout,
     )
@@ -223,7 +229,7 @@ def parse_args() -> argparse.Namespace:
         "--cache-expiration",
         dest="cache_expiration",
         type=int,
-        default=rss2irc.CACHE_EXPIRATION,
+        default=config_options.CACHE_EXPIRATION,
         help="Time, in seconds, for how long to keep items " "in cache.",
     )
     parser.add_argument(
@@ -275,9 +281,9 @@ def parse_args() -> argparse.Namespace:
         "--slack-timeout",
         dest="slack_timeout",
         type=int,
-        default=rss2irc.HTTP_TIMEOUT,
+        default=config_options.HTTP_TIMEOUT,
         help="Slack API Timeout. Defaults to {:d} seconds.".format(
-            rss2irc.HTTP_TIMEOUT
+            config_options.HTTP_TIMEOUT
         ),
     )
     parser.add_argument(
@@ -303,7 +309,7 @@

 def process_page_items(
     logger: logging.Logger,
-    cache: rss2irc.CachedData,
+    cache: CachedData,
     pages: List,
     expiration: int,
     repository_url: str,
@@ -347,7 +353,7 @@ def process_page_items(
     return to_publish


-def scrub_items(logger: logging.Logger, cache: rss2irc.CachedData) -> None:
+def scrub_items(logger: logging.Logger, cache: CachedData) -> None:
     """Scrub cache and remove expired items."""
     time_now = int(time.time())
     for key in list(cache.items.keys()):
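
For context, a minimal sketch of how the updated gh_request() behaves (assuming the gh2slack module is importable from the repo root; the repository URL below is a placeholder):

import logging

import gh2slack

logging.basicConfig()
logger = logging.getLogger("gh2slack")
# The timeout now falls back to config_options.HTTP_TIMEOUT (30 seconds), and
# the request carries a time-stamped User-Agent, e.g. "gh2slack_1717428213".
pages = gh2slack.gh_request(
    logger, "https://api.github.com/repos/octocat/hello-world/issues"
)
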
6 changes: 3 additions & 3 deletions git_commits2slack.py
@@ -16,8 +16,8 @@
 from typing import Dict
 from typing import List

-import rss2irc
 import rss2slack
+from lib import config_options

 RE_GIT_AUTD = re.compile(r"^Already up-to-date.$")
 RE_GIT_UPDATING = re.compile(r"^Updating [a-z0-9]+", re.I)
@@ -254,9 +254,9 @@ def parse_args() -> argparse.Namespace:
         "--slack-timeout",
         dest="slack_timeout",
         type=int,
-        default=rss2irc.HTTP_TIMEOUT,
+        default=config_options.HTTP_TIMEOUT,
         help="Slack API Timeout. Defaults to {:d} seconds.".format(
-            rss2irc.HTTP_TIMEOUT
+            config_options.HTTP_TIMEOUT
         ),
     )
     parser.add_argument(
8 changes: 8 additions & 0 deletions lib/__init__.py
@@ -0,0 +1,8 @@
#!/usr/bin/env python3
"""Just init.

I love how black and reorder-python-imports play nicely together and no
workarounds are needed whatsoever.
"""
from .cached_data import CachedData # noqa: F401
from .http_source import HTTPSource # noqa: F401
45 changes: 45 additions & 0 deletions lib/cached_data.py
@@ -0,0 +1,45 @@
#!/usr/bin/env python3
"""Code related to Cache.

I love how black and reorder-python-imports play nicely together and no
workarounds are needed whatsoever.
"""
import time
from dataclasses import dataclass
from dataclasses import field

from .config_options import DATA_SOURCE_EXPIRATION
from .http_source import HTTPSource


@dataclass
class CachedData:
    """CachedData represents locally cached data and state."""

    data_sources: dict = field(default_factory=dict)
    items: dict = field(default_factory=dict)

    def get_source_by_url(self, url: str) -> HTTPSource:
        """Return source by URL.

        If the source doesn't exist, it will be created.
        """
        source = self.data_sources.get(url, None)
        if source:
            source.last_used_ts = int(time.time())
            return source

        self.data_sources[url] = HTTPSource(
            last_used_ts=int(time.time()), url=url
        )
        return self.get_source_by_url(url)

    def scrub_data_sources(
        self, expiration: int = DATA_SOURCE_EXPIRATION
    ) -> None:
        """Delete expired data sources."""
        now = int(time.time())
        for key in list(self.data_sources.keys()):
            diff = now - self.data_sources[key].last_used_ts
            if int(diff) > expiration:
                self.data_sources.pop(key)
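
A minimal usage sketch of the relocated class, assuming the lib package from this PR is on the import path (the URL is a placeholder):

import time

from lib import CachedData

cache = CachedData()
# The source is created on first lookup; last_used_ts is refreshed on each one.
source = cache.get_source_by_url("http://example.com/feed.xml")
assert source.last_used_ts <= int(time.time())
# Drop sources that have been idle longer than DATA_SOURCE_EXPIRATION seconds.
cache.scrub_data_sources()
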
9 changes: 9 additions & 0 deletions lib/config_options.py
@@ -0,0 +1,9 @@
#!/usr/bin/env python3
"""Common configuration options.

I love how black and reorder-python-imports play nicely together and no
workarounds are needed whatsoever.
"""
CACHE_EXPIRATION = 86400 # seconds
DATA_SOURCE_EXPIRATION = 30 * 86400 # seconds
HTTP_TIMEOUT = 30 # seconds
41 changes: 41 additions & 0 deletions lib/http_source.py
@@ -0,0 +1,41 @@
#!/usr/bin/env python3
"""Code related to HTTP Source.

I love how black and reorder-python-imports play nicely together and no
workarounds are needed whatsoever.
"""
from dataclasses import dataclass
from dataclasses import field
from typing import Dict


@dataclass
class HTTPSource:
    """Class represents HTTP data source."""

    http_etag: str = field(default_factory=str)
    http_last_modified: str = field(default_factory=str)
    last_used_ts: int = 0
    url: str = field(default_factory=str)

    def extract_caching_headers(self, headers: Dict[str, str]) -> None:
        """Extract cache related headers from given dict."""
        self.http_etag = ""
        self.http_last_modified = ""
        for key, value in headers.items():
            key = key.lower()
            if key == "etag":
                self.http_etag = value
            elif key == "last-modified":
                self.http_last_modified = value

    def make_caching_headers(self) -> Dict[str, str]:
        """Return cache related headers as a dict."""
        headers = {}
        if self.http_etag:
            headers["if-none-match"] = self.http_etag

        if self.http_last_modified:
            headers["if-modified-since"] = self.http_last_modified

        return headers
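
A sketch of how the caching headers are intended to round-trip in a conditional GET, assuming the requests library (the URL is a placeholder):

import requests

from lib import HTTPSource

source = HTTPSource(url="http://example.com/feed.xml")
# On the first request the headers dict is empty; on later requests it
# carries the if-none-match / if-modified-since values remembered below.
rsp = requests.get(
    source.url, headers=source.make_caching_headers(), timeout=30
)
if rsp.status_code != 304:  # 304 Not Modified -> keep previously cached items
    source.extract_caching_headers(rsp.headers)
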
60 changes: 60 additions & 0 deletions lib/tests/test_cached_data.py
@@ -0,0 +1,60 @@
#!/usr/bin/env python3
"""Unit tests for cached_data.py."""
import time
from unittest.mock import patch

from lib import CachedData
from lib import config_options
from lib import HTTPSource # noqa: I100


@patch("lib.cached_data.time.time")
def test_cache_get_source_by_url(mock_time):
    """Test that CachedData.get_source_by_url() sets last_used_ts attr."""
    mock_time.return_value = 1717428213
    url = "http://example.com"
    source = HTTPSource(
        last_used_ts=0,
        url=url,
    )
    cache = CachedData(
        data_sources={
            url: source,
        }
    )
    result = cache.get_source_by_url(url)
    assert result == source
    assert result.last_used_ts == 1717428213


def test_cache_scrub_data_sources_empty():
    """Test CachedData.scrub_data_sources() when there are no sources."""
    cache = CachedData()
    assert not cache.data_sources
    cache.scrub_data_sources()
    assert not cache.data_sources


def test_cache_scrub_data_sources():
    """Test that CachedData.scrub_data_sources() removes expired sources."""
    source1_url = "http://ww1.example.com"
    source2_url = "http://ww2.example.com"
    cache = CachedData()
    source1 = cache.get_source_by_url(source1_url)
    assert source1.last_used_ts > 0
    source1.last_used_ts = (
        int(time.time()) - 2 * config_options.DATA_SOURCE_EXPIRATION
    )

    source2 = cache.get_source_by_url(source2_url)
    assert source2.last_used_ts > 0

    assert "http://ww1.example.com" in cache.data_sources
    assert source1.url == source1_url
    assert "http://ww2.example.com" in cache.data_sources
    assert source2.url == source2_url

    cache.scrub_data_sources()

    assert "http://ww1.example.com" not in cache.data_sources
    assert "http://ww2.example.com" in cache.data_sources
62 changes: 62 additions & 0 deletions lib/tests/test_http_source.py
@@ -0,0 +1,62 @@
#!/usr/bin/env python3
"""Unit tests for http_source.py."""
import pytest

from lib import HTTPSource # noqa: I202


@pytest.mark.parametrize(
    "source,input_data,expected",
    [
        # No attrs should be set
        (
            HTTPSource(),
            {},
            {"etag": "", "last_modified": ""},
        ),
        # Reset attrs
        (
            HTTPSource(http_etag="et_test", http_last_modified="lm_test"),
            {"header1": "first", "header2": "second"},
            {"etag": "", "last_modified": ""},
        ),
        # Set attrs
        (
            HTTPSource(http_etag="et_test", http_last_modified="lm_test"),
            {"ETag": "test123", "Last-Modified": "abc123", "some": "header"},
            {"etag": "test123", "last_modified": "abc123"},
        ),
    ],
)
def test_http_source_extract_caching_headers(source, input_data, expected):
    """Test that HTTPSource.extract_caching_headers() works as expected."""
    source.extract_caching_headers(input_data)
    assert source.http_etag == expected["etag"]
    assert source.http_last_modified == expected["last_modified"]


@pytest.mark.parametrize(
    "source,expected",
    [
        (
            HTTPSource(),
            {},
        ),
        (
            HTTPSource(http_etag="et_test"),
            {"if-none-match": "et_test"},
        ),
        (
            HTTPSource(http_last_modified="lm_test"),
            {"if-modified-since": "lm_test"},
        ),
        (
            HTTPSource(http_etag="et_test", http_last_modified="lm_test"),
            {"if-modified-since": "lm_test", "if-none-match": "et_test"},
        ),
    ],
)
def test_http_source_make_caching_headers(source, expected):
    """Test that HTTPSource.make_caching_headers() works as expected."""
    result = source.make_caching_headers()
    assert result == expected
11 changes: 6 additions & 5 deletions migrations/convert_cache_to_dataclass_v1.py
@@ -15,13 +15,14 @@
 import sys
 from importlib.machinery import SourceFileLoader

-# NOTICE: An ugly hack in order to be able to import CachedData class from
-# rss2irc. I'm real sorry about this, son.
+# NOTICE: An ugly hack in order to be able to import CachedData class.
+# I'm real sorry about this, son.
 # NOTE: Sadly, importlib.util and spec didn't cut it. Also, I'm out of time on
 # this. Therefore, see you again in the future once this ceases to work.
 SCRIPT_PATH = os.path.dirname(os.path.realpath(__file__))
-rss2irc_module_path = os.path.join(SCRIPT_PATH, "..", "rss2irc.py")
-rss2irc = SourceFileLoader("rss2irc", rss2irc_module_path).load_module()
+lib_module_path = os.path.join(SCRIPT_PATH, "..", "lib", "__init__.py")
+lib = SourceFileLoader("lib", lib_module_path).load_module()
+CachedData = lib.cached_data.CachedData


 def main():
@@ -50,7 +51,7 @@ def main():
     logger.info("Create backup file '%s' from '%s'.", bak_file, args.cache)
     shutil.copy2(args.cache, bak_file)

-    new_cache = rss2irc.CachedData()
+    new_cache = CachedData()
     for key, value in cache.items():
         new_cache.items[key] = value