WIP: Move CachedData and HTTPSource into separate files
TODO:
* CI
* split off tests
* clean up everything and every reference to rss2irc
zstyblik committed Jul 1, 2024
1 parent 4fbbce1 commit e74982c
Showing 18 changed files with 206 additions and 153 deletions.
5 changes: 3 additions & 2 deletions cache_stats.py
@@ -10,6 +10,7 @@
 from dataclasses import dataclass

 import rss2irc
+from lib import CachedData

 BUCKET_COUNT = 10

@@ -24,7 +25,7 @@ class Bucket:


 def calc_distribution(
-    logger: logging.Logger, cache: rss2irc.CachedData, buckets
+    logger: logging.Logger, cache: CachedData, buckets
 ) -> int:
     """Calculate item distribution inside cache."""
     keys = list(buckets.keys())
@@ -70,7 +71,7 @@ def get_timestamp(data) -> int:


 def get_timestamp_minmax(
-    logger: logging.Logger, cache: rss2irc.CachedData
+    logger: logging.Logger, cache: CachedData
 ) -> (int, int, int):
     """Return timestamp min, max and no. of errors."""
     ts_min = 99999999999
14 changes: 8 additions & 6 deletions gh2slack.py
@@ -18,6 +18,8 @@

 import rss2irc  # noqa: I202
 import rss2slack
+from lib import CachedData
+from lib import config_options

 ALIASES = {
     "issues": "issue",
@@ -101,7 +103,7 @@ def gh_parse_next_page(link_header: str) -> str:


 def gh_request(
-    logger: logging.Logger, url: str, timeout: int = rss2irc.HTTP_TIMEOUT
+    logger: logging.Logger, url: str, timeout: int = config_options.HTTP_TIMEOUT
 ) -> List:
     """Return list of responses from GitHub.
@@ -223,7 +225,7 @@ def parse_args() -> argparse.Namespace:
         "--cache-expiration",
         dest="cache_expiration",
         type=int,
-        default=rss2irc.CACHE_EXPIRATION,
+        default=config_options.CACHE_EXPIRATION,
         help="Time, in seconds, for how long to keep items " "in cache.",
     )
     parser.add_argument(
@@ -275,9 +277,9 @@ def parse_args() -> argparse.Namespace:
         "--slack-timeout",
         dest="slack_timeout",
         type=int,
-        default=rss2irc.HTTP_TIMEOUT,
+        default=config_options.HTTP_TIMEOUT,
         help="Slack API Timeout. Defaults to {:d} seconds.".format(
-            rss2irc.HTTP_TIMEOUT
+            config_options.HTTP_TIMEOUT
         ),
     )
     parser.add_argument(
@@ -303,7 +305,7 @@

 def process_page_items(
     logger: logging.Logger,
-    cache: rss2irc.CachedData,
+    cache: CachedData,
     pages: List,
     expiration: int,
     repository_url: str,
@@ -347,7 +349,7 @@ def process_page_items(
     return to_publish


-def scrub_items(logger: logging.Logger, cache: rss2irc.CachedData) -> None:
+def scrub_items(logger: logging.Logger, cache: CachedData) -> None:
     """Scrub cache and remove expired items."""
     time_now = int(time.time())
     for key in list(cache.items.keys()):
6 changes: 3 additions & 3 deletions git_commits2slack.py
@@ -16,8 +16,8 @@
 from typing import Dict
 from typing import List

-import rss2irc
 import rss2slack
+from lib import config_options

 RE_GIT_AUTD = re.compile(r"^Already up-to-date.$")
 RE_GIT_UPDATING = re.compile(r"^Updating [a-z0-9]+", re.I)
@@ -254,9 +254,9 @@ def parse_args() -> argparse.Namespace:
         "--slack-timeout",
         dest="slack_timeout",
         type=int,
-        default=rss2irc.HTTP_TIMEOUT,
+        default=config_options.HTTP_TIMEOUT,
         help="Slack API Timeout. Defaults to {:d} seconds.".format(
-            rss2irc.HTTP_TIMEOUT
+            config_options.HTTP_TIMEOUT
         ),
     )
     parser.add_argument(
4 changes: 4 additions & 0 deletions lib/__init__.py
@@ -0,0 +1,4 @@
+#!/usr/bin/env python3
+"""FIXME."""
+from .cached_data import CachedData  # noqa: F401
+from .http_source import HTTPSource  # noqa: F401
45 changes: 45 additions & 0 deletions lib/cached_data.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python3
+"""Code related to Cache.
+
+I love how black and reorder-python-imports play nicely together and no
+workarounds are needed.
+"""
+import time
+from dataclasses import dataclass
+from dataclasses import field
+
+from .config_options import DATA_SOURCE_EXPIRATION
+from .http_source import HTTPSource
+
+
+@dataclass
+class CachedData:
+    """CachedData represents locally cached data and state."""
+
+    data_sources: dict = field(default_factory=dict)
+    items: dict = field(default_factory=dict)
+
+    def get_source_by_url(self, url: str) -> HTTPSource:
+        """Return source by URL.
+
+        If source doesn't exist, it will be created.
+        """
+        source = self.data_sources.get(url, None)
+        if source:
+            source.last_used_ts = int(time.time())
+            return source
+
+        self.data_sources[url] = HTTPSource(
+            last_used_ts=int(time.time()), url=url
+        )
+        return self.get_source_by_url(url)
+
+    def scrub_data_sources(
+        self, expiration: int = DATA_SOURCE_EXPIRATION
+    ) -> None:
+        """Delete expired data sources."""
+        now = int(time.time())
+        for key in list(self.data_sources.keys()):
+            diff = now - self.data_sources[key].last_used_ts
+            if int(diff) > expiration:
+                self.data_sources.pop(key)
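For reference, a minimal usage sketch of the new class (not part of this commit; the feed URL is illustrative and the `lib` package is assumed to be importable from the repository root):

    from lib import CachedData

    cache = CachedData()
    # The first lookup creates an HTTPSource for the URL; repeated lookups
    # return the same object and refresh its last_used_ts.
    source = cache.get_source_by_url("https://example.org/feed.xml")
    assert source is cache.get_source_by_url("https://example.org/feed.xml")
    # Drop sources idle for longer than the expiration window
    # (DATA_SOURCE_EXPIRATION, i.e. 30 days, by default).
    cache.scrub_data_sources()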
9 changes: 9 additions & 0 deletions lib/config_options.py
@@ -0,0 +1,9 @@
+#!/usr/bin/env python3
+"""Common configuration options.
+
+I love how black and reorder-python-imports play nicely together and no
+workarounds are needed.
+"""
+CACHE_EXPIRATION = 86400  # seconds
+DATA_SOURCE_EXPIRATION = 30 * 86400  # seconds
+HTTP_TIMEOUT = 30  # seconds
41 changes: 41 additions & 0 deletions lib/http_source.py
@@ -0,0 +1,41 @@
+#!/usr/bin/env python3
+"""Code related to HTTP Source.
+
+I love how black and reorder-python-imports play nicely together and no
+workarounds are needed.
+"""
+from dataclasses import dataclass
+from dataclasses import field
+from typing import Dict
+
+
+@dataclass
+class HTTPSource:
+    """Class represents HTTP data source."""
+
+    http_etag: str = field(default_factory=str)
+    http_last_modified: str = field(default_factory=str)
+    last_used_ts: int = 0
+    url: str = field(default_factory=str)
+
+    def extract_caching_headers(self, headers: Dict[str, str]) -> None:
+        """Extract cache related headers from given dict."""
+        self.http_etag = ""
+        self.http_last_modified = ""
+        for key, value in headers.items():
+            key = key.lower()
+            if key == "etag":
+                self.http_etag = value
+            elif key == "last-modified":
+                self.http_last_modified = value
+
+    def make_caching_headers(self) -> Dict[str, str]:
+        """Return cache related headers as a dict."""
+        headers = {}
+        if self.http_etag:
+            headers["if-none-match"] = self.http_etag
+
+        if self.http_last_modified:
+            headers["if-modified-since"] = self.http_last_modified
+
+        return headers
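For reference, a sketch of the conditional-GET round trip these helpers enable (not part of this commit; it assumes the third-party `requests` package and an illustrative feed URL):

    import requests

    from lib import HTTPSource

    source = HTTPSource(url="https://example.org/feed.xml")
    # First call: no ETag/Last-Modified stored yet, so no extra headers.
    response = requests.get(
        source.url, headers=source.make_caching_headers(), timeout=30
    )
    if response.status_code == 304:
        pass  # Not modified; locally cached data is still fresh.
    else:
        # Remember ETag/Last-Modified; the next make_caching_headers()
        # call turns them into If-None-Match/If-Modified-Since.
        source.extract_caching_headers(response.headers)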
11 changes: 6 additions & 5 deletions migrations/convert_cache_to_dataclass_v1.py
@@ -15,13 +15,14 @@
 import sys
 from importlib.machinery import SourceFileLoader

-# NOTICE: An ugly hack in order to be able to import CachedData class from
-# rss2irc. I'm real sorry about this, son.
+# NOTICE: An ugly hack in order to be able to import CachedData class.
+# I'm real sorry about this, son.
 # NOTE: Sadly, importlib.util and spec didn't cut it. Also, I'm out of time on
 # this. Therefore, see you again in the future once this ceases to work.
 SCRIPT_PATH = os.path.dirname(os.path.realpath(__file__))
-rss2irc_module_path = os.path.join(SCRIPT_PATH, "..", "rss2irc.py")
-rss2irc = SourceFileLoader("rss2irc", rss2irc_module_path).load_module()
+lib_module_path = os.path.join(SCRIPT_PATH, "..", "lib", "__init__.py")
+lib = SourceFileLoader("lib", lib_module_path).load_module()
+CachedData = lib.cached_data.CachedData


 def main():
@@ -50,7 +51,7 @@ def main():
     logger.info("Create backup file '%s' from '%s'.", bak_file, args.cache)
     shutil.copy2(args.cache, bak_file)

-    new_cache = rss2irc.CachedData()
+    new_cache = CachedData()
     for key, value in cache.items():
         new_cache.items[key] = value

14 changes: 8 additions & 6 deletions migrations/convert_cache_to_dataclass_v2.py
@@ -14,14 +14,16 @@
 import sys
 from importlib.machinery import SourceFileLoader

-# NOTICE: An ugly hack in order to be able to import CachedData class from
-# rss2irc. I'm real sorry about this, son.
-# NOTE: Sadly, importlib.util and spec didn't cut it. As usual, I'm out of time
-# on this. Therefore, see you again in the future once this ceases to work.
+# NOTICE: An ugly hack in order to be able to import CachedData class.
+# I'm real sorry about this, son.
+# NOTE: Sadly, importlib.util and spec didn't cut it. Also, I'm out of time on
+# this. Therefore, see you again in the future once this ceases to work.
 SCRIPT_PATH = os.path.dirname(os.path.realpath(__file__))
+lib_module_path = os.path.join(SCRIPT_PATH, "..", "lib", "__init__.py")
+lib = SourceFileLoader("lib", lib_module_path).load_module()
 rss2irc_module_path = os.path.join(SCRIPT_PATH, "..", "rss2irc.py")
 rss2irc = SourceFileLoader("rss2irc", rss2irc_module_path).load_module()
-CachedData = rss2irc.CachedData
+CachedData = lib.cached_data.CachedData


 def main():
@@ -43,7 +45,7 @@ def main():
     logger.info("Create backup file '%s' from '%s'.", bak_file, args.cache)
     shutil.copy2(args.cache, bak_file)

-    new_cache = rss2irc.CachedData()
+    new_cache = CachedData()
     for key, value in cache.items.items():
         new_cache.items[key] = value

3 changes: 2 additions & 1 deletion migrations/tests/test_convert_cache_to_dataclass_v1.py
@@ -9,6 +9,7 @@
 import pytest

 import rss2irc  # noqa:I202
+from lib import CachedData

 SCRIPT_PATH = os.path.dirname(os.path.realpath(__file__))

@@ -57,7 +58,7 @@ def test_migration(fixture_cache_file, fixture_bak_cleanup):
     with open(fixture_cache_file, "wb") as fhandle:
         pickle.dump(test_data, fhandle, pickle.HIGHEST_PROTOCOL)

-    expected_cache = rss2irc.CachedData(
+    expected_cache = CachedData(
         items={
             "test1": 1234,
             "test2": 0,
27 changes: 14 additions & 13 deletions phpbb2slack.py
@@ -14,10 +14,9 @@
 import feedparser

 import rss2irc  # noqa: I202
-import rss2slack
-
-CACHE_EXPIRATION = 86400  # seconds
-HTTP_TIMEOUT = 30  # seconds
+import rss2slack  # noqa: I202
+from lib import CachedData  # noqa: I202
+from lib import config_options  # noqa: I202


 def format_message(
@@ -163,7 +162,7 @@ def parse_args() -> argparse.Namespace:
         "--cache-expiration",
         dest="cache_expiration",
         type=int,
-        default=CACHE_EXPIRATION,
+        default=config_options.CACHE_EXPIRATION,
         help="Time, in seconds, for how long to keep items in cache.",
     )
     parser.add_argument(
Expand Down Expand Up @@ -194,8 +193,10 @@ def parse_args() -> argparse.Namespace:
"--rss-http-timeout",
dest="rss_http_timeout",
type=int,
default=HTTP_TIMEOUT,
help="HTTP Timeout. Defaults to {:d} seconds.".format(HTTP_TIMEOUT),
default=config_options.HTTP_TIMEOUT,
help="HTTP Timeout. Defaults to {:d} seconds.".format(
config_options.HTTP_TIMEOUT
),
)
parser.add_argument(
"--slack-base-url",
@@ -215,9 +216,9 @@
         "--slack-timeout",
         dest="slack_timeout",
         type=int,
-        default=HTTP_TIMEOUT,
+        default=config_options.HTTP_TIMEOUT,
         help="Slack API Timeout. Defaults to {:d} seconds.".format(
-            HTTP_TIMEOUT
+            config_options.HTTP_TIMEOUT
         ),
     )
     parser.add_argument(
@@ -274,9 +275,9 @@ def parse_news(data: str, authors: List[str]) -> Dict:

 def prune_news(
     logger: logging.Logger,
-    cache: rss2irc.CachedData,
+    cache: CachedData,
     news: Dict[str, Dict],
-    expiration: int = CACHE_EXPIRATION,
+    expiration: int = config_options.CACHE_EXPIRATION,
 ) -> None:
     """Prune news which already are in cache."""
     item_expiration = int(time.time()) + expiration
@@ -292,7 +293,7 @@
         news.pop(key)


-def scrub_items(logger: logging.Logger, cache: rss2irc.CachedData) -> None:
+def scrub_items(logger: logging.Logger, cache: CachedData) -> None:
     """Scrub cache and remove expired items."""
     time_now = int(time.time())
     for key in list(cache.items.keys()):
@@ -312,7 +313,7 @@ def scrub_items(logger: logging.Logger, cache: rss2irc.CachedData) -> None:


 def update_items_expiration(
-    cache: rss2irc.CachedData, news: Dict, expiration: int
+    cache: CachedData, news: Dict, expiration: int
 ) -> None:
     """Update cache contents."""
     item_expiration = int(time.time()) + expiration