Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for and utilize HTTP headers ETag/Last-Modified #26

Merged
merged 4 commits into from
Jun 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 11 additions & 8 deletions gh2slack.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,11 @@ def format_message(
try:
title = cache_item["title"].encode("utf-8")
except UnicodeEncodeError:
logger.error("Failed to encode title as UTF-8: %s", repr(title))
logger.error(traceback.format_exc())
logger.error(
"Failed to encode title as UTF-8: %s",
repr(cache_item.get("title", None)),
)
logger.error("%s", traceback.format_exc())
title = "Unknown title due to UTF-8 exception, {:s}#{:d}".format(
section, cache_item["number"]
)
Expand Down Expand Up @@ -157,7 +160,7 @@ def main():
sys.exit(0)

cache = rss2irc.read_cache(logger, args.cache)
scrub_cache(logger, cache)
scrub_items(logger, cache)

# Note: I have failed to find web link to repo in GH response.
# Therefore, let's create one.
Expand Down Expand Up @@ -191,14 +194,14 @@ def main():
args.slack_channel,
)
except Exception:
logger.error(traceback.format_exc())
logger.error("%s", traceback.format_exc())
cache.items.pop(html_url)
finally:
time.sleep(args.sleep)

rss2irc.write_cache(cache, args.cache)
except Exception:
logger.debug(traceback.format_exc())
logger.debug("%s", traceback.format_exc())
# TODO(zstyblik):
# 1. touch error file
# 2. send error message to the channel
Expand All @@ -220,7 +223,7 @@ def parse_args() -> argparse.Namespace:
"--cache-expiration",
dest="cache_expiration",
type=int,
default=rss2irc.EXPIRATION,
default=rss2irc.CACHE_EXPIRATION,
help="Time, in seconds, for how long to keep items " "in cache.",
)
parser.add_argument(
Expand Down Expand Up @@ -344,14 +347,14 @@ def process_page_items(
return to_publish


def scrub_cache(logger: logging.Logger, cache: rss2irc.CachedData) -> None:
def scrub_items(logger: logging.Logger, cache: rss2irc.CachedData) -> None:
"""Scrub cache and remove expired items."""
time_now = int(time.time())
for key in list(cache.items.keys()):
try:
expiration = int(cache.items[key]["expiration"])
except (KeyError, ValueError):
logger.error(traceback.format_exc())
logger.error("%s", traceback.format_exc())
logger.error(
"Invalid cache entry will be removed: '%s'", cache.items[key]
)
Expand Down
2 changes: 1 addition & 1 deletion git_commits2slack.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ def main():
args.slack_channel,
)
except Exception:
logger.debug(traceback.format_exc())
logger.debug("%s", traceback.format_exc())
# TODO(zstyblik):
# 1. touch error file
# 2. send error message to the channel
Expand Down
82 changes: 82 additions & 0 deletions migrations/convert_cache_to_dataclass_v2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
#!/usr/bin/env python3
"""Convert data class v1 to v2.

Migration:
* disable all 2IRC/2Slack scripts in eg. cron/systemd/runit/etc.
* migrate cache files with this script
* enable 2IRC/2Slack scripts again
* if everything is ok, remove bak files
"""
import argparse
import importlib.util
import logging
import os
import shutil
import sys

# NOTICE: An ugly hack in order to be able to import CachedData class from
# rss2irc, which lives one directory up and is not an installed package.
# NOTE: SourceFileLoader.load_module() was deprecated in Python 3.4 and
# removed in Python 3.12; the spec-based recipe below is its documented
# replacement. Registering the module in sys.modules BEFORE exec_module()
# is required so that pickle can resolve 'rss2irc.CachedData' when the
# cache file is read back.
SCRIPT_PATH = os.path.dirname(os.path.realpath(__file__))
rss2irc_module_path = os.path.join(SCRIPT_PATH, "..", "rss2irc.py")
_spec = importlib.util.spec_from_file_location("rss2irc", rss2irc_module_path)
rss2irc = importlib.util.module_from_spec(_spec)
sys.modules["rss2irc"] = rss2irc
_spec.loader.exec_module(rss2irc)
CachedData = rss2irc.CachedData


def main():
    """Open cache file, convert it and overwrite it.

    A backup copy is created along the way; it has to be removed by hand
    once the migration has been verified.
    """
    logging.basicConfig(level=logging.INFO, stream=sys.stdout)
    logger = logging.getLogger("migrate-dataclass-v1-to-v2")
    args = parse_args()
    if args.verbosity:
        logger.setLevel(logging.DEBUG)

    logger.info("Read cache from file '%s'.", args.cache)
    old_cache = rss2irc.read_cache(logger, args.cache)

    bak_file = f"{args.cache}.bak"
    logger.info("Create backup file '%s' from '%s'.", bak_file, args.cache)
    shutil.copy2(args.cache, bak_file)

    # Move items one by one into a freshly constructed v2 instance.
    migrated = rss2irc.CachedData()
    for url, item in old_cache.items.items():
        migrated.items[url] = item

    logger.info("Write converted cache into file '%s'.", args.cache)
    rss2irc.write_cache(migrated, args.cache)
    logger.info("Read converted cache file '%s' and check attr.", args.cache)
    converted = rss2irc.read_cache(logger, args.cache)
    # Calling a v2-only method proves the on-disk data really is the new class.
    _ = converted.get_source_by_url("test")
    logger.info("Migration complete and '%s' can be removed.", bak_file)


def parse_args() -> argparse.Namespace:
    """Parse command line options and return the resulting namespace."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        default=False,
        dest="verbosity",
        help="Increase logging verbosity.",
    )
    arg_parser.add_argument(
        "--cache",
        default=None,
        dest="cache",
        help="File which contains cache.",
        required=True,
        type=str,
    )
    return arg_parser.parse_args()


if __name__ == "__main__":
    # Run the migration only when executed as a script, not when imported.
    main()
80 changes: 51 additions & 29 deletions phpbb2slack.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def get_authors_from_file(logger: logging.Logger, fname: str) -> List[str]:
if line.decode("utf-8").strip() != ""
]
except Exception:
logger.error(traceback.format_exc())
logger.error("%s", traceback.format_exc())
authors = []

return authors
Expand All @@ -81,32 +81,32 @@ def main():
try:
slack_token = rss2slack.get_slack_token()
authors = get_authors_from_file(logger, args.authors_file)
cache = rss2irc.read_cache(logger, args.cache)
source = cache.get_source_by_url(args.rss_url)

rsp = rss2irc.get_rss(
logger,
args.rss_url,
args.rss_http_timeout,
source.make_caching_headers(),
)
if rsp.status_code == 304:
logger.debug("No new RSS data since the last run")
rss2irc.write_cache(cache, args.cache)
sys.exit(0)

data = rss2irc.get_rss(logger, args.rss_url, args.rss_http_timeout)
if not data:
if not rsp.text:
logger.error("Failed to get RSS from %s", args.rss_url)
sys.exit(1)

news = parse_news(data, authors)
news = parse_news(rsp.text, authors)
if not news:
logger.info("No news?")
sys.exit(0)

cache = rss2irc.read_cache(logger, args.cache)
scrub_cache(logger, cache)

for key in list(news.keys()):
if key not in cache.items:
continue

logger.debug("Key %s found in cache", key)
comments_cached = int(cache.items[key]["comments_cnt"])
comments_actual = int(news[key]["comments_cnt"])
if comments_cached == comments_actual:
cache.items[key]["expiration"] = (
int(time.time()) + args.cache_expiration
)
news.pop(key)
source.extract_caching_headers(rsp.headers)
scrub_items(logger, cache)
prune_news(logger, cache, news, args.cache_expiration)

slack_client = rss2slack.get_slack_web_client(
slack_token, args.slack_base_url, args.slack_timeout
Expand All @@ -126,11 +126,11 @@ def main():
finally:
time.sleep(args.sleep)

expiration = int(time.time()) + args.cache_expiration
update_cache(cache, news, expiration)
update_items_expiration(cache, news, args.cache_expiration)
cache.scrub_data_sources()
rss2irc.write_cache(cache, args.cache)
except Exception:
logger.debug(traceback.format_exc())
logger.debug("%s", traceback.format_exc())
# TODO(zstyblik):
# 1. touch error file
# 2. send error message to the channel
Expand Down Expand Up @@ -247,14 +247,15 @@ def parse_news(data: str, authors: List[str]) -> Dict:
feed = feedparser.parse(data)
for entry in feed["entries"]:
link = entry.pop("link", None)
title = entry.pop("title", None)
author_detail = entry.pop("author_detail", {"name": None})
if not "link" and not "title":
if not link:
# If we don't have a link, there is nothing we can do.
continue

author_detail = entry.pop("author_detail", {"name": None})
if authors and author_detail["name"] not in authors:
continue

title = entry.pop("title", "No title")
category = entry.pop("category", None)
comments_cnt = entry.pop("slash_comments", 0)
try:
Expand All @@ -271,14 +272,34 @@ def parse_news(data: str, authors: List[str]) -> Dict:
return news


def scrub_cache(logger: logging.Logger, cache: rss2irc.CachedData) -> None:
def prune_news(
    logger: logging.Logger,
    cache: rss2irc.CachedData,
    news: Dict[str, Dict],
    expiration: int = CACHE_EXPIRATION,
) -> None:
    """Prune news which already are in cache."""
    new_expiration = int(time.time()) + expiration
    # Snapshot the overlap first, since entries are popped from news below.
    cached_keys = [key for key in news if key in cache.items]
    for key in cached_keys:
        logger.debug("Key %s found in cache", key)
        cnt_in_cache = int(cache.items[key]["comments_cnt"])
        cnt_in_news = int(news[key]["comments_cnt"])
        # Same comment count -> nothing new to announce: keep the item
        # cached a while longer and drop it from the publish queue.
        if cnt_in_cache == cnt_in_news:
            cache.items[key]["expiration"] = new_expiration
            news.pop(key)


def scrub_items(logger: logging.Logger, cache: rss2irc.CachedData) -> None:
"""Scrub cache and remove expired items."""
time_now = int(time.time())
for key in list(cache.items.keys()):
try:
expiration = int(cache.items[key]["expiration"])
except (KeyError, ValueError):
logger.error(traceback.format_exc())
logger.error("%s", traceback.format_exc())
logger.error(
"Invalid cache entry will be removed: '%s'", cache.items[key]
)
Expand All @@ -290,13 +311,14 @@ def scrub_cache(logger: logging.Logger, cache: rss2irc.CachedData) -> None:
cache.items.pop(key)


def update_cache(
def update_items_expiration(
    cache: rss2irc.CachedData, news: Dict, expiration: int
) -> None:
    """Update cache contents."""
    # One shared deadline for everything added in this run.
    deadline = int(time.time()) + expiration
    for key, item in news.items():
        cache.items[key] = {
            "comments_cnt": int(item["comments_cnt"]),
            "expiration": deadline,
        }

Expand Down
4 changes: 2 additions & 2 deletions requirements-ci.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ flake8-import-order
reorder-python-imports
# tests
pytest
pytest-localserver
requests_mock
pytest-localserver==0.8.1
requests-mock==1.12.1
Loading