From a63dfffd3d125aaa0b9129326d5a6350647b2dd0 Mon Sep 17 00:00:00 2001 From: Zdenek Styblik Date: Thu, 6 Jun 2024 16:45:41 +0200 Subject: [PATCH] Fix bug in parse_news() - entries without link not being filtered out I don't know what I was doing, but the code in question doesn't make sense. --- phpbb2slack.py | 7 ++++--- rss2irc.py | 5 +++-- tests/files/phpbb-rss.xml | 8 ++++++++ tests/files/rss.xml | 4 ++++ tests/test_phpbb2slack.py | 23 +++++++++++++++++++++++ tests/test_rss2irc.py | 21 +++++++++++++++++++++ 6 files changed, 63 insertions(+), 5 deletions(-) diff --git a/phpbb2slack.py b/phpbb2slack.py index b879cee..9a0a3a0 100755 --- a/phpbb2slack.py +++ b/phpbb2slack.py @@ -247,11 +247,12 @@ def parse_news(data: str, authors: List[str]) -> Dict: feed = feedparser.parse(data) for entry in feed["entries"]: link = entry.pop("link", None) - title = entry.pop("title", None) - author_detail = entry.pop("author_detail", {"name": None}) - if not "link" and not "title": + title = entry.pop("title", "No title") + if not link: + # If we don't have a link, there is nothing we can do. continue + author_detail = entry.pop("author_detail", {"name": None}) if authors and author_detail["name"] not in authors: continue diff --git a/rss2irc.py b/rss2irc.py index 48617f3..8c55af4 100755 --- a/rss2irc.py +++ b/rss2irc.py @@ -270,10 +270,11 @@ def parse_news(data: str) -> Dict[str, Tuple[str, str]]: feed = feedparser.parse(data) for entry in feed["entries"]: link = entry.pop("link", "") - title = entry.pop("title", "") - if not "link" and not "title": + if not link: + # If we don't have a link, there is nothing we can do. 
continue + title = entry.pop("title", "No title") category = entry.pop("category", "") news[link] = (title, category) diff --git a/tests/files/phpbb-rss.xml b/tests/files/phpbb-rss.xml index 64d1c34..3442b3c 100644 --- a/tests/files/phpbb-rss.xml +++ b/tests/files/phpbb-rss.xml @@ -29,5 +29,13 @@ Does anyone else have the same situation?]]> 1 + + Broken entry + Fri, 14 Feb 2020 20:50:36 +0000 + invalid@example.com (otherUser) + otherUser + + 1 + diff --git a/tests/files/rss.xml b/tests/files/rss.xml index 426ce7e..4226dac 100644 --- a/tests/files/rss.xml +++ b/tests/files/rss.xml @@ -19,5 +19,9 @@ Item2 description Sun, 02 Feb 2020 11:26:26 -0500 + + Item3 description - purposefully broken + Sun, 02 Feb 2020 11:25:26 -0500 + diff --git a/tests/test_phpbb2slack.py b/tests/test_phpbb2slack.py index e31883c..b3e2b73 100644 --- a/tests/test_phpbb2slack.py +++ b/tests/test_phpbb2slack.py @@ -331,6 +331,29 @@ def test_main_cache_hit( assert len(fixture_http_server.requests) == 0 +def test_parse_news(): + """Test parse_news().""" + expected_news = { + "https://phpbb.example.com/threads/something-of-something.424837/": { + "title": "Some other problem", + "category": None, + "comments_cnt": 0, + }, + "https://phpbb.example.com/threads/something-not-received.424836/": { + "title": "Something not received", + "category": None, + "comments_cnt": 1, + }, + } + + rss_fname = os.path.join(SCRIPT_PATH, "files", "phpbb-rss.xml") + with open(rss_fname, "rb") as fhandle: + rss_data = fhandle.read().decode("utf-8") + + result = phpbb2slack.parse_news(rss_data, []) + assert result == expected_news + + @pytest.mark.parametrize( "cache,expected_cache", [ diff --git a/tests/test_rss2irc.py b/tests/test_rss2irc.py index 34c99df..32e9545 100644 --- a/tests/test_rss2irc.py +++ b/tests/test_rss2irc.py @@ -337,6 +337,27 @@ def test_main_cache_hit( assert sorted(output) == sorted(expected_output) +def test_parse_news(): + """Test parse_news().""" + expected_news = { + 
"http://www.example.com/scan.php?page=news_item&px=item1": ( + "Item1", + "", + ), + "http://www.example.com/scan.php?page=news_item&px=item2": ( + "Item2", + "", + ), + } + + rss_fname = os.path.join(SCRIPT_PATH, "files", "rss.xml") + with open(rss_fname, "rb") as fhandle: + rss_data = fhandle.read().decode("utf-8") + + result = rss2irc.parse_news(rss_data) + assert result == expected_news + + def test_scrub_items(): """Test scrub_items().""" logging.basicConfig(level=logging.CRITICAL)