From d1a04175948c28942ed69e58490892e87f20bff2 Mon Sep 17 00:00:00 2001 From: Tatsiujin Chin Date: Mon, 21 Sep 2020 19:09:12 +0800 Subject: [PATCH] remove tags in description --- twsc_calendar.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/twsc_calendar.py b/twsc_calendar.py index 45413ac..375e3ca 100644 --- a/twsc_calendar.py +++ b/twsc_calendar.py @@ -1,4 +1,5 @@ import os +import re import pickle import datetime @@ -51,7 +52,7 @@ def retrieve_para(self, e, begin_token, end_token): begin_idx = desc.find(begin_token) + len(begin_token) end_idx = desc.find(end_token) - desc = desc[begin_idx:end_idx].strip().replace('\n', ' ') + desc = re.sub(r'<[^<>]*>', '', desc[begin_idx:end_idx].replace('
', ' ').replace('\n', ' ')).strip() return desc