Skip to content
This repository has been archived by the owner on Apr 1, 2021. It is now read-only.

Commit

Permalink
Merge pull request #81 from macs1207/master
Browse files (browse the repository at this point in the history)
Fix announcement parser
  • Loading branch information
abc873693 authored Feb 29, 2020
2 parents 3684c2f + c50f6df commit 842545b
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions src/crawler/school_announcements_crawler.py
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
from lxml import etree
import requests
import re
from utils import error_code
from utils import config

[Expand Down] [Expand Up] @@ -30,9 +31,9 @@ def acad(page=0):

if req.status_code == 200:
req = req.json()['content']

root = etree.HTML(req)
date = root.xpath('//*[@class="mdate before"]')
node = root.xpath('//*[@class="d-txt"]')
date = [node[i] for i in range(0, len(node), 3)]
href = root.xpath('//*[@class="d-txt"]//a')

base_id = page*15
[Expand All] @@ -42,7 +43,7 @@ def acad(page=0):
'info':{
'id': base_id+index,
'title': href_data.attrib['title'],
'date':date_time.text
'date': re.search("([12]\d{3}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01]))", date_time.text).group()
}
} for index, (date_time, href_data) in enumerate(zip(date, href))]
return notification
Expand Down

0 comments on commit 842545b

Please sign in to comment.