-
Notifications
You must be signed in to change notification settings - Fork 3
/
searcher.py
109 lines (83 loc) · 3.22 KB
/
searcher.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import logging
import aiohttp
import asyncio
from lxml import html
from config import (
API_REFERENCE_URL,
EXAMPLES_URL,
EXAMPLES_ALIASES,
CACHE_MAX_AGE,
API_ARTICLE_ANCHOR_XPATH,
EXAMPLES_LINK_XPATH,
)
async def fetch(session: aiohttp.ClientSession, url: str, encoding: str = 'utf-8') -> str:
    """Download *url* through *session* and return the response body as text.

    Args:
        session: aiohttp client session used to perform the GET request.
        url: Address to request.
        encoding: Encoding used to decode the response body.

    Returns:
        The decoded response body.

    Raises:
        aiohttp.ClientResponseError: If the response status is not 200.
    """
    async with session.get(url) as response:
        # Explicit check instead of ``assert``: assertions are stripped when
        # Python runs with -O, which would let error pages through as if they
        # were valid content.
        if response.status != 200:
            raise aiohttp.ClientResponseError(
                response.request_info,
                response.history,
                status=response.status,
                message='Unexpected response status for {}'.format(url),
                headers=response.headers,
            )
        return await response.text(encoding=encoding)
class Searcher:
    """Search API reference articles and aiogram examples scraped from HTML.

    Both lists are kept in an in-memory cache. A background task refreshes
    the cache every ``CACHE_MAX_AGE`` seconds; if a query arrives before the
    first refresh has completed, the missing list is fetched on demand.
    """

    def __init__(self):
        # ``None`` means "not fetched yet". The attributes must exist from
        # the start, otherwise the cache checks below raise AttributeError
        # when a query arrives before the first background refresh.
        self._cached_articles = None
        self._cached_examples = None

        # NOTE(review): asyncio.get_event_loop() without a running loop is
        # deprecated in recent Python versions - confirm how this class is
        # instantiated before changing it.
        self.loop = asyncio.get_event_loop()
        self._session = aiohttp.ClientSession()
        # Keep a reference to the task: the event loop only holds weak
        # references, so an unreferenced task may be garbage-collected.
        self._updater_task = self.loop.create_task(self._cache_updater())

    async def _cache_updater(self):
        """Refresh both caches forever, sleeping CACHE_MAX_AGE between runs."""
        while True:
            logging.debug('Updating cache')
            try:
                self._cached_articles = await self._get_articles_from_html()
                self._cached_examples = await self._get_examples_from_html()
            except asyncio.CancelledError:
                # Never swallow cancellation, whatever its base class is.
                raise
            except Exception:
                # A failed refresh must not kill the updater for good; keep
                # whatever is cached and retry after the usual delay.
                logging.exception('Cache update failed; keeping previous cache')
            await asyncio.sleep(CACHE_MAX_AGE)

    async def _get_all_articles(self) -> list:
        """Return the cached articles, fetching them first if never loaded."""
        if self._cached_articles is None:
            logging.debug('Articles cache is empty. Updating manually')
            self._cached_articles = await self._get_articles_from_html()
        return self._cached_articles

    async def _get_articles_from_html(self) -> list:
        """Download the API reference page and extract its article anchors.

        Returns:
            A list of dicts with the keys ``type``, ``title`` and ``link``.
        """
        content = await fetch(self._session, API_REFERENCE_URL)
        tree = html.fromstring(content)
        results = []
        for tag in tree.xpath(API_ARTICLE_ANCHOR_XPATH):
            titles = tag.xpath('following-sibling::text()')
            hrefs = tag.xpath('@href')
            if not titles or not hrefs:
                # Skip a malformed anchor instead of failing the whole parse.
                continue
            results.append({
                'type': 'API Reference',
                'title': titles[0],
                'link': '{}{}'.format(API_REFERENCE_URL, hrefs[0]),
            })
        return results

    async def _get_all_examples(self) -> list:
        """Return the cached examples, fetching them first if never loaded."""
        if self._cached_examples is None:
            logging.debug('Examples cache is empty. Updating manually')
            self._cached_examples = await self._get_examples_from_html()
        return self._cached_examples

    async def _get_examples_from_html(self) -> list:
        """Download the examples page and extract the example links.

        Returns:
            A list of dicts with the keys ``type``, ``title`` and ``link``.
        """
        content = await fetch(self._session, EXAMPLES_URL)
        tree = html.fromstring(content)
        results = []
        for tag in tree.xpath(EXAMPLES_LINK_XPATH):
            titles = tag.xpath('text()')
            hrefs = tag.xpath('@href')
            if not titles or not hrefs:
                # Skip a malformed link instead of failing the whole parse.
                continue
            results.append({
                'type': 'Aiogram example',
                'title': titles[0],
                'link': '{}{}'.format('https://github.com', hrefs[0]),
            })
        return results

    async def get_api_articles(self, query: str) -> list:
        """Return the articles whose title or link contains *query*.

        The comparison is case-insensitive. Each matching article is returned
        once, in cache order.
        """
        query = query.lower()
        articles = await self._get_all_articles()
        return [
            article for article in articles
            if query in article['title'].lower()
            or query in article['link'].lower()
        ]

    async def get_aiogram_examples(self, query: str) -> list:
        """Return the examples matching *query* by title or by alias.

        The title match is a case-insensitive substring test. The alias match
        requires the lower-cased query to equal one of the aliases registered
        for the example's title in ``EXAMPLES_ALIASES``.
        """
        query = query.lower()
        examples = await self._get_all_examples()
        return [
            example for example in examples
            if query in example['title'].lower()
            # presumably aliases are stored lower-case; verify against config
            or query in EXAMPLES_ALIASES.get(example['title'], [])
        ]
# Module-level shared Searcher instance. Constructing it creates an aiohttp
# client session and schedules the background cache updater on the event loop
# returned by asyncio.get_event_loop().
searcher = Searcher()