-
Notifications
You must be signed in to change notification settings - Fork 1
/
proj_utils.py
executable file
·33 lines (27 loc) · 975 Bytes
/
proj_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#!/home/neilpquinn-wmf/venv/bin/python3
import json
import requests
import pandas as pd
# Get a list of active wikipedias.
#
# Fetches the site matrix from the MediaWiki API and builds `active_wikis`,
# a DataFrame with one row per Wikipedia that is not marked closed and the
# columns: sitename, url, dbname, language_code, language_name.
response = requests.get(
    'https://www.mediawiki.org/w/api.php?action=sitematrix&format=json&smtype=language&formatversion=2',
    # Fail with an exception instead of hanging forever if the API stalls.
    timeout=30)
# Surface HTTP errors here rather than as a confusing JSON/KeyError below.
response.raise_for_status()
site_matrix = json.loads(response.text)['sitematrix']
# 'count' sits alongside the per-language entries but has no 'site' list
# (presumably a total -- confirm against the API docs), so drop it before
# iterating over the language groups.
site_matrix.pop('count', None)

# Flatten to one record per site, tagging each with the code and local name
# of the language group it belongs to. Building the frame once replaces the
# per-language DataFrame.append calls: that method was removed in pandas 2.0
# and copied the whole accumulated frame on every iteration.
site_rows = [
    dict(
        site,
        language_code=language['code'],
        language_name=language['localname'])
    for language in site_matrix.values()
    for site in language['site']
]

# Passing `columns` keeps only the fields used below and guarantees they
# exist (filled with NaN when a site record lacks one, e.g. 'closed'), even
# when the API returned no sites at all.
active_wikis = pd.DataFrame(
    site_rows,
    columns=[
        'code',
        'sitename',
        'url',
        'dbname',
        'closed',
        'language_code',
        'language_name'])

# only keep active wikipedia: project code 'wiki' and no 'closed' value
is_active_wikipedia = (
    (active_wikis['code'] == 'wiki') & active_wikis['closed'].isnull())
# The filter columns carry no information once applied, so drop them.
# drop() returns a new frame rather than mutating a filtered view.
active_wikis = active_wikis[is_active_wikipedia].drop(
    columns=['code', 'closed'])