-
Notifications
You must be signed in to change notification settings - Fork 0
/
intractable_disease.py
66 lines (55 loc) · 2.07 KB
/
intractable_disease.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/bin/env python
import numpy as np
import pandas as pd
def intractable_disease_table(
    xlsx_url: str = "https://www.mhlw.go.jp/content/001212238.xlsx",
    num_period_cols: int = 2,
    period_colname: list[str] = ["no", "name"],
    skiprows: int = 3,
) -> pd.Series:
    """Get intractable disease table from xlsx_url.

    The sheet is read as ``num_period_cols`` side-by-side repetitions of the
    column group ``period_colname``. The repetitions are stacked on top of
    each other and returned as a single series sorted by its index.

    Args:
        xlsx_url (str, optional): a url of xlsx file.
            Defaults to "https://www.mhlw.go.jp/content/001212238.xlsx".
        num_period_cols (int, optional): number of times the column group is
            repeated side by side in the sheet. Defaults to 2.
        period_colname (list[str], optional): column names of one group.
            Columns whose generated name starts with "_" (e.g. an empty entry
            for a spacer column) are discarded after loading. Exactly two
            columns per group must remain: the first becomes the index and
            the second the values. Defaults to ["no", "name"].
            The default list is only read, never mutated.
        skiprows (int, optional): number of rows to skip from the beginning.
            Defaults to 3.

    Returns:
        pd.Series: a series of intractable disease table, named "name", with
        an index named "no", sorted by index.

    Raises:
        ValueError: if num_period_cols is smaller than 1, or if a column
            group does not leave exactly two usable columns.

    Note:
        xlsx_url is from https://www.mhlw.go.jp/stf/seisakunitsuite/bunya/0000084783.html
    """
    if num_period_cols < 1:
        raise ValueError("num_period_cols must be at least 1")

    # load xlsx
    colnames = [f"{col}_{i}" for i in range(num_period_cols) for col in period_colname]
    multicol_df = (
        pd.read_excel(xlsx_url, names=colnames, skiprows=skiprows)
        .filter(regex="^(?!_)")
        .dropna(how="all")
    )

    # Stack the repeated column groups vertically. The row width comes from
    # the data (columns per group), not from num_period_cols: the two only
    # coincide for the default arguments.
    stacked = np.vstack(
        [multicol_df.filter(regex=f"_{i}$").to_numpy() for i in range(num_period_cols)]
    )
    if stacked.shape[1] != 2:
        raise ValueError(
            "each column group must leave exactly two columns (index, value), "
            f"got {stacked.shape[1]}"
        )

    # split to no and name
    no, name = stacked.T

    # make series
    series = (
        pd.Series(name, index=no, name="name")
        # literal newline removal; regex=False avoids the version-dependent default
        .str.replace("\n", "", regex=False)
        # repeated header rows carry this label; tolerate sheets without one
        .drop("番号", errors="ignore")
        .dropna()
        .rename_axis("no")
    )
    return series.sort_index()
if __name__ == "__main__":
    # Build the table once and export it as CSV, JSON and YAML.
    table = intractable_disease_table()

    # CSV with a BOM (utf-8-sig encoding).
    table.to_csv("intractable_disease.csv", encoding="utf-8-sig")

    # Imported here, after the CSV export, exactly as the script always did.
    import json
    import yaml

    mapping = table.to_dict()

    # JSON keeps non-ASCII characters unescaped.
    with open("intractable_disease.json", "w", encoding="utf-8") as json_fp:
        json.dump(mapping, json_fp, ensure_ascii=False)

    # YAML likewise writes unicode characters as-is.
    with open("intractable_disease.yaml", "w", encoding="utf-8") as yaml_fp:
        yaml.safe_dump(mapping, yaml_fp, allow_unicode=True)