-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCluster_2_Data Process.py
67 lines (58 loc) · 2.21 KB
/
Cluster_2_Data Process.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import pandas as pd
import numpy as np
# Load the raw input table, decoding the file as ISO-8859-1 (Latin-1).
df = pd.read_csv("In_Data_2.csv", encoding="ISO-8859-1")
# concept_desc values that are removed from the table before any further
# processing (matched exactly, via isin, against the concept_desc column).
exclude = [
    "BALANCED SALT SOLUTION - UNSPECIFIED %",
    "CHLORHEXIDINE TOPICAL 0.12%",
    "CHLORHEXIDINE",
    "DEXTRAN 40",
    # Dextrose-named entries.
    "DEXTROSE",
    "DEXTROSE / LACTATED RINGERS 5%",
    "DEXTROSE / SALINE 5% / 0.225%",
    "DEXTROSE / SALINE 5% / 0.45%",
    "DEXTROSE / SALINE 5% / 0.9%",
    "DEXTROSE / SALINE W/KCL 5% / 0.45% + 10 MEQ/L",
    "DEXTROSE / SALINE W/KCL 5% / 0.45% + 20 MEQ/L",
    "DEXTROSE / SALINE W/KCL 5% / 0.9% + 20 MEQ/L",
    "DEXTROSE / WATER 10%",
    "DEXTROSE / WATER 5%",
    "DEXTROSE 50%",
    "DOCUSATE/SENNA 50/8.6",
    "ELECTROLYTE-A SOLUTION",
    "General - Medications - Current",
    "General - Medications - Home",
    "INHALATIONAL",
    "Mean Inspiratory Pressure",
    # Catch-all "OTHER" entries.
    "OTHER - EPIDURAL MEDICATION",
    "OTHER - INTRATHECAL MEDICATION",
    "OTHER - INTRAVENOUS MEDICATION",
    "OTHER - LOCAL INFILTRATION MEDICATION",
    "OTHER - ORAL MEDICATION",
    "OTHER - UNSPECIFIED ROUTE MEDICATION",
    "OTHER FLUID IN, MEDICATION INFUSION VOLUME",
    "OTHER FLUID IN, UNSPECIFIED",
    # Saline / sodium entries.
    "SALINE 0.45%",
    "SALINE 0.9%",
    "SALINE 5%",
    "SODIUM CHLORIDE 0.9%",
    "SODIUM CITRATE",
    # "\x96" is U+0096, the character that byte 0x96 decodes to under
    # ISO-8859-1 (the encoding used to read the CSV); keep the escape as-is.
    "Uncategorized \x96 Preop/PACU/Floor/ICU nursing documentation",
    "Unknown Concept",
]
# Drop every row whose concept is on the exclusion list.
is_excluded = df['concept_desc'].isin(exclude)
df = df.loc[~is_excluded]

# Keep only concepts that still occur in at least 5 rows.
rows_per_concept = df.groupby("concept_desc")["concept_desc"].transform("size")
df = df.loc[rows_per_concept >= 5]

# The free-text procedure column is not used after this point.
df = df.drop(columns=['PROCEDURE_TXT'])

# Report: distinct concepts, distinct cases, table shape, and the concepts.
remaining_concepts = set(df['concept_desc'])
remaining_cases = set(df['MPOG_CASE_ID'])
print(len(remaining_concepts))
print(len(remaining_cases))
print(df.shape)
print(remaining_concepts)
# Collapse concentration/combination variants onto a single base name.
# DataFrame.replace returns a NEW frame and leaves the original untouched, so
# the result has to be assigned back; calling it without keeping the return
# value (as this code previously did) changes nothing.
# As before, the replacement is applied to matching cells in every column.
# NOTE(review): the ">= 5 rows per concept" filter earlier in the file runs
# before these merges, so it is evaluated on the un-merged names - confirm
# that this ordering is intended.
variant_to_base = {
    'ACETYLCYSTEINE 20%': 'ACETYLCYSTEINE',
    'ALBUMIN 25%': 'ALBUMIN',
    'ALBUMIN 5%': 'ALBUMIN',
    'ALBUTEROL / IPRATROPIUM 2.5MG / 0.5MG': 'ALBUTEROL',
    'ATROPINE 1%': 'ATROPINE',
}
df = df.replace(variant_to_base)
# Build the case-by-medication indicator table: one row per MPOG_CASE_ID, one
# column per concept_desc, holding 1 where that pair occurs in df and 0
# elsewhere.
# crosstab counts the pairs in a single vectorized pass (instead of writing
# one cell per row through iterrows); clip(upper=1) turns counts into flags.
# rename_axis(index=None, columns=None) removes the axis names crosstab adds,
# so the CSV header keeps an empty first cell, as an unnamed index does.
df2 = (
    pd.crosstab(df['MPOG_CASE_ID'], df['concept_desc'])
    .clip(upper=1)
    .rename_axis(index=None, columns=None)
)

# Row/column labels are now sorted, so the output file is reproducible from
# run to run (set() iteration order, used previously, is not guaranteed).
cases = df2.index.tolist()
meds = df2.columns.tolist()

# Total number of distinct (case, medication) pairs.
print(df2.values.sum())
df2.to_csv('Mid_Data_2_full.csv')