-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPathology_Read_Entry_Function.R
executable file
·109 lines (73 loc) · 3.24 KB
/
Pathology_Read_Entry_Function.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#' Expand one pathology record into one row per finding
#'
#' The `description` field of a record holds a comma-separated list of
#' findings. Each finding is a hyphen-separated sequence of tokens whose
#' optional first token is a severity grade and whose next token is the
#' finding description, e.g. `"mild - necrosis, hyperplasia"`. Tokens after
#' the description are ignored. Blank findings (such as the gap in `"a,,b"`)
#' are dropped, so an empty description yields a zero-row result.
#'
#' @param pathology_row One record (a one-row data.frame or a named list)
#'   providing exactly one value for each of `identifier`, `study_id`,
#'   `species`, `type`, `duration`, `dose`, `dose_interval`, `description`,
#'   `target_organ`, `adverse`, `reversible` and `extra_info`.
#' @return A data.frame of character columns `identifier`, `study_id`,
#'   `species`, `type`, `duration`, `dose`, `dose_interval`, `severity`,
#'   `description`, `target`, `adversity`, `reversibility` and `extra_info`,
#'   with one row per finding, ready to be `rbind()`-ed to the main dataset.
#'   `severity` is `NA` when the finding does not start with a known grade.
#'   `reversibility` is `NA` for the code `"na"` and, with a warning, for any
#'   unrecognised or missing code.
read.pathology.entry <- function(pathology_row) {
  # Severity grades recognised as the leading token of a finding.
  severity_values <- c(
    "minimal", "mild", "slight", "moderate", "marked", "severe"
  )

  # Reversibility codes and the labels written to the output.
  reversibility_labels <- c(
    r = "Reversible",
    nr = "Not_Reversible",
    ap_r = "Appeared_in_Recovery",
    tr = "Trend_Recovery",
    na = NA_character_,
    r_tr = "Trend_Recovery_or_Recovery"
  )

  # Coerce a record field to one string; fail early and name the field when
  # it is absent or not scalar instead of failing later on a length mismatch.
  scalar_chr <- function(value, field) {
    if (length(value) != 1L) {
      stop(
        "pathology_row$", field, " must hold exactly one value",
        call. = FALSE
      )
    }
    as.character(value)
  }

  # Split the description into trimmed findings and drop blank ones. An NA
  # description is kept as a single NA finding.
  raw_description <- scalar_chr(pathology_row$description, "description")
  findings <- trimws(strsplit(raw_description, ",", fixed = TRUE)[[1L]])
  findings <- findings[is.na(findings) | nzchar(findings)]
  n_findings <- length(findings)

  # Tokenise every finding on "-" and pull out the first two tokens;
  # indexing past the end of a token vector yields NA.
  tokens <- lapply(strsplit(findings, "-", fixed = TRUE), trimws)
  nth_token <- function(k) {
    vapply(tokens, function(tok) tok[k], character(1))
  }
  first_token <- nth_token(1L)
  second_token <- nth_token(2L)

  # The first token is the severity only when it is a known grade; the
  # description is then the token that follows it.
  has_severity <- first_token %in% severity_values
  severity <- first_token
  severity[!has_severity] <- NA_character_
  description <- first_token
  description[has_severity] <- second_token[has_severity]

  # Reversibility is a property of the record, so it is resolved once.
  code <- pathology_row$reversible
  code_is_known <- length(code) == 1L && !is.na(code) &&
    as.character(code) %in% names(reversibility_labels)
  if (code_is_known) {
    reversibility <- reversibility_labels[[as.character(code)]]
  } else {
    warning(
      paste(
        "Error in reversibility at study", pathology_row$study_id,
        "in the dose", pathology_row$dose
      ),
      call. = FALSE
    )
    reversibility <- NA_character_
  }

  # Record-level fields are repeated for every finding.
  per_finding <- function(value, field) {
    rep(scalar_chr(value, field), n_findings)
  }

  data.frame(
    identifier = per_finding(pathology_row$identifier, "identifier"),
    study_id = per_finding(pathology_row$study_id, "study_id"),
    species = per_finding(pathology_row$species, "species"),
    type = per_finding(pathology_row$type, "type"),
    duration = per_finding(pathology_row$duration, "duration"),
    dose = per_finding(pathology_row$dose, "dose"),
    dose_interval = per_finding(pathology_row$dose_interval, "dose_interval"),
    severity = severity,
    description = description,
    target = per_finding(pathology_row$target_organ, "target_organ"),
    adversity = per_finding(pathology_row$adverse, "adverse"),
    reversibility = rep(reversibility, n_findings),
    extra_info = per_finding(pathology_row$extra_info, "extra_info"),
    stringsAsFactors = FALSE
  )
}