-
Notifications
You must be signed in to change notification settings - Fork 0
/
Code.R
64 lines (45 loc) · 1.48 KB
/
Code.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# Titanic survival pipeline: load the train/test CSVs, impute and cap Age and
# Fare, encode Sex and Embarked as 0/1 indicators, fit a logistic regression
# on the training data, and write thresholded test predictions to "df.csv".

library(car)  # provides vif()

# ---- Tunable constants ----

# Replacement values for missing Age / Fare.
# NOTE(review): presumably read off the summary() output -- confirm; computing
# them from the training data would avoid the hard-coding.
age_fill <- 29.07
fare_fill <- 35.627

# Limits used to clamp extreme values.
# NOTE(review): presumably chosen by eye from the boxplots below -- confirm.
age_floor <- 4
age_cap <- 52
fare_cap <- 136

# Predicted probabilities at or above this value are labelled Survived = 1.
survival_cutoff <- 0.56

# Columns excluded from the model. The script previously dropped these by
# position; names are used so a reordered CSV cannot silently drop the wrong
# columns.
# NOTE(review): assumes the standard Kaggle Titanic column names -- confirm.
id_col <- "PassengerId"
unused_cols <- c("Name", "Ticket", "Cabin", "Embarked")

# ---- Load ----

getwd()
train <- read.csv("/home/ashok/Desktop/titanic/train .csv")
test <- read.csv("/home/ashok/Desktop/titanic/test .csv")

# Fail loudly if the files do not have the layout the rest of the script needs.
stopifnot(
  all(c(id_col, unused_cols, "Survived", "Sex", "Age", "Fare") %in%
        names(train)),
  all(c(id_col, unused_cols, "Sex", "Age", "Fare") %in% names(test))
)

summary(train)
summary(test)
hist(train$Age)

# ---- Impute missing values ----

train$Age[is.na(train$Age)] <- age_fill
test$Age[is.na(test$Age)] <- age_fill
test$Fare[is.na(test$Fare)] <- fare_fill
summary(train)
summary(test)

# ---- Encode categoricals as 0/1 indicators ----

train$Sex <- ifelse(train$Sex == "female", 1, 0)
test$Sex <- ifelse(test$Sex == "female", 1, 0)

# Two indicators for Embarked ("C" and "S"); any other value is the baseline.
train$embarked_c <- ifelse(train$Embarked == "C", 1, 0)
test$embarked_c <- ifelse(test$Embarked == "C", 1, 0)
train$embarked_s <- ifelse(train$Embarked == "S", 1, 0)
test$embarked_s <- ifelse(test$Embarked == "S", 1, 0)

# ---- Drop columns that are not model inputs ----

head(train)
# The id is removed from train so that `Survived ~ .` does not use it as a
# predictor; it is kept in test because the output file needs it.
train <- train[setdiff(names(train), c(id_col, unused_cols))]
head(test)
test <- test[setdiff(names(test), unused_cols)]

# ---- Clamp outliers ----

boxplot(train$Age)
train$Age <- pmax(pmin(train$Age, age_cap), age_floor)
test$Age <- pmax(pmin(test$Age, age_cap), age_floor)
boxplot(test$Age)

boxplot(train$Fare)
train$Fare <- pmin(train$Fare, fare_cap)
test$Fare <- pmin(test$Fare, fare_cap)

# ---- Multicollinearity check ----

# A linear fit is used only as a vehicle for vif(); it is not the final model.
model <- lm(Survived ~ ., data = train)
vif_values <- vif(model)  # named so it does not shadow base::t()
sort(vif_values, decreasing = TRUE)

# ---- Logistic regression ----

model1 <- glm(as.factor(Survived) ~ ., family = "binomial", data = train)
summary(model1)

# NOTE(review): the stepwise-selected model is computed but predictions below
# still come from the full model `model1`. Left unchanged to keep the output
# identical; switch to `stepmodel` if that was the intent.
stepmodel <- step(model1, direction = "both")

# ---- Score the test set and write the result ----

test$score <- predict(model1, newdata = test, type = "response")
head(test$score)
test$Survived <- ifelse(test$score >= survival_cutoff, 1, 0)
head(test)

# Select the output columns by name rather than by position.
submission <- test[c(id_col, "Survived")]
head(submission)
write.csv(submission, file = "df.csv", row.names = FALSE)