-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathCredit Card Modeling 65%.R
101 lines (66 loc) · 2.29 KB
/
Credit Card Modeling 65%.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# Credit Card Predictive Modeling
### Load Libraries
library(randomForest)
library(e1071)
library(rpart)
library(rpart.plot)
library(caTools)
library(readr)
library(caret)
# Load the credit-card dataset
# Read the CSV through a relative path instead of calling setwd(), so running
# the script does not change the session's working directory as a side effect.
ccard <- read_csv(file.path("Kaggle", "creditcard.csv"))
# read_csv() parses the 0/1 Class column as numeric. The classifiers and the
# confusionMatrix() calls further down need a categorical response, so convert
# it once here (levels become "0" and "1").
ccard$Class <- as.factor(ccard$Class)
# Quick look at the first rows and the column types.
head(ccard)
str(ccard)
## Predictive Modeling
### Split the data 65:35 (training : hold-out)
set.seed(22540)
# sample.split() yields one logical flag per row; TRUE marks a training row.
split <- sample.split(ccard$Class, SplitRatio = 0.65)
# Compare against TRUE/FALSE rather than T/F: T and F are ordinary variables
# that can be reassigned, TRUE and FALSE are reserved words.
train <- subset(ccard, split == TRUE)
cv <- subset(ccard, split == FALSE)
### Check the Class distribution of the hold-out set
table(cv$Class)
# Logistic regression on the full-data split
# Model Class against every other column of the training rows.
glm.model <- glm(
  formula = Class ~ .,
  family = binomial(link = "logit"),
  data = train
)
# Predicted probabilities for the hold-out rows.
glm.predict <- predict(glm.model, newdata = cv, type = "response")
# Cross-tabulate the actual Class against predictions at a 0.5 cutoff.
table(cv$Class, glm.predict > 0.5)
# Decision tree on the full-data split
# method = "class" requests a classification tree.
tree.model <- rpart(Class ~ ., data = train, method = "class", minbucket = 50)
prp(tree.model)
# type = "class" returns the predicted label as a factor.
tree.predict <- predict(tree.model, cv, type = "class")
# confusionMatrix() takes the predictions first and the true labels second,
# both as factors with identical levels. Re-level the observed Class to match
# the prediction levels (harmless if it is already a factor) and report the
# class-specific metrics for Class == 1.
confusionMatrix(
  data = tree.predict,
  reference = factor(cv$Class, levels = levels(tree.predict)),
  positive = "1"
)
# Undersample: keep 10,000 rows with Class = 0 plus every row with Class = 1
data.class.0 <- subset(ccard, Class == 0)
data.class.1 <- subset(ccard, Class == 1)
nrow(data.class.0)
nrow(data.class.1)
# head() returns at most 10,000 rows, so it never indexes past the end of the
# table the way `[1:10000, ]` would if fewer rows were available.
# NOTE(review): these are the first 10,000 rows in file order, not a random
# sample - confirm that this is intended.
data.class.0 <- head(data.class.0, 10000)
nrow(data.class.0)
# Stack the reduced Class = 0 rows on top of all Class = 1 rows.
data <- rbind(data.class.0, data.class.1)
nrow(data)
# Split the undersampled data 65:35 (training : hold-out)
set.seed(205)
# sample.split() yields one logical flag per row; TRUE marks a training row.
split <- sample.split(data$Class, SplitRatio = 0.65)
# Compare against TRUE/FALSE rather than T/F: T and F are ordinary variables
# that can be reassigned, TRUE and FALSE are reserved words.
train <- subset(data, split == TRUE)
cv <- subset(data, split == FALSE)
# Class distribution of the hold-out set.
table(cv$Class)
# Logistic regression on the undersampled split
# Same model as before, but with the IRLS iteration cap raised to 50.
glm.model <- glm(
  formula = Class ~ .,
  family = binomial(link = "logit"),
  data = train,
  control = glm.control(maxit = 50)
)
# Predicted probabilities for the hold-out rows.
glm.predict <- predict(glm.model, newdata = cv, type = "response")
# Cross-tabulate the actual Class against predictions at a 0.5 cutoff.
table(cv$Class, glm.predict > 0.5)
# SVM model (radial kernel) on the undersampled split
# svm() fits a regression when the response is numeric. Make Class a factor so
# a classifier is trained (this is a no-op when Class is already a factor).
train$Class <- as.factor(train$Class)
svm.model <- svm(Class ~ ., data = train, kernel = "radial", cost = 1, gamma = 0.3)
svm.predict <- predict(svm.model, cv)
# confusionMatrix() takes the predictions first and the true labels second,
# both as factors with identical levels. Re-level the observed Class to match
# the prediction levels and report the class-specific metrics for Class == 1.
confusionMatrix(
  data = svm.predict,
  reference = factor(cv$Class, levels = levels(svm.predict)),
  positive = "1"
)
# Decision tree on the undersampled split
# method = "class" requests a classification tree.
tree.model <- rpart(Class ~ ., data = train, method = "class", minbucket = 10)
prp(tree.model)
# type = "class" returns the predicted label as a factor.
tree.predict <- predict(tree.model, cv, type = "class")
# confusionMatrix() takes the predictions first and the true labels second,
# both as factors with identical levels. Re-level the observed Class to match
# the prediction levels (harmless if it is already a factor) and report the
# class-specific metrics for Class == 1.
confusionMatrix(
  data = tree.predict,
  reference = factor(cv$Class, levels = levels(tree.predict)),
  positive = "1"
)
# Random forest on the undersampled split
set.seed(10)
# randomForest() grows a regression forest when the response is numeric. Make
# Class a factor so a classification forest is grown (this is a no-op when
# Class is already a factor).
train$Class <- as.factor(train$Class)
rf.model <- randomForest(Class ~ ., data = train,
                         ntree = 2000, nodesize = 20)
rf.predict <- predict(rf.model, cv)
# confusionMatrix() takes the predictions first and the true labels second,
# both as factors with identical levels. Re-level the observed Class to match
# the prediction levels and report the class-specific metrics for Class == 1.
confusionMatrix(
  data = rf.predict,
  reference = factor(cv$Class, levels = levels(rf.predict)),
  positive = "1"
)
# Plot the variable-importance measures of the fitted forest.
varImpPlot(rf.model)