-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path6_regression.r
49 lines (30 loc) · 1.3 KB
/
6_regression.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# Load the cleaned player data. The analysis below uses its `Value` and
# `Overall` columns.
fifa <- read.csv("1_cleaned_data.csv")
# Heavily penalise scientific notation so numbers print in fixed notation.
options(scipen = 999)
library("ggplot2")

## CORRELATION ----
# Scatter plot of log(Value) against Overall, with a fitted linear trend line.
# log() is vectorised, so it is applied to the whole column at once.
# NOTE(review): log() returns -Inf for a Value of 0 -- this assumes the cleaned
# data contains only positive values; confirm against the cleaning step.
# The y axis shows the log-transformed value, so it is labelled accordingly.
print(
  ggplot(fifa, aes(x = Overall, y = log(Value))) +
    ylab("log(Value)") +
    geom_jitter() +
    geom_smooth(method = "lm")
)

# Pearson correlation test between log(Value) and Overall.
print(cor.test(log(fifa$Value), fifa$Overall, method = "pearson"))
## REGRESSION ----
# Create the training and test data: a reproducible 80/20 random row split.
set.seed(100)
# seq_len() stays correct for an empty data frame (1:0 would yield c(1, 0));
# floor() makes the truncation of the fractional sample size explicit.
trainingRows <- sample(seq_len(nrow(fifa)), floor(0.8 * nrow(fifa)))
trainingData <- fifa[trainingRows, ]
testData <- fifa[-trainingRows, ]

# Fit log(Value) ~ Overall on the training rows and predict on the test rows.
model <- lm(log(Value) ~ Overall, data = trainingData)
predictedData <- predict(model, newdata = testData)
# The model predicts on the log scale; exp() maps predictions back to the
# scale of Value.
predictedData <- exp(predictedData)

# Review diagnostic measures. print() is explicit so the summary is also shown
# when the script is run through source(), like the other results in the file.
print(summary(model))
## CALCULATE PREDICTION ACCURACY AND ERROR RATES ----
# Pair each actual test value with its prediction. Building the data frame
# directly avoids the matrix round trip of data.frame(cbind(...)).
# NOTE(review): the name `diff` masks base::diff as a variable; it is kept so
# that anything reading this object by name keeps working.
diff <- data.frame(actuals = testData$Value, predicteds = predictedData)

# Correlation accuracy: correlation matrix of actual vs predicted values.
correlation_accuracy <- cor(diff)
print(correlation_accuracy)

# Min-max accuracy: mean over rows of min(actual, predicted) divided by
# max(actual, predicted). pmin()/pmax() compute the row-wise extremes without
# coercing the data frame to a matrix via apply().
min_max_accuracy <- mean(
  pmin(diff$actuals, diff$predicteds) / pmax(diff$actuals, diff$predicteds)
)
print(min_max_accuracy)

# Mean absolute percentage error (MAPE), expressed as a fraction of the actual
# value. An actual value of 0 would make the ratio infinite.
mape <- mean(abs(diff$predicteds - diff$actuals) / diff$actuals)
# Report the MAPE like the other metrics; it was computed but never shown.
print(mape)