-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathFig8.R
99 lines (80 loc) · 3.78 KB
/
Fig8.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# This script checks the performance of
# logistic regression classifiers
# The script is used to generate plots in
# Fig. 8 of the SEAMS submission.
# Author: [email protected]
# Last Updated: 1st March 2018
# The former rm(list = ls()) call was removed: every object used below is
# assigned in this script before it is read, so wiping the caller's global
# environment is unnecessary.
#library(e1071)
#library(klaR)
# setwd("~/repos/github/relais2015/r-scripts/")

# NOTE(review): no RNG seed is set in this script, so the sampled rows (and
# therefore the figures) differ from run to run.

# Draw `size` elements of `x` without replacement.
# Unlike sample(x, size), a length-one numeric `x` is never reinterpreted as
# the range 1:x; for longer `x` the same random draws are made.
draw_without_replacement <- function(x, size) {
  x[sample.int(length(x), size, replace = FALSE)]
}

# Names of the columns of `train_set` to train on: columns whose variance is
# defined and non-zero, plus the response column "attacked", which is always
# kept so the model formula stays valid.
informative_columns <- function(train_set) {
  col_var <- vapply(
    train_set,
    function(column) var(column, na.rm = TRUE),
    numeric(1)
  )
  keep <- !is.na(col_var) & col_var != 0
  keep[names(train_set) == "attacked"] <- TRUE
  names(train_set)[keep]
}

# Per-class accuracy of rounded predicted probabilities against `truth`.
# Both factors are given the fixed levels 0 and 1 so the confusion table is
# always 2 x 2, even when only one class is predicted.
# Returns c(attack = TP / n_attack, nonattack = TN / n_nonattack).
class_accuracy <- function(probabilities, truth, n_attack, n_nonattack) {
  classes <- c(0, 1)
  confusion <- table(
    factor(round(probabilities), levels = classes),
    factor(truth, levels = classes)
  )
  c(
    attack = confusion[2, 2] / n_attack,
    nonattack = confusion[1, 1] / n_nonattack
  )
}

# Fit a logistic regression on the rows `train_rows` of `test_set` and score
# it on all of `test_set` (a data frame of features plus column "attacked").
# Returns the named vector produced by class_accuracy().
score_training_sample <- function(train_rows, test_set, n_attack,
                                  n_nonattack) {
  train_set <- test_set[train_rows, , drop = FALSE]
  cols <- informative_columns(train_set)
  train_set <- train_set[, cols, drop = FALSE]
  classifier <- glm(
    attacked ~ .,
    family = "binomial",
    data = train_set,
    control = glm.control(maxit = 50)
  )
  probabilities <- predict(
    classifier,
    newdata = test_set[, cols, drop = FALSE],
    type = "response"
  )
  class_accuracy(probabilities, test_set$attacked, n_attack, n_nonattack)
}

# Write the two accuracy curves (rows of `result`: attack, non-attack) to the
# PDF file `out_fname`. The device is closed even if plotting fails.
plot_accuracy_curves <- function(result, out_fname) {
  pdf(out_fname)
  on.exit(dev.off(), add = TRUE)
  par(mar = c(5, 5, 1, 1))
  # lty/col are spelled out (they equal matplot's defaults for two series)
  # so that the legend below can mirror them exactly.
  matplot(
    t(result), type = "l", lty = 1:2, col = 1:2,
    cex.lab = 2, cex.axis = 1.6,
    xlab = "#attack instance", ylab = "Accuracy of classification"
  )
  legend(
    "bottomright", inset = .05, cex = 1.3,
    legend = c("attack", "non-attack"),
    lty = 1:2, col = 1:2, horiz = FALSE
  )
  invisible(out_fname)
}

# Number of repeated random draws per training-set size.
n_runs <- 10
# Total number of instances in each of the evaluated sample sizes.
sizes <- c(100, 200, 300)

# Make sure the output directory exists before any model fitting is done.
dir.create("figs", showWarnings = FALSE, recursive = TRUE)

for (n_users in c(6, 9)) {
  requests_data <- readRDS(paste0("data/share_request-", n_users, "-300.rds"))
  labels_data <- readRDS(paste0("data/share_label-", n_users, "-300.rds"))

  # Identify rows for attack and non-attack by label value, so the result
  # does not depend on the attack rows being stored first.
  attack_cases <- which(labels_data == 1)
  nattack_cases <- which(labels_data == 0)

  # Every classifier is evaluated on the complete data set.
  test.requests <- cbind(requests_data, attacked = labels_data)

  for (size in sizes) {
    # Number of attack instances that keeps the class balance of the full
    # data set within a sample of `size` instances.
    num_pos_inst <- round(length(attack_cases) * size / length(labels_data))
    if (num_pos_inst < 1) {
      warning(
        "Skipping size ", size, " for n_users = ", n_users,
        ": the sample would contain no attack instances.",
        call. = FALSE
      )
      next
    }

    a_index <- draw_without_replacement(attack_cases, num_pos_inst)
    # Fill up with non-attack rows to keep the total number of instances.
    na_index <- draw_without_replacement(nattack_cases, size - num_pos_inst)
    class_rat <- length(na_index) / length(a_index)

    attack_positive <- matrix(
      NA_real_, num_pos_inst, n_runs,
      dimnames = list(
        paste("#post ins", seq_len(num_pos_inst)),
        paste("runs# =", seq_len(n_runs))
      )
    )
    attack_negative <- attack_positive

    for (i in seq_len(num_pos_inst)) { # number of positive training instances
      for (j in seq_len(n_runs)) {
        sample_a_index <- sort(draw_without_replacement(a_index, i))
        # Non-attack count follows the class ratio; floor() makes explicit
        # the truncation that sample() applied to a fractional size.
        sample_na_index <- sort(
          draw_without_replacement(na_index, floor(i * class_rat))
        )
        accuracy <- score_training_sample(
          c(sample_a_index, sample_na_index),
          test.requests,
          length(attack_cases),
          length(nattack_cases)
        )
        attack_positive[i, j] <- accuracy[["attack"]]
        attack_negative[i, j] <- accuracy[["nonattack"]]
      } # end of j
    } # end of i

    # Average over the runs: row 1 = attack, row 2 = non-attack.
    result <- rbind(rowMeans(attack_positive), rowMeans(attack_negative))
    out_fname <- paste0("figs/logreg-perf-", n_users, "-", size, ".pdf")
    plot_accuracy_curves(result, out_fname)
  } # end of size
} # end of n_users