-
Notifications
You must be signed in to change notification settings - Fork 0
/
som.R
200 lines (168 loc) · 7.48 KB
/
som.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
library(ggplot2)
library(readr)
#**********************************
# Functions
#**********************************
{
neighborhood <- function(n_nodes, winner, k, sigma) {
#'
#' @param n_nodes amount of nodes used for SOM
#' @param winner winning neuron in this iteration step
#' @param k iteration parameter over the nodes
#' @param sigma radius of the neighborhood function
#' @return neighborhood-factor influencing the SOM learning rule
nodes <- c(1:n_nodes)
distance <- sqrt((nodes[winner] - nodes[k]) ** 2)
distance <- min(distance, n_nodes-distance)
h <- 1 / exp((distance ** 2) / (2 * sigma ** 2))
return(h)
}
determine_winner <- function(n_nodes, coordinates, w, current_coordinates) {
#' Function to determine the winning neuron
#' @param n_nodes amount of nodes used for SOM
#' @param coordinates matrix containing coordinations of places in travling salesman problem
#' @param w matrix which will get updated in SOM and later contain the winning neurons with their coordinates
#' @param current_coordinates currently considered place in travling salesman problem
#' @return the current winning neuron
differences <- c()
for (i in c(1:n_nodes)) {
differences[i] <-
sqrt(
(w[i, 1] - coordinates[current_coordinates, 1]) ** 2 +
(w[i, 2] - coordinates[current_coordinates, 2]) ** 2)
}
minimum <- min(differences)
winner <- as.integer(which(differences == minimum))
return(winner)
}
learning_rate <- function(rate, t) {
#' @param rate learning rate value
#' @param t iteration step
#' @return the learning rate or step width
return(0.01 + rate * t ** (-0.5))
}
sigma <- function(sigma, t) {
#' Halbwertsbreite der neighborhood-function
#' @param sigma radius value
#' @param t iteration step
#' @return radius at step t
return(sigma / (t ** 0.5))
}
som <- function(n_nodes, coordinates, w, sigma, rate, iterations=8000, save_plots=FALSE){
#' Main algorithm for a Self-Organizing-Map for the Travelling Salesman Problem
#' @param n_nodes amount of nodes used for SOM
#' @param coordinates matrix containing coordinations of places in travling salesman problem
#' @param w matrix which will get updated in SOM and later contain the winning neurons with their coordinates
#' @param sigma radius value
#' @param rate learning rate value
#' @param iterations value how often SOM algorithm shall be executed
#' @param save_plots if set to TRUE, the algorithm will save a plot for each iteration
#' @return matrix w which contains the winning neurons with their coordinates
# Iteration for each learning cycle
for (t in c(1:iterations)) {
# Iteration over all places in travelling salesman problem
for (j in c(1:nrow(coordinates))) {
# Determine the current winning neuron
winner <- determine_winner(n_nodes,coordinates, w, j)
# Update neurons
sig <- sigma(sigma, t)
for (k in c(1:n_nodes)) {
# Update equatation: w + (learning rate * neighborhood * coordinates)
w[k,] <-
w[k,] + learning_rate(rate, t) * neighborhood(n_nodes, winner, k, sig) * (coordinates[j,] - w[k,])
}
}
# Plot every graph
if (save_plots) {
if (t %% 15 == 0) {
gname <- paste(c("N", n_nodes, "-", t, ".png"), collapse = "")
coord.df <- as.data.frame(coordinates)
w.df <- as.data.frame(w)
colnames(coord.df) <- c("Grad O", "Grad N")
colnames(w.df) <- c("Grad O", "Grad N")
g <- ggplot() +
geom_point(data = coord.df, aes(`Grad O`, `Grad N`), color = "red", shape = 16, size = 4)+
geom_point(data = w.df, aes(`Grad O`, `Grad N`), color = "green", shape = 4, size = 5)+
geom_path(data = w.df, aes(`Grad O`, `Grad N`), color = "green")+
ggtitle(paste(c("Iteration: ", t, " Nodes: ", n_nodes), collapse = ""))
ggsave(filename = gname, plot = g, path = "Exercise II - ANN/TSP/plots")
}
}
}
return(w)
}
initialize_w_random <- function(n_nodes, grad_o_low, grad_o_high, grad_n_low, grad_n_high) {
#' Function for initializing matrix w with random values between certain ranges
#' w which will contain the winning neurons later
#' @param n_nodes amount of nodes used for SOM - matrix w will contain as many entries
#' @param grad_o_low min value for Grad O
#' @param grad_o_high max value for Grad O
#' @param grad_n_low min value for Grad N
#' @param grad_n_high max value for Grad N
#' @return matrix w with start points
w <- matrix(nrow = n_nodes, ncol = 2)
for (i in c(1:n_nodes)) {
w[i, 1] <- runif(1, grad_o_low, grad_o_high)
w[i, 2] <- runif(1, grad_n_low, grad_n_high)
}
colnames(w, c("Grad O","Grad N"), do.NULL = FALSE)
return(w)
}
initialize_w_from_df <- function(n_nodes, coordinates.df) {
#' Function for initializing matrix w with samples from given dateframe
#' w which will contain the winning neurons later
#' @param n_nodes amount of nodes used for SOM - matrix w will contain as many entries
#' @param coordinates.df data.frame containing coordinations of places in travling salesman problem
#' @return matrix w with start points
w <- as.matrix(dplyr::sample_n(coordinates.df, n_nodes, replace = TRUE)) * 1000
return(w)
}
plot_data <- function(w,coordinates,title){
coordinates.df <- as.data.frame(coordinates)
w.df <- as.data.frame(w)
colnames(coordinates.df) <- c("Grad O", "Grad N")
colnames(w.df) <- c("Grad O", "Grad N")
plot_result <- ggplot() +
geom_point(data = coordinates.df, aes(`Grad O`, `Grad N`), color = "red", shape = 16, size = 4 ) +
geom_point(data = w.df, aes(`Grad O`, `Grad N`), color = "green", shape = 4, size = 5)+
geom_path(data = w.df, aes(`Grad O`, `Grad N`), color = "green")+
ggtitle(title)
return(plot_result)
}
}
#**********************************
# Data preperation
#**********************************
{
# Read data with coordinates and create data.frame
beer_gardens <- read_csv("Exercise II - ANN/TSP/beer_gardens.csv")
beer_gardens <- as.data.frame(beer_gardens)
ggplot(beer_gardens, aes(x=`Grad O`, y=`Grad N`))+
geom_point(aes(),color="red")+
ggtitle("Coordinates of beer gardens in Regensburg")
# Create matrices for easier calculation
coordinates <- as.matrix(beer_gardens)
coordinates <- coordinates * 1000
# Learning rate value
rate <- 0.2
# Sigma - radius value for neighborhood function
s <- 18
# Amount of nodes
n_nodes <- 18
# Iterations for SOM executions
iterations <- 8000
}
#**********************************
# Exercise
#**********************************
# SOM with start points from existing data
w1 <- initialize_w_from_df(n_nodes, beer_gardens)
w1_som <- som(n_nodes, coordinates, w1, s, rate, iterations, save_plots = TRUE)
#p1<-plot_data(w1_som,coordinates, "Start points from data.frame with 18 nodes - 8000 iterations")
#p1
# SOM with random start points from existing data
#w2 <- initialize_w_random(n_nodes, grad_o_low=12092, grad_o_high=12180,
# grad_n_low=49009, grad_n_high=49028)
#w2_som <- som(n_nodes, coordinates, w2, s, rate, iterations, save_plots = FALSE)
#p2<-plot_data(w2_som,coordinates, "Start points random with 25 nodes - 8000 iterations")
#p2