/*
Copyright (C) 2018-2024 Geoffrey Daniels. https://gpdaniels.com/
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, version 3 of the License only.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
#pragma once
#ifndef GTL_AI_ANN_HPP
#define GTL_AI_ANN_HPP
// Summary: Artificial neural network using the backpropagation algorithm for training.
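// Example usage (a minimal sketch, not part of the header proper; the layer sizes,
// epoch count, and XOR training target below are illustrative only, and convergence
// depends on the random weight initialisation):
//
//   gtl::ann<float> network({ 2, 3, 1 });
//   for (int epoch = 0; epoch < 10000; ++epoch) {
//       network.train({ 0, 0 }, { 0 });
//       network.train({ 0, 1 }, { 1 });
//       network.train({ 1, 0 }, { 1 });
//       network.train({ 1, 1 }, { 0 });
//   }
//   const std::vector<float> prediction = network.process({ 0, 1 });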
#ifndef NDEBUG
# if defined(_MSC_VER)
# define __builtin_trap() __debugbreak()
# endif
/// @brief A simple assert macro to break the program if the ann is misused.
# define GTL_ANN_ASSERT(ASSERTION, MESSAGE) static_cast<void>((ASSERTION) || (__builtin_trap(), 0))
#else
/// @brief At release time the assert macro is implemented as a nop.
# define GTL_ANN_ASSERT(ASSERTION, MESSAGE) static_cast<void>(0)
#endif
#if defined(_MSC_VER)
#pragma warning(push, 0)
#endif
#include <cmath>
#include <vector>
#if defined(_MSC_VER)
#pragma warning(pop)
#endif
// M_PI is an extension and is not guaranteed to be defined by <cmath>, so provide a fallback.
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
namespace gtl {
/// @brief A simple artificial neural network class that uses back-propagation to learn.
/// @tparam scalar_type The floating point type used to represent values in the network.
template<typename scalar_type>
class ann final {
private:
/// @brief The random_pcg class is a stripped-down PCG pseudo-random-number generator.
class random_pcg final {
private:
/// @brief Current state of the random number generator.
unsigned long long int state = 0x853C49E6748FEA9Bull;
/// @brief Increment added to the state during pseudo-random-number generation.
unsigned long long int increment = 0xDA3E39CB94B95BDBull;
public:
/// @brief Get a random number in the closed interval 0 <= x <= 1.
/// @return A pseudo-random number.
scalar_type get_random_inclusive() {
// Save current state.
const unsigned long long int state_previous = this->state;
// Advance internal state.
this->state = state_previous * 0x5851F42D4C957F2Dull + this->increment;
// Calculate the output function (PCG XSH-RR: xorshift of the high bits, then a random rotation).
const unsigned int state_shift_xor_shift = static_cast<unsigned int>(((state_previous >> 18u) ^ state_previous) >> 27u);
const unsigned int rotation = static_cast<unsigned int>(state_previous >> 59u);
const unsigned int random_number = (state_shift_xor_shift >> rotation) | (state_shift_xor_shift << ((-rotation) & 31u));
return static_cast<scalar_type>(random_number) * (1 / static_cast<scalar_type>((1ull << 32) - 1));
}
};
public:
/// @brief Value to use when initialising the weights of the neurons.
enum class initialisation_type {
zero,
positive_one,
negative_one,
random,
positive_random,
negative_random
};
/// @brief The activation function of each neuron.
enum class activation_type {
none,
identity,
step,
softplus,
sigmoid,
tanh,
erf
};
private:
/// @brief The structure of an individual neuron in the network.
struct neuron_type {
/// @brief The activation function used to transform the weighted summed input from the layer above.
activation_type activation_function;
/// @brief Output signal of the neuron to the neurons in the next layer.
scalar_type output;
/// @brief Back-propagated gradient of the error of this neuron, used when training.
scalar_type error_gradient;
/// @brief Weighting of the output signal of each neuron on the layer above (unused for input layer).
std::vector<scalar_type> weights;
/// @brief The last change made to the weight during training (unused for input layer).
std::vector<scalar_type> weight_deltas;
};
private:
/// @brief The network is structured as a number of fully connected layers.
std::vector<std::vector<neuron_type>> layers_of_neurons;
/// @brief To aid training, only a fraction of the computed error (the learning rate) is applied to each weight delta.
scalar_type learning_rate = static_cast<scalar_type>(0.7);
/// @brief To aid training, momentum blends each new weight delta with the previous one.
scalar_type momentum = static_cast<scalar_type>(0.2);
public:
/// @brief Defaulted constructor to allow the creation of empty networks.
constexpr ann() = default;
/// @brief Main constructor builds a network with the provided number and size of layers, using the provided initialisation and activation methods.
/// @param neurons_per_layer The number of neurons per layer, starting with the input layer and ending with the output layer.
/// @param initialisation_function The function to use to initialise the weights of the network.
/// @param activation_function The activation function to use for each neuron.
constexpr ann(const std::vector<unsigned long long int>& neurons_per_layer, const initialisation_type& initialisation_function = initialisation_type::random, const activation_type& activation_function = activation_type::sigmoid) {
const unsigned long long int number_of_layers = neurons_per_layer.size();
GTL_ANN_ASSERT(number_of_layers >= 2, "Must have at least an input and output layer.");
// Allocate the layers.
this->layers_of_neurons.resize(number_of_layers);
// Input layer.
this->layers_of_neurons[0].resize(neurons_per_layer[0]);
for (unsigned long long int j = 0; j < neurons_per_layer[0]; ++j) {
this->layers_of_neurons[0][j].activation_function = activation_type::none;
this->layers_of_neurons[0][j].error_gradient = 0.0;
this->layers_of_neurons[0][j].output = 0.0;
this->layers_of_neurons[0][j].weights.clear();
this->layers_of_neurons[0][j].weight_deltas.clear();
}
// Minimal random number generator for weight and delta initialisation.
random_pcg random_number_generator;
// Hidden and output layers.
for (unsigned long long int layer_index = 1; layer_index < number_of_layers; ++layer_index) {
this->layers_of_neurons[layer_index].resize(neurons_per_layer[layer_index]);
for (unsigned long long int j = 0; j < neurons_per_layer[layer_index]; ++j) {
// Change the activation function for the output layer.
if (layer_index == number_of_layers - 1) {
this->layers_of_neurons[layer_index][j].activation_function = activation_type::identity;
}
else {
this->layers_of_neurons[layer_index][j].activation_function = activation_function;
}
const unsigned long long int neurons_on_layer_above = neurons_per_layer[layer_index - 1];
this->layers_of_neurons[layer_index][j].error_gradient = 0.0;
this->layers_of_neurons[layer_index][j].output = 0.0;
this->layers_of_neurons[layer_index][j].weights.resize(neurons_on_layer_above);
this->layers_of_neurons[layer_index][j].weight_deltas.resize(neurons_on_layer_above);
for (unsigned long long int k = 0; k < neurons_on_layer_above; k++) {
switch (initialisation_function) {
case initialisation_type::zero: {
this->layers_of_neurons[layer_index][j].weights[k] = 0;
} break;
case initialisation_type::positive_one: {
this->layers_of_neurons[layer_index][j].weights[k] = 1;
} break;
case initialisation_type::negative_one: {
this->layers_of_neurons[layer_index][j].weights[k] = -1;
} break;
case initialisation_type::random: {
this->layers_of_neurons[layer_index][j].weights[k] = (random_number_generator.get_random_inclusive() - static_cast<scalar_type>(0.5)) * static_cast<scalar_type>(2.0);
} break;
case initialisation_type::positive_random: {
this->layers_of_neurons[layer_index][j].weights[k] = random_number_generator.get_random_inclusive();
} break;
case initialisation_type::negative_random: {
this->layers_of_neurons[layer_index][j].weights[k] = random_number_generator.get_random_inclusive() - static_cast<scalar_type>(1.0);
} break;
}
this->layers_of_neurons[layer_index][j].weight_deltas[k] = (random_number_generator.get_random_inclusive() - static_cast<scalar_type>(0.5)) * static_cast<scalar_type>(2.0);
}
}
}
}
public:
/// @brief Train the network through back-propagation by providing inputs with known outputs.
/// @param inputs The input values to the network, size must equal that of the input layer.
/// @param targets The expected output values of the network, size must equal that of the output layer.
constexpr void train(const std::vector<scalar_type>& inputs, const std::vector<scalar_type>& targets) {
if (this->layers_of_neurons.empty()) {
return;
}
GTL_ANN_ASSERT((this->layers_of_neurons.front().size() == inputs.size()), "Size of input data must match number of input neurons in the first layer.");
GTL_ANN_ASSERT((this->layers_of_neurons.back().size() == targets.size()), "Size of target data must match number of output neurons in the last layer.");
// Forwards process.
for (unsigned long long int neuron_index = 0; neuron_index < this->layers_of_neurons[0].size(); ++neuron_index) {
this->layers_of_neurons[0][neuron_index].output = inputs[neuron_index];
}
for (unsigned long long int layer_index = 1; layer_index < this->layers_of_neurons.size(); ++layer_index) {
this->forward_propagate(layer_index - 1, layer_index);
}
// Backwards process.
this->update_output_error(targets);
for (unsigned long long int layer_index = this->layers_of_neurons.size() - 1; layer_index > 1; --layer_index) {
this->back_propagate(layer_index, layer_index - 1);
}
// Update weights.
this->adjust_weights();
}
/// @brief Compute the output of the network when provided with inputs.
/// @param inputs The input values to the network, size must equal that of the input layer.
/// @return The computed output values, one per neuron in the output layer.
constexpr std::vector<scalar_type> process(const std::vector<scalar_type>& inputs) {
if (this->layers_of_neurons.empty()) {
return {};
}
GTL_ANN_ASSERT((this->layers_of_neurons.front().size() == inputs.size()), "Size of input data must match number of input neurons in the first layer.");
// Forwards process.
for (unsigned long long int neuron_index = 0; neuron_index < this->layers_of_neurons[0].size(); ++neuron_index) {
this->layers_of_neurons[0][neuron_index].output = inputs[neuron_index];
}
for (unsigned long long int layer_index = 1; layer_index < this->layers_of_neurons.size(); ++layer_index) {
this->forward_propagate(layer_index - 1, layer_index);
}
const unsigned long long int output_index = this->layers_of_neurons.size() - 1;
// Extract results.
std::vector<scalar_type> result;
result.resize(this->layers_of_neurons[output_index].size());
for (unsigned long long int neuron_index = 0; neuron_index < this->layers_of_neurons[output_index].size(); ++neuron_index) {
result[neuron_index] = this->layers_of_neurons[output_index][neuron_index].output;
}
return result;
}
private:
/// @brief Propagate data forward through the network from one layer to the next.
/// @param source_index The index of the source layer.
/// @param destination_index The index of the destination layer.
constexpr void forward_propagate(const unsigned long long int source_index, const unsigned long long int destination_index) {
GTL_ANN_ASSERT(source_index < this->layers_of_neurons.size(), "Source layer index is greater than number of layers.");
GTL_ANN_ASSERT(destination_index < this->layers_of_neurons.size(), "Destination layer index is greater than number of layers.");
for (unsigned long long int destination_neuron_index = 0; destination_neuron_index < this->layers_of_neurons[destination_index].size(); ++destination_neuron_index) {
scalar_type value = 0.0;
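// Accumulate the weighted sum of the outputs of the source layer.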
for (unsigned long long int source_neuron_index = 0; source_neuron_index < this->layers_of_neurons[source_index].size(); ++source_neuron_index) {
const scalar_type weight = this->layers_of_neurons[destination_index][destination_neuron_index].weights[source_neuron_index];
const scalar_type source_output = this->layers_of_neurons[source_index][source_neuron_index].output;
value += weight * source_output;
}
this->layers_of_neurons[destination_index][destination_neuron_index].output = this->activate(this->layers_of_neurons[destination_index][destination_neuron_index].activation_function,
value);
}
}
/// @brief Compute the error of the network output compared to the target output.
/// @param targets The target value of the network output.
constexpr void update_output_error(const std::vector<scalar_type>& targets) {
GTL_ANN_ASSERT((this->layers_of_neurons.back().size() == targets.size()), "Size of target data must match number of output neurons in the last layer.");
const unsigned long long int output_index = this->layers_of_neurons.size() - 1;
for (unsigned long long int output_neuron_index = 0; output_neuron_index < this->layers_of_neurons[output_index].size(); ++output_neuron_index) {
const scalar_type output = this->layers_of_neurons[output_index][output_neuron_index].output;
const scalar_type error = targets[output_neuron_index] - output;
const scalar_type gradient = this->activate_differential(this->layers_of_neurons[output_index][output_neuron_index].activation_function, output);
this->layers_of_neurons[output_index][output_neuron_index].error_gradient = gradient * error;
}
}
/// @brief Propagate errors backward through the network from one layer to the previous.
/// @param source_index The index of the source layer.
/// @param destination_index The index of the destination layer.
constexpr void back_propagate(const unsigned long long int source_index, const unsigned long long int destination_index) {
GTL_ANN_ASSERT(source_index < this->layers_of_neurons.size(), "Source layer index is greater than number of layers.");
GTL_ANN_ASSERT(destination_index < this->layers_of_neurons.size(), "Destination layer index is greater than number of layers.");
for (unsigned long long int destination_neuron_index = 0; destination_neuron_index < this->layers_of_neurons[destination_index].size(); ++destination_neuron_index) {
scalar_type error = 0;
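// Accumulate the error gradients of the source (later) layer, weighted by the connections from this neuron into that layer.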
for (unsigned long long int source_neuron_index = 0; source_neuron_index < this->layers_of_neurons[source_index].size(); ++source_neuron_index) {
error += this->layers_of_neurons[source_index][source_neuron_index].weights[destination_neuron_index] * this->layers_of_neurons[source_index][source_neuron_index].error_gradient;
}
const scalar_type output = this->layers_of_neurons[destination_index][destination_neuron_index].output;
const scalar_type gradient = this->activate_differential(this->layers_of_neurons[destination_index][destination_neuron_index].activation_function, output);
this->layers_of_neurons[destination_index][destination_neuron_index].error_gradient = error * gradient;
}
}
/// @brief Adjust the weights in the entire network, from top to bottom, based on the error gradients.
constexpr void adjust_weights() {
for (unsigned long long int layer_index = 1; layer_index < this->layers_of_neurons.size(); ++layer_index) {
for (unsigned long long int neuron_index = 0; neuron_index < this->layers_of_neurons[layer_index].size(); ++neuron_index) {
const scalar_type error_gradient = this->layers_of_neurons[layer_index][neuron_index].error_gradient;
for (unsigned long long int layer_above_neuron_index = 0; layer_above_neuron_index < this->layers_of_neurons[layer_index - 1].size(); ++layer_above_neuron_index) {
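// Blend the new delta with the previous delta to apply momentum, smoothing the weight updates.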
const scalar_type delta = this->learning_rate * error_gradient * this->layers_of_neurons[layer_index - 1][layer_above_neuron_index].output;
const scalar_type delta_with_momentum = (1 - this->momentum) * delta + (this->momentum) * this->layers_of_neurons[layer_index][neuron_index].weight_deltas[layer_above_neuron_index];
this->layers_of_neurons[layer_index][neuron_index].weights[layer_above_neuron_index] += delta_with_momentum;
this->layers_of_neurons[layer_index][neuron_index].weight_deltas[layer_above_neuron_index] = delta_with_momentum;
}
}
}
}
/// @brief Process a value using the supplied activation function.
/// @param activation_function The activation function to use.
/// @param value The value to process.
/// @return The processed value, equivalent to "activation_function(value)".
constexpr static scalar_type activate(activation_type activation_function, const scalar_type& value) {
switch (activation_function) {
case activation_type::none: return 0;
case activation_type::identity: return ann::identity(value);
case activation_type::step: return ann::step(value);
case activation_type::sigmoid: return ann::sigmoid(value);
case activation_type::softplus: return ann::softplus(value);
case activation_type::tanh: return ann::tanh(value);
case activation_type::erf: return ann::erf(value);
}
return 0;
}
/// @brief Process the result of an activation function using the matching differential function to get the gradient of the activation function.
/// @param activation_function The activation function whose gradient is required.
/// @param activated_value The value to process, where "activated_value = activation_function(value)".
/// @return The gradient of the activation function at the value location.
constexpr static scalar_type activate_differential(activation_type activation_function, const scalar_type& activated_value) {
switch (activation_function) {
case activation_type::none: return 0;
case activation_type::identity: return ann::identity_differential(activated_value);
case activation_type::step: return ann::step_differential(activated_value);
case activation_type::softplus: return ann::softplus_differential(activated_value);
case activation_type::sigmoid: return ann::sigmoid_differential(activated_value);
case activation_type::tanh: return ann::tanh_differential(activated_value);
case activation_type::erf: return ann::erf_differential(activated_value);
}
return 0;
}
// Activation functions
/// @brief Identity activation function, just returns the input value.
/// @param value The input value.
/// @return The input value.
constexpr static scalar_type identity(scalar_type value) {
return value;
}
/// @brief Gradient of the identity activation function, always one.
/// @param identity_value The input value, where "identity_value = identity(value)".
/// @return One.
constexpr static scalar_type identity_differential(scalar_type identity_value) {
static_cast<void>(identity_value);
return 1;
}
/// @brief Step activation function, returns either a zero or a one depending on the sign of the input.
/// @param value The input value.
/// @return One if the input is greater than zero, zero otherwise.
constexpr static scalar_type step(scalar_type value) {
if (value > 0) {
return 1;
}
return 0;
}
/// @brief Gradient of the step activation function, always zero.
/// @param step_value The input value, where "step_value = step(value)".
/// @return Zero.
constexpr static scalar_type step_differential(scalar_type step_value) {
static_cast<void>(step_value);
return 0;
}
/// @brief Sigmoid activation function, which returns a transition between zero and one depending on the input.
/// @param value The input value.
/// @return Zero for an input of negative infinity and one for an input of positive infinity, sigmoid transition in between.
constexpr static scalar_type sigmoid(const scalar_type& value) {
return 1 / (1 + std::exp(-value));
}
/// @brief Gradient of the sigmoid activation function.
/// @param sigmoid_value The input value, where "sigmoid_value = sigmoid(value)".
/// @return Gradient of the sigmoid activation function at the value location.
constexpr static scalar_type sigmoid_differential(scalar_type sigmoid_value) {
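// The derivative of the sigmoid can be written in terms of its output: sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)).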
return sigmoid_value * (1 - sigmoid_value);
}
/// @brief Softplus activation function, which returns a transition between zero and infinity depending on the input.
/// @param value The input value.
/// @return Zero for an input of negative infinity and positive infinity for an input of positive infinity, softplus transition in between.
constexpr static scalar_type softplus(const scalar_type& value) {
return std::log(1 + std::exp(value));
}
/// @brief Gradient of the softplus activation function.
/// @param softplus_value The input value, where "softplus_value = softplus(value)".
/// @return Gradient of the softplus activation function at the value location.
constexpr static scalar_type softplus_differential(const scalar_type& softplus_value) {
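// The derivative of softplus(x) is sigmoid(x); with y = softplus(x) this equals (exp(y) - 1) / exp(y), written here as 1 / (1 + 1 / (exp(y) - 1)).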
return 1 / (1 + (1 / (std::exp(softplus_value) - 1)));
}
/// @brief Tanh activation function, which returns a transition between negative one and positive one depending on the input.
/// @param value The input value.
/// @return Negative one for an input of negative infinity and positive one for an input of positive infinity, tanh transition in between.
constexpr static scalar_type tanh(scalar_type value) {
return std::tanh(value);
}
/// @brief Gradient of the tanh activation function.
/// @param tanh_value The input value, where "tanh_value = tanh(value)".
/// @return Gradient of the tanh activation function at the value location.
constexpr static scalar_type tanh_differential(scalar_type tanh_value) {
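// The derivative of tanh can be written in terms of its output: tanh'(x) = 1 - tanh(x)^2.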
return 1 - (tanh_value * tanh_value);
}
/// @brief Erf activation function, which returns a transition between negative one and positive one depending on the input.
/// @param value The input value.
/// @return Negative one for an input of negative infinity and positive one for an input of positive infinity, erf transition in between.
constexpr static scalar_type erf(scalar_type value) {
return std::erf(value);
}
/// @brief Gradient of the erf activation function.
/// @param erf_value The input value, where "erf_value = erf(value)".
/// @return Gradient of the erf activation function at the value location.
constexpr static scalar_type erf_differential(scalar_type erf_value) {
// Winitzki's approximation of the inverse error function, using the constant a = 0.147.
constexpr auto ierf = [](scalar_type x) -> scalar_type {
const scalar_type sgn = (x < 0) ? static_cast<scalar_type>(-1) : static_cast<scalar_type>(1);
x = (static_cast<scalar_type>(1) - x) * (static_cast<scalar_type>(1) + x);
const scalar_type lnx = std::log(x);
const scalar_type tt1 = 2 / (static_cast<scalar_type>(M_PI) * static_cast<scalar_type>(0.147)) + static_cast<scalar_type>(0.5) * lnx;
const scalar_type tt2 = 1 / static_cast<scalar_type>(0.147) * lnx;
return (sgn * std::sqrt(-tt1 + std::sqrt(tt1 * tt1 - tt2)));
};
// Recover the pre-activation value and evaluate erf'(x) = (2 / sqrt(pi)) * exp(-x * x).
const scalar_type value = ierf(erf_value);
return 2 * std::exp(-(value * value)) / std::sqrt(static_cast<scalar_type>(M_PI));
}
};
}
#undef GTL_ANN_ASSERT
#endif // GTL_AI_ANN_HPP