-
Notifications
You must be signed in to change notification settings - Fork 15
/
cpu_operations.h
349 lines (263 loc) · 10.3 KB
/
cpu_operations.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
/*
Copyright © 2015-2017 Thomas Unterthiner
Additional Contributions by Thomas Adler, Balázs Bencze
Licensed under GPL, version 2 or a later (see LICENSE.txt)
*/
#include <cstring>
#include <cstdlib>
#include <cmath>
#include <typeinfo> /* for typeid */
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
extern "C" {
extern void sgemm_(const char *transa, const char *transb, const int *m, const int *n, const int *k, const float *alpha,
const float *a, const int *lda, const float *b, const int *ldb, const float *beta, float *c, const int *ldc);
extern void ssymm_(const char *side, const char *uplo, const int *m, const int *n, const float *alpha, const float *a,
const int *lda, const float *b, const int *ldb, const float *beta, float *c, const int *ldc);
extern void saxpy_(const int *n, const float *alpha, const float *dx, const int *incx, float *dy, const int *incy);
extern int spotrf_(const char *uplo, int *n, float *a, int * lda, int *info);
extern int spotrs_(const char *uplo, int *n, int *nrhs, float * a, int *lda, float *b, int *ldb, int *info);
extern int sposv_(const char *uplo, int *n, int *nrhs, float * a, int *lda, float *b, int *ldb, int *info);
extern int spotri_(const char *uplo, int *n, float *a, int *lda, int *info);
extern float sdot_(const int *n, const float *x, const int* incx, const float *y, const int* incy);
}
using std::cos;
using std::log;
using std::sqrt;
#ifdef COMPILE_FOR_R
#include "use_R_impl.h"
#else
#include <cstdio>
#include <cassert>
using std::rand;
using std::srand;
// random in (0, 1]
inline double rand_unif(void) {
return (rand() + 1.0) / (RAND_MAX + 1.0);
}
// generates random samples from a 0/1 Gaussian via Box-Mueller
inline double rand_normal(void) {
return sqrt(-2.0 * log(rand_unif())) * cos(2.0 * M_PI * rand_unif());
}
#endif
inline double rand_exp(double lambda) /* inversion sampling */
{
return -log(1 - rand_unif()) / lambda;
}
class CPU_Operations {
float* var_tmp;
public:
float* ones;
typedef int SparseMatrix;
static SparseMatrix create_sparse_matrix(const float* Xvals, const int* Xcols, const int *Xrowptr, int n, int m);
static void free_sparse_matrix(SparseMatrix);
template<typename T>
T init_invalid(void) {
return (typeid(T) == typeid(SparseMatrix) ? (T) -1 : (T) 0);
}
CPU_Operations(const int m, const int n, const int k, unsigned long seed, int gpu_id);
~CPU_Operations();
float* to_device(const float* src, const int size) const {
return (float*) src;
}
SparseMatrix to_device(SparseMatrix src, const int size) const {
return src;
}
float* to_host(const float* src, float* dest, const int size) const {
return dest;
}
float* copy_to_host(const float* src, float* dst, size_t size) const {
memcpy(dst, src, size);
return dst;
}
float* get_batch(const float* X, int ncol, int batch_num, int batch_size) {
/* return pointer */
return (float*) &X[batch_num * batch_size * ncol];
}
SparseMatrix get_batch(SparseMatrix X, int ldx, int batch_num, int batch_size);
void gemm(const char *transa, const char *transb, const int m, const int n, const int k, const float alpha,
const float *a, const int lda, const float *b, const int ldb, const float beta, float *c,
const int ldc) const {
sgemm_(transa, transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc);
}
void gemm(const char *transa, const char *transb, const int m, const int n, const int k, const float alpha,
const SparseMatrix a, const int lda, const float *b, const int ldb, const float beta, float *c,
const int ldc) const;
void gemm(const char *transa, const char *transb, const int m, const int n, const int k, const float alpha,
const float *a, const int lda, const SparseMatrix b, const int ldb, const float beta, float *c,
const int ldc) const;
void dgmm(const char* mode, const int m, const int n, const float* A, int lda, const float* x, int incx, float* C,
int ldc) const;
void symm(const char *side, const char *uplo, const int m, const int n, const float alpha, const float *a,
const int lda, const float *b, const int ldb, const float beta, float *c, const int ldc) const {
ssymm_(side, uplo, &m, &n, &alpha, a, &lda, b, &ldb, &beta, c, &ldc);
}
void dot(int n, const float *x, int incx, const float *y, int incy, float *result) {
*result = sdot_(&n, x, &incx, y, &incy);
}
void axpy(const int n, const float alpha, const float *dx, const int incx, float *dy, const int incy) const {
saxpy_(&n, &alpha, dx, &incx, dy, &incy);
}
int posv(const char* uplo, int n, int nrhs, float* a, int lda, float* b, int ldb) const {
int info;
int retval = sposv_(uplo, &n, &nrhs, a, &lda, b, &ldb, &info);
if (info != 0)
printf("info: %d\n", info);
assert(!info);
return retval;
}
int potrf(const char *uplo, int n, float* a, int lda) const {
int info;
int retval = spotrf_(uplo, &n, a, &lda, &info);
assert(!info);
return retval;
}
int potrs(const char *uplo, int n, int nrhs, float* a, int lda, float *b, int ldb, int *info) const {
return spotrs_(uplo, &n, &nrhs, a, &lda, b, &ldb, info);
}
int potri(const char *uplo, int n, float *a, int lda) const {
int info;
int retval = spotri_(uplo, &n, a, &lda, &info);
assert(!info);
return retval;
}
void* memset(void* dest, int ch, size_t count) const {
return std::memset(dest, ch, count);
}
float* memcpy(void* dest, const void *src, size_t count) const {
return (float*) std::memcpy(dest, src, count);
}
float *memcpy_matrix(float *dest, float *src, int nrows_to_copy, int src_ncol, int first_row = 0) const {
return memcpy(dest, &src[first_row * src_ncol], nrows_to_copy * src_ncol * sizeof(float));
}
SparseMatrix memcpy_matrix(SparseMatrix &dest, SparseMatrix src, int nrows_to_copy, int src_ncol, int first_row) const;
void free(void* ptr) const {
if (ptr != 0)
std::free(ptr);
}
void free(SparseMatrix a) const;
void free_batch(void *ptr) {
}
void free_batch(SparseMatrix a) {
free(a);
}
void free_devicememory(void* ptr) const {
}
void free_devicememory(SparseMatrix X) const {
}
void free_malloc_matrix(void* ptr) {
free(ptr);
}
void free_malloc_matrix(SparseMatrix m);
void free_memcpy_matrix(void* ptr) {
}
void free_memcpy_matrix(SparseMatrix m) {
}
template<typename T>
T malloc_matrix(int rows, int cols) {
return malloc_matrix(rows, cols, init_invalid<T>());
}
SparseMatrix malloc_matrix(int rows, int cols, SparseMatrix dummy);
float *malloc_matrix(int rows, int cols, float *dummy) {
return malloc(rows * cols * sizeof(float));
}
float* malloc(size_t size) const {
return (float*) std::malloc(size);
}
void maximum(float* x, const float value, const int size) const {
for (int i = 0; i < size; ++i)
x[i] = fmaxf(x[i], value);
}
void leaky_relu(float* x, const float value, const int size) {
for (int i = 0; i < size; ++i)
x[i] = (x[i] < 0.0f) ? x[i] * value : x[i];
}
void sigmoid(float* x, const int size) const {
for (int i = 0; i < size; ++i) {
x[i] = 1 / (1 + expf(-x[i]));
}
}
void tanh(float* x, const int size) const {
for (int i = 0; i < size; ++i) {
x[i] = tanhf(x[i]);
}
}
void fill_eye(float* a, int n) const {
memset(a, 0, n * n * sizeof(float));
for (int i = 0; i < n; ++i)
a[i * n + i] = 1.0f;
}
void fill(float* X, const int size, const float value) const {
for (int i = 0; i < size; ++i) {
X[i] = value;
}
}
void calculate_column_variance(const float* X, const unsigned nrows, const unsigned ncols, float* variances, float eps);
void calculate_column_variance(SparseMatrix X, const unsigned nrows, const unsigned ncols, float* variances, float eps);
void invsqrt(float* s, const unsigned n) const;
void scale_columns(float* X, const unsigned nrows, const unsigned ncols, float* s) const;
void scale_columns(SparseMatrix X, const unsigned nrows, const unsigned ncols, float* s) const;
void scale_rows(float* X, const unsigned nrows, const unsigned ncols, float* s) const;
void scale_rows(SparseMatrix X, const unsigned nrows, const unsigned ncols, float* s) const;
void dropout(float* X, const unsigned size, const float dropout_rate) const {
assert(0.0f <= dropout_rate && dropout_rate <= 1.0f);
for (unsigned i = 0; i < size; ++i)
X[i] = rand_unif() < dropout_rate ? 0.0f : X[i];
}
void dropout(SparseMatrix X, const unsigned size, const float dropout_rate) const;
void add_saltpepper_noise(float* X, const unsigned size, const float noise_rate) const {
assert(0.0f <= noise_rate && noise_rate <= 1.0f);
for (unsigned i = 0; i < size; ++i) {
if (rand_unif() < noise_rate) {
X[i] = rand_unif() < 0.5 ? 0.0f : 1.0f;
}
}
}
void add_saltpepper_noise(SparseMatrix X, const unsigned size, const float noise_rate) const;
void add_gauss_noise(float* X, const unsigned size, const float noise_rate) const {
assert(0.0 <= noise_rate);
for (unsigned i = 0; i < size; ++i)
X[i] += rand_normal() * noise_rate;
}
/* gauss noise makes no sense on sparse matrices */
void add_gauss_noise(SparseMatrix X, const unsigned size, const float noise_rate) const;
void invert(float* X, const unsigned size) const {
for (unsigned i = 0; i < size; ++i)
X[i] = 1.0f / X[i];
}
void soft_threshold(float* x, const float alpha, const int size) const {
float f;
for (int i = 0; i < size; ++i) {
f = x[i];
x[i] = f > 0 ? fmaxf(0., f - alpha) : fminf(0., f + alpha);
}
}
// Useful for debugging
static void printMatrixCM(const float* a, int n, int m, const char* fmt);
static void printMatrixCM(const SparseMatrix a, int n, int m, const char *fmt);
static void printMatrixRM(const float* a, int n, int m, const char* fmt);
static void printMatrixRM(const SparseMatrix a, int n, int m, const char *fmt);
void printMatrixSP(const SparseMatrix& a, const char* fmt) const {
// TODO
}
void prints(const float* f, unsigned l) const {}
void printsu(const int* f, unsigned l) const {}
void printm(const char* name, const SparseMatrix a, int n, int m) const {
printf("%s\n", name);
printMatrixCM(a, n, m, 0);
}
void printm(const char* name, const float* a, int n, int m) const {
printf("%s\n", name);
printMatrixCM(a, n, m, 0);
}
#ifdef MEM_DEBUG
void reset_memory_usage_counter() {
// TODO
}
void print_memory_usage() {
// TODO
printf("Memory usage logging is not implemented in the CPU version");
}
#endif
};