from __future__ import absolute_import
from .._compat import basestring
from .._compat import chain_exception
from .._compat import pickle
from collections import OrderedDict
try:  # 'Iterable' lives in 'collections.abc' on Python 3.3+
    from collections.abc import Iterable
except ImportError:  # Python 2 fallback
    from collections import Iterable
import itertools
from pydoc import locate
from warnings import warn
from time import time
from lasagne.layers import get_all_layers
from lasagne.layers import get_output
from lasagne.layers import InputLayer
from lasagne.layers import Layer
from lasagne import regularization
from lasagne.objectives import aggregate
from lasagne.objectives import categorical_crossentropy
from lasagne.objectives import squared_error
from lasagne.updates import nesterov_momentum
from lasagne.utils import floatX
from lasagne.utils import unique
import numpy as np
from sklearn.base import BaseEstimator
#from sklearn.cross_validation import KFold
#from sklearn.cross_validation import StratifiedKFold
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn.metrics import r2_score
from sklearn.preprocessing import LabelEncoder
import theano
from theano import tensor as T
from . import PrintLog
from . import PrintLayerInfo
class _list(list):
pass
class _dict(dict):
def __contains__(self, key):
return True
def _sldict(arr, sl):
if isinstance(arr, dict):
return {k: v[sl] for k, v in arr.items()}
else:
return arr[sl]
def _shuffle_arrays(arrays, random):
rstate = random.get_state()
for array in arrays:
if isinstance(array, dict):
for v in list(array.values()):
random.set_state(rstate)
random.shuffle(v)
else:
random.set_state(rstate)
random.shuffle(array)
class Layers(OrderedDict):
def __getitem__(self, key):
if isinstance(key, int):
return list(self.values()).__getitem__(key)
elif isinstance(key, slice):
items = list(self.items()).__getitem__(key)
return Layers(items)
else:
return super(Layers, self).__getitem__(key)
def keys(self):
return list(super(Layers, self).keys())
def values(self):
return list(super(Layers, self).values())
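# A small usage sketch (the layer name 'output' is illustrative): the Layers
# mapping built by NeuralNet supports lookup by name, position, or slice, e.g.
#
#     net.layers_['output']   # by name
#     net.layers_[-1]         # by position
#     net.layers_[:2]         # slice -> a new Layers instance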
class BatchIterator(object):
def __init__(self, batch_size, shuffle=False, seed=42):
self.batch_size = batch_size
self.shuffle = shuffle
self.random = np.random.RandomState(seed)
def __call__(self, X, y=None):
if self.shuffle:
_shuffle_arrays([X, y] if y is not None else [X], self.random)
self.X, self.y = X, y
return self
def __iter__(self):
bs = self.batch_size
for i in range((self.n_samples + bs - 1) // bs):
sl = slice(i * bs, (i + 1) * bs)
Xb = _sldict(self.X, sl)
if self.y is not None:
yb = _sldict(self.y, sl)
else:
yb = None
yield self.transform(Xb, yb)
@property
def n_samples(self):
X = self.X
if isinstance(X, dict):
return len(list(X.values())[0])
else:
return len(X)
def transform(self, Xb, yb):
return Xb, yb
def __getstate__(self):
state = dict(self.__dict__)
for attr in ('X', 'y',):
if attr in state:
del state[attr]
return state
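# A minimal customization sketch: subclass BatchIterator and override
# 'transform' to augment each mini-batch, e.g. horizontally flipping half of
# the samples when Xb is a 4-d image array (the class name and augmentation
# are illustrative, not part of this module):
#
#     class FlipBatchIterator(BatchIterator):
#         def transform(self, Xb, yb):
#             Xb, yb = super(FlipBatchIterator, self).transform(Xb, yb)
#             Xb = Xb.copy()  # avoid mutating the caller's array
#             idx = np.random.choice(len(Xb), len(Xb) // 2, replace=False)
#             Xb[idx] = Xb[idx, :, :, ::-1]
#             return Xb, yb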
def grad_scale(layer, scale):
for param in layer.get_params(trainable=True):
param.tag.grad_scale = floatX(scale)
return layer
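# A usage sketch for 'grad_scale': attach it through the 'layer_wrapper'
# keyword of a layer definition (handled in 'initialize_layers' below) so that
# the layer's parameters receive down-scaled gradients. The scale value is
# illustrative, and DenseLayer is assumed to come from lasagne.layers:
#
#     from functools import partial
#     layers = [
#         (InputLayer, {'shape': (None, 100)}),
#         (DenseLayer, {'num_units': 50,
#                       'layer_wrapper': partial(grad_scale, scale=0.1)}),
#     ]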
class TrainSplit(object):
def __init__(self, eval_size, stratify=True, shuffle=False, seed=42):
print("eval_size=",eval_size)
self.eval_size = eval_size
self.stratify = stratify
self.shuffle = shuffle
self.random = np.random.RandomState(seed)
def __call__(self, X, y, net):
    if self.shuffle:
        _shuffle_arrays([X, y] if y is not None else [X], self.random)
    if self.eval_size:
        # Use roughly 1/eval_size folds so that the held-out fold contains
        # about 'eval_size' of the samples.
        n_splits = max(2, int(round(1. / self.eval_size)))
        if net.regression or not self.stratify:
            kf = KFold(n_splits=n_splits)
        else:
            kf = StratifiedKFold(n_splits=n_splits)
        # Split on an index placeholder so that dict-valued X works too.
        indices = np.arange(len(y))
        train_indices, valid_indices = next(kf.split(indices, y))
        X_train = _sldict(X, train_indices)
        y_train = _sldict(y, train_indices)
        X_valid = _sldict(X, valid_indices)
        y_valid = _sldict(y, valid_indices)
    else:
        X_train, y_train = X, y
        X_valid, y_valid = _sldict(X, slice(0, 0)), _sldict(y, slice(0, 0))
    return X_train, X_valid, y_train, y_valid
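# Usage sketch: an instance is passed to NeuralNet through the 'train_split'
# parameter, e.g. NeuralNet(..., train_split=TrainSplit(eval_size=0.25)); any
# callable with the signature (X, y, net) -> (X_train, X_valid, y_train,
# y_valid) can be substituted.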
class LegacyTrainTestSplit(object): # BBB
def __init__(self, eval_size=0.2):
self.eval_size = eval_size
def __call__(self, X, y, net):
return net.train_test_split(X, y, self.eval_size)
def objective(layers,
loss_function,
target,
aggregate=aggregate,
deterministic=False,
l1=0,
l2=0,
get_output_kw=None):
"""
Default implementation of the NeuralNet objective.
:param layers: The underlying layers of the NeuralNetwork
:param loss_function: The callable loss function to use
:param target: the expected output
:param aggregate: the aggregation function to use
:param deterministic: Whether or not to get a deterministic output
:param l1: Optional l1 regularization parameter
:param l2: Optional l2 regularization parameter
:param get_output_kw: optional kwargs to pass to
:meth:`NeuralNetwork.get_output`
:return: The total calculated loss
"""
if get_output_kw is None:
get_output_kw = {}
output_layer = layers[-1]
network_output = get_output(
output_layer, deterministic=deterministic, **get_output_kw)
loss = aggregate(loss_function(network_output, target))
if l1:
loss += regularization.regularize_layer_params(
layers.values(), regularization.l1) * l1
if l2:
loss += regularization.regularize_layer_params(
layers.values(), regularization.l2) * l2
return loss
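# Usage sketch: arguments of this objective can be set from the NeuralNet
# constructor by prefixing them with 'objective_' (collected by
# '_get_params_for' below), e.g. enabling L2 weight decay with an
# illustrative strength, where 'my_layers' stands in for a real definition:
#
#     net = NeuralNet(layers=my_layers, objective_l2=1e-4)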
class NeuralNet(BaseEstimator):
"""A configurable Neural Network estimator based on Lasagne.
Compatible with scikit-learn estimators.
Attributes
----------
train_history_:
A list of network training info for each epoch.
Each index contains a dictionary with the following keys
* epoch - The epoch number
* train_loss_best - True if this epoch had the best training loss so far
* valid_loss_best - True if this epoch had the best validation loss so far
* train_loss - The training loss for this epoch
* valid_loss - The validation loss for this epoch
* valid_accuracy - The validation accuracy for this epoch
layers_: An ordered dictionary of Lasagne layers, accessible by the layer's name or by its index.
layer_reference_params:
A list of Lasagne layer parameter names that may reference
other layers, excluding 'incoming' and 'incomings'.
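Examples
--------
A minimal usage sketch; the layer types, sizes, and update parameters
below are illustrative only, and DenseLayer and softmax are assumed to be
imported from lasagne::

    net = NeuralNet(
        layers=[
            (InputLayer, {'shape': (None, 784)}),
            (DenseLayer, {'num_units': 128}),
            (DenseLayer, {'num_units': 10, 'nonlinearity': softmax}),
        ],
        update=nesterov_momentum,
        update_learning_rate=0.01,
        update_momentum=0.9,
        max_epochs=10,
        verbose=1,
    )
    net.fit(X_train, y_train)
    y_pred = net.predict(X_test)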
"""
layer_reference_params = ['mask_input']
def __init__(
self,
layers,
update=nesterov_momentum,
loss=None, # BBB
objective=objective,
objective_loss_function=None,
batch_iterator_train=BatchIterator(batch_size=128),
batch_iterator_test=BatchIterator(batch_size=128),
regression=False,
max_epochs=100,
train_split=TrainSplit(eval_size=0.2),
custom_scores=None,
scores_train=None,
scores_valid=None,
X_tensor_type=None,
y_tensor_type=None,
use_label_encoder=False,
on_batch_finished=None,
on_epoch_finished=None,
on_training_started=None,
on_training_finished=None,
more_params=None,
check_input=True,
verbose=0,
**kwargs
):
"""
Initialize a Neural Network
Parameters
----------
layers:
A list of lasagne layers to compose into the final neural net.
See :ref:`layer-def`
update:
The update function to use when training. Uses the form
provided by the :mod:`lasagne.updates` implementations.
objective:
The objective function to use when training. The callable
will be passed the NeuralNetwork's :attr:`.layers_`
attribute as the first argument, and the output target as
the second argument.
max_epochs:
The number of epochs to train. This is used as the
default when calling the :meth:`.fit` method without an
epochs argument.
Other Parameters
----------------
batch_iterator_train:
The sample iterator to use while training the network.
batch_iterator_test:
The sample iterator to use while testing and validating
the network.
regression:
Whether or not this is a regressor network. Determines
the default objective and scoring functions.
train_split:
The method used to separate training and validation
samples. See :class:`TrainSplit` for the default
implementation.
y_tensor_type:
The type of tensor to use to hold the network's output.
Typically ``T.ivector`` (the default) for classification
tasks.
on_training_started, on_batch_finished, on_epoch_finished,
on_training_finished:
A list of functions which are called during training at
the corresponding times.
The functions will be passed the NeuralNet as the first
parameter and its :attr:`.train_history_` attribute as the
second parameter.
custom_scores:
A list of callable custom scoring functions.
The functions will be passed the expected y values as the
first argument, and the predicted y_values as the second
argument.
use_label_encoder:
If true, all y_values will be encoded using a
:class:`sklearn.preprocessing.LabelEncoder` instance.
verbose:
The verbosity level of the network.
Any non-zero value will cause the network to print the
layer info at the start of training, as well as print a
log of the training history after each epoch. Larger
values will increase the amount of info shown.
more_params:
An additional dictionary of parameters to use when initializing
layers defined using the dictionary format.
Note
----
* Extra arguments can be passed to the call to the *update*
function by prepending the string ``update_`` to the
corresponding argument name,
e.g. ``update_learning_rate=0.01`` will define the
``learning_rate`` parameter of the update function.
* Extra arguments can be provided to the objective call
through the Neural Network by prepending the string
``objective_`` to the corresponding argument name.
"""
if loss is not None:
raise ValueError(
"The 'loss' parameter was removed, please use "
"'objective_loss_function' instead.") # BBB
if hasattr(objective, 'get_loss'):
raise ValueError(
"The 'Objective' class is no longer supported, please "
"use 'nolearn.lasagne.objective' or similar.") # BBB
if objective_loss_function is None:
objective_loss_function = (
squared_error if regression else categorical_crossentropy)
if hasattr(self, 'train_test_split'): # BBB
warn("The 'train_test_split' method has been deprecated, please "
"use the 'train_split' parameter instead.")
train_split = LegacyTrainTestSplit(
eval_size=kwargs.pop('eval_size', 0.2))
if 'eval_size' in kwargs: # BBB
warn("The 'eval_size' argument has been deprecated, please use "
"the 'train_split' parameter instead, e.g.\n"
"train_split=TrainSplit(eval_size=0.4)")
train_split.eval_size = kwargs.pop('eval_size')
if y_tensor_type is None:
if regression:
y_tensor_type = T.TensorType(
theano.config.floatX, (False, False))
else:
y_tensor_type = T.ivector
if X_tensor_type is not None:
raise ValueError(
"The 'X_tensor_type' parameter has been removed. "
"It's unnecessary.") # BBB
if 'custom_score' in kwargs:
warn("The 'custom_score' argument has been deprecated, please use "
"the 'custom_scores' parameter instead, which is just "
"a list of custom scores e.g.\n"
"custom_scores=[('first output', lambda y1, y2: abs(y1[0,0]-y2[0,0])), ('second output', lambda y1,y2: abs(y1[0,1]-y2[0,1]))]")
# add it to custom_scores
if custom_scores is None:
custom_scores = [kwargs.pop('custom_score')]
else:
custom_scores.append(kwargs.pop('custom_score'))
if isinstance(layers, Layer):
layers = _list([layers])
elif isinstance(layers, Iterable):
layers = _list(layers)
self.layers = layers
self.update = update
self.objective = objective
self.objective_loss_function = objective_loss_function
self.batch_iterator_train = batch_iterator_train
self.batch_iterator_test = batch_iterator_test
self.regression = regression
self.max_epochs = max_epochs
self.train_split = train_split
self.custom_scores = custom_scores
self.scores_train = scores_train or []
self.scores_valid = scores_valid or []
self.y_tensor_type = y_tensor_type
self.use_label_encoder = use_label_encoder
self.on_batch_finished = on_batch_finished or []
self.on_epoch_finished = on_epoch_finished or []
self.on_training_started = on_training_started or []
self.on_training_finished = on_training_finished or []
self.more_params = more_params or {}
self.check_input = check_input
self.verbose = verbose
if self.verbose:
# XXX: PrintLog should come before any other handlers,
# because early stopping will otherwise cause the last
# line not to be printed
self.on_epoch_finished.append(PrintLog())
self.on_training_started.append(PrintLayerInfo())
for key in kwargs.keys():
assert not hasattr(self, key)
vars(self).update(kwargs)
self._kwarg_keys = list(kwargs.keys())
self.train_history_ = []
if 'batch_iterator' in kwargs: # BBB
raise ValueError(
"The 'batch_iterator' argument has been replaced. "
"Use 'batch_iterator_train' and 'batch_iterator_test' instead."
)
def _check_for_unused_kwargs(self):
names = self.layers_.keys() + ['update', 'objective']
for k in self._kwarg_keys:
for n in names:
prefix = '{}_'.format(n)
if k.startswith(prefix):
break
else:
raise ValueError("Unused kwarg: {}".format(k))
def _check_good_input(self, X, y=None):
if isinstance(X, dict):
lengths = [len(X1) for X1 in X.values()]
if len(set(lengths)) > 1:
raise ValueError("Not all values of X are of equal length.")
x_len = lengths[0]
else:
x_len = len(X)
if y is not None:
if len(y) != x_len:
raise ValueError("X and y are not of equal length.")
if self.regression and y is not None and y.ndim == 1:
y = y.reshape(-1, 1)
return X, y
def initialize(self):
"""Initializes the network. Checks that no extra kwargs were
passed to the constructor, and compiles the train, predict,
and evaluation functions.
Subsequent calls to this function will return without any action.
"""
if getattr(self, '_initialized', False):
return
out = getattr(self, '_output_layers', None)
if out is None:
self.initialize_layers()
self._check_for_unused_kwargs()
iter_funcs = self._create_iter_funcs(
self.layers_, self.objective, self.update,
self.y_tensor_type,
)
self.train_iter_, self.eval_iter_, self.predict_iter_ = iter_funcs
self._initialized = True
def _get_params_for(self, name):
collected = {}
prefix = '{}_'.format(name)
params = vars(self)
more_params = self.more_params
for key, value in itertools.chain(params.items(), more_params.items()):
if key.startswith(prefix):
collected[key[len(prefix):]] = value
return collected
def _layer_name(self, layer_class, index):
return "{}{}".format(
layer_class.__name__.lower().replace("layer", ""), index)
def initialize_layers(self, layers=None):
"""Sets up the Lasagne layers
:param layers: The list of layer definitions, or a list of
    :class:`lasagne.layers.Layer` instances, describing the
    underlying network
:return: the output layer of the underlying lasagne network.
:seealso: :ref:`layer-def`
"""
if layers is not None:
self.layers = layers
self.layers_ = Layers()
#If a Layer, or a list of Layers was passed in
if isinstance(self.layers[0], Layer):
for out_layer in self.layers:
for i, layer in enumerate(get_all_layers(out_layer)):
if layer not in self.layers_.values():
name = layer.name or self._layer_name(layer.__class__, i)
self.layers_[name] = layer
if self._get_params_for(name) != {}:
raise ValueError(
"You can't use keyword params when passing a Lasagne "
"instance object as the 'layers' parameter of "
"'NeuralNet'."
)
self._output_layers = self.layers
return self.layers
# 'self.layers' is a list of '(Layer class, kwargs)' tuples, so
# we'll have to actually instantiate the layers given the
# arguments:
layer = None
for i, layer_def in enumerate(self.layers):
if isinstance(layer_def[1], dict):
# Newer format: (Layer, {'layer': 'kwargs'})
layer_factory, layer_kw = layer_def
layer_kw = layer_kw.copy()
else:
# The legacy format: ('name', Layer)
layer_name, layer_factory = layer_def
layer_kw = {'name': layer_name}
if isinstance(layer_factory, str):
layer_factory = locate(layer_factory)
assert layer_factory is not None
if 'name' not in layer_kw:
layer_kw['name'] = self._layer_name(layer_factory, i)
more_params = self._get_params_for(layer_kw['name'])
layer_kw.update(more_params)
if layer_kw['name'] in self.layers_:
raise ValueError(
"Two layers with name {}.".format(layer_kw['name']))
# Any layers that aren't subclasses of InputLayer are
# assumed to require an 'incoming' parameter. By default,
# we'll use the previous layer as input:
try:
is_input_layer = issubclass(layer_factory, InputLayer)
except TypeError:
is_input_layer = False
if not is_input_layer:
if 'incoming' in layer_kw:
layer_kw['incoming'] = self.layers_[
layer_kw['incoming']]
elif 'incomings' in layer_kw:
layer_kw['incomings'] = [
self.layers_[name] for name in layer_kw['incomings']]
else:
layer_kw['incoming'] = layer
# Deal with additional string parameters that may
# reference other layers; currently only 'mask_input'.
for param in self.layer_reference_params:
if param in layer_kw:
val = layer_kw[param]
if isinstance(val, basestring):
layer_kw[param] = self.layers_[val]
for attr in ('W', 'b'):
if isinstance(layer_kw.get(attr), str):
name = layer_kw[attr]
layer_kw[attr] = getattr(self.layers_[name], attr, None)
try:
layer_wrapper = layer_kw.pop('layer_wrapper', None)
layer = layer_factory(**layer_kw)
except TypeError as e:
msg = ("Failed to instantiate {} with args {}.\n"
"Maybe parameter names have changed?".format(
layer_factory, layer_kw))
chain_exception(TypeError(msg), e)
self.layers_[layer_kw['name']] = layer
if layer_wrapper is not None:
layer = layer_wrapper(layer)
self.layers_["LW_%s" % layer_kw['name']] = layer
self._output_layers = [layer]
return [layer]
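# A definition sketch for the legacy ('name', Layer) format handled above:
# per-layer keyword arguments are supplied to NeuralNet with the layer name
# as a prefix and merged in via '_get_params_for'. Sizes and the softmax
# nonlinearity are illustrative, and DenseLayer/softmax are assumed to come
# from lasagne:
#
#     net = NeuralNet(
#         layers=[('input', InputLayer),
#                 ('hidden', DenseLayer),
#                 ('output', DenseLayer)],
#         input_shape=(None, 784),
#         hidden_num_units=200,
#         output_num_units=10,
#         output_nonlinearity=softmax,
#     )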
def _create_iter_funcs(self, layers, objective, update, output_type):
y_batch = output_type('y_batch')
objective_kw = self._get_params_for('objective')
loss_train = objective(
layers, target=y_batch, **objective_kw)
loss_eval = objective(
layers, target=y_batch, deterministic=True, **objective_kw)
output_layer = self._output_layers
predict_proba = get_output(output_layer, None, deterministic=True)
if not self.regression:
predict = predict_proba[0].argmax(axis=1)
accuracy = T.mean(T.eq(predict, y_batch))
else:
accuracy = loss_eval
scores_train = [
s[1](predict_proba, y_batch) for s in self.scores_train]
scores_valid = [
s[1](predict_proba, y_batch) for s in self.scores_valid]
all_params = self.get_all_params(trainable=True)
grads = theano.grad(loss_train, all_params)
for idx, param in enumerate(all_params):
grad_scale = getattr(param.tag, 'grad_scale', 1)
if grad_scale != 1:
grads[idx] *= grad_scale
update_params = self._get_params_for('update')
updates = update(grads, all_params, **update_params)
input_layers = [layer for layer in layers.values()
if isinstance(layer, InputLayer)]
X_inputs = [theano.In(input_layer.input_var, name=input_layer.name)
for input_layer in input_layers]
inputs = X_inputs + [theano.In(y_batch, name="y")]
train_iter = theano.function(
inputs=inputs,
outputs=[loss_train] + scores_train,
updates=updates,
allow_input_downcast=True,
on_unused_input='ignore',
)
eval_iter = theano.function(
inputs=inputs,
outputs=[loss_eval, accuracy] + scores_valid,
allow_input_downcast=True,
on_unused_input='ignore',
)
predict_iter = theano.function(
inputs=X_inputs,
outputs=predict_proba,
allow_input_downcast=True,
on_unused_input='ignore',
)
return train_iter, eval_iter, predict_iter
def fit(self, X, y, epochs=None):
"""
Runs the training loop for a given number of epochs
:param X: The input data
:param y: The ground truth
:param epochs: The number of epochs to run, if `None` runs for the
network's :attr:`max_epochs`
:return: This instance
"""
if self.check_input:
X, y = self._check_good_input(X, y)
if self.use_label_encoder:
self.enc_ = LabelEncoder()
y = self.enc_.fit_transform(y).astype(np.int32)
self.classes_ = self.enc_.classes_
self.initialize()
try:
self.train_loop(X, y, epochs=epochs)
except KeyboardInterrupt:
pass
return self
def partial_fit(self, X, y, classes=None):
"""
Runs a single epoch using the provided data
:return: This instance
"""
return self.fit(X, y, epochs=1)
def train_loop(self, X, y, epochs=None):
epochs = epochs or self.max_epochs
X_train, X_valid, y_train, y_valid = self.train_split(X, y, self)
on_batch_finished = self.on_batch_finished
if not isinstance(on_batch_finished, (list, tuple)):
on_batch_finished = [on_batch_finished]
on_epoch_finished = self.on_epoch_finished
if not isinstance(on_epoch_finished, (list, tuple)):
on_epoch_finished = [on_epoch_finished]
on_training_started = self.on_training_started
if not isinstance(on_training_started, (list, tuple)):
on_training_started = [on_training_started]
on_training_finished = self.on_training_finished
if not isinstance(on_training_finished, (list, tuple)):
on_training_finished = [on_training_finished]
epoch = 0
best_valid_loss = (
min([row['valid_loss'] for row in self.train_history_]) if
self.train_history_ else np.inf
)
best_train_loss = (
min([row['train_loss'] for row in self.train_history_]) if
self.train_history_ else np.inf
)
for func in on_training_started:
func(self, self.train_history_)
num_epochs_past = len(self.train_history_)
while epoch < epochs:
epoch += 1
train_outputs = []
valid_outputs = []
if self.custom_scores:
custom_scores = [[] for _ in self.custom_scores]
else:
custom_scores = []
t0 = time()
batch_train_sizes = []
for Xb, yb in self.batch_iterator_train(X_train, y_train):
train_outputs.append(
self.apply_batch_func(self.train_iter_, Xb, yb))
batch_train_sizes.append(len(Xb))
for func in on_batch_finished:
func(self, self.train_history_)
batch_valid_sizes = []
for Xb, yb in self.batch_iterator_test(X_valid, y_valid):
valid_outputs.append(
self.apply_batch_func(self.eval_iter_, Xb, yb))
batch_valid_sizes.append(len(Xb))
if self.custom_scores:
y_prob = self.apply_batch_func(self.predict_iter_, Xb)
y_prob = y_prob[0] if len(y_prob) == 1 else y_prob
for custom_scorer, custom_score in zip(
self.custom_scores, custom_scores):
custom_score.append(custom_scorer[1](yb, y_prob))
train_outputs = np.array(train_outputs, dtype=object).T
train_outputs = [
np.average(
[np.mean(row) for row in col],
weights=batch_train_sizes,
)
for col in train_outputs
]
if valid_outputs:
valid_outputs = np.array(valid_outputs, dtype=object).T
valid_outputs = [
np.average(
[np.mean(row) for row in col],
weights=batch_valid_sizes,
)
for col in valid_outputs
]
if custom_scores:
avg_custom_scores = np.average(
custom_scores, weights=batch_valid_sizes, axis=1)
if train_outputs[0] < best_train_loss:
best_train_loss = train_outputs[0]
if valid_outputs and valid_outputs[0] < best_valid_loss:
best_valid_loss = valid_outputs[0]
info = {
'epoch': num_epochs_past + epoch,
'train_loss': train_outputs[0],
'train_loss_best': best_train_loss == train_outputs[0],
'valid_loss': valid_outputs[0]
if valid_outputs else np.nan,
'valid_loss_best': best_valid_loss == valid_outputs[0]
if valid_outputs else np.nan,
'valid_accuracy': valid_outputs[1]
if valid_outputs else np.nan,
'dur': time() - t0,
}
if self.custom_scores:
for index, custom_score in enumerate(self.custom_scores):
info[custom_score[0]] = avg_custom_scores[index]
if self.scores_train:
for index, (name, func) in enumerate(self.scores_train):
info[name] = train_outputs[index + 1]
if self.scores_valid:
for index, (name, func) in enumerate(self.scores_valid):
info[name] = valid_outputs[index + 2]
self.train_history_.append(info)
try:
for func in on_epoch_finished:
func(self, self.train_history_)
except StopIteration:
break
for func in on_training_finished:
func(self, self.train_history_)
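# A sketch of an 'on_epoch_finished' handler: the loop above stops early when
# a handler raises StopIteration. The class name and patience value are
# illustrative; 'train_history' entries carry the keys built above.
#
#     class EarlyStopping(object):
#         def __init__(self, patience=10):
#             self.patience = patience
#             self.best_valid = np.inf
#             self.best_epoch = 0
#
#         def __call__(self, nn, train_history):
#             current = train_history[-1]['valid_loss']
#             epoch = train_history[-1]['epoch']
#             if current < self.best_valid:
#                 self.best_valid = current
#                 self.best_epoch = epoch
#             elif epoch - self.best_epoch >= self.patience:
#                 raise StopIteration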
@staticmethod
def apply_batch_func(func, Xb, yb=None):
if isinstance(Xb, dict):
kwargs = dict(Xb)
if yb is not None:
kwargs['y'] = yb
return func(**kwargs)
else:
return func(Xb) if yb is None else func(Xb, yb)
def predict_proba(self, X):
probas = []
for Xb, yb in self.batch_iterator_test(X):
probas.append(self.apply_batch_func(self.predict_iter_, Xb))
output = tuple(np.vstack(o) for o in zip(*probas))
return output if len(output) > 1 else output[0]
def predict(self, X):
if self.regression:
return self.predict_proba(X)
else:
y_pred = np.argmax(self.predict_proba(X), axis=1)
if self.use_label_encoder:
y_pred = self.enc_.inverse_transform(y_pred)
return y_pred
def get_output(self, layer, X):
if isinstance(layer, basestring):
layer = self.layers_[layer]
fn_cache = getattr(self, '_get_output_fn_cache', None)
if fn_cache is None:
fn_cache = {}
self._get_output_fn_cache = fn_cache
if layer not in fn_cache:
xs = self.layers_[0].input_var.type()
get_activity = theano.function([xs], get_output(layer, xs))
fn_cache[layer] = get_activity
else:
get_activity = fn_cache[layer]
outputs = []
for Xb, yb in self.batch_iterator_test(X):
outputs.append(get_activity(Xb))
return np.vstack(outputs)
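# Usage sketch (the layer name is illustrative): intermediate activations can
# be inspected with, e.g.,
#
#     hidden_activations = net.get_output('hidden', X)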
def score(self, X, y):
    score = r2_score if self.regression else accuracy_score
    # sklearn metrics expect (y_true, y_pred); the order matters for r2_score.
    return float(score(y, self.predict(X)))
def get_all_layers(self):
return self.layers_.values()
def get_all_params(self, **kwargs):
layers = self.get_all_layers()
params = sum([l.get_params(**kwargs) for l in layers], [])
return unique(params)
def get_all_params_values(self):
return_value = OrderedDict()
for name, layer in self.layers_.items():
return_value[name] = [p.get_value() for p in layer.get_params()]
return return_value
def load_params_from(self, source):
self.initialize()
if isinstance(source, basestring):
with open(source, 'rb') as f:
source = pickle.load(f)
if isinstance(source, NeuralNet):
source = source.get_all_params_values()
success = "Loaded parameters to layer '{}' (shape {})."
failure = ("Could not load parameters to layer '{}' because "
"shapes did not match: {} vs {}.")
for key, values in source.items():
layer = self.layers_.get(key)
if layer is not None:
for p1, p2v in zip(layer.get_params(), values):
shape1 = p1.get_value().shape
shape2 = p2v.shape
shape1s = 'x'.join(map(str, shape1))
shape2s = 'x'.join(map(str, shape2))
if shape1 == shape2:
p1.set_value(p2v)
if self.verbose:
print(success.format(
key, shape1s, shape2s))
else:
if self.verbose:
print(failure.format(
key, shape1s, shape2s))
def save_params_to(self, fname):
params = self.get_all_params_values()
with open(fname, 'wb') as f:
pickle.dump(params, f, -1)
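# A persistence sketch (the file name is illustrative and 'layer_definition'
# is a placeholder): parameter values survive a round trip between two
# networks with the same architecture.
#
#     net.save_params_to('net_params.pkl')
#     net2 = NeuralNet(layers=layer_definition)  # same architecture as 'net'
#     net2.load_params_from('net_params.pkl')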
def load_weights_from(self, source):
warn("The 'load_weights_from' method will be removed in nolearn 0.6. "
"Please use 'load_params_from' instead.")
if isinstance(source, list):
raise ValueError(
"Loading weights from a list of parameter values is no "
"longer supported. Please send me something like the "
"return value of 'net.get_all_params_values()' instead.")
return self.load_params_from(source)
def save_weights_to(self, fname):
warn("The 'save_weights_to' method will be removed in nolearn 0.6. "
"Please use 'save_params_to' instead.")
return self.save_params_to(fname)
def __setstate__(self, state): # BBB for pickles that don't have the graph
self.__dict__.update(state)
self.initialize()
def get_params(self, deep=True):
params = super(NeuralNet, self).get_params(deep=deep)
# Incidentally, Lasagne layers have a 'get_params' too, which
# for sklearn's 'clone' means it would treat it in a special
# way when cloning. Wrapping the list of layers in a custom
# list type does the trick here, but of course it's crazy:
params['layers'] = _list(params['layers'])
return _dict(params)
def _get_param_names(self):
# This allows us to have **kwargs in __init__ (woot!):
param_names = super(NeuralNet, self)._get_param_names()
return param_names + self._kwarg_keys