import numpy as np
import theano
import theano.tensor as T
from utils import *
from emb import *
from hidden import *
from output import *
from trainer import *
"""
Class wrapping the whole model.
Inputs:
l_vocab_w - length of the word vocabulary - 'None' if we don't use one
l_vocab_out - length of the tag vocabulary
n_f - dimension of the word embeddings
n_hidden - dimension of the hidden embeddings
lr - learning rate
trainer - name of the trainer to use : 'AdagradTrainer', AdadeltaTrainer', or 'GDTrainer'
activation - name of the activation function to use, usually T.tanh or 'relu'
model - 'ff' is feedforward, 'rnn' for bidirectional recurrent hidden Layer
viterbi - True or False for outputing a structured sequence using Viterbi or not
fname- Embedded word vector file each line contain vector of corresponding word of vocab file or word list
wl- word list
windim- window size of window based feed forwared neural network
Inputs are mostly choices for the architecture and the training that the wrapper class will apply. Once everything is set, theano functions are created to train and evaluate the model, and access the parameters.
"""
class SLLModel(object):
    def __init__(self, l_vocab_w, l_vocab_out, n_f, n_hidden, lr, trainer='AdagradTrainer', activation=T.tanh, model='ff', viterbi=True, fname=' ', wl=' ', windim=4):
        self.word_emb = T.imatrix('word_emb')
        self.word_id = T.ivector('word_id')
        self.tags = T.ivector('tags')
        # Embedding Layer
        self.embLayer = LookupLayer(self.word_id, l_vocab_w=l_vocab_w, n_f=n_f, fname=fname)
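        # Two hidden-layer choices follow: a window-based feedforward layer ('ff')
        # or a pair of recurrent layers run over the sentence in both directions ('rnn').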
        if model == 'ff':
            # Hidden Layer
            self.hiddenLayer = FFLayer(
                input=self.embLayer.output,
                n_in=n_f,
                winDim=windim,
                n_out=n_hidden,
                activation=activation,
                init='init_uniform'
            )
            # Output Layer
            if viterbi:
                self.outputLayer = Structured_OutputLayer(
                    input=self.hiddenLayer.output,
                    n_in=n_hidden,
                    n_out=l_vocab_out,
                    init='init_zeros'
                )
            else:
                self.outputLayer = Rec_OutputLayer(
                    input=self.hiddenLayer.output,
                    n_in=n_hidden,
                    n_out=l_vocab_out,
                    init='init_zeros'
                )
            self.params = self.embLayer.params + self.hiddenLayer.params + self.outputLayer.params
            # self.params = self.hiddenLayer.params + self.outputLayer.params
        elif model == 'rnn':
            # Hidden Layer: one recurrent layer over the input sequence
            self.hiddenLayer = Rec_HiddenLayer(
                input=self.embLayer.output,
                n_in=n_f,
                n_out=n_hidden,
                init='init_uniform'
            )
            # Second recurrent layer over the reversed input (backward direction)
            self.hiddenLayer_reverse = Rec_HiddenLayer(
                input=self.embLayer.output[:, ::-1],
                n_in=n_f,
                n_out=n_hidden,
                init='init_uniform'
            )
            # Output Layer: forward and backward representations are concatenated,
            # so the output layer sees 2 * n_hidden features
            if viterbi:
                self.outputLayer = Structured_OutputLayer(
                    input=T.concatenate([self.hiddenLayer.output, self.hiddenLayer_reverse.output[:, ::-1]], axis=1),
                    n_in=n_hidden * 2,
                    n_out=l_vocab_out,
                    init='init_zeros'
                )
            else:
                self.outputLayer = Rec_OutputLayer(
                    input=T.concatenate([self.hiddenLayer.output, self.hiddenLayer_reverse.output[:, ::-1]], axis=1),
                    n_in=n_hidden * 2,
                    n_out=l_vocab_out,
                    init='init_zeros'
                )
            self.params = self.embLayer.params + self.hiddenLayer.params + self.hiddenLayer_reverse.params + self.outputLayer.params
            # self.params = self.hiddenLayer.params + self.hiddenLayer_reverse.params + self.outputLayer.params
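        # The trainer class named by `trainer` is resolved from the module namespace
        # (presumably brought in by `from trainer import *`) and supplies the update rules.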
        self.trainer = globals()[trainer](self.params, lr)
        self.negative_log_likelihood = self.outputLayer.negative_log_likelihood
        self.updates = self.trainer.get_updates(self.params, self.negative_log_likelihood(self.tags))
        # Functions:
        self.train_perplexity = theano.function(
            inputs=[self.word_id, self.tags],
            outputs=self.negative_log_likelihood(self.tags),
            updates=self.updates,
            allow_input_downcast=True,
            on_unused_input='ignore'
        )
        self.eval_perplexity = theano.function(
            inputs=[self.word_id, self.tags],
            outputs=self.negative_log_likelihood(self.tags),
            allow_input_downcast=True,
            on_unused_input='ignore'
        )
        self.predict = theano.function(
            inputs=[self.word_id],
            outputs=T.argmax(self.outputLayer.p_y_given_x, axis=1),
            allow_input_downcast=True,
            on_unused_input='ignore'
        )
        self.output_params = theano.function(
            inputs=[],
            outputs=[p for (p, wrt) in self.params],
            allow_input_downcast=True,
            on_unused_input='ignore'
        )
        if viterbi:
            self.output_decode = theano.function(
                inputs=[self.word_id],
                outputs=self.outputLayer.decode_forward(),
                allow_input_downcast=True,
                on_unused_input='ignore'
            )
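
# Usage sketch (illustrative only): how this wrapper might be driven for a small
# tagging task. The vocabulary sizes, the embedding file 'vectors.txt', and the
# id arrays below are hypothetical placeholders, and the example assumes the
# LookupLayer can read that embedding file; adapt it to the real data pipeline.
if __name__ == '__main__':
    model = SLLModel(l_vocab_w=10000, l_vocab_out=45, n_f=50, n_hidden=100, lr=0.01,
                     trainer='AdagradTrainer', activation=T.tanh, model='ff',
                     viterbi=True, fname='vectors.txt', windim=4)
    # One sentence encoded as integer word ids, with its gold tag ids
    word_ids = np.array([12, 7, 431, 2, 98, 5], dtype='int32')
    tag_ids = np.array([3, 1, 0, 2, 1, 4], dtype='int32')
    cost = model.train_perplexity(word_ids, tag_ids)   # one gradient update, returns the NLL
    decoded = model.output_decode(word_ids)            # Viterbi-decoded tag sequence
    print(cost)
    print(decoded)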