-
Notifications
You must be signed in to change notification settings - Fork 3
/
emb.py
39 lines (32 loc) · 1.19 KB
/
emb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# -*- coding: utf-8 -*-
import numpy as np
import theano
import theano.tensor as T
from utils import *
"""
fname: file name of embedded word vectors. Each line in the file represents a vector.
n_f : is dimention of word vectors
"""
def readWV(fname, n_f):
fw = open(fname, 'r')
wv = []
i = 0
with open(fname, 'r') as f:
for line in f :
vs = line.split()
vect = map(float, vs)
if(len(vect) == n_f):
wv.append(vect)
w = np.asarray(wv, dtype='float64')
return w
class LookupLayer(object):
"""
Classic Lookup Layer taking as inputs list of words id present in sentence word_id , the length of word vocabulary 'l_vocab_w' and the size of word embeddings 'n_f'. The Lookup matrix 'WE' is the parameter and its gradient is computed with respect to 'output' to avoid computing updates for the whole matrix.
"""
def __init__(self, word_id, l_vocab_w, n_f, fname):
wv = readWV(fname, n_f)
print "word embedding matrix shape ---------", wv.shape
self.WE = theano.shared(value=np.asarray(wv, dtype='float64'), name='WE', borrow=True)
# self.WE = globals()['init_zeros']((l_vocab_w, n_f), 'WE')
self.output = self.WE[word_id]
self.params = [(self.WE, self.output)]