forked from recski/HunTag
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfeature.py
79 lines (65 loc) · 2.89 KB
/
feature.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#feature.py is a module of HunTag. The Feature class is used for representing a feature type and calculating its value for some input. Feature instances are created by the getFeatureSet function in huntag.py.
from lexicon import Lexicon
import sys
import features
class Feature():
    """One feature type and the logic to evaluate it on a sentence.

    A feature has a ``kind`` that selects how it is evaluated:

    * ``"lex"``      -- values come from a ``Lexicon`` built from ``actionName``;
    * ``"token"``    -- the function named ``actionName`` in the ``features``
      module is called once per word with the selected fields;
    * ``"sentence"`` -- the function named ``actionName`` in the ``features``
      module is called once with the whole sentence.

    In every case the per-word values are afterwards copied to the
    neighbouring words within ``radius`` positions (see ``multiplyFeatures``).
    """

    def __init__(self, kind, name, actionName, fields, radius, cutoff, options):
        """Store the feature description and resolve its evaluator.

        kind       -- "lex", "token" or "sentence".
        name       -- label used in the generated feature strings.
        actionName -- argument passed to ``Lexicon`` for "lex" features,
                      otherwise the name of a callable in ``features``.
        fields     -- indices/keys of the word fields the feature reads.
        radius     -- window half-width, converted with ``int``.
        cutoff     -- converted with ``int`` and stored; not used by this class.
        options    -- dict of extra options; ``{}`` means "no options".

        Exits the process with status -1 (after writing to stderr) when a
        lexicon feature is given options or when ``actionName`` is not
        defined in ``features``.  Raises AssertionError for an unknown kind.
        """
        self.name = name
        self.kind = kind
        self.actionName = actionName
        self.fields = fields
        self.radius = int(radius)
        self.cutoff = int(cutoff)
        self.options = options

        if kind == "lex":
            if self.options != {}:
                # Trailing newline added so the message does not run into
                # whatever is printed next on stderr.
                sys.stderr.write('Lexicon features do not yet support options\n')
                sys.exit(-1)
            self.lexicon = Lexicon(actionName)
        elif kind in ("token", "sentence"):
            if actionName not in features.__dict__:
                sys.stderr.write("Unknown operator named " + actionName + "\n")
                sys.exit(-1)
            self.function = features.__dict__[actionName]
        else:
            # Raised explicitly (instead of ``assert False``) so the check
            # still fires under ``python -O``; the exception type is kept.
            raise AssertionError("Unknown feature kind: " + repr(kind))

    def evalSentence_Token(self, sentence):
        """Call ``self.function`` once per word and return the results.

        The function receives ``word[field]`` for every field in
        ``self.fields`` and, when options are set, the options dict as a
        final positional argument.
        """
        # The options do not change between words, so decide once.
        extraArgs = [self.options] if self.options != {} else []
        featVec = []
        for word in sentence:
            fieldVec = [word[field] for field in self.fields]
            featVec.append(self.function(*(fieldVec + extraArgs)))
        return featVec

    def evalSentence_Lex(self, sentence):
        """Evaluate the lexicon on the single selected field of every word.

        Raises AssertionError unless exactly one field is configured.
        """
        if len(self.fields) != 1:
            # Explicit raise so the check is not stripped under ``python -O``.
            raise AssertionError(
                "Lexicon features need exactly one field, got %d"
                % len(self.fields))
        field = self.fields[0]
        wordList = [word[field] for word in sentence]
        return self.lexicon.lexEvalSentence(wordList)

    def evalSentence_Sentence(self, sentence):
        """Call ``self.function`` once on the whole sentence.

        The options dict is passed as a third argument only when it is
        not empty.
        """
        if self.options == {}:
            return self.function(sentence, self.fields)
        return self.function(sentence, self.fields, self.options)

    def evalSentence(self, sentence):
        """Evaluate the feature on ``sentence`` and spread it over the window.

        Returns one list of feature strings per word, as produced by
        ``multiplyFeatures``.  Raises AssertionError if ``self.kind`` is not
        one of the supported kinds.
        """
        if self.kind == "token":
            featVec = self.evalSentence_Token(sentence)
        elif self.kind == "lex":
            featVec = self.evalSentence_Lex(sentence)
        elif self.kind == "sentence":
            featVec = self.evalSentence_Sentence(sentence)
        else:
            # Previously this fell through to an UnboundLocalError on
            # ``featVec``; fail with a message that names the real problem.
            raise AssertionError("Unknown feature kind: " + repr(self.kind))
        return self.multiplyFeatures(sentence, featVec)

    def multiplyFeatures(self, sentence, featVec):
        """Copy every word's feature values to the words around it.

        For the word at index ``c`` the result holds, for each position
        ``pos`` with ``|pos - c| <= self.radius`` that lies inside the
        sentence, one string ``"<pos - c>_<name>=<value>"`` per value in
        ``featVec[pos]``.  Values equal to 0 are dropped.  Offsets are
        emitted in increasing order.
        """
        sentenceLen = len(sentence)
        multipliedFeatVec = []
        for c in range(sentenceLen):
            # Clamp the window to the sentence up front instead of testing
            # every offset inside the loop.
            first = max(0, c - self.radius)
            last = min(sentenceLen, c + self.radius + 1)
            wordFeats = []
            for pos in range(first, last):
                prefix = str(pos - c) + '_' + self.name + '='
                wordFeats.extend(
                    prefix + str(feat) for feat in featVec[pos] if feat != 0)
            multipliedFeatVec.append(wordFeats)
        return multipliedFeatVec