-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathomorfi_wrapper.py
90 lines (72 loc) · 3.4 KB
/
omorfi_wrapper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import logging
import select
# Configure the root logger at import time; only WARNING and above is shown
# unless the level is changed later.
logging.basicConfig(level=logging.WARNING)
import subprocess
# Absolute path of the directory containing this file; the hfst-ol.jar path
# (LIBS/hfst-ol.jar) is resolved relative to it.
SCRIPTDIR=os.path.dirname(os.path.abspath(__file__))
class HFSTError(Exception):
    """Signals that the hfst-ol.jar subprocess could not be brought up."""
class OmorfiWrapper(object):
    """Line-oriented client for a ``java -jar LIBS/hfst-ol.jar`` subprocess.

    One query word is written per line to the child's stdin; the reply is
    read from its stdout up to the first blank line.  The child is started
    in the constructor and restarted whenever it stops answering.

    Attributes:
        log: logger named ``"hfst"``.
        process: the current ``subprocess.Popen`` object (``None`` before
            the first start).
        transducer_file: path handed to hfst-ol.jar on its command line.
        poll: ``select.poll`` object watching the child's stdout.
    """

    # Milliseconds to wait for a reply before the child is considered stuck.
    LOOKUP_TIMEOUT_MS = 5000

    def __init__(self, transducer_file):
        """Check that *transducer_file* exists and start the subprocess.

        Raises:
            Exception: if *transducer_file* does not exist.
            SystemExit: (via :meth:`restart`) if the subprocess cannot be
                started.
        """
        if not os.path.exists(transducer_file):
            raise Exception("No transducer file found: %s" % transducer_file)
        self.log = logging.getLogger("hfst")
        self.process = None
        self.transducer_file = transducer_file
        self.poll = None
        self.restart()

    def _discard_process(self):
        """Kill the current child (if any), close its pipes and reap it."""
        proc = self.process
        if proc is None:
            return
        try:
            # SIGKILL rather than SIGTERM: the child is only replaced when it
            # is unresponsive or no longer needed, and a forced kill
            # guarantees that the wait() below returns.
            proc.kill()
        except OSError:
            pass  # the child is already gone
        for pipe in (proc.stdin, proc.stdout, proc.stderr):
            if pipe is None:
                continue
            try:
                pipe.close()
            except (IOError, OSError):
                pass  # flushing into a dead child's pipe fails; nothing to save
        proc.wait()

    def restart(self):
        """(Re)start hfst-ol.jar and block until it prints its ready line.

        Any previous child is killed and reaped first.  If the new child
        cannot be executed, has already exited, or closes its stdout before
        printing ``Ready for input.``, a help message is logged and the
        interpreter exits with status 1.

        Raises:
            SystemExit: when the subprocess could not be started.
        """
        self._discard_process()
        self.poll = select.poll()
        command = [
            "java",
            "-jar",
            os.path.join(SCRIPTDIR, "LIBS", "hfst-ol.jar"),
            self.transducer_file,
        ]
        try:
            self.log.info("Starting hfst-ol.jar")
            try:
                # universal_newlines=True puts the pipes in text mode on
                # Python 3, so the '' EOF sentinel and the str comparisons
                # below work there too; on Python 2 it only enables newline
                # translation on the output pipes.
                # NOTE(review): stderr is piped but never read; a child that
                # writes a lot to stderr could block on a full pipe.
                self.process = subprocess.Popen(
                    command,
                    stdin=subprocess.PIPE,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    universal_newlines=True,
                )
            except OSError as err:
                # e.g. no 'java' executable on PATH
                self.log.debug("Could not execute java: %s", err)
                raise HFSTError()
            # returncode is only refreshed by poll()/wait(), so poll here.
            if self.process.poll() is not None:  # Ended already - something's wrong
                self.log.debug("Non-zero exit code for hfst-ol.jar")
                raise HFSTError()
            self.log.info("hfst-ol.jar started")
            for line in iter(self.process.stdout.readline, ''):
                self.log.info("process: %s", line.strip())
                if line == "Ready for input.\n":
                    break
            else:
                # stdout reached EOF without the ready line
                raise HFSTError()
        except HFSTError:
            # NOTE(review): the '[email protected]' placeholders look like
            # scrubbed e-mail addresses - restore the real contacts if known.
            self.log.error(
                "Did not succeed in launching 'java -jar LIBS/hfst-ol.jar %s'. "
                "The most common reason for this is that you forgot to run './install.sh'. "
                "Run it, and also run 'test_dependencies.py' to make sure all is OK.\n\n"
                "If it fails even though you did succeed with ./install.sh, try to run 'java -jar LIBS/hfst-ol.jar model/morphology.finntreebank.hfstol'. "
                "It should start and ask for input with 'Ready for input.' "
                "Then type in 'koiransa' and see if you get a reasonable analysis. "
                "Then either open an issue at https://github.com/TurkuNLP/Finnish-dep-parser/issues or email [email protected] and [email protected] and we'll try to help you.\n\n"
                "Giving up, because the parser cannot run without morphological lookup.",
                self.transducer_file,
            )
            sys.exit(1)
        self.log.info("Started the HFST process.")
        self.poll.register(self.process.stdout)

    def lookup(self, word):
        """Send *word* to the subprocess and collect its reply.

        Each reply line is split on tabs and its second and third fields are
        kept as a 2-tuple; lines without both fields are skipped.  Reading
        stops at the first blank line or at end of stream.

        If the child's stdin can no longer be written to, or nothing becomes
        readable within ``LOOKUP_TIMEOUT_MS``, the child is restarted and an
        empty list is returned.

        Args:
            word: the query string, without a trailing newline.

        Returns:
            A list of ``(field2, field3)`` string tuples, possibly empty.
        """
        self.log.info("Sending in query: %s", word)
        try:
            self.process.stdin.write(word + "\n")
            self.process.stdin.flush()
        except (IOError, OSError):
            # Broken pipe: the child died since the previous query.
            self.log.warning("Lost the connection to the HFST process; restarting it.")
            self.restart()
            return []
        if not self.poll.poll(self.LOOKUP_TIMEOUT_MS):
            # Nothing ready within the timeout - we're stuck
            self.restart()
            return []
        results = []
        for line in iter(self.process.stdout.readline, ''):
            stripped = line.strip()
            if stripped == "":
                break
            fields = stripped.split("\t")[1:3]
            if len(fields) != 2:
                continue  # bad data, unrecognized token?
            results.append(tuple(fields))
        self.log.info("Got %d results, returning", len(results))
        return results

    def close(self):
        """Kill and reap the subprocess; safe to call more than once."""
        if self.process is None:
            return
        self._discard_process()
        self.log.info("Killed the HFST process.")
if __name__ == "__main__":
    # Interactive test loop: read tokens from the terminal and print the
    # result of OmorfiWrapper.lookup() for each one.

    # logging.basicConfig() does nothing once the root logger has handlers,
    # and the module-level basicConfig(level=WARNING) call has already
    # installed one - so raise the level on the root logger directly.
    logging.getLogger().setLevel(logging.INFO)

    if len(sys.argv) < 2:
        sys.stderr.write("Usage: %s TRANSDUCER_FILE\n" % sys.argv[0])
        sys.exit(2)

    try:
        read_token = raw_input  # Python 2
    except NameError:
        read_token = input  # Python 3

    hfst = OmorfiWrapper(sys.argv[1])
    try:
        while True:
            ret = read_token("token> ")
            print(hfst.lookup(ret))
    except (KeyboardInterrupt, EOFError):
        # Ctrl-C / Ctrl-D: finish the prompt line and leave quietly.
        print("")
    finally:
        # Always stop the child process, whatever ended the loop.
        hfst.close()