-
Notifications
You must be signed in to change notification settings - Fork 0
/
CKIPWS.py
53 lines (48 loc) · 1.34 KB
/
CKIPWS.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/usr/bin/python
#-*- encoding: UTF-8 -*-
from ctypes import *
# Module-wide handle to the CKIPWS wrapper library; None until initial() loads it.
ckipws = None
def initial(main_dll, py_dll, ini):
    """Load the CKIPWS wrapper library and call its ``Initial`` entry point.

    The loaded library is stored in the module-level ``ckipws`` handle.

    Args:
        main_dll: Passed to ``Initial`` as a wide-character string (first
            argument). Presumably the path of the core CKIPWS library --
            confirm against the CKIPWS documentation.
        py_dll: Library name/path handed to ``ctypes.CDLL``.
        ini: Passed to ``Initial`` as a wide-character string (second
            argument). Presumably the CKIPWS configuration file -- confirm.

    Returns:
        None. Whatever ``Initial`` returns is discarded.
    """
    global ckipws
    # Wrap both strings explicitly so they cross the FFI boundary as wchar_t*.
    main_dll_arg = c_wchar_p(main_dll)
    ini_arg = c_wchar_p(ini)
    library = CDLL(py_dll)
    # Publish the handle before calling into the library, as the module
    # has always done.
    ckipws = library
    library.Initial(main_dll_arg, ini_arg)
def _split_tagged_token(token):
    """Split one ``word(POS)`` token into a ``(word, pos)`` tuple."""
    # Split at the LAST '(' so that a word which is itself '(' keeps its text.
    word, paren, tag = token.rpartition(u'(')
    if not paren or not tag.endswith(u')'):
        # Not in ``word(POS)`` form: keep the text and report an empty tag
        # rather than failing on the whole sentence.
        return (token, u'')
    return (word, tag[:-1])


def segment(inputStr, mode = 0):
    """Segment ``inputStr`` with the loaded CKIPWS library.

    Args:
        inputStr: Text handed unchanged to the library's ``Segment`` entry
            point.
        mode: ``0`` (default) returns a list of ``(word, pos)`` tuples, e.g.
            ``[('蔡英文', 'Nb'), ('是', 'SHI'), ...]``. Any other value
            returns the raw tagged string, e.g.
            ``蔡英文(Nb) 是(SHI) 中華民國(Nc)...``.

    Returns:
        The parsed pairs or the raw string, depending on ``mode``. When the
        native call fails for any reason (including the library not having
        been loaded yet) the failure is logged and an empty result is
        returned: ``[]`` for mode 0, ``''`` otherwise.
    """
    import logging

    raw_text = ''
    try:
        seg_func = ckipws.Segment
        # ctypes assumes a C ``int`` return value by default, which truncates
        # the returned pointer on 64-bit builds; request a full-width pointer.
        seg_func.restype = c_void_p
        address = seg_func(inputStr)
        # A NULL pointer converts to None; treat that as "no output".
        raw_text = cast(address, c_wchar_p).value or ''
    except Exception:
        # Best effort, as before: callers get an empty result instead of an
        # exception. KeyboardInterrupt/SystemExit are no longer swallowed.
        logging.getLogger(__name__).warning(
            "CKIPWS segmentation failed; returning an empty result",
            exc_info=True,
        )
        raw_text = ''

    if mode != 0:
        return raw_text
    return [_split_tagged_token(token) for token in raw_text.split()]
def segList(corpus):
    """Initialise CKIPWS with the default files and segment each text.

    Args:
        corpus: Iterable of texts; each item is passed to ``segment``.

    Returns:
        A list with the ``segment(text)`` result for every item, in order.
    """
    # CKIPWS system libraries -- do not modify.
    core_library = 'CKIPWS.dll'
    wrapper_library = 'PY_CKIPWS.dll'
    # CKIPWS configuration file.
    config_file = 'ws.ini'
    # Initialise CKIPWS before any segmentation call.
    initial(core_library, wrapper_library, config_file)
    # One segmentation result per input text.
    return [segment(text) for text in corpus]