-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_search.py
91 lines (80 loc) · 2.3 KB
/
test_search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#!/usr/bin/env python
# -*-coding:utf-8-*-
#
# Author: liuzhida - [email protected]
# Blog: http://liuzhida.com
# Last modified: 2014-04-10 17:40
# Filename: test_search.py
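# Description: Ad-hoc script that runs search() against the index store and prints the names of the matching records.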
from mmseg import seg_txt
from config import c
from pinyin_trie import PinyinTokenizer, Trie, TrieNode
def search(word):
    """Look up *word* in the index store and print the names of the hits.

    Index keys are gathered from two sources, then resolved to ids:

    * If the first character of *word* is ASCII, the word is lower-cased and
      split with ``PinyinTokenizer``.  Each token contributes
      ``index:<token>`` when that key exists, otherwise every key matching
      ``index_p:<token>*``.  If the tokenizer yields nothing, every key
      matching ``index:<word>*`` is used instead.
    * Every segment returned by ``seg_txt(word)`` contributes
      ``index:<segment>`` when that key exists.

    A single key is read with ``smembers``; several keys are intersected
    with ``sinter``.  Each resulting id is loaded with ``hgetall`` and tagged
    with a ``type`` entry taken from the part of the id before the first
    ``":"``.

    ``c`` (imported from ``config``) is only used through ``exists``,
    ``keys``, ``smembers``, ``sinter`` and ``hgetall`` -- presumably a Redis
    client; confirm against ``config``.

    Args:
        word: Query string.  An empty (or ``None``) query is reported as
            "Query None" instead of raising on ``word[0]``.

    Returns:
        None.  Output is printed only: progress lines, each matching id,
        then the ``name`` field of every matching record.
    """
    # Single-argument print(...) behaves the same as the Python 2 print
    # statement, so the output is unchanged while the block also parses on
    # Python 3.

    # Guard first: word[0] below would fail on an empty query.
    if not word:
        print("Query None")
        return

    index_keys = []

    # ASCII input is treated as pinyin and tokenized.
    if ord(word[0]) <= 127:
        print("char")
        word = word.lower()
        tokens = PinyinTokenizer().tokenize(word)
        if tokens:
            print("keys exists")
            for token in tokens:
                print(token)
                exact_key = "index:" + token
                if c.exists(exact_key):
                    print("index exists")
                    index_keys.append(exact_key)
                else:
                    # No exact index for this token: fall back to every
                    # prefix-index key that starts with it.
                    print("index not exists")
                    prefix_keys = c.keys("index_p:" + token + "*")
                    print(prefix_keys)
                    index_keys.extend(prefix_keys)
        else:
            # Tokenizer produced nothing: wildcard-match on the raw word.
            print("keys *")
            print(word)
            prefix_keys = c.keys("index:" + word + "*")
            index_keys.extend(prefix_keys)
            print(prefix_keys)

    # NOTE(review): the reviewed copy of this file had lost its indentation;
    # this pass is assumed to run for every query (it is the only lookup a
    # non-ASCII query can reach) -- confirm against the original layout.
    for segment in seg_txt(word):
        segment_key = "index:" + segment
        if c.exists(segment_key):
            index_keys.append(segment_key)

    if not index_keys:
        print("Query None")
        return

    if len(index_keys) == 1:
        ids = c.smembers(index_keys[0])
    else:
        ids = c.sinter(index_keys)

    records = []
    for item_id in ids:
        print(item_id)
        record = c.hgetall(item_id)
        record['type'] = item_id.split(":")[0]
        records.append(record)

    for record in records:
        print(record['name'])
if __name__ == "__main__":
    # Manual smoke run: issue one sample ASCII query when executed as a script.
    sample_query = "zhida"
    search(sample_query)