forked from meizhitu/100programhomework
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path100-20-literal-5.py
104 lines (86 loc) · 2.7 KB
/
100-20-literal-5.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
__author__ = 'rui'
#coding=utf-8
import codecs
import re
import pygtk
pygtk.require("2.0")
import gtk
# Length, in characters, of the sliding window that splitWord() uses when
# tallying substrings.
maxWordLen = 2
def countWord(s):
    """Tally the whitespace-separated words of *s* and return a text report.

    The argument is coerced with ``str()`` and split on runs of whitespace.
    The resulting word -> occurrences mapping is passed to ``prettyPrint``
    with sorting enabled, and that function's result is returned as-is.
    """
    tally = {}
    for token in str(s).split():
        # First sighting starts at 1; later sightings add one.
        tally[token] = tally.get(token, 0) + 1
    return prettyPrint(tally, True)
def splitWord(w, allwords, size=None):
    """Count every ``size``-character substring of *w* into *allwords*.

    A window of ``size`` characters slides over *w* one position at a time.
    Each substring it covers has its counter in *allwords* incremented,
    starting from 1 the first time the substring is seen.

    Args:
        w: The text to scan.
        allwords: Dict mapping substring -> count. It is updated in place.
        size: Window length. When omitted, the module-level ``maxWordLen``
            is used.

    Returns:
        The same *allwords* dict that was passed in.

    Raises:
        ValueError: If the window length is less than 1.
    """
    if size is None:
        size = maxWordLen
    if size < 1:
        raise ValueError("window size must be at least 1")
    # Only these start offsets leave room for a full window. When w is
    # shorter than the window the range is empty and nothing is counted.
    for start in range(len(w) - size + 1):
        piece = w[start:start + size]
        allwords[piece] = allwords.get(piece, 0) + 1
    return allwords
def countFile(path="res/songci.txt", top=30):
    """Report the most frequent fixed-length substrings found in a text file.

    The file is read as UTF-8. Only lines whose length is strictly between
    10 and 500 characters are used. Each such line is cut at the punctuation
    characters in the separator pattern, and every non-empty piece is passed
    to ``splitWord`` to accumulate substring counts. The entries with the
    highest counts are formatted one per line as ``substring:count``; the
    report is printed and returned.

    Args:
        path: File to analyse. Defaults to ``res/songci.txt``.
        top: Maximum number of entries in the report. Defaults to 30;
            values below zero are treated as zero.

    Returns:
        The report text, or an empty string when nothing was counted.
    """
    separators = re.compile(u'[,!?.,!?。]')
    allwords = {}
    with codecs.open(path, 'r', 'utf-8') as f:
        for line in f:
            if 10 < len(line) < 500:
                for piece in separators.split(line):
                    # Splitting leaves empty strings between adjacent
                    # separators; they carry nothing to count.
                    if piece:
                        splitWord(piece, allwords)

    # sorted() is stable, so entries with equal counts keep dict order.
    ranked = sorted(allwords.items(), key=lambda e: e[1], reverse=True)
    result = "".join(
        key + ":" + str(count) + "\n" for key, count in ranked[:max(top, 0)]
    )
    print(result)
    return result
def prettyPrintArray(arr):
    """Print each element of *arr* on its own line, in iteration order."""
    for element in arr:
        print(element)
def prettyPrint(wcDict, sort=False):
    """Format a count mapping as ``key:count`` lines.

    Args:
        wcDict: Dict mapping a string key to its count.
        sort: When true, entries are listed by descending count, and entries
            with equal counts keep the dict's iteration order. Otherwise the
            dict's own iteration order is used.

    Returns:
        One ``key:count`` line per entry, each terminated by a newline.
        An empty dict yields the empty string.
    """
    # items() exists on both Python 2 and Python 3 dicts, unlike iteritems().
    entries = wcDict.items()
    if sort:
        entries = sorted(entries, key=lambda e: e[1], reverse=True)
    return "".join(k + ":" + str(v) + "\n" for k, v in entries)
def doGo(widget):
    """Run the count and show the report in the result label.

    Reads the whole contents of the ``tvInput`` text buffer. When the buffer
    is empty the report comes from ``countFile``; otherwise the typed text is
    analysed with ``countWord``. Either way the outcome is converted with
    ``str()`` and written to ``lbResult``.

    *widget* is accepted but not used.
    """
    buf = tvInput.get_buffer()
    begin = buf.get_start_iter()
    finish = buf.get_end_iter()
    typed = buf.get_text(begin, finish)
    if len(typed) == 0:
        lbResult.set_text(str(countFile()))
        return
    lbResult.set_text(str(countWord(typed)))
# Signal names mapped to their callbacks; handed to
# builder.connect_signals() below.
handlers = {
    "onDeleteWindow": gtk.main_quit,
    "on_buttonDo_clicked": doGo,
}

if __name__ == "__main__":
    gladeFile = "res/demo.glade"
    builder = gtk.Builder()
    builder.add_from_file(gladeFile)

    mainWindow = builder.get_object('demoWindow')
    # tvInput and lbResult are module-level names because doGo reads them.
    tvInput = builder.get_object('tvInput')
    lbResult = builder.get_object('lbResult')
    builder.connect_signals(handlers)

    if not mainWindow:
        # Nothing can be shown without the window, so stop with a clear
        # message instead of continuing with a missing object.
        raise SystemExit("demoWindow not found in " + gladeFile)

    mainWindow.connect('destroy', gtk.main_quit)
    mainWindow.set_title("单词出现次数")
    mainWindow.show_all()
    gtk.main()