-
Notifications
You must be signed in to change notification settings - Fork 26
/
ai_examples.py
108 lines (82 loc) · 2.65 KB
/
ai_examples.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# inspired by http://www.paulgraham.com/spam.html
# but a simplified version as in https://course.elementsofai.com/se/3/3
# Fallback value substituted whenever a word is missing from a lookup table.
DEFAULT_PROB = 1e-5
def probTable(words):
    """Return the relative frequency of every distinct word in *words*.

    words: any iterable of hashable items (a list, a generator, ...).
    Returns a dict mapping each distinct word to count / total, with keys
    in order of first appearance. An empty input yields an empty dict.
    """
    counts = {}
    total = 0
    # Count the total while iterating so that one-shot iterables (which have
    # no len() and cannot be traversed twice) are supported as well.
    for word in words:
        counts[word] = counts.get(word, 0) + 1
        total += 1
    return {word: count / total for word, count in counts.items()}
class SpamFilter:
    """Spam classifier that multiplies per-word spam/not-spam frequency ratios.

    Attributes:
        spamTable: relative frequency of each word in the spam word list.
        notspamTable: relative frequency of each word in the not-spam list.
        ratioTable: for every word seen in either list, its spam frequency
            divided by its not-spam frequency; DEFAULT_PROB stands in for
            whichever frequency is missing.
    """

    def __init__(self, spam, notspam):
        """Build the frequency and ratio tables from two word lists.

        spam: words taken from spam messages.
        notspam: words taken from legitimate messages.
        """
        self.spamTable = probTable(spam)
        self.notspamTable = probTable(notspam)
        # Words that occur in spam: real spam frequency over the not-spam
        # frequency, falling back to DEFAULT_PROB when never seen in not-spam.
        ratios = {
            word: spam_freq / self.notspamTable.get(word, DEFAULT_PROB)
            for word, spam_freq in self.spamTable.items()
        }
        # Words that occur only in not-spam: DEFAULT_PROB plays the spam side.
        for word, notspam_freq in self.notspamTable.items():
            if word not in ratios:
                ratios[word] = DEFAULT_PROB / notspam_freq
        self.ratioTable = ratios

    def spamProbability(self, message):
        """Return the estimated probability (0.0 to 1.0) that *message* is spam.

        message: iterable of words. A plain string would be iterated
            character by character, so split it first.

        The combined odds start at 1 (an empty message scores 0.5) and are
        multiplied by the ratio of every word. The product is accumulated as
        a sum of logarithms: multiplying the raw ratios overflows to inf for
        long spammy messages, and inf / (inf + 1) evaluates to nan.
        """
        import math

        log_odds = 0.0
        for word in message:
            # NOTE(review): a word absent from ratioTable contributes
            # DEFAULT_PROB as its *ratio*, which pulls strongly towards
            # "not spam"; a neutral ratio would be 1. Kept as-is to preserve
            # existing results -- confirm whether this is intended.
            log_odds += math.log(self.ratioTable.get(word, DEFAULT_PROB))
        # Numerically stable odds / (odds + 1): only ever exponentiate a
        # non-positive number, so math.exp cannot overflow.
        if log_odds >= 0:
            return 1 / (1 + math.exp(-log_odds))
        odds = math.exp(log_odds)
        return odds / (odds + 1)
# Helper for reading files from the Enron-Spam data set, which is Latin-1 (ISO-8859-1) encoded
# http://www2.aueb.gr/users/ion/data/enron-spam/
def getLatinWords(file):
    """Return all whitespace-separated words of an ISO-8859-1 encoded file.

    file: path of the text file to read.
    Returns the words as a list, in the order they appear in the file.
    """
    words = []
    # The context manager closes the file even if reading fails part-way.
    with open(file, encoding="ISO-8859-1") as handle:
        for line in handle:
            words.extend(line.split())
    return words
##############
## regression by using gradient descent, in one variable
##############
def meanSquareError(prediction, examples):
    """Return the mean of the squared differences between targets and predictions.

    prediction: callable mapping an x value to a predicted y value.
    examples: sized collection of (x, y) pairs; dividing by its length
        means an empty collection raises ZeroDivisionError.
    """
    squared_errors = ((target - prediction(x)) ** 2 for x, target in examples)
    return sum(squared_errors) / len(examples)
def linear(c, m):
    """Return the function x -> c + m*x (intercept c, slope m)."""
    def line(x):
        return c + m * x

    return line
# partial derivatives of the mean squared error with respect to c and m; see
# https://towardsdatascience.com/linear-regression-using-gradient-descent-97a6c8700931
# Loss threshold: gradient descent stops once the mean squared error is no longer above it.
EPSILON = 1e-5
def descent(L, epochs, examples, *, verbose=True):
    """Fit the line y = c + m*x to *examples* by gradient descent on the MSE.

    L: learning rate (step size applied to each gradient component).
    epochs: maximum number of update steps.
    examples: sized collection of (x, y) pairs.
    verbose: when true (the default), print c, m and the current loss
        after every update step.

    Starts from c = 0, m = 0 and stops after *epochs* updates or as soon as
    the mean squared error is no longer greater than EPSILON.
    Returns the fitted (c, m) tuple.
    """
    n = len(examples)
    c = 0
    m = 0
    if epochs <= 0:
        # No update step requested: skip the loss evaluation entirely.
        return (c, m)
    prediction = linear(c, m)
    # The loss is evaluated once per parameter setting and reused for both
    # the stopping test and the progress output. It is a mean of squares,
    # hence never negative, so no abs() is needed.
    loss = meanSquareError(prediction, examples)
    epoch = 0
    while epoch < epochs and loss > EPSILON:
        dm = 0
        dc = 0
        for x, y in examples:
            residual = y - prediction(x)
            dm += x * residual
            dc += residual
        # Partial derivatives of the MSE with respect to m and c.
        dm = -2 * dm / n
        dc = -2 * dc / n
        m -= L * dm
        c -= L * dc
        prediction = linear(c, m)
        loss = meanSquareError(prediction, examples)
        if verbose:
            print(c, m, loss)
        epoch += 1
    return (c, m)
def loadtxt(file):
    """Load a text file of two whitespace-separated numbers per line.

    Intended for files such as chirps.txt from Lab3, or anything in the
    same format.

    file: path of the file to read.
    Returns a list of [x, y] pairs of floats, one per non-blank line.
    Raises ValueError if a non-blank line does not hold exactly two numbers.
    """
    rows = []
    # The context manager closes the file even when a line fails to parse.
    with open(file) as handle:
        for line in handle:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. trailing ones) instead of
                # crashing on the two-value unpacking below.
                continue
            x, y = fields
            rows.append([float(x), float(y)])
    return rows
def testRegr(filename, L=0.01, epochs=20):
    """Fit a line to the (x, y) data stored in *filename* by gradient descent.

    filename: path of a file in the format read by loadtxt.
    L: learning rate passed to descent (default 0.01).
    epochs: maximum number of update steps passed to descent (default 20).
    Returns whatever descent returns for the loaded examples.
    """
    examples = [(row[0], row[1]) for row in loadtxt(filename)]
    return descent(L, epochs, examples)