-
Notifications
You must be signed in to change notification settings - Fork 3
/
stats.py
61 lines (48 loc) · 1.44 KB
/
stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
'''
stats.py
Author: John Dickinson
'''
import math # for square root
import collections # for default dict
def mean(data):
    '''
    Return the arithmetic mean of data as a float.

    data must support len() and hold values that sum() can add up.
    An empty data raises ZeroDivisionError, because the count is zero.
    '''
    total = float(sum(data))
    count = float(len(data))
    return total / count
def median(data):
    '''
    Return the median of data.

    The values are sorted into a new list first, so data may be any iterable
    in any order and the caller's object is never modified.  With an odd
    number of values the middle value itself is returned; with an even number
    the result is the float average of the two middle values.

    Raises ValueError if data is empty (there is no middle value to report).
    '''
    ordered = sorted(data)
    count = len(ordered)
    if not count:
        raise ValueError('median() requires at least one data point')
    # Floor division keeps the index an int on Python 3 as well as Python 2.
    middle = count // 2
    if count % 2:
        return ordered[middle]
    return sum(ordered[middle - 1:middle + 1]) / 2.0
def mode(data):
    '''
    Return the item that occurs most often in data.

    Candidates are compared as (count, item) pairs, so when several items
    share the highest count the largest of those items is returned.
    Items must be hashable.  Empty data raises ValueError from max().
    '''
    tally = collections.Counter(data)
    ranked = [(occurrences, item) for item, occurrences in tally.items()]
    return max(ranked)[1]
def std_deviation(data):
    '''
    Return the population standard deviation of data.

    This is the square root of the mean of the squared differences between
    each value and the mean of data (the divisor is the number of values,
    not one less).  data is iterated twice, so it must be re-iterable.
    '''
    center = mean(data)
    squared_offsets = [(center - value) ** 2 for value in data]
    variance = mean(squared_offsets)
    return math.sqrt(variance)
def min_max_difference(data):
    '''
    Return the range of data: its largest value minus its smallest.

    Any iterable is accepted.  It is copied into a list so that one-shot
    iterators can be scanned twice and the caller's object is never modified.

    Raises IndexError if data is empty.  IndexError (rather than ValueError)
    is used so that callers handling an out-of-range lookup on an empty
    list keep working.
    '''
    values = list(data)
    if not values:
        raise IndexError('min_max_difference() requires at least one data point')
    # Two linear scans; no need for an O(n log n) sort just to read both ends.
    return max(values) - min(values)
def stats(data):
    '''
    Return a 5-tuple of summary figures for data, in this order:
    (mean, median, mode, std_deviation, min_max_difference).

    Each figure comes from calling the function of the same name on data.
    '''
    summaries = (mean, median, mode, std_deviation, min_max_difference)
    return tuple(summarize(data) for summarize in summaries)
if __name__ == '__main__':
    # Demo run: print the stats() tuple for several sample data sets.
    # print(...) with a single argument and range() behave the same on
    # Python 2 and Python 3, so this script runs on either interpreter.
    import hashlib
    import random

    # 10000 random floats scaled from random.random() by 100.
    data = [100 * random.random() for _ in range(10000)]
    print(stats(sorted(data)))

    # 10000 random floats drawn with random.uniform(1, 100).
    data = [random.uniform(1, 100) for _ in range(10000)]
    print(stats(sorted(data)))

    # Small hand-written samples, already in ascending order.
    data = [1, 2, 3, 4, 5, 5, 6, 7, 8, 9]
    print(stats(data))

    data = [1, 1, 1, 1, 1, 1, 1, 1, 1, 9]
    print(stats(data))

    # For each length 0..99: build a random string whose characters have code
    # points between 'A' and 'z', take its MD5 hex digest, and record the sum
    # of the digest characters' code points.
    first_code, last_code = ord('A'), ord('z')
    data = []
    for length in range(100):
        text = ''.join(chr(random.randint(first_code, last_code))
                       for _ in range(length))
        # hashlib.md5() requires bytes on Python 3; every character is ASCII.
        digest = hashlib.md5(text.encode('ascii')).hexdigest()
        data.append(sum(ord(ch) for ch in digest))
    print(stats(sorted(data)))