-
Notifications
You must be signed in to change notification settings - Fork 1
/
demos.py
97 lines (75 loc) · 2.72 KB
/
demos.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import os
from algorithms.distance_matrix import DistanceMatrix
from algorithms.additive_phylogeny import additive_phylogeny
from algorithms.upgma import upgma
from algorithms.four_russians_binary_encoding import LCS, Sequence, DNACode
from algorithms.sequencing_graph import paired_kmers_to_debruijn
from algorithms.reconstruct_genome_string import reconstruct_from_paired_kmers
from algorithms.neighbour_joining import neighbour_joining
from rosalind.solutions.q10d import q10d
from algorithms.suffix_array import create_suffix_array
from algorithms.needleman_wunsch import align_needleman
from algorithms.clustering import lloyd_kmeans, add_vector, assign_to_cluster
from algorithms.burrows_wheeler import bwt_matching_all, bwt
from random import random
import matplotlib.pyplot as plt
from functools import reduce
from operator import concat
# Parent of the directory that contains this file (symlinks resolved);
# the example data paths below are built relative to it.
_this_file = os.path.realpath(__file__)
top_dir = os.path.dirname(os.path.dirname(_this_file))
##############
# Additional #
##############
def flatten(l):
    """Concatenate the sequences contained in ``l`` into one sequence.

    The elements are joined left to right with ``operator.concat``, so the
    result has whatever type the elements produce under ``+`` (a list for a
    list of lists, a tuple for a list of tuples, ...).

    Args:
        l: An iterable of sequences that can be concatenated with each other.

    Returns:
        The concatenation of all elements, or an empty list when ``l``
        contains no elements.
    """
    parts = list(l)
    if not parts:
        # reduce() without an initial value raises TypeError on empty input.
        return []
    return reduce(concat, parts)
def k_means_clustering_demo():
    """Cluster randomly generated 2-D points with ``lloyd_kmeans`` and plot them.

    Fifty points are scattered around each of four fixed centres, clustered
    with ``k = 2``, and drawn as a scatter plot (one colour per cluster).
    The figure is saved to ``out/img/clusters.png`` relative to the current
    working directory; the directory is created if it does not exist.
    """
    k = 2
    true_centres = [(0, 0), (0, 10), (-5, 0), (5, 5)]
    points_per_centre = 50
    out_path = "out/img/clusters.png"

    def gen_points(centre, n):
        # Each coordinate offset is drawn uniformly from [-2, 2).
        # add_vector presumably adds the offset to the centre element-wise
        # -- TODO confirm against algorithms.clustering.
        return [
            add_vector(centre, (random() * 4 - 2, random() * 4 - 2))
            for _ in range(n)
        ]

    points = flatten([gen_points(c, points_per_centre) for c in true_centres])
    centres = lloyd_kmeans(points, k)

    # Group the points by the value assign_to_cluster returns, which is used
    # as an index into ``centres`` (and into ``colors`` below).
    clusters = {i: [] for i in range(len(centres))}
    for point in points:
        clusters[assign_to_cluster(point, centres)].append(point)

    colors = ['r', 'g', 'b', 'y']
    for index, members in clusters.items():
        plt.scatter(
            [p[0] for p in members],
            [p[1] for p in members],
            c=colors[index],
        )

    # savefig does not create missing directories, so make sure it exists.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    plt.savefig(out_path)
def FourRussianLCS():
    """Run ``LCS`` on two randomly generated DNA sequences and print the results.

    Builds two sequences via ``Sequence(DNACode, 3000).random()``, prints the
    result of comparing them with ``==``, then prints ``LCS`` of the pair.
    """
    size = 3000
    first, second = (Sequence(DNACode, size).random() for _ in range(2))
    print(first == second)
    print(LCS(first, second))
###########
# Example #
###########
def run_additive_phylogeny():
    """Print the tree ``additive_phylogeny`` builds from the example matrix.

    The distance matrix is read from
    ``<top_dir>/example_data/additive_phylogeny.txt``.
    """
    matrix_file = os.path.join(top_dir, "example_data", "additive_phylogeny.txt")
    distances = DistanceMatrix.from_txt_file(matrix_file)
    print(additive_phylogeny(distances))
def run_upgma():
    """Print and draw the tree ``upgma`` builds from the example matrix.

    The distance matrix is read from ``<top_dir>/example_data/upgma.txt``.
    """
    matrix_file = os.path.join(top_dir, "example_data", "upgma.txt")
    # with_ij_queue=True is needed for the fast algorithm.
    distances = DistanceMatrix.from_txt_file(matrix_file, with_ij_queue=True)
    result = upgma(distances)
    print(result)
    result.draw()
def run_neighbour_joining():
    """Print and draw the tree ``neighbour_joining`` builds from the example matrix.

    The distance matrix is read from
    ``<top_dir>/example_data/neighbour_joining.txt`` with the ij queue
    disabled.
    """
    matrix_file = os.path.join(top_dir, "example_data", "neighbour_joining.txt")
    distances = DistanceMatrix.from_txt_file(matrix_file, with_ij_queue=False)
    result = neighbour_joining(distances)
    print(result)
    result.draw()
############
# ROSALIND #
############
##############
# Playground #
##############
# Runs the neighbour-joining demo as soon as this module is executed.
# NOTE(review): this call is unguarded, so it also runs when the module is
# imported; consider an `if __name__ == "__main__":` guard if that is unintended.
run_neighbour_joining()