-
Notifications
You must be signed in to change notification settings - Fork 1
/
rankedSPR_seidel.py
119 lines (106 loc) · 4.88 KB
/
rankedSPR_seidel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
from os.path import exists
from rankedspr_adjacency import *
import time
from numpy.ctypeslib import ndpointer
import numpy as np
import ctypes
__author__ = 'Lena Collienne'
# Compute distance matrix for rankedSPR graph (from adjacency matrix, using SEIDEL implementation from RNNI_code package)
from pickle import FALSE
import sys
sys.path.append('seidel/')
_seidel = ctypes.CDLL("seidel_compressed/libseidel.so")
_seidel.test_function.argtypes = (ndpointer(ctypes.c_int,
flags="C_CONTIGUOUS"),
ctypes.c_int32)
_seidel.seidel.argtypes = (ndpointer(ctypes.c_int,
flags="C_CONTIGUOUS"), ctypes.c_int32)
_seidel.seidel_recursive.argtypes = (ndpointer(ctypes.c_int,
flags="C_CONTIGUOUS"),
ctypes.c_int32, ctypes.c_int32)
def rankedspr_seidel(n, hspr=False):
# compute distance matrix for RSPR (or HSPR if HSPR=0), for trees on n leaves
print('number of leaves:', n)
AI = rankedSPR_adjacency(n, hspr)
A = np.ascontiguousarray(AI[0], dtype=np.int32)
time1 = time.time()
_seidel.seidel(A, A.shape[0])
if (hspr == True):
np.save('output/distance_matrix_' + str(n) + '_leaves_hspr', A)
else:
np.save('output/distance_matrix_' + str(n) + '_leaves', A)
time2 = time.time()
print("C Seidel took {:.3f}ms".format((time2 - time1) * 1000.0))
print("diameter: ", np.amax(A))
def rankedspr_wo_RNNI_seidel(n):
# compute distance matrix for RSPR without RNNI moves, for trees on n leaves
print('number of leaves:', n)
AI = rankedSPR_wo_RNNI_adjacency(n)
A = np.ascontiguousarray(AI[0], dtype=np.int32)
time1 = time.time()
_seidel.seidel(A, A.shape[0])
np.save('output/wo_RNNI_distance_matrix_' + str(n) + '_leaves', A)
time2 = time.time()
print("C Seidel took {:.3f}ms".format((time2 - time1) * 1000.0))
print("diameter: ", np.amax(A))
def unlabelled_ranked_spr_seidel(n, hspr=True):
# compute distance matrix for RSPR (or HSPR if HSPR == True), for trees on n leaves
print('number of leaves:', n)
AI = unlabelled_rankedSPR_adjacency(n, hspr)
A = np.ascontiguousarray(AI[0], dtype=np.int32)
time1 = time.time()
_seidel.seidel(A, A.shape[0])
if (hspr == True):
np.save('output/unlabelled_distance_matrix_' + str(n) + '_leaves_hspr',
A)
else:
np.save('output/unlabelled_distance_matrix_' + str(n) + '_leaves', A)
time2 = time.time()
print("C Seidel took {:.3f}ms".format((time2 - time1) * 1000.0))
print("diameter: ", np.amax(A))
def read_distance_matrix(num_leaves, hspr=False, unlabelled=1):
# read distance matrix and corresponding trees and return them as matrix and two dicts (index to tree and tree to index)
# Read distance matrix
if unlabelled != 0:
if hspr == False:
d = np.load('output/distance_matrix_' + str(num_leaves) +
'_leaves.npy')
f = open('output/tree_dict_' + str(num_leaves) + '_leaves.txt',
'r')
elif hspr == True:
d = np.load('output/distance_matrix_' + str(num_leaves) +
'_leaves_hspr.npy')
f = open('output/tree_dict_' + str(num_leaves) +
'_leaves_hspr.txt', 'r')
else:
if hspr == False:
d = np.load('output/unlabelled_distance_matrix_' +
str(num_leaves) + '_leaves.npy')
f = open('output/unlabelled_tree_dict_' +
str(num_leaves) + '_leaves.txt', 'r')
elif hspr == True:
d = np.load('output/unlabelled_distance_matrix_' +
str(num_leaves) + '_leaves_hspr.npy')
f = open('output/unlabelled_tree_dict_' + str(num_leaves) +
'_leaves_hspr.txt', 'r')
# Put all trees into a dict (note that indices are sorted increasingly in file)
tree_strings = f.readlines()
index = 0
tree_dict = dict()
tree_index_dict = dict()
for tree_str in tree_strings:
tree_str = tree_str.split(" ")[1].split("\n")[0]
tree_dict[tree_str] = index
tree_index_dict[index] = tree_str
index += 1
return (d, tree_dict, tree_index_dict)
def get_distance_matrix(num_leaves, hspr):
'''get the distance matrix for trees on num_leaves leaves for hspr (HSPR=TRUE) or rspr.'''
if (hspr == True and not exists("output/distance_matrix_" +
str(num_leaves) + "_leaves_hspr.npy")
) or (hspr == False and not exists("output/distance_matrix_" +
str(num_leaves) + "_leaves.npy")):
print("start computing distance matrix")
rankedspr_seidel(num_leaves, hspr)
print("finish computing distance matrix")
return read_distance_matrix(num_leaves, hspr)