forked from hgrandjean/Anti-Fungi-Peptide
-
Notifications
You must be signed in to change notification settings - Fork 0
/
database_summary.py
59 lines (53 loc) · 2.14 KB
/
database_summary.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import matplotlib
import matplotlib.pyplot as plt
from Bio import SeqIO
from collections import defaultdict, Counter
#Define the color properties and databases
properties_aa = [["D", "E", "K", "N"], ["H", "F", "Y", "W"], ["A", "V", "L", "I", "G"]]
color_aa = ["red", "blue", "green"]
pos_db_name = "positive_db_size.fasta"
neg_db_name = "negative_db_size.fasta"
#Define the graph
fig = plt.figure()
ax = fig.add_subplot()
fig.subplots_adjust(top=0.5)
#Parse the fasta file
def parse_fasta_file (file_name):
multi_fasta = [record for record in SeqIO.parse(file_name, "fasta")]
print(f"Ended parsing of {file_name}")
return multi_fasta
# Set titles for the figure and the subplot respectively
ax.axis([0, 18, -1, 1])
ax.text(-1, 1.1, 'Summary of databases with amino acid occurences and positions', fontsize=10, fontweight='bold')
ax.set_xlabel('Amino Acid positions')
ax.set_ylabel('Sizes in terms of occurrence')
#Get fasta file, sort their sequinces and plot
def sort_aa_positions(db_file):
db_fastas = parse_fasta_file(db_file)
print ("Parsing fasta files")
position_counts = defaultdict(list)
for fasta in db_fastas:
seq = fasta.seq
for position, aa in enumerate(list(seq)):
position_counts[position].append(aa)
count = {k:Counter(v) for k, v in position_counts.items()}
font_size = 0
color_default = "black"
for fasta in db_fastas:
seq = fasta.seq
for aa in seq:
for position_aa in range(len(count)):
if aa in count[position_aa]:
counted_value = count[position_aa][aa]
font_size = (counted_value/40)*35
for color in range(len(properties_aa)):
if aa in properties_aa[color]: color_default = color_aa[color]
ax.text(position_aa, y_pos, aa, color=color_default, fontsize=font_size)
del count[position_aa][aa]
y_pos = 0.05
sort_aa_positions(pos_db_name)
ax.text(5, 0.5, "Positive database", color="black", fontsize=10)
y_pos = -0.15
sort_aa_positions(neg_db_name)
ax.text(5, -0.5, "Negative database", color="black", fontsize=10)
plt.show()