-
Notifications
You must be signed in to change notification settings - Fork 20
/
plotEffScoreHistogram.py
66 lines (55 loc) · 1.96 KB
/
plotEffScoreHistogram.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import os, logging
from annotateOffs import *
from collections import defaultdict
#from scipy.stats import linregress
import matplotlib.pyplot as plt
import numpy as np
def parseAllSeqs(dirName):
    """ Collect guide sequences and KO efficiencies from the *.ext.tab files in dirName.

    Every file matching <dirName>/*.ext.tab is read with iterTsvRows, except
    files whose name contains "S7" or "S10", which are skipped.

    Returns a tuple (seqs, effLists):
    - seqs: set of the extSeq values of all rows of all files that were read
      (described as 34-mers by the original author).
    - effLists: list of (dataName, effList) tuples, one per file, ordered by
      file name. dataName is the file name up to its first "." and effList
      holds the modFreq column of that file converted to float.
    """
    seqs = set()
    effLists = []
    # glob returns files in filesystem-dependent order; sort so that the order
    # of the returned datasets is reproducible between runs and machines
    for fname in sorted(glob.glob(dirName+"/*.ext.tab")):
        fileName = basename(fname)
        # look only at the file name: a directory whose name happens to contain
        # "S7" or "S10" must not exclude every file in it
        if "S7" in fileName or "S10" in fileName:
            continue
        effList = []
        for row in iterTsvRows(fname):
            seqs.add(row.extSeq)
            effList.append(float(row.modFreq))
        effLists.append((fileName.split(".")[0], effList))
    return seqs, effLists
def main():
    """ Plot histograms of the efficiency scores and of the KO efficiencies.

    Top row: one histogram per score type ("svm", "doench", "ssc", "chariRaw",
    "finalGc6"), built from the scores calcEffScores returns for all sequences
    found by parseAllSeqs("effData").
    Bottom row: one histogram of KO efficiencies per dataset returned by
    parseAllSeqs, titled with the dataset name.

    The figure is written to out/effScoreHistogram.pdf and
    out/effScoreHistogram.png; the out directory is created if missing.
    """
    scoreTypes = ["svm", "doench", "ssc", "chariRaw", "finalGc6"]

    seqs, effLists = parseAllSeqs("effData")
    scores = calcEffScores(seqs)

    # regroup the per-sequence scores into one list per score type
    scoresByType = defaultdict(list)
    for seqScores in scores.values():
        for seqType, score in seqScores.items():
            scoresByType[seqType].append(score)

    # keep the historical 6-column layout, but grow it when there are more
    # datasets than that, instead of failing with an IndexError
    nCols = max(6, len(scoreTypes), len(effLists))
    fig, (axRow1, axRow2) = plt.subplots(2, nCols)
    # 20 inches wide for the default 6 columns, same width per panel beyond that
    fig.set_size_inches(20.0 * nCols / 6, 5)

    # scores
    for plotCol, scoreType in enumerate(scoreTypes):
        ax = axRow1[plotCol]
        ax.hist(scoresByType[scoreType])
        ax.set_xlabel("%s score" % scoreType)
        ax.set_ylabel("Frequency")

    # efficiencies
    for plotCol, (dataName, effList) in enumerate(effLists):
        ax = axRow2[plotCol]
        ax.hist(effList)
        ax.set_xlabel("KO efficiency")
        ax.set_ylabel("Frequency")
        ax.set_title(dataName)

    fig.tight_layout()

    outFname = "out/effScoreHistogram.pdf"
    # savefig does not create missing directories
    outDir = os.path.dirname(outFname)
    if not os.path.isdir(outDir):
        os.makedirs(outDir)
    plt.savefig(outFname)
    plt.savefig(outFname.replace(".pdf", ".png"))
    # single-argument call form prints the same text under Python 2 and 3
    print("wrote plot to %s and .png" % outFname)
# Run the plotting only when executed as a script, so that importing this
# module (e.g. to reuse parseAllSeqs) does not read data or write files.
if __name__ == "__main__":
    main()