-
Notifications
You must be signed in to change notification settings - Fork 20
/
effDataReplaceScores.py
36 lines (29 loc) · 1006 Bytes
/
effDataReplaceScores.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# overwrite the mod frequencies from a .scores.tab file with the modification frequencies from another
# file
import sys
from collections import OrderedDict
def iterTsvRowsDict(ifh):
    """Yield the data rows of a tab-separated table as OrderedDicts.

    The first line read from ifh is used as the header line; every following
    line yields one OrderedDict mapping header name -> field value (all values
    are strings), in column order.

    ifh can be any object with readline() that can be iterated line by line,
    e.g. an open text file.

    Headers and fields are paired with zip(), so a line with fewer fields than
    headers yields a dict without the trailing keys, and extra fields beyond
    the last header are dropped.
    """
    # strip "\r" as well as "\n": with Windows line endings the last column
    # name and the last field of every row would otherwise keep a trailing "\r"
    headers = ifh.readline().rstrip("\r\n").split("\t")
    for line in ifh:
        row = line.rstrip("\r\n").split("\t")
        yield OrderedDict(zip(headers, row))
# command line: <scoresFname> <modFreqFname> <outFname>
scoresFname, modFreqFname, outFname = sys.argv[1:]

# guide -> replacement modFreq value, read from the second input file
modFreqs = {}
with open(modFreqFname) as modFreqFh:
    for row in iterTsvRowsDict(modFreqFh):
        modFreqs[row["guide"]] = row["modFreq"]

# "with" makes sure the output is flushed and all files are closed, also when
# an error stops the loop halfway
with open(outFname, "w") as ofh, open(scoresFname) as scoresFh:
    hDone = False
    for row in iterTsvRowsDict(scoresFh):
        # the header line is taken from the keys of the first row, so it is
        # only written if the scores file has at least one data row
        if not hDone:
            ofh.write("\t".join(row.keys())+"\n")
            hDone = True
        newFreq = modFreqs.get(row["guide"])
        if newFreq is None:
            # guides without a replacement value are reported and left out of
            # the output file
            print("not found %s" % row["guide"])
            continue
        row["modFreq"] = newFreq
        ofh.write("\t".join(row.values())+"\n")