-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfilter_mutrate.py
54 lines (50 loc) · 1.96 KB
/
filter_mutrate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#!/mnt/projects/zhangyu2/projects/miniconda3/envs/mixture/bin/python
__doc__="""
the script is used for filter the mutrate file with coverage threshold
criteria:
1) absolute coverage > 100 reads in one position
2) <optional> filter by normalized coverage ( eg: 10 / 1M )
input:
mutrate.txt.gz
"""
__version__="v1.0"
__author__="noahzy"
__last_modify__="10-oct-2019"
import gzip
import io
from sys import stdout
import argparse
import re
def filter_mutrate_by_cov(inputfile,outputfile,cov_threshold,filter_type):
if inputfile.split(".")[-1] in ["gz","gzip"]:
input = io.TextIOWrapper(io.BufferedReader(gzip.open(inputfile,'rb')))
else:
input = open(inputfile)
if outputfile == "-":
output = stdout
else:
output = gzip.open(outputfile,'wt')
## choose the thresholds for cov or normalized cov
if filter_type == "ncov":
idx = "normalized_cov"
else:
idx = "coverage"
## filter out each line
names = ["gene","pos","end","id","mutrate","strand","coverage","mutant","normalized_cov","ntref","detail"]
for l in input:
i = l.strip('\n').split('\t')
cols = dict(zip(names,i))
test = float(cols[idx])
if test > cov_threshold:
#print (cols[idx],cov_threshold)
output.write(l)
output.close()
if __name__ == "__main__":
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("-i","--input",help="inputfile of mutrate.txt.gz file")
parser.add_argument("-o","--output",default="-",help="outputfile of mutrate.filter.gz file")
parser.add_argument("-c","--cov_threshold",default=100,help="the threshold of coverage, > c reads")
parser.add_argument("-t","--filter_type",default="cov", help="use cov or normlized cov, default: cov")
args = parser.parse_args()
cov_threshold = int(args.cov_threshold)
filter_mutrate_by_cov(args.input, args.output, cov_threshold, args.filter_type)