-
Notifications
You must be signed in to change notification settings - Fork 11
/
shift_reads.py
executable file
·48 lines (40 loc) · 1.41 KB
/
shift_reads.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/usr/bin/env python
import csv
import sys
def getChrSizes(chrmFile):
"""
Reads tab-delimiter file with two rows describing the chromossomes and its lengths.
Returns dictionary of chr:sizes.
"""
with open(chrmFile, 'r') as f:
chrmSizes = {}
for line in enumerate(f):
row = line[1].strip().split('\t')
chrmSizes[str(row[0])] = int(row[1])
return chrmSizes
chrSizes = {
"hg38": "/data/groups/lab_bock/shared/resources/genomes/hg38/hg38.chromSizes",
"hg19": "/data/groups/lab_bock/shared/resources/genomes/hg19/hg19.chromSizes",
"mm10": "/data/groups/lab_bock/shared/resources/genomes/mm10/mm10.chromSizes",
"dr7": "/data/groups/lab_bock/shared/resources/genomes/dr7/dr7.chromSizes"
}
genome = sys.argv[1]
chrms = getChrSizes(chrSizes[genome]) # get size of chromosomes
wr = csv.writer(sys.stdout, delimiter='\t', lineterminator='\n')
for row in csv.reader(iter(sys.stdin.readline, ''), delimiter='\t'):
if row[0][0] == "@":
wr.writerow(row)
else:
flag = int(row[1])
chrm = row[2]
if flag & 16 == 0:
if int(row[3]) + 4 + 51 < chrms[chrm]:
row[3] = int(row[3]) + 4
else:
continue
elif flag & 16 == 16:
if int(row[3]) - 5 - 51 >= 1:
row[3] = int(row[3]) - 5
else:
continue
wr.writerow(row)