-
Notifications
You must be signed in to change notification settings - Fork 1
/
snp2fastq.cpp
150 lines (102 loc) · 3.35 KB
/
snp2fastq.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
// Copyright (c) 2018, Nicola Prezza. All rights reserved.
// Use of this source code is governed
// by a MIT license that can be found in the LICENSE file.
#include <iostream>
#include <fstream>
#include <assert.h>
#include <vector>
#include <unistd.h>
#include <sstream>
#include <set>
#include <cstring>
using namespace std;
// Set to true by main() when the optional second argument is "-i".
// When false, lines 0/1 of each 4-line group supply the parsed header and the
// read DNA; when true, lines 2/3 of the group play that role instead.
bool switch_ = false;
// Writes the usage message to standard output and terminates the process
// with exit status 0. Never returns to the caller.
void help(){
	// One entry per printed line; the empty entry yields the blank line that
	// separates the synopsis from the description.
	static const char* const usage_lines[] = {
		"snp2fastq calls.snp [-i]",
		"",
		"Converts clust2snp's calls 'calls.snp' into a fastq file 'calls.snp.fastq'. The output contains one ",
		"read per call, where we put the second individual's DNA in the read's name, and the first individual's ",
		"DNA in the read DNA. Base qualities are fake (all maximum). If option -i is specified, then individuals",
		"are switched."
	};

	for(const char* text : usage_lines){
		std::cout << text << std::endl;
	}

	exit(0);
}
// Builds the leading part of a fastq read name from one call header line.
//
// The line is split on '|':
//  - field 1, minus its first character, is split on '_'; tokens 1 and 4 are
//    kept as event type and event number;
//  - field 2: the text after its first ':' (or the whole field if it has no
//    ':') is split on '_' into SNP position and event;
//  - field 3 is kept as-is (coverage).
// Pieces that a malformed line does not provide come out empty; nothing is
// carried over from a previously parsed line.
//
// Returns "<type>_<number>_<pos>_<event>_<coverage>".
static std::string primary_read_name(const std::string& header_line){
	std::istringstream iss_bar(header_line);
	std::string event_type;
	std::string event_number;
	std::string snp_pos;
	std::string event;
	std::string cov;
	std::string field;

	std::getline(iss_bar, field, '|');
	// Drop the header's first character. Guarded because an empty line would
	// otherwise make substr(1)/erase(0,1)-style calls throw std::out_of_range.
	if(!field.empty()) field.erase(0, 1);
	{
		std::istringstream iss_underscore(field);
		std::string skipped;
		std::getline(iss_underscore, event_type, '_');
		std::getline(iss_underscore, skipped, '_');
		std::getline(iss_underscore, skipped, '_');
		std::getline(iss_underscore, event_number, '_');
	}

	// getline leaves its target untouched once the stream has failed, so
	// reset it to avoid re-parsing field 1 when field 2 is missing.
	field.clear();
	std::getline(iss_bar, field, '|');
	{
		std::istringstream iss_dots(field);
		std::string after_colon;
		std::getline(iss_dots, after_colon, ':');
		std::getline(iss_dots, after_colon, ':');
		std::istringstream iss_underscore(after_colon);
		std::getline(iss_underscore, snp_pos, '_');
		std::getline(iss_underscore, event, '_');
	}

	std::getline(iss_bar, cov, '|');

	return event_type + "_" + event_number + "_" + snp_pos + "_" + event + "_" + cov;
}

// Returns the third '|'-separated field of a header line. If the line has
// fewer than three fields, the last field that could be read is returned.
static std::string third_bar_field(const std::string& header_line){
	std::istringstream iss_bar(header_line);
	std::string field;
	std::getline(iss_bar, field, '|');
	std::getline(iss_bar, field, '|');
	std::getline(iss_bar, field, '|');
	return field;
}

// Entry point: snp2fastq calls.snp [-i]
//
// Reads argv[1] in groups of four lines (header, DNA, header, DNA) and writes
// one fastq entry per group to "<argv[1]>.fastq":
//   @<parsed primary header>_<coverage of the other header>_<the other DNA>
//   <primary DNA>
//   +
//   <one 'I' per base of the primary DNA>
// Without -i the primary pair is lines 0/1 of the group; with -i it is
// lines 2/3.
//
// Any other argument shape prints the usage text via help(), which exits.
// Returns 0 on success and 1 if a file cannot be opened or written.
int main(int argc, char** argv){
	if(argc != 2 and argc != 3) help();
	if(argc == 3){
		if(std::string(argv[2]) == "-i"){
			switch_ = true;
		}else{
			help();
		}
	}

	const std::string infile = argv[1];
	const std::string outfile = infile + ".fastq";

	// Open the input first so that a bad path does not leave an empty
	// output file behind.
	std::ifstream is(infile);
	if(!is){
		std::cerr << "Error: cannot open input file '" << infile << "'" << std::endl;
		return 1;
	}
	std::ofstream of(outfile);
	if(!of){
		std::cerr << "Error: cannot open output file '" << outfile << "'" << std::endl;
		return 1;
	}

	// Which half of the group supplies the read (primary) and which half goes
	// into the read name (secondary).
	const std::size_t primary = switch_ ? 2 : 0;
	const std::size_t secondary = switch_ ? 0 : 2;

	// The whole group is buffered before anything is emitted: with -i the
	// primary header/DNA are the LAST two lines of the group, so emitting
	// while streaming would pair them with the following group.
	std::string record[4];
	std::size_t filled = 0;
	std::string str;

	while(std::getline(is, str)){
		record[filled++] = str;
		if(filled < 4) continue;
		filled = 0;

		const std::string& dna = record[primary + 1];

		// '\n' instead of endl: no need to flush the file after every line.
		of << '@' << primary_read_name(record[primary])
		   << '_' << third_bar_field(record[secondary])
		   << '_' << record[secondary + 1] << '\n'
		   << dna << '\n'
		   << "+" << '\n'
		   << std::string(dna.length(), 'I') << '\n';
	}

	if(filled != 0){
		// Incomplete trailing group: nothing is emitted for it.
		std::cerr << "Warning: ignoring " << filled
		          << " trailing line(s) that do not form a complete 4-line call" << std::endl;
	}

	is.close();
	of.close();
	if(!of){
		std::cerr << "Error: failed while writing '" << outfile << "'" << std::endl;
		return 1;
	}
	return 0;
}