-
Notifications
You must be signed in to change notification settings - Fork 4
/
Genescan_parse_cds-Prot-V1.0.pl
64 lines (45 loc) · 1.56 KB
/
Genescan_parse_cds-Prot-V1.0.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/perl -w
#This script reads every GENSCAN output file in the current directory
#(any file whose name contains "genscan") and collects the predicted
#peptide and coding sequences into two separate FASTA files. Keep this
#script in the folder containing the GENSCAN output and run it from there.
#Version:1.0
#Author: Ratnesh Singh
use strict;
# Collect GENSCAN predictions from every file in the current directory whose
# name contains "genscan". Predicted peptides are written to one FASTA file
# and predicted coding sequences (CDS) to another, each sequence on one line.
my $pept_out = 'papaya_pept_genscan.fasta';
my $cds_out  = 'papaya_CDS_genscan.fasta';

# List the input files. Only plain files are kept, and the two output files
# are excluded: their names also contain "genscan", so on a re-run in the same
# directory they would otherwise be read back in while being written.
# The list is sorted so the output order is reproducible between runs.
opendir(my $dir, '.') or die "Can't open current directory: $!";
my @files = sort grep {
    /genscan/ && -f $_ && $_ ne $pept_out && $_ ne $cds_out
} readdir($dir);
closedir($dir);
#print "files to be read @files\n";

open(my $pept_fh, '>', $pept_out) or die "Can't open file $pept_out: $!";
open(my $cds_fh,  '>', $cds_out)  or die "Can't open file $cds_out: $!";

# Process the files one after another.
foreach my $file (@files) {
    my @records;

    open(my $in, '<', $file) or die "Can't open file $file: $!";
    {
        # Read one FASTA record per iteration by using "\n>" as the input
        # record separator. The change is localized to this block so the
        # rest of the program keeps normal line-by-line reading.
        local $/ = "\n>";
        my $is_first = 1;
        while (defined(my $record = <$in>)) {
            # The first chunk is the table of coordinates that precedes the
            # sequences; drop it, unless the file starts directly with a
            # FASTA header, in which case it is a real record.
            my $is_preamble = $is_first && $record !~ /^>/;
            $is_first = 0;
            next if $is_preamble;

            # Every record is kept, including the last one in the file,
            # which has no trailing ">" because nothing follows it.
            $record =~ s/>//g;
            $record =~ s/\s*$//g;
            push(@records, $record);
        }
    }
    close($in);

    my $size = @records;
    print "number of coding sequences in $file :: $size\n";

    foreach my $record (@records) {
        # First line is the header; the remaining lines are the sequence.
        my ($header, @sequence) = split(/\n/, $record);
        next unless defined $header;    # empty record: nothing to print

        my $sequence = join("", @sequence);
        $sequence =~ s/\s//g;

        # Route the record by the prediction type named in its header.
        print {$pept_fh} ">$header\n$sequence\n"
            if ($header =~ /GENSCAN_predicted_peptide/i);
        print {$cds_fh} ">$header\n$sequence\n"
            if ($header =~ /GENSCAN_predicted_CDS/i);
    }
}

# Buffered write errors only surface at close, so check both handles.
close($pept_fh) or die "Can't close file $pept_out: $!";
close($cds_fh)  or die "Can't close file $cds_out: $!";
exit;