-
Notifications
You must be signed in to change notification settings - Fork 0
/
Genome.pm
198 lines (152 loc) · 4.52 KB
/
Genome.pm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
=head2 taxon_name
Title : taxon_name
Usage : print "This genome is called " . $genome->taxon_name('full');
Function: Gets the name of the taxon of the genome.
Returns : A string.
Args : Either any point on the taxon tree (family, order, genus, species, etc), or 'full' for the complete name.
=cut
sub taxon_name
{
    # Returns the scientific name of this genome's taxon, or for the type
    # 'full' the whole lineage joined with ", " (most distant ancestor first).
    #
    # Args   : ($taxon_id, $type). $taxon_id is accepted only to keep the
    #          positional signature; the lookup always starts from
    #          $self->seqdb_taxon_id(). $type is case-insensitive and
    #          defaults to 'species'. The documented one-argument form
    #          taxon_name('full') is also accepted.
    # Returns: a string, or undef when the genome has no seqdb taxon id or
    #          no scientific name is stored for it.
    my $self = shift;
    my $taxon_id = shift;
    my $type = shift;

    # The POD documents calls such as taxon_name('full'); in that form the
    # type arrives in the first slot, so a lone non-numeric first argument is
    # treated as the type rather than silently ignored.
    if(!defined($type) && defined($taxon_id) && $taxon_id !~ /\A\d+\z/)
    {
        $type = $taxon_id;
        $taxon_id = undef;
    }

    # Apply the default BEFORE lower-casing: lc(undef) yields '' and would
    # make the default unreachable.
    $type = defined($type) ? lc($type) : 'species';

    if($type eq 'full')
    {
        # Walk the parent chain, prepending each name so the list ends with
        # the genome's own taxon.
        my $own_id = $self->seqdb_taxon_id();
        my @names = ($self->_get_name_from_taxon_id($own_id));

        # Remember visited ids so a self-referencing or cyclic parent chain
        # cannot loop forever.
        my %seen = ();
        $seen{$own_id} = 1 if defined($own_id);

        my $parent_id = $self->_get_parent_taxon_id($own_id);
        # Taxon id 1 ends the walk and is not included in the name.
        while(defined($parent_id) && $parent_id != 1 && !$seen{$parent_id}++)
        {
            unshift(@names, $self->_get_name_from_taxon_id($parent_id));
            $parent_id = $self->_get_parent_taxon_id($parent_id);
        }

        # Skip taxa without a stored scientific name instead of joining undef.
        return join(", ", grep { defined($_) } @names);
    }

    # TODO: resolve specific ranks (family, order, genus, ...) to the matching
    # ancestor. Until then every non-'full' type, including the default
    # 'species', reports the name of the genome's own taxon.
    my $taxid = $self->seqdb_taxon_id();

    # Make sure we have a seqdb_taxon_id
    if(defined($taxid))
    {
        return $self->_get_name_from_taxon_id($taxid);
    }
    return undef;
}
=head2 _get_name_from_taxon_id
Title : _get_name_from_taxon_id
Usage : my $taxid_name = $genome->_get_name_from_taxon_id($genome->ncbi_species_taxon_id);
Function: Gets the scientific name for any NCBI taxon id.
Returns : A string.
Args : An integer.
=cut
sub _get_name_from_taxon_id
{
    # Looks up the 'scientific name' entry of the taxon_name table for the
    # given taxon id on the seqdb database handle.
    #
    # Args   : the taxon id to look up.
    # Returns: the name string, or undef when no matching row exists.
    my $self = shift;
    my $taxid = shift;

    my $seqdb_dbh = $self->connection()->get_seqdb_dbh();
    my $sth = $seqdb_dbh->prepare("select name from taxon_name where taxon_id = ? AND name_class = 'scientific name'");
    $sth->execute($taxid);

    # Fetch instead of testing $sth->rows: DBI does not guarantee a usable
    # row count for SELECT statements before the rows have been fetched.
    my $row = $sth->fetchrow_hashref;

    # Release the statement handle on both the hit and the miss path.
    $sth->finish();

    return defined($row) ? $row->{name} : undef;
}
=head2 _get_parent_taxon_id
Title : _get_parent_taxon_id
Usage : my $parent = $genome->_get_parent_taxon_id($genome->ncbi_taxon_id);
Function: Gets the parent taxon for a particular taxon.
Returns : An integer.
Args : An integer.
=cut
sub _get_parent_taxon_id
{
    # Reads parent_taxon_id from the taxon table for the given taxon id on
    # the seqdb database handle.
    #
    # Args   : the taxon id whose parent is wanted.
    # Returns: the parent's taxon id, or undef when no matching row exists
    #          (or the stored parent is NULL).
    my $self = shift;
    my $taxid = shift;

    my $seqdb_dbh = $self->connection()->get_seqdb_dbh();
    my $sth = $seqdb_dbh->prepare("select parent_taxon_id from taxon where taxon_id = ?");
    $sth->execute($taxid);

    # Fetch instead of testing $sth->rows: DBI does not guarantee a usable
    # row count for SELECT statements before the rows have been fetched.
    my $row = $sth->fetchrow_hashref;

    # Release the statement handle on both the hit and the miss path.
    $sth->finish();

    return defined($row) ? $row->{parent_taxon_id} : undef;
}
=head2 _get_node_rank
Title : _get_node_rank
Usage : if($genome->_get_node_rank(5443) eq 'order') { print "This is an order"; }
Function: Gets the rank of a particular taxon id.
Returns : A string.
Args : An integer.
=cut
sub _get_node_rank
{
    # Reads node_rank from the taxon table for the given id on the seqdb
    # database handle.
    #
    # Args   : the taxon id to look up.
    # Returns: the rank string, or undef when no matching row exists.
    my $self = shift;
    my $taxid = shift;

    my $seqdb_dbh = $self->connection()->get_seqdb_dbh();
    # NOTE(review): this query filters on ncbi_taxon_id, while
    # _get_parent_taxon_id filters the same table on taxon_id. Confirm which
    # kind of id callers pass here; the query is left unchanged.
    my $sth = $seqdb_dbh->prepare("select node_rank from taxon where ncbi_taxon_id = ?");
    $sth->execute($taxid);

    # Fetch instead of testing $sth->rows: DBI does not guarantee a usable
    # row count for SELECT statements before the rows have been fetched.
    my $row = $sth->fetchrow_hashref;

    # Release the statement handle on both the hit and the miss path.
    $sth->finish();

    return defined($row) ? $row->{node_rank} : undef;
}
=head2 _get_parent_rank_taxid
Title : _get_parent_rank_taxid
Usage : my $order_id = $genome->_get_parent_rank_taxid('order', $genome->ncbi_species_id);
Function: Will search up the taxonomy tree for a specific rank, and then return that taxon id.
Returns : An integer.
Args : (string, integer) which is ("the rank", "the taxon id").
=cut
sub _get_parent_rank_taxid
{
    # Searches from a taxon up through its ancestors for the first one whose
    # rank equals $rank_name (case-insensitive) and returns that taxon's id.
    #
    # Args   : ($rank_name, $taxid). When $taxid is undefined the search
    #          starts at $self->seqdb_taxon_id().
    # Returns: the id of the matching taxon (possibly the starting taxon
    #          itself), or undef when taxon id 1 is reached, the parent chain
    #          ends, or the chain loops without a match.
    my $self = shift;
    my $rank_name = shift;
    my $taxid = shift;

    # An omitted start id means "begin at this genome's taxon". Returning
    # the genome's id outright would report it as a match even when its rank
    # differs, and would do the same once the parent chain ran out.
    if(!defined($taxid))
    {
        $taxid = $self->seqdb_taxon_id();
    }

    my $wanted_rank = defined($rank_name) ? lc($rank_name) : '';

    # Iterate rather than recurse, remembering visited ids so a cyclic
    # parent chain cannot loop forever.
    my %seen = ();
    while(defined($taxid) && $taxid != 1 && !$seen{$taxid}++)
    {
        # A taxon with no stored rank compares as the empty string.
        my $rank = $self->_get_node_rank($taxid);
        $rank = '' unless defined($rank);

        if(lc($rank) eq $wanted_rank)
        {
            return $taxid;
        }

        # Not the wanted rank, so move one level up the tree.
        $taxid = $self->_get_parent_taxon_id($taxid);
    }

    # Hit taxon id 1, ran out of parents, or looped without a match.
    return undef;
}
# A Perl module file must end with a true value so that require/use succeeds.
1;