Skip to content

Commit

Permalink
Merge pull request #343 from andrewjpage/core_paralogs
Browse files Browse the repository at this point in the history
Optionally allow paralogs in core gene alignment
  • Loading branch information
andrewjpage authored Aug 22, 2017
2 parents f299b01 + 8d34a81 commit 18af663
Show file tree
Hide file tree
Showing 10 changed files with 194 additions and 139 deletions.
2 changes: 1 addition & 1 deletion dist.ini
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name = Bio-Roary
version = 3.9.0
version = 3.9.1
author = Andrew J. Page <[email protected]>
license = GPL_3
copyright_holder = Wellcome Trust Sanger Institute
Expand Down
2 changes: 2 additions & 0 deletions lib/Bio/Roary.pm
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ has 'core_definition' => ( is => 'rw', isa => 'Num', default =
has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'mafft' => ( is => 'ro', isa => 'Bool', default => 0 );
has 'inflation_value' => ( is => 'rw', isa => 'Num', default => 1.5 );
has 'allow_paralogs' => ( is => 'rw', isa => 'Bool', default => 0 );

has 'output_multifasta_files' => ( is => 'ro', isa => 'Bool', default => 0 );

Expand Down Expand Up @@ -136,6 +137,7 @@ sub run {
core_definition => $self->core_definition,
verbose => $self->verbose,
mafft => $self->mafft,
allow_paralogs => $self->allow_paralogs,
);
$post_analysis->run();

Expand Down
10 changes: 7 additions & 3 deletions lib/Bio/Roary/CommandLine/Roary.pm
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ has 'dont_split_groups' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'verbose_stats' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'translation_table' => ( is => 'rw', isa => 'Int', default => 11 );
has 'mafft' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'allow_paralogs' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'group_limit' => ( is => 'rw', isa => 'Num', default => 50000 );
has 'core_definition' => ( is => 'rw', isa => 'Num', default => 0.99 );
has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 );
Expand All @@ -71,7 +72,7 @@ sub BUILD {
$job_runner, $makeblastdb_exec, $mcxdeblast_exec, $mcl_exec, $blastp_exec,
$apply_unknowns_filter, $cpus, $output_multifasta_files, $verbose_stats, $translation_table,
$run_qc, $core_definition, $help, $kraken_db, $cmd_version,
$mafft, $output_directory, $check_dependancies, $inflation_value,
$mafft, $output_directory, $check_dependancies, $inflation_value, $allow_paralogs,
);

GetOptionsFromArray(
Expand All @@ -98,6 +99,7 @@ sub BUILD {
'cd|core_definition=f' => \$core_definition,
'v|verbose' => \$verbose,
'n|mafft' => \$mafft,
'ap|allow_paralogs' => \$allow_paralogs,
'k|kraken_db=s' => \$kraken_db,
'w|version' => \$cmd_version,
'a|check_dependancies' => \$check_dependancies,
Expand Down Expand Up @@ -302,7 +304,8 @@ sub run {
core_definition => $self->core_definition,
verbose => $self->verbose,
mafft => $self->mafft,
inflation_value => $self->inflation_value,
allow_paralogs => $self->allow_paralogs,
inflation_value => $self->inflation_value,
);
$pan_genome_obj->run();

Expand Down Expand Up @@ -343,11 +346,12 @@ Options: -p INT number of threads [1]
-r create R plots, requires R and ggplot2
-s dont split paralogs
-t INT translation table [11]
-ap allow paralogs in core alignment
-z dont delete intermediate files
-v verbose output to STDOUT
-w print version and exit
-y add gene inference information to spreadsheet, doesnt work with -e
-iv STR Change the MCL inflation value [1.5]
-iv STR Change the MCL inflation value [1.5]
-h this help message
Example: Quickly generate a core gene alignment using 8 threads
Expand Down
9 changes: 7 additions & 2 deletions lib/Bio/Roary/CommandLine/RoaryCoreAlignment.pm
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,14 @@ has 'spreadsheet_filename' => ( is => 'rw', isa => 'Str', default => 'gene_
has 'output_filename' => ( is => 'rw', isa => 'Str', default => 'core_gene_alignment.aln' );
has 'core_definition' => ( is => 'rw', isa => 'Num', default => 0.99 );
has 'dont_delete_files' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'allow_paralogs' => ( is => 'rw', isa => 'Bool', default => 0 );
has '_error_message' => ( is => 'rw', isa => 'Str' );
has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 );

sub BUILD {
my ($self) = @_;

my ( $multifasta_base_directory, $spreadsheet_filename, $output_filename, $core_definition,$verbose, $help, $mafft, $dont_delete_files );
my ( $multifasta_base_directory, $spreadsheet_filename, $output_filename, $core_definition,$verbose, $help, $mafft, $allow_paralogs, $dont_delete_files );

GetOptionsFromArray(
$self->args,
Expand All @@ -42,6 +43,7 @@ sub BUILD {
'o|output_filename=s' => \$output_filename,
'cd|core_definition=f' => \$core_definition,
'z|dont_delete_files' => \$dont_delete_files,
'p|allow_paralogs' => \$allow_paralogs,
'v|verbose' => \$verbose,
'h|help' => \$help,
);
Expand All @@ -51,6 +53,7 @@ sub BUILD {
$self->logger->level(10000);
}
$self->help($help) if(defined($help));
$self->allow_paralogs($allow_paralogs) if(defined($allow_paralogs));

if ( defined($multifasta_base_directory) && ( -d $multifasta_base_directory ) ) {
$self->multifasta_base_directory( abs_path($multifasta_base_directory));
Expand Down Expand Up @@ -95,7 +98,8 @@ sub run {
$self->logger->info("Extract core genes from spreadsheet");
my $core_genes_obj = Bio::Roary::ExtractCoreGenesFromSpreadsheet->new(
spreadsheet => $self->spreadsheet_filename,
core_definition => $self->core_definition
core_definition => $self->core_definition,
allow_paralogs => $self->allow_paralogs
);

$self->logger->info("Looking up genes in files");
Expand Down Expand Up @@ -130,6 +134,7 @@ Options: -o STR output filename [core_gene_alignment.aln]
-cd FLOAT percentage of isolates a gene must be in to be core [99]
-m STR directory containing gene multi-FASTAs [pan_genome_sequences]
-s STR gene presence and absence spreadsheet [gene_presence_absence.csv]
-p allow paralogs
-z dont delete intermediate files
-v verbose output to STDOUT
-h this help message
Expand Down
7 changes: 6 additions & 1 deletion lib/Bio/Roary/CommandLine/RoaryPostAnalysis.pm
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,15 @@ has 'group_limit' => ( is => 'rw', isa => 'Num', default => 500
has 'core_definition' => ( is => 'rw', isa => 'Num', default => 0.99 );
has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'mafft' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'allow_paralogs' => ( is => 'rw', isa => 'Bool', default => 0 );

sub BUILD {
my ($self) = @_;

my (
$output_filename, $dont_create_rplots, $dont_delete_files, $dont_split_groups, $output_pan_geneome_filename,
$job_runner, $output_statistics_filename, $output_multifasta_files, $clusters_filename, $core_definition,
$fasta_files, $input_files, $verbose_stats, $translation_table, $help, $cpus,$group_limit,$verbose,$mafft
$fasta_files, $input_files, $verbose_stats, $translation_table, $help, $cpus,$group_limit,$verbose,$mafft, $allow_paralogs
);


Expand All @@ -72,6 +73,7 @@ sub BUILD {
'cd|core_definition=f' => \$core_definition,
'v|verbose' => \$verbose,
'n|mafft' => \$mafft,
'q|allow_paralogs' => \$allow_paralogs,
'h|help' => \$help,
);

Expand All @@ -93,6 +95,7 @@ sub BUILD {
$self->group_limit($group_limit) if ( defined($group_limit) );
$self->core_definition( $core_definition/100 ) if ( defined($core_definition) );
$self->mafft($mafft) if ( defined($mafft) );
$self->allow_paralogs($allow_paralogs) if ( defined($allow_paralogs) );
if ( defined($verbose) ) {
$self->verbose($verbose);
$self->logger->level(10000);
Expand Down Expand Up @@ -158,6 +161,7 @@ sub run {
cpus => $self->cpus,
verbose => $self->verbose,
mafft => $self->mafft,
allow_paralogs => $self->allow_paralogs,
dont_delete_files => $self->dont_delete_files,
num_input_files => $#{$input_files},
);
Expand Down Expand Up @@ -222,6 +226,7 @@ Options: -a dont delete intermediate files
-n fast core gene alignement with MAFFT instead of PRANK
-o STR clusters output filename [clustered_proteins]
-p STR output pan genome filename [pan_genome.fa]
-q allow paralogs in core alignment
-s STR output gene presence and absence filename [gene_presence_absence.csv]
-t INT translation table [11]
-z INT number of threads [1]
Expand Down
2 changes: 2 additions & 0 deletions lib/Bio/Roary/External/GeneAlignmentFromNucleotides.pm
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ has 'translation_table' => ( is => 'rw', isa => 'Int', default =>
has 'core_definition' => ( is => 'ro', isa => 'Num', default => 1 );
has 'mafft' => ( is => 'ro', isa => 'Bool', default => 0 );
has 'dont_delete_files' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'allow_paralogs' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'num_input_files' => ( is => 'ro', isa => 'Int', required => 1);

# Overload Role
Expand Down Expand Up @@ -85,6 +86,7 @@ sub _build__core_alignment_cmd {
my $core_cmd = "pan_genome_core_alignment";
$core_cmd .= " -cd " . ($self->core_definition*100) if ( defined $self->core_definition );
$core_cmd .= " --dont_delete_files " if ( defined $self->dont_delete_files && $self->dont_delete_files == 1 );
$core_cmd .= " --allow_paralogs " if ( defined $self->allow_paralogs && $self->allow_paralogs == 1 );

return $core_cmd;
}
Expand Down
5 changes: 5 additions & 0 deletions lib/Bio/Roary/External/PostAnalysis.pm
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ has 'group_limit' => ( is => 'rw', isa => 'Num', default => 50
has 'core_definition' => ( is => 'ro', isa => 'Num', default => 1.0 );
has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'mafft' => ( is => 'ro', isa => 'Bool', default => 0 );
has 'allow_paralogs' => ( is => 'ro', isa => 'Bool', default => 0 );
has '_working_directory' => ( is => 'ro', isa => 'File::Temp::Dir', default => sub { File::Temp->newdir( DIR => getcwd, CLEANUP => 1 ); } );
has '_gff_fofn' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__gff_fofn' );
has '_fasta_fofn' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__fasta_fofn' );
Expand Down Expand Up @@ -137,6 +138,9 @@ sub _command_to_run {

my $verbose_flag = '';
$verbose_flag = '-v' if ( defined($self->verbose) && $self->verbose == 1 );

my $allow_paralogs_flag = '';
$allow_paralogs_flag = '--allow_paralogs' if ( defined($self->allow_paralogs) && $self->allow_paralogs == 1 );

return join(
" ",
Expand All @@ -156,6 +160,7 @@ sub _command_to_run {
$verbose_stats_flag,
$verbose_flag,
$mafft_flag,
$allow_paralogs_flag,
'-j', $self->job_runner,
'--processors', $self->cpus,
'--group_limit', $self->group_limit,
Expand Down
Loading

0 comments on commit 18af663

Please sign in to comment.