diff --git a/tools/augustus/augustus.xml b/tools/augustus/augustus.xml index e496b34caeb..c17e9049b82 100644 --- a/tools/augustus/augustus.xml +++ b/tools/augustus/augustus.xml @@ -246,6 +246,28 @@ + + + + + + + + + + + + + + + + + + + + + + @@ -473,4 +495,4 @@ Running this tool will produce this: ]]> - + \ No newline at end of file diff --git a/tools/augustus/augustus_training.xml b/tools/augustus/augustus_training.xml index 44866aa9516..0c1300c48c0 100644 --- a/tools/augustus/augustus_training.xml +++ b/tools/augustus/augustus_training.xml @@ -8,7 +8,7 @@ augustus - maker + maker - 3.4.0 - 2 + 3.5.0 + 0 23.1 diff --git a/tools/augustus/test-data/augustus.hints.output.gtf b/tools/augustus/test-data/augustus.hints.output.gtf index be1fbe49e73..28a27c7ff61 100644 --- a/tools/augustus/test-data/augustus.hints.output.gtf +++ b/tools/augustus/test-data/augustus.hints.output.gtf @@ -1,17 +1,17 @@ -# This output was generated with AUGUSTUS (version 3.4.0). +# This output was generated with AUGUSTUS (version 3.5.0). # AUGUSTUS is a gene prediction tool written by M. Stanke (mario.stanke@uni-greifswald.de), -# O. Keller, S. König, L. Gerischer, L. Romoth and Katharina Hoff. +# O. Keller, S. König, L. Gerischer, L. Romoth, Katharina Hoff, Henry Mehlan and Daniel Honsel. # Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008), # Using native and syntenically mapped cDNA alignments to improve de novo gene finding # Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013 # Sources of extrinsic information: M RM E W # Setting CDSpart local malus: 0.985 # Setting UTRpart local malus: 0.973 -# reading in the file /tmp/tmpb49zmbej/files/6/4/3/dataset_64360fd3-ce82-407d-a499-79ac51decbd9.dat ... +# reading in the file /tmp/tmppoq4flrj/files/9/3/2/dataset_9324eb55-2768-47e7-810a-f6b680ce991c.dat ... # Have extrinsic information about 1 sequences (in the specified range). # Initializing the parameters using config directory /usr/local/config/ ... # fly version. Using default transition matrix. -# Looks like /tmp/tmpb49zmbej/files/0/c/6/dataset_0c6b001d-370e-42cf-be92-b3435bd212c5.dat is in fasta format. +# Looks like /tmp/tmppoq4flrj/files/1/5/a/dataset_15a2d256-bf11-48b4-b436-22f4c66d8c5a.dat is in fasta format. # We have hints for 1 sequence and for 1 of the sequences in the input set. # # ----- prediction on sequence number 1 (length = 9950, name = chr2R) ----- @@ -59,4 +59,4 @@ chr2R AUGUSTUS start_codon 9301 9303 . - 0 transcript_id "chr2R.g1.t1"; gene_id # end gene chr2R.g1 ### # command line: -# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=off --codingseq=off --introns=off --stop=off --stop=off --cds=off --singlestrand=false /tmp/tmpb49zmbej/files/0/c/6/dataset_0c6b001d-370e-42cf-be92-b3435bd212c5.dat --UTR=off --genemodel=complete --softmasking=0 --hintsfile=/tmp/tmpb49zmbej/files/6/4/3/dataset_64360fd3-ce82-407d-a499-79ac51decbd9.dat --extrinsicCfgFile=/tmp/tmpb49zmbej/files/8/6/b/dataset_86b0a149-1d37-4615-9915-2c48586e3ca1.dat --species=fly +# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=off --codingseq=off --introns=off --stop=off --stop=off --cds=off --singlestrand=false /tmp/tmppoq4flrj/files/1/5/a/dataset_15a2d256-bf11-48b4-b436-22f4c66d8c5a.dat --UTR=off --genemodel=complete --softmasking=0 --hintsfile=/tmp/tmppoq4flrj/files/9/3/2/dataset_9324eb55-2768-47e7-810a-f6b680ce991c.dat --extrinsicCfgFile=/tmp/tmppoq4flrj/files/1/f/a/dataset_1fa5c464-019f-4a97-8759-0b44332f4963.dat --species=fly diff --git a/tools/augustus/test-data/augustus.hints_and_range.output.gtf b/tools/augustus/test-data/augustus.hints_and_range.output.gtf index 4c6dec57605..8fbe17858e0 100644 --- a/tools/augustus/test-data/augustus.hints_and_range.output.gtf +++ b/tools/augustus/test-data/augustus.hints_and_range.output.gtf @@ -1,17 +1,17 @@ -# This output was generated with AUGUSTUS (version 3.4.0). +# This output was generated with AUGUSTUS (version 3.5.0). # AUGUSTUS is a gene prediction tool written by M. Stanke (mario.stanke@uni-greifswald.de), -# O. Keller, S. König, L. Gerischer, L. Romoth and Katharina Hoff. +# O. Keller, S. König, L. Gerischer, L. Romoth, Katharina Hoff, Henry Mehlan and Daniel Honsel. # Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008), # Using native and syntenically mapped cDNA alignments to improve de novo gene finding # Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013 # Sources of extrinsic information: M RM E W # Setting CDSpart local malus: 0.985 # Setting UTRpart local malus: 0.973 -# reading in the file /tmp/tmpb49zmbej/files/f/a/8/dataset_fa8684ad-0602-4c00-9999-b998db931a6e.dat ... +# reading in the file /tmp/tmppoq4flrj/files/9/c/0/dataset_9c0c03c0-7d1a-4b5f-86dd-0ce3d861e601.dat ... # Have extrinsic information about 1 sequences (in the specified range). # Initializing the parameters using config directory /usr/local/config/ ... # fly version. Using default transition matrix. -# Looks like /tmp/tmpb49zmbej/files/4/c/8/dataset_4c80a809-791b-4afe-a497-698a7460ac31.dat is in fasta format. +# Looks like /tmp/tmppoq4flrj/files/5/2/c/dataset_52ca6132-4c8f-4bb1-8edc-a985c1310790.dat is in fasta format. # We have hints for 1 sequence and for 1 of the sequences in the input set. # # ----- prediction on sequence number 1 (length = 2001, name = chr2R) ----- @@ -53,4 +53,4 @@ chr2R AUGUSTUS start_codon 8929 8931 . - 0 transcript_id "chr2R.g1.t1"; gene_id # end gene chr2R.g1 ### # command line: -# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=off --codingseq=off --introns=off --stop=off --stop=off --cds=off --singlestrand=false /tmp/tmpb49zmbej/files/4/c/8/dataset_4c80a809-791b-4afe-a497-698a7460ac31.dat --UTR=off --genemodel=complete --softmasking=0 --hintsfile=/tmp/tmpb49zmbej/files/f/a/8/dataset_fa8684ad-0602-4c00-9999-b998db931a6e.dat --extrinsicCfgFile=/tmp/tmpb49zmbej/files/3/3/5/dataset_335e9fec-9340-42e6-97ce-af35d5220fcc.dat --predictionStart=7000 --predictionEnd=9000 --species=fly +# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=off --codingseq=off --introns=off --stop=off --stop=off --cds=off --singlestrand=false /tmp/tmppoq4flrj/files/5/2/c/dataset_52ca6132-4c8f-4bb1-8edc-a985c1310790.dat --UTR=off --genemodel=complete --softmasking=0 --hintsfile=/tmp/tmppoq4flrj/files/9/c/0/dataset_9c0c03c0-7d1a-4b5f-86dd-0ce3d861e601.dat --extrinsicCfgFile=/tmp/tmppoq4flrj/files/f/6/0/dataset_f6046d74-eb33-4cb6-9062-0955336c2672.dat --predictionStart=7000 --predictionEnd=9000 --species=fly diff --git a/tools/augustus/test-data/human_augustus_protein_codingseq_introns_cds_main.gtf b/tools/augustus/test-data/human_augustus_protein_codingseq_introns_cds_main.gtf index 417feb76a40..ec35abe8d2e 100644 --- a/tools/augustus/test-data/human_augustus_protein_codingseq_introns_cds_main.gtf +++ b/tools/augustus/test-data/human_augustus_protein_codingseq_introns_cds_main.gtf @@ -1,13 +1,13 @@ -# This output was generated with AUGUSTUS (version 3.4.0). +# This output was generated with AUGUSTUS (version 3.5.0). # AUGUSTUS is a gene prediction tool written by M. Stanke (mario.stanke@uni-greifswald.de), -# O. Keller, S. König, L. Gerischer, L. Romoth and Katharina Hoff. +# O. Keller, S. König, L. Gerischer, L. Romoth, Katharina Hoff, Henry Mehlan and Daniel Honsel. # Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008), # Using native and syntenically mapped cDNA alignments to improve de novo gene finding # Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013 # No extrinsic information on sequences given. # Initializing the parameters using config directory /usr/local/config/ ... # human version. Using default transition matrix. -# Looks like /tmp/tmpb49zmbej/files/c/d/6/dataset_cd6650af-fd36-4176-b9f1-e38bb118655f.dat is in fasta format. +# Looks like /tmp/tmppoq4flrj/files/b/9/e/dataset_b9eb8c49-e040-4f55-b476-56080cc0c7ac.dat is in fasta format. # We have hints for 0 sequences and for 0 of the sequences in the input set. # # ----- prediction on sequence number 1 (length = 9453, name = HS04636) ----- @@ -98,4 +98,4 @@ HS08198 AUGUSTUS stop_codon 1846 1848 . + 0 transcript_id "HS08198.g2.t1"; gene_ # end gene HS08198.g2 ### # command line: -# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=on --codingseq=on --introns=on --start=on --stop=on --cds=on --singlestrand=false /tmp/tmpb49zmbej/files/c/d/6/dataset_cd6650af-fd36-4176-b9f1-e38bb118655f.dat --UTR=off --genemodel=complete --softmasking=0 --species=human +# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=on --codingseq=on --introns=on --start=on --stop=on --cds=on --singlestrand=false /tmp/tmppoq4flrj/files/b/9/e/dataset_b9eb8c49-e040-4f55-b476-56080cc0c7ac.dat --UTR=off --genemodel=complete --softmasking=0 --species=human diff --git a/tools/augustus/test-data/human_augustus_utr-on.gff b/tools/augustus/test-data/human_augustus_utr-on.gff index 6ca8e427490..14da4cb89f3 100644 --- a/tools/augustus/test-data/human_augustus_utr-on.gff +++ b/tools/augustus/test-data/human_augustus_utr-on.gff @@ -1,14 +1,14 @@ ##gff-version 3 -# This output was generated with AUGUSTUS (version 3.4.0). +# This output was generated with AUGUSTUS (version 3.5.0). # AUGUSTUS is a gene prediction tool written by M. Stanke (mario.stanke@uni-greifswald.de), -# O. Keller, S. König, L. Gerischer, L. Romoth and Katharina Hoff. +# O. Keller, S. König, L. Gerischer, L. Romoth, Katharina Hoff, Henry Mehlan and Daniel Honsel. # Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008), # Using native and syntenically mapped cDNA alignments to improve de novo gene finding # Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013 # No extrinsic information on sequences given. # Initializing the parameters using config directory /usr/local/config/ ... # human version. Using default transition matrix. -# Looks like /tmp/tmpb49zmbej/files/e/0/1/dataset_e0109fd4-ac59-4275-90d3-25691349bc0c.dat is in fasta format. +# Looks like /tmp/tmppoq4flrj/files/3/4/a/dataset_34aeff85-4867-4ac0-a3b5-57c9af2f6d41.dat is in fasta format. # We have hints for 0 sequences and for 0 of the sequences in the input set. # # ----- prediction on sequence number 1 (length = 9453, name = HS04636) ----- @@ -103,4 +103,4 @@ HS08198 AUGUSTUS transcription_end_site 2105 2105 . + . Parent=HS08198.g2.t1 # end gene HS08198.g2 ### # command line: -# augustus --strand=both --noInFrameStop=false --gff3=on --uniqueGeneId=true --protein=on --codingseq=on --introns=off --stop=off --stop=off --cds=on --singlestrand=false /tmp/tmpb49zmbej/files/e/0/1/dataset_e0109fd4-ac59-4275-90d3-25691349bc0c.dat --UTR=on --genemodel=complete --softmasking=0 --species=human +# augustus --strand=both --noInFrameStop=false --gff3=on --uniqueGeneId=true --protein=on --codingseq=on --introns=off --stop=off --stop=off --cds=on --singlestrand=false /tmp/tmppoq4flrj/files/3/4/a/dataset_34aeff85-4867-4ac0-a3b5-57c9af2f6d41.dat --UTR=on --genemodel=complete --softmasking=0 --species=human diff --git a/tools/augustus/test-data/human_augustus_utr-on.gtf b/tools/augustus/test-data/human_augustus_utr-on.gtf index 3f0a10bf6b3..424b911243b 100644 --- a/tools/augustus/test-data/human_augustus_utr-on.gtf +++ b/tools/augustus/test-data/human_augustus_utr-on.gtf @@ -1,13 +1,13 @@ -# This output was generated with AUGUSTUS (version 3.4.0). +# This output was generated with AUGUSTUS (version 3.5.0). # AUGUSTUS is a gene prediction tool written by M. Stanke (mario.stanke@uni-greifswald.de), -# O. Keller, S. König, L. Gerischer, L. Romoth and Katharina Hoff. +# O. Keller, S. König, L. Gerischer, L. Romoth, Katharina Hoff, Henry Mehlan and Daniel Honsel. # Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008), # Using native and syntenically mapped cDNA alignments to improve de novo gene finding # Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013 # No extrinsic information on sequences given. # Initializing the parameters using config directory /usr/local/config/ ... # human version. Using default transition matrix. -# Looks like /tmp/tmpb49zmbej/files/6/0/5/dataset_605b6f62-4302-4e11-b378-848be921c4e4.dat is in fasta format. +# Looks like /tmp/tmppoq4flrj/files/9/2/f/dataset_92f269da-33a0-4419-9ebb-402fb31a679e.dat is in fasta format. # We have hints for 0 sequences and for 0 of the sequences in the input set. # # ----- prediction on sequence number 1 (length = 9453, name = HS04636) ----- @@ -102,4 +102,4 @@ HS08198 AUGUSTUS tts 2105 2105 . + . transcript_id "HS08198.g2.t1"; gene_id "HS0 # end gene HS08198.g2 ### # command line: -# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=on --codingseq=on --introns=off --stop=off --stop=off --cds=on --singlestrand=false /tmp/tmpb49zmbej/files/6/0/5/dataset_605b6f62-4302-4e11-b378-848be921c4e4.dat --UTR=on --genemodel=complete --softmasking=0 --species=human +# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=on --codingseq=on --introns=off --stop=off --stop=off --cds=on --singlestrand=false /tmp/tmppoq4flrj/files/9/2/f/dataset_92f269da-33a0-4419-9ebb-402fb31a679e.dat --UTR=on --genemodel=complete --softmasking=0 --species=human diff --git a/tools/augustus/test-data/human_augustus_utr-on_softmasking.gtf b/tools/augustus/test-data/human_augustus_utr-on_softmasking.gtf index 52c3f9ab11f..6949b96d1af 100644 --- a/tools/augustus/test-data/human_augustus_utr-on_softmasking.gtf +++ b/tools/augustus/test-data/human_augustus_utr-on_softmasking.gtf @@ -1,6 +1,6 @@ -# This output was generated with AUGUSTUS (version 3.4.0). +# This output was generated with AUGUSTUS (version 3.5.0). # AUGUSTUS is a gene prediction tool written by M. Stanke (mario.stanke@uni-greifswald.de), -# O. Keller, S. König, L. Gerischer, L. Romoth and Katharina Hoff. +# O. Keller, S. König, L. Gerischer, L. Romoth, Katharina Hoff, Henry Mehlan and Daniel Honsel. # Please cite: Mario Stanke, Mark Diekhans, Robert Baertsch, David Haussler (2008), # Using native and syntenically mapped cDNA alignments to improve de novo gene finding # Bioinformatics 24: 637-644, doi 10.1093/bioinformatics/btn013 @@ -8,7 +8,7 @@ # Sources of extrinsic information: M RM # Initializing the parameters using config directory /usr/local/config/ ... # human version. Using default transition matrix. -# Looks like /tmp/tmpb49zmbej/files/1/1/4/dataset_11447207-979d-4b84-a63d-14dd1e776f0e.dat is in fasta format. +# Looks like /tmp/tmppoq4flrj/files/8/9/9/dataset_8998cdab-ae1a-4630-be3b-51ec04b35598.dat is in fasta format. # We have hints for 0 sequences and for 0 of the sequences in the input set. # # ----- prediction on sequence number 1 (length = 9453, name = HS04636) ----- @@ -21,4 +21,4 @@ # Predicted genes for sequence number 2 on both strands # (none) # command line: -# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=on --codingseq=on --introns=off --stop=off --stop=off --cds=on --singlestrand=false /tmp/tmpb49zmbej/files/1/1/4/dataset_11447207-979d-4b84-a63d-14dd1e776f0e.dat --UTR=on --genemodel=complete --softmasking=1 --species=human +# augustus --strand=both --noInFrameStop=false --gff3=off --uniqueGeneId=true --protein=off --codingseq=off --introns=off --stop=off --stop=off --cds=off --singlestrand=false /tmp/tmppoq4flrj/files/8/9/9/dataset_8998cdab-ae1a-4630-be3b-51ec04b35598.dat --UTR=on --genemodel=complete --softmasking=1 --species=human