From c60606f2509aedb453ad2f5347272f5e4470a3c5 Mon Sep 17 00:00:00 2001 From: Mark Bicknell Date: Thu, 11 Jul 2024 13:55:15 +0000 Subject: [PATCH] Merge branch 'INSTX-5803_midstrand_threshold_increase' into 'master' INSTX-5803 Increased midstrand_flank_score from 0.8 to 0.95 to reduce false matches. Closes INSTX-5803 See merge request machine-learning/dorado!1104 (cherry picked from commit d54de07451e0e63a0975915dc6c92841a4f3be4a) 440c3223 INSTX-5803 Increased midstrand_flank_score from 0.8 to 0.95 to reduce false matches. 39414f11 Updated example in CustomBarcodes.md to match the new default for midstrand_flank_score d2b8fd30 Update EXP-PBC096 mid strand detection unit test read to have a good enough... --- documentation/CustomBarcodes.md | 2 +- dorado/utils/barcode_kits.cpp | 2 ++ dorado/utils/barcode_kits.h | 2 +- .../barcode_demux/double_end_variant/EXP-PBC096_midstrand.fasta | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/documentation/CustomBarcodes.md b/documentation/CustomBarcodes.md index 85b9c930e..463d3c877 100644 --- a/documentation/CustomBarcodes.md +++ b/documentation/CustomBarcodes.md @@ -49,7 +49,7 @@ flank_left_pad = 5 flank_right_pad = 10 front_barcode_window = 175 rear_barcode_window = 175 -midstrand_flank_score = 0.8 +midstrand_flank_score = 0.95 ``` #### Arrangement Options diff --git a/dorado/utils/barcode_kits.cpp b/dorado/utils/barcode_kits.cpp index 86ee0c465..6b1438e31 100644 --- a/dorado/utils/barcode_kits.cpp +++ b/dorado/utils/barcode_kits.cpp @@ -120,6 +120,7 @@ const BarcodeKitScoringParams RBK114_PARAMS{ /*front_barcode_window*/ 175, /*rear_barcode_window*/ 175, /*min_flank_score*/ 0.0f, + /*midstrand_flank_score*/ 0.95f, }; const BarcodeKitScoringParams TWIST_PARAMS{ @@ -132,6 +133,7 @@ const BarcodeKitScoringParams TWIST_PARAMS{ /*front_barcode_window*/ 175, /*rear_barcode_window*/ 175, /*min_flank_score*/ 0.5f, + /*midstrand_flank_score*/ 0.95f, }; // Some arrangement names are just aliases of each other. This is because they were released diff --git a/dorado/utils/barcode_kits.h b/dorado/utils/barcode_kits.h index 0b3552588..c927a42ff 100644 --- a/dorado/utils/barcode_kits.h +++ b/dorado/utils/barcode_kits.h @@ -17,7 +17,7 @@ struct BarcodeKitScoringParams { int front_barcode_window = 175; int rear_barcode_window = 175; float min_flank_score = 0.5f; - float midstrand_flank_score = 0.8f; + float midstrand_flank_score = 0.95f; }; struct KitInfo { diff --git a/tests/data/barcode_demux/double_end_variant/EXP-PBC096_midstrand.fasta b/tests/data/barcode_demux/double_end_variant/EXP-PBC096_midstrand.fasta index 219607509..209763c13 100644 --- a/tests/data/barcode_demux/double_end_variant/EXP-PBC096_midstrand.fasta +++ b/tests/data/barcode_demux/double_end_variant/EXP-PBC096_midstrand.fasta @@ -1,2 +1,2 @@ >a6703fe8-baa2-4dbe-a2de-4b5b22acd99 -TGTACTTCGTTCCAGTTGCATTATGCTGGTGCTGTTCGGATTCTGTGGTAACTTCCTATTAATGCCTTTCTGTTGGTGCTGATGTGCGGCGTCTGCTTGGGTGTTTAACCTCCAAAAATCGCCGGAAGGGGCCCCTGCCCACTGAAACCGTGGTTTGCGATTCCTGGCCCTGTCGCTGAAGAATACAGCATCGCCTTTGGTCACTGGGCGTAACTGGAGGGCAAAGGTACGCCGGAAGGAGAGTATATACGCGCTGGATACCGGCTGCTGCTGGAATTCGGTACGTGCCTGCCTGCGCTGGGAAGATAAGCAGGCCTTTGTCAGCCGTCGAACCGGCATAAGGATTTGGAAATGAAACGGCGGCGTCTTAAACACCGACCTACGATATAGGAAGGCGGATAAGACGCGACCGGCGTCACATCCGGCGCTAGCCGTAAATTCTATACAAAATTACCGCCGCTCCAGATCTCAAAGCAATGAGCTGTGAGAGTTCTGCGCATCAGCATCGTGGAATTCGCTGAATACCGATTCCAGTCATCCGGCTCATCAATCGGAAATGGGTGTCGCCTTCCACTTCTGCGTCATTAATCAGATACAGTTTTTCTGCGCTTTTGGCAAGAACTGTTCATAAACGCGACCGCCGCCGCGATCACCCTTGGTGCGTACTACACGCCGCGATGGCTTCATCCACCAGCTACCGCGTTACGCGATCGTCAAATACCCGGTTGACTCTTCAGGGATAATATTTTTGCGTCTGGCAACGGACGACCGATTGATTCCCAGGTATGGCGGCCCATAATCACGGGTTTATTTAAGGTGTTGCGTTTAAACCAGGCGAGATCGGCAGGCAGGTTCCACGGCGTATGGCGTTTTCCATGCCGATAACGCGATCTACCGCTAACGCCGCAATCAGACTGATCATTGAGATTTCCCGATAAAAAAAATTGTCACAACCACTATGCGTAAAGCGTAAACCGTCGTCGACTGGTGCGAGGATGATGTTGAGGTTAAACACCCAAACGGAGCGCCGCAATATCAGCACCAGCAAGAAGGTTAATAGGGAAACACGATAGAATCCGAACAGCACCAGCAATACGTAATATTGTACTTCGTTCCAGTTGCATTATGCTGGTGCTGTTCGGATTCTGTGGTAACTTCCTATTAATGCCTTTCTGTTGGTGCTGATGTGCGGCGTCTGCTTGGGTGTTTAACCTCCAAAAATCGCCGGAAGGGGCCCCTGCCCACTGAAACCGTGGTTTGCGATTCCTGGCCCTGTCGCTGAAGAATACAGCATCGCCTTTGGTCACTGGGCGTAACTGGAGGGCAAAGGTACGCCGGAAGGAGAGTATATACGCGCTGGATACCGGCTGCTGCTGGAATTCGGTACGTGCCTGCCTGCGCTGGGAAGATAAGCAGGCCTTTGTCAGCCGTCGAACCGGCATAAGGATTTGGAAATGAAACGGCGGCGTCTTAAACACCGACCTACGATATAGGAAGGCGGATAAGACGCGACCGGCGTCACATCCGGCGCTAGCCGTAAATTCTATACAAAATTACCGCCGCTCCAGATCTCAAAGCAATGAGCTGTGAGAGTTCTGCGCATCAGCATCGTGGAATTCGCTGAATACCGATTCCAGTCATCCGGCTCATCAATCGGAAATGGGTGTCGCCTTCCACTTCTGCGTCATTAATCAGATACAGTTTTTCTGCGCTTTTGGCAAGAACTGTTCATAAACGCGACCGCCGCCGCGATCACCCTTGGTGCGTACTACACGCCGCGATGGCTTCATCCACCAGCTACCGCGTTACGCGATCGTCAAATACCCGGTTGACTCTTCAGGGATAATATTTTTGCGTCTGGCAACGGACGACCGATTGATTCCCAGGTATGGCGGCCCATAATCACGGGTTTATTTAAGGTGTTGCGTTTAAACCAGGCGAGATCGGCAGGCAGGTTCCACGGCGTATGGCGTTTTCCATGCCGATAACGCGATCTACCGCTAACGCCGCAATCAGACTGATCATTGAGATTTCCCGATAAAAAAAATTGTCACAACCACTATGCGTAAAGCGTAAACCGTCGTCGACTGGTGCGAGGATGATGTTGAGGTTAAACACCCAAACGGAGCGCCGCAATATCAGCACCAGCAAGAAGGTTAATAGGGAAACACGATAGAATCCGAACAGCACCAGCAATACGTAATAT +TGTACTTCGTTCCAGTTGCATTATGCTGGTGCTGTTCGGATTCTGTGGTAACTTCCTATTAATGCCTTTCTGTTGGTGCTGATGTGCGGCGTCTGCTTGGGTGTTTAACCTCCAAAAATCGCCGGAAGGGGCCCCTGCCCACTGAAACCGTGGTTTGCGATTCCTGGCCCTGTCGCTGAAGAATACAGCATCGCCTTTGGTCACTGGGCGTAACTGGAGGGCAAAGGTACGCCGGAAGGAGAGTATATACGCGCTGGATACCGGCTGCTGCTGGAATTCGGTACGTGCCTGCCTGCGCTGGGAAGATAAGCAGGCCTTTGTCAGCCGTCGAACCGGCATAAGGATTTGGAAATGAAACGGCGGCGTCTTAAACACCGACCTACGATATAGGAAGGCGGATAAGACGCGACCGGCGTCACATCCGGCGCTAGCCGTAAATTCTATACAAAATTACCGCCGCTCCAGATCTCAAAGCAATGAGCTGTGAGAGTTCTGCGCATCAGCATCGTGGAATTCGCTGAATACCGATTCCAGTCATCCGGCTCATCAATCGGAAATGGGTGTCGCCTTCCACTTCTGCGTCATTAATCAGATACAGTTTTTCTGCGCTTTTGGCAAGAACTGTTCATAAACGCGACCGCCGCCGCGATCACCCTTGGTGCGTACTACACGCCGCGATGGCTTCATCCACCAGCTACCGCGTTACGCGATCGTCAAATACCCGGTTGACTCTTCAGGGATAATATTTTTGCGTCTGGCAACGGACGACCGATTGATTCCCAGGTATGGCGGCCCATAATCACGGGTTTATTTAAGGTGTTGCGTTTAAACCAGGCGAGATCGGCAGGCAGGTTCCACGGCGTATGGCGTTTTCCATGCCGATAACGCGATCTACCGCTAACGCCGCAATCAGACTGATCATTGAGATTTCCCGATAAAAAAAATTGTCACAACCACTATGCGTAAAGCGTAAACCGTCGTCGACTGGTGCGAGGATGATGTTGAGGTTAAACACCCAAACGGAGCGCCGCAATATCAGCACCAGCAAGAAGGTTAATAGGGAAACACGATAGAATCCGAACAGCACCAGCAATACGTAATATTGTACTTCGTTCCAGTTGCATTATGCTGGTGCTGTTCGGATTCTGTGGTAACTTCCTATTAACCTTTCTGTTGGTGCTGATATGCGGCGTCTGCTTGGGTGTTTAACCTCCAAAAATCGCCGGAAGGGGCCCCTGCCCACTGAAACCGTGGTTTGCGATTCCTGGCCCTGTCGCTGAAGAATACAGCATCGCCTTTGGTCACTGGGCGTAACTGGAGGGCAAAGGTACGCCGGAAGGAGAGTATATACGCGCTGGATACCGGCTGCTGCTGGAATTCGGTACGTGCCTGCCTGCGCTGGGAAGATAAGCAGGCCTTTGTCAGCCGTCGAACCGGCATAAGGATTTGGAAATGAAACGGCGGCGTCTTAAACACCGACCTACGATATAGGAAGGCGGATAAGACGCGACCGGCGTCACATCCGGCGCTAGCCGTAAATTCTATACAAAATTACCGCCGCTCCAGATCTCAAAGCAATGAGCTGTGAGAGTTCTGCGCATCAGCATCGTGGAATTCGCTGAATACCGATTCCAGTCATCCGGCTCATCAATCGGAAATGGGTGTCGCCTTCCACTTCTGCGTCATTAATCAGATACAGTTTTTCTGCGCTTTTGGCAAGAACTGTTCATAAACGCGACCGCCGCCGCGATCACCCTTGGTGCGTACTACACGCCGCGATGGCTTCATCCACCAGCTACCGCGTTACGCGATCGTCAAATACCCGGTTGACTCTTCAGGGATAATATTTTTGCGTCTGGCAACGGACGACCGATTGATTCCCAGGTATGGCGGCCCATAATCACGGGTTTATTTAAGGTGTTGCGTTTAAACCAGGCGAGATCGGCAGGCAGGTTCCACGGCGTATGGCGTTTTCCATGCCGATAACGCGATCTACCGCTAACGCCGCAATCAGACTGATCATTGAGATTTCCCGATAAAAAAAATTGTCACAACCACTATGCGTAAAGCGTAAACCGTCGTCGACTGGTGCGAGGATGATGTTGAGGTTAAACACCCAAACGGAGCGCCGCAATATCAGCACCAGCAAGAAGGTTAATAGGGAAACACGATAGAATCCGAACAGCACCAGCAATACGTAATAT