Skip to content

Commit

Permalink
Merge branch 'DOR-830_rna_5.1.0' into 'master'
Browse files Browse the repository at this point in the history
DOR-830 RNA v5.1.0 and DNA v5.0 mods-v2

Closes DOR-830

See merge request machine-learning/dorado!1187
  • Loading branch information
HalfPhoton committed Sep 16, 2024
2 parents 0b79407 + 42d98d8 commit a69c0a2
Show file tree
Hide file tree
Showing 4 changed files with 183 additions and 9 deletions.
20 changes: 14 additions & 6 deletions dorado/models/metadata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,24 @@ const std::unordered_map<std::string, ModelVariant> map = {

namespace mods {
// Lookup table from a modification name string (e.g. "5mCG_5hmCG", "pseU") to
// its ModsVariant enumerator.
//
// NOTE(review): eight keys are listed twice below - once in the packed
// two-per-line rows and once in the one-per-line rows. Each repeated key
// carries the same value in both places, and std::unordered_map holds a single
// element per key, so the resulting table has 11 distinct entries either way.
// Presumably the packed rows are the pre-change side of a diff; confirm before
// removing them.
//
// NOTE(review): a CHECK in tests/ModelMetadataTest.cpp compares a mods map's
// size() against static_cast<size_t>(ModsVariant::NONE); if that is this table,
// every enumerator before NONE needs exactly one key here - TODO confirm.
const std::unordered_map<std::string, ModsVariant> map = {
{"4mC_5mC", ModsVariant::M_4mC_5mC}, {"5mC_5hmC", ModsVariant::M_5mC_5hmC},
{"5mCG", ModsVariant::M_5mCG}, {"5mCG_5hmCG", ModsVariant::M_5mCG_5hmCG},
{"5mC", ModsVariant::M_5mC}, {"6mA", ModsVariant::M_6mA},
{"m6A", ModsVariant::M_m6A}, {"m6A_DRACH", ModsVariant::M_m6A_DRACH},
{"4mC_5mC", ModsVariant::M_4mC_5mC},
{"5mC_5hmC", ModsVariant::M_5mC_5hmC},
{"5mCG", ModsVariant::M_5mCG},
{"5mCG_5hmCG", ModsVariant::M_5mCG_5hmCG},
{"5mC", ModsVariant::M_5mC},
{"m5C", ModsVariant::M_m5C},
{"6mA", ModsVariant::M_6mA},
{"m6A", ModsVariant::M_m6A},
{"m6A_DRACH", ModsVariant::M_m6A_DRACH},
{"inosine_m6A", ModsVariant::M_inosine_m6A},
{"pseU", ModsVariant::M_pseU},
};

// Maps each ModsVariant to a single-letter base string ("C", "A" or "T"); going
// by the table's name, this is the canonical base the modification applies to.
//
// NOTE(review): M_5mC and M_6mA each appear twice below with the same value;
// std::unordered_map holds a single element per key, so the duplicates do not
// change the table. Presumably one copy is the pre-change side of a diff -
// confirm before removing it.
//
// NOTE(review): M_pseU is keyed to "T" rather than "U" - presumably because
// sequences are handled in the DNA alphabet; verify against the callers.
const std::unordered_map<ModsVariant, std::string> canonical_base_map = {
{ModsVariant::M_4mC_5mC, "C"}, {ModsVariant::M_5mC_5hmC, "C"},
{ModsVariant::M_5mCG, "C"}, {ModsVariant::M_5mCG_5hmCG, "C"},
{ModsVariant::M_5mC, "C"}, {ModsVariant::M_6mA, "A"},
{ModsVariant::M_5mC, "C"}, {ModsVariant::M_m5C, "C"},
{ModsVariant::M_6mA, "A"}, {ModsVariant::M_inosine_m6A, "A"},
{ModsVariant::M_m6A, "A"}, {ModsVariant::M_m6A_DRACH, "A"},
{ModsVariant::M_pseU, "T"},
};
Expand All @@ -48,7 +55,8 @@ const std::unordered_map<std::string, ModelVersion> map = {
{"v3.5.2", ModelVersion::v3_5_2}, {"v3.6.0", ModelVersion::v3_6_0},
{"v4.0.0", ModelVersion::v4_0_0}, {"v4.1.0", ModelVersion::v4_1_0},
{"v4.2.0", ModelVersion::v4_2_0}, {"v4.3.0", ModelVersion::v4_3_0},
{"v5.0.0", ModelVersion::v5_0_0}, {"latest", ModelVersion::NONE}};
{"v5.0.0", ModelVersion::v5_0_0}, {"v5.1.0", ModelVersion::v5_1_0},
{"latest", ModelVersion::NONE}};
} // namespace version

const std::unordered_map<std::string, ModelVariant>& model_variants_map() {
Expand Down
3 changes: 3 additions & 0 deletions dorado/models/metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,11 @@ enum class ModsVariant : uint8_t {
M_5mCG,
M_5mCG_5hmCG,
M_5mC,
M_m5C,
M_6mA,
M_m6A,
M_m6A_DRACH,
M_inosine_m6A,
M_pseU,
NONE // NONE must be last
};
Expand All @@ -53,6 +55,7 @@ enum class ModelVersion : uint8_t {
v4_2_0,
v4_3_0,
v5_0_0,
v5_1_0,
NONE // NONE must be last
};

Expand Down
157 changes: 155 additions & 2 deletions dorado/models/models.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -408,7 +408,7 @@ const std::vector<ModelInfo> models = {
ModelVariantPair{ModelVariant::HAC, VV::v3_0_0, true},
},

// RNA004
// RNA004 v3.0.1
ModelInfo{
"[email protected]",
"2afa5de03f28162dd85b7be4a2dda108be7cc0a19062db7cb8460628aac462c0",
Expand All @@ -427,6 +427,7 @@ const std::vector<ModelInfo> models = {
CC::RNA004_130BPS,
ModelVariantPair{ModelVariant::SUP, VV::v3_0_1},
},
// RNA v5.0.0
ModelInfo{
"[email protected]",
"3b45ecedf2e20c56e15033402deb77f3c4e67df49aea8d7b76acdbb4029e8ea0",
Expand All @@ -445,6 +446,25 @@ const std::vector<ModelInfo> models = {
CC::RNA004_130BPS,
ModelVariantPair{ModelVariant::SUP, VV::v5_0_0},
},
// RNA v5.1.0
ModelInfo{
"[email protected]",
"c01353ac8362479ceedf607c41e5f238efd629725556d896161baa194b7354be",
CC::RNA004_130BPS,
ModelVariantPair{ModelVariant::FAST, VV::v5_1_0},
},
ModelInfo{
"[email protected]",
"36ac8bdb2baaf32e697086962078f83a001a3ffe1461e358fabef15c08b15c5e",
CC::RNA004_130BPS,
ModelVariantPair{ModelVariant::HAC, VV::v5_1_0, true},
},
ModelInfo{
"[email protected]",
"ab7c5687f149901868898791b8d243c28e8345c9b61e3abce30d63e112ebc3b1",
CC::RNA004_130BPS,
ModelVariantPair{ModelVariant::SUP, VV::v5_1_0},
},
};

} // namespace simplex
Expand Down Expand Up @@ -801,6 +821,20 @@ const std::vector<ModelInfo> models = {
ModelVariantPair{ModelVariant::SUP, VV::v5_0_0},
ModsVariantPair{ModsVariant::M_4mC_5mC, VV::v1_0_0},
},
ModelInfo{
"[email protected]_4mC_5mC@v2",
"d7c4ee43e954b081a0179e5236245a62094fcecb1454de2b3901f2b10d8807d7",
CC::DNA_R10_4_1_E8_2_400BPS_5KHZ,
ModelVariantPair{ModelVariant::HAC, VV::v5_0_0},
ModsVariantPair{ModsVariant::M_4mC_5mC, VV::v2_0_0},
},
ModelInfo{
"[email protected]_4mC_5mC@v2",
"eb971340e111ebfdb27bd2b70390c5f0252ba91f5ac92eea0dbd59524bac68f7",
CC::DNA_R10_4_1_E8_2_400BPS_5KHZ,
ModelVariantPair{ModelVariant::SUP, VV::v5_0_0},
ModsVariantPair{ModsVariant::M_4mC_5mC, VV::v2_0_0},
},
// 5mC+5hmC all-context HAC and SUP
ModelInfo{
"[email protected]_5mC_5hmC@v1",
Expand All @@ -816,6 +850,20 @@ const std::vector<ModelInfo> models = {
ModelVariantPair{ModelVariant::SUP, VV::v5_0_0},
ModsVariantPair{ModsVariant::M_5mC_5hmC, VV::v1_0_0},
},
ModelInfo{
"[email protected]_5mC_5hmC@v2",
"8bde4f0fd27a2e2fbf98942a6e1cc1d4547c6678a69940a2152a6a5cdb98cc3c",
CC::DNA_R10_4_1_E8_2_400BPS_5KHZ,
ModelVariantPair{ModelVariant::HAC, VV::v5_0_0},
ModsVariantPair{ModsVariant::M_5mC_5hmC, VV::v2_0_0},
},
ModelInfo{
"[email protected]_5mC_5hmC@v2",
"36de2e58edaf1e1a53bca0ebf029164112b9dbaad413672dde45efb093b7fcf6",
CC::DNA_R10_4_1_E8_2_400BPS_5KHZ,
ModelVariantPair{ModelVariant::SUP, VV::v5_0_0},
ModsVariantPair{ModsVariant::M_5mC_5hmC, VV::v2_0_0},
},
// 5mC+5hmC CG-context HAC and SUP
ModelInfo{
"[email protected]_5mCG_5hmCG@v1",
Expand All @@ -831,6 +879,20 @@ const std::vector<ModelInfo> models = {
ModelVariantPair{ModelVariant::SUP, VV::v5_0_0},
ModsVariantPair{ModsVariant::M_5mCG_5hmCG, VV::v1_0_0},
},
ModelInfo{
"[email protected]_5mCG_5hmCG@v2",
"5c2452e4ccd443e7f6549afb6ac732b03b90480801e2a29850e5616185cb6d5b",
CC::DNA_R10_4_1_E8_2_400BPS_5KHZ,
ModelVariantPair{ModelVariant::HAC, VV::v5_0_0},
ModsVariantPair{ModsVariant::M_5mCG_5hmCG, VV::v2_0_0},
},
ModelInfo{
"[email protected]_5mCG_5hmCG@v2",
"2e2d4de2ec1df90b37c50b3367bea90f7b9dfab11b90e98ee6963876589be4cc",
CC::DNA_R10_4_1_E8_2_400BPS_5KHZ,
ModelVariantPair{ModelVariant::SUP, VV::v5_0_0},
ModsVariantPair{ModsVariant::M_5mCG_5hmCG, VV::v2_0_0},
},
// 6mA all-context HAC and SUP
ModelInfo{
"[email protected]_6mA@v1",
Expand All @@ -846,8 +908,22 @@ const std::vector<ModelInfo> models = {
ModelVariantPair{ModelVariant::SUP, VV::v5_0_0},
ModsVariantPair{ModsVariant::M_6mA, VV::v1_0_0},
},

ModelInfo{
"[email protected]_6mA@v2",
"919aaf7fdfbf50a1fe20124e07014fa2b38cc10f3dadb27c56b415309147eee9",
CC::DNA_R10_4_1_E8_2_400BPS_5KHZ,
ModelVariantPair{ModelVariant::HAC, VV::v5_0_0},
ModsVariantPair{ModsVariant::M_6mA, VV::v2_0_0},
},
ModelInfo{
"[email protected]_6mA@v2",
"fc1d247475162d4f782d66bb3cd6f19c76e5589a8e064f738de4896f940568b3",
CC::DNA_R10_4_1_E8_2_400BPS_5KHZ,
ModelVariantPair{ModelVariant::SUP, VV::v5_0_0},
ModsVariantPair{ModsVariant::M_6mA, VV::v2_0_0},
},
// RNA004 v3.0.1
// m6A - DRACH
ModelInfo{
"[email protected]_m6A_DRACH@v1",
"356b3eed19916d83d59cbfd24bb9f33823d6f738891f3ac8fe77319ae5cbde7f",
Expand All @@ -871,6 +947,21 @@ const std::vector<ModelInfo> models = {
ModelVariantPair{ModelVariant::SUP, VV::v5_0_0},
ModsVariantPair{ModsVariant::M_m6A, VV::v1_0_0},
},
// m6A - DRACH
ModelInfo{
"[email protected]_m6A_DRACH@v1",
"b140acbfc04bb24080b39cc81d71016895dc74454c7cb630629b93ec60e315c9",
CC::RNA004_130BPS,
ModelVariantPair{ModelVariant::HAC, VV::v5_0_0},
ModsVariantPair{ModsVariant::M_m6A_DRACH, VV::v1_0_0},
},
ModelInfo{
"[email protected]_m6A_DRACH@v1",
"62dd2d9e225fa9638258bd33063fa930c4179b13878064547d5be7b33d478b23",
CC::RNA004_130BPS,
ModelVariantPair{ModelVariant::SUP, VV::v5_0_0},
ModsVariantPair{ModsVariant::M_m6A_DRACH, VV::v1_0_0},
},
// pseU - all context
ModelInfo{
"[email protected]_pseU@v1",
Expand All @@ -885,6 +976,68 @@ const std::vector<ModelInfo> models = {
CC::RNA004_130BPS,
ModelVariantPair{ModelVariant::SUP, VV::v5_0_0},
ModsVariantPair{ModsVariant::M_pseU, VV::v1_0_0},
},

// RNA004 v5.1.0
// m5C - all context
ModelInfo{
"[email protected]_m5C@v1",
"d9c142ba65c15cebaf42ea44a3e5731bc3d59f89a2b07e55701f7152bde2937e",
CC::RNA004_130BPS,
ModelVariantPair{ModelVariant::HAC, VV::v5_1_0},
ModsVariantPair{ModsVariant::M_m5C, VV::v1_0_0},
},
ModelInfo{
"[email protected]_m5C@v1",
"073a9a66a613f61fca83447816c4fd95ce608c854b54540e2a9f82b4c1498a3a",
CC::RNA004_130BPS,
ModelVariantPair{ModelVariant::SUP, VV::v5_1_0},
ModsVariantPair{ModsVariant::M_m5C, VV::v1_0_0},
},
// inosine_m6A - all context
ModelInfo{
"[email protected]_inosine_m6A@v1",
"e709c9ce7e256f8d2bb259a0ab22d2bddc60c61834d3a020e2c8fc5721c5d548",
CC::RNA004_130BPS,
ModelVariantPair{ModelVariant::HAC, VV::v5_1_0},
ModsVariantPair{ModsVariant::M_inosine_m6A, VV::v1_0_0},
},
ModelInfo{
"[email protected]_inosine_m6A@v1",
"8bcbd48f9f01eb624a8fdcb928c204b915ed002c1ddc600dfa3c2be16879b7df",
CC::RNA004_130BPS,
ModelVariantPair{ModelVariant::SUP, VV::v5_1_0},
ModsVariantPair{ModsVariant::M_inosine_m6A, VV::v1_0_0},
},
// m6A - DRACH
ModelInfo{
"[email protected]_m6A_DRACH@v1",
"911ba609b657f8e24fe44519a965d0d9bac91f35e7026c8ee1614492bf7ce3f9",
CC::RNA004_130BPS,
ModelVariantPair{ModelVariant::HAC, VV::v5_1_0},
ModsVariantPair{ModsVariant::M_m6A_DRACH, VV::v1_0_0},
},
ModelInfo{
"[email protected]_m6A_DRACH@v1",
"ec616e5d725860e1686c17d70c8f135c6e0e66f6c3e7e28a6cdefe19cae2e91f",
CC::RNA004_130BPS,
ModelVariantPair{ModelVariant::SUP, VV::v5_1_0},
ModsVariantPair{ModsVariant::M_m6A_DRACH, VV::v1_0_0},
},
// pseU - all context
ModelInfo{
"[email protected]_pseU@v1",
"5d7c3cf12736baaba987c2ca899abd89193e859edfc7b9aad82a00e4bbc2e6bd",
CC::RNA004_130BPS,
ModelVariantPair{ModelVariant::HAC, VV::v5_1_0},
ModsVariantPair{ModsVariant::M_pseU, VV::v1_0_0},
},
ModelInfo{
"[email protected]_pseU@v1",
"02049be4f690cdf4a1200f6077b657c43587d1be2816fab01bb3f02f06e2cb7c",
CC::RNA004_130BPS,
ModelVariantPair{ModelVariant::SUP, VV::v5_1_0},
ModsVariantPair{ModsVariant::M_pseU, VV::v1_0_0},
}};

} // namespace modified
Expand Down
12 changes: 11 additions & 1 deletion tests/ModelMetadataTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,11 @@ TEST_CASE(TEST_TAG " ModsVariant enumeration", TEST_TAG) {
CHECK(mods.at("5mCG") == ModsVariant::M_5mCG);
CHECK(mods.at("5mCG_5hmCG") == ModsVariant::M_5mCG_5hmCG);
CHECK(mods.at("5mC") == ModsVariant::M_5mC);
CHECK(mods.at("m5C") == ModsVariant::M_m5C);
CHECK(mods.at("6mA") == ModsVariant::M_6mA);
CHECK(mods.at("m6A") == ModsVariant::M_m6A);
CHECK(mods.at("m6A_DRACH") == ModsVariant::M_m6A_DRACH);
CHECK(mods.at("inosine_m6A") == ModsVariant::M_inosine_m6A);
CHECK(mods.at("pseU") == ModsVariant::M_pseU);
CHECK(mods.size() == static_cast<size_t>(ModsVariant::NONE));
}
Expand All @@ -70,9 +72,11 @@ TEST_CASE(TEST_TAG " ModsVariant enumeration", TEST_TAG) {
CHECK(get_mods_variant("5mCG") == ModsVariant::M_5mCG);
CHECK(get_mods_variant("5mCG_5hmCG") == ModsVariant::M_5mCG_5hmCG);
CHECK(get_mods_variant("5mC") == ModsVariant::M_5mC);
CHECK(get_mods_variant("m5C") == ModsVariant::M_m5C);
CHECK(get_mods_variant("6mA") == ModsVariant::M_6mA);
CHECK(get_mods_variant("m6A") == ModsVariant::M_m6A);
CHECK(get_mods_variant("m6A_DRACH") == ModsVariant::M_m6A_DRACH);
CHECK(get_mods_variant("inosine_m6A") == ModsVariant::M_inosine_m6A);
CHECK(get_mods_variant("pseU") == ModsVariant::M_pseU);
for (const auto& it : {"", "foo", "[email protected]_5mC@v2"}) {
CHECK(get_mods_variant(it) == ModsVariant::NONE);
Expand All @@ -85,9 +89,11 @@ TEST_CASE(TEST_TAG " ModsVariant enumeration", TEST_TAG) {
CHECK(to_string(ModsVariant::M_5mCG) == "5mCG");
CHECK(to_string(ModsVariant::M_5mCG_5hmCG) == "5mCG_5hmCG");
CHECK(to_string(ModsVariant::M_5mC) == "5mC");
CHECK(to_string(ModsVariant::M_m5C) == "m5C");
CHECK(to_string(ModsVariant::M_6mA) == "6mA");
CHECK(to_string(ModsVariant::M_m6A_DRACH) == "m6A_DRACH");
CHECK(to_string(ModsVariant::M_m6A) == "m6A");
CHECK(to_string(ModsVariant::M_m6A_DRACH) == "m6A_DRACH");
CHECK(to_string(ModsVariant::M_inosine_m6A) == "inosine_m6A");
CHECK(to_string(ModsVariant::M_pseU) == "pseU");
CHECK_THROWS_AS(to_string(ModsVariant::NONE), std::logic_error);
}
Expand Down Expand Up @@ -116,7 +122,9 @@ TEST_CASE(TEST_TAG " mods_canonical_base_map", TEST_TAG) {
CHECK(mods.at(ModsVariant::M_5mCG) == "C");
CHECK(mods.at(ModsVariant::M_5mCG_5hmCG) == "C");
CHECK(mods.at(ModsVariant::M_5mC) == "C");
CHECK(mods.at(ModsVariant::M_m5C) == "C");
CHECK(mods.at(ModsVariant::M_6mA) == "A");
CHECK(mods.at(ModsVariant::M_inosine_m6A) == "A");
CHECK(mods.at(ModsVariant::M_m6A) == "A");
CHECK(mods.at(ModsVariant::M_m6A_DRACH) == "A");
CHECK(mods.at(ModsVariant::M_pseU) == "T");
Expand Down Expand Up @@ -146,6 +154,8 @@ TEST_CASE(TEST_TAG " ModelVersion enumeration", TEST_TAG) {
CHECK(to_string(ModelVersion::v4_1_0) == "v4.1.0");
CHECK(to_string(ModelVersion::v4_2_0) == "v4.2.0");
CHECK(to_string(ModelVersion::v4_3_0) == "v4.3.0");
CHECK(to_string(ModelVersion::v5_0_0) == "v5.0.0");
CHECK(to_string(ModelVersion::v5_1_0) == "v5.1.0");
CHECK(to_string(ModelVersion::NONE) == "latest");
CHECK(vers.size() ==
static_cast<size_t>(ModelVersion::NONE) + 1); // +1 as "NONE" is included in the map
Expand Down

0 comments on commit a69c0a2

Please sign in to comment.