From fa8745b52bdc0b5090a0189c1292cb908ea5bc83 Mon Sep 17 00:00:00 2001 From: Alex Date: Mon, 6 Jan 2025 17:09:47 -0600 Subject: [PATCH] Fixed ProteinDB Writer method to be deterministic --- .../ProteinDbWriter.cs | 35 ++++++++++++------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/mzLib/UsefulProteomicsDatabases/ProteinDbWriter.cs b/mzLib/UsefulProteomicsDatabases/ProteinDbWriter.cs index dadba9e11..0eb2c0fee 100644 --- a/mzLib/UsefulProteomicsDatabases/ProteinDbWriter.cs +++ b/mzLib/UsefulProteomicsDatabases/ProteinDbWriter.cs @@ -286,7 +286,7 @@ private static Dictionary WriteNucleicAcidXmlDatabase( return newModResEntries; } - /// + // /// Writes a protein database in mzLibProteinDb format, with additional modifications from the AdditionalModsToAddToProteins list. /// /// @@ -324,8 +324,17 @@ public static Dictionary WriteXmlDatabase(Dictionary allRelevantModifications = new HashSet( - nonVariantProteins.SelectMany(p => p.SequenceVariations.SelectMany(sv => sv.OneBasedModifications).Concat(p.OneBasedPossibleLocalizedModifications).SelectMany(kv => kv.Value)) - .Concat(additionalModsToAddToProteins.Where(kv => nonVariantProteins.SelectMany(p => p.SequenceVariations.Select(sv => VariantApplication.GetAccession(p, new[] { sv })).Concat(new[] { p.Accession })).Contains(kv.Key)).SelectMany(kv => kv.Value.Select(v => v.Item2)))); + nonVariantProteins + .SelectMany(p => p.SequenceVariations + .SelectMany(sv => sv.OneBasedModifications) + .Concat(p.OneBasedPossibleLocalizedModifications) + .SelectMany(kv => kv.Value)) + .Concat(additionalModsToAddToProteins + .Where(kv => nonVariantProteins + .SelectMany(p => p.SequenceVariations + .Select(sv => VariantApplication.GetAccession(p, new[] { sv })).Concat(new[] { p.Accession })) + .Contains(kv.Key)) + .SelectMany(kv => kv.Value.Select(v => v.Item2)))); foreach (Modification mod in allRelevantModifications.OrderBy(m => m.IdWithMotif)) { @@ -384,7 +393,7 @@ public static Dictionary WriteXmlDatabase(Dictionary property in dbRef.Properties) + foreach (Tuple property in dbRef.Properties.OrderBy(t => t.Item1).ThenBy(t => t.Item2)) { writer.WriteStartElement("property"); writer.WriteAttributeString("type", property.Item1); @@ -397,7 +406,8 @@ public static Dictionary WriteXmlDatabase(Dictionary proteolysisProducts = protein.ProteolysisProducts.Where(p => !p.Type.Contains("truncation")).ToList(); + List proteolysisProducts = protein.ProteolysisProducts.Where(p => !p.Type.Contains("truncation")) + .OrderBy(p => p).ToList(); foreach (var proteolysisProduct in proteolysisProducts) { writer.WriteStartElement("feature"); @@ -413,23 +423,23 @@ public static Dictionary WriteXmlDatabase(Dictionary b.Key)) + foreach (var positionModKvp in GetModsForThisBioPolymer(protein, null, additionalModsToAddToProteins, newModResEntries).OrderBy(b => b.Key)) { - foreach (var modId in hm.Value) + foreach (var modId in positionModKvp.Value.OrderBy(mod => mod)) { writer.WriteStartElement("feature"); writer.WriteAttributeString("type", "modified residue"); writer.WriteAttributeString("description", modId); writer.WriteStartElement("location"); writer.WriteStartElement("position"); - writer.WriteAttributeString("position", hm.Key.ToString(CultureInfo.InvariantCulture)); + writer.WriteAttributeString("position", positionModKvp.Key.ToString(CultureInfo.InvariantCulture)); writer.WriteEndElement(); writer.WriteEndElement(); writer.WriteEndElement(); } } - foreach (var hm in protein.SequenceVariations) + foreach (var hm in protein.SequenceVariations.OrderBy(sv => sv)) { writer.WriteStartElement("feature"); writer.WriteAttributeString("type", "sequence variant"); @@ -458,7 +468,7 @@ public static Dictionary WriteXmlDatabase(Dictionary b.Key)) { - foreach (var modId in hmm.Value) + foreach (var modId in hmm.Value.OrderBy(mod => mod)) { writer.WriteStartElement("subfeature"); writer.WriteAttributeString("type", "modified residue"); @@ -475,7 +485,7 @@ public static Dictionary WriteXmlDatabase(Dictionary bond.OneBasedBeginPosition)) { writer.WriteStartElement("feature"); writer.WriteAttributeString("type", "disulfide bond"); @@ -500,7 +510,7 @@ public static Dictionary WriteXmlDatabase(Dictionary site.OneBasedBeginPosition)) { writer.WriteStartElement("feature"); writer.WriteAttributeString("type", "splice site"); @@ -538,6 +548,7 @@ public static Dictionary WriteXmlDatabase(Dictionary proteinList, string outputFileName, string delimeter) { using (StreamWriter writer = new StreamWriter(outputFileName))