Skip to content

Commit

Permalink
final comments
Browse files: browse the repository at this point in the history
  • Loading branch information
unknown committed Jan 31, 2018
1 parent 0c210b7 commit feb6054
Show file tree
Hide file tree
Showing 21 changed files with 83 additions and 8,267 deletions.
5 changes: 3 additions & 2 deletions ProteoformSuiteGUI/DisplayObjects/DisplayTopDownHit.cs
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,9 @@ public double Score
get { return h.score; }
}

public string PFR
public string PFR_accession
{
get { return h.pfr; }
get { return h.pfr_accession; }
}

#endregion Public Properties
Expand Down Expand Up @@ -154,6 +154,7 @@ private static string header(string property_name)
if (property_name == nameof(retention_time)) return "Retention Time";
if (property_name == nameof(pscore)) return "P-Score";
if (property_name == nameof(ptm_description)) return "PTM Description";
if (property_name == nameof(PFR_accession)) return "PFR Accession";
return null;
}

Expand Down
5 changes: 3 additions & 2 deletions ProteoformSuiteGUI/DisplayObjects/DisplayTopDownProteoform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,9 @@ public int Observations
get { return t.topdown_hits.Count; }
}

public string PFR
public string PFR_accession
{
get { return t.pfr; }
get { return t.pfr_accession; }
}

public string family_id
Expand Down Expand Up @@ -170,6 +170,7 @@ public static string header(string name)
if (name == nameof(manual_id)) return "Best Hit Info";
if (name == nameof(family_id)) return "Family ID";
if (name == nameof(mass_error)) return "Mass Error";
if (name == nameof(PFR_accession)) return "PFR Accession";
return null;
}

Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 0 additions & 5 deletions ProteoformSuiteGUI/ExperimentTheoreticalComparison.cs
Original file line number Diff line number Diff line change
Expand Up @@ -477,10 +477,5 @@ private void ct_ET_peakList_MouseClick(object sender, MouseEventArgs e)
}

#endregion Tooltip Private Methods

private void label12_Click(object sender, EventArgs e)
{

}
}
}
2 changes: 1 addition & 1 deletion ProteoformSuiteGUI/TopDown.cs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ public void FillTablesAndCharts()
mods = Sweet.lollipop.topdown_proteoforms.SelectMany(p => p.topdown_ptm_set.ptm_combination).Select(m => m.modification.id).Distinct().ToList();
tb_tdProteoforms.Text = Sweet.lollipop.topdown_proteoforms.Count.ToString();
tb_td_hits.Text = Sweet.lollipop.top_down_hits.Count.ToString();
tb_unique_PFRs.Text = Sweet.lollipop.topdown_proteoforms.Select(p => p.pfr).Distinct().Count().ToString();
tb_unique_PFRs.Text = Sweet.lollipop.topdown_proteoforms.Select(p => p.pfr_accession).Distinct().Count().ToString();
}

public void RunTheGamut(bool full_run)
Expand Down
19 changes: 10 additions & 9 deletions ProteoformSuiteInternal/Calibration.cs
Original file line number Diff line number Diff line change
Expand Up @@ -193,8 +193,11 @@ private DataPointAquisitionResults GetDataPoints()
{
//look around theoretical mass of topdown hit identified proteoforms - 10 ppm and 5 minutes same br, tr, fraction, condition (same file!)
//if neucode labeled, look for the light component mass (loaded in...)
List<Component> potential_matches = Sweet.lollipop.calibration_components.Where(c => c.input_file.lt_condition == raw_file.lt_condition && (Sweet.lollipop.neucode_labeled || c.input_file.biological_replicate == raw_file.biological_replicate) && c.input_file.fraction == raw_file.fraction
&& c.input_file.technical_replicate == raw_file.technical_replicate).ToList();
List<Component> potential_matches = Sweet.lollipop.calibration_components.
Where(c => c.input_file.lt_condition == raw_file.lt_condition
&& (Sweet.lollipop.neucode_labeled || c.input_file.biological_replicate == raw_file.biological_replicate)
&& c.input_file.fraction == raw_file.fraction
&& c.input_file.technical_replicate == raw_file.technical_replicate).ToList();
if (potential_matches.Count > 0)
matching_component = potential_matches.Where(c =>
Math.Abs(c.charge_states.OrderByDescending(s => s.intensity).First().mz_centroid.ToMass(c.charge_states.OrderByDescending(s => s.intensity).First().charge_count) - identification.theoretical_mass) * 1e6 / c.charge_states.OrderByDescending(s => s.intensity).First().mz_centroid.ToMass(c.charge_states.OrderByDescending(s => s.intensity).First().charge_count) < 10
Expand All @@ -211,14 +214,12 @@ private DataPointAquisitionResults GetDataPoints()
rt = myMsDataFile.GetOneBasedScan(scanNumber + 1).RetentionTime;
}
proteinCharge = matching_component.charge_states.OrderByDescending(c => c.intensity).First().charge_count;
}
else
{
//if top-down file, needs to be hit from same techrep (exact same raw file...)
if (identification.technical_replicate != raw_file.technical_replicate) continue;
}
}
else if(identification.technical_replicate != raw_file.technical_replicate) continue;


var SequenceWithChemicalFormulas = identification.GetSequenceWithChemicalFormula();
var SequenceWithChemicalFormulas = identification.GetSequenceWithChemicalFormula
();
if (SequenceWithChemicalFormulas == null)
{
continue;
Expand Down
2 changes: 1 addition & 1 deletion ProteoformSuiteInternal/ComponentReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ public List<Component> read_components_from_xlsx(InputFile file, bool remove_mis
int charge_row_index = 0;
string scan_range = "";
List<List<string>> cells = ExcelReader.get_cell_strings(file, false);
for (int i = 0; i < cells.Count(); i++)
for (int i = 0; i < cells.Count; i++)
{
if (i == 0) continue; //skip component header
List<string> cellStrings = cells[i];
Expand Down
22 changes: 14 additions & 8 deletions ProteoformSuiteInternal/Lollipop.cs
Original file line number Diff line number Diff line change
Expand Up @@ -362,10 +362,10 @@ public List<TopDownProteoform> aggregate_td_hits(List<TopDownHit> top_down_hits,
//get topdown hits that meet criteria
List<TopDownHit> remaining_td_hits = top_down_hits.Where(h => h.score >= min_score_td && ((biomarker && h.tdResultType == TopDownResultType.Biomarker) || (tight_abs_mass && h.tdResultType == TopDownResultType.TightAbsoluteMass))).OrderByDescending(h => h.score).ThenBy(h => h.pscore).ThenBy(h => h.reported_mass).ToList();

List<string> PFRs = remaining_td_hits.Select(h => h.pfr).Distinct().ToList();
Parallel.ForEach(PFRs, pfr =>
List<string> unique_proteoform_ids = remaining_td_hits.Select(h => h.pfr_accession).Distinct().ToList();
Parallel.ForEach(unique_proteoform_ids, pfr =>
{
List<TopDownHit> hits_by_pfr = remaining_td_hits.Where(h => h.pfr == pfr).ToList();
List<TopDownHit> hits_by_pfr = remaining_td_hits.Where(h => h.pfr_accession == pfr).ToList();
List<TopDownProteoform> first_aggregation = new List<TopDownProteoform>();
//aggregate to td hit w/ highest c-score as root - 1st average for retention time
while (hits_by_pfr.Count > 0)
Expand Down Expand Up @@ -400,7 +400,7 @@ public List<TopDownProteoform> aggregate_td_hits(List<TopDownHit> top_down_hits,
//convert unlabeled mass to neucode light mass based on lysine count (used for topdown identifications)
public double get_neucode_mass(double unlabeled_mass, int lysine_count)
{
return (unlabeled_mass - lysine_count * 128.094963 + lysine_count * 136.109162);
return unlabeled_mass - lysine_count * 128.094963 + lysine_count * 136.109162;
}

#endregion TOPDOWN
Expand Down Expand Up @@ -452,7 +452,7 @@ public List<ExperimentalProteoform> aggregate_proteoforms(bool two_pass_validati
{
community.experimental_proteoforms = Sweet.lollipop.target_proteoform_community.experimental_proteoforms.Select(e => e.topdown_id ? new TopDownProteoform(e as TopDownProteoform) : new ExperimentalProteoform(e)).ToArray();
}
if(get_files(input_files, Purpose.Quantification).Count() > 0)
if (get_files(input_files, Purpose.Quantification).Count() > 0)
{
assignQuantificationComponents(vetted_proteoforms, raw_quantification_components);
}
Expand Down Expand Up @@ -506,7 +506,7 @@ public void assign_best_components_for_manual_validation(IEnumerable<Experimenta

public List<ExperimentalProteoform> add_topdown_proteoforms(List<ExperimentalProteoform> vetted_proteoforms, List<TopDownProteoform> topdown_proteoforms)
{
foreach (TopDownProteoform topdown in topdown_proteoforms.Where(t => t.accepted).OrderByDescending(t => t.topdown_hits.Max(h => h.score)).ThenBy(t => t.topdown_hits.Min(h => h.pscore)).ThenBy(t => t.topdown_hits.Count()).ThenBy(t => t.agg_mass))
foreach (TopDownProteoform topdown in topdown_proteoforms.Where(t => t.accepted).OrderByDescending(t => t.topdown_hits.Max(h => h.score)).ThenBy(t => t.topdown_hits.Min(h => h.pscore)).ThenBy(t => t.topdown_hits.Count).ThenBy(t => t.agg_mass))
{
double mass = topdown.modified_mass;
List<ProteoformRelation> all_td_relations = new List<ProteoformRelation>();
Expand Down Expand Up @@ -1171,7 +1171,12 @@ public string calibrate_files()
//determine component and td hit shifts
determine_shifts(raw_file);
//calibrate component xlsx files
foreach (InputFile f in input_files.Where(f => f.lt_condition == raw_file.lt_condition && f.purpose == Purpose.CalibrationIdentification && f.biological_replicate == raw_file.biological_replicate && f.fraction == raw_file.fraction && f.technical_replicate == raw_file.technical_replicate)) Calibration.calibrate_components_in_xlsx(f);
foreach (InputFile f in input_files.Where(f => f.lt_condition == raw_file.lt_condition && f.purpose == Purpose.CalibrationIdentification
&& f.biological_replicate == raw_file.biological_replicate && f.fraction == raw_file.fraction
&& f.technical_replicate == raw_file.technical_replicate))
{
Calibration.calibrate_components_in_xlsx(f);
}
}
else filenames_did_not_calibrate.Add(raw_file.filename);
}
Expand All @@ -1180,7 +1185,8 @@ public string calibrate_files()
}
if (calibrate_td_files)
{
foreach (InputFile file in input_files.Where(f => f.purpose == Purpose.CalibrationTopDown && td_hits_calibration.Any(h => h.file == f && td_hit_correction.ContainsKey(new Tuple<string, int, double>(h.filename, h.ms2ScanNumber, h.reported_mass)))))
foreach (InputFile file in input_files.Where(f => f.purpose == Purpose.CalibrationTopDown &&
td_hits_calibration.Any(h => h.file == f && td_hit_correction.ContainsKey(new Tuple<string, int, double>(h.filename, h.ms2ScanNumber, h.reported_mass)))))
{
Calibration.calibrate_td_hits_file(file);
}
Expand Down
2 changes: 1 addition & 1 deletion ProteoformSuiteInternal/ProteinSequenceGroup.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ namespace ProteoformSuiteInternal
{
public class ProteinSequenceGroup : ProteinWithGoTerms
{
public List<ProteinWithGoTerms> proteinWithGoTermList;
public List<ProteinWithGoTerms> proteinWithGoTermList = new List<ProteinWithGoTerms>();
public ProteinSequenceGroup(IEnumerable<ProteinWithGoTerms> proteins_with_contaminants_first)
: base(proteins_with_contaminants_first.First().BaseSequence,
proteins_with_contaminants_first.First().Accession + "_" + proteins_with_contaminants_first.Count() + "G",
Expand Down
2 changes: 1 addition & 1 deletion ProteoformSuiteInternal/ProteinWithGoTerms.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ public class ProteinWithGoTerms : Protein
{
public List<string> AccessionList { get; set; }
public List<GoTerm> GoTerms { get; set; }
public bool topdown_protein;
public bool topdown_protein { get; set; }

public ProteinWithGoTerms(string sequence, string accession, List<Tuple<string, string>> gene_names, IDictionary<int, List<Modification>> oneBasedModifications, List<ProteolysisProduct> proteolysisProducts, string name, string full_name, bool isDecoy, bool isContaminant, IEnumerable<DatabaseReference> databaseReferences, IEnumerable<GoTerm> goTerms)
: base(sequence, accession, gene_names: gene_names, oneBasedModifications: oneBasedModifications, proteolysisProducts: proteolysisProducts, name: name, full_name: full_name, isDecoy: isDecoy, isContaminant: isContaminant, databaseReferences: databaseReferences.ToList())
Expand Down
7 changes: 5 additions & 2 deletions ProteoformSuiteInternal/Proteoform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,10 @@ private void assign_pf_identity(ExperimentalProteoform e, PtmSet set, Proteoform
e.begin--;
remove.Add(mod);
}
foreach (var ptm in remove) e.ptm_set.ptm_combination.Remove(ptm);
foreach (var ptm in remove)
{
e.ptm_set.ptm_combination.Remove(ptm);
}
e.ptm_set = new PtmSet(e.ptm_set.ptm_combination);
}
else
Expand All @@ -265,7 +268,7 @@ private void assign_pf_identity(ExperimentalProteoform e, PtmSet set, Proteoform

if (e.gene_name == null)
e.gene_name = this.gene_name;
else if(!e.topdown_id)
else if (!e.topdown_id)
e.gene_name.gene_names.Concat(this.gene_name.gene_names);

}
Expand Down
4 changes: 2 additions & 2 deletions ProteoformSuiteInternal/ProteoformCommunity.cs
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ public List<ProteoformRelation> relate(ExperimentalProteoform[] pfs1, Proteoform
{
pf1.ptm_set = null;
pf1.linked_proteoform_references = null;
if(pf1 as TopDownProteoform == null) pf1.gene_name = null;
if (pf1 as TopDownProteoform == null) pf1.gene_name = null;
}

if (limit_et_relations && (relation_type == ProteoformComparison.ExperimentalTheoretical || relation_type == ProteoformComparison.ExperimentalDecoy))
Expand Down Expand Up @@ -354,7 +354,7 @@ public void clear_families()
p.family = null;
p.ptm_set = new PtmSet(new List<Ptm>());
p.linked_proteoform_references = null;
if(p as TopDownProteoform == null) p.gene_name = null;
if (p as TopDownProteoform == null) p.gene_name = null;
}
foreach (Proteoform p in theoretical_proteoforms) p.family = null;
}
Expand Down
4 changes: 2 additions & 2 deletions ProteoformSuiteInternal/ProteoformFamily.cs
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ select f
public void identify_experimentals()
{
HashSet<ExperimentalProteoform> identified_experimentals = new HashSet<ExperimentalProteoform>();
foreach(TheoreticalProteoform t in theoretical_proteoforms)
foreach (TheoreticalProteoform t in theoretical_proteoforms)
{
lock (identified_experimentals)
foreach (ExperimentalProteoform e in t.identify_connected_experimentals(Sweet.lollipop.theoretical_database.all_possible_ptmsets, Sweet.lollipop.theoretical_database.all_mods_with_mass))
Expand All @@ -100,7 +100,7 @@ public void identify_experimentals()
{
last_identified_count = identified_experimentals.Count;
HashSet<ExperimentalProteoform> tmp_new_experimentals = new HashSet<ExperimentalProteoform>();
foreach(ExperimentalProteoform id_experimental in newly_identified_experimentals)
foreach (ExperimentalProteoform id_experimental in newly_identified_experimentals)
{
lock (identified_experimentals) lock (tmp_new_experimentals)
foreach (ExperimentalProteoform new_e in id_experimental.identify_connected_experimentals(Sweet.lollipop.theoretical_database.all_possible_ptmsets, Sweet.lollipop.theoretical_database.all_mods_with_mass))
Expand Down
10 changes: 5 additions & 5 deletions ProteoformSuiteInternal/ResultsSummaryGenerator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ public static string proteoform_families_report()
List<string> experimental_ids = Sweet.lollipop.target_proteoform_community.experimental_proteoforms.Where(e => !e.topdown_id && e.linked_proteoform_references != null && (Sweet.lollipop.count_adducts_as_identifications || !e.adduct))
.Select(p => String.Join(",", (p.linked_proteoform_references.First() as TheoreticalProteoform).ExpandedProteinList.SelectMany(e => e.AccessionList.Select(a => a.Split('_')[0])).Distinct()) + "_" + p.begin + "_" + p.end + "_" + String.Join(", ", p.ptm_set.ptm_combination.Select(ptm => Sweet.lollipop.theoretical_database.unlocalized_lookup.TryGetValue(ptm.modification, out UnlocalizedModification x) ? x.id : ptm.modification.id).OrderBy(m => m))).ToList();
report += experimental_ids.Distinct().Count() + "\tUnique Intact-Mass Experimental Proteoforms Identifications" + Environment.NewLine;
int unique_td = Sweet.lollipop.topdown_proteoforms.Select(p => p.pfr).Distinct().Count();
int unique_td = Sweet.lollipop.topdown_proteoforms.Select(p => p.pfr_accession).Distinct().Count();
report += unique_td + "\tUnique Top-Down Proteoforms Identifications (TDPortal)" + Environment.NewLine;
List<string> topdown_ids = Sweet.lollipop.topdown_proteoforms
.Select(p => p.accession.Split('_')[0].Split('-')[0] + "_" + p.topdown_begin + "_" + p.topdown_end + "_" + String.Join(", ", p.topdown_ptm_set.ptm_combination.Select(ptm => Sweet.lollipop.theoretical_database.unlocalized_lookup.TryGetValue(ptm.modification, out UnlocalizedModification x) ? x.id : ptm.modification.id).OrderBy(m => m))).ToList();
Expand Down Expand Up @@ -468,10 +468,10 @@ public static DataTable experimental_results_dataframe(TusherAnalysis analysis)
public static DataTable topdown_results_dataframe()
{
DataTable results = new DataTable();
results.Columns.Add("PFR", typeof(string));
results.Columns.Add("Theoretiecal Accession", typeof(string));
results.Columns.Add("PFR Accession", typeof(string));
results.Columns.Add("Theoretical Accession", typeof(string));
results.Columns.Add("Top-Down Full Accession", typeof(string));
results.Columns.Add("Top-Down Accession", typeof(string));
results.Columns.Add("Top-Down Accession", typeof(string));
results.Columns.Add("Theoretical Description", typeof(string));
results.Columns.Add("Theoretical Begin and End", typeof(string));
results.Columns.Add("Top-Down Begin and End", typeof(string));
Expand All @@ -493,7 +493,7 @@ public static DataTable topdown_results_dataframe()
foreach (TopDownProteoform td in Sweet.lollipop.topdown_proteoforms)
{
results.Rows.Add(
td.pfr,
td.pfr_accession,
td.linked_proteoform_references == null ? "N/A" : (td.linked_proteoform_references.First() as TheoreticalProteoform).accession,
td.accession,
td.accession.Split('_')[0],
Expand Down
Loading

0 comments on commit feb6054

Please sign in to comment.