From 091fa2a8eea4fe10284a06a4d66f13d2c35035bc Mon Sep 17 00:00:00 2001 From: tibvdm Date: Tue, 16 Apr 2024 11:39:08 +0200 Subject: [PATCH] fix equate IL --- .../private_api/proteins_controller.rb | 122 +++++++++--------- .../proteins/proteins.json.jbuilder | 22 ++-- 2 files changed, 72 insertions(+), 72 deletions(-) diff --git a/app/controllers/private_api/proteins_controller.rb b/app/controllers/private_api/proteins_controller.rb index c202a60..4c726b3 100644 --- a/app/controllers/private_api/proteins_controller.rb +++ b/app/controllers/private_api/proteins_controller.rb @@ -1,5 +1,10 @@ class PrivateApi::ProteinsController < PrivateApi::PrivateApiController + include SuffixArrayHelper + def proteins + peptide = params[:peptide] + equate_il = params[:equate_il].nil? ? true : params[:equate_il] + unless params[:peptide] @error_name = 'Invalid peptide provided' @error_message = 'No peptide sequence was provided. Please provide a valid peptide sequence.' @@ -7,83 +12,76 @@ def proteins return end - # process parameters - # the sequence or id of the peptide (filter out all characters that are non-ASCII) - seq = params[:peptide].upcase.gsub(/\P{ASCII}/, '') - # should we equate I and L? (true by default) - equate_il = params.key?(:equate_il) ? params[:equate_il] == 'true' : true - - begin - # process the input, convert seq to a valid @sequence - sequence = Sequence.single_search(seq, equate_il) - @original_sequence = seq - rescue SequenceTooShortError + unless peptide.length >= 5 @error_name = 'Sequence too short' @error_message = 'The peptide sequence you provided is too short. It should contain at least 5 valid amino acids.' render 'private_api/error' return end - if sequence.present? && sequence.peptides(equate_il).empty? - @entries = [] - return - end + # Request the suffix array search service + @response = search([ peptide ], equate_il) - @common_lineage = [] + # if sequence.present? && sequence.peptides(equate_il).empty? + # @entries = [] + # return + # end - # get the uniprot entries of every peptide - # only used for the open in uniprot links - # and calculate the LCA - if sequence.nil? - begin - # we didn't find the sequence in the database, so let's try to split it - long_sequences = Sequence.advanced_single_search(seq, equate_il) - rescue NoMatchesFoundError - return - end - # calculate possible uniprot entries - temp_entries = long_sequences.map { |s| s.peptides(equate_il).map(&:uniprot_entry).to_set } - # take the intersection of all sets - @entries = temp_entries.reduce(:&) - # check if the protein contains the startsequence - @entries.select! { |e| e.protein_contains?(seq, equate_il) } + # @common_lineage = [] - # Calculate fa summary - @fa_summary = UniprotEntry.summarize_fa(@entries) + # # get the uniprot entries of every peptide + # # only used for the open in uniprot links + # # and calculate the LCA + # if sequence.nil? + # begin + # # we didn't find the sequence in the database, so let's try to split it + # long_sequences = Sequence.advanced_single_search(seq, equate_il) + # rescue NoMatchesFoundError + # return + # end + # # calculate possible uniprot entries + # temp_entries = long_sequences.map { |s| s.peptides(equate_il).map(&:uniprot_entry).to_set } + # # take the intersection of all sets + # @entries = temp_entries.reduce(:&) + # # check if the protein contains the startsequence + # @entries.select! { |e| e.protein_contains?(seq, equate_il) } - return if @entries.empty? + # # Calculate fa summary + # @fa_summary = UniprotEntry.summarize_fa(@entries) - @lineages = @entries.map(&:lineage).compact - else - @entries = sequence.peptides(equate_il).map(&:uniprot_entry) - @lineages = sequence.lineages(equate_il, true).to_a + # return if @entries.empty? - # Get FA summary from cache - @fa_summary = sequence.calculate_fa(equate_il) - end + # @lineages = @entries.map(&:lineage).compact + # else + # @entries = sequence.peptides(equate_il).map(&:uniprot_entry) + # @lineages = sequence.lineages(equate_il, true).to_a - # sort entries - @entries = @entries.to_a.sort_by { |e| e.taxon.nil? ? '' : e.taxon.name } + # # Get FA summary from cache + # @fa_summary = sequence.calculate_fa(equate_il) + # end - @lca_taxon = Lineage.calculate_lca_taxon(@lineages) - @root = Node.new(1, 'Organism', nil, 'root') # start constructing the tree - common_hits = @lineages.map(&:hits).reduce(:+) - @root.data['count'] = common_hits - last_node = @root + # # sort entries + # @entries = @entries.to_a.sort_by { |e| e.taxon.nil? ? '' : e.taxon.name } - # common lineage - # construct the common lineage in this array - l = @lca_taxon.lineage - found = (@lca_taxon.name == 'root') - while !found && l.has_next? - t = l.next_t - next if t.nil? + # @lca_taxon = Lineage.calculate_lca_taxon(@lineages) + # @root = Node.new(1, 'Organism', nil, 'root') # start constructing the tree + # common_hits = @lineages.map(&:hits).reduce(:+) + # @root.data['count'] = common_hits + # last_node = @root - found = (@lca_taxon.id == t.id) - @common_lineage << t - node = Node.new(t.id, t.name, @root, t.rank) - node.data['count'] = common_hits - last_node = last_node.add_child(node) - end + # # common lineage + # # construct the common lineage in this array + # l = @lca_taxon.lineage + # found = (@lca_taxon.name == 'root') + # while !found && l.has_next? + # t = l.next_t + # next if t.nil? + + # found = (@lca_taxon.id == t.id) + # @common_lineage << t + # node = Node.new(t.id, t.name, @root, t.rank) + # node.data['count'] = common_hits + # last_node = last_node.add_child(node) + # end end end diff --git a/app/views/private_api/proteins/proteins.json.jbuilder b/app/views/private_api/proteins/proteins.json.jbuilder index ca149b1..f53a184 100644 --- a/app/views/private_api/proteins/proteins.json.jbuilder +++ b/app/views/private_api/proteins/proteins.json.jbuilder @@ -1,10 +1,12 @@ -json.lca @lca_taxon ? @lca_taxon.id : -1 -json.common_lineage(@common_lineage.map(&:id)) -json.proteins @entries do |entry| - json.uniprotAccessionId entry.uniprot_accession_number - json.name entry.name - json.organism entry.taxon_id - json.ecNumbers(entry.ec_cross_references.map(&:ec_number_code)) - json.goTerms(entry.go_cross_references.map(&:go_term_code)) - json.interproEntries(entry.interpro_cross_references.map(&:interpro_entry_code)) -end +# json.lca @lca_taxon ? @lca_taxon.id : -1 +# json.common_lineage(@common_lineage.map(&:id)) +# json.proteins @entries do |entry| +# json.uniprotAccessionId entry.uniprot_accession_number +# json.name entry.name +# json.organism entry.taxon_id +# json.ecNumbers(entry.ec_cross_references.map(&:ec_number_code)) +# json.goTerms(entry.go_cross_references.map(&:go_term_code)) +# json.interproEntries(entry.interpro_cross_references.map(&:interpro_entry_code)) +# end + +json.proteins(@response)