diff --git a/Gemfile b/Gemfile index 7b7952bb..f34a6791 100644 --- a/Gemfile +++ b/Gemfile @@ -14,9 +14,7 @@ gem 'sinatra', '~> 1.0' gem 'sinatra-advanced-routes' gem 'sinatra-contrib', '~> 1.0' gem 'request_store' -gem 'rdf-raptor' - - +gem 'addressable', '~> 2.8' # Rack middleware gem 'ffi' gem 'rack-accept', '~> 0.4' @@ -46,11 +44,11 @@ gem 'haml', '~> 5.2.2' # pin see https://github.com/ncbo/ontologies_api/pull/107 gem 'redcarpet' # NCBO gems (can be from a local dev path or from rubygems/git) -gem 'goo', github: 'ontoportal-lirmm/goo', branch: 'feature/add-model-based-search' -gem 'ncbo_annotator', git: 'https://github.com/ontoportal-lirmm/ncbo_annotator.git', branch: 'master' -gem 'ncbo_cron', git: 'https://github.com/ontoportal-lirmm/ncbo_cron.git', branch: 'master' +gem 'ncbo_annotator', git: 'https://github.com/ontoportal-lirmm/ncbo_annotator.git', branch: 'development' +gem 'ncbo_cron', git: 'https://github.com/ontoportal-lirmm/ncbo_cron.git', branch: 'development' gem 'ncbo_ontology_recommender', git: 'https://github.com/ncbo/ncbo_ontology_recommender.git', branch: 'master' gem 'sparql-client', github: 'ontoportal-lirmm/sparql-client', branch: 'master' +gem 'goo', github: 'ontoportal-lirmm/goo', branch: 'feature/add-model-based-search' gem 'ontologies_linked_data', git: 'https://github.com/ontoportal-lirmm/ontologies_linked_data.git', branch: 'test-uri-deref-and-index' @@ -77,5 +75,5 @@ group :test do gem 'rack-test' gem 'simplecov', require: false gem 'simplecov-cobertura' # for codecov.io - gem 'webmock' + gem 'webmock', '~> 3.19.1' end \ No newline at end of file diff --git a/Gemfile.lock b/Gemfile.lock index 84b402c5..6743cbee 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -11,10 +11,11 @@ GIT GIT remote: https://github.com/ontoportal-lirmm/goo.git - revision: 83ac6f6c87aa6f960d3ee71ae85c6ed23c939785 + revision: 10b90c17af12c71bfc95bfb4fc0bba5e47ff77af branch: feature/add-model-based-search specs: goo (0.0.2) + addressable (~> 2.8) pry rdf (= 3.2.11) rdf-raptor @@ -28,8 +29,8 @@ GIT GIT remote: https://github.com/ontoportal-lirmm/ncbo_annotator.git - revision: 57204d8e54432ba660af4c49806e2a3019a23fa2 - branch: master + revision: 1eb751b65d10ae23d45c74e0516c78754a8419f0 + branch: development specs: ncbo_annotator (0.0.1) goo @@ -39,8 +40,8 @@ GIT GIT remote: https://github.com/ontoportal-lirmm/ncbo_cron.git - revision: 9ec0147203007cc368a5119ffe1a019fa8701c14 - branch: master + revision: 5bf8a237421cf3483604ea83ec20e46d6a5e59ad + branch: development specs: ncbo_cron (0.0.1) dante @@ -56,7 +57,7 @@ GIT GIT remote: https://github.com/ontoportal-lirmm/ontologies_linked_data.git - revision: a30a431664bbe3d4d0e53a9323017f8decf791d6 + revision: d7c466d5cb9148d0178ffab4d3c1c97e36cf9783 branch: test-uri-deref-and-index specs: ontologies_linked_data (0.0.1) @@ -76,7 +77,7 @@ GIT GIT remote: https://github.com/ontoportal-lirmm/sparql-client.git - revision: aed51baf4106fd0f3d0e3f9238f0aad9406aa3f0 + revision: 180c818f7715baac64b2699bb452ef5c756f62c5 branch: master specs: sparql-client (1.0.1) @@ -174,15 +175,15 @@ GEM grpc (~> 1.59) get_process_mem (0.2.7) ffi (~> 1.0) - google-analytics-data (0.5.0) + google-analytics-data (0.6.0) google-analytics-data-v1beta (>= 0.11, < 2.a) google-cloud-core (~> 1.6) - google-analytics-data-v1beta (0.11.2) + google-analytics-data-v1beta (0.12.0) gapic-common (>= 0.21.1, < 2.a) google-cloud-errors (~> 1.0) - google-apis-analytics_v3 (0.14.0) - google-apis-core (>= 0.12.0, < 2.a) - google-apis-core (0.13.0) + google-apis-analytics_v3 (0.15.0) + google-apis-core (>= 0.14.0, < 2.a) + google-apis-core (0.14.0) addressable (~> 2.5, >= 2.5.1) googleauth (~> 1.9) httpclient (>= 2.8.1, < 3.a) @@ -196,12 +197,13 @@ GEM google-cloud-env (2.1.1) faraday (>= 1.0, < 3.a) google-cloud-errors (1.3.1) + google-protobuf (3.25.3-x86_64-darwin) google-protobuf (3.25.3-x86_64-linux) - googleapis-common-protos (1.4.0) - google-protobuf (~> 3.14) - googleapis-common-protos-types (~> 1.2) - grpc (~> 1.27) - googleapis-common-protos-types (1.12.0) + googleapis-common-protos (1.5.0) + google-protobuf (~> 3.18) + googleapis-common-protos-types (~> 1.7) + grpc (~> 1.41) + googleapis-common-protos-types (1.13.0) google-protobuf (~> 3.18) googleauth (1.11.0) faraday (>= 1.0, < 3.a) @@ -210,6 +212,9 @@ GEM multi_json (~> 1.11) os (>= 0.9, < 2.0) signet (>= 0.16, < 2.a) + grpc (1.62.0-x86_64-darwin) + google-protobuf (~> 3.25) + googleapis-common-protos-types (~> 1.0) grpc (1.62.0-x86_64-linux) google-protobuf (~> 3.25) googleapis-common-protos-types (~> 1.0) @@ -228,7 +233,7 @@ GEM json-schema (2.8.1) addressable (>= 2.4) json_pure (2.7.1) - jwt (2.8.0) + jwt (2.8.1) base64 kgio (2.11.4) libxml-ruby (5.0.2) @@ -393,16 +398,18 @@ GEM unicorn (>= 4, < 7) uuid (2.3.9) macaddr (~> 1.0) - webmock (3.22.0) + webmock (3.19.1) addressable (>= 2.8.0) crack (>= 0.3.2) hashdiff (>= 0.4.0, < 2.0.0) PLATFORMS + x86_64-darwin-23 x86_64-linux DEPENDENCIES activesupport (~> 3.2) + addressable (~> 2.8) bcrypt_pbkdf (>= 1.0, < 2.0) bigdecimal (= 1.4.2) capistrano (~> 3) @@ -437,7 +444,6 @@ DEPENDENCIES rack-test rack-timeout rake (~> 10.0) - rdf-raptor redcarpet redis (~> 4.8.1) redis-activesupport @@ -453,7 +459,7 @@ DEPENDENCIES sparql-client! unicorn unicorn-worker-killer - webmock + webmock (~> 3.19.1) BUNDLED WITH - 2.3.14 + 2.4.22 diff --git a/controllers/admin_controller.rb b/controllers/admin_controller.rb index ed32e269..07708732 100644 --- a/controllers/admin_controller.rb +++ b/controllers/admin_controller.rb @@ -129,20 +129,23 @@ class AdminController < ApplicationController namespace "/search" do get '/collections' do - collections = { collections: Goo.search_connections.keys.map(&:to_s)} + conn = SOLR::SolrConnector.new(Goo.search_conf, '') + collections = { collections: conn.fetch_all_collections} reply(200, collections) end get '/collections/:collection/schema' do collection = params[:collection].to_sym - collection_schema = Goo.search_connections[collection].fetch_schema + conn = SOLR::SolrConnector.new(Goo.search_conf, collection) + collection_schema = conn.fetch_schema reply(200, collection_schema) end post '/collections/:collection/schema/init' do collection = params[:collection].to_sym - collection_schema = Goo.search_connections[collection].init_schema + conn = SOLR::SolrConnector.new(Goo.search_conf, collection) + collection_schema = conn.init_schema reply(200, collection_schema) end @@ -155,8 +158,8 @@ class AdminController < ApplicationController search_params = params.select { |key, _| search_keys.include?(key) } search_query = params[:query] || params[:q] search_query = search_query.blank? ? '*' : search_query - - reply(200, Goo.search_connections[collection].search(search_query, search_params).to_h) + conn = SOLR::SolrConnector.new(Goo.search_conf, collection) + reply(200, conn.search(search_query, search_params).to_h) end post '/index_batch/:model_name' do diff --git a/controllers/search_controller.rb b/controllers/search_controller.rb index 22dbc1e9..63c2226b 100644 --- a/controllers/search_controller.rb +++ b/controllers/search_controller.rb @@ -31,18 +31,18 @@ class SearchController < ApplicationController 'resource_model:"ontology_submission"', 'submissionStatus_txt:ERROR_* OR submissionStatus_txt:"RDF" OR submissionStatus_txt:"UPLOADED"', "ontology_viewingRestriction_t:#{visibility}", - groups.map{|x| "ontology_group_txt:\"http://data.bioontology.org/groups/#{x.upcase}\""}.join(' OR '), - categories.map{|x| "ontology_hasDomain_txt:\"http://data.bioontology.org/categories/#{x.upcase}\""}.join(' OR '), - languages.map{|x| "naturalLanguage_txt:\"#{x.downcase}\""}.join(' OR '), + groups.map { |x| "ontology_group_txt:\"http://data.bioontology.org/groups/#{x.upcase}\"" }.join(' OR '), + categories.map { |x| "ontology_hasDomain_txt:\"http://data.bioontology.org/categories/#{x.upcase}\"" }.join(' OR '), + languages.map { |x| "naturalLanguage_txt:\"#{x.downcase}\"" }.join(' OR '), ] fq << "!ontology_viewOf_t:*" unless show_views - fq << format.map{|x| "hasOntologyLanguage_t:\"http://data.bioontology.org/ontology_formats/#{x}\""}.join(' OR ') unless format.blank? + fq << format.map { |x| "hasOntologyLanguage_t:\"http://data.bioontology.org/ontology_formats/#{x}\"" }.join(' OR ') unless format.blank? - fq << status.map{|x| "status_t:#{x}"}.join(' OR ') unless status.blank? - fq << is_of_type.map{|x| "isOfType_t:#{x}"}.join(' OR ') unless is_of_type.blank? - fq << has_format.map{|x| "hasFormalityLevel_t:#{x}"}.join(' OR ') unless has_format.blank? + fq << status.map { |x| "status_t:#{x}" }.join(' OR ') unless status.blank? + fq << is_of_type.map { |x| "isOfType_t:#{x}" }.join(' OR ') unless is_of_type.blank? + fq << has_format.map { |x| "hasFormalityLevel_t:#{x}" }.join(' OR ') unless has_format.blank? fq.reject!(&:blank?) @@ -63,8 +63,7 @@ class SearchController < ApplicationController page_size: page_size, sort: sort }) - - #resp = Ontology.search(query, search_params) + total_found = page_data.aggregate ontology_rank = LinkedData::Models::Ontology.rank docs = {} @@ -77,7 +76,7 @@ class SearchController < ApplicationController old_id = old_resource_id.split('/').last.to_i rescue 0 if acronym.blank? || old_id && id && (id <= old_id) - total_found-= 1 + total_found -= 1 next end @@ -85,17 +84,40 @@ class SearchController < ApplicationController acronyms_ids[acronym] = resource_id doc["ontology_rank"] = ontology_rank.dig(doc["ontology_acronym_text"], :normalizedScore) || 0.0 - docs[resource_id] = doc + docs[resource_id] = doc end docs = docs.values - docs.sort! {|a, b| [b["score"], b["ontology_rank"]] <=> [a["score"], a["ontology_rank"]]} unless params[:sort].present? + docs.sort! { |a, b| [b["score"], b["ontology_rank"]] <=> [a["score"], a["ontology_rank"]] } unless params[:sort].present? page = page_object(docs, total_found) reply 200, page end + + get '/content' do + query = params[:query] || params[:q] + page, page_size = page_params + ontologies = params.fetch("ontologies", "").split(',') + qf = params.fetch("qf", "") + + fq = [] + + fq << ontologies.map { |x| "ontology_t:\"#{x}\"" }.join(' OR ') unless ontologies.blank? + + + conn = SOLR::SolrConnector.new(Goo.search_conf, :ontology_data) + + resp = conn.search(query, fq: fq, qf: qf, + page: page, page_size: page_size) + + total_found = resp["response"]["numFound"] + docs = resp["response"]["docs"] + + + reply 200,page_object(docs, total_found) + end end namespace "/agents" do @@ -104,7 +126,7 @@ class SearchController < ApplicationController page, page_size = page_params type = params[:agentType].blank? ? nil : params[:agentType] - fq = "agentType_t:#{type}" if type + fq = "agentType_t:#{type}" if type qf = [ "acronymSuggestEdge^25 nameSuggestEdge^15 emailSuggestEdge^15 identifiersSuggestEdge^10 ", # start of the word first @@ -118,7 +140,6 @@ class SearchController < ApplicationController sort = "score desc, acronym_sort asc, name_sort asc" end - reply 200, search(LinkedData::Models::Agent, query, fq: fq, qf: qf, @@ -132,7 +153,7 @@ class SearchController < ApplicationController def search(model, query, params = {}) query = query.blank? ? "*" : query - resp = model.search(query, search_params(params)) + resp = model.search(query, search_params(params)) total_found = resp["response"]["numFound"] docs = resp["response"]["docs"] @@ -140,7 +161,7 @@ def search(model, query, params = {}) page_object(docs, total_found) end - def search_params(defType: "edismax", fq: , qf: , stopwords: "true", lowercaseOperators: "true", page: , page_size: , fl: '*,score', sort: ) + def search_params(defType: "edismax", fq:, qf:, stopwords: "true", lowercaseOperators: "true", page:, page_size:, fl: '*,score', sort:) { defType: defType, fq: fq, @@ -154,8 +175,7 @@ def search_params(defType: "edismax", fq: , qf: , stopwords: "true", lowercaseOp } end - - def process_search(params=nil) + def process_search(params = nil) params ||= @params text = params["q"] @@ -191,13 +211,13 @@ def process_search(params=nil) unless params['sort'] if !text.nil? && text[-1] == '*' - docs.sort! {|a, b| [b[:score], a[:prefLabelExact].downcase, b[:ontology_rank]] <=> [a[:score], b[:prefLabelExact].downcase, a[:ontology_rank]]} + docs.sort! { |a, b| [b[:score], a[:prefLabelExact].downcase, b[:ontology_rank]] <=> [a[:score], b[:prefLabelExact].downcase, a[:ontology_rank]] } else - docs.sort! {|a, b| [b[:score], b[:ontology_rank]] <=> [a[:score], a[:ontology_rank]]} + docs.sort! { |a, b| [b[:score], b[:ontology_rank]] <=> [a[:score], a[:ontology_rank]] } end end - #need to return a Page object + # need to return a Page object page = page_object(docs, total_found) reply 200, page diff --git a/docker-compose.yml b/docker-compose.yml index b6b8102b..370615a6 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -87,7 +87,7 @@ services: # volumes: #- solr_data:/var/solr/data agraph-ut: - image: franzinc/agraph:v8.0.0.rc1 + image: franzinc/agraph:v8.1.0 platform: linux/amd64 environment: - AGRAPH_SUPER_USER=test diff --git a/test/controllers/test_search_models_controller.rb b/test/controllers/test_search_models_controller.rb index 6f39e974..851c7a31 100644 --- a/test/controllers/test_search_models_controller.rb +++ b/test/controllers/test_search_models_controller.rb @@ -18,7 +18,7 @@ def test_show_all_collection get '/admin/search/collections' assert last_response.ok? res = MultiJson.load(last_response.body) - assert_equal res["collections"], Goo.search_connections.keys.map(&:to_s) + assert_equal res["collections"].sort, Goo.search_connections.keys.map(&:to_s).sort end def test_collection_schema @@ -341,4 +341,60 @@ def test_agents_search agents = MultiJson.load(last_response.body) assert_equal agent_org.id.to_s, agents["collection"].first["id"] end + + def test_search_data + count, acronyms, bro = LinkedData::SampleData::Ontology.create_ontologies_and_submissions({ + process_submission: true, + process_options: { process_rdf: true, extract_metadata: false, generate_missing_labels: false}, + acronym: "BROSEARCHTEST", + name: "BRO Search Test", + file_path: "./test/data/ontology_files/BRO_v3.2.owl", + ont_count: 1, + submission_count: 1, + ontology_type: "VALUE_SET_COLLECTION" + }) + + count, acronyms, mccl = LinkedData::SampleData::Ontology.create_ontologies_and_submissions({ + process_submission: true, + process_options: { process_rdf: true, extract_metadata: false, generate_missing_labels: false}, + acronym: "MCCLSEARCHTEST", + name: "MCCL Search Test", + file_path: "./test/data/ontology_files/CellLine_OWL_BioPortal_v1.0.owl", + ont_count: 1, + submission_count: 1 + }) + + + subs = LinkedData::Models::OntologySubmission.all + count = [] + subs.each do |s| + s.bring_remaining + s.index_all_data(Logger.new($stdout)) + count << Goo.sparql_query_client.query("SELECT (COUNT( DISTINCT ?id) as ?c) FROM <#{s.id}> WHERE {?id ?p ?v}") + .first[:c] + .to_i + end + + get "/search/ontologies/content?q=*" + assert last_response.ok? + res = MultiJson.load(last_response.body) + assert_equal count.sum, res['totalCount'] + + + get "/search/ontologies/content?q=*&ontologies=MCCLSEARCHTEST-0,BROSEARCHTEST-0" + assert last_response.ok? + res = MultiJson.load(last_response.body) + assert_equal count.sum, res['totalCount'] + + get "/search/ontologies/content?q=*&ontologies=BROSEARCHTEST-0" + assert last_response.ok? + res = MultiJson.load(last_response.body) + assert_includes count, res['totalCount'] + + get "/search/ontologies/content?q=*&ontologies=MCCLSEARCHTEST-0" + assert last_response.ok? + res = MultiJson.load(last_response.body) + assert_includes count, res['totalCount'] + + end end