Skip to content

Commit

Permalink
refactor mapping count to simplify the code
Browse files Browse the repository at this point in the history
  • Loading branch information
syphax-bouazzouni committed Nov 15, 2024
1 parent de2e01b commit 650598b
Showing 1 changed file with 52 additions and 119 deletions.
171 changes: 52 additions & 119 deletions lib/ontologies_linked_data/concerns/mappings/mapping_counts.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ module Count
def mapping_counts(enable_debug = false, logger = nil, reload_cache = false, arr_acronyms = [])
logger = nil unless enable_debug
t = Time.now
latest = self.retrieve_latest_submissions(options = { acronyms: arr_acronyms })
latest = retrieve_latest_submissions({ acronyms: arr_acronyms })
counts = {}
# Counting for External mappings
t0 = Time.now
Expand Down Expand Up @@ -34,12 +34,12 @@ def mapping_counts(enable_debug = false, logger = nil, reload_cache = false, arr
end
# Counting for mappings between the ontologies hosted by the BioPortal appliance
i = 0
epr = Goo.sparql_query_client(:main)
Goo.sparql_query_client(:main)

latest.each do |acro, sub|
self.handle_triple_store_downtime(logger) if Goo.backend_4s?
handle_triple_store_downtime(logger) if Goo.backend_4s?
t0 = Time.now
s_counts = self.mapping_ontologies_count(sub, nil, reload_cache = reload_cache)
s_counts = mapping_ontologies_count(sub, nil, reload_cache = reload_cache)
s_total = 0

s_counts.each do |k, v|
Expand All @@ -63,7 +63,7 @@ def mapping_counts(enable_debug = false, logger = nil, reload_cache = false, arr
end

def create_mapping_counts(logger, arr_acronyms = [])
ont_msg = arr_acronyms.empty? ? "all ontologies" : "ontologies [#{arr_acronyms.join(', ')}]"
ont_msg = arr_acronyms.empty? ? 'all ontologies' : "ontologies [#{arr_acronyms.join(', ')}]"

time = Benchmark.realtime do
create_mapping_count_totals_for_ontologies(logger, arr_acronyms)
Expand All @@ -79,92 +79,48 @@ def create_mapping_counts(logger, arr_acronyms = [])
end

def create_mapping_count_totals_for_ontologies(logger, arr_acronyms)
new_counts = mapping_counts(enable_debug = true, logger = logger, reload_cache = true, arr_acronyms)
new_counts = mapping_counts(true, logger, true, arr_acronyms)
persistent_counts = {}
f = Goo::Filter.new(:pair_count) == false

LinkedData::Models::MappingCount.where.filter(f)
.include(:ontologies, :count)
LinkedData::Models::MappingCount.where(pair_count: false)
.include(:ontologies, :count, :pair_count)
.include(:all)
.all
.each do |m|
persistent_counts[m.ontologies.first] = m
end

latest = self.retrieve_latest_submissions(options = { acronyms: arr_acronyms })
delete_zombie_mapping_count(persistent_counts.values, latest.values.compact.map { |sub| sub.ontology.acronym })
latest = retrieve_latest_submissions
delete_zombie_mapping_count(persistent_counts, latest, new_counts)


num_counts = new_counts.keys.length
ctr = 0

new_counts.each_key do |acr|
new_count = new_counts[acr]
ctr += 1

if persistent_counts.include?(acr)
inst = persistent_counts[acr]
if new_count.zero?
inst.delete if inst.persistent?
elsif new_count != inst.count
inst.bring_remaining
inst.count = new_count

begin
if inst.valid?
inst.save
else
logger.error("Error updating mapping count for #{acr}: #{inst.id.to_s}. #{inst.errors}")
next
end
rescue Exception => e
logger.error("Exception updating mapping count for #{acr}: #{inst.id.to_s}. #{e.class}: #{e.message}\n#{e.backtrace.join("\n")}")
next
end
end
else
m = LinkedData::Models::MappingCount.new
m.ontologies = [acr]
m.pair_count = false
m.count = new_count

begin
if m.valid?
m.save
else
logger.error("Error saving new mapping count for #{acr}. #{m.errors}")
next
end
rescue Exception => e
logger.error("Exception saving new mapping count for #{acr}. #{e.class}: #{e.message}\n#{e.backtrace.join("\n")}")
next
end
end
update_mapping_count(persistent_counts, new_counts, acr, acr, new_count, false)
remaining = num_counts - ctr
logger.info("Total mapping count saved for #{acr}: #{new_count}. " << ((remaining.positive?) ? "#{remaining} counts remaining..." : "All done!"))
logger.info("Total mapping count saved for #{acr}: #{new_count}. " << (remaining.positive? ? "#{remaining} counts remaining..." : 'All done!'))
end
end

# This generates pair mapping counts for the given
# ontologies to ALL other ontologies in the system
def create_mapping_count_pairs_for_ontologies(logger, arr_acronyms)

latest_submissions = self.retrieve_latest_submissions(options = { acronyms: arr_acronyms })
all_latest_submissions = self.retrieve_latest_submissions
latest_submissions = retrieve_latest_submissions({ acronyms: arr_acronyms })
all_latest_submissions = retrieve_latest_submissions
ont_total = latest_submissions.length
logger.info("There is a total of #{ont_total} ontologies to process...")
ont_ctr = 0
# filename = 'mapping_pairs.ttl'
# temp_dir = Dir.tmpdir
# temp_file_path = File.join(temp_dir, filename)
# temp_dir = '/Users/mdorf/Downloads/test/'
# temp_file_path = File.join(File.dirname(file_path), "test.ttl")
# fsave = File.open(temp_file_path, "a")

latest_submissions.each do |acr, sub|
self.handle_triple_store_downtime(logger) if Goo.backend_4s?
new_counts = nil

time = Benchmark.realtime do
new_counts = self.mapping_ontologies_count(sub, nil, reload_cache = true)
new_counts = mapping_ontologies_count(sub, nil, true)
end
logger.info("Retrieved new mapping pair counts for #{acr} in #{time} seconds.")
ont_ctr += 1
Expand All @@ -176,61 +132,20 @@ def create_mapping_count_pairs_for_ontologies(logger, arr_acronyms)
persistent_counts[other] = m
end

delete_zombie_mapping_count(persistent_counts.values, all_latest_submissions.values.compact.map { |s| s.ontology.acronym })
delete_zombie_mapping_count(persistent_counts, all_latest_submissions, new_counts)


num_counts = new_counts.keys.length
logger.info("Ontology: #{acr}. #{num_counts} mapping pair counts to record...")
logger.info("------------------------------------------------")
logger.info('------------------------------------------------')
ctr = 0

new_counts.each_key do |other|
new_count = new_counts[other]
ctr += 1

if persistent_counts.include?(other)
inst = persistent_counts[other]
if new_count.zero?
inst.delete
elsif new_count != inst.count
inst.bring_remaining if inst.persistent?
inst.pair_count = true
inst.count = new_count

begin
if inst.valid?
inst.save()
# inst.save({ batch: fsave })
else
logger.error("Error updating mapping count for the pair [#{acr}, #{other}]: #{inst.id.to_s}. #{inst.errors}")
next
end
rescue Exception => e
logger.error("Exception updating mapping count for the pair [#{acr}, #{other}]: #{inst.id.to_s}. #{e.class}: #{e.message}\n#{e.backtrace.join("\n")}")
next
end
end
else
next unless new_counts.key?(other)

m = LinkedData::Models::MappingCount.new
m.count = new_count
m.ontologies = [acr, other]
m.pair_count = true
begin
if m.valid?
m.save()
# m.save({ batch: fsave })
else
logger.error("Error saving new mapping count for the pair [#{acr}, #{other}]. #{m.errors}")
next
end
rescue Exception => e
logger.error("Exception saving new mapping count for the pair [#{acr}, #{other}]. #{e.class}: #{e.message}\n#{e.backtrace.join("\n")}")
next
end
end
update_mapping_count(persistent_counts, new_counts, acr, other, new_count, true)
remaining = num_counts - ctr
logger.info("Mapping count saved for the pair [#{acr}, #{other}]: #{new_count}. " << ((remaining.positive?) ? "#{remaining} counts remaining for #{acr}..." : "All done!"))
logger.info("Mapping count saved for the pair [#{acr}, #{other}]: #{new_count}. " << (remaining.positive? ? "#{remaining} counts remaining for #{acr}..." : 'All done!'))
wait_interval = 250

next unless (ctr % wait_interval).zero?
Expand All @@ -240,25 +155,43 @@ def create_mapping_count_pairs_for_ontologies(logger, arr_acronyms)
sleep(sec_to_wait)
end
remaining_ont = ont_total - ont_ctr
logger.info("Completed processing pair mapping counts for #{acr}. " << ((remaining_ont.positive?) ? "#{remaining_ont} ontologies remaining..." : "All ontologies processed!"))
logger.info("Completed processing pair mapping counts for #{acr}. " << (remaining_ont.positive? ? "#{remaining_ont} ontologies remaining..." : 'All ontologies processed!'))
end
# fsave.close
end

private

def delete_zombie_mapping_count(existent_counts, submissions_ready)
special_mappings = ["http://data.bioontology.org/metadata/ExternalMappings",
"http://data.bioontology.org/metadata/InterportalMappings/agroportal",
"http://data.bioontology.org/metadata/InterportalMappings/ncbo",
"http://data.bioontology.org/metadata/InterportalMappings/sifr"]
# Creates, updates or deletes the mapping count record for one entry of
# +new_counts+.
#
# persistent_counts - Hash of already stored count records, looked up by +other+.
# new_counts        - Hash of freshly computed counts; a new record is only
#                     created when it has +other+ as a key.
# acr               - acronym of the ontology being processed.
# other             - key of the counterpart (the callers pass +acr+ itself
#                     for per-ontology totals).
# new_count         - the freshly computed count for this entry.
# pair_count        - true for a pair count, false for a per-ontology total.
#
# An existing record is deleted when the new count is zero, rewritten when the
# count changed, and left untouched otherwise.
def update_mapping_count(persistent_counts, new_counts, acr, other, new_count, pair_count)
  if persistent_counts.key?(other)
    inst = persistent_counts[other]
    if new_count.zero?
      inst.delete
    elsif new_count != inst.count
      # Keep the caller's flag: forcing it to true here would turn a
      # per-ontology total into a pair count on its first update.
      inst.pair_count = pair_count
      inst.count = new_count
      inst.save
    end
    return
  end

  return unless new_counts.key?(other)

  m = LinkedData::Models::MappingCount.new
  m.count = new_count
  m.ontologies = pair_count ? [acr, other] : [acr]
  m.pair_count = pair_count
  m.save
end

existent_counts.each do |mapping|
next if mapping.ontologies.size == 1 && !(mapping.ontologies & special_mappings).empty?
next if mapping.ontologies.all? { |x| submissions_ready.include?(x) }
next unless mapping.persistent?
def delete_zombie_mapping_count(persistent_counts, all_latest_submissions, new_counts)
persistent_counts.each do |k, v|
next if all_latest_submissions.key?(k) && new_counts.key?(k)

mapping.delete
v.delete
persistent_counts.delete(k)
end
end
end
Expand Down

0 comments on commit 650598b

Please sign in to comment.