Skip to content

Commit

Permalink
Merge branch 'dev' into fix/updater_v2
Browse files (browse the repository at this point in the history)
  • Loading branch information
akabishau committed Jun 3, 2024
2 parents a65463e + ea08374 commit 347c576
Show file tree
Hide file tree
Showing 24 changed files with 236 additions and 60 deletions.
10 changes: 5 additions & 5 deletions app/models/clinical_trials_api_v2.rb
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ def list_fields(field)
end

# get all the studies from ctgov
def self.all(limit: 20000, days_back: nil)

def self.all(limit: 1_000_000, days_back: nil)
offset = 1
items = []

Expand Down Expand Up @@ -91,16 +92,15 @@ def self.all(limit: 20000, days_back: nil)
posted: rec["protocolSection"]["statusModule"]["studyFirstSubmitDate"],
updated: rec["protocolSection"]["statusModule"]["lastUpdatePostDateStruct"]["date"]
}
break if items.size >= limit
end

# puts "api studies: #{items.length}"
break if items.size >= limit || page_token.nil?
print "\rstudies found: #{items.length}"
break if items.size >= limit

page_token = json_response["nextPageToken"]
break if page_token.nil?
end
puts "api v2 studies: #{items.length}"
print "\rstudies found: #{items.length}\n"
return items
end

Expand Down
2 changes: 1 addition & 1 deletion app/models/design_group.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
class DesignGroup < StudyRelationship
has_many :design_group_interventions, inverse_of: :design_group, autosave: true
has_many :design_group_interventions, inverse_of: :design_group
has_many :interventions, :through => :design_group_interventions

add_mapping do
Expand Down
4 changes: 2 additions & 2 deletions app/models/design_group_intervention.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
class DesignGroupIntervention < StudyRelationship
belongs_to :intervention, inverse_of: :design_group_interventions, autosave: true
belongs_to :design_group, inverse_of: :design_group_interventions, autosave: true
belongs_to :intervention, inverse_of: :design_group_interventions
belongs_to :design_group, inverse_of: :design_group_interventions

def self.create_all_from(opts)
return [] if opts[:group_titles].empty?
Expand Down
4 changes: 2 additions & 2 deletions app/models/facility.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
class Facility < StudyRelationship
has_many :facility_contacts, autosave: true
has_many :facility_investigators, autosave: true
has_many :facility_contacts
has_many :facility_investigators

add_mapping do
{
Expand Down
2 changes: 1 addition & 1 deletion app/models/facility_contact.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# StudyRelationship subclass whose only declaration is a belongs_to :facility association.
# NOTE(review): this listing is a rendered diff, so the association appears twice —
# once with `autosave: true` and once without. Presumably the first line is the
# removed version and the second is the added one; confirm against the real file.
class FacilityContact < StudyRelationship
belongs_to :facility, autosave: true
belongs_to :facility

end
2 changes: 1 addition & 1 deletion app/models/facility_investigator.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# StudyRelationship subclass whose only declaration is a belongs_to :facility association.
# NOTE(review): this listing is a rendered diff, so the association appears twice —
# once with `autosave: true` and once without. Presumably the first line is the
# removed version and the second is the added one; confirm against the real file.
class FacilityInvestigator < StudyRelationship
belongs_to :facility, autosave: true
belongs_to :facility

end

4 changes: 2 additions & 2 deletions app/models/intervention.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
class Intervention < StudyRelationship
has_many :intervention_other_names, inverse_of: :intervention, autosave: true
has_many :design_group_interventions, inverse_of: :intervention, autosave: true
has_many :intervention_other_names, inverse_of: :intervention
has_many :design_group_interventions, inverse_of: :intervention
has_many :design_groups, :through => :design_group_interventions

add_mapping do
Expand Down
2 changes: 1 addition & 1 deletion app/models/intervention_other_name.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# StudyRelationship subclass that belongs to an intervention; the association names
# :intervention_other_names as its inverse on the other side.
# NOTE(review): this listing is a rendered diff, so the association appears twice —
# once with `autosave: true` and once without. Presumably the first line is the
# removed version and the second is the added one; confirm against the real file.
class InterventionOtherName < StudyRelationship
belongs_to :intervention, inverse_of: :intervention_other_names, autosave: true
belongs_to :intervention, inverse_of: :intervention_other_names

end
6 changes: 3 additions & 3 deletions app/models/outcome.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
class Outcome < StudyRelationship
has_many :outcome_counts, inverse_of: :outcome, autosave: true
has_many :outcome_analyses, inverse_of: :outcome, autosave: true
has_many :outcome_measurements, inverse_of: :outcome, autosave: true
has_many :outcome_counts, inverse_of: :outcome
has_many :outcome_analyses, inverse_of: :outcome
has_many :outcome_measurements, inverse_of: :outcome

add_mapping do
{
Expand Down
4 changes: 2 additions & 2 deletions app/models/outcome_analysis.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# StudyRelationship subclass that belongs to an outcome, has many
# outcome_analysis_groups, and reaches result_groups through those groups.
# NOTE(review): this listing is a rendered diff, so the first two associations
# appear twice — once with `autosave: true` and once without. Presumably the
# autosave lines are the removed versions; confirm against the real file.
class OutcomeAnalysis < StudyRelationship
belongs_to :outcome, inverse_of: :outcome_analyses, autosave: true
has_many :outcome_analysis_groups, inverse_of: :outcome_analysis, autosave: true
belongs_to :outcome, inverse_of: :outcome_analyses
has_many :outcome_analysis_groups, inverse_of: :outcome_analysis
has_many :result_groups, :through => :outcome_analysis_groups
end
4 changes: 2 additions & 2 deletions app/models/outcome_analysis_group.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# StudyRelationship subclass that belongs to both an outcome_analysis and a
# result_group; each association names :outcome_analysis_groups as its inverse.
# NOTE(review): this listing is a rendered diff, so both associations appear
# twice — once with `autosave: true` and once without. Presumably the autosave
# lines are the removed versions; confirm against the real file.
class OutcomeAnalysisGroup < StudyRelationship
belongs_to :outcome_analysis, inverse_of: :outcome_analysis_groups, autosave: true
belongs_to :result_group, inverse_of: :outcome_analysis_groups, autosave: true
belongs_to :outcome_analysis, inverse_of: :outcome_analysis_groups
belongs_to :result_group, inverse_of: :outcome_analysis_groups

end
4 changes: 2 additions & 2 deletions app/models/outcome_count.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# StudyRelationship subclass that belongs to both an outcome and a result_group.
# NOTE(review): this listing is a rendered diff, so both associations appear
# twice — once with `autosave: true` and once without. Presumably the autosave
# lines are the removed versions; confirm against the real file.
class OutcomeCount < StudyRelationship
belongs_to :outcome, autosave: true
belongs_to :result_group, autosave: true
belongs_to :outcome
belongs_to :result_group
end
4 changes: 2 additions & 2 deletions app/models/outcome_measurement.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# StudyRelationship subclass that belongs to both an outcome and a result_group.
# NOTE(review): this listing is a rendered diff, so both associations appear
# twice — once with `autosave: true` and once without. Presumably the autosave
# lines are the removed versions; confirm against the real file.
class OutcomeMeasurement < StudyRelationship
belongs_to :outcome, autosave: true
belongs_to :result_group, autosave: true
belongs_to :outcome
belongs_to :result_group
end
18 changes: 17 additions & 1 deletion app/models/provided_document.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,20 @@
class ProvidedDocument < ApplicationRecord
class ProvidedDocument < StudyRelationship

# Declares how provided-document entries in a study's JSON map onto rows of the
# provided_documents table:
#   table   - destination table name
#   root    - key path (documentSection -> largeDocumentModule -> largeDocs) to the entries
#   columns - one hash per column: :name is the column, :value is the JSON key it is read from,
#             and the optional :convert_to transforms the raw value
add_mapping do
{
table: :provided_documents,
root: [:documentSection, :largeDocumentModule, :largeDocs],
columns: [
{ name: :document_type, value: :label },
{ name: :has_protocol, value: :hasProtocol },
{ name: :has_icf, value: :hasIcf },
{ name: :has_sap, value: :hasSap },
{ name: :document_date, value: :date },
# The url converter takes two arguments (the filename value and the study's nct_id) and
# builds the link from the last two characters of nct_id, the full nct_id, and the filename.
{ name: :url, value: :filename, convert_to: ->(val, nct_id) { "https://ClinicalTrials.gov/ProvidedDocs/#{nct_id[-2..-1]}/#{nct_id}/#{val}" } },
]
}
end


def self.mapper(json)
return unless json.document_section
Expand Down
16 changes: 8 additions & 8 deletions app/models/result_group.rb
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
class ResultGroup < StudyRelationship

has_many :reported_events, autosave: true
has_many :milestones, autosave: true
has_many :drop_withdrawals, autosave: true
has_many :baseline_counts, autosave: true
has_many :baseline_measures, autosave: true
has_many :outcome_counts, autosave: true
has_many :outcome_measurements, autosave: true
has_many :outcome_analysis_groups, inverse_of: :result_group, autosave: true
has_many :reported_events
has_many :milestones
has_many :drop_withdrawals
has_many :baseline_counts
has_many :baseline_measures
has_many :outcome_counts
has_many :outcome_measurements
has_many :outcome_analysis_groups, inverse_of: :result_group
has_many :outcome_analyses, :through => :outcome_analysis_groups

add_mapping do
Expand Down
18 changes: 12 additions & 6 deletions app/models/study_downloader.rb
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
class StudyDownloader
def self.download_recently_updated
# ActiveRecord::Base.logger.silence do # silence method error
find_studies_to_update.each do |nct_id|
record = StudyJsonRecord.find_or_create_by(nct_id: nct_id, version: '2') { |r| r.content = {} }
puts "\nDownloading: #{nct_id}"
update_from_apiV2(record, nct_id)
silence_active_record do
list = find_studies_to_update
i = 0
print "downloading #{list.length} studies: 0%"
list.each do |nct_id|
record = StudyJsonRecord.find_or_create_by(nct_id: nct_id, version: '2') { |r| r.content = {} }
update_from_apiV2(record, nct_id)
i += 1
print "\rdownloading #{list.length} studies: #{(i / list.length.to_f * 100).round(2)}%"
end
print "\rdownloading #{list.length} studies: 100%\n"
end
end

Expand Down Expand Up @@ -33,7 +39,7 @@ def self.update_from_apiV2(record, nct_id)
attempts = 0
begin
attempts += 1
print ".".green
# print ".".green
content = ClinicalTrialsApiV2.study(nct_id)
record.update(content: content)

Expand Down
10 changes: 9 additions & 1 deletion app/models/study_json_record.rb
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def update_from_api
return update content: data, download_date: Time.now
end
rescue => e
puts e.message
Airbrake.notify(e)
end
end
Expand Down Expand Up @@ -423,7 +424,7 @@ def baseline_measurements_data
dispersion_value = measurement['BaselineMeasurementSpread']
ctgov_group_code = measurement['BaselineMeasurementGroupId']
denoms = @results_section.dig('BaselineCharacteristicsModule', 'BaselineDenomList', 'BaselineDenom')
denom = denoms.find {|k| k['BaselineDemonUnits'] == measurement ['BaselineDenomUnitsSelected'] }
denom = denoms.find {|k| k['BaselineDenomUnits'] == measure['BaselineDenomUnitsSelected'] }
counts = denom.dig('BaselineDenomCountList', 'BaselineDenomCount')
count = counts.find {|k| k['BaselineDenomCountGroupId'] == ctgov_group_code}
collection[:measurements] << {
Expand Down Expand Up @@ -1612,4 +1613,11 @@ def self.load_from_file(filename)
record.update(nct_id: nct_id, content: content)
end
end

# Runs a single study through two import paths in sequence.
#
# nct_id - identifier of the study to process.
#
# Steps, in order:
#   1. StudyDownloader.download([nct_id], '1'), then create_or_update_study on what it returns.
#   2. StudyDownloader.download([nct_id], '2'), then StudyJsonRecord::Worker#process_study(nct_id).
#
# Returns whatever process_study returns (it is the last expression).
# NOTE(review): '1' and '2' are presumably StudyJsonRecord version values — confirm against
# StudyDownloader.download, which is not visible here.
# NOTE(review): step 1 assumes download returns a single object responding to
# create_or_update_study, even though it is given an array of ids — confirm.
def self.import_and_compare(nct_id)
record = StudyDownloader.download([nct_id], '1')
record.create_or_update_study
StudyDownloader.download([nct_id], '2')
StudyJsonRecord::Worker.new.process_study(nct_id)
end
end
36 changes: 22 additions & 14 deletions app/models/study_json_record/worker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def self.reset
StudyJsonRecord.where(version: '2').update_all(saved_study_at: nil) # rubocop:disable Rails/SkipsModelValidations
end

def save_children(parents)
def save_children(parents, indent=" ")
return unless parents.first

klass = parents.first.class
Expand All @@ -70,16 +70,20 @@ def save_children(parents)
end
next if collection.empty?

print "#{indent}#{collection.first.class.table_name} - #{collection.count}"
collection.first.class.import(collection)
save_children(collection)
puts "\r#{indent}#{collection.first.class.table_name} - #{collection.count}"
save_children(collection," #{indent}")
end
end

def import_all
records = StudyJsonRecord.where(version: '2').where('updated_at > saved_study_at OR saved_study_at IS NULL').count
while records > 0
process(5000)
def import_all(batch_size=5000)
silence_active_record do
records = StudyJsonRecord.where(version: '2').where('updated_at > saved_study_at OR saved_study_at IS NULL').count
while records > 0
process(batch_size)
records = StudyJsonRecord.where(version: '2').where('updated_at > saved_study_at OR saved_study_at IS NULL').count
end
end
end

Expand All @@ -94,6 +98,7 @@ def process(count = 1, records = nil)

Rails.logger.debug { "records: #{records.count}" }

puts "removing records: #{records.count}".red
remove_study_data(records.map(&:nct_id))

@collections = Hash.new { |h, k| h[k] = [] }
Expand Down Expand Up @@ -131,7 +136,7 @@ def prepare_children(parent, content, children)

entries.each_with_index do |entry, index|
values = mapping[:columns].map do |column|
[column[:name], get_value(column, entry, index)]
[column[:name], get_value(column, entry, index, nct_id)]
end
row = model.new(values.to_h)
row.nct_id = nct_id
Expand Down Expand Up @@ -166,7 +171,7 @@ def add_missing_keys(item, keys)

unless item.key?(key.to_s)
item[key.to_s] = remaining_keys.empty? ? [{}] : [add_missing_keys({}, remaining_keys)]
puts "🛑 data after adding new key to item: #{item}"
# puts "🛑 data after adding new key to item: #{item}"
end

item
Expand All @@ -177,7 +182,7 @@ def flatten(path, data, parent=nil)
child_key = path.first
result = []
if !child_key.nil? && !data.first.key?(child_key.to_s) && path == [:categories, :measurements]
puts "🛑 #{path} found in path but not in data}"
# puts "🛑 #{path} found in path but not in data}"
data.each do |item|
add_missing_keys(item, path)
end
Expand All @@ -204,6 +209,7 @@ def flatten(path, data, parent=nil)
end

def process_mapping(mapping, records)
print " #{mapping[:table]}"
model = mapping[:table].to_s.classify.constantize # get the model from the table name
root = mapping[:root].map(&:to_s) if mapping[:root] # normalize root path to array of strings
collection = [] # this array will collect all the models to be imported
Expand All @@ -229,7 +235,7 @@ def process_mapping(mapping, records)
# performing the mapping on the json objects
entries.each_with_index do |entry, index|
values = mapping[:columns].map do |column|
[column[:name], get_value(column, entry, index)]
[column[:name], get_value(column, entry, index, nct_id)]
end
row = model.new(values.to_h)
row.nct_id = nct_id
Expand All @@ -240,7 +246,9 @@ def process_mapping(mapping, records)
end

# import models
print "\r #{mapping[:table]} - #{collection.count}"
model.import(collection)
puts "\r#{mapping[:table]} - #{collection.count}"
if mapping[:index]
index = [:nct_id] + mapping[:index]
collection.each do |row|
Expand All @@ -261,20 +269,20 @@ def remove_study_data(nct_ids)
end

# call a converter method or proc to convert the value
def convert_value(column, value)
def convert_value(column, value, nct_id = nil)
case column[:convert_to]
when Symbol
send(column[:convert_to], value)
when Proc
column[:convert_to].call(value)
column[:convert_to].arity == 1 ? column[:convert_to].call(value) : column[:convert_to].call(value, nct_id)
else
value
end
end

# column - describes where to get the value & how to convert the value before saving
# root - the json object to search for the value
def get_value(column, root, index = nil)
def get_value(column, root, index = nil, nct_id = nil)
# get value from json
case column[:value]
when Array # deep level in hierarchy
Expand All @@ -293,7 +301,7 @@ def get_value(column, root, index = nil)
value = column[:value]
end

convert_value(column, value)
convert_value(column, value, nct_id)
rescue StandardError
Rails.logger.debug { "Error getting #{column[:value]} from #{root}" }
raise $ERROR_INFO
Expand Down
2 changes: 1 addition & 1 deletion app/models/study_relationship.rb
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def self.loadable_tables

# Returns the list of model classes, memoized in @models after the first call.
# Each name in connection.tables (minus blacklist) is singularized, camelized
# and constantized into a class.
# NOTE(review): two assignments to @models appear back to back because this
# listing is a rendered diff. Presumably the first is the removed line and the
# second (which sorts the table names before mapping) is the added one — confirm.
def self.study_models
return @models if @models
@models = (connection.tables - blacklist).map{|k| k.singularize.camelize.constantize }
@models = (connection.tables - blacklist).sort.map{|k| k.singularize.camelize.constantize }
end

def self.remove_all_data
Expand Down
2 changes: 1 addition & 1 deletion app/models/util/updater.rb
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ def htime(seconds)
def update_study(nct_id)
begin
stime = Time.now
record = StudyJsonRecord.find_by(nct_id: nct_id) || StudyJsonRecord.create(nct_id: nct_id, content: {})
record = StudyJsonRecord.find_by(nct_id: nct_id, version: '1') || StudyJsonRecord.create(nct_id: nct_id, content: {}, version: '1')
changed = record.update_from_api unless ENV['STUDY_SECTIONS']

if record.blank? || record.content.blank?
Expand Down
5 changes: 3 additions & 2 deletions app/models/util/updater_v2.rb
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,11 @@ def execute
db_mgr.remove_constraints
@load_event.log("1/11 removed constraints")


# 2. update studies
log("#{@schema}: updating studies...")
update_studies
StudyDownloader.download_recently_updated
worker = StudyJsonRecord::Worker.new
worker.import_all
@load_event.log("2/11 updated studies")


Expand Down
Loading

0 comments on commit 347c576

Please sign in to comment.