Skip to content

Commit

Permalink
Merge pull request #1185 from ctti-clinicaltrials/fix-ctgov-v2
Browse files Browse the repository at this point in the history
small changes
  • Loading branch information
micronix authored Jun 1, 2024
2 parents 599b9e5 + 1533ebf commit ea08374
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 10 deletions.
4 changes: 2 additions & 2 deletions app/models/clinical_trials_api_v2.rb
Original file line number Diff line number Diff line change
Expand Up @@ -93,13 +93,13 @@ def self.all(limit: 1_000_000, days_back: nil)
}
end

print "\rapi studies found: #{items.length}"
print "\rstudies found: #{items.length}"
break if items.size >= limit

page_token = json_response["nextPageToken"]
break if page_token.nil?
end
print "\rapi v2 studies found: #{items.length}\n"
print "\rstudies found: #{items.length}\n"
return items
end

Expand Down
10 changes: 8 additions & 2 deletions app/models/study_downloader.rb
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
class StudyDownloader
# Downloads the version '2' JSON record for every nct_id returned by
# find_studies_to_update, creating the record (with empty content) when it
# does not exist yet, and prints a single-line percentage progress indicator.
#
# Progress frames are rendered with a fixed width so that each "\r" rewrite
# fully covers the previous frame (a shorter frame would otherwise leave
# stale characters from the longer one on screen).
def self.download_recently_updated
  silence_active_record do
    nct_ids = find_studies_to_update
    total = nct_ids.length
    progress = ->(percent) { format('downloading %d studies: %6.2f%%', total, percent) }

    print progress.call(0)
    nct_ids.each.with_index(1) do |nct_id, done|
      record = StudyJsonRecord.find_or_create_by(nct_id: nct_id, version: '2') { |r| r.content = {} }
      update_from_apiV2(record, nct_id)
      print "\r#{progress.call(done / total.to_f * 100)}"
    end
    # Always finish on 100% (also covers the empty-list case) and end the line.
    print "\r#{progress.call(100)}\n"
  end
end

Expand Down Expand Up @@ -33,7 +39,7 @@ def self.update_from_apiV2(record, nct_id)
attempts = 0
begin
attempts += 1
print ".".green
# print ".".green
content = ClinicalTrialsApiV2.study(nct_id)
record.update(content: content, download_date: Time.now)
return record
Expand Down
2 changes: 1 addition & 1 deletion app/models/util/updater.rb
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ def htime(seconds)
def update_study(nct_id)
begin
stime = Time.now
record = StudyJsonRecord.find_by(nct_id: nct_id) || StudyJsonRecord.create(nct_id: nct_id, content: {})
record = StudyJsonRecord.find_by(nct_id: nct_id, version: '1') || StudyJsonRecord.create(nct_id: nct_id, content: {}, version: '1')
changed = record.update_from_api unless ENV['STUDY_SECTIONS']

if record.blank? || record.content.blank?
Expand Down
7 changes: 7 additions & 0 deletions lib/tasks/load.rake
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,13 @@ namespace :db do
end
end

# Processes the version '2' StudyJsonRecord rows of one study through
# StudyJsonRecord::Worker#process.
#
# Argument:
#   nct_id - identifier of the study to process (required).
#
# The first argument passed to Worker#process (1) is kept as-is; its meaning
# is defined by the worker, which is not visible from this task.
desc 'process the version 2 json record(s) of a single study'
task :import_study, [:nct_id] => :environment do |_t, args|
  nct_id = args[:nct_id]
  # Without an id the lookup would silently match rows whose nct_id IS NULL.
  abort 'nct_id argument is required, e.g. import_study[NCT01234567]' if nct_id.to_s.strip.empty?

  records = StudyJsonRecord.where(nct_id: nct_id, version: '2')
  StudyJsonRecord::Worker.new.process(1, records)
end

desc 'process study json records'
task :import, [:schema] => :environment do |t, args|
with_search_path(args[:schema]) do
Expand Down
60 changes: 55 additions & 5 deletions lib/tasks/stats.rake
Original file line number Diff line number Diff line change
Expand Up @@ -10,27 +10,77 @@ namespace :stats do

# For every model in StudyRelationship.study_models, lists the nct_ids whose
# row count in ctgov.<table> differs from the row count in ctgov_v2.<table>,
# or that have no rows at all in ctgov_v2.<table>. The ids are written to
# comparisons/<table>.csv (header row first) and echoed to stdout.
#
# The LEFT JOIN starts from the ctgov side, so studies that exist only in
# ctgov_v2 are not reported.
desc 'compare'
task :compare => :environment do
  # Required here because the top of this rake file is not guaranteed to load them.
  require 'csv'
  require 'fileutils'

  # Start from an empty output directory so files from earlier runs do not linger.
  FileUtils.rm_rf('comparisons')
  FileUtils.mkdir_p('comparisons')

  StudyRelationship.study_models.each do |model|
    sql = <<~SQL
      SELECT
        original.nct_id
      FROM (
        SELECT
          nct_id,
          COUNT(*) AS count
        FROM ctgov.#{model.table_name}
        GROUP BY nct_id
      ) AS original
      LEFT JOIN (
        SELECT
          nct_id,
          COUNT(*) AS count
        FROM ctgov_v2.#{model.table_name}
        GROUP BY nct_id
      ) AS future ON future.nct_id = original.nct_id
      WHERE original.count != future.count OR future.count IS NULL
    SQL

    results = ActiveRecord::Base.connection.execute(sql)
    CSV.open("comparisons/#{model.table_name}.csv", 'w') do |csv|
      # Header row: the column names of the result set.
      csv << results.fields
      results.each { |row| csv << row.values }
    end
    puts results.to_a
  end
end

# Prints the nct_id of every row in study_json_records that has no matching
# row in ctgov_v2.studies (one id per line).
#
# NOTE(review): the query does not filter on study_json_records.version —
# confirm that listing records of every version is intended.
desc 'find missing studies'
task :missing => :environment do
  missing_sql = <<~SQL
    SELECT
    SJR.nct_id
    FROM study_json_records SJR
    LEFT JOIN ctgov_v2.studies S ON S.nct_id = SJR.nct_id
    WHERE S.nct_id IS NULL
  SQL

  ActiveRecord::Base.connection.execute(missing_sql).each { |row| puts row['nct_id'] }
end

# Prints, for every model in StudyRelationship.study_models, how many rows
# the given study has in ctgov.<table> versus ctgov_v2.<table>, formatted as
# "<table>: <ctgov count> vs <ctgov_v2 count>".
#
# Argument:
#   nct_id - identifier of the study to compare (required).
desc 'compare studies'
task :compare_studies, [:nct_id] => :environment do |_t, args|
  nct_id = args[:nct_id]
  abort 'nct_id argument is required, e.g. compare_studies[NCT01234567]' if nct_id.to_s.strip.empty?

  connection = ActiveRecord::Base.connection
  # The id arrives from the command line: quote it so it is a valid (and safe)
  # SQL string literal instead of raw interpolated text.
  quoted_nct_id = connection.quote(nct_id)

  # select_value returns the first column of the first row, independent of how
  # the adapter shapes result rows (indexing a hash-shaped row with [0] yields nil).
  count_rows = lambda do |schema, table|
    connection.select_value(<<~SQL)
      SELECT COUNT(*)
      FROM #{schema}.#{table}
      WHERE nct_id = #{quoted_nct_id}
    SQL
  end

  StudyRelationship.study_models.each do |model|
    original = count_rows.call('ctgov', model.table_name)
    # The comparison side must read the v2 schema, not ctgov again.
    future = count_rows.call('ctgov_v2', model.table_name)
    puts "#{model.table_name}: #{original} vs #{future}"
  end
end
end

0 comments on commit ea08374

Please sign in to comment.