Skip to content

Commit

Permalink
Merge pull request #153 from DigitalNZ/resumable-harvests
Browse files Browse the repository at this point in the history
Resumable harvests
  • Loading branch information
richardmatthewsdev authored Mar 3, 2021
2 parents 91b312a + fb1120b commit b682335
Show file tree
Hide file tree
Showing 16 changed files with 211 additions and 57 deletions.
7 changes: 4 additions & 3 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,18 @@ gem 'oai'
gem 'ougai', '~> 1.8'
gem 'parse-cron'
gem 'puma'
gem 'rails', '~> 6.0.3'
gem 'rails', '~> 6.0.3.5'
gem 'responders'
gem 'sidekiq'
gem 'sinatra', require: nil
gem 'supplejack_common', github: 'DigitalNZ/supplejack_common', tag: 'v2.9.0'
gem 'supplejack_common', github: 'DigitalNZ/supplejack_common', tag: 'v2.10.0'
gem 'whenever', require: false
gem 'brakeman'
gem 'moderate_parameters'
gem 'amazing_print'

group :test do
gem 'database_cleaner'
gem 'database_cleaner-mongoid'
gem 'factory_bot_rails'
gem 'rails-controller-testing'
gem 'rspec-activemodel-mocks'
Expand Down
103 changes: 57 additions & 46 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
GIT
remote: https://github.com/DigitalNZ/supplejack_common
revision: c2085f9c21bcd1ed5d823ee850b0b6e5d88ee590
tag: v2.9.0
revision: 570099ff3b2fb94ba1436eac41cfe1a83880b6f0
tag: v2.10.0
specs:
supplejack_common (0.0.2)
actionpack
actionpack (~> 6.0.3.5)
activesupport
aws-sdk-s3
chronic (<= 0.10.2)
Expand Down Expand Up @@ -100,25 +100,26 @@ GEM
airbrake-ruby (~> 5.1)
airbrake-ruby (5.2.0)
rbtree3 (~> 0.5)
ast (2.4.1)
amazing_print (1.2.2)
ast (2.4.2)
aws-eventstream (1.1.0)
aws-partitions (1.405.0)
aws-sdk-core (3.110.0)
aws-partitions (1.426.0)
aws-sdk-core (3.112.0)
aws-eventstream (~> 1, >= 1.0.2)
aws-partitions (~> 1, >= 1.239.0)
aws-sigv4 (~> 1.1)
jmespath (~> 1.0)
aws-sdk-kms (1.39.0)
aws-sdk-core (~> 3, >= 3.109.0)
aws-sdk-kms (1.42.0)
aws-sdk-core (~> 3, >= 3.112.0)
aws-sigv4 (~> 1.1)
aws-sdk-s3 (1.86.1)
aws-sdk-core (~> 3, >= 3.109.0)
aws-sdk-kms (~> 1.26)
aws-sdk-s3 (1.88.0)
aws-sdk-core (~> 3, >= 3.112.0)
aws-sdk-kms (~> 1)
aws-sigv4 (~> 1.1)
aws-sigv4 (1.2.2)
aws-eventstream (~> 1, >= 1.0.2)
brakeman (4.10.0)
bson (4.11.1)
brakeman (5.0.0)
bson (4.12.0)
builder (3.2.4)
byebug (11.1.3)
case_transform (0.2)
Expand All @@ -129,6 +130,13 @@ GEM
connection_pool (2.2.3)
crass (1.0.6)
database_cleaner (1.8.5)
database_cleaner-mongo (1.8.1)
database_cleaner (~> 1.8.0)
mongo
database_cleaner-mongoid (1.8.1)
database_cleaner (~> 1.8.0)
database_cleaner-mongo (~> 1.8.0)
mongoid
diff-lcs (1.4.4)
dimensions (1.3.0)
domain_name (0.5.20190701)
Expand All @@ -139,12 +147,14 @@ GEM
factory_bot_rails (6.1.0)
factory_bot (~> 6.1.0)
railties (>= 5.0.0)
faraday (1.1.0)
faraday (1.3.0)
faraday-net_http (~> 1.0)
multipart-post (>= 1.2, < 3)
ruby2_keywords
faraday-net_http (1.0.1)
faraday_middleware (1.0.0)
faraday (~> 1.0)
ffi (1.13.1)
ffi (1.14.2)
figaro (1.2.0)
thor (>= 0.14.0, < 2)
globalid (0.4.2)
Expand Down Expand Up @@ -194,7 +204,7 @@ GEM
method_source (1.0.0)
mime-types (3.3.1)
mime-types-data (~> 3.2015)
mime-types-data (3.2020.1104)
mime-types-data (3.2021.0225)
mimemagic (0.3.5)
mini_mime (1.0.2)
mini_portile2 (2.5.0)
Expand Down Expand Up @@ -224,12 +234,12 @@ GEM
builder (>= 3.1.0)
faraday
faraday_middleware
oj (3.10.16)
ougai (1.8.5)
oj (3.11.2)
ougai (1.9.1)
oj (~> 3.10)
parallel (1.20.1)
parse-cron (0.1.4)
parser (2.7.2.0)
parser (3.0.0.0)
ast (~> 2.4.1)
pry (0.13.1)
coderay (~> 1.1)
Expand All @@ -239,7 +249,7 @@ GEM
pry (~> 0.13.0)
pry-rails (0.3.9)
pry (>= 0.10.4)
puma (5.1.1)
puma (5.2.1)
nio4r (~> 2.0)
racc (1.5.2)
rack (2.2.3)
Expand Down Expand Up @@ -284,7 +294,7 @@ GEM
ffi (~> 1.0)
rbtree3 (0.6.0)
redis (4.2.5)
regexp_parser (2.0.0)
regexp_parser (2.0.3)
request_store (1.5.0)
rack (>= 1.4)
responders (3.0.1)
Expand All @@ -301,64 +311,64 @@ GEM
activemodel (>= 3.0)
activesupport (>= 3.0)
rspec-mocks (>= 2.99, < 4.0)
rspec-core (3.10.0)
rspec-core (3.10.1)
rspec-support (~> 3.10.0)
rspec-expectations (3.10.0)
rspec-expectations (3.10.1)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.10.0)
rspec-mocks (3.10.0)
rspec-mocks (3.10.2)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.10.0)
rspec-rails (4.0.1)
rspec-rails (4.0.2)
actionpack (>= 4.2)
activesupport (>= 4.2)
railties (>= 4.2)
rspec-core (~> 3.9)
rspec-expectations (~> 3.9)
rspec-mocks (~> 3.9)
rspec-support (~> 3.9)
rspec-core (~> 3.10)
rspec-expectations (~> 3.10)
rspec-mocks (~> 3.10)
rspec-support (~> 3.10)
rspec-sidekiq (3.1.0)
rspec-core (~> 3.0, >= 3.0.0)
sidekiq (>= 2.4.0)
rspec-support (3.10.0)
rubocop (1.6.1)
rspec-support (3.10.2)
rubocop (1.9.1)
parallel (~> 1.10)
parser (>= 2.7.1.5)
parser (>= 3.0.0.0)
rainbow (>= 2.2.2, < 4.0)
regexp_parser (>= 1.8, < 3.0)
rexml
rubocop-ast (>= 1.2.0, < 2.0)
ruby-progressbar (~> 1.7)
unicode-display_width (>= 1.4.0, < 2.0)
rubocop-ast (1.3.0)
unicode-display_width (>= 1.4.0, < 3.0)
rubocop-ast (1.4.1)
parser (>= 2.7.1.5)
rubocop-packaging (0.5.1)
rubocop (>= 0.89, < 2.0)
rubocop-performance (1.9.1)
rubocop-performance (1.9.2)
rubocop (>= 0.90.0, < 2.0)
rubocop-ast (>= 0.4.0)
rubocop-rails (2.9.0)
rubocop-rails (2.9.1)
activesupport (>= 4.2.0)
rack (>= 1.1)
rubocop (>= 0.90.0, < 2.0)
rubocop-rails_config (1.1.1)
rubocop-rails_config (1.2.2)
railties (>= 5.0)
rubocop (>= 1.0)
rubocop-ast (>= 1.0.1)
rubocop-packaging (~> 0.4)
rubocop-packaging (~> 0.5)
rubocop-performance (~> 1.3)
rubocop-rails (~> 2.0)
rubocop-rspec (2.0.1)
rubocop-rspec (2.2.0)
rubocop (~> 1.0)
rubocop-ast (>= 1.1.0)
ruby-progressbar (1.10.1)
ruby2_keywords (0.0.2)
ruby-progressbar (1.11.0)
ruby2_keywords (0.0.4)
ruby_dep (1.5.0)
sanitize (5.2.3)
crass (~> 1.0.2)
nokogiri (>= 1.8.0)
nokogumbo (~> 2.0)
sidekiq (6.1.2)
sidekiq (6.1.3)
connection_pool (>= 2.2.2)
rack (~> 2.0)
redis (>= 4.2.0)
Expand All @@ -381,13 +391,13 @@ GEM
thor (1.1.0)
thread_safe (0.3.6)
tilt (2.0.10)
timecop (0.9.2)
timecop (0.9.3)
tzinfo (1.2.9)
thread_safe (~> 0.1)
unf (0.1.4)
unf_ext
unf_ext (0.0.7.7)
unicode-display_width (1.7.0)
unicode-display_width (2.0.0)
websocket-driver (0.7.3)
websocket-extensions (>= 0.1.0)
websocket-extensions (0.1.5)
Expand All @@ -403,10 +413,11 @@ DEPENDENCIES
active_model_serializers (~> 0.10.7)
activeresource
airbrake
amazing_print
aws-sdk-s3 (~> 1)
brakeman
chronic
database_cleaner
database_cleaner-mongoid
factory_bot_rails
figaro
kaminari
Expand All @@ -421,7 +432,7 @@ DEPENDENCIES
pry-byebug
pry-rails
puma
rails (~> 6.0.3)
rails (~> 6.0.3.5)
rails-controller-testing
responders
rspec-activemodel-mocks
Expand Down
18 changes: 18 additions & 0 deletions app/models/abstract_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ class AbstractJob

index status: 1, start_time: 1, created_at: 1, updated_at: 1, parser_id: 1

after_save :check_if_job_should_be_resumed, if: :status_changed?

field :start_time, type: DateTime
field :end_time, type: DateTime
field :updated_at, type: DateTime
Expand All @@ -32,6 +34,7 @@ class AbstractJob
embeds_many :invalid_records
embeds_many :failed_records
embeds_one :harvest_failure
embeds_many :states

belongs_to :harvest_schedule, optional: true

Expand Down Expand Up @@ -98,6 +101,7 @@ def required_enrichments
state :active
state :finished
state :failed
state :resume
state :stopped

event :start do
Expand All @@ -122,6 +126,16 @@ def required_enrichments
transitions to: :finished
end

event :resume do
after do
self.records_count = self.posted_records_count
save!
self.enqueue
end

transitions to: :active
end

event :error do
after do
self.start_time = Time.zone.now if start_time.blank?
Expand Down Expand Up @@ -203,4 +217,8 @@ def increment_processed_count!
self.processed_count += 1
save!
end

def check_if_job_should_be_resumed
self.resume! if self.status_change.last == 'resume'
end
end
10 changes: 10 additions & 0 deletions app/models/harvest_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,16 @@ def records
options[:limit] = limit.to_i if limit.to_i.positive?
options[:from] = parser.last_harvested_at if incremental? && parser.last_harvested_at

# pass details that are needed for resuming the job ...
options[:job] = self

if self.states.any?
options[:page] = self.states.last.page
options[:limit] = self.states.last.limit
options[:counter] = self.states.last.counter
options[:base_urls] = self.states.last.base_urls
end

parser.load_file(environment)
parser_klass = parser.loader.parser_class
parser_klass.environment = environment if environment.present?
Expand Down
15 changes: 15 additions & 0 deletions app/models/state.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# frozen_string_literal: true

# app/models/state.rb
class State
include Mongoid::Document
include Mongoid::Timestamps

embedded_in :abstract_job

field :page, type: Integer
field :per_page, type: Integer
field :limit, type: Integer
field :counter, type: Integer
field :base_urls, type: Array
end
4 changes: 2 additions & 2 deletions app/models/supplejack_api/record.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ class Record < ActiveResource::Base
include Enrichable

def self.find(query, page)
Rails.logger.info "query: #{query}"
Rails.logger.info "page: #{page}"
Rails.logger.debug "query: #{query}"
Rails.logger.debug "page: #{page}"
super(:all, params: { search: query, search_options: page, api_key: ENV['HARVESTER_API_KEY'] })
rescue ActiveResource::ServerError => e
Airbrake.notify(e, error_message: "The api request failed with: query: #{query} and page: #{page}. #{e&.message}")
Expand Down
1 change: 1 addition & 0 deletions app/workers/abstract_worker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def sanitize_id(id)

def api_update_finished?
job.reload

job.posted_records_count == job.records_count
end

Expand Down
Loading

0 comments on commit b682335

Please sign in to comment.