diff --git a/Gemfile b/Gemfile index c5ddcc98..a3a46ee7 100644 --- a/Gemfile +++ b/Gemfile @@ -6,7 +6,7 @@ git_source(:github) { |repo| "https://github.com/#{repo}.git" } ruby '3.2.5' # Bundle edge Rails instead: gem 'rails', github: 'rails/rails', branch: 'main' -gem 'rails', '~> 7.1.3.3' +gem 'rails', '~> 7.1.4.1' # The original asset pipeline for Rails [https://github.com/rails/sprockets-rails] gem 'sprockets-rails' diff --git a/Gemfile.lock b/Gemfile.lock index ea0547bb..a9aa464a 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -13,35 +13,35 @@ GIT GEM remote: https://rubygems.org/ specs: - actioncable (7.1.3.4) - actionpack (= 7.1.3.4) - activesupport (= 7.1.3.4) + actioncable (7.1.4.1) + actionpack (= 7.1.4.1) + activesupport (= 7.1.4.1) nio4r (~> 2.0) websocket-driver (>= 0.6.1) zeitwerk (~> 2.6) - actionmailbox (7.1.3.4) - actionpack (= 7.1.3.4) - activejob (= 7.1.3.4) - activerecord (= 7.1.3.4) - activestorage (= 7.1.3.4) - activesupport (= 7.1.3.4) + actionmailbox (7.1.4.1) + actionpack (= 7.1.4.1) + activejob (= 7.1.4.1) + activerecord (= 7.1.4.1) + activestorage (= 7.1.4.1) + activesupport (= 7.1.4.1) mail (>= 2.7.1) net-imap net-pop net-smtp - actionmailer (7.1.3.4) - actionpack (= 7.1.3.4) - actionview (= 7.1.3.4) - activejob (= 7.1.3.4) - activesupport (= 7.1.3.4) + actionmailer (7.1.4.1) + actionpack (= 7.1.4.1) + actionview (= 7.1.4.1) + activejob (= 7.1.4.1) + activesupport (= 7.1.4.1) mail (~> 2.5, >= 2.5.4) net-imap net-pop net-smtp rails-dom-testing (~> 2.2) - actionpack (7.1.3.4) - actionview (= 7.1.3.4) - activesupport (= 7.1.3.4) + actionpack (7.1.4.1) + actionview (= 7.1.4.1) + activesupport (= 7.1.4.1) nokogiri (>= 1.8.5) racc rack (>= 2.2.4) @@ -49,37 +49,37 @@ GEM rack-test (>= 0.6.3) rails-dom-testing (~> 2.2) rails-html-sanitizer (~> 1.6) - actiontext (7.1.3.4) - actionpack (= 7.1.3.4) - activerecord (= 7.1.3.4) - activestorage (= 7.1.3.4) - activesupport (= 7.1.3.4) + actiontext (7.1.4.1) + actionpack (= 7.1.4.1) + activerecord (= 7.1.4.1) + activestorage (= 7.1.4.1) + activesupport (= 7.1.4.1) globalid (>= 0.6.0) nokogiri (>= 1.8.5) - actionview (7.1.3.4) - activesupport (= 7.1.3.4) + actionview (7.1.4.1) + activesupport (= 7.1.4.1) builder (~> 3.1) erubi (~> 1.11) rails-dom-testing (~> 2.2) rails-html-sanitizer (~> 1.6) - activejob (7.1.3.4) - activesupport (= 7.1.3.4) + activejob (7.1.4.1) + activesupport (= 7.1.4.1) globalid (>= 0.3.6) - activemodel (7.1.3.4) - activesupport (= 7.1.3.4) - activerecord (7.1.3.4) - activemodel (= 7.1.3.4) - activesupport (= 7.1.3.4) + activemodel (7.1.4.1) + activesupport (= 7.1.4.1) + activerecord (7.1.4.1) + activemodel (= 7.1.4.1) + activesupport (= 7.1.4.1) timeout (>= 0.4.0) activerecord-nulldb-adapter (1.0.1) activerecord (>= 5.2.0, < 7.2) - activestorage (7.1.3.4) - actionpack (= 7.1.3.4) - activejob (= 7.1.3.4) - activerecord (= 7.1.3.4) - activesupport (= 7.1.3.4) + activestorage (7.1.4.1) + actionpack (= 7.1.4.1) + activejob (= 7.1.4.1) + activerecord (= 7.1.4.1) + activesupport (= 7.1.4.1) marcel (~> 1.0) - activesupport (7.1.3.4) + activesupport (7.1.4.1) base64 bigdecimal concurrent-ruby (~> 1.0, >= 1.0.2) @@ -152,7 +152,7 @@ GEM railties (>= 4.1.0) responders warden (~> 1.2.3) - devise-two-factor (5.0.0) + devise-two-factor (6.0.0) activesupport (~> 7.0) devise (~> 4.0) railties (~> 7.0) @@ -213,10 +213,10 @@ GEM http-cookie (1.0.5) domain_name (~> 0.5) http-form_data (2.3.0) - i18n (1.14.5) + i18n (1.14.6) concurrent-ruby (~> 1.0) io-console (0.7.2) - irb (1.14.0) + irb (1.14.1) rdoc (>= 4.0.0) reline (>= 0.4.2) jaro_winkler (1.5.6) @@ -262,13 +262,13 @@ GEM mime-types-data (3.2024.0507) mini_mime (1.1.5) minitar (0.9) - minitest (5.24.1) + minitest (5.25.1) multi_json (1.15.0) mutex_m (0.2.0) mysql2 (0.5.6) net-http (0.4.1) uri - net-imap (0.4.11) + net-imap (0.5.0) date net-protocol net-pop (0.1.2) @@ -279,13 +279,13 @@ GEM net-protocol netrc (0.11.0) nio4r (2.7.3) - nokogiri (1.16.6-aarch64-linux) + nokogiri (1.16.7-aarch64-linux) racc (~> 1.4) - nokogiri (1.16.6-arm64-darwin) + nokogiri (1.16.7-arm64-darwin) racc (~> 1.4) - nokogiri (1.16.6-x86_64-darwin) + nokogiri (1.16.7-x86_64-darwin) racc (~> 1.4) - nokogiri (1.16.6-x86_64-linux) + nokogiri (1.16.7-x86_64-linux) racc (~> 1.4) optparse (0.5.0) orm_adapter (0.5.0) @@ -302,11 +302,11 @@ GEM psych (5.1.2) stringio public_suffix (5.0.5) - puma (6.4.2) + puma (6.4.3) nio4r (~> 2.0) raabro (1.4.0) - racc (1.8.0) - rack (3.1.7) + racc (1.8.1) + rack (3.1.8) rack-mini-profiler (3.3.1) rack (>= 1.2.0) rack-proxy (0.7.7) @@ -318,20 +318,20 @@ GEM rackup (2.1.0) rack (>= 3) webrick (~> 1.8) - rails (7.1.3.4) - actioncable (= 7.1.3.4) - actionmailbox (= 7.1.3.4) - actionmailer (= 7.1.3.4) - actionpack (= 7.1.3.4) - actiontext (= 7.1.3.4) - actionview (= 7.1.3.4) - activejob (= 7.1.3.4) - activemodel (= 7.1.3.4) - activerecord (= 7.1.3.4) - activestorage (= 7.1.3.4) - activesupport (= 7.1.3.4) + rails (7.1.4.1) + actioncable (= 7.1.4.1) + actionmailbox (= 7.1.4.1) + actionmailer (= 7.1.4.1) + actionpack (= 7.1.4.1) + actiontext (= 7.1.4.1) + actionview (= 7.1.4.1) + activejob (= 7.1.4.1) + activemodel (= 7.1.4.1) + activerecord (= 7.1.4.1) + activestorage (= 7.1.4.1) + activesupport (= 7.1.4.1) bundler (>= 1.15.0) - railties (= 7.1.3.4) + railties (= 7.1.4.1) rails-controller-testing (1.0.5) actionpack (>= 5.0.1.rc1) actionview (>= 5.0.1.rc1) @@ -343,9 +343,9 @@ GEM rails-html-sanitizer (1.6.0) loofah (~> 2.21) nokogiri (~> 1.14) - railties (7.1.3.4) - actionpack (= 7.1.3.4) - activesupport (= 7.1.3.4) + railties (7.1.4.1) + actionpack (= 7.1.4.1) + activesupport (= 7.1.4.1) irb rackup (>= 1.0.0) rake (>= 12.2) @@ -358,7 +358,7 @@ GEM redis-client (0.22.1) connection_pool regexp_parser (2.9.1) - reline (0.5.9) + reline (0.5.10) io-console (~> 0.5) responders (3.1.1) actionpack (>= 5.2) @@ -461,7 +461,7 @@ GEM sprockets (>= 3.0.0) stringio (3.1.1) strscan (3.1.0) - thor (1.3.1) + thor (1.3.2) timeout (0.4.1) tzinfo (2.0.6) concurrent-ruby (~> 1.0) @@ -480,7 +480,7 @@ GEM addressable (>= 2.8.0) crack (>= 0.3.2) hashdiff (>= 0.4.0, < 2.0.0) - webrick (1.8.1) + webrick (1.8.2) websocket (1.2.11) websocket-driver (0.7.6) websocket-extensions (>= 0.1.0) @@ -489,7 +489,7 @@ GEM nokogiri (~> 1.8) yard (0.9.36) yomu (0.1.5) - zeitwerk (2.6.13) + zeitwerk (2.7.1) zlib (3.1.1) PLATFORMS @@ -527,7 +527,7 @@ DEPENDENCIES pry-byebug puma (~> 6.0) rack-mini-profiler - rails (~> 7.1.3.3) + rails (~> 7.1.4.1) rails-controller-testing retriable rqrcode diff --git a/app/supplejack/extraction/enrichment_extraction.rb b/app/supplejack/extraction/enrichment_extraction.rb index 572d074f..22b5e747 100644 --- a/app/supplejack/extraction/enrichment_extraction.rb +++ b/app/supplejack/extraction/enrichment_extraction.rb @@ -11,6 +11,18 @@ def initialize(request, record, page = 1, extraction_folder = nil) @extraction_folder = extraction_folder end + def extract + ::Retriable.retriable do + @document = if @extraction_definition.evaluate_javascript? + Extraction::JavascriptRequest.new(url:, params:).get + else + Extraction::Request.new(url:, params:, headers:, method: http_method).send(http_method) + end + end + rescue StandardError => e + ::Sidekiq.logger.info "Extraction error: #{e}" if defined?(Sidekiq) + end + def valid? url.exclude?('evaluation-error') end diff --git a/spec/supplejack/extraction/enrichment_extraction_spec.rb b/spec/supplejack/extraction/enrichment_extraction_spec.rb index c350ebae..27225dd2 100644 --- a/spec/supplejack/extraction/enrichment_extraction_spec.rb +++ b/spec/supplejack/extraction/enrichment_extraction_spec.rb @@ -35,6 +35,25 @@ end end + context 'when the extraction requires JavaScript' do + let(:ed) { create(:extraction_definition, :enrichment, destination:, extraction_jobs: [extraction_job], base_url: "file://#{Rails.root.join('spec/stub_responses')}", evaluate_javascript: true) } + let!(:parameter) { create(:parameter, content: "javascript_example.html", kind: 'slug', request: request_two, content_type: 'static') } + + context 'when the extraction is successful' do + it 'evaluates the JavaScript and saves the HTML as a document' do + document = subject.extract + + document_html = Nokogiri::HTML(document.body).xpath('//body').to_html + expect(document_html).to include('This heading is rendered with JavaScript') + end + + it 'returns a successful status code' do + document = subject.extract + expect(document.status).to eq 200 + end + end + end + context 'when record extraction fails' do before do subject