-
- {preview.method == "GET" && (
-
- {preview.url}
-
- )}
- {preview.method == "POST" && preview.url}
- {preview.method == "POST" && formattedPayload()}
-
+ const accordionView = () => {
+ return (
+ <>
+
+
+
+
+ Request ({preview.method})
+
+
+
+
+ {preview.method == "GET" && (
+
+ {preview.url}
+
+ )}
+ {preview.method == "POST" && preview.url}
+ {preview.method == "POST" && formattedPayload()}
+
-
-
-
- Request Headers
-
-
-
<% end %>
-<%= render layout: 'shared/create_modal',
- locals: { modal_heading: 'Add enrichment', id: 'add-enrichment',
- modal_subheading: 'Add a source_id to identify records from this enrichment' } do %>
-
- <%= vertical_form_with model: [@pipeline, @enrichment_definition] do |form| %>
-
- <%= form.hidden_field :pipeline_id, value: @pipeline.id %>
- <%= form.hidden_field :kind, value: 'enrichment' %>
- <%= form.hidden_field :priority, value: "-#{@pipeline.enrichments.count + 1}" %>
-
-
- <%= form.label :source_id, class: 'form-label' do %>
- Source ID
-
-
- <% end %>
-
-
- <%= form.text_field :source_id,
- class: {
- 'form-control': true,
- 'is-invalid': @enrichment_definition.errors[:source_id].any?
- } %>
+<% if @pipeline.harvest.present? %>
+ <%= render layout: 'shared/create_modal',
+ locals: { modal_heading: 'Add enrichment', id: 'add-enrichment',
+ modal_subheading: 'Add a source_id to identify records from this enrichment' } do %>
+
+ <%= vertical_form_with model: [@pipeline, @enrichment_definition] do |form| %>
+
+ <%= form.hidden_field :pipeline_id, value: @pipeline.id %>
+ <%= form.hidden_field :kind, value: 'enrichment' %>
+ <%= form.hidden_field :priority, value: @pipeline.harvest_definitions.last.priority - 1 %>
+
+
+ <%= form.label :source_id, class: 'form-label' do %>
+ Source ID
+
+
+ <% end %>
+
+
+ <%= form.text_field :source_id,
+ class: {
+ 'form-control': true,
+ 'is-invalid': @enrichment_definition.errors[:source_id].any?
+ } %>
+
-
-
- Add to pipeline
-
- <% end %>
-
+
+ Add to pipeline
+
+ <% end %>
+
+ <% end %>
<% end %>
<% if @harvest_definition.persisted? %>
@@ -238,13 +240,6 @@
<% end %>
- <%= render 'extraction_definitions/create_edit_modal',
- {
- id: 'create-harvest-extraction-definition-modal',
- modal_heading: 'Create extraction definition',
- modal_subheading: 'Define the settings for your extraction definition'
- } %>
-
<%= render(
layout: 'shared/create_modal',
locals: {
@@ -277,13 +272,4 @@
<% end %>
-
- <%= render 'transformation_definitions/create_edit_modal',
- {
- id: 'create-harvest-transformation-definition-modal',
- modal_heading: 'Create transformation definition',
- harvest_definition: @harvest_definition,
- modal_subheading: 'Define the settings for your transformation definition',
- extraction_jobs: @extraction_jobs
- } %>
<% end %>
diff --git a/app/views/transformation_definitions/show.html.erb b/app/views/transformation_definitions/show.html.erb
index d950eef6..f72eef3c 100644
--- a/app/views/transformation_definitions/show.html.erb
+++ b/app/views/transformation_definitions/show.html.erb
@@ -37,7 +37,7 @@
<% end %>
-<%= render 'create_edit_modal',
+<%= render 'transformation_definitions/create_edit_modal',
{
id: 'update-transformation-definition-modal',
model: @transformation_definition,
diff --git a/config/routes.rb b/config/routes.rb
index f2e68156..15e82a8b 100644
--- a/config/routes.rb
+++ b/config/routes.rb
@@ -29,13 +29,7 @@
resources :schedules
resources :harvest_definitions, only: %i[create update destroy] do
- resources :extraction_definitions, only: %i[show create update destroy new edit] do
- collection do
- post :test
- post :test_record_extraction
- post :test_enrichment_extraction
- end
-
+ resources :extraction_definitions, only: %i[show create update destroy] do
member do
post :clone
end
diff --git a/db/migrate/20231016002448_remove_enrichment_url_from_extraction_definition.rb b/db/migrate/20231016002448_remove_enrichment_url_from_extraction_definition.rb
new file mode 100644
index 00000000..9a97a3a0
--- /dev/null
+++ b/db/migrate/20231016002448_remove_enrichment_url_from_extraction_definition.rb
@@ -0,0 +1,7 @@
+# frozen_string_literal: true
+
+class RemoveEnrichmentUrlFromExtractionDefinition < ActiveRecord::Migration[7.0]
+ def change
+ remove_column :extraction_definitions, :enrichment_url, :string # :string lets a rollback re-add the column
+ end
+end
diff --git a/db/schema.rb b/db/schema.rb
index 6d2b7da7..c4a3799d 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
-ActiveRecord::Schema[7.0].define(version: 2023_10_09_033947) do
+ActiveRecord::Schema[7.0].define(version: 2023_10_16_002448) do
create_table "destinations", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t|
t.string "name", null: false
t.string "url", null: false
@@ -30,7 +30,6 @@
t.datetime "updated_at", null: false
t.integer "kind", default: 0
t.string "source_id"
- t.string "enrichment_url"
t.bigint "destination_id"
t.integer "page", default: 1
t.string "total_selector"
diff --git a/spec/factories/extraction_definition.rb b/spec/factories/extraction_definition.rb
index f264d8a9..6dc566f5 100644
--- a/spec/factories/extraction_definition.rb
+++ b/spec/factories/extraction_definition.rb
@@ -29,10 +29,9 @@
trait :enrichment do
kind { 1 }
source_id { 'test' }
- # rubocop:disable Lint/InterpolationCheck
- enrichment_url { '"https://api.figshare.com/v1/articles/#{record["dc_identifier"].first}"' }
- # rubocop:enable Lint/InterpolationCheck
- throttle { 1000 }
+ base_url { 'https://api.figshare.com/v1/articles' }
+ total_selector { '$.meta.total_pages' }
+ per_page { 20 }
end
pipeline
diff --git a/spec/models/extraction_definition_spec.rb b/spec/models/extraction_definition_spec.rb
index ab48bb4d..bd6c24cb 100644
--- a/spec/models/extraction_definition_spec.rb
+++ b/spec/models/extraction_definition_spec.rb
@@ -22,7 +22,6 @@
it { is_expected.not_to validate_presence_of(:destination_id).with_message("can't be blank") }
it { is_expected.not_to validate_presence_of(:source_id).with_message("can't be blank") }
- it { is_expected.not_to validate_presence_of(:enrichment_url).with_message("can't be blank") }
end
context 'when the extraction definition is for an enrichment' do
@@ -33,7 +32,6 @@
it { is_expected.to validate_presence_of(:throttle).with_message('is not a number') }
it { is_expected.to validate_presence_of(:destination_id).with_message("can't be blank") }
it { is_expected.to validate_presence_of(:source_id).with_message("can't be blank") }
- it { is_expected.to validate_presence_of(:enrichment_url).with_message("can't be blank") }
it { is_expected.not_to validate_presence_of(:format).with_message("can't be blank") }
it { is_expected.not_to validate_presence_of(:base_url).with_message("can't be blank") }
diff --git a/spec/models/parameter_spec.rb b/spec/models/parameter_spec.rb
index 103fdb1b..852d6483 100644
--- a/spec/models/parameter_spec.rb
+++ b/spec/models/parameter_spec.rb
@@ -68,6 +68,10 @@
create(:parameter, kind: 'query', name: 'itemsPerPage', content: 'JSON.parse(response)["items_found"] + 10',
content_type: 1)
end
+ let(:erroring_dynamic_response) do
+ create(:parameter, kind: 'query', name: 'itemsPerPage', content: 'raise',
+ content_type: 1)
+ end
let(:extraction_definition) { create(:extraction_definition, :figshare) }
let(:request) { create(:request, :figshare_initial_request, extraction_definition:) }
let(:response) { Extraction::DocumentExtraction.new(request).extract }
@@ -91,5 +95,9 @@
it 'returns the incremented parameter if it is incremental' do
expect(incremental.evaluate(response).content).to eq '22'
end
+
+ it 'returns a helpful message if the parameter has failed to be evaluated' do
+ expect(erroring_dynamic_response.evaluate(response).content).to eq 'raise-evaluation-error'
+ end
end
end
diff --git a/spec/requests/extraction_definitions_spec.rb b/spec/requests/extraction_definitions_spec.rb
index 3a31cc0e..0827b127 100644
--- a/spec/requests/extraction_definitions_spec.rb
+++ b/spec/requests/extraction_definitions_spec.rb
@@ -12,14 +12,6 @@
sign_in user
end
- describe '#new' do
- it 'renders the new form' do
- get new_pipeline_harvest_definition_extraction_definition_path(pipeline, harvest_definition, kind: 'enrichment')
-
- expect(response).to have_http_status :ok
- end
- end
-
describe '#create' do
context 'with valid parameters' do
let(:extraction_definition2) { build(:extraction_definition, pipeline:) }
@@ -155,55 +147,6 @@
end
end
- describe '#test_record_extraction' do
- let(:destination) { create(:destination) }
- let(:extraction_definition) { create(:extraction_definition, :enrichment, destination:) }
-
- before do
- stub_figshare_enrichment_page1(destination)
- end
-
- it 'returns a document extraction of API records' do
- post test_record_extraction_pipeline_harvest_definition_extraction_definitions_path(pipeline, harvest_definition), params: {
- extraction_definition: extraction_definition.attributes
- }
-
- expect(response).to have_http_status :ok
-
- json_response = response.parsed_body['body']
- records = JSON.parse(json_response)['records']
-
- records.each do |record|
- expect(record).to have_key('dc_identifier')
- expect(record).to have_key('internal_identifier')
- end
- end
- end
-
- describe '#test_enrichment_extraction' do
- let(:destination) { create(:destination) }
- let(:ed) { create(:extraction_definition, :enrichment, destination:) }
-
- before do
- stub_figshare_enrichment_page1(destination)
- end
-
- it 'returns a document extraction of data for an enrichment' do
- post test_enrichment_extraction_pipeline_harvest_definition_extraction_definitions_path(pipeline, harvest_definition), params: {
- extraction_definition: ed.attributes
- }
-
- expect(response).to have_http_status :ok
-
- json_response = response.parsed_body['body']
- records = JSON.parse(json_response)['items']
-
- records.each do |record|
- expect(record).to have_key('article_id')
- end
- end
- end
-
describe '#clone' do
let!(:extraction_definition) { create(:extraction_definition, name: 'one') }
let!(:request_one) { create(:request, :figshare_initial_request, extraction_definition:) }
diff --git a/spec/requests/extraction_jobs_spec.rb b/spec/requests/extraction_jobs_spec.rb
index 719b9566..05ba2d09 100644
--- a/spec/requests/extraction_jobs_spec.rb
+++ b/spec/requests/extraction_jobs_spec.rb
@@ -91,49 +91,31 @@
end
context 'when the format is JSON' do
- context 'when the type is pipeline' do
- it 'returns information to redirect to the pipeline path' do
- post pipeline_harvest_definition_extraction_definition_extraction_jobs_path(pipeline, harvest_definition, extraction_definition, kind: 'full', type: 'pipeline', format: 'json')
+ context 'when there is allready a Transformation Definition associated with the harvest_definition' do
+ it 'updates the Transformation Definition to reference the new job id' do
+ existing_extraction_job = harvest_definition.transformation_definition.extraction_job
- body = JSON.parse(response.body)
+ post pipeline_harvest_definition_extraction_definition_extraction_jobs_path(pipeline, harvest_definition, extraction_definition, kind: 'full', type: 'transform', format: 'json')
- expect(body['location']).to eq "/pipelines/#{pipeline.id}"
- end
-
- it 'queues a job' do
- expect(ExtractionWorker).to receive(:perform_async)
-
- post pipeline_harvest_definition_extraction_definition_extraction_jobs_path(pipeline, harvest_definition, extraction_definition, kind: 'full', type: 'pipeline', format: 'json')
+ harvest_definition.reload
+
+ expect(harvest_definition.transformation_definition.extraction_job).not_to eq existing_extraction_job
end
end
- context 'when the type is transform' do
- context 'when there is allready a Transformation Definition associated with the harvest_definition' do
- it 'updates the Transformation Definition to reference the new job id' do
- existing_extraction_job = harvest_definition.transformation_definition.extraction_job
+ context 'when there is no Transformation Definition associated with the harvest definition' do
+ it 'creates a new Transformation Definition and assigns it to the Harvest Definition' do
+ harvest_definition.transformation_definition.destroy
+ harvest_definition.reload
- post pipeline_harvest_definition_extraction_definition_extraction_jobs_path(pipeline, harvest_definition, extraction_definition, kind: 'full', type: 'transform', format: 'json')
+ expect(harvest_definition.transformation_definition).to be_nil
- harvest_definition.reload
-
- expect(harvest_definition.transformation_definition.extraction_job).not_to eq existing_extraction_job
- end
- end
-
- context 'when there is no Transformation Definition associated with the harvest definition' do
- it 'creates a new Transformation Definition and assigns it to the Harvest Definition' do
- harvest_definition.transformation_definition.destroy
- harvest_definition.reload
-
- expect(harvest_definition.transformation_definition).to be_nil
-
- expect do
- post pipeline_harvest_definition_extraction_definition_extraction_jobs_path(pipeline, harvest_definition, extraction_definition, kind: 'full', type: 'transform', format: 'json')
- end.to change(TransformationDefinition, :count).by(1)
+ expect do
+ post pipeline_harvest_definition_extraction_definition_extraction_jobs_path(pipeline, harvest_definition, extraction_definition, kind: 'full', type: 'transform', format: 'json')
+ end.to change(TransformationDefinition, :count).by(1)
- harvest_definition.reload
- expect(harvest_definition.transformation_definition).not_to be_nil
- end
+ harvest_definition.reload
+ expect(harvest_definition.transformation_definition).not_to be_nil
end
end
end
diff --git a/spec/requests/requests_spec.rb b/spec/requests/requests_spec.rb
index 46bc64a3..2171957c 100644
--- a/spec/requests/requests_spec.rb
+++ b/spec/requests/requests_spec.rb
@@ -44,44 +44,103 @@
end
end
- describe 'GET /show' do
- before do
- stub_figshare_harvest_requests(request_one)
+ describe 'GET /show' do
+ context 'when the extraction definition is for a harvest' do
+ before do
+ stub_figshare_harvest_requests(request_one)
+ end
+
+ let(:request_one) { create(:request, :figshare_initial_request, extraction_definition:) }
+ let(:request_two) { create(:request, :figshare_main_request, extraction_definition:) }
+
+ it 'returns a JSON response of the completed request' do
+ get pipeline_harvest_definition_extraction_definition_request_path(pipeline, harvest_definition,
+ extraction_definition, request_one)
+
+ expect(response).to have_http_status :ok
+
+ json_data = response.parsed_body
+
+ expected_keys = %w[url format preview http_method created_at updated_at id]
+
+ expected_keys.each do |key|
+ expect(json_data).to have_key(key)
+ end
+ end
+
+ it 'returns a JSON response of the completed request referencing a response' do
+ get pipeline_harvest_definition_extraction_definition_request_path(pipeline, harvest_definition,
+ extraction_definition, request_two, previous_request_id: request_one.id)
+
+ expect(response).to have_http_status :ok
+
+ json_data = response.parsed_body
+
+ expected_keys = %w[url format preview http_method created_at updated_at id]
+
+ expected_keys.each do |key|
+ expect(json_data).to have_key(key)
+ end
+
+ expect(JSON.parse(json_data['preview']['body'])['page_nr']).to eq 2
+ end
end
- let(:request_one) { create(:request, :figshare_initial_request, extraction_definition:) }
- let(:request_two) { create(:request, :figshare_main_request, extraction_definition:) }
+ context 'when the extraction definition is for an enrichment' do
+ let(:destination) { create(:destination) }
+ let(:extraction_definition) { create(:extraction_definition, :enrichment, pipeline:, destination:) }
- it 'returns a JSON response of the completed request' do
- get pipeline_harvest_definition_extraction_definition_request_path(pipeline, harvest_definition,
- extraction_definition, request_one)
+ let!(:request_one) { create(:request, extraction_definition:) }
+ let!(:request_two) { create(:request, extraction_definition:) }
- expect(response).to have_http_status :ok
+ let!(:parameter) { create(:parameter, content: "response['dc_identifier'].first", kind: 'slug', request: request_two, content_type: 'dynamic') }
- json_data = response.parsed_body
+ before do
+ stub_figshare_enrichment_page1(destination)
+ end
- expected_keys = %w[url format preview http_method created_at updated_at id]
+ it 'returns a JSON response of data from the API' do
+ get pipeline_harvest_definition_extraction_definition_request_path(pipeline, harvest_definition,
+ extraction_definition, request_one)
- expected_keys.each do |key|
- expect(json_data).to have_key(key)
- end
- end
+ expect(response).to have_http_status :ok
+
+ json_data = response.parsed_body
- it 'returns a JSON response of the completed request referencing a response' do
- get pipeline_harvest_definition_extraction_definition_request_path(pipeline, harvest_definition,
- extraction_definition, request_two, previous_request_id: request_one.id)
+ expected_keys = %w[url format preview http_method created_at updated_at id]
- expect(response).to have_http_status :ok
+ expected_keys.each do |key|
+ expect(json_data).to have_key(key)
+ end
- json_data = response.parsed_body
+ expected_preview_keys = %w[page total_pages total_records body]
- expected_keys = %w[url format preview http_method created_at updated_at id]
+ preview_data = json_data['preview']
- expected_keys.each do |key|
- expect(json_data).to have_key(key)
+ expected_preview_keys.each do |key|
+ expect(preview_data).to have_key(key)
+ end
end
- expect(JSON.parse(json_data['preview']['body'])['page_nr']).to eq 2
+ it 'returns a JSON response of the data from the content partner based on the data from the API' do
+ get pipeline_harvest_definition_extraction_definition_request_path(pipeline, harvest_definition,
+ extraction_definition, request_two)
+
+ expect(response).to have_http_status :ok
+
+ json_data = response.parsed_body
+
+ expected_keys = %w[http_method base_url url format preview]
+
+ expected_keys.each do |key|
+ expect(json_data).to have_key(key)
+ end
+
+ content_source_response = JSON.parse(json_data['preview']['body'])
+
+ expect(content_source_response).to have_key('count')
+ expect(content_source_response).to have_key('items')
+ end
end
end
end
diff --git a/spec/sidekiq/extraction_worker_spec.rb b/spec/sidekiq/extraction_worker_spec.rb
index ea3302ef..b22a46f3 100644
--- a/spec/sidekiq/extraction_worker_spec.rb
+++ b/spec/sidekiq/extraction_worker_spec.rb
@@ -30,8 +30,7 @@
context 'when the extraction is for an enrichment' do
let(:destination) { create(:destination) }
let(:extraction_definition) do
- create(:extraction_definition, kind: 'enrichment', destination:, source_id: 'test',
- enrichment_url: 'http://www.google.co.nz')
+ create(:extraction_definition, kind: 'enrichment', destination:, source_id: 'test')
end
let(:enrichment_extraction_job) { create(:extraction_job, extraction_definition:, status: 'queued') }
diff --git a/spec/supplejack/extraction/enrichment_execution_spec.rb b/spec/supplejack/extraction/enrichment_execution_spec.rb
index 6e21f51d..b5fc5dd6 100644
--- a/spec/supplejack/extraction/enrichment_execution_spec.rb
+++ b/spec/supplejack/extraction/enrichment_execution_spec.rb
@@ -7,6 +7,11 @@
let(:extraction_definition) { create(:extraction_definition, :enrichment, destination:, throttle: 0) }
let(:sample_job) { create(:extraction_job, extraction_definition:, kind: 'sample') }
let(:full_job) { create(:extraction_job, extraction_definition:, kind: 'full') }
+
+ let!(:request_one) { create(:request, extraction_definition:) }
+ let!(:request_two) { create(:request, extraction_definition:) }
+
+ let!(:parameter) { create(:parameter, content: "response['dc_identifier'].first", kind: 'slug', request: request_two, content_type: 'dynamic') }
describe '#call' do
before do
diff --git a/spec/supplejack/extraction/enrichment_extraction_spec.rb b/spec/supplejack/extraction/enrichment_extraction_spec.rb
index ed5515dc..1e2e6bdb 100644
--- a/spec/supplejack/extraction/enrichment_extraction_spec.rb
+++ b/spec/supplejack/extraction/enrichment_extraction_spec.rb
@@ -3,16 +3,23 @@
require 'rails_helper'
RSpec.describe Extraction::EnrichmentExtraction do
- subject { described_class.new(ed, records.first, 1, extraction_job.extraction_folder) }
+ subject { described_class.new(request_two, record, 1, extraction_job.extraction_folder) }
let(:extraction_job) { create(:extraction_job) }
let(:destination) { create(:destination) }
let(:ed) { create(:extraction_definition, :enrichment, destination:, extraction_jobs: [extraction_job]) }
- let(:re) { Extraction::RecordExtraction.new(ed, 1).extract }
+ let(:re) { Extraction::RecordExtraction.new(request_one, 1).extract }
let(:records) { JSON.parse(re.body)['records'] }
+ let(:record) { Extraction::ApiRecord.new(records.first) }
+
+ let!(:request_one) { create(:request, extraction_definition: ed) }
+ let!(:request_two) { create(:request, extraction_definition: ed) }
+
+ let!(:parameter) { create(:parameter, content: "response['dc_identifier'].first", kind: 'slug', request: request_two, content_type: 'dynamic') }
before do
stub_figshare_enrichment_page1(destination)
+ stub_figshare_enrichment_page2(destination)
end
describe '#extract' do
@@ -33,7 +40,7 @@
context 'when there is no extraction_folder' do
it 'returns an extracted document from a content source' do
- doc = described_class.new(ed, records.first, 1)
+ doc = described_class.new(request_two, record, 1)
expect { doc.save }.to raise_error(ArgumentError, 'extraction_folder was not provided in #new')
end
end
@@ -60,13 +67,9 @@
end
it 'returns false if the provided enrichment url returns nothing from the record' do
- ed = create(
- :extraction_definition, :enrichment,
- destination:,
- extraction_jobs: [extraction_job],
- enrichment_url: '"http://www.google.co.nz/#{record["bla"]}"'
- )
- expect(described_class.new(ed, records.first, 1, extraction_job.extraction_folder).valid?).to be false
+ record = Extraction::ApiRecord.new({ 'hello' => 'goodbye'} )
+
+ expect(described_class.new(request_two, record, 1, extraction_job.extraction_folder).valid?).to be false
end
end
end
diff --git a/spec/supplejack/extraction/record_extraction_spec.rb b/spec/supplejack/extraction/record_extraction_spec.rb
index 668e221b..a037bb77 100644
--- a/spec/supplejack/extraction/record_extraction_spec.rb
+++ b/spec/supplejack/extraction/record_extraction_spec.rb
@@ -6,6 +6,9 @@
let(:destination) { create(:destination) }
let(:extraction_definition) { create(:extraction_definition, :enrichment, destination:) }
+ let!(:request) { create(:request, extraction_definition:) }
+
+
describe '#extract' do
context 'when the enrichment is not scheduled after a harvest' do
before do
@@ -25,7 +28,7 @@
).to_return(fake_response('test_api_records_1'))
end
- let(:subject) { described_class.new(extraction_definition, 1) }
+ let(:subject) { described_class.new(request, 1) }
it 'returns an extracted document from a Supplejack API' do
expect(subject.extract).to be_a(Extraction::Document)
@@ -39,7 +42,7 @@
let(:harvest_job) do
create(:harvest_job, :completed, harvest_definition:, pipeline_job:, target_job_id: 'harvest-job-1')
end
- let(:subject) { described_class.new(extraction_definition, 1, harvest_job) }
+ let(:subject) { described_class.new(request, 1, harvest_job) }
before do
stub_request(:get, "#{destination.url}/harvester/records")