Skip to content

Commit

Permalink
fix(text-extraction-error): Fix for when a file is failed to be extra…
Browse files Browse the repository at this point in the history
…cted
  • Loading branch information
richardmatthewsdev committed Sep 2, 2024
1 parent 787ea80 commit 8706d1c
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 14 deletions.
1 change: 1 addition & 0 deletions app/sidekiq/file_extraction_worker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def create_transformation_job(page)

# Returns the API record id embedded in the file path of the document
# extracted for +page+, or nil when no id can be determined.
#
# The id is whatever the pattern /__(?<record_id>.+)__/ captures from the
# document's file path (i.e. the text wrapped in double underscores).
#
# @param page [Integer] key used to look the document up in the extraction
#   job's documents collection
# @return [String, nil] the captured record id; nil when the extraction
#   definition is not an enrichment, when the document has no file path, or
#   when the file path does not contain a `__<record_id>__` segment
def api_record_id(page)
  return nil unless @extraction_job.extraction_definition.enrichment?

  # Look the document up once instead of once per use.
  file_path = @extraction_job.documents[page].file_path
  return nil if file_path.nil?

  # String#[] with a regexp and capture name yields nil when the pattern is
  # absent, rather than raising NoMethodError on a nil MatchData.
  file_path[/__(?<record_id>.+)__/, 'record_id']
end
Expand Down
2 changes: 1 addition & 1 deletion app/supplejack/extraction/document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def save(file_path)
end

def size_in_bytes
return if file_path.nil?
return 0 if file_path.nil?

File.size(file_path)
end
Expand Down
4 changes: 3 additions & 1 deletion app/supplejack/extraction/documents.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ def initialize(folder)

def [](key)
@current_page = key&.to_i || 1
return nil if documents_filepath.blank?
if documents_filepath.blank?
return @documents[@current_page] = Document.new(documents_filepath, body: '{"message":"File does not exist in filesystem"}')
end

@documents[@current_page] = Document.load_from_file(documents_filepath)
end
Expand Down
20 changes: 12 additions & 8 deletions app/views/extraction_jobs/show.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -121,10 +121,12 @@
<li class="list-group-item d-flex justify-content-between align-items-start">
<div class="ms-2 me-auto">
<div class="fw-bold">Request Headers</div>
<% @document.request_headers.each do |header, value| %>
<dt class="d-inline"><%= header %></dt>
<dd class="d-inline"><%= value %></dd>
<br>
<% if @document.request_headers.present? %>
<% @document.request_headers.each do |header, value| %>
<dt class="d-inline"><%= header %></dt>
<dd class="d-inline"><%= value %></dd>
<br>
<% end %>
<% end %>
</div>
</li>
Expand All @@ -141,10 +143,12 @@
<li class="list-group-item d-flex justify-content-between align-items-start">
<div class="ms-2 me-auto">
<div class="fw-bold">Response Headers</div>
<% @document.response_headers.each do |header, value| %>
<dt class="d-inline"><%= header %></dt>
<dd class="d-inline"><%= value %></dd>
<br>
<% if @document.response_headers.present? %>
<% @document.response_headers.each do |header, value| %>
<dt class="d-inline"><%= header %></dt>
<dd class="d-inline"><%= value %></dd>
<br>
<% end %>
<% end %>
</dl>
</div>
Expand Down
8 changes: 4 additions & 4 deletions spec/supplejack/extraction/documents_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@
expect(subject.current_page).to eq 130
end

it 'returns nil when index is out of bounds' do
expect(subject[200]).to be_nil
expect(subject[0]).to be_nil
expect(subject[-1]).to be_nil
it 'returns a blank document when index is out of bounds' do
expect(subject[200]).to be_a(Extraction::Document)
expect(subject[0]).to be_a(Extraction::Document)
expect(subject[-1]).to be_a(Extraction::Document)
end

it 'returns pages based on their page number, rather than their order in the file system' do
Expand Down

0 comments on commit 8706d1c

Please sign in to comment.