From 51935df4dc5aeccefde475fba85fd742bd850b82 Mon Sep 17 00:00:00 2001 From: Gustavo Motizuki Date: Fri, 26 Apr 2019 08:57:20 +1200 Subject: [PATCH 1/4] Add new pre_process_block DSL --- Gemfile.lock | 5 ----- lib/supplejack_common/dsl.rb | 6 ++++++ spec/spec_helper.rb | 1 - spec/supplejack_common/dsl_spec.rb | 10 ++++++++++ supplejack_common.gemspec | 1 - 5 files changed, 16 insertions(+), 7 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index e99def5..2d87e9a 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -63,7 +63,6 @@ GEM aws-sigv4 (1.0.3) bson (4.4.2) builder (3.2.3) - byebug (10.0.2) chronic (0.10.2) coderay (1.1.2) concurrent-ruby (1.1.4) @@ -124,9 +123,6 @@ GEM pry (0.11.3) coderay (~> 1.1.0) method_source (~> 0.9.0) - pry-byebug (3.6.0) - byebug (~> 10.0) - pry (~> 0.10) public_suffix (3.0.2) rack (2.0.6) rack-test (1.1.0) @@ -191,7 +187,6 @@ DEPENDENCIES mock_redis oai (~> 0.3.1) pry - pry-byebug rake (< 11.0) rspec (~> 2.11.0) rubocop diff --git a/lib/supplejack_common/dsl.rb b/lib/supplejack_common/dsl.rb index f4efabd..14d62c0 100644 --- a/lib/supplejack_common/dsl.rb +++ b/lib/supplejack_common/dsl.rb @@ -20,6 +20,7 @@ module DSL class_attribute :_match_concepts class_attribute :_http_headers class_attribute :_proxy + class_attribute :_pre_process_block self._base_urls = {} self._attribute_definitions = {} @@ -34,6 +35,7 @@ module DSL self._match_concepts = {} self._http_headers = {} self._proxy = nil + self._pre_process_block = nil end module ClassMethods @@ -116,6 +118,10 @@ def match_concepts(match_concepts) def proxy(url) self._proxy = url end + + def pre_process_block(&block) + self._pre_process_block = block + end end end end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index eccf560..b510e42 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -4,7 +4,6 @@ require 'webmock/rspec' require 'simplecov' require 'loofah' -require 'pry-byebug' SimpleCov.start diff --git a/spec/supplejack_common/dsl_spec.rb b/spec/supplejack_common/dsl_spec.rb index 94742ea..23c467a 100644 --- a/spec/supplejack_common/dsl_spec.rb +++ b/spec/supplejack_common/dsl_spec.rb @@ -163,4 +163,14 @@ class WithOptionsTest < SupplejackCommon::Base klass._match_concepts[klass.identifier].should eq :create_or_match end end + + describe '.pre_process_block' do + it 'store given block to _pre_process_block class variable so it can be used to process raw data from source' do + klass.pre_process_block do |data| + data + end + + klass._pre_process_block.call(123).should eq 123 + end + end end diff --git a/supplejack_common.gemspec b/supplejack_common.gemspec index 0b14b46..780bd91 100644 --- a/supplejack_common.gemspec +++ b/supplejack_common.gemspec @@ -28,7 +28,6 @@ Gem::Specification.new do |gem| gem.add_runtime_dependency 'mimemagic' gem.add_runtime_dependency 'mongoid' gem.add_runtime_dependency 'nokogiri' - gem.add_development_dependency 'pry-byebug' gem.add_runtime_dependency 'redis' gem.add_runtime_dependency 'rest-client' gem.add_runtime_dependency 'retriable' From edf106075fb3be975525660b8eb096e06d8981b7 Mon Sep 17 00:00:00 2001 From: Gustavo Motizuki Date: Fri, 26 Apr 2019 09:08:27 +1200 Subject: [PATCH 2/4] Implement pre_process_block DSL to JSON --- lib/supplejack_common/json/base.rb | 11 ++++++----- spec/supplejack_common/json/base_spec.rb | 23 +++++++++++++---------- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/lib/supplejack_common/json/base.rb b/lib/supplejack_common/json/base.rb index 46cd8c1..1f267ea 100644 --- a/lib/supplejack_common/json/base.rb +++ b/lib/supplejack_common/json/base.rb @@ -17,15 +17,15 @@ def record_selector(path) end def document(url) - if url.include?('scroll') - self._document = SupplejackCommon::Request.scroll(url, _request_timeout, _throttle, _http_headers) - _document + self._document = if url.include?('scroll') + SupplejackCommon::Request.scroll(url, _request_timeout, _throttle, _http_headers) elsif url =~ /^https?/ - self._document = SupplejackCommon::Request.get(url, _request_timeout, _throttle, _http_headers, _proxy) - _document + SupplejackCommon::Request.get(url, _request_timeout, _throttle, _http_headers, _proxy) elsif url =~ /^file/ File.read(url.gsub(/file:\/\//, '')) end + self._document = _pre_process_block.call(_document) if _pre_process_block + _document end def next_page_token(next_page_token_location) @@ -57,6 +57,7 @@ def clear_definitions super self._record_selector = nil self._document = nil + self._pre_process_block = nil end end diff --git a/spec/supplejack_common/json/base_spec.rb b/spec/supplejack_common/json/base_spec.rb index 3d82d60..a3c812b 100644 --- a/spec/supplejack_common/json/base_spec.rb +++ b/spec/supplejack_common/json/base_spec.rb @@ -8,11 +8,7 @@ let(:record) { double(:record).as_null_object } after do - klass._base_urls[klass.identifier] = [] - klass._attribute_definitions[klass.identifier] = {} - klass._rejection_rules[klass.identifier] = nil - klass._throttle = {} - klass._request_timeout = 60_000 + klass.clear_definitions end describe '.record_selector' do @@ -50,22 +46,29 @@ let(:json) { '"description": "Some json!"' } context 'json web document' do - it 'stores the raw json' do + before do klass._throttle = {} klass.http_headers('Authorization': 'Token token="token"', 'x-api-key': 'gus') klass._request_timeout = 60_000 SupplejackCommon::Request.should_receive(:get).with('http://google.com', 60_000, {}, { 'Authorization': 'Token token="token"', 'x-api-key': 'gus' }, nil) { json } + end + + it 'stores the raw json' do klass.document('http://google.com').should eq json end it 'stores json document at _document class attribute' do - klass._throttle = {} - klass.http_headers('Authorization': 'Token token="token"', 'x-api-key': 'gus') - klass._request_timeout = 60_000 - SupplejackCommon::Request.should_receive(:get).with('http://google.com', 60_000, {}, { 'Authorization': 'Token token="token"', 'x-api-key': 'gus' }, nil) { json } klass.document('http://google.com') expect(klass._document).to equal json end + + it 'pre process json data if pre_process_block DSL is defined' do + new_json = { a_new_json: 'Some value' } + klass.pre_process_block { new_json } + + klass.document('http://google.com') + expect(klass._document).to equal new_json + end end context 'json files' do From ac7d02d6bc78ea6c69b3acb9ca8d2f74a58d331f Mon Sep 17 00:00:00 2001 From: Gustavo Motizuki Date: Fri, 26 Apr 2019 09:13:24 +1200 Subject: [PATCH 3/4] Implement pre_process_block DSL for XMLs --- lib/supplejack_common/xml/base.rb | 1 + .../xml_helpers/xml_document_methods.rb | 1 + .../xml_helpers/xml_document_methods_spec.rb | 10 +++++++++- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/lib/supplejack_common/xml/base.rb b/lib/supplejack_common/xml/base.rb index b18e5e6..1cc3a83 100644 --- a/lib/supplejack_common/xml/base.rb +++ b/lib/supplejack_common/xml/base.rb @@ -44,6 +44,7 @@ def clear_definitions self._total_results = nil self._record_format = nil self._document = nil + self._pre_process_block = nil end def total_results(_total_selector) diff --git a/lib/supplejack_common/xml_helpers/xml_document_methods.rb b/lib/supplejack_common/xml_helpers/xml_document_methods.rb index 2def58a..0d68617 100644 --- a/lib/supplejack_common/xml_helpers/xml_document_methods.rb +++ b/lib/supplejack_common/xml_helpers/xml_document_methods.rb @@ -14,6 +14,7 @@ module ClassMethods def xml_records(url) xml_nodes = [] with_each_file(url) do |file| + file = _pre_process_block.call(file) if _pre_process_block document = parse_document(file) self._document = document xml_nodes += document.xpath(_record_selector, _namespaces).map { |node| new(node, url) } diff --git a/spec/supplejack_common/xml_helpers/xml_document_methods_spec.rb b/spec/supplejack_common/xml_helpers/xml_document_methods_spec.rb index 409079c..9f64830 100644 --- a/spec/supplejack_common/xml_helpers/xml_document_methods_spec.rb +++ b/spec/supplejack_common/xml_helpers/xml_document_methods_spec.rb @@ -17,12 +17,20 @@ before do klass.record_selector '/g:items/g:item' klass.stub(:with_each_file).and_yield(xml) - klass.stub(:parse_document) { doc } klass.namespaces g: 'http://digitalnz.org/schemas/test' klass._request_timeout = 60_000 end + it 'pre process xml data if pre_process_block DSL is defined' do + new_xml = 'Some value' + klass.pre_process_block { new_xml } + + klass.xml_records('url') + expect(klass._document.to_s).to eq Nokogiri::XML.parse(new_xml).to_s + end + it 'initializes a record with every section of the XML' do + klass.stub(:parse_document) { doc } klass.should_receive(:new).once.with(xml_snippets.first, anything) klass.xml_records('url') end From 3dfbb448af852ae45b6415ee9a9d17cb86b16e86 Mon Sep 17 00:00:00 2001 From: Gustavo Motizuki Date: Fri, 26 Apr 2019 09:15:07 +1200 Subject: [PATCH 4/4] Fix rubocop offences --- lib/supplejack_common/json/base.rb | 12 ++++++------ spec/supplejack_common/dsl_spec.rb | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/supplejack_common/json/base.rb b/lib/supplejack_common/json/base.rb index 1f267ea..9947674 100644 --- a/lib/supplejack_common/json/base.rb +++ b/lib/supplejack_common/json/base.rb @@ -18,12 +18,12 @@ def record_selector(path) def document(url) self._document = if url.include?('scroll') - SupplejackCommon::Request.scroll(url, _request_timeout, _throttle, _http_headers) - elsif url =~ /^https?/ - SupplejackCommon::Request.get(url, _request_timeout, _throttle, _http_headers, _proxy) - elsif url =~ /^file/ - File.read(url.gsub(/file:\/\//, '')) - end + SupplejackCommon::Request.scroll(url, _request_timeout, _throttle, _http_headers) + elsif url =~ /^https?/ + SupplejackCommon::Request.get(url, _request_timeout, _throttle, _http_headers, _proxy) + elsif url =~ /^file/ + File.read(url.gsub(/file:\/\//, '')) + end self._document = _pre_process_block.call(_document) if _pre_process_block _document end diff --git a/spec/supplejack_common/dsl_spec.rb b/spec/supplejack_common/dsl_spec.rb index 23c467a..e1a2393 100644 --- a/spec/supplejack_common/dsl_spec.rb +++ b/spec/supplejack_common/dsl_spec.rb @@ -170,7 +170,7 @@ class WithOptionsTest < SupplejackCommon::Base data end - klass._pre_process_block.call(123).should eq 123 + klass._pre_process_block.call(123).should eq 123 end end end