Skip to content

Commit

Permalink
Merge pull request #17 from DigitalNZ/oliver/xml-pagination
Browse files Browse the repository at this point in the history
ADD TOKENISED PAGINATION TO XML HARVESTING
  • Loading branch information
ostigley authored Feb 14, 2018
2 parents a561b87 + de3c28a commit c859abf
Show file tree
Hide file tree
Showing 6 changed files with 84 additions and 23 deletions.
29 changes: 20 additions & 9 deletions lib/supplejack_common/paginated_collection.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ def initialize(klass, pagination_options={}, options={})
@per_page = pagination_options[:per_page]
@page = pagination_options[:page]
@type = pagination_options[:type]
@tokenised = pagination_options[:tokenised] || false
@next_page_token_location = pagination_options[:next_page_token_location]
@total_selector = pagination_options[:total_selector]
@initial_param = pagination_options[:initial_param]

@options = options
@counter = 0
Expand Down Expand Up @@ -56,24 +56,35 @@ def each(&block)

private

# Builds the first request URL by appending the one-off initial parameter.
#
# The stored @initial_param is consumed by this call: it is reset to nil
# once the URL string has been built, so it is only ever appended once.
#
# @param url [String] base URL to extend
# @param joiner [String] separator placed between the URL and the parameter
# @return [String] url + joiner + the initial parameter
def initial_url(url, joiner)
  "#{url}#{joiner}#{@initial_param}".tap { @initial_param = nil }
end

def next_url(url)
if paginated?
joiner = url.match(/\?/) ? "&" : "?"
if @tokenised
if tokenised?
@page = self.klass._document.present? ? self.klass.next_page_token(@next_page_token_location) : nil
url = "#{url}#{joiner}#{url_options.to_query}"
result = "#{url}#{joiner}#{url_options.to_query}"
result = initial_url(url, joiner) if @initial_param.present?
result
else
url = "#{url}#{joiner}#{url_options.to_query}"
result = "#{url}#{joiner}#{url_options.to_query}"
increment_page_counter!
url
result
end
else
url
end
end

def url_options
{page_parameter => page, per_page_parameter => per_page}
options = {}
options[page_parameter] = page if page_parameter.present?
options[per_page_parameter] = per_page if per_page_parameter.present?
options
end

def page_pagination?
Expand Down Expand Up @@ -101,18 +112,18 @@ def increment_page_counter!
end

def more_results?
if @tokenised
if tokenised?
return self.klass.next_page_token(@next_page_token_location).present?
end
current_page <= total_pages
end

def paginated?
(page && per_page) || @tokenised
(page && per_page) || tokenised?
end

def tokenised?
@tokenised
@type == 'token'
end

def yield_from_records(&block)
Expand Down
11 changes: 6 additions & 5 deletions lib/supplejack_common/sitemap/base.rb
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# The Supplejack Common code is Crown copyright (C) 2014, New Zealand Government,
# and is licensed under the GNU General Public License, version 3.
# See https://github.com/DigitalNZ/supplejack for details.
#
# Supplejack was created by DigitalNZ at the National Library of NZ and the Department of Internal Affairs.
# http://digitalnz.org/supplejack
# and is licensed under the GNU General Public License, version 3.
# See https://github.com/DigitalNZ/supplejack for details.
#
# Supplejack was created by DigitalNZ at the National Library of NZ and the Department of Internal Affairs.
# http://digitalnz.org/supplejack

module SupplejackCommon
module Sitemap
Expand All @@ -14,6 +14,7 @@ class Base < SupplejackCommon::Base

class_attribute :_record_selector
class_attribute :_namespaces
class_attribute :_document

class << self
def fetch_entries(url=nil)
Expand Down
13 changes: 10 additions & 3 deletions lib/supplejack_common/xml/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,17 @@ class Base < SupplejackCommon::Base
class_attribute :_record_selector
class_attribute :_record_format
class_attribute :_total_results
class_attribute :_document

class << self
# Stores the XPath expression in the _record_selector class attribute.
#
# @param xpath [String] XPath expression to store
def record_selector(xpath)
self._record_selector = xpath
end

# Reads the next-page token from the document stored in _document.
#
# @param next_page_token_location [String] XPath locating the token node
# @return [String, nil] text of the first matching node, or nil when no
#   document is stored or the XPath matches nothing
def next_page_token(next_page_token_location)
  # Callers test the result with `.present?`, so nil is the natural
  # "no further page" signal. Without these guards a response lacking the
  # token node raised NoMethodError on nil instead.
  return if self._document.nil?

  self._document.xpath(next_page_token_location, self._namespaces).first&.text
end

def records(options={})
options.reverse_merge!(limit: nil)
klass = !!self._sitemap_entry_selector ? SupplejackCommon::Sitemap::PaginatedCollection : SupplejackCommon::PaginatedCollection
Expand All @@ -34,15 +43,13 @@ def record_format(format)
self._record_format = format.to_sym
end

# Stores the XPath expression in the _record_selector class attribute.
#
# @param xpath [String] XPath expression to store
def record_selector(xpath)
self._record_selector = xpath
end

# Resets the XML-specific class attributes after delegating to the
# parent implementation via super.
#
# @return [nil]
def clear_definitions
  super
  %i[_record_selector _total_results _record_format _document].each do |attribute|
    public_send("#{attribute}=", nil)
  end
  nil
end

def total_results(_total_selector)
Expand Down
3 changes: 2 additions & 1 deletion lib/supplejack_common/xml_helpers/xml_document_methods.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@ def xml_records(url)
xml_nodes = []
with_each_file(url) do |file|
document = parse_document(file)
self._document = document
xml_nodes += document.xpath(self._record_selector, self._namespaces).map {|node| new(node, url) }
if pagination_options
if pagination_options&.include?(:total_selector)
if self.pagination_options[:total_selector].start_with?("/")
self._total_results ||= document.xpath(self.pagination_options[:total_selector]).text.to_i
else
Expand Down
37 changes: 32 additions & 5 deletions spec/supplejack_common/paginated_collection_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,8 @@

context "tokenised pagination" do
let(:params) { {
page_parameter: "page",
type: "item",
tokenised: true,
page_parameter: "page-parameter",
type: "token",
per_page_parameter: "per_page",
per_page: 5,
next_page_token_location: "next_page_token",
Expand All @@ -115,16 +114,44 @@
SupplejackCommon::Base.stub(:_document) {true}
end

it "does some things" do
expect(collection.send(:next_url, "http://go.gle/?sort=asc")).to eq "http://go.gle/?sort=asc&page=abc_1234&per_page=5"
it "generates the next url" do
expect(collection.send(:next_url, "http://go.gle/?sort=asc")).to eq "http://go.gle/?sort=asc&page-parameter=abc_1234&per_page=5"
end
end

# Token-type pagination configured with an :initial_param. The examples
# expect the first generated URL to carry only the initial parameter and
# the following one to carry the page token parameter instead.
context "with initial parameter" do
let(:params) { {
page_parameter: "page-parameter",
type: "token",
initial_param: 'initial-paramater=true'
}}
let(:collection) { klass.new(SupplejackCommon::Base, params, {limit: 1}) }

# Stub the token lookup and the document so no real XML is needed.
before do
SupplejackCommon::Base.stub(:next_page_token) {'abc_1234'}
SupplejackCommon::Base.stub(:_document) {true}
end

it "generates a url with an initial parameter" do
expect(collection.send(:next_url, "http://go.gle/?sort=asc")).to eq "http://go.gle/?sort=asc&initial-paramater=true"
end

# Two consecutive calls on the same collection: the initial parameter is
# expected only in the first result.
it 'generates next url without initial parameter after the first call' do
expect(collection.send(:next_url, "http://go.gle/?sort=asc")).to eq "http://go.gle/?sort=asc&initial-paramater=true"
expect(collection.send(:next_url, "http://go.gle/?sort=asc")).to eq "http://go.gle/?sort=asc&page-parameter=abc_1234"
end
end
end

# Examples for #url_options, invoked through send.
describe "#url_options" do
# NOTE(review): `collection` here comes from a let outside this block —
# presumably configured with page 1 and per_page 5; confirm against the
# enclosing spec.
it "returns a hash with the url options" do
collection.send(:url_options).should eq({"page" => 1, "per_page" => 5})
end

# With per_page_parameter set to nil, only the page entry is expected.
it "removes nil keys from the hash of url options" do
collection = klass.new(SupplejackCommon::Base, {page_parameter: "page", page: 1, type: "item", per_page_parameter: nil})
collection.send(:url_options).should eq({"page" => 1})
end
end

describe "#current_page" do
Expand Down
14 changes: 14 additions & 0 deletions spec/supplejack_common/xml/base_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,20 @@
klass.clear_definitions
end

# Checks that .record_selector stores the given XPath in the
# _record_selector class attribute.
describe ".record_selector" do
it 'assignes the record selector xpath class attributed' do
klass.record_selector("//o:ListRecords/o:record")
expect(klass._record_selector).to eq "//o:ListRecords/o:record"
end
end

# Checks that .next_page_token returns the text of the node matched by the
# given XPath in the document assigned to _document.
describe ".next_page_token" do
it 'returns the next page token from the document of xml' do
# A minimal one-element XML document parsed with Nokogiri.
klass._document = Nokogiri::XML.parse "<NextPageToken>token</NextPageToken>"
expect(klass.next_page_token('//NextPageToken')).to eq 'token'
end
end

describe ".records" do
it "returns an object of type SupplejackCommon::Sitemap::PaginatedCollection when sitemap_entry_selector is set" do
klass.should_receive(:_sitemap_entry_selector).twice.and_return("//loc")
Expand Down

0 comments on commit c859abf

Please sign in to comment.