Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: make find faster #5

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 101 additions & 8 deletions src/xpath.jl
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,7 @@ end
Find the first node matching `xpath` XPath query from `doc`.
"""
function Base.findfirst(doc::Document, xpath::AbstractString)
    # Delegate to the `Node` method, starting the search from the document's
    # own node. No other return may precede this one, otherwise the delegation
    # is never reached.
    return findfirst(doc.node, xpath)
end

"""
Expand All @@ -59,8 +58,7 @@ end
Find the last node matching `xpath` XPath query from `doc`.
"""
function Base.findlast(doc::Document, xpath::AbstractString)
    # Delegate to the `Node` method, starting the search from the document's
    # own node. No other return may precede this one, otherwise the delegation
    # is never reached.
    return findlast(doc.node, xpath)
end

"""
Expand All @@ -71,6 +69,19 @@ Find nodes matching `xpath` XPath query starting from `node`.
The `ns` argument is an iterator of namespace prefix and URI pairs.
"""
function Base.find(node::Node, xpath::AbstractString, ns=namespaces(node))::Vector{Node}
if is_asterisk(xpath)
return elements(node)
elseif is_ncname(xpath)
node_ptr = first_element_ptr(node.ptr)
nodes = Node[]
while node_ptr != C_NULL
if is_named_as(node_ptr, xpath)
push!(nodes, Node(node_ptr))
end
node_ptr = next_element_ptr(node_ptr)
end
return nodes
end
context_ptr = new_xpath_context(document(node))
if context_ptr == C_NULL
throw_xml_error()
Expand Down Expand Up @@ -104,8 +115,30 @@ end
Find the first node matching `xpath` XPath query starting from `node`.
"""
function Base.findfirst(node::Node, xpath::AbstractString, ns=namespaces(node))
    # Simple queries are answered by walking element pointers directly instead
    # of evaluating an XPath expression; `ns` is only used by the general path.
    if is_asterisk(xpath)
        # `*` accepts any element, so the first element pointer is the answer.
        # NOTE(review): presumably `first_element_ptr` yields the first child
        # element of `node` -- confirm against its definition.
        node_ptr = first_element_ptr(node.ptr)
        if node_ptr == C_NULL
            throw_no_matching_error()
        end
        return Node(node_ptr)
    elseif is_ncname(xpath)
        # A bare name without a namespace prefix: scan forward and stop at the
        # first element carrying that name.
        node_ptr = first_element_ptr(node.ptr)
        while node_ptr != C_NULL
            if is_named_as(node_ptr, xpath)
                return Node(node_ptr)
            end
            node_ptr = next_element_ptr(node_ptr)
        end
        throw_no_matching_error()
    else
        # TODO: add a fast path for names with a namespace prefix (`is_qname`).
        # General case: run the full query and take the first hit, reporting an
        # empty result with the same error as the fast paths.
        ret = find(node, xpath, ns)
        if isempty(ret)
            throw_no_matching_error()
        end
        return first(ret)
    end
end

"""
Expand All @@ -114,8 +147,34 @@ end
Find the last node matching `xpath` XPath query starting from `node`.
"""
function Base.findlast(node::Node, xpath::AbstractString, ns=namespaces(node))
    # Simple queries are answered by walking element pointers directly instead
    # of evaluating an XPath expression; `ns` is only used by the general path.
    if is_asterisk(xpath)
        # `*` accepts any element, so the last element pointer is the answer.
        # NOTE(review): presumably `last_element_ptr` yields the last child
        # element of `node` -- confirm against its definition.
        node_ptr = last_element_ptr(node.ptr)
        if node_ptr == C_NULL
            throw_no_matching_error()
        end
        return Node(node_ptr)
    elseif is_ncname(xpath)
        # A bare name without a namespace prefix: scan backward and stop at the
        # first element (from the end) carrying that name.
        node_ptr = last_element_ptr(node.ptr)
        while node_ptr != C_NULL
            if is_named_as(node_ptr, xpath)
                return Node(node_ptr)
            end
            node_ptr = prev_element_ptr(node_ptr)
        end
        throw_no_matching_error()
    else
        # TODO: add a fast path for names with a namespace prefix (`is_qname`).
        # General case: run the full query and take the last hit, reporting an
        # empty result with the same error as the fast paths.
        ret = find(node, xpath, ns)
        if isempty(ret)
            throw_no_matching_error()
        end
        return last(ret)
    end
end

# Raise the error used when a query matches no node.
throw_no_matching_error() = throw(ArgumentError("no matching nodes"))

function new_xpath_context(doc)
Expand Down Expand Up @@ -162,3 +221,37 @@ function free(ptr::Ptr{_XPathObject})
(Ptr{Void},),
ptr)
end

# Return `true` when the node behind `node_ptr` carries the name `name`.
# Equivalent to `name(node) == name`, but compares the C strings in place
# rather than building a Julia string first.
function is_named_as(node_ptr, name)
    name_ptr = unsafe_load(node_ptr).name
    # A node without a name cannot match anything.
    name_ptr == C_NULL && return false
    status = ccall((:xmlStrEqual, libxml2), Cint, (Cstring, Cstring), name_ptr, name)
    return status == 1
end

# Return `true` when `name` is exactly the wildcard `*`.
is_asterisk(name) = name == "*"

# Return `true` when libxml2's `xmlValidateNCName` reports status 0 for `name`,
# i.e. `name` is accepted as a name without a namespace prefix. The second
# argument (0) is passed through to libxml2 unchanged.
function is_ncname(name)
    status = ccall((:xmlValidateNCName, libxml2), Cint, (Cstring, Cint), name, 0)
    return status == 0
end

# Return `true` when libxml2's `xmlValidateQName` reports status 0 for `name`,
# i.e. `name` is accepted as a (possibly prefixed) qualified name. The second
# argument (0) is passed through to libxml2 unchanged.
function is_qname(name)
    status = ccall((:xmlValidateQName, libxml2), Cint, (Cstring, Cint), name, 0)
    return status == 0
end