From 741cbd5bbcc52d2310abb7ec0202f0993e457d18 Mon Sep 17 00:00:00 2001 From: Kristoffer Carlsson Date: Sat, 10 Nov 2018 11:27:35 -0500 Subject: [PATCH] Improve number parsing in streams: use a PushVector to work around slow push! in Base (#264) * restore special case parsing of numbers for in memory JSON * use a PushVector to work around slow push! in Base --- src/Parser.jl | 6 ++++-- src/pushvector.jl | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 2 deletions(-) create mode 100644 src/pushvector.jl diff --git a/src/Parser.jl b/src/Parser.jl index d066672..b7556bb 100644 --- a/src/Parser.jl +++ b/src/Parser.jl @@ -3,6 +3,8 @@ module Parser # JSON using Mmap using ..Common +include("pushvector.jl") + """ Like `isspace`, but work on bytes and includes only the four whitespace characters defined by the JSON standard: space, tab, line feed, and carriage @@ -31,9 +33,9 @@ mutable struct StreamingParserState{T <: IO} <: ParserState io::T cur::UInt8 used::Bool - utf8array::Vector{UInt8} + utf8array::PushVector{UInt8, Vector{UInt8}} end -StreamingParserState(io::IO) = StreamingParserState(io, 0x00, true, UInt8[]) +StreamingParserState(io::IO) = StreamingParserState(io, 0x00, true, PushVector{UInt8}()) struct ParserContext{DictType, IntType} end diff --git a/src/pushvector.jl b/src/pushvector.jl new file mode 100644 index 0000000..01399f1 --- /dev/null +++ b/src/pushvector.jl @@ -0,0 +1,33 @@ +# This is a vector wrapper that we use as a workaround for `push!` +# being slow (it always calls into the runtime even if the underlying buffer, +# has enough space). 
# Here we keep track of the length using an extra field.

"""
    PushVector{T, A<:AbstractVector{T}} <: AbstractVector{T}

Growable vector wrapper that stores its logical length in the field `l` instead of
relying on the length of the backing vector `v`.

Only the first `l` elements of `v` belong to the `PushVector`; the remaining slots of
`v` are spare capacity that `push!` fills before it has to grow `v`.
"""
mutable struct PushVector{T, A<:AbstractVector{T}} <: AbstractVector{T}
    v::A    # backing storage; `length(v)` is the current capacity
    l::Int  # number of elements in use (the logical length)
end

"""
    PushVector{T}()

Create an empty `PushVector` backed by a `Vector{T}` with 20 preallocated slots.
"""
# Default length of 20 should be enough to never need to grow in most cases
PushVector{T}() where {T} = PushVector(Vector{T}(undef, 20), 0)

# Hand out a pointer to the backing storage. The pointer element type is tied to `T`
# so that a pointer of a mismatched element type is never returned.
Base.unsafe_convert(::Type{Ptr{T}}, v::PushVector{T}) where {T} = pointer(v.v)

# The array interface reports the logical length, not the capacity of `v.v`.
Base.length(v::PushVector) = v.l
Base.size(v::PushVector) = (v.l,)
Base.IndexStyle(::Type{<:PushVector}) = IndexLinear()

"""
    getindex(v::PushVector, i::Int)

Return the `i`-th element of `v`.

Throws a `BoundsError` when `i` is outside `1:length(v)`, even if the backing vector
has a slot at that position. Non-scalar indices (`:`, ranges, masks) are handled by
the generic `AbstractArray` methods, which only see the logical length.
"""
@inline function Base.getindex(v::PushVector, i::Int)
    # Check against the logical length; the access into the backing vector itself
    # is then done without a second bounds check.
    @boundscheck checkbounds(v, i)
    return @inbounds v.v[i]
end

"""
    push!(v::PushVector, i)

Append `i` to `v` and return `v`.

The backing vector is only resized when it is full, in which case it grows to twice
the new length. If storing `i` throws (for example because `i` cannot be converted
to the element type), the length of `v` is left unchanged.
"""
function Base.push!(v::PushVector, i)
    n = v.l + 1
    if n > length(v.v)
        resize!(v.v, n * 2)
    end
    v.v[n] = i
    # Commit the new length only after the element has actually been stored.
    v.l = n
    return v
end

"""
    resize!(v::PushVector, l::Integer)

Shrink `v` to length `l` and return `v`. The backing vector keeps its capacity.

Throws an `ArgumentError` when `l` is negative or larger than `length(v)`.
"""
function Base.resize!(v::PushVector, l::Integer)
    # Only support shrinking for now, since that is all we need
    if !(0 <= l <= v.l)
        throw(ArgumentError(
            "new length must be in 0:$(v.l) (growing is not supported), got $l"))
    end
    v.l = l
    return v
end