#132 allow users to optionally provide an output buffer when calling …

…transcode (#136) * allow users to optionally provide an output buffer when calling transcode * add tests and refactor docstring * nicer formatting Co-authored-by: Mark Kittisopikul <[email protected]> * new copydata! method * reassociate docstring with correct method * Expand ByteData Co-authored-by: Mark Kittisopikul <[email protected]> * Expand ByteData Co-authored-by: Mark Kittisopikul <[email protected]> * Expand ByteData Co-authored-by: Mark Kittisopikul <[email protected]> * clarify formatting * formatting * Generic args Co-authored-by: Joao Aparicio <[email protected]> * Generic args Co-authored-by: Joao Aparicio <[email protected]> * small fixes * simplify * fix buffer * fix * formatting * fix buffer * Address PR comment #136 (comment) * Address PR comment #136 (comment) * Address PR review #136 (comment) #136 (comment) * don't use isnothing * add doc-string * replace keepbytes with marginpos * fix typo --------- Co-authored-by: Mark Kittisopikul <[email protected]> Co-authored-by: Joao Aparicio <[email protected]>
JuliaIO · Apr 9, 2023 · 3ece750 · 3ece750
1 parent de62b63
commit 3ece750
Show file tree

Hide file tree

Showing 4 changed files with 85 additions and 19 deletions.
diff --git a/src/buffer.jl b/src/buffer.jl
@@ -26,17 +26,18 @@ mutable struct Buffer
     # the total number of transcoded bytes
     transcoded::Int64
 
-    function Buffer(size::Integer)
-        return new(Vector{UInt8}(undef, size), 0, 1, 1, 0)
+    function Buffer(data::Vector{UInt8}, marginpos::Integer=length(data)+1)
+        @assert 1 <= marginpos <= length(data)+1
+        return new(data, 0, 1, marginpos, 0)
     end
+end
 
-    function Buffer(data::Vector{UInt8})
-        return new(data, 0, 1, length(data)+1, 0)
-    end
+function Buffer(size::Integer = 0)
+    return Buffer(Vector{UInt8}(undef, size), 1)
 end
 
-function Buffer(data::Base.CodeUnits{UInt8})
-    return Buffer(Vector{UInt8}(data))
+function Buffer(data::Base.CodeUnits{UInt8}, args...)
+    return Buffer(Vector{UInt8}(data), args...)
 end
 
 function Base.length(buf::Buffer)
@@ -199,6 +200,11 @@ function copydata!(buf::Buffer, data::Ptr{UInt8}, nbytes::Integer)
     return buf
 end
 
+# Copy data from `data` to `buf`.
+function copydata!(buf::Buffer, data::Buffer, nbytes::Integer = length(data))
+    return copydata!(buf, bufferptr(data), nbytes)
+end
+
 # Copy data from `buf` to `data`.
 function copydata!(data::Ptr{UInt8}, buf::Buffer, nbytes::Integer)
     # NOTE: It's caller's responsibility to ensure that the buffer has at least

diff --git a/src/noop.jl b/src/noop.jl
@@ -120,10 +120,9 @@ function Base.transcode(::Type{Noop}, data::ByteData)
     return transcode(Noop(), data)
 end
 
-function Base.transcode(::Noop, data::ByteData)
-    # Copy data because the caller may expect the return object is not the same
-    # as from the input.
-    return Vector{UInt8}(data)
+function Base.transcode(codec::Noop, input::Buffer, output::Buffer = Buffer())
+    copydata!(output, input)
+    return output.data
 end
 
 

diff --git a/src/transcode.jl b/src/transcode.jl
@@ -2,7 +2,10 @@
 # =========
 
 """
-    transcode(::Type{C}, data::Vector{UInt8})::Vector{UInt8} where C<:Codec
+    transcode(
+        ::Type{C},
+        data::Union{Vector{UInt8},Base.CodeUnits{UInt8}},
+    )::Vector{UInt8} where {C<:Codec}
 
 Transcode `data` by applying a codec `C()`.
 
@@ -27,21 +30,34 @@ julia> String(decompressed)
 
 ```
 """
-function Base.transcode(::Type{C}, data::ByteData) where C<:Codec
+function Base.transcode(::Type{C}, args...) where {C<:Codec}
     codec = C()
     initialize(codec)
     try
-        return transcode(codec, data)
+        return transcode(codec, args...)
     finally
         finalize(codec)
     end
 end
 
+_default_output_buffer(codec, input) = Buffer(
+    initial_output_size(
+        codec,
+        buffermem(input)
+    )
+)
+
 """
-    transcode(codec::Codec, data::Vector{UInt8})::Vector{UInt8}
+    transcode(
+        codec::Codec,
+        data::Union{Vector{UInt8},Base.CodeUnits{UInt8},Buffer},
+        [output::Union{Vector{UInt8},Base.CodeUnits{UInt8},Buffer}],
+    )::Vector{UInt8}
 
 Transcode `data` by applying `codec`.
 
+If `output` is unspecified, then this method will allocate it.
+
 Note that this method does not initialize or finalize `codec`. This is
 efficient when you transcode a number of pieces of data, but you need to call
 [`TranscodingStreams.initialize`](@ref) and
@@ -59,7 +75,9 @@ julia> codec = ZlibCompressor();
 
 julia> TranscodingStreams.initialize(codec)
 
-julia> compressed = transcode(codec, data);
+julia> compressed = Vector{UInt8}();
+
+julia> transcode(codec, data, compressed);
 
 julia> TranscodingStreams.finalize(codec)
 
@@ -76,9 +94,29 @@ julia> String(decompressed)
 
 ```
 """
-function Base.transcode(codec::Codec, data::ByteData)
-    input = Buffer(data)
-    output = Buffer(initial_output_size(codec, buffermem(input)))
+function Base.transcode(
+    codec::Codec,
+    input::Buffer,
+    output::Union{Buffer,Nothing} = nothing,
+)
+    output = (output === nothing ? _default_output_buffer(codec, input) : initbuffer!(output))
+    transcode!(output, codec, input)
+end
+
+"""
+    transcode!(output::Buffer, codec::Codec, input::Buffer)
+
+Transcode `input` by applying `codec` and storing the results in `output`.
+Note that this method does not initialize or finalize `codec`. This is
+efficient when you transcode a number of pieces of data, but you need to call
+[`TranscodingStreams.initialize`](@ref) and
+[`TranscodingStreams.finalize`](@ref) explicitly.
+"""
+function transcode!(
+    output::Buffer,
+    codec::Codec,
+    input::Buffer,
+)
     error = Error()
     code = startproc(codec, :write, error)
     if code === :error
@@ -121,6 +159,12 @@ function Base.transcode(codec::Codec, data::ByteData)
     throw(error[])
 end
 
+Base.transcode(codec::Codec, data::Buffer, output::ByteData) =
+    transcode(codec, data, Buffer(output))
+
+Base.transcode(codec::Codec, data::ByteData, args...) =
+    transcode(codec, Buffer(data), args...)
+
 # Return the initial output buffer size.
 function initial_output_size(codec::Codec, input::Memory)
     return max(

diff --git a/test/codecnoop.jl b/test/codecnoop.jl
@@ -192,9 +192,26 @@
     data = b""
     @test transcode(Noop(), data)  == data
     @test transcode(Noop(), data) !== data
+    @test transcode(Noop(), data, Vector{UInt8}()) == data
+    @test transcode(Noop(), data, TranscodingStreams.Buffer(Vector{UInt8}())) == data
+    @test transcode(Noop(), data, Vector{UInt8}()) !== data
+    @test transcode(Noop(), data, TranscodingStreams.Buffer(Vector{UInt8}())) !== data
+    output = Vector{UInt8}()
+    @test transcode(Noop(), data, output) === output
+    output = TranscodingStreams.Buffer(Vector{UInt8}())
+    @test transcode(Noop(), data, output) === output.data
+
     data = b"foo"
     @test transcode(Noop(), data)  == data
     @test transcode(Noop(), data) !== data
+    @test transcode(Noop(), data, Vector{UInt8}()) == data
+    @test transcode(Noop(), data, TranscodingStreams.Buffer(Vector{UInt8}())) == data
+    @test transcode(Noop(), data, Vector{UInt8}()) !== data
+    @test transcode(Noop(), data, TranscodingStreams.Buffer(Vector{UInt8}())) !== data
+    output = Vector{UInt8}()
+    @test transcode(Noop(), data, output) === output
+    output = TranscodingStreams.Buffer(Vector{UInt8}())
+    @test transcode(Noop(), data, output) === output.data
 
     TranscodingStreams.test_roundtrip_transcode(Noop, Noop)
     TranscodingStreams.test_roundtrip_read(NoopStream, NoopStream)