From 2e042774242cfc1d3ea5bd9bff368d6eb81a0fe8 Mon Sep 17 00:00:00 2001 From: nhz2 Date: Thu, 5 Sep 2024 22:33:26 -0400 Subject: [PATCH 1/5] Add `pledgeinsize` to transcoding protocol --- src/codec.jl | 25 ++++++++++++++++++++++++- src/transcode.jl | 6 ++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/src/codec.jl b/src/codec.jl index d6cff34e..cf1e37eb 100644 --- a/src/codec.jl +++ b/src/codec.jl @@ -14,6 +14,7 @@ Transcoding proceeds by calling some functions in a specific way. We call this There are six functions for a codec to implement: - `expectedsize`: return the expected size of transcoded data +- `pledgeinsize`: tell the codec the total input size - `minoutsize`: return the minimum output size of `process` - `initialize`: initialize the codec - `finalize`: finalize the codec @@ -22,7 +23,7 @@ There are six functions for a codec to implement: These are defined in the `TranscodingStreams` and a new codec type must extend these methods if necessary. Implementing a `process` method is mandatory but -others are optional. `expectedsize`, `minoutsize`, `initialize`, `finalize`, +others are optional. `expectedsize`, `minoutsize`, `pledgeinsize`, `initialize`, `finalize`, and `startproc` have a default implementation. Your codec type is denoted by `C` and its object by `codec`. @@ -39,6 +40,17 @@ used as a hint to determine the size of a data buffer when `transcode` is called. A good hint will reduce the number of buffer resizing and hence result in better performance. +### `pledgeinsize` + +The `pledgeinsize(codec::C, insize::Int64, error::Error)::Symbol` method is used +when `transcode` is called to tell the `codec` the total input size. Some +compressors can add this total input size to a header, making `expectedsize` +accurate during later decompression. By default this just returns `:ok`. +If there is an error, the return code must be `:error` and the `error` argument +must be set to an exception object. Setting an inaccurate `insize` may cause the +codec to error later on while streaming data. A negative `insize` means unknown +content size. + ### `minoutsize` The `minoutsize(codec::C, input::Memory)::Int` method takes `codec` and `input`, @@ -112,6 +124,17 @@ function expectedsize(codec::Codec, input::Memory)::Int return input.size end +""" + pledgeinsize(codec::Codec, insize::Int64, error::Error)::Symbol + +Tell the codec the total input size. + +The default method does nothing and returns `:ok`. +""" +function pledgeinsize(codec::Codec, insize::Int64, error::Error)::Symbol + return :ok +end + """ minoutsize(codec::Codec, input::Memory)::Int diff --git a/src/transcode.jl b/src/transcode.jl index fca33736..ab278032 100644 --- a/src/transcode.jl +++ b/src/transcode.jl @@ -147,6 +147,9 @@ function unsafe_transcode!( if code === :error @goto error end + if pledgeinsize(codec, Int64(buffersize(input)), error) === :error + @goto error + end n = GC.@preserve input minoutsize(codec, buffermem(input)) @label process makemargin!(output, n) @@ -168,6 +171,9 @@ function unsafe_transcode!( if startproc(codec, :write, error) === :error @goto error end + if pledgeinsize(codec, Int64(buffersize(input)), error) === :error + @goto error + end n = GC.@preserve input minoutsize(codec, buffermem(input)) @goto process end From b2b61c16f3ab3702b058190fa12dea4adbc6c879 Mon Sep 17 00:00:00 2001 From: nhz2 Date: Thu, 5 Sep 2024 22:45:37 -0400 Subject: [PATCH 2/5] add to docs --- docs/src/reference.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/src/reference.md b/docs/src/reference.md index 610e23fa..7a490afd 100644 --- a/docs/src/reference.md +++ b/docs/src/reference.md @@ -42,6 +42,7 @@ Base.position(::NoopStream) ```@docs TranscodingStreams.Codec TranscodingStreams.expectedsize +TranscodingStreams.pledgeinsize TranscodingStreams.minoutsize TranscodingStreams.initialize TranscodingStreams.finalize From 468d00624d28238d0d3a72652a86e339cbdb038b Mon Sep 17 00:00:00 2001 From: nhz2 Date: Fri, 6 Sep 2024 19:43:25 -0400 Subject: [PATCH 3/5] Add details of when `pledgeinsize` is called --- src/codec.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/codec.jl b/src/codec.jl index cf1e37eb..0c0809f4 100644 --- a/src/codec.jl +++ b/src/codec.jl @@ -43,12 +43,13 @@ in better performance. ### `pledgeinsize` The `pledgeinsize(codec::C, insize::Int64, error::Error)::Symbol` method is used -when `transcode` is called to tell the `codec` the total input size. Some +when `transcode` is called to tell the `codec` the total input size. +This is called after `startproc` and before `process`. Some compressors can add this total input size to a header, making `expectedsize` accurate during later decompression. By default this just returns `:ok`. If there is an error, the return code must be `:error` and the `error` argument must be set to an exception object. Setting an inaccurate `insize` may cause the -codec to error later on while streaming data. A negative `insize` means unknown +codec to error later on while processing data. A negative `insize` means unknown content size. ### `minoutsize` From 5581ff0d0ee7dde1019fcf34cd9a051f220e3bd5 Mon Sep 17 00:00:00 2001 From: Nathan Zimmerberg <39104088+nhz2@users.noreply.github.com> Date: Sun, 8 Sep 2024 12:06:08 -0400 Subject: [PATCH 4/5] Add more details on order. --- src/codec.jl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/codec.jl b/src/codec.jl index 0c0809f4..75d03bd7 100644 --- a/src/codec.jl +++ b/src/codec.jl @@ -84,10 +84,11 @@ the stream will become the close mode for safety. ### `startproc` The `startproc(codec::C, mode::Symbol, error::Error)::Symbol` method takes -`codec`, `mode` and `error`, and returns a status code. This is called just -before the stream starts reading or writing data. `mode` is either `:read` or -`:write` and then the stream starts reading or writing, respectively. The -return code must be `:ok` if `codec` is ready to read or write data. Otherwise, +`codec`, `mode`, and `error`, and returns a status code. This resets the state +of the codec and is called before the stream starts processing data. +After a call to `startproc`, `pledgeinsize` can be optionally called. +`mode` is either `:read` or `:write`. The +return code must be `:ok` if `codec` is ready to process data. Otherwise, it must be `:error` and the `error` argument must be set to an exception object. ### `process` From 2156476888d1be2b741f5fdd5926e07c0db4fffb Mon Sep 17 00:00:00 2001 From: nhz2 Date: Fri, 13 Sep 2024 00:33:04 -0400 Subject: [PATCH 5/5] Add example usage of `pledgeinsize` --- test/codecdoubleframe.jl | 58 +++++++++++++++++++++++++++++++++------- 1 file changed, 49 insertions(+), 9 deletions(-) diff --git a/test/codecdoubleframe.jl b/test/codecdoubleframe.jl index c004986d..0d85b370 100644 --- a/test/codecdoubleframe.jl +++ b/test/codecdoubleframe.jl @@ -20,9 +20,11 @@ struct DoubleFrameEncoder <: TranscodingStreams.Codec opened::Base.RefValue{Bool} stopped::Base.RefValue{Bool} got_stop_msg::Base.RefValue{Bool} + pledged_in_size::Base.RefValue{Int64} + in_size_count::Base.RefValue{Int64} end -DoubleFrameEncoder() = DoubleFrameEncoder(Ref(false), Ref(false), Ref(false)) +DoubleFrameEncoder() = DoubleFrameEncoder(Ref(false), Ref(false), Ref(false), Ref(Int64(-1)), Ref(Int64(0))) function TranscodingStreams.process( codec :: DoubleFrameEncoder, @@ -30,6 +32,7 @@ function TranscodingStreams.process( output :: TranscodingStreams.Memory, error_ref :: TranscodingStreams.Error, ) + pledged = codec.pledged_in_size[] if input.size == 0 codec.got_stop_msg[] = true end @@ -45,26 +48,59 @@ function TranscodingStreams.process( return 0, 0, :error elseif !codec.opened[] output[1] = UInt8('[') - output[2] = UInt8(' ') + if pledged ∈ (0:9) + output[2] = UInt8('0'+pledged) + else + output[2] = UInt8(' ') + end codec.opened[] = true return 0, 2, :ok elseif codec.got_stop_msg[] + # check in_size_count against pledged + if pledged ∈ (0:9) + if pledged > codec.in_size_count[] + error_ref[] = ErrorException("pledged in size was too big") + return 0, 0, :error + end + end output[1] = UInt8(' ') output[2] = UInt8(']') codec.stopped[] = true return 0, 2, :end else i = j = 0 + # check input.size against pledged + if pledged ∈ (0:9) + if input.size > pledged || pledged - input.size < codec.in_size_count[] + error_ref[] = ErrorException("pledged in size was too small") + return 0, 0, :error + end + end while i + 1 ≤ lastindex(input) && j + 2 ≤ lastindex(output) b = input[i+1] i += 1 output[j+1] = output[j+2] = b j += 2 end + codec.in_size_count[] += i return i, j, :ok end end +function TranscodingStreams.pledgeinsize( + codec::DoubleFrameEncoder, + insize::Int64, + error::Error, + )::Symbol + if codec.opened[] + error[] = ErrorException("pledgeinsize called after opening") + return :error + else + codec.pledged_in_size[] = insize + return :ok + end +end + function TranscodingStreams.expectedsize( :: DoubleFrameEncoder, input :: TranscodingStreams.Memory) @@ -81,6 +117,8 @@ function TranscodingStreams.startproc(codec::DoubleFrameEncoder, ::Symbol, error codec.opened[] = false codec.got_stop_msg[] = false codec.stopped[] = false + codec.pledged_in_size[] = -1 + codec.in_size_count[] = 0 return :ok end @@ -149,7 +187,7 @@ function TranscodingStreams.process( codec.a[] != UInt8('[') && error("expected [") @label state2 do_read(codec.a) || return (codec.state[]=2; (Δin, Δout, :ok)) - codec.a[] != UInt8(' ') && error("expected space") + codec.a[] ∉ (UInt8(' '), UInt8('0'):UInt8('9')...) && error("expected space or size") while true @label state3 do_read(codec.a) || return (codec.state[]=3; (Δin, Δout, :ok)) @@ -189,12 +227,14 @@ DoubleFrameDecoderStream(stream::IO; kwargs...) = TranscodingStream(DoubleFrameD @testset "DoubleFrame Codecs" begin - @test transcode(DoubleFrameEncoder, b"") == b"[ ]" - @test transcode(DoubleFrameEncoder, b"a") == b"[ aa ]" - @test transcode(DoubleFrameEncoder, b"ab") == b"[ aabb ]" - @test transcode(DoubleFrameEncoder(), b"") == b"[ ]" - @test transcode(DoubleFrameEncoder(), b"a") == b"[ aa ]" - @test transcode(DoubleFrameEncoder(), b"ab") == b"[ aabb ]" + @test transcode(DoubleFrameEncoder, b"") == b"[0 ]" + @test transcode(DoubleFrameEncoder, b"a") == b"[1aa ]" + @test transcode(DoubleFrameEncoder, b"ab") == b"[2aabb ]" + @test transcode(DoubleFrameEncoder(), b"") == b"[0 ]" + @test transcode(DoubleFrameEncoder(), b"a") == b"[1aa ]" + @test transcode(DoubleFrameEncoder(), b"ab") == b"[2aabb ]" + @test transcode(DoubleFrameEncoder(), ones(UInt8,9)) == [b"[9"; ones(UInt8,18); b" ]";] + @test transcode(DoubleFrameEncoder(), ones(UInt8,10)) == [b"[ "; ones(UInt8,20); b" ]";] @test_throws Exception transcode(DoubleFrameDecoder, b"") @test_throws Exception transcode(DoubleFrameDecoder, b" [")