Skip to content

Commit

Permalink
more testing of errors
Browse files Browse the repository at this point in the history
  • Loading branch information
nhz2 committed Jul 27, 2024
1 parent 16dfc95 commit ba548f9
Show file tree
Hide file tree
Showing 11 changed files with 532 additions and 24 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
/.vscode/

/test/Manifest.toml
/fuzz/Manifest.toml
/fuzz/test
/Manifest.toml
fixture.tar.gz
fixture
Expand Down
11 changes: 11 additions & 0 deletions fuzz/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[deps]
ArgCheck = "dce04be8-c92d-5529-be00-80e4d2c0e197"
CRC32 = "b4567568-9dcc-467e-9b62-c342d3a501d3"
CodecInflate64 = "6309b1aa-fc58-479c-8956-599a07234577"
CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193"
InputBuffers = "0c81fc1b-5583-44fc-8770-48be1e1cca08"
Supposition = "5a0628fe-1738-4658-9b6d-0b7605a9755b"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
TranscodingStreams = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"
ZipArchives = "49080126-0e18-4c2a-b176-c102e4b3760c"
p7zip_jll = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"
134 changes: 134 additions & 0 deletions fuzz/fuzz.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
using Supposition: Data, @composed, @check, event!, produce!

include("../test/utils.jl")

# Note HuffmanTree is internal
using CodecInflate64: HuffmanTree, parse_huffman!, get_op

# Generator for arbitrary byte vectors holding between 0 and 200_000 bytes.
const datas = Data.Vectors(Data.Integers{UInt8}(); min_size=0, max_size=200_000)

@testset "roundtrip" begin
    # Property: passing the output of `zlib_compress` through `decompress`
    # gives back the original bytes.
    @check max_examples=10_000 function roundtrip(data=datas)
        compressed = zlib_compress(data)
        restored = decompress(compressed)
        return data == restored
    end

    # Property: every strict prefix of the compressed bytes (including the empty
    # prefix) must fail with exactly `DecompressionError("not enough input")`.
    @check max_examples=200 function roundtrip_partial(data=datas)
        c = zlib_compress(data)
        for stop in eachindex(c)
            outcome = try
                decompress(c[begin:stop-1])
                nothing # decoding a truncated stream must not succeed
            catch e
                e
            end
            # `nothing` (unexpected success) and any other error both fail here.
            if outcome !== DecompressionError("not enough input")
                return false
            end
        end
        return true
    end
end

@testset "random input" begin
    # Property: reading arbitrary bytes through `DeflateDecompressorStream`
    # either succeeds or throws a `DecompressionError`; any other exception
    # is rethrown.
    @check max_examples=10_000 function rand_input(data=datas)
        try
            stream = DeflateDecompressorStream(InputBuffer(data))
            read(stream, 1_000_000) # avoid using all memory
        catch e
            if !(e isa DecompressionError)
                rethrow()
            end
        end
        return true
    end

    # Same property as above, for `Deflate64DecompressorStream`.
    @check max_examples=10_000 function rand_input64(data=datas)
        try
            stream = Deflate64DecompressorStream(InputBuffer(data))
            read(stream, 1_000_000) # avoid using all memory
        catch e
            if !(e isa DecompressionError)
                rethrow()
            end
        end
        return true
    end
end

# Generator for 1 to 100 blocks, each holding 0 to 2^16-1 arbitrary bytes.
const blockss = Data.Vectors(
    Data.Vectors(Data.Integers{UInt8}(); min_size=0, max_size=2^16-1);
    min_size=1,
    max_size=100,
)

@testset "random non compressed blocks" begin
    # Property: hand-assembling the blocks into a byte stream and passing it to
    # `de64compress` returns the concatenation of the block payloads.
    @check max_examples=10_000 function rand_blocks(blocks=blockss)
        data = UInt8[]
        final_idx = length(blocks)
        for (i, block) in enumerate(blocks)
            len = UInt16(length(block))
            nlen = ~len
            # Header byte: 0b001 on the last block, 0b000 on every other block.
            push!(data, i == final_idx ? 0b001 : 0b000)
            # Length followed by its bitwise complement, low byte first.
            push!(data, len & 0xFF, len >> 8, nlen & 0xFF, nlen >> 8)
            append!(data, block)
        end
        return de64compress(data) == collect(Iterators.flatten(blocks))
    end
end

# Generators for vectors of per-op code lengths. Each one bounds the vector
# length and the largest code length to match one of the `HuffmanTree` shapes
# listed in `tree_types`.
const clen_num_bits_per_ops = Data.Vectors(Data.Integers(0x00, 0x07); min_size=4, max_size=19)
const lit_len_num_bits_per_ops = Data.Vectors(Data.Integers(0x00, 0x0F); min_size=257, max_size=288)
const dist_num_bits_per_ops = Data.Vectors(Data.Integers(0x00, 0x0F); min_size=1, max_size=32)

# Pairs of (`HuffmanTree` constructor arguments, generator of code lengths).
# Declared `const` like the other module-level generators.
const tree_types = [
    ((19, 7), clen_num_bits_per_ops),
    ((288, 15), lit_len_num_bits_per_ops),
    ((32, 15), dist_num_bits_per_ops),
]

@testset "random huffman tree building $(tree_args)" for (tree_args, tree_pos) in tree_types
    # Property: for arbitrary code lengths, `parse_huffman!` either throws a
    # `DecompressionError` or builds a tree in which every 16-bit input decodes
    # to an op whose recorded code length and code bits match the input.
    @check max_examples=1_000_000 function rand_trees(num_bits_per_op=tree_pos)
        sum(num_bits_per_op) < 2 && return true # ignore one bit special case here
        try
            tree = parse_huffman!(HuffmanTree(tree_args...), num_bits_per_op)
            for input in 0x0000:0xFFFF
                op, nbits = get_op(input, tree)
                nbits == num_bits_per_op[op+1] || return false
                # Only the low `nbits` bits of `input` belong to the decoded code.
                code = (input & ~(0xFFFF << nbits), nbits)
                op_to_huffman_code(tree, op) == code || return false
            end
        catch err
            # Rejecting the code lengths with a `DecompressionError` is an
            # accepted outcome. Anything else is rethrown so the exception and
            # its backtrace are reported instead of a bare `false`, matching
            # the error handling of the "random input" properties.
            err isa DecompressionError || rethrow()
        end
        return true
    end
end

@testset "random valid huffman tree building" begin
    # Property: code lengths produced by repeatedly splitting one code into two
    # codes that are one bit longer are accepted by `parse_huffman!`, and every
    # 16-bit input decodes to an op whose code length and code bits match.
    @check max_examples=10_000 function rand_valid_trees(
        split_ops=Data.Vectors(
            Data.Pairs(Data.Integers{Int}(), Data.Integers{Int}());
            min_size=0,
            max_size=286,
        )
    )
        max_nbits = 0x0F
        # Start with two ops that each use a one-bit code.
        code_lengths = UInt8[0x01, 0x01]
        for (pick, at) in split_ops
            # Only codes shorter than `max_nbits` can still be lengthened.
            splittable = findall(<(max_nbits), code_lengths)
            target = splittable[mod1(pick, length(splittable))]
            deeper = code_lengths[target] + 1
            code_lengths[target] = deeper
            # Add the sibling code, with the same new length, at a position
            # derived from `at`.
            insert!(code_lengths, mod1(at, length(code_lengths) + 1), deeper)
        end
        tree = parse_huffman!(HuffmanTree(288, 15), code_lengths)
        for input in 0x0000:0xFFFF
            op, nbits = get_op(input, tree)
            if nbits != code_lengths[op+1]
                return false
            end
            # Only the low `nbits` bits of `input` belong to the decoded code.
            expected = (input & ~(0xFFFF << nbits), nbits)
            if op_to_huffman_code(tree, op) != expected
                return false
            end
        end
        return true
    end
end
30 changes: 14 additions & 16 deletions src/huffmantree.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@ function parse_huffman!(
tree::HuffmanTree,
num_bits_per_op::AbstractVector{UInt8}, # in
)
# @show length(num_bits_per_op)
# TODO Validate produced tree
sorted_ops = tree.sorted_ops
num_ops_per_num_bit = tree.num_ops_per_num_bit
op_offset_per_num_bit = tree.op_offset_per_num_bit
Expand All @@ -28,7 +26,7 @@ function parse_huffman!(
op_offset_per_num_bit .= 0x0000
max_num_bits = length(num_ops_per_num_bit)
code_space::UInt64 = 0 # this keeps track of the amount of code_space used out of 2^32
@assert max_num_bits ≥ maximum(num_bits_per_op)
@assert max_num_bits ≥ maximum(num_bits_per_op; init=0x00)
@assert length(op_offset_per_num_bit) == max_num_bits + 1
@assert length(sorted_ops) ≥ length(num_bits_per_op)
for n in num_bits_per_op
Expand Down Expand Up @@ -56,24 +54,24 @@ function parse_huffman!(
# https://github.com/ebiggers/libdeflate/blob/dc76454a39e7e83b68c3704b6e3784654f8d5ac5/lib/deflate_decompress.c#L791
if code_space > UInt64(1)<<32
# This can never be valid
throw(DecompressionError("overfull code"))
throw(DecompressionError("overfull code table"))
elseif code_space < UInt64(1)<<32
# This can be valid in some special cases described in the RFC
# https://github.com/ebiggers/libdeflate/blob/dc76454a39e7e83b68c3704b6e3784654f8d5ac5/lib/deflate_decompress.c#L809-L839
if !iszero(code_space) # no codes is valid if no distance codes are used.
if code_space != UInt64(1)<<31 || num_ops_per_num_bit[1] != 1 # one code encoded with one bit is valid.
throw(DecompressionError("incomplete code"))
else
# pad out huffman tree like in libdeflate
# This ensures that all codes can be decoded without error
# later on.
num_ops_per_num_bit[1] = 2
op_offset_per_num_bit .= 3
op_offset_per_num_bit[1] = 1
sorted_ops[2] = sorted_ops[1]
end
if code_space != UInt64(1)<<31 || num_ops_per_num_bit[1] != 1
throw(DecompressionError("incomplete code table"))
else
# one code encoded with one bit is valid.
# pad out huffman tree like in libdeflate
# This ensures that all codes can be decoded without error
# later on.
num_ops_per_num_bit[1] = 2
op_offset_per_num_bit .= 3
op_offset_per_num_bit[1] = 1
sorted_ops[2] = sorted_ops[1]
end
end
tree
end

# Using algorithm from https://github.com/GunnarFarneback/Inflate.jl/blob/cc77be73388f4160d187ab0c3fdaa3df13aa7f3b/src/Inflate.jl#L134-L145
Expand Down
9 changes: 4 additions & 5 deletions src/stream.jl
Original file line number Diff line number Diff line change
Expand Up @@ -241,9 +241,6 @@ function read_input_bits!(s::StreamState)::Bool
s.clen_num_bits_per_op[1 + order[i]] = x & 0b111
x >>= 0x03
end
if all(iszero, s.clen_num_bits_per_op)
throw(DecompressionError("no code for clen"))
end
parse_huffman!(s.clen_tree, s.clen_num_bits_per_op)
s.lit_len_dist_num_bits_per_op .= 0x00
s.num_bits_per_op_idx = 1
Expand Down Expand Up @@ -306,15 +303,17 @@ function read_input_bits!(s::StreamState)::Bool
if iszero(lit_len_num_bits_per_op[1 + 256])
throw(DecompressionError("no code for end-of-block"))
end
parse_huffman!(s.lit_len_tree, lit_len_num_bits_per_op)
# if there are no dist codes, there also cannot be any len codes
if all(iszero, dist_num_bits_per_op)
local last_lit_len_op = something(findlast(!iszero, lit_len_num_bits_per_op))
if last_lit_len_op > 1 + 256
throw(DecompressionError("no codes for distances, but there is a code for length"))
end
reset!(s.dist_tree)
else
parse_huffman!(s.dist_tree, dist_num_bits_per_op)
end
parse_huffman!(s.lit_len_tree, lit_len_num_bits_per_op)
parse_huffman!(s.dist_tree, dist_num_bits_per_op)
s.in_mode = LIT_LEN_DIST_OP
else
s.num_bits_per_op_idx = i
Expand Down
1 change: 1 addition & 0 deletions test/Project.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
[deps]
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
ArgCheck = "dce04be8-c92d-5529-be00-80e4d2c0e197"
CRC32 = "b4567568-9dcc-467e-9b62-c342d3a501d3"
CodecInflate64 = "6309b1aa-fc58-479c-8956-599a07234577"
CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193"
Expand Down
4 changes: 4 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ include("utils.jl")

include("tests_from_inflate.jl")

include("test_huffman.jl")

include("test_errors.jl")

@testset "Exercise Deflate64 distances and lengths" begin
thing = rand(UInt8, 200)
d = UInt8[]
Expand Down
Loading

0 comments on commit ba548f9

Please sign in to comment.