diff --git a/src/Tar.jl b/src/Tar.jl index a580019..50b4f1d 100644 --- a/src/Tar.jl +++ b/src/Tar.jl @@ -54,12 +54,16 @@ include("extract.jl") ## official API: create, list, extract, rewrite, tree_hash """ - create([ predicate, ] dir, [ tarball ]; [ skeleton ]) -> tarball + create( + [ predicate, ] dir, [ tarball ]; + [ skeleton, ] [ portable = false ] + ) -> tarball predicate :: String --> Bool dir :: AbstractString tarball :: Union{AbstractString, AbstractCmd, IO} skeleton :: Union{AbstractString, AbstractCmd, IO} + portable :: Bool Create a tar archive ("tarball") of the directory `dir`. The resulting archive is written to the path `tarball` or if no path is specified, a temporary path is @@ -77,17 +81,22 @@ a "skeleton" to generate the tarball. You create a skeleton file by passing the `skeleton` keyword to the `extract` command. If `create` is called with that skeleton file and the extracted files haven't changed, an identical tarball is recreated. The `skeleton` and `predicate` arguments cannot be used together. + +If the `portable` flag is true then path names are checked for validity on +Windows, which ensures that they don't contain illegal characters or have names +that are reserved. See https://stackoverflow.com/a/31976060/659248 for details. """ function create( predicate::Function, dir::AbstractString, tarball::Union{ArgWrite, Nothing} = nothing; skeleton::Union{ArgRead, Nothing} = nothing, + portable::Bool = false, ) check_create_dir(dir) if skeleton === nothing arg_write(tarball) do tar - create_tarball(predicate, tar, dir) + create_tarball(predicate, tar, dir, portable=portable) end else predicate === true_predicate || @@ -95,7 +104,7 @@ function create( check_create_skeleton(skeleton) arg_read(skeleton) do skeleton arg_write(tarball) do tar - recreate_tarball(tar, dir, skeleton) + recreate_tarball(tar, dir, skeleton, portable=portable) end end end @@ -105,8 +114,9 @@ function create( dir::AbstractString, tarball::Union{ArgWrite, Nothing} = nothing; skeleton::Union{ArgRead, Nothing} = nothing, + portable::Bool = false, ) - create(true_predicate, dir, tarball, skeleton=skeleton) + create(true_predicate, dir, tarball, skeleton=skeleton, portable=portable) end """ @@ -261,11 +271,15 @@ function extract( end """ - rewrite([ predicate, ], old_tarball, [ new_tarball ]) -> new_tarball + rewrite( + [ predicate, ] old_tarball, [ new_tarball ]; + [ portable = false, ] + ) -> new_tarball predicate :: Header --> Bool old_tarball :: Union{AbstractString, AbtractCmd, IO} new_tarball :: Union{AbstractString, AbtractCmd, IO} + portable :: Bool Rewrite `old_tarball` to the standard format that `create` generates, while also checking that it doesn't contain anything that would cause `extract` to raise an @@ -289,25 +303,31 @@ remove `.` entries and replace multiple consecutive slashes with a single slash. If the entry has type `:hardlink`, the link target path is normalized the same way so that it will match the path of the target entry; the size field is set to the size of the target path (which must be an already-seen file). + +If the `portable` flag is true then path names are checked for validity on +Windows, which ensures that they don't contain illegal characters or have names +that are reserved. See https://stackoverflow.com/a/31976060/659248 for details. """ function rewrite( predicate::Function, old_tarball::ArgRead, - new_tarball::Union{ArgWrite, Nothing} = nothing, + new_tarball::Union{ArgWrite, Nothing} = nothing; + portable::Bool = false, ) old_tarball = check_rewrite_old_tarball(old_tarball) arg_read(old_tarball) do old_tar arg_write(new_tarball) do new_tar - rewrite_tarball(predicate, old_tar, new_tar) + rewrite_tarball(predicate, old_tar, new_tar, portable=portable) end end end function rewrite( old_tarball::ArgRead, - new_tarball::Union{ArgWrite, Nothing} = nothing, + new_tarball::Union{ArgWrite, Nothing} = nothing; + portable::Bool = false, ) - rewrite(true_predicate, old_tarball, new_tarball) + rewrite(true_predicate, old_tarball, new_tarball, portable=portable) end """ @@ -461,4 +481,36 @@ check_tree_hash_tarball(tarball::AbstractString) = check_tree_hash_tarball(tarball::ArgRead) = nothing +const Str = Union{String, SubString{String}} + +# Special names on Windows: CON PRN AUX NUL COM1-9 LPT1-9 +# we spell out uppercase/lowercase because of locales +const WIN_SPECIAL_NAMES = r"^( + [Cc][Oo][Nn] | + [Pp][Rr][Nn] | + [Aa][Uu][Xx] | + [Nn][Uu][Ll] | + ( [Cc][Oo][Mm] | + [Ll][Pp][Tt] )[1-9] +)(\.|$)"x + +function check_windows_path( + path :: AbstractString, + parts :: AbstractVector{<:Str} = split(path, r"/+"), +) + for part in parts + isempty(part) && continue + if !isvalid(part) + error("invalid Unicode: $(repr(part)) in $(repr(path))") + end + for ch in part + ch < ' ' || ch ∈ "\"*:<>?\\|" || continue + error("illegal Windows char: $(repr(ch)) in $(repr(path))") + end + if occursin(WIN_SPECIAL_NAMES, part) + error("reserved Windows name: $(repr(part)) in $(repr(path))") + end + end +end + end # module diff --git a/src/create.jl b/src/create.jl index 966c406..cc82051 100644 --- a/src/create.jl +++ b/src/create.jl @@ -3,8 +3,10 @@ function create_tarball( tar::IO, root::String; buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE), + portable::Bool = false, ) write_tarball(tar, root, buf=buf) do sys_path, tar_path + portable && check_windows_path(tar_path) hdr = path_header(sys_path, tar_path) hdr.type != :directory && return hdr, sys_path paths = Dict{String,String}() @@ -22,6 +24,7 @@ function recreate_tarball( root::String, skeleton::IO; buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE), + portable::Bool = false, ) check_skeleton_header(skeleton, buf=buf) globals = Dict{String,String}() @@ -29,6 +32,7 @@ function recreate_tarball( hdr = read_header(skeleton, globals=globals, buf=buf, tee=tar) hdr === nothing && break check_header(hdr) + portable && check_windows_path(hdr.path) sys_path = joinpath(root, hdr.path) if hdr.type == :file write_data(tar, sys_path, size=hdr.size, buf=buf) @@ -41,9 +45,11 @@ function rewrite_tarball( old_tar::IO, new_tar::IO; buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE), + portable::Bool = false, ) tree = Dict{String,Any}() read_tarball(predicate, old_tar; buf=buf) do hdr, parts + portable && check_windows_path(hdr.path, parts) isempty(parts) && return node = tree name = pop!(parts) @@ -138,6 +144,12 @@ function write_header( size = hdr.size link = hdr.link + # check for NULs + 0x0 in codeunits(path) && + throw(ArgumentError("path contains NUL bytes: $(repr(path))")) + 0x0 in codeunits(link) && + throw(ArgumentError("link contains NUL bytes: $(repr(path))")) + # determine if an extended header is needed extended = Pair{String,String}[] # WARNING: don't change the order of these insertions diff --git a/src/extract.jl b/src/extract.jl index 969c4ad..fd41d9d 100644 --- a/src/extract.jl +++ b/src/extract.jl @@ -57,6 +57,7 @@ function extract_tarball( ) root = normpath(root) paths = read_tarball(predicate, tar; buf=buf, skeleton=skeleton) do hdr, parts + Sys.iswindows() && check_windows_path(hdr.path, parts) # get the file system version of the path sys_path = isempty(parts) ? "." : reduce(joinpath, parts) isabspath(sys_path) && diff --git a/test/runtests.jl b/test/runtests.jl index 983375b..d63d522 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -193,61 +193,56 @@ end len = 1234 pad = mod(-len, 512) data = rand(UInt8, len) - tarball, io = mktemp() - Tar.write_header(io, Tar.Header("file", :file, 0o644, len, "")) - write(io, data) - write(io, fill(0x0, pad)) - close(io) - - @testset "tarball is well-formed" begin - @test Tar.list(tarball) == [Tar.Header("file", :file, 0o644, len, "")] - tmp = Tar.extract(tarball) - @test readdir(tmp) == ["file"] - @test read(joinpath(tmp, "file")) == data - rm(tmp, recursive=true) - tarball′ = Tar.rewrite(tarball) - @test read(tarball) == read(tarball′) - rm(tarball′) - end + with_tarball( + Tar.Header("file", :file, 0o644, len, ""), data, fill(0x0, pad), + ) do tarball + @testset "tarball is well-formed" begin + @test Tar.list(tarball) == [Tar.Header("file", :file, 0o644, len, "")] + tmp = Tar.extract(tarball) + @test readdir(tmp) == ["file"] + @test read(joinpath(tmp, "file")) == data + rm(tmp, recursive=true) + tarball′ = Tar.rewrite(tarball) + @test read(tarball) == read(tarball′) + rm(tarball′) + end - @testset "trailing padding truncated" begin - for p in [pad-1, pad÷2, 1, 0] - open(tarball, "a") do io - truncate(io, 512 + len + p) + @testset "trailing padding truncated" begin + for p in [pad-1, pad÷2, 1, 0] + open(tarball, "a") do io + truncate(io, 512 + len + p) + end + @test_throws_broken EOFError Tar.list(tarball) + @test_throws EOFError Tar.extract(tarball) + @test_throws EOFError Tar.tree_hash(tarball) + @test_throws_broken EOFError Tar.rewrite(tarball) end - @test_throws_broken EOFError Tar.list(tarball) - @test_throws EOFError Tar.extract(tarball) - @test_throws EOFError Tar.tree_hash(tarball) - @test_throws_broken EOFError Tar.rewrite(tarball) end - end - @testset "file data truncated" begin - for d in [len÷2, 512, 0] - open(tarball, "a") do io - truncate(io, 512 + d) + @testset "file data truncated" begin + for d in [len÷2, 512, 0] + open(tarball, "a") do io + truncate(io, 512 + d) + end + @test_throws_broken EOFError Tar.list(tarball) + @test_throws EOFError Tar.extract(tarball) + @test_throws EOFError Tar.tree_hash(tarball) + @test_throws EOFError Tar.rewrite(tarball) end - @test_throws_broken EOFError Tar.list(tarball) - @test_throws EOFError Tar.extract(tarball) - @test_throws EOFError Tar.tree_hash(tarball) - @test_throws EOFError Tar.rewrite(tarball) end - end - @testset "header truncated" begin - for h in [511, 256, 1] - open(tarball, "a") do io - truncate(io, h) + @testset "header truncated" begin + for h in [511, 256, 1] + open(tarball, "a") do io + truncate(io, h) + end + @test_throws EOFError Tar.list(tarball) + @test_throws EOFError Tar.extract(tarball) + @test_throws EOFError Tar.tree_hash(tarball) + @test_throws EOFError Tar.rewrite(tarball) end - @test_throws EOFError Tar.list(tarball) - @test_throws EOFError Tar.extract(tarball) - @test_throws EOFError Tar.tree_hash(tarball) - @test_throws EOFError Tar.rewrite(tarball) end end - - # cleanup - rm(tarball) end if @isdefined(gtar) @@ -285,35 +280,32 @@ end @testset "directory after contents" begin # create and hash a reference tarball - tarball, io = mktemp() - # executable files: hashing works on Windows + old Julia version - Tar.write_header(io, Tar.Header("dir/file", :file, 0o755, 0, "")) - Tar.write_header(io, Tar.Header("file", :file, 0o755, 0, "")) - close(io) - hash = Tar.tree_hash(tarball) - rm(tarball) + hash = with_tarball(Tar.tree_hash, + # executable files: hashing works on Windows + old Julia version + Tar.Header("dir/file", :file, 0o755, 0, ""), + Tar.Header("file", :file, 0o755, 0, ""), + ) # create a version with directory entries after contents - tarball, io = mktemp() - Tar.write_header(io, Tar.Header("file", :file, 0o755, 0, "")) - Tar.write_header(io, Tar.Header(".", :directory, 0o755, 0, "")) - Tar.write_header(io, Tar.Header("dir/file", :file, 0o755, 0, "")) - Tar.write_header(io, Tar.Header("dir", :directory, 0o755, 0, "")) - close(io) - # check extract - tree = Tar.extract(tarball) - check_tree_hash(hash, tree) - # check tree_hash - @test Tar.tree_hash(tarball) == hash - # check rewrite - tarball′ = Tar.rewrite(tarball) - @test Tar.list(tarball′) == [ - Tar.Header("dir", :directory, 0o755, 0, "") - Tar.Header("dir/file", :file, 0o755, 0, "") - Tar.Header("file", :file, 0o755, 0, "") - ] - # cleanup - rm(tarball′) - rm(tarball) + with_tarball( + Tar.Header("file", :file, 0o755, 0, ""), + Tar.Header(".", :directory, 0o755, 0, ""), + Tar.Header("dir/file", :file, 0o755, 0, ""), + Tar.Header("dir", :directory, 0o755, 0, ""), + ) do tarball + # check extract + tree = Tar.extract(tarball) + check_tree_hash(hash, tree) + # check tree_hash + @test Tar.tree_hash(tarball) == hash + # check rewrite + tarball′ = Tar.rewrite(tarball) + @test Tar.list(tarball′) == [ + Tar.Header("dir", :directory, 0o755, 0, "") + Tar.Header("dir/file", :file, 0o755, 0, "") + Tar.Header("file", :file, 0o755, 0, "") + ] + rm(tarball′) + end end @testset "extract attacks" begin @@ -323,7 +315,7 @@ end Tar.Header("../file", :file, 0o644, 0, ""), ) # attempt to extract relative path out of root (variation) - test_extract_attack(test_windows_variation, + with_tarball(test_windows_variation, # this is the same on UNIX but different on Windows Tar.Header(joinpath("..", "file"), :file, 0o644, 0, ""), ) @@ -332,7 +324,7 @@ end Tar.Header("dir/../../file", :file, 0o644, 0, ""), ) # same attack with some obfuscation (variation) - test_extract_attack(test_windows_variation, + with_tarball(test_windows_variation, # this is the same on UNIX but different on Windows Tar.Header(joinpath("dir", "..", "..", "file"), :file, 0o644, 0, ""), ) @@ -342,7 +334,7 @@ end ) @test !ispath(joinpath(tmpd, "file")) # attempt to extract absolute path out of root - test_extract_attack(test_windows_variation, + with_tarball(test_windows_variation, Tar.Header("$tmpd/file", :file, 0o644, 0, ""), ) @test !ispath(joinpath(tmpd, "file")) @@ -398,48 +390,41 @@ end end @testset "allow overwrites" begin for hdr₁ in hdrs, hdr₂ in hdrs - tarball₁, io = mktemp() - Tar.write_header(io, file) - Tar.write_header(io, hdr₁) - Tar.write_header(io, hdr₂) - close(io) - hash = Tar.tree_hash(tarball₁) - tree₁ = Tar.extract(tarball₁) - @test hash == tree_hash(tree₁) - tarball₂, io = mktemp() - Tar.write_header(io, file) - Tar.write_header(io, hdr₂) - close(io) - @test hash == Tar.tree_hash(tarball₂) - tree₂ = Tar.extract(tarball₂) - @test hash == tree_hash(tree₂) - rm(tree₁, recursive=true) - rm(tree₂, recursive=true) - rm(tarball₁) - rm(tarball₂) + with_tarball(file, hdr₁, hdr₂) do tarball₁ + hash = Tar.tree_hash(tarball₁) + tree₁ = Tar.extract(tarball₁) + @test hash == tree_hash(tree₁) + rm(tree₁, recursive=true) + with_tarball(file, hdr₂) do tarball₂ + @test hash == Tar.tree_hash(tarball₂) + tree₂ = Tar.extract(tarball₂) + @test hash == tree_hash(tree₂) + rm(tree₂, recursive=true) + end + end end end !Sys.iswindows() && @testset "allow write into directory overwriting a symlink" begin # make sure "path" is removed from links set - tarball₁, io = mktemp() - Tar.write_header(io, Tar.Header("path", :symlink, 0o755, 0, tmp)) - Tar.write_header(io, Tar.Header("path", :directory, 0o755, 0, "")) - Tar.write_header(io, Tar.Header("path/file", :file, 0o644, 0, "")) - close(io) - hash = Tar.tree_hash(tarball₁) - tree₁ = Tar.extract(tarball₁) - @test hash == tree_hash(tree₁) - tarball₂, io = mktemp() - Tar.write_header(io, Tar.Header("path/file", :file, 0o644, 0, "")) - close(io) - @test hash == Tar.tree_hash(tarball₂) - tree₂ = Tar.extract(tarball₂) - @test hash == tree_hash(tree₂) - rm(tree₁, recursive=true) - rm(tree₂, recursive=true) - rm(tarball₁) - rm(tarball₂) + with_tarball( + Tar.Header("path", :symlink, 0o755, 0, tmp), + Tar.Header("path", :directory, 0o755, 0, ""), + Tar.Header("path/file", :file, 0o644, 0, ""), + ) do tarball₁ + hash = Tar.tree_hash(tarball₁) + tree₁ = Tar.extract(tarball₁) + @test hash == tree_hash(tree₁) + rm(tree₁, recursive=true) + with_tarball( + Tar.Header("path/file", :file, 0o644, 0, ""), + ) do tarball₂ + @test hash == Tar.tree_hash(tarball₂) + tree₂ = Tar.extract(tarball₂) + @test hash == tree_hash(tree₂) + rm(tree₂, recursive=true) + end + end end # check temp dir is empty, remove it @test isempty(readdir(tmp)) @@ -535,6 +520,55 @@ end rm(tmp) end +@testset "bad Windows paths" begin + # test that we catch bad/illegal Windows names + bad = [ + "c:", "\\", "D:\\tmp", "*.*", "\1.txt", + "PRN", "LPT1", "lpt2", "LpT3", "Com9", + "CON.txt", "PRN.∀", "\xba\xdd", + ] + for name in bad + with_tarball(Tar.Header(name, :file, 0o644, 0, "")) do tarball + @test_no_throw Tar.rewrite(tarball) + @test_no_throw Tar.rewrite(tarball, portable=false) + @test_throws ErrorException Tar.rewrite(tarball, portable=true) + # Windows can't extract bad names (that's the whole point) + Sys.iswindows() && return # continue + if !isvalid(name) + # MacOS doesn't allow invalid UTF-8 path names + Sys.isapple() && return + # older Julia versions don't like invalid paths + VERSION < v"1.7" && return + end + for sk in [nothing, tempname()] + dir = Tar.extract(tarball, skeleton=sk) + @test_no_throw Tar.create(dir, skeleton=sk) + @test_no_throw Tar.create(dir, skeleton=sk, portable=false) + @test_throws ErrorException Tar.create(dir, skeleton=sk, portable=true) + rm(dir, recursive=true) + end + end + end + # test that we don't catch too much + fine = [ + "c;", "÷", "D;÷tmp", "+.+", "1.txt", + ".PRN", "LPT0", "𝕃pt2", " LpT3", "Com", + "CON⋅txt", "PRN-", "\xdd\xba", + ] + for name in fine + with_tarball(Tar.Header(name, :file, 0o644, 0, "")) do tarball + @test_no_throw Tar.rewrite(tarball, portable = false) + @test_no_throw Tar.rewrite(tarball, portable = true) + for sk in [nothing, tempname()] + dir = Tar.extract(tarball, skeleton=sk) + @test_no_throw Tar.create(dir, skeleton=sk, portable=false) + @test_no_throw Tar.create(dir, skeleton=sk, portable=true) + rm(dir, recursive=true) + end + end + end +end + @testset "API: create" begin local bytes @@ -583,7 +617,7 @@ end end # In this issue we've seen that symlinking a directory caused files inside - # the directory to become read-only. Guard against Tar.jl doing something + # the directory to become read-only. Guard against Tar.jl doing something # weird like that. @testset "Issue Pkg#2185" begin mktempdir() do dir @@ -601,7 +635,8 @@ end tarball = Tar.create(root, joinpath(dir, "test.tar")) files = Tar.list(tarball) # Make sure the file and the symlink have the expected permissions. - # Note: in old versions of Julia, the file has always permission 755 on Windows + # Note: in old versions of Julia, the file has always permission 755 + # on Windows @test Tar.Header(replace(file, "\\" => "/"), :file, VERSION ≤ v"1.6.0-DEV.1683" && Sys.iswindows() ? 0o755 : file_mode, 0, "") in files @test Tar.Header(replace(link, "\\" => "/"), :symlink, dir_mode, 0, basename(target)) in files end @@ -1071,21 +1106,19 @@ end pad = mod(-len, 512) data = rand(UInt8, len) # create reference tarball - reference, io = mktemp() - Tar.write_header(io, Tar.Header("file", :file, 0o755, len, "")) - write(io, data) - write(io, fill(0x0, pad)) - close(io) - hash = tree_hash(Tar.extract(reference)) - # create tarball with self-referential hard link - tarball, io = mktemp() - Tar.write_header(io, Tar.Header("file", :file, 0o644, len, "")) - write(io, data) - write(io, fill(0x0, pad)) - Tar.write_header(io, Tar.Header("file", :hardlink, 0o755, 0, "file")) - close(io) - # test that it behaves like the reference tarball - @test hash == Tar.tree_hash(tarball) - @test hash == tree_hash(Tar.extract(tarball)) - @test read(reference) == read(Tar.rewrite(tarball)) + with_tarball( + Tar.Header("file", :file, 0o755, len, ""), data, fill(0x0, pad), + ) do reference + hash = tree_hash(Tar.extract(reference)) + # create tarball with self-referential hard link + with_tarball( + Tar.Header("file", :file, 0o644, len, ""), data, fill(0x0, pad), + Tar.Header("file", :hardlink, 0o755, 0, "file"), + ) do tarball + # test that it behaves like the reference tarball + @test hash == Tar.tree_hash(tarball) + @test hash == tree_hash(Tar.extract(tarball)) + @test read(reference) == read(Tar.rewrite(tarball)) + end + end end diff --git a/test/setup.jl b/test/setup.jl index 34fd752..18d994b 100644 --- a/test/setup.jl +++ b/test/setup.jl @@ -251,18 +251,31 @@ function test_error_prefix(f::Function, prefix::AbstractString) @test startswith(val.msg, prefix) end -function test_extract_attack(body::Function, hdrs::Tar.Header...) +function make_tarball(args::Union{Tar.Header, Vector{UInt8}}...) tarball, io = mktemp() - for hdr in hdrs - Tar.write_header(io, hdr) + try + for arg in args + if arg isa Tar.Header + Tar.write_header(io, arg) + else + write(io, arg) + end + end + finally + close(io) + end + return tarball +end + +function with_tarball(body::Function, args::Union{Tar.Header, Vector{UInt8}}...) + tarball = make_tarball(args...) + try body(tarball) + finally rm(tarball) end - close(io) - body(tarball) - rm(tarball) end function test_extract_attack(hdrs::Tar.Header...) - test_extract_attack(hdrs...) do tarball + with_tarball(hdrs...) do tarball @test_throws ErrorException Tar.extract(tarball) @test_throws ErrorException Tar.rewrite(tarball) @test_throws ErrorException Tar.tree_hash(tarball) @@ -280,4 +293,10 @@ function test_windows_variation(tarball) @test_throws ErrorException Tar.tree_hash(tarball) end +# TODO: handle properly (fail if error, pass if no error) +# This should really be part off the Test stdlib +macro test_no_throw(ex) + esc(ex) +end + const test_data_dir = joinpath(@__DIR__, "data")