diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index a3bc071..0557c66 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -6,6 +6,7 @@ on: tags: ['*'] jobs: test: + timeout-minutes: 30 name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} runs-on: ${{ matrix.os }} strategy: diff --git a/Artifacts.toml b/Artifacts.toml new file mode 100644 index 0000000..4f0bd93 --- /dev/null +++ b/Artifacts.toml @@ -0,0 +1,15 @@ +[julia_parcompat] +git-tree-sha1 = "066eb71b5392a8edb036e6a66a92d9b94a9e7eed" +lazy = true + + [[julia_parcompat.download]] + sha256 = "943ac718383a9bb1144e0edc88ec1c42f071e00d40728af7b18a213fd4da5d1c" + url = "https://github.com/JuliaIO/parquet-compatibility/archive/3f7586f1b7f2a0c6b048791fb5f97c0b3df52e39.tar.gz" + +[parcompat] +git-tree-sha1 = "1e993c153d3df6b2039ea5df61aeea2cb5213753" +lazy = true + + [[parcompat.download]] + sha256 = "895f382e65e4684335d6cbd2d682172d0923660f5525e0682112361305f05b64" + url = "https://github.com/Parquet/parquet-compatibility/archive/2b47eac447c7a4a88247651a4065984db7b247ff.tar.gz" diff --git a/Project.toml b/Project.toml index 8f5dad3..e9c73ce 100644 --- a/Project.toml +++ b/Project.toml @@ -3,7 +3,7 @@ uuid = "626c502c-15b0-58ad-a749-f091afb673ae" keywords = ["parquet", "julia", "columnar-storage"] license = "MIT" desc = "Julia implementation of parquet columnar file format reader and writer" -version = "0.8.3" +version = "0.8.4" [deps] CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" @@ -29,14 +29,16 @@ Decimals = "0.4" LittleEndianBase128 = "0.3" Missings = "0.3,0.4,1" SentinelArrays = "1" -Snappy = "0.3" +Snappy = "0.3, 0.4" Tables = "1.6" Thrift = "0.8" julia = "1.3" [extras] +Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" +LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Test", "Random"] +test = ["Artifacts", "LazyArtifacts", "Random", "Test"] diff --git a/test/generate_artifacts.jl b/test/generate_artifacts.jl new file mode 100644 index 0000000..ac15da7 --- /dev/null +++ b/test/generate_artifacts.jl @@ -0,0 +1,17 @@ +using ArtifactUtils, Artifacts + +add_artifact!( + joinpath(@__DIR__, "..", "Artifacts.toml"), + "parcompat", + "https://github.com/Parquet/parquet-compatibility/archive/2b47eac447c7a4a88247651a4065984db7b247ff.tar.gz", + force=true, + lazy=true, +) + +add_artifact!( + joinpath(@__DIR__, "..", "Artifacts.toml"), + "julia_parcompat", + "https://github.com/JuliaIO/parquet-compatibility/archive/3f7586f1b7f2a0c6b048791fb5f97c0b3df52e39.tar.gz", + force=true, + lazy=true, +) diff --git a/test/get_parcompat.jl b/test/get_parcompat.jl deleted file mode 100644 index d995079..0000000 --- a/test/get_parcompat.jl +++ /dev/null @@ -1,19 +0,0 @@ -using Parquet -using Test - -function get_parcompat(parcompat=joinpath(dirname(@__FILE__), "parquet-compatibility")) - # look for parquet-compatibility in test folder, clone to tempdir if not found - if !isdir(parcompat) - run(`git clone https://github.com/Parquet/parquet-compatibility.git $parcompat`) - end -end - -function get_juliaparcompat(juliaparcompat=joinpath(dirname(@__FILE__), "julia-parquet-compatibility")) - # look for julia-parquet-compatibility in test folder, clone to tempdir if not found - if !isdir(juliaparcompat) - run(`git clone https://github.com/JuliaIO/parquet-compatibility.git $juliaparcompat`) - end -end - -get_parcompat() -get_juliaparcompat() diff --git a/test/runtests.jl b/test/runtests.jl index 89ae2d7..f789d33 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,4 +1,11 @@ -include("get_parcompat.jl") +using Parquet +using Test +using LazyArtifacts, Artifacts + +# Note: readdir(...; join=true) requires Julia v1.4. +const parcompat = joinpath(artifact"parcompat", readdir(artifact"parcompat")[1]) +const julia_parcompat = joinpath(artifact"julia_parcompat", readdir(artifact"julia_parcompat")[1]) + @testset "parquet tests" begin include("test_load.jl") include("test_codec.jl") diff --git a/test/test_cursors.jl b/test/test_cursors.jl index 4e1e07c..7c95c07 100644 --- a/test/test_cursors.jl +++ b/test/test_cursors.jl @@ -50,7 +50,7 @@ end function test_row_cursor_all_files() for encformat in ("SNAPPY", "GZIP", "NONE") for fname in ("nation", "customer") - test_row_cursor(joinpath(@__DIR__, "parquet-compatibility", "parquet-testdata", "impala", "1.1.1-$encformat/$fname.impala.parquet")) + test_row_cursor(joinpath(parcompat, "parquet-testdata", "impala", "1.1.1-$encformat/$fname.impala.parquet")) end end end @@ -58,13 +58,13 @@ end function test_batchedcols_cursor_all_files() for encformat in ("SNAPPY", "GZIP", "NONE") for fname in ("nation", "customer") - test_batchedcols_cursor(joinpath(@__DIR__, "parquet-compatibility", "parquet-testdata", "impala", "1.1.1-$encformat/$fname.impala.parquet")) + test_batchedcols_cursor(joinpath(parcompat, "parquet-testdata", "impala", "1.1.1-$encformat/$fname.impala.parquet")) end end end function test_col_cursor_length() - path = joinpath(@__DIR__, "parquet-compatibility", "parquet-testdata", "impala", "1.1.1-SNAPPY/nation.impala.parquet") + path = joinpath(parcompat, "parquet-testdata", "impala", "1.1.1-SNAPPY/nation.impala.parquet") pq_file = Parquet.File(path) col_name = pq_file |> colnames |> first col_cursor = Parquet.ColCursor(pq_file, col_name) diff --git a/test/test_load.jl b/test/test_load.jl index 364745e..3cb1471 100644 --- a/test/test_load.jl +++ b/test/test_load.jl @@ -97,7 +97,7 @@ end function test_decode_all_pages() @testset "decode parquet-compatibility test files" begin - testfolder = joinpath(@__DIR__, "parquet-compatibility") + testfolder = parcompat for encformat in ("SNAPPY", "GZIP", "NONE") for fname in ("nation", "customer") testfile = joinpath(testfolder, "parquet-testdata", "impala", "1.1.1-$encformat", "$fname.impala.parquet") @@ -107,7 +107,7 @@ function test_decode_all_pages() end @testset "decode julia-parquet-compatibility test files" begin - testfolder = joinpath(@__DIR__, "julia-parquet-compatibility") + testfolder = julia_parcompat for encformat in ("ZSTD", "SNAPPY", "GZIP", "NONE") for fname in ("nation", "customer") testfile = joinpath(testfolder, "Parquet_Files", "$(encformat)_pandas_pyarrow_$(fname).parquet") @@ -123,7 +123,7 @@ end function test_load_all_pages() @testset "load parquet-compatibility test files" begin - testfolder = joinpath(@__DIR__, "parquet-compatibility") + testfolder = parcompat for encformat in ("SNAPPY", "GZIP", "NONE") for fname in ("nation", "customer") testfile = joinpath(testfolder, "parquet-testdata", "impala", "1.1.1-$encformat", "$fname.impala.parquet") @@ -133,7 +133,7 @@ function test_load_all_pages() end @testset "load julia-parquet-compatibility test files" begin - testfolder = joinpath(@__DIR__, "julia-parquet-compatibility") + testfolder = julia_parcompat for encformat in ("ZSTD", "SNAPPY", "GZIP", "NONE") for fname in ("nation", "customer") testfile = joinpath(testfolder, "Parquet_Files", "$(encformat)_pandas_pyarrow_$(fname).parquet") @@ -323,7 +323,7 @@ end function test_load_at_offset() @testset "load file at offset" begin - testfolder = joinpath(@__DIR__, "parquet-compatibility") + testfolder = parcompat testfile = joinpath(testfolder, "parquet-testdata", "impala", "1.1.1-NONE", "customer.impala.parquet") parquet_file = Parquet.File(testfile)