diff --git a/Project.toml b/Project.toml index 0a092be..b488695 100644 --- a/Project.toml +++ b/Project.toml @@ -13,7 +13,6 @@ ShiftedArrays = "1277b4bf-5013-50f5-be3d-901d8477a67a" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" -TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" [compat] DataFrames = "1.3.2" diff --git a/src/TSFrames.jl b/src/TSFrames.jl index f7fa0c2..1085287 100644 --- a/src/TSFrames.jl +++ b/src/TSFrames.jl @@ -1,6 +1,6 @@ module TSFrames -using DataFrames, Dates, ShiftedArrays, RecipesBase, RollingFunctions, Tables, TimerOutputs +using DataFrames, Dates, ShiftedArrays, RecipesBase, RollingFunctions, Tables import Base.convert import Base.diff diff --git a/src/join.jl b/src/join.jl index 44b2439..1ba6e3f 100644 --- a/src/join.jl +++ b/src/join.jl @@ -395,7 +395,7 @@ function fast_join(left::TSFrame, right::TSFrame; method = :outer) to = Main.to - merged_idx, merged_idx_left, merged_idx_right = @timeit to "sort_merge_idx" sort_merge_idx(index(left), index(right), Val(true), Val(true)) + merged_idx, merged_idx_left, merged_idx_right = sort_merge_idx(index(left), index(right), Val(true), Val(true)) merged_length = length(merged_idx) @@ -404,43 +404,45 @@ function fast_join(left::TSFrame, right::TSFrame; method = :outer) # and we can go down a faster path of simple concatenation. # add_missings = !(length(merged_idx) == length(left) == length(right)) - @timeit to "column disambiguation" begin - - left_colnames = setdiff(Tables.columnnames(left.coredata), (:Index,)) - right_colnames = setdiff(Tables.columnnames(right.coredata), (:Index,)) - left_colidxs = Tables.columnindex.((left.coredata,), left_colnames) - right_colidxs = Tables.columnindex.((right.coredata,), right_colnames) - disambiguated_right_colnames = deepcopy(right_colnames) - - # disambiguate col names - for (ind, colname) in enumerate(right_colnames) - leftind = findfirst(==(colname), left_colnames) - isnothing(leftind) || (disambiguated_right_colnames[ind] = Symbol(string(colname)*"_1")) - end - + # this machinery disambiguates column names + # It doesn't take too much time, but it would be cleaner to somehow use + # DataFrames' machinery here. + left_colnames = setdiff(Tables.columnnames(left.coredata), (:Index,)) + right_colnames = setdiff(Tables.columnnames(right.coredata), (:Index,)) + left_colidxs = Tables.columnindex.((left.coredata,), left_colnames) + right_colidxs = Tables.columnindex.((right.coredata,), right_colnames) + disambiguated_right_colnames = deepcopy(right_colnames) + + # disambiguate col names + for (ind, colname) in enumerate(right_colnames) + leftind = findfirst(==(colname), left_colnames) + isnothing(leftind) || (disambiguated_right_colnames[ind] = Symbol(string(colname)*"_1")) end - @timeit to "DataFrame construction" begin - result = DataFrame(:Index => merged_idx; makeunique = false, copycols = false) - left_coredata = left.coredata - right_coredata = right.coredata - end - @timeit to "column building" for idx in 1:length(left_colnames) + # Construct the DataFrame + result = DataFrame(:Index => merged_idx; makeunique = false, copycols = false) + left_coredata = left.coredata + right_coredata = right.coredata + + # Store the data from the left table in the result + for idx in 1:length(left_colnames) col_idx = left_colidxs[idx] - contents = @timeit to "column allocation" DataFrames.similar_missing(left.coredata[!, col_idx], merged_length) - @timeit to "column population" (@inbounds contents[merged_idx_left] = left_coredata[!, col_idx]) - @timeit to "column transfer" (result[!, left_colnames[idx]] = contents) + contents = DataFrames.similar_missing(left.coredata[!, col_idx], merged_length) + @inbounds contents[merged_idx_left] = left_coredata[!, col_idx] + result[!, left_colnames[idx]] = contents end - @timeit to "column building" for idx in 1:length(right_colnames) + # Store the data from the right table in the result + for idx in 1:length(right_colnames) col_idx = right_colidxs[idx] - contents = @timeit to "column allocation" DataFrames.similar_missing(right.coredata[!, col_idx], merged_length) - @timeit to "column population" (@inbounds contents[merged_idx_right] = right_coredata[!, col_idx]) - @timeit to "column transfer" result[!, disambiguated_right_colnames[idx]] = contents + contents = DataFrames.similar_missing(right.coredata[!, col_idx], merged_length) + @inbounds contents[merged_idx_right] = right_coredata[!, col_idx] + # note that column names have to be disambiguated + result[!, disambiguated_right_colnames[idx]] = contents end - return @timeit to "TSFrame construction" TSFrame(result, :Index; issorted = true, copycols = false) + return TSFrame(result, :Index; issorted = true, copycols = false) end