From 3efcf71cf97fbba4ce58def064c1aad6fb5f8300 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Wed, 15 Mar 2023 17:05:11 +0530 Subject: [PATCH] Incorporate fast join into Base.join --- src/join.jl | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/src/join.jl b/src/join.jl index 431cfe5..44b2439 100644 --- a/src/join.jl +++ b/src/join.jl @@ -1,10 +1,14 @@ joinmap = Dict( - :JoinInner=>DataFrames.innerjoin, - :JoinBoth=>DataFrames.innerjoin, - :JoinOuter=>DataFrames.outerjoin, - :JoinAll=>DataFrames.outerjoin, - :JoinLeft=>DataFrames.leftjoin, - :JoinRight=>DataFrames.rightjoin + :JoinInner => :inner, + :JoinBoth => :inner, + :JoinOuter => :outer, + :JoinAll => :outer, + :JoinLeft => :left, + :JoinRight => :right, + :outerjoin => :outer, + :innerjoin => :inner, + :leftjoin => :left, + :rightjoin => :right, ) """ @@ -71,6 +75,16 @@ where `jointype` must be one of `:JoinInner`, `:JoinBoth`, `:JoinOuter`, `:JoinA `cbind` is an alias for `join` method. +## Using the `DataFrames` join methods + +DataFrames.jl's join methods are battle-tested, and handle quite a few error cases which `TSFrames.join` may not. +In order to use DataFrames' join methods, which are somewhat slower than `TSFrames.join`, you would have to +join the TSFrames' internal DataFrames, then construct a new TSFrame. 
For `ts1::TSFrame`, `ts2::TSFrame`, +this is how you would construct an outer join: +```julia +TSFrame(DataFrames.outerjoin(ts1.coredata, ts2.coredata; on = :Index, makeunique = true)) +``` + # Examples ```jldoctest; setup = :(using TSFrames, DataFrames, Dates, Random, Statistics) julia> using Random; @@ -294,9 +308,9 @@ function Base.join( ts...; jointype::Symbol=:JoinAll ) - result = joinmap[jointype](ts1.coredata, ts2.coredata, on=:Index, makeunique=true) + result = TSFrames.fast_join(ts1.coredata, ts2.coredata; method = joinmap[jointype]) for tsf in ts - result = joinmap[jointype](result, tsf.coredata, on=:Index, makeunique=true) + result = TSFrames.fast_join(result, tsf.coredata; method = joinmap[jointype]) end return TSFrame(result) end @@ -430,18 +444,6 @@ function fast_join(left::TSFrame, right::TSFrame; method = :outer) end -function fast_outerjoin(ts1::TSFrame, ts2::TSFrame, others:::TSFrame...) - - result = fast_outerjoin(ts1, ts2) - - for other in others - result = fast_outerjoin(ts1, ts2) - end - - return result - -end - # # as of 22-Jan-22, the timer outputs are as follows: # BenchmarkTools.Trial: 100 samples with 1 evaluation. # Range (min … max): 61.627 ms … 287.822 ms ┊ GC (min … max): 0.00% … 71.47%