Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/acvictor/velox into acvicto…
Browse files Browse the repository at this point in the history
…r/removeNoRetrun
  • Loading branch information
acvictor committed Apr 9, 2024
2 parents 88a613f + b719046 commit 972f365
Show file tree
Hide file tree
Showing 154 changed files with 4,018 additions and 1,636 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ jobs:
- name: "Install dependencies"
if: ${{ github.event_name == 'pull_request' }}
run: source velox/scripts/setup-ubuntu.sh
run: source velox/scripts/setup-ubuntu.sh && install_apt_deps

- name: Build Baseline Benchmarks
if: ${{ github.event_name == 'pull_request' }}
Expand All @@ -115,7 +115,7 @@ jobs:
submodules: 'recursive'

- name: "Install dependencies"
run: source velox/scripts/setup-ubuntu.sh
run: source velox/scripts/setup-ubuntu.sh && install_apt_deps

- name: Build Contender Benchmarks
working-directory: velox
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/experimental.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ jobs:
ref: "${{ inputs.ref || 'main' }}"

- name: "Install dependencies"
run: cd velox && source ./scripts/setup-ubuntu.sh
run: cd velox && source ./scripts/setup-ubuntu.sh && install_apt_deps

- name: "Build"
run: |
Expand Down Expand Up @@ -181,7 +181,7 @@ jobs:
ref: "${{ inputs.ref || 'main' }}"

- name: "Install dependencies"
run: source ./scripts/setup-ubuntu.sh
run: source ./scripts/setup-ubuntu.sh && install_apt_deps

- name: Download spark aggregation fuzzer
uses: actions/download-artifact@v3
Expand Down Expand Up @@ -222,7 +222,7 @@ jobs:
ref: "${{ inputs.ref || 'main' }}"

- name: "Install dependencies"
run: source ./scripts/setup-ubuntu.sh
run: source ./scripts/setup-ubuntu.sh && install_apt_deps

- name: Download join fuzzer
uses: actions/download-artifact@v3
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/linux-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ jobs:

- name: Install Dependencies
run: |
source scripts/setup-ubuntu.sh
source scripts/setup-ubuntu.sh && install_apt_deps
- name: Clear CCache Statistics
run: |
Expand Down
17 changes: 16 additions & 1 deletion .github/workflows/scheduled.yml
Original file line number Diff line number Diff line change
Expand Up @@ -605,6 +605,19 @@ jobs:
with:
name: aggregation

- name: "Checkout Repo"
uses: actions/checkout@v4
with:
path: velox
submodules: 'recursive'
ref: "${{ inputs.ref }}"

- name: Fix git permissions
# Usually actions/checkout does this but as we run in a container
# it doesn't work
run: git config --global --add safe.directory /__w/velox/velox/velox


- name: "Run Aggregate Fuzzer"
run: |
cd velox
Expand All @@ -614,10 +627,12 @@ jobs:
# Sleep for 60 seconds to allow Presto server to start.
sleep 60
/opt/presto-cli --server 127.0.0.1:8080 --execute 'CREATE SCHEMA hive.tpch;'
cd -
mkdir -p /tmp/aggregate_fuzzer_repro/
rm -rfv /tmp/aggregate_fuzzer_repro/*
chmod -R 777 /tmp/aggregate_fuzzer_repro
velox_aggregation_fuzzer_test \
chmod +x velox_aggregation_fuzzer_test
./velox_aggregation_fuzzer_test \
--seed ${RANDOM} \
--duration_sec $DURATION \
--logtostderr=1 \
Expand Down
16 changes: 8 additions & 8 deletions CMake/resolve_dependency_modules/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
Following is the list of libraries and their minimum version
that Velox requires. Some of these libraries can be installed
via a platform's package manager (eg. `brew` on MacOS).
The versions of certain libraries is the default provided by
The versions of certain libraries is the default provided by
the platform's package manager. Some libraries can be bundled
by Velox. See details on bundling below.

| Library Name | Minimum Version | Bundled? |
| Library Name | Minimum Version | Bundled? |
|-------------------|-----------------|----------|
| ninja | default | No |
| ccache | default | No |
Expand All @@ -16,7 +16,7 @@ by Velox. See details on bundling below.
| gtest (testing) | default | Yes |
| libevent | default | No |
| libsodium | default | No |
| lz4 | default | No |
| lz4 | default | No |
| snappy | default | No |
| lzo | default | No |
| xz | default | No |
Expand All @@ -32,11 +32,11 @@ by Velox. See details on bundling below.
| re2 | 2021-04-01 | Yes |
| fmt | 10.1.1 | Yes |
| simdjson | 3.2.0 | Yes |
| folly | v2024.02.26.00 | Yes |
| fizz | v2024.02.26.00 | No |
| wangle | v2024.02.26.00 | No |
| mvfst | v2024.02.26.00 | No |
| fbthrift | v2024.02.26.00 | No |
| folly | v2024.04.01.00 | Yes |
| fizz | v2024.04.01.00 | No |
| wangle | v2024.04.01.00 | No |
| mvfst | v2024.04.01.00 | No |
| fbthrift | v2024.04.01.00 | No |
| DuckDB (testing) | 0.8.1 | Yes |
| cpr (testing) | 1.10.15 | Yes |

Expand Down
4 changes: 2 additions & 2 deletions CMake/resolve_dependency_modules/folly/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
project(Folly)
cmake_minimum_required(VERSION 3.14)

set(VELOX_FOLLY_BUILD_VERSION v2024.02.26.00)
set(VELOX_FOLLY_BUILD_VERSION v2024.04.01.00)
set(VELOX_FOLLY_BUILD_SHA256_CHECKSUM
0a5570be503e377f2a9c178b926cd350ae72bddaeb0c7164a1fac7118c00e866)
e0c8386eea1a317ff17f1edc36c29362db94aeca8689659f1c530ea777779409)
set(VELOX_FOLLY_SOURCE_URL
"https://github.com/facebook/folly/releases/download/${VELOX_FOLLY_BUILD_VERSION}/folly-${VELOX_FOLLY_BUILD_VERSION}.tar.gz"
)
Expand Down
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ if(${VELOX_BUILD_PYTHON_PACKAGE})
set(VELOX_ENABLE_EXPRESSION ON)
set(VELOX_ENABLE_PARSE ON)
set(VELOX_ENABLE_EXEC ON)
set(VELOX_ENABLE_AGGREGATES ON)
set(VELOX_ENABLE_SPARK_FUNCTIONS ON)
endif()

Expand Down
4 changes: 3 additions & 1 deletion pyvelox/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ if(VELOX_BUILD_PYTHON_PACKAGE)
velox_functions_prestosql
velox_parse_parser
velox_functions_prestosql
velox_functions_spark)
velox_functions_spark
velox_aggregates
velox_functions_spark_aggregates)

target_include_directories(pyvelox SYSTEM
PRIVATE ${CMAKE_CURRENT_LIST_DIR}/..)
Expand Down
57 changes: 57 additions & 0 deletions pyvelox/signatures.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,12 @@
*/

#include "signatures.h" // @manual
#include "velox/exec/Aggregate.h"
#include "velox/functions/FunctionRegistry.h"
#include "velox/functions/prestosql/aggregates/RegisterAggregateFunctions.h"
#include "velox/functions/prestosql/registration/RegistrationFunctions.h"
#include "velox/functions/sparksql/Register.h"
#include "velox/functions/sparksql/aggregates/Register.h"

namespace facebook::velox::py {

Expand All @@ -31,6 +34,24 @@ void registerSparkFunctions(const std::string& prefix) {
facebook::velox::functions::sparksql::registerFunctions(prefix);
}

void registerPrestoAggregateFunctions(const std::string& prefix) {
facebook::velox::aggregate::prestosql::registerAllAggregateFunctions(prefix);
}

void registerSparkAggregateFunctions(const std::string& prefix) {
facebook::velox::functions::aggregate::sparksql::registerAggregateFunctions(
prefix);
}

exec::AggregateFunctionSignatureMap getAggregateSignatures() {
return exec::getAggregateFunctionSignatures();
}

void clearAggregateSignatures() {
exec::aggregateFunctions().withWLock(
[&](auto& aggregateFunctions) { aggregateFunctions.clear(); });
}

void addSignatureBindings(py::module& m, bool asModuleLocalDefinitions) {
// TypeSignature
py::class_<exec::TypeSignature> typeSignature(
Expand All @@ -53,6 +74,19 @@ void addSignatureBindings(py::module& m, bool asModuleLocalDefinitions) {
functionSignature.def(
"constant_arguments", &exec::FunctionSignature::constantArguments);

// AggregateFunctionSignature
py::class_<
exec::AggregateFunctionSignature,
std::unique_ptr<exec::AggregateFunctionSignature, py::nodelete>>
aggregateFunctionSignature(
m,
"AggregateFunctionSignature",
py::module_local(asModuleLocalDefinitions));
aggregateFunctionSignature.def(
"__str__", &exec::AggregateFunctionSignature::toString);
aggregateFunctionSignature.def(
"intermediate_type", &exec::AggregateFunctionSignature::intermediateType);

m.def(
"clear_signatures",
&clearFunctionRegistry,
Expand All @@ -75,5 +109,28 @@ void addSignatureBindings(py::module& m, bool asModuleLocalDefinitions) {
&getFunctionSignatures,
py::return_value_policy::reference,
"Returns a dictionary of the current signatures.");

m.def(
"register_presto_aggregate_signatures",
&registerPrestoAggregateFunctions,
"Adds Presto Aggregate signatures to the function registry.",
py::arg("prefix") = "");

m.def(
"register_spark_aggregate_signatures",
&registerSparkAggregateFunctions,
"Adds Spark Aggregate signatures to the function registry.",
py::arg("prefix") = "");

m.def(
"get_aggregate_function_signatures",
&getAggregateSignatures,
py::return_value_policy::reference,
"Returns a dictionary of the current aggregate signatures.");

m.def(
"clear_aggregate_signatures",
&clearAggregateSignatures,
"Clears the Aggregate function registry.");
}
} // namespace facebook::velox::py
18 changes: 18 additions & 0 deletions pyvelox/test/test_signatures.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,21 @@ def test_function_prefix(self):

concat_signatures = spark_signatures["barconcat"]
self.assertTrue(len(concat_signatures) > 0)

def test_aggregate_signatures(self):
pv.clear_aggregate_signatures()

pv.register_presto_aggregate_signatures()
presto_agg_signatures = pv.get_aggregate_function_signatures()

min_signatures = presto_agg_signatures["min"]
self.assertTrue(len(min_signatures) > 0)

max_signatures = presto_agg_signatures["max"]
self.assertTrue(len(max_signatures) > 0)

pv.clear_aggregate_signatures()

pv.register_spark_aggregate_signatures()
spark_agg_signatures = pv.get_aggregate_function_signatures()
self.assertTrue(len(spark_agg_signatures) > 0)
2 changes: 1 addition & 1 deletion scripts/setup-centos8.sh
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ function install_protobuf {
)
}

FB_OS_VERSION="v2024.02.26.00"
FB_OS_VERSION="v2024.04.01.00"

function install_fizz {
wget_and_untar https://github.com/facebookincubator/fizz/archive/refs/tags/${FB_OS_VERSION}.tar.gz fizz
Expand Down
2 changes: 1 addition & 1 deletion scripts/setup-macos.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ NPROC=$(getconf _NPROCESSORS_ONLN)
DEPENDENCY_DIR=${DEPENDENCY_DIR:-$(pwd)}
MACOS_VELOX_DEPS="flex bison protobuf@21 icu4c boost gflags glog libevent lz4 lzo snappy xz zstd openssl libsodium"
MACOS_BUILD_DEPS="ninja cmake ccache"
FB_OS_VERSION="v2024.02.26.00"
FB_OS_VERSION="v2024.04.01.00"

function update_brew {
DEFAULT_BREW_PATH=/usr/local/bin/brew
Expand Down
7 changes: 3 additions & 4 deletions scripts/setup-ubuntu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ source $SCRIPTDIR/setup-helper-functions.sh
CPU_TARGET="${CPU_TARGET:-avx}"
COMPILER_FLAGS=$(get_cxx_flags "$CPU_TARGET")
export COMPILER_FLAGS
FB_OS_VERSION=v2024.02.26.00
FB_OS_VERSION=v2024.04.01.00
FMT_VERSION=10.1.1
BOOST_VERSION=boost-1.84.0
NPROC=$(getconf _NPROCESSORS_ONLN)
Expand Down Expand Up @@ -135,7 +135,7 @@ function install_conda {
echo "Unsupported architecture: $ARCH"
exit 1
fi

mkdir -p conda && cd conda
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-$ARCH.sh
bash Miniconda3-latest-Linux-$ARCH.sh -b -p $MINICONDA_PATH
Expand All @@ -159,8 +159,7 @@ function install_apt_deps {
install_velox_deps_from_apt
}

# For backward compatibility, invoke install_apt_deps
(return 2> /dev/null) && install_apt_deps && return # If script was sourced, don't run commands.
(return 2> /dev/null) && return # If script was sourced, don't run commands.

(
if [[ $# -ne 0 ]]; then
Expand Down
29 changes: 29 additions & 0 deletions scripts/signature.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,28 @@ def export(args):
return 0


def export_aggregates(args):
"""Exports Velox Aggregate function signatures."""
pv.clear_aggregate_signatures()

if args.spark:
pv.register_spark_aggregate_signatures()

if args.presto:
pv.register_presto_aggregate_signatures()

signatures = pv.get_aggregate_function_signatures()

# Convert signatures to json
jsoned_signatures = {}
for key in signatures.keys():
jsoned_signatures[key] = [str(value) for value in signatures[key]]

# Persist to file
json.dump(jsoned_signatures, args.output_file)
return 0


def diff_signatures(base_signatures, contender_signatures, error_path=""):
"""Diffs Velox function signatures. Returns a tuple of the delta diff and exit status"""

Expand Down Expand Up @@ -253,6 +275,13 @@ def parse_args(args):
export_command_parser.add_argument("--presto", action="store_true")
export_command_parser.add_argument("output_file", type=str)

export_aggregates_command_parser = command.add_parser("export_aggregates")
export_aggregates_command_parser.add_argument("--spark", action="store_true")
export_aggregates_command_parser.add_argument("--presto", action="store_true")
export_aggregates_command_parser.add_argument(
"output_file", type=argparse.FileType("w")
)

diff_command_parser = command.add_parser("diff")
diff_command_parser.add_argument("base", type=str)
diff_command_parser.add_argument("contender", type=str)
Expand Down
5 changes: 5 additions & 0 deletions velox/common/base/Counters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,5 +196,10 @@ void registerVeloxMetrics() {
// Tracks the number of times that we hit the max spill level limit.
DEFINE_METRIC(
kMetricMaxSpillLevelExceededCount, facebook::velox::StatType::COUNT);

// Tracks the total number of bytes in file writers that's pre-maturely
// flushed due to memory reclaiming.
DEFINE_METRIC(
kMetricFileWriterEarlyFlushedRawBytes, facebook::velox::StatType::SUM);
}
} // namespace facebook::velox
3 changes: 3 additions & 0 deletions velox/common/base/Counters.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,4 +113,7 @@ constexpr folly::StringPiece kMetricSpillFlushTimeMs{

constexpr folly::StringPiece kMetricSpillWriteTimeMs{
"velox.spill_write_time_ms"};

constexpr folly::StringPiece kMetricFileWriterEarlyFlushedRawBytes{
"velox.file_writer_early_flushed_raw_bytes"};
} // namespace facebook::velox
5 changes: 2 additions & 3 deletions velox/common/file/File.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,7 @@ void LocalWriteFile::append(std::string_view data) {
bytesWritten,
data.size(),
folly::errnoStr(errno));
size_ += bytesWritten;
}

void LocalWriteFile::append(std::unique_ptr<folly::IOBuf> data) {
Expand All @@ -298,6 +299,7 @@ void LocalWriteFile::append(std::unique_ptr<folly::IOBuf> data) {
"Failure in LocalWriteFile::append, {} vs {}",
totalBytesWritten,
totalBytesToWrite);
size_ += totalBytesWritten;
}

void LocalWriteFile::flush() {
Expand All @@ -322,7 +324,4 @@ void LocalWriteFile::close() {
}
}

uint64_t LocalWriteFile::size() const {
return ftell(file_);
}
} // namespace facebook::velox
Loading

0 comments on commit 972f365

Please sign in to comment.