Fix row conversion test #1577

Merged

merged 3 commits on Nov 21, 2023
12 changes: 9 additions & 3 deletions src/main/cpp/src/row_conversion.cu
@@ -81,6 +81,7 @@ constexpr auto NUM_WARPS_IN_BLOCK = 32;
#pragma nv_diag_suppress static_var_with_dynamic_init

using namespace cudf;
+using detail::make_device_uvector_sync;
using detail::make_device_uvector_async;
using rmm::device_uvector;

@@ -231,7 +232,7 @@ build_string_row_offsets(table_view const& tbl,
offsets_iter + tbl.num_columns(),
std::back_inserter(offsets_iterators),
[](auto const& offset_ptr) { return offset_ptr != nullptr; });
-  return make_device_uvector_async(
+  return make_device_uvector_sync(
offsets_iterators, stream, rmm::mr::get_current_device_resource());
}();

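For context on the async-to-sync swap in the hunk above (and the matching `using detail::make_device_uvector_sync;` added earlier): `make_device_uvector_async` only enqueues the host-to-device copy on the stream, while `make_device_uvector_sync` also waits for it to complete. Since `offsets_iterators` appears to be a short-lived host vector local to the enclosing lambda, the synchronous variant guarantees the copy finishes before that vector is destroyed. A minimal sketch of the hazard, not taken from this PR; the function names are illustrative:

// Sketch only, assuming the libcudf detail factories: the async variant
// enqueues the H2D copy without waiting, the sync variant also synchronizes.
#include <cudf/detail/utilities/vector_factories.hpp>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/mr/device/per_device_resource.hpp>
#include <vector>

rmm::device_uvector<int> risky(rmm::cuda_stream_view stream)
{
  std::vector<int> host{1, 2, 3};
  // The copy is merely enqueued on `stream`; `host` is destroyed when this
  // function returns, so the device may read from a freed host buffer.
  return cudf::detail::make_device_uvector_async(
    host, stream, rmm::mr::get_current_device_resource());
}

rmm::device_uvector<int> safe(rmm::cuda_stream_view stream)
{
  std::vector<int> host{1, 2, 3};
  // Synchronizes the stream before returning, so `host` outlives the copy.
  return cudf::detail::make_device_uvector_sync(
    host, stream, rmm::mr::get_current_device_resource());
}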
@@ -1556,8 +1557,13 @@ batch_data build_batches(size_type num_rows,
batch_row_boundaries.push_back(0);
size_type last_row_end = 0;
device_uvector<uint64_t> cumulative_row_sizes(num_rows, stream);
-  thrust::inclusive_scan(
-    rmm::exec_policy(stream), row_sizes, row_sizes + num_rows, cumulative_row_sizes.begin());
+
+  // Evaluate the row size values before calling `inclusive_scan` to work around
+  // the memory issue in https://github.com/NVIDIA/spark-rapids-jni/issues/1567.
+  thrust::copy(rmm::exec_policy(stream), row_sizes, row_sizes + num_rows,
+               cumulative_row_sizes.begin());
+  thrust::inclusive_scan(rmm::exec_policy(stream), cumulative_row_sizes.begin(),
+                         cumulative_row_sizes.end(), cumulative_row_sizes.begin());

// This needs to be split into 2 gig batches. Care must be taken to avoid a batch larger than
// 2 gigs. Imagine a table with 900 meg rows. The batches should occur every 2 rows, but if a
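The new comment captures the workaround pattern: `row_sizes` is first materialized into `cumulative_row_sizes` with `thrust::copy`, and the prefix sum then runs in place over plain device memory rather than re-reading the original iterator. A self-contained sketch of this copy-then-scan pattern, using a counting iterator as a stand-in for the real `row_sizes` input:

// Sketch of the copy-then-scan workaround: evaluate the input iterator once
// into owned device memory, then run the inclusive scan in place.
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>
#include <thrust/copy.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/scan.h>
#include <cstdint>

rmm::device_uvector<uint64_t> cumulative_sizes(int num_rows,
                                               rmm::cuda_stream_view stream)
{
  // Stand-in for the PR's `row_sizes` device iterator.
  auto row_sizes = thrust::make_counting_iterator<uint64_t>(1);

  rmm::device_uvector<uint64_t> cumulative(num_rows, stream);

  // Step 1: materialize each row size exactly once.
  thrust::copy(rmm::exec_policy(stream), row_sizes, row_sizes + num_rows,
               cumulative.begin());
  // Step 2: in-place inclusive scan over the materialized values.
  thrust::inclusive_scan(rmm::exec_policy(stream), cumulative.begin(),
                         cumulative.end(), cumulative.begin());
  return cumulative;
}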