Skip to content

Commit

Permalink
Only get previous revisions during search and vector bootstrapping for documents in tables with indexes (#23831)
Browse files Browse the repository at this point in the history

This PR makes search and vector bootstrapping more efficient: documents in tables that have no indexes are filtered out before their previous revisions are fetched. The bootstrap result is now passed around as a `BootstrappedSearchAndVectorIndexes` struct that includes the set of tables with indexes, so the same filtering can be applied in the committer update. This PR also adds metrics for the committer update, since search and vector search are technically down until the committer update is complete.

GitOrigin-RevId: ca70695094ebaa9c26f86e8f61aa94cfa2c09644
  • Loading branch information
emmaling27 authored and Convex, Inc. committed Mar 22, 2024
1 parent 2ef04ac commit 52c5143
Show file tree
Hide file tree
Showing 3 changed files with 132 additions and 72 deletions.
60 changes: 34 additions & 26 deletions crates/database/src/committer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@ use common::{
RetentionValidator,
TimestampRange,
},
persistence_helpers::stream_revision_pairs,
query::Order,
runtime::{
Runtime,
RuntimeInstant,
Expand Down Expand Up @@ -80,7 +78,6 @@ use futures::{
use indexing::index_registry::IndexRegistry;
use parking_lot::Mutex;
use prometheus::VMHistogram;
use search::SearchIndexManager;
use usage_tracking::{
DocInVectorIndex,
FunctionUsageTracker,
Expand All @@ -94,16 +91,23 @@ use value::{
TableMapping,
TableName,
};
use vector::VectorIndexManager;

use crate::{
bootstrap_model::defaults::BootstrapTableIds,
database::{
ConflictingReadWithWriteSource,
ShutdownSignal,
},
metrics,
metrics::{
self,
bootstrap_update_timer,
finish_bootstrap_update,
},
reads::ReadSet,
search_and_vector_bootstrap::{
stream_revision_pairs_for_indexes,
BootstrappedSearchAndVectorIndexes,
},
snapshot_manager::SnapshotManager,
transaction::FinalTransaction,
write_log::{
Expand Down Expand Up @@ -270,12 +274,12 @@ impl<RT: Runtime> Committer<RT> {
self.bump_max_repeatable_ts(result);
},
Some(CommitterMessage::FinishSearchAndVectorBootstrap {
search_index_manager,
vector_index_manager, bootstrap_ts, result,
bootstrapped_indexes,
bootstrap_ts,
result,
}) => {
self.finish_search_and_vector_bootstrap(
search_index_manager,
vector_index_manager,
bootstrapped_indexes,
bootstrap_ts,
result
).await;
Expand All @@ -293,12 +297,16 @@ impl<RT: Runtime> Committer<RT> {
}

async fn update_indexes_since_bootstrap(
search_index_manager: &mut SearchIndexManager,
vector_index_manager: &mut VectorIndexManager,
BootstrappedSearchAndVectorIndexes {
search_index_manager,
vector_index_manager,
tables_with_indexes,
}: &mut BootstrappedSearchAndVectorIndexes,
bootstrap_ts: Timestamp,
persistence: RepeatablePersistence,
registry: &IndexRegistry,
) -> anyhow::Result<()> {
let _timer = bootstrap_update_timer();
anyhow::ensure!(
!search_index_manager.is_bootstrapping(),
"Trying to update search index while it's still bootstrapping"
Expand All @@ -307,13 +315,17 @@ impl<RT: Runtime> Committer<RT> {
!vector_index_manager.is_bootstrapping(),
"Trying to update vector index while it's still bootstrapping"
);
let range = (Bound::Excluded(bootstrap_ts), Bound::Unbounded);
let range = TimestampRange::new((Bound::Excluded(bootstrap_ts), Bound::Unbounded))?;

let document_stream = persistence.load_documents(TimestampRange::new(range)?, Order::Asc);
let revision_stream = stream_revision_pairs(document_stream, &persistence);
let revision_stream =
stream_revision_pairs_for_indexes(tables_with_indexes, &persistence, range);
futures::pin_mut!(revision_stream);

let mut num_revisions = 0;
let mut total_size = 0;
while let Some(revision_pair) = revision_stream.try_next().await? {
num_revisions += 1;
total_size += revision_pair.document().map(|d| d.size()).unwrap_or(0);
search_index_manager.update(
registry,
revision_pair.prev_document(),
Expand All @@ -327,13 +339,13 @@ impl<RT: Runtime> Committer<RT> {
WriteTimestamp::Committed(revision_pair.ts()),
)?;
}
finish_bootstrap_update(num_revisions, total_size);
Ok(())
}

async fn finish_search_and_vector_bootstrap(
&mut self,
mut search_index_manager: SearchIndexManager,
mut vector_index_manager: VectorIndexManager,
mut bootstrapped_indexes: BootstrappedSearchAndVectorIndexes,
bootstrap_ts: RepeatableTimestamp,
result: oneshot::Sender<anyhow::Result<()>>,
) {
Expand All @@ -352,8 +364,7 @@ impl<RT: Runtime> Committer<RT> {
);

let res = Self::update_indexes_since_bootstrap(
&mut search_index_manager,
&mut vector_index_manager,
&mut bootstrapped_indexes,
*bootstrap_ts,
repeatable_persistence,
&last_snapshot.index_registry,
Expand All @@ -371,8 +382,8 @@ impl<RT: Runtime> Committer<RT> {
panic!("Snapshots were changed concurrently during commit?");
}
snapshot_manager.overwrite_last_snapshot_search_and_vector_indexes(
search_index_manager,
vector_index_manager,
bootstrapped_indexes.search_index_manager,
bootstrapped_indexes.vector_index_manager,
);
tracing::info!("Committed backfilled vector indexes");
let _ = result.send(Ok(()));
Expand Down Expand Up @@ -814,14 +825,12 @@ impl<RT: Runtime> Clone for CommitterClient<RT> {
impl<RT: Runtime> CommitterClient<RT> {
pub async fn finish_search_and_vector_bootstrap(
&self,
search_index_manager: SearchIndexManager,
vector_index_manager: VectorIndexManager,
bootstrapped_indexes: BootstrappedSearchAndVectorIndexes,
bootstrap_ts: RepeatableTimestamp,
) -> anyhow::Result<()> {
let (tx, rx) = oneshot::channel();
let message = CommitterMessage::FinishSearchAndVectorBootstrap {
search_index_manager,
vector_index_manager,
bootstrapped_indexes,
bootstrap_ts,
result: tx,
};
Expand Down Expand Up @@ -977,8 +986,7 @@ enum CommitterMessage {
result: oneshot::Sender<anyhow::Result<()>>,
},
FinishSearchAndVectorBootstrap {
search_index_manager: SearchIndexManager,
vector_index_manager: VectorIndexManager,
bootstrapped_indexes: BootstrappedSearchAndVectorIndexes,
bootstrap_ts: RepeatableTimestamp,
result: oneshot::Sender<anyhow::Result<()>>,
},
Expand Down
38 changes: 31 additions & 7 deletions crates/database/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -586,13 +586,40 @@ pub fn bootstrap_timer() -> StatusTimer {
StatusTimer::new(&SEARCH_AND_VECTOR_BOOTSTRAP_SECONDS)
}

register_convex_histogram!(
SEARCH_AND_VECTOR_BOOTSTRAP_COMMITTER_UPDATE_SECONDS,
"Time to update search and vector index bootstrap in the committer"
);
/// Creates a [`Timer`] backed by the
/// `SEARCH_AND_VECTOR_BOOTSTRAP_COMMITTER_UPDATE_SECONDS` histogram, which
/// tracks the time spent updating the bootstrapped search and vector indexes
/// in the committer.
///
/// NOTE(review): the caller is expected to keep the returned value alive for
/// the duration of the update; presumably the elapsed time is recorded when
/// the timer is dropped — confirm against `Timer`.
pub fn bootstrap_update_timer() -> Timer<VMHistogram> {
    let histogram = &SEARCH_AND_VECTOR_BOOTSTRAP_COMMITTER_UPDATE_SECONDS;
    Timer::new(histogram)
}
register_convex_counter!(
SEARCH_AND_VECTOR_BOOTSTRAP_COMMITTER_UPDATE_REVISIONS_TOTAL,
"Number of revisions loaded during search and vector bootstrap updates in the committer"
);
register_convex_counter!(
SEARCH_AND_VECTOR_BOOTSTRAP_COMMITTER_UPDATE_REVISIONS_BYTES,
"Total size of revisions loaded during search and vector bootstrap updates in the committer"
);

/// Records the volume of work done by a search and vector bootstrap update in
/// the committer.
///
/// * `num_revisions` - number of revisions loaded during the update; added to
///   `SEARCH_AND_VECTOR_BOOTSTRAP_COMMITTER_UPDATE_REVISIONS_TOTAL`.
/// * `bytes` - total size of those revisions; added to
///   `SEARCH_AND_VECTOR_BOOTSTRAP_COMMITTER_UPDATE_REVISIONS_BYTES`.
pub fn finish_bootstrap_update(num_revisions: usize, bytes: usize) {
    // `usize` -> `u64` is lossless on targets whose pointers are at most
    // 64 bits wide.
    let revisions_loaded = num_revisions as u64;
    let bytes_loaded = bytes as u64;

    log_counter(
        &SEARCH_AND_VECTOR_BOOTSTRAP_COMMITTER_UPDATE_REVISIONS_TOTAL,
        revisions_loaded,
    );
    log_counter(
        &SEARCH_AND_VECTOR_BOOTSTRAP_COMMITTER_UPDATE_REVISIONS_BYTES,
        bytes_loaded,
    );
}

register_convex_counter!(
SEARCH_AND_VECTOR_BOOTSTRAP_REVISIONS_TOTAL,
"Number of revisions loaded during vector bootstrap"
"Number of revisions loaded during search and vector bootstrap"
);
register_convex_counter!(
SEARCH_AND_VECTOR_BOOTSTRAP_REVISIONS_BYTES,
"Total size of revisions loaded during vector bootstrap"
"Total size of revisions loaded during search and vector bootstrap"
);
pub fn finish_bootstrap(num_revisions: usize, bytes: usize, timer: StatusTimer) {
log_counter(
Expand Down Expand Up @@ -675,14 +702,11 @@ pub fn search_and_vector_bootstrap_timer() -> StatusTimer {
}

register_convex_counter!(
DATABASE_SEARCH_AND_VECTOR_BOOTSTRAP_DOCUMENTS_SKIPPED_TOTAL,
SEARCH_AND_VECTOR_BOOTSTRAP_DOCUMENTS_SKIPPED_TOTAL,
"Number of documents skipped during vector and search index bootstrap",
);
pub fn log_document_skipped() {
log_counter(
&DATABASE_SEARCH_AND_VECTOR_BOOTSTRAP_DOCUMENTS_SKIPPED_TOTAL,
1,
);
log_counter(&SEARCH_AND_VECTOR_BOOTSTRAP_DOCUMENTS_SKIPPED_TOTAL, 1);
}

pub mod vector {
Expand Down
Loading

0 comments on commit 52c5143

Please sign in to comment.