diff --git a/.github/workflows/build_local_backend.yml b/.github/workflows/build_local_backend.yml index edfa319e..be799035 100644 --- a/.github/workflows/build_local_backend.yml +++ b/.github/workflows/build_local_backend.yml @@ -64,4 +64,4 @@ jobs: run: cargo nextest run --no-run --profile ci - name: Run Rust tests - run: cargo nextest run --profile ci + run: DISABLE_FUZZY_TEXT_SEARCH=true cargo nextest run --profile ci diff --git a/crates/database/src/reads.rs b/crates/database/src/reads.rs index ef295d2b..868db18c 100644 --- a/crates/database/src/reads.rs +++ b/crates/database/src/reads.rs @@ -506,6 +506,7 @@ mod tests { PackedDocument, ResolvedDocument, }, + knobs::DISABLE_FUZZY_TEXT_SEARCH, query::search_value_to_bytes, testing::TestIdGenerator, types::{ @@ -637,13 +638,15 @@ mod tests { let read_set = reads.into_read_set(); let id = id_generator.user_generate(&table_name); - assert!(read_set_overlaps( - id, - &read_set, - field_path, - // If "word" is a token, it overlaps. - "Text containing word and other stuff." - )?); + if !*DISABLE_FUZZY_TEXT_SEARCH { + assert!(read_set_overlaps( + id, + &read_set, + field_path, + // If "word" is a token, it overlaps. + "Text containing word and other stuff." + )?); + } assert!(!read_set_overlaps( id, @@ -690,26 +693,28 @@ mod tests { field_path, "Text containing word and other stuff." )?); - assert!(read_set_overlaps( - id, - &read_set, - field_path, - "Text containing shword and other stuff." - )?); - - // This would match if prefix is true. - assert!(!read_set_overlaps( - id, - &read_set, - field_path, - "Text containing wordddd and other stuff." - )?); assert!(!read_set_overlaps( id, &read_set, field_path, "This text doesn't have the keyword." )?); + if !*DISABLE_FUZZY_TEXT_SEARCH { + assert!(read_set_overlaps( + id, + &read_set, + field_path, + "Text containing shword and other stuff." + )?); + + // This would match if prefix is true. + assert!(!read_set_overlaps( + id, + &read_set, + field_path, + "Text containing wordddd and other stuff." + )?); + } Ok(()) } @@ -789,13 +794,24 @@ mod tests { let read_set = reads.into_read_set(); let id = id_generator.user_generate(&table_name); - assert!(read_set_overlaps( - id, - &read_set, - field_path, - // If "word.*" is a token, it overlaps. - "Text containing wordsythings and other stuff." - )?); + if *DISABLE_FUZZY_TEXT_SEARCH { + // Add a test case here + assert!(read_set_overlaps( + id, + &read_set, + field_path, + // If "wrd.*" is a token, it overlaps. + "Text containing wrdsythings and other stuff." + )?); + } else { + assert!(read_set_overlaps( + id, + &read_set, + field_path, + // If "word.*" is a token, it overlaps. + "Text containing wordsythings and other stuff." + )?); + } assert!(!read_set_overlaps( id, diff --git a/crates/database/src/tests/randomized_search_tests.rs b/crates/database/src/tests/randomized_search_tests.rs index 4ef7cb5f..c0e999ee 100644 --- a/crates/database/src/tests/randomized_search_tests.rs +++ b/crates/database/src/tests/randomized_search_tests.rs @@ -21,6 +21,7 @@ use common::{ IndexMetadata, }, floating_point::assert_approx_equal, + knobs::DISABLE_FUZZY_TEXT_SEARCH, pause::PauseController, persistence::Persistence, query::{ @@ -861,10 +862,12 @@ async fn test_fuzzy_mem(rt: TestRuntime) -> anyhow::Result<()> { let mut scenario = Scenario::new(rt).await?; scenario._patch("a", "the quick brow fox", "test").await?; - let results = scenario - ._query_with_scores("brown", None, None, SearchVersion::V2) - .await?; - assert_eq!(results.len(), 1); + if !*DISABLE_FUZZY_TEXT_SEARCH { + let results = scenario + ._query_with_scores("brown", None, None, SearchVersion::V2) + .await?; + assert_eq!(results.len(), 1); + } // Exact match w/o prefix will fail let results = scenario @@ -888,26 +891,28 @@ async fn test_fuzzy_mem(rt: TestRuntime) -> anyhow::Result<()> { assert_eq!(results.len(), 1); // Prefix + fuzzy - let results = scenario - ._query_with_scores("ahhhhh", None, None, SearchVersion::V2) - .await?; - assert_eq!(results.len(), 1); - - // Edit distance 2 - scenario - ._patch("c", "my name is bartholomew", "test") - .await?; - let results = scenario - ._query_with_scores("batholmew runs fast", None, None, SearchVersion::V2) - .await?; - assert_eq!(results.len(), 1); + if !*DISABLE_FUZZY_TEXT_SEARCH { + let results = scenario + ._query_with_scores("ahhhhh", None, None, SearchVersion::V2) + .await?; + assert_eq!(results.len(), 1); + // + // Edit distance 2 + scenario + ._patch("c", "my name is bartholomew", "test") + .await?; + let results = scenario + ._query_with_scores("batholmew runs fast", None, None, SearchVersion::V2) + .await?; + assert_eq!(results.len(), 1); + } Ok(()) } #[convex_macro::test_runtime] async fn test_fuzzy_disk(rt: TestRuntime) -> anyhow::Result<()> { - { + if !*DISABLE_FUZZY_TEXT_SEARCH { let mut scenario = Scenario::new(rt).await?; scenario._patch("key1", "rakeeb wuz here", "test").await?; scenario.backfill().await?; @@ -947,22 +952,24 @@ async fn test_fuzzy_disk_snapshot_shortlist_ids_valid_with_empty_memory_index( // See https://github.com/get-convex/convex/pull/20649 #[convex_macro::test_runtime] async fn unrelated_sentences_are_queryable_after_flush(rt: TestRuntime) -> anyhow::Result<()> { - let mut scenario = Scenario::new(rt).await?; - scenario._patch("key1", "rakeeb wuz here", "test").await?; - scenario - ._patch("key2", "some other sentence", "test") - .await?; - scenario.backfill().await?; + if !*DISABLE_FUZZY_TEXT_SEARCH { + let mut scenario = Scenario::new(rt).await?; + scenario._patch("key1", "rakeeb wuz here", "test").await?; + scenario + ._patch("key2", "some other sentence", "test") + .await?; + scenario.backfill().await?; - let results = scenario - ._query_with_scores("rakeem", None, None, SearchVersion::V2) - .await?; - assert_eq!(results.len(), 1); + let results = scenario + ._query_with_scores("rakeem", None, None, SearchVersion::V2) + .await?; + assert_eq!(results.len(), 1); - let results = scenario - ._query_with_scores("senence", None, None, SearchVersion::V2) - .await?; - assert_eq!(results.len(), 1); + let results = scenario + ._query_with_scores("senence", None, None, SearchVersion::V2) + .await?; + assert_eq!(results.len(), 1); + } Ok(()) } diff --git a/crates/search/src/query.rs b/crates/search/src/query.rs index 538be94c..e2a66259 100644 --- a/crates/search/src/query.rs +++ b/crates/search/src/query.rs @@ -15,6 +15,7 @@ use common::{ PackedDocument, }, index::IndexKeyBytes, + knobs::DISABLE_FUZZY_TEXT_SEARCH, query::search_value_to_bytes, types::{ SubscriberId, @@ -825,9 +826,15 @@ impl SearchTermTries { // notes there), so we can get away with a symmetric search where the dfa's // prefix is always set to false. tokens.for_each_token(path, *prefix, |token| { - let dfa = build_fuzzy_dfa(token, *max_distance, false); - for (values, ..) in trie.intersect(dfa, None) { - result.extend(values.keys().cloned()); + if *DISABLE_FUZZY_TEXT_SEARCH { + if let Some(value) = trie.get(token) { + result.extend(value.keys().cloned()); + } + } else { + let dfa = build_fuzzy_dfa(token, *max_distance, false); + for (values, ..) in trie.intersect(dfa, None) { + result.extend(values.keys().cloned()); + } } }); }