From 23a8d403b94a3a5dbd676bc1fed02dc91e749ff4 Mon Sep 17 00:00:00 2001 From: Sujay Jayakar Date: Sun, 17 Mar 2024 15:45:43 -0400 Subject: [PATCH] Add `serde` support for `ConvexValue` to help with serializing system metadata (#22841) It's very tedious to manually write the conversions between our in-memory structs (like `TableMetadata`) and `ConvexValue`. This change uses Serde to automate some parts of this conversion. Let's use `TableMetadata` as our running example: ```rust pub struct TableMetadata { pub name: TableName, pub number: TableNumber, pub state: TableState, } pub enum TableState { Active, Hidden, Deleting, } ``` To start, the developer defines two `Serialized*` types in Rust that define the serialization format they'd like for this in-memory structure. Think of these definitions in Rust as similar to a Protobuf definition. ```rust #[derive(Serialize, Deserialize)] pub struct SerializedTableMetadata { pub name: String, pub number: i64, pub state: SerializedTableState, } #[derive(Serialize, Deserialize)] #[serde(tag = "type")] pub enum SerializedTableState { Active {}, Hidden {}, Deleting {}, } ``` The developer may have the `Serialized*` structs diverge from the in-memory one to, say, implement backwards compatibility. Note that we're using an empty struct variant for the enum to tell serde to serialize the enum as an object with its variant tagged in the `type` field. Then, implement `TryFrom` conversions both ways. Since this is going between two Rust data types, this should be easier than implementing conversions to and from `ConvexObject`: ```rust impl TryFrom<TableMetadata> for SerializedTableMetadata { type Error = anyhow::Error; ... } impl TryFrom<SerializedTableMetadata> for TableMetadata { type Error = anyhow::Error; ... } impl TryFrom<TableState> for SerializedTableState { type Error = anyhow::Error; ... } impl TryFrom<SerializedTableState> for TableState { type Error = anyhow::Error; ... } ``` Finally, connect the two types together with the `codegen_convex_serialization!` macro. 
This macro generates conversions to and from `ConvexValue` and `ConvexObject`, and it also defines a roundtrips proptest. ```rust codegen_convex_serialization!(TableMetadata, SerializedTableMetadata); ``` I've ported over all of `bootstrap_model` to this new approach, and I think it cleans stuff up a lot! GitOrigin-RevId: ede5e8a87d608bb72a7007948c91b28472ca4aca --- Cargo.lock | 1 + .../bootstrap_model/index/database_index.rs | 284 ------ .../index/database_index/backfill_state.rs | 32 + .../index/database_index/index_config.rs | 51 + .../index/database_index/index_state.rs | 77 ++ .../index/database_index/indexed_fields.rs | 156 +++ .../index/database_index/mod.rs | 49 + .../index/developer_index_config.rs | 109 ++ .../src/bootstrap_model/index/index_config.rs | 313 ++++++ .../bootstrap_model/index/index_metadata.rs | 225 +++++ .../index/index_validation_error.rs | 199 ++++ .../common/src/bootstrap_model/index/mod.rs | 951 +----------------- .../src/bootstrap_model/index/search_index.rs | 232 ----- .../index/search_index/index_config.rs | 93 ++ .../index/search_index/index_snapshot.rs | 91 ++ .../index/search_index/index_state.rs | 72 ++ .../bootstrap_model/index/search_index/mod.rs | 39 + .../src/bootstrap_model/index/vector_index.rs | 668 ------------ .../index/vector_index/backfill_state.rs | 83 ++ .../index/vector_index/dimensions.rs | 54 + .../index/vector_index/index_config.rs | 103 ++ .../index/vector_index/index_snapshot.rs | 153 +++ .../index/vector_index/index_state.rs | 101 ++ .../bootstrap_model/index/vector_index/mod.rs | 124 +++ .../index/vector_index/segment.rs | 185 ++++ crates/common/src/bootstrap_model/mod.rs | 2 + crates/common/src/bootstrap_model/schema.rs | 172 +--- .../src/bootstrap_model/schema_metadata.rs | 50 + .../src/bootstrap_model/schema_state.rs | 112 +++ crates/common/src/bootstrap_model/tables.rs | 122 +-- crates/common/src/types/actions.rs | 88 +- crates/convex_macro/Cargo.toml | 3 + crates/value/src/lib.rs | 1 + 
crates/value/src/serde/de.rs | 777 ++++++++++++++ crates/value/src/serde/mod.rs | 118 +++ crates/value/src/serde/ser.rs | 677 +++++++++++++ crates/value/src/serde/value.rs | 228 +++++ 37 files changed, 4399 insertions(+), 2396 deletions(-) delete mode 100644 crates/common/src/bootstrap_model/index/database_index.rs create mode 100644 crates/common/src/bootstrap_model/index/database_index/backfill_state.rs create mode 100644 crates/common/src/bootstrap_model/index/database_index/index_config.rs create mode 100644 crates/common/src/bootstrap_model/index/database_index/index_state.rs create mode 100644 crates/common/src/bootstrap_model/index/database_index/indexed_fields.rs create mode 100644 crates/common/src/bootstrap_model/index/database_index/mod.rs create mode 100644 crates/common/src/bootstrap_model/index/developer_index_config.rs create mode 100644 crates/common/src/bootstrap_model/index/index_config.rs create mode 100644 crates/common/src/bootstrap_model/index/index_metadata.rs create mode 100644 crates/common/src/bootstrap_model/index/index_validation_error.rs delete mode 100644 crates/common/src/bootstrap_model/index/search_index.rs create mode 100644 crates/common/src/bootstrap_model/index/search_index/index_config.rs create mode 100644 crates/common/src/bootstrap_model/index/search_index/index_snapshot.rs create mode 100644 crates/common/src/bootstrap_model/index/search_index/index_state.rs create mode 100644 crates/common/src/bootstrap_model/index/search_index/mod.rs delete mode 100644 crates/common/src/bootstrap_model/index/vector_index.rs create mode 100644 crates/common/src/bootstrap_model/index/vector_index/backfill_state.rs create mode 100644 crates/common/src/bootstrap_model/index/vector_index/dimensions.rs create mode 100644 crates/common/src/bootstrap_model/index/vector_index/index_config.rs create mode 100644 crates/common/src/bootstrap_model/index/vector_index/index_snapshot.rs create mode 100644 
crates/common/src/bootstrap_model/index/vector_index/index_state.rs create mode 100644 crates/common/src/bootstrap_model/index/vector_index/mod.rs create mode 100644 crates/common/src/bootstrap_model/index/vector_index/segment.rs create mode 100644 crates/common/src/bootstrap_model/schema_metadata.rs create mode 100644 crates/common/src/bootstrap_model/schema_state.rs create mode 100644 crates/value/src/serde/de.rs create mode 100644 crates/value/src/serde/mod.rs create mode 100644 crates/value/src/serde/ser.rs create mode 100644 crates/value/src/serde/value.rs diff --git a/Cargo.lock b/Cargo.lock index 31d2e077..7fa51352 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1174,6 +1174,7 @@ dependencies = [ name = "convex_macro" version = "0.1.0" dependencies = [ + "anyhow", "proc-macro2", "quote", "syn 2.0.37", diff --git a/crates/common/src/bootstrap_model/index/database_index.rs b/crates/common/src/bootstrap_model/index/database_index.rs deleted file mode 100644 index f4dd0312..00000000 --- a/crates/common/src/bootstrap_model/index/database_index.rs +++ /dev/null @@ -1,284 +0,0 @@ -use std::{ - collections::{ - BTreeMap, - HashSet, - }, - convert::TryFrom, - fmt::Display, - ops::Deref, -}; - -use pb::convex_token::FieldPath as FieldPathProto; -use value::{ - heap_size::{ - HeapSize, - WithHeapSize, - }, - obj, - utils::display_sequence, - ConvexObject, - ConvexValue, -}; - -use super::MAX_INDEX_FIELDS_SIZE; -use crate::{ - bootstrap_model::index::index_validation_error, - document::{ - CREATION_TIME_FIELD, - ID_FIELD_PATH, - }, - paths::FieldPath, -}; - -#[derive(Debug, Clone, PartialEq, Eq)] -#[cfg_attr(any(test, feature = "testing"), derive(proptest_derive::Arbitrary))] -pub struct DeveloperDatabaseIndexConfig { - /// Ordered field(s) to index. The "unindexed" primary key ordering of - /// documents by [`DocumentId`] is represented by an empty vector. - pub fields: IndexedFields, -} - -/// Represents the state of an index. 
-/// Table scan index for a newly created table starts at `Enabled`. All -/// other indexes start at `Backfilling` state and are transitioned to -/// `Enabled` by the index backfill routine. Disabled indexes are not -/// implicitly transitioned to any other state. -#[derive(Debug, Clone, PartialEq, Eq)] -#[cfg_attr(any(test, feature = "testing"), derive(proptest_derive::Arbitrary))] -pub enum DatabaseIndexState { - // We are backfilling this index. All new writes should update the index. - Backfilling(DatabaseIndexBackfillState), - // The index is fully backfilled, but hasn't yet been committed and is not - // yet available for reads. - Backfilled, - // Index is fully backfilled and ready to serve reads. - Enabled, -} - -impl TryFrom for ConvexObject { - type Error = anyhow::Error; - - fn try_from(state: DatabaseIndexState) -> Result { - match state { - DatabaseIndexState::Backfilling(backfill_state) => obj!( - "type" => "Backfilling", - "backfillState" => ConvexValue::Object(backfill_state.try_into()?), - ), - DatabaseIndexState::Enabled => obj!("type" => "Enabled"), - // Use Backfilled2 to distinguish between records impacted by CX-3897 - DatabaseIndexState::Backfilled => obj!("type" => "Backfilled2"), - } - } -} - -impl TryFrom for DatabaseIndexState { - type Error = anyhow::Error; - - fn try_from(o: ConvexObject) -> Result { - let mut object_fields: BTreeMap<_, _> = o.into(); - - let t = match object_fields.get("type") { - Some(ConvexValue::String(s)) => s, - Some(..) 
=> { - anyhow::bail!("Invalid `type` field for IndexState {:?}", object_fields) - }, - None => anyhow::bail!("Missing `type` field for IndexState {:?}", object_fields), - }; - - match t.as_ref() { - "Backfilling" => { - let backfill_state = match object_fields.remove("backfillState") { - Some(ConvexValue::Object(backfill_state)) => backfill_state.try_into()?, - _ => anyhow::bail!( - "Missing or invalid backfill_state field for IndexState: {:?}", - object_fields - ), - }; - Ok(DatabaseIndexState::Backfilling(backfill_state)) - }, - // We have historical records with Disabled state. - "Disabled" => Ok(DatabaseIndexState::Backfilling(DatabaseIndexBackfillState)), - "Backfilled2" => Ok(DatabaseIndexState::Backfilled), - "Enabled" => Ok(DatabaseIndexState::Enabled), - _ => anyhow::bail!("Invalid index type {}", t), - } - } -} - -/// Represents state of currently backfilling index. -/// We currently do not checkpoint. Will extend the struct when we do. -#[derive(Debug, Clone, PartialEq, Eq)] -#[cfg_attr(any(test, feature = "testing"), derive(proptest_derive::Arbitrary))] -pub struct DatabaseIndexBackfillState; - -impl From for ConvexObject { - fn from(_state: DatabaseIndexBackfillState) -> Self { - ConvexObject::empty() - } -} - -impl TryFrom for DatabaseIndexBackfillState { - type Error = anyhow::Error; - - fn try_from(o: ConvexObject) -> Result { - anyhow::ensure!(o.is_empty(), "Non-empty object {:?}", o); - Ok(DatabaseIndexBackfillState) - } -} - -/// Ordered list of fields in a multi-column index. This list only contains -/// the user-specified indexes: the system adds the `_id` column at the -/// end to guarantee uniqueness, but this trailing `_id` field isn't -/// included in this type. 
-#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] -pub struct IndexedFields(WithHeapSize>); - -impl IndexedFields { - pub fn by_id() -> Self { - IndexedFields(vec![].into()) - } - - pub fn creation_time() -> Self { - let field_path = FieldPath::new(vec![CREATION_TIME_FIELD.to_owned()]) - .expect("Invalid _creationTime field path"); - IndexedFields(vec![field_path].into()) - } -} - -impl HeapSize for IndexedFields { - fn heap_size(&self) -> usize { - self.0.heap_size() - } -} - -impl Display for IndexedFields { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - display_sequence(f, ["[", "]"], self.0.iter()) - } -} - -impl Deref for IndexedFields { - type Target = Vec; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl TryFrom> for IndexedFields { - type Error = anyhow::Error; - - fn try_from(fields: Vec) -> anyhow::Result { - if fields.len() > MAX_INDEX_FIELDS_SIZE { - anyhow::bail!(index_validation_error::too_many_fields( - MAX_INDEX_FIELDS_SIZE - )); - } - - if fields.contains(&ID_FIELD_PATH) { - anyhow::bail!(index_validation_error::fields_contain_id()) - } - - let mut seen: HashSet<_> = HashSet::new(); - for field in fields.iter() { - if !seen.insert(field.clone()) { - anyhow::bail!(index_validation_error::fields_not_unique_within_index( - field - )); - } - } - Ok(Self(fields.into())) - } -} - -impl From for Vec { - fn from(fields: IndexedFields) -> Self { - fields.0.into() - } -} - -impl TryFrom for ConvexValue { - type Error = anyhow::Error; - - fn try_from(fields: IndexedFields) -> anyhow::Result { - let vec: Vec<_> = fields.0.into(); - vec.try_into() - } -} - -impl TryFrom for IndexedFields { - type Error = anyhow::Error; - - fn try_from(val: ConvexValue) -> anyhow::Result { - if let ConvexValue::Array(arr) = val { - let fields: Vec = arr - .iter() - .cloned() - .map(FieldPath::try_from) - .collect::>>()?; - Ok(IndexedFields(fields.into())) - } else { - anyhow::bail!("Invalid value for IndexedFields") - } - } -} - 
-#[cfg(any(test, feature = "testing"))] -impl proptest::arbitrary::Arbitrary for IndexedFields { - type Parameters = (); - - type Strategy = impl proptest::strategy::Strategy; - - fn arbitrary_with((): Self::Parameters) -> Self::Strategy { - use proptest::prelude::*; - // Use collection::hash_set to ensure that the fields in the index are unique. - // Filter out `_id` - because those aren't allowed in indexes. Surprisingly, - // proptest does randomly generate `_id` once in a while. - prop::collection::hash_set( - any::() - .prop_filter("_id not allowed in index", |path| path != &*ID_FIELD_PATH), - 1..8, - ) - .prop_filter_map("Invalid IndexedFields", |set| { - IndexedFields::try_from(set.into_iter().collect::>()).ok() - }) - } -} - -impl From for Vec { - fn from(fields: IndexedFields) -> Self { - Vec::::from(fields) - .into_iter() - .map(|f| f.into()) - .collect() - } -} - -#[cfg(test)] -mod tests { - use std::assert_matches::assert_matches; - - use value::{ - obj, - ConvexObject, - }; - - use super::*; - - #[test] - fn test_backfilled_metadata_is_deserialized_as_backfilled() -> anyhow::Result<()> { - let object: ConvexObject = obj!("type" => "Backfilled2")?; - let index_state: DatabaseIndexState = object.try_into()?; - assert_matches!(index_state, DatabaseIndexState::Backfilled); - Ok(()) - } - - #[test] - fn test_backfilled_metadata_is_serialized_as_backfilled() -> anyhow::Result<()> { - let index_state = DatabaseIndexState::Backfilled; - let object: ConvexObject = index_state.try_into()?; - let index_state: DatabaseIndexState = object.try_into()?; - assert_matches!(index_state, DatabaseIndexState::Backfilled); - Ok(()) - } -} diff --git a/crates/common/src/bootstrap_model/index/database_index/backfill_state.rs b/crates/common/src/bootstrap_model/index/database_index/backfill_state.rs new file mode 100644 index 00000000..bdb19d23 --- /dev/null +++ b/crates/common/src/bootstrap_model/index/database_index/backfill_state.rs @@ -0,0 +1,32 @@ +use 
std::convert::TryFrom; + +use serde::{ + Deserialize, + Serialize, +}; + +/// Represents state of currently backfilling index. +/// We currently do not checkpoint. Will extend the struct when we do. +#[derive(Debug, Clone, PartialEq, Eq)] +#[cfg_attr(any(test, feature = "testing"), derive(proptest_derive::Arbitrary))] +pub struct DatabaseIndexBackfillState; + +#[derive(Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct SerializedDatabaseIndexBackfillState {} + +impl TryFrom for SerializedDatabaseIndexBackfillState { + type Error = anyhow::Error; + + fn try_from(_config: DatabaseIndexBackfillState) -> anyhow::Result { + Ok(Self {}) + } +} + +impl TryFrom for DatabaseIndexBackfillState { + type Error = anyhow::Error; + + fn try_from(_config: SerializedDatabaseIndexBackfillState) -> anyhow::Result { + Ok(Self) + } +} diff --git a/crates/common/src/bootstrap_model/index/database_index/index_config.rs b/crates/common/src/bootstrap_model/index/database_index/index_config.rs new file mode 100644 index 00000000..62ad70a7 --- /dev/null +++ b/crates/common/src/bootstrap_model/index/database_index/index_config.rs @@ -0,0 +1,51 @@ +use std::convert::TryFrom; + +use serde::{ + Deserialize, + Serialize, +}; + +use super::indexed_fields::IndexedFields; +use crate::paths::FieldPath; + +#[derive(Debug, Clone, PartialEq, Eq)] +#[cfg_attr(any(test, feature = "testing"), derive(proptest_derive::Arbitrary))] +pub struct DeveloperDatabaseIndexConfig { + /// Ordered field(s) to index. The "unindexed" primary key ordering of + /// documents by [`DocumentId`] is represented by an empty vector. 
+ pub fields: IndexedFields, +} + +#[derive(Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct SerializedDeveloperDatabaseIndexConfig { + fields: Vec, +} + +impl TryFrom for SerializedDeveloperDatabaseIndexConfig { + type Error = anyhow::Error; + + fn try_from(config: DeveloperDatabaseIndexConfig) -> anyhow::Result { + Ok(Self { + fields: Vec::::from(config.fields) + .into_iter() + .map(String::from) + .collect(), + }) + } +} + +impl TryFrom for DeveloperDatabaseIndexConfig { + type Error = anyhow::Error; + + fn try_from(config: SerializedDeveloperDatabaseIndexConfig) -> anyhow::Result { + Ok(Self { + fields: config + .fields + .into_iter() + .map(|p| p.parse()) + .collect::>>()? + .try_into()?, + }) + } +} diff --git a/crates/common/src/bootstrap_model/index/database_index/index_state.rs b/crates/common/src/bootstrap_model/index/database_index/index_state.rs new file mode 100644 index 00000000..be5dc356 --- /dev/null +++ b/crates/common/src/bootstrap_model/index/database_index/index_state.rs @@ -0,0 +1,77 @@ +use std::convert::TryFrom; + +use serde::{ + Deserialize, + Serialize, +}; +use value::codegen_convex_serialization; + +use super::{ + DatabaseIndexBackfillState, + SerializedDatabaseIndexBackfillState, +}; + +/// Represents the state of an index. +/// Table scan index for a newly created table starts at `Enabled`. All +/// other indexes start at `Backfilling` state and are transitioned to +/// `Enabled` by the index backfill routine. Disabled indexes are not +/// implicitly transitioned to any other state. +#[derive(Debug, Clone, PartialEq, Eq)] +#[cfg_attr(any(test, feature = "testing"), derive(proptest_derive::Arbitrary))] +pub enum DatabaseIndexState { + // We are backfilling this index. All new writes should update the index. + Backfilling(DatabaseIndexBackfillState), + // The index is fully backfilled, but hasn't yet been committed and is not + // yet available for reads. 
+ Backfilled, + // Index is fully backfilled and ready to serve reads. + Enabled, +} + +#[derive(Serialize, Deserialize)] +#[serde(tag = "type", rename_all = "PascalCase")] +pub enum SerializedDatabaseIndexState { + #[serde(rename_all = "camelCase")] + Backfilling { + backfill_state: SerializedDatabaseIndexBackfillState, + }, + // Use Backfilled2 to distinguish between records impacted by CX-3897 + Backfilled2, + Enabled, + + // We have historical records with Disabled state. + Disabled, +} + +impl TryFrom for SerializedDatabaseIndexState { + type Error = anyhow::Error; + + fn try_from(config: DatabaseIndexState) -> anyhow::Result { + Ok(match config { + DatabaseIndexState::Backfilling(st) => SerializedDatabaseIndexState::Backfilling { + backfill_state: st.try_into()?, + }, + DatabaseIndexState::Backfilled => SerializedDatabaseIndexState::Backfilled2, + DatabaseIndexState::Enabled => SerializedDatabaseIndexState::Enabled, + }) + } +} + +impl TryFrom for DatabaseIndexState { + type Error = anyhow::Error; + + fn try_from(config: SerializedDatabaseIndexState) -> anyhow::Result { + Ok(match config { + SerializedDatabaseIndexState::Backfilling { backfill_state } => { + DatabaseIndexState::Backfilling(backfill_state.try_into()?) 
+ }, + SerializedDatabaseIndexState::Backfilled2 => DatabaseIndexState::Backfilled, + SerializedDatabaseIndexState::Enabled => DatabaseIndexState::Enabled, + SerializedDatabaseIndexState::Disabled => { + DatabaseIndexState::Backfilling(DatabaseIndexBackfillState) + }, + }) + } +} + +codegen_convex_serialization!(DatabaseIndexState, SerializedDatabaseIndexState); diff --git a/crates/common/src/bootstrap_model/index/database_index/indexed_fields.rs b/crates/common/src/bootstrap_model/index/database_index/indexed_fields.rs new file mode 100644 index 00000000..100e42a2 --- /dev/null +++ b/crates/common/src/bootstrap_model/index/database_index/indexed_fields.rs @@ -0,0 +1,156 @@ +use std::{ + collections::HashSet, + convert::TryFrom, + fmt::Display, + ops::Deref, +}; + +use pb::convex_token::FieldPath as FieldPathProto; +use value::{ + heap_size::{ + HeapSize, + WithHeapSize, + }, + utils::display_sequence, + ConvexValue, +}; + +use crate::{ + bootstrap_model::index::{ + index_validation_error, + MAX_INDEX_FIELDS_SIZE, + }, + document::{ + CREATION_TIME_FIELD, + ID_FIELD_PATH, + }, + paths::FieldPath, +}; + +/// Ordered list of fields in a multi-column index. This list only contains +/// the user-specified indexes: the system adds the `_id` column at the +/// end to guarantee uniqueness, but this trailing `_id` field isn't +/// included in this type. 
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub struct IndexedFields(WithHeapSize>); + +impl IndexedFields { + pub fn by_id() -> Self { + IndexedFields(vec![].into()) + } + + pub fn creation_time() -> Self { + let field_path = FieldPath::new(vec![CREATION_TIME_FIELD.to_owned()]) + .expect("Invalid _creationTime field path"); + IndexedFields(vec![field_path].into()) + } +} + +impl HeapSize for IndexedFields { + fn heap_size(&self) -> usize { + self.0.heap_size() + } +} + +impl Display for IndexedFields { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + display_sequence(f, ["[", "]"], self.0.iter()) + } +} + +impl Deref for IndexedFields { + type Target = Vec; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl TryFrom> for IndexedFields { + type Error = anyhow::Error; + + fn try_from(fields: Vec) -> anyhow::Result { + if fields.len() > MAX_INDEX_FIELDS_SIZE { + anyhow::bail!(index_validation_error::too_many_fields( + MAX_INDEX_FIELDS_SIZE + )); + } + + if fields.contains(&ID_FIELD_PATH) { + anyhow::bail!(index_validation_error::fields_contain_id()) + } + + let mut seen: HashSet<_> = HashSet::new(); + for field in fields.iter() { + if !seen.insert(field.clone()) { + anyhow::bail!(index_validation_error::fields_not_unique_within_index( + field + )); + } + } + Ok(Self(fields.into())) + } +} + +impl From for Vec { + fn from(fields: IndexedFields) -> Self { + fields.0.into() + } +} + +impl TryFrom for ConvexValue { + type Error = anyhow::Error; + + fn try_from(fields: IndexedFields) -> anyhow::Result { + let vec: Vec<_> = fields.0.into(); + vec.try_into() + } +} + +impl TryFrom for IndexedFields { + type Error = anyhow::Error; + + fn try_from(val: ConvexValue) -> anyhow::Result { + if let ConvexValue::Array(arr) = val { + let fields: Vec = arr + .iter() + .cloned() + .map(FieldPath::try_from) + .collect::>>()?; + Ok(IndexedFields(fields.into())) + } else { + anyhow::bail!("Invalid value for IndexedFields") + } + } +} + 
+#[cfg(any(test, feature = "testing"))] +impl proptest::arbitrary::Arbitrary for IndexedFields { + type Parameters = (); + + type Strategy = impl proptest::strategy::Strategy; + + fn arbitrary_with((): Self::Parameters) -> Self::Strategy { + use proptest::prelude::*; + // Use collection::hash_set to ensure that the fields in the index are unique. + // Filter out `_id` - because those aren't allowed in indexes. Surprisingly, + // proptest does randomly generate `_id` once in a while. + prop::collection::hash_set( + any::() + .prop_filter("_id not allowed in index", |path| path != &*ID_FIELD_PATH), + 1..8, + ) + .prop_filter_map("Invalid IndexedFields", |set| { + IndexedFields::try_from(set.into_iter().collect::>()).ok() + }) + } +} + +impl From for Vec { + fn from(fields: IndexedFields) -> Self { + Vec::::from(fields) + .into_iter() + .map(|f| f.into()) + .collect() + } +} diff --git a/crates/common/src/bootstrap_model/index/database_index/mod.rs b/crates/common/src/bootstrap_model/index/database_index/mod.rs new file mode 100644 index 00000000..d088fb4a --- /dev/null +++ b/crates/common/src/bootstrap_model/index/database_index/mod.rs @@ -0,0 +1,49 @@ +mod backfill_state; +mod index_config; +mod index_state; +mod indexed_fields; + +pub use self::{ + backfill_state::{ + DatabaseIndexBackfillState, + SerializedDatabaseIndexBackfillState, + }, + index_config::{ + DeveloperDatabaseIndexConfig, + SerializedDeveloperDatabaseIndexConfig, + }, + index_state::{ + DatabaseIndexState, + SerializedDatabaseIndexState, + }, + indexed_fields::IndexedFields, +}; + +#[cfg(test)] +mod tests { + use std::assert_matches::assert_matches; + + use value::{ + obj, + ConvexObject, + }; + + use super::*; + + #[test] + fn test_backfilled_metadata_is_deserialized_as_backfilled() -> anyhow::Result<()> { + let object: ConvexObject = obj!("type" => "Backfilled2")?; + let index_state: DatabaseIndexState = object.try_into()?; + assert_matches!(index_state, DatabaseIndexState::Backfilled); + Ok(()) 
+ } + + #[test] + fn test_backfilled_metadata_is_serialized_as_backfilled() -> anyhow::Result<()> { + let index_state = DatabaseIndexState::Backfilled; + let object: ConvexObject = index_state.try_into()?; + let index_state: DatabaseIndexState = object.try_into()?; + assert_matches!(index_state, DatabaseIndexState::Backfilled); + Ok(()) + } +} diff --git a/crates/common/src/bootstrap_model/index/developer_index_config.rs b/crates/common/src/bootstrap_model/index/developer_index_config.rs new file mode 100644 index 00000000..81375cd7 --- /dev/null +++ b/crates/common/src/bootstrap_model/index/developer_index_config.rs @@ -0,0 +1,109 @@ +use std::{ + convert::{ + TryFrom, + TryInto, + }, + fmt::Debug, +}; + +use serde::{ + Deserialize, + Serialize, +}; +use value::codegen_convex_serialization; + +use super::{ + database_index::{ + DeveloperDatabaseIndexConfig, + SerializedDeveloperDatabaseIndexConfig, + }, + search_index::{ + DeveloperSearchIndexConfig, + SerializedDeveloperSearchIndexConfig, + }, + vector_index::{ + DeveloperVectorIndexConfig, + SerializedDeveloperVectorIndexConfig, + }, + IndexConfig, +}; + +// Index config that's specified by the developer +#[derive(Debug, Clone, PartialEq, Eq)] +#[cfg_attr(any(test, feature = "testing"), derive(proptest_derive::Arbitrary))] +pub enum DeveloperIndexConfig { + /// Standard database index. + Database(DeveloperDatabaseIndexConfig), + + /// Full text search index. + Search(DeveloperSearchIndexConfig), + + Vector(DeveloperVectorIndexConfig), +} + +impl From for DeveloperIndexConfig { + fn from(value: IndexConfig) -> Self { + match value { + IndexConfig::Database { + developer_config, .. + } => DeveloperIndexConfig::Database(developer_config), + IndexConfig::Search { + developer_config, .. + } => DeveloperIndexConfig::Search(developer_config), + IndexConfig::Vector { + developer_config, .. 
+ } => DeveloperIndexConfig::Vector(developer_config), + } + } +} + +#[derive(Serialize, Deserialize)] +#[serde(tag = "type", rename_all = "camelCase")] +enum SerializedDeveloperIndexConfig { + Database { + #[serde(flatten)] + config: SerializedDeveloperDatabaseIndexConfig, + }, + Search { + #[serde(flatten)] + config: SerializedDeveloperSearchIndexConfig, + }, + Vector { + #[serde(flatten)] + config: SerializedDeveloperVectorIndexConfig, + }, +} + +impl TryFrom for SerializedDeveloperIndexConfig { + type Error = anyhow::Error; + + fn try_from(index_config: DeveloperIndexConfig) -> anyhow::Result { + Ok(match index_config { + DeveloperIndexConfig::Database(config) => Self::Database { + config: config.try_into()?, + }, + DeveloperIndexConfig::Search(config) => Self::Search { + config: config.try_into()?, + }, + DeveloperIndexConfig::Vector(config) => Self::Vector { + config: config.try_into()?, + }, + }) + } +} + +impl TryFrom for DeveloperIndexConfig { + type Error = anyhow::Error; + + fn try_from(index_config: SerializedDeveloperIndexConfig) -> anyhow::Result { + Ok(match index_config { + SerializedDeveloperIndexConfig::Database { config } => { + Self::Database(config.try_into()?) 
+ }, + SerializedDeveloperIndexConfig::Search { config } => Self::Search(config.try_into()?), + SerializedDeveloperIndexConfig::Vector { config } => Self::Vector(config.try_into()?), + }) + } +} + +codegen_convex_serialization!(DeveloperIndexConfig, SerializedDeveloperIndexConfig); diff --git a/crates/common/src/bootstrap_model/index/index_config.rs b/crates/common/src/bootstrap_model/index/index_config.rs new file mode 100644 index 00000000..5d0d9224 --- /dev/null +++ b/crates/common/src/bootstrap_model/index/index_config.rs @@ -0,0 +1,313 @@ +use std::{ + convert::{ + TryFrom, + TryInto, + }, + fmt::Debug, +}; + +use serde::{ + Deserialize, + Serialize, +}; +use value::codegen_convex_serialization; + +use super::{ + database_index::{ + DatabaseIndexState, + DeveloperDatabaseIndexConfig, + SerializedDatabaseIndexState, + SerializedDeveloperDatabaseIndexConfig, + }, + search_index::{ + DeveloperSearchIndexConfig, + SearchIndexState, + SerializedDeveloperSearchIndexConfig, + SerializedSearchIndexState, + }, + vector_index::{ + DeveloperVectorIndexConfig, + SerializedDeveloperVectorIndexConfig, + SerializedVectorIndexState, + VectorIndexSnapshotData, + VectorIndexState, + }, +}; + +/// Configuration that depends on the type of index. +#[derive(Debug, Clone, PartialEq, Eq)] +#[cfg_attr(any(test, feature = "testing"), derive(proptest_derive::Arbitrary))] +pub enum IndexConfig { + /// Standard database index. + Database { + developer_config: DeveloperDatabaseIndexConfig, + + /// Whether the index is fully backfilled or not on disk. + on_disk_state: DatabaseIndexState, + }, + + /// Full text search index. + Search { + developer_config: DeveloperSearchIndexConfig, + + /// Whether the index is fully backfilled or not on disk. 
+ on_disk_state: SearchIndexState, + }, + + Vector { + developer_config: DeveloperVectorIndexConfig, + on_disk_state: VectorIndexState, + }, +} + +impl IndexConfig { + pub fn is_enabled(&self) -> bool { + match self { + IndexConfig::Database { on_disk_state, .. } => { + matches!(on_disk_state, DatabaseIndexState::Enabled) + }, + IndexConfig::Search { on_disk_state, .. } => { + matches!(on_disk_state, SearchIndexState::SnapshottedAt(_)) + }, + IndexConfig::Vector { on_disk_state, .. } => { + matches!(on_disk_state, VectorIndexState::SnapshottedAt(_)) + }, + } + } + + pub fn is_backfilling(&self) -> bool { + match self { + IndexConfig::Database { on_disk_state, .. } => { + matches!(on_disk_state, DatabaseIndexState::Backfilling(_)) + }, + IndexConfig::Search { on_disk_state, .. } => { + matches!(on_disk_state, SearchIndexState::Backfilling) + }, + IndexConfig::Vector { on_disk_state, .. } => { + matches!(on_disk_state, VectorIndexState::Backfilling(_)) + }, + } + } + + pub fn same_config(&self, config: &IndexConfig) -> bool { + match (self, config) { + ( + IndexConfig::Database { + developer_config, .. + }, + IndexConfig::Database { + developer_config: config_to_compare, + .. + }, + ) => developer_config == config_to_compare, + ( + IndexConfig::Search { + developer_config, .. + }, + IndexConfig::Search { + developer_config: config_to_compare, + .. + }, + ) => developer_config == config_to_compare, + ( + IndexConfig::Vector { + developer_config, .. + }, + IndexConfig::Vector { + developer_config: config_to_compare, + .. + }, + ) => developer_config == config_to_compare, + (..) => false, + } + } + + /// Returns the estimated size of the index in bytes in a manner suitable + /// for usage and pricing. + /// + /// The estimate here may not accurately reflect the actual number of + /// stored bytes and may not be appropriate for estimate resource usage. 
For + /// example, small dimension vector indexes may have 20% overhead from + /// HNSW indexes that won't be reflected here, but would require + /// additional RAM or disk space to serve. + /// + /// This is only implemented for vector indexes for now. Calling this method + /// on other index types will panic. + pub fn estimate_pricing_size_bytes(&self) -> anyhow::Result { + match self { + IndexConfig::Database { .. } | IndexConfig::Search { .. } => { + // TODO(sam): We should support this for all index types in the future. Right + // now search indexes are free and we estimate the size of + // database indexes. Both of those could instead track usage in their metadata, + // similar to vector indexes. + anyhow::bail!("Only supported for vector indexes!") + }, + IndexConfig::Vector { + on_disk_state, + developer_config, + } => match on_disk_state { + VectorIndexState::Backfilling(_) | VectorIndexState::Backfilled(_) => Ok(0), + VectorIndexState::SnapshottedAt(snapshot) => match &snapshot.data { + VectorIndexSnapshotData::MultiSegment(segments) => segments + .iter() + .map(|segment| segment.non_deleted_size_bytes(developer_config.dimensions)) + .sum::>(), + VectorIndexSnapshotData::Unknown(_) => Ok(0), + }, + }, + } + } +} + +#[derive(Serialize, Deserialize)] +#[serde(tag = "type", rename_all = "camelCase")] +pub enum SerializedIndexConfig { + #[serde(rename_all = "camelCase")] + Database { + #[serde(flatten)] + developer_config: SerializedDeveloperDatabaseIndexConfig, + on_disk_state: SerializedDatabaseIndexState, + }, + #[serde(rename_all = "camelCase")] + Search { + #[serde(flatten)] + developer_config: SerializedDeveloperSearchIndexConfig, + on_disk_state: SerializedSearchIndexState, + }, + #[serde(rename_all = "camelCase")] + Vector { + #[serde(flatten)] + developer_config: SerializedDeveloperVectorIndexConfig, + on_disk_state: SerializedVectorIndexState, + }, +} + +impl TryFrom for SerializedIndexConfig { + type Error = anyhow::Error; + + fn try_from(config: 
IndexConfig) -> anyhow::Result { + Ok(match config { + IndexConfig::Database { + developer_config, + on_disk_state, + } => SerializedIndexConfig::Database { + developer_config: developer_config.try_into()?, + on_disk_state: on_disk_state.try_into()?, + }, + IndexConfig::Search { + developer_config, + on_disk_state, + } => SerializedIndexConfig::Search { + developer_config: developer_config.try_into()?, + on_disk_state: on_disk_state.try_into()?, + }, + IndexConfig::Vector { + developer_config, + on_disk_state, + } => SerializedIndexConfig::Vector { + developer_config: developer_config.try_into()?, + on_disk_state: on_disk_state.try_into()?, + }, + }) + } +} + +impl TryFrom for IndexConfig { + type Error = anyhow::Error; + + fn try_from(config: SerializedIndexConfig) -> anyhow::Result { + Ok(match config { + SerializedIndexConfig::Database { + developer_config, + on_disk_state, + } => IndexConfig::Database { + developer_config: developer_config.try_into()?, + on_disk_state: on_disk_state.try_into()?, + }, + SerializedIndexConfig::Search { + developer_config, + on_disk_state, + } => IndexConfig::Search { + developer_config: developer_config.try_into()?, + on_disk_state: on_disk_state.try_into()?, + }, + SerializedIndexConfig::Vector { + developer_config, + on_disk_state, + } => IndexConfig::Vector { + developer_config: developer_config.try_into()?, + on_disk_state: on_disk_state.try_into()?, + }, + }) + } +} + +codegen_convex_serialization!(IndexConfig, SerializedIndexConfig, test_cases = 64); + +#[cfg(test)] +mod tests { + use maplit::btreeset; + use value::{ + obj, + ConvexValue, + }; + + use crate::bootstrap_model::index::{ + vector_index::{ + DeveloperVectorIndexConfig, + FragmentedVectorSegment, + VectorIndexBackfillState, + VectorIndexState, + }, + IndexConfig, + }; + + #[test] + fn test_backwards_compatibility() -> anyhow::Result<()> { + let serialized = obj!( + "type" => "vector", + "onDiskState" => { + "state" => "backfilling", + "document_cursor" => 
ConvexValue::Null, + "backfill_snapshot_ts" => 10i64, + "segments" => [ + { + "segment_key" => "abc", + "id_tracker_key" => "def", + "deleted_bitset_key" => "ghi", + "id" => "jkl", + "num_vectors" => 11i64, + "num_deleted" => 12i64, + }, + ], + }, + "dimensions" => 1536i64, + "vectorField" => "embedding.field", + "filterFields" => ["filter1", "filter2"], + )?; + let deserialized: IndexConfig = serialized.try_into()?; + assert_eq!( + deserialized, + IndexConfig::Vector { + developer_config: DeveloperVectorIndexConfig { + dimensions: 1536.try_into()?, + vector_field: "embedding.field".parse()?, + filter_fields: btreeset! { "filter1".parse()?, "filter2".parse()? }, + }, + on_disk_state: VectorIndexState::Backfilling(VectorIndexBackfillState { + cursor: None, + backfill_snapshot_ts: Some(10i64.try_into()?), + segments: vec![FragmentedVectorSegment { + segment_key: "abc".to_string().try_into()?, + id_tracker_key: "def".to_string().try_into()?, + deleted_bitset_key: "ghi".to_string().try_into()?, + id: "jkl".to_string(), + num_vectors: 11, + num_deleted: 12, + }] + }), + } + ); + Ok(()) + } +} diff --git a/crates/common/src/bootstrap_model/index/index_metadata.rs b/crates/common/src/bootstrap_model/index/index_metadata.rs new file mode 100644 index 00000000..036e6b58 --- /dev/null +++ b/crates/common/src/bootstrap_model/index/index_metadata.rs @@ -0,0 +1,225 @@ +use std::collections::BTreeSet; + +use serde::{ + Deserialize, + Serialize, +}; +use value::{ + codegen_convex_serialization, + ConvexValue, + FieldPath, + TableId, + TableIdAndTableNumber, + TableIdentifier, + TableName, +}; + +use super::{ + database_index::{ + DatabaseIndexBackfillState, + DatabaseIndexState, + DeveloperDatabaseIndexConfig, + IndexedFields, + }, + index_config::SerializedIndexConfig, + search_index::{ + DeveloperSearchIndexConfig, + SearchIndexState, + }, + vector_index::{ + DeveloperVectorIndexConfig, + VectorDimensions, + VectorIndexBackfillState, + VectorIndexState, + }, + IndexConfig, +}; 
+use crate::{ + document::{ + ParsedDocument, + ResolvedDocument, + }, + types::{ + GenericIndexName, + IndexDescriptor, + }, +}; + +pub type ResolvedIndexMetadata = IndexMetadata; +pub type TabletIndexMetadata = IndexMetadata; +pub type DeveloperIndexMetadata = IndexMetadata; + +/// In-memory representation of an index's metadata. +#[derive(Debug, Clone, PartialEq, Eq)] +#[cfg_attr(any(test, feature = "testing"), derive(proptest_derive::Arbitrary))] +pub struct IndexMetadata { + /// Unique name for the index. + pub name: GenericIndexName, + + /// Configuration that depends on the type of index. + pub config: IndexConfig, +} + +impl IndexMetadata { + pub fn new_backfilling(name: GenericIndexName, fields: IndexedFields) -> Self { + Self { + name, + config: IndexConfig::Database { + developer_config: DeveloperDatabaseIndexConfig { fields }, + on_disk_state: DatabaseIndexState::Backfilling(DatabaseIndexBackfillState {}), + }, + } + } + + pub fn new_backfilling_search_index( + name: GenericIndexName, + search_field: FieldPath, + filter_fields: BTreeSet, + ) -> Self { + Self::new_search_index( + name, + DeveloperSearchIndexConfig { + search_field, + filter_fields, + }, + SearchIndexState::Backfilling, + ) + } + + pub fn new_backfilling_vector_index( + name: GenericIndexName, + vector_field: FieldPath, + dimensions: VectorDimensions, + filter_fields: BTreeSet, + ) -> Self { + Self { + name, + config: IndexConfig::Vector { + developer_config: DeveloperVectorIndexConfig { + dimensions, + vector_field, + filter_fields, + }, + on_disk_state: VectorIndexState::Backfilling(VectorIndexBackfillState { + segments: vec![], + cursor: None, + backfill_snapshot_ts: None, + }), + }, + } + } + + pub fn new_search_index( + name: GenericIndexName, + developer_config: DeveloperSearchIndexConfig, + on_disk_state: SearchIndexState, + ) -> Self { + Self { + name, + config: IndexConfig::Search { + developer_config, + on_disk_state, + }, + } + } + + pub fn new_enabled(name: GenericIndexName, 
fields: IndexedFields) -> Self { + Self { + name, + config: IndexConfig::Database { + developer_config: DeveloperDatabaseIndexConfig { fields }, + on_disk_state: DatabaseIndexState::Enabled, + }, + } + } + + pub fn is_database_index(&self) -> bool { + matches!(self.config, IndexConfig::Database { .. }) + } + + pub fn is_search_index(&self) -> bool { + matches!(self.config, IndexConfig::Search { .. }) + } + + pub fn is_vector_index(&self) -> bool { + matches!(self.config, IndexConfig::Vector { .. }) + } + + pub fn map_table( + self, + f: &impl Fn(T) -> anyhow::Result, + ) -> anyhow::Result> { + Ok(IndexMetadata { + name: self.name.map_table(f)?, + config: self.config, + }) + } +} + +impl From for TabletIndexMetadata { + fn from(value: ResolvedIndexMetadata) -> Self { + Self { + name: value.name.into(), + config: value.config, + } + } +} + +impl ResolvedIndexMetadata { + pub fn from_document( + f: impl Fn(TableId) -> anyhow::Result, + document: ResolvedDocument, + ) -> anyhow::Result> { + let index_metadata_: ParsedDocument = document.try_into()?; + let index_metadata: ParsedDocument = index_metadata_.map(|d| d.map_table(&f))?; + Ok(index_metadata) + } +} + +impl TabletIndexMetadata { + pub fn from_document(document: ResolvedDocument) -> anyhow::Result> { + document.try_into() + } +} + +pub fn index_metadata_serialize_table_id(table_id: &TableId) -> anyhow::Result { + ConvexValue::try_from(table_id.to_string()) +} + +#[derive(Serialize, Deserialize)] +struct SerializedTabletIndexMetadata { + table_id: String, + descriptor: String, + config: SerializedIndexConfig, +} + +impl TryFrom for SerializedTabletIndexMetadata { + type Error = anyhow::Error; + + fn try_from(m: TabletIndexMetadata) -> anyhow::Result { + Ok(Self { + // New format: write table_id(v5) + descriptor. 
+ table_id: m.name.table().to_string(), + descriptor: m.name.descriptor().to_string(), + config: m.config.try_into()?, + }) + } +} + +impl TryFrom for TabletIndexMetadata { + type Error = anyhow::Error; + + fn try_from(s: SerializedTabletIndexMetadata) -> anyhow::Result { + let table_id: TableId = s.table_id.parse()?; + let descriptor: IndexDescriptor = s.descriptor.parse()?; + let name = if descriptor.is_reserved() { + GenericIndexName::new_reserved(table_id, descriptor) + } else { + GenericIndexName::new(table_id, descriptor) + }?; + let config = IndexConfig::try_from(s.config)?; + Ok(Self { name, config }) + } +} + +codegen_convex_serialization!(TabletIndexMetadata, SerializedTabletIndexMetadata); diff --git a/crates/common/src/bootstrap_model/index/index_validation_error.rs b/crates/common/src/bootstrap_model/index/index_validation_error.rs new file mode 100644 index 00000000..9eb57653 --- /dev/null +++ b/crates/common/src/bootstrap_model/index/index_validation_error.rs @@ -0,0 +1,199 @@ +use errors::ErrorMetadata; +use value::{ + TableIdentifier, + TableName, +}; + +use crate::{ + paths::FieldPath, + schemas::IndexSchema, + types::IndexDescriptor, +}; + +pub fn empty_index(table_name: &TableName, index: &IndexSchema) -> ErrorMetadata { + ErrorMetadata::bad_request( + "EmptyIndex", + format!("In table \"{table_name}\" index \"{index}\" must have at least one field."), + ) +} +pub fn fields_not_unique_within_index(field: &FieldPath) -> ErrorMetadata { + ErrorMetadata::bad_request( + "FieldsNotUniqueWithinIndex", + format!("Duplicate field {field}. Index fields must be unique within an index."), + ) +} +pub fn index_not_unique( + table_name: &TableName, + index1: &IndexDescriptor, + index2: &IndexDescriptor, +) -> ErrorMetadata { + ErrorMetadata::bad_request( + "IndexNotUnique", + format!( + "In table \"{table_name}\" index \"{index1}\" and index \"{index2}\" have the same \ + fields. Indexes must be unique within a table." 
+ ), + ) +} +// IndexFieldsContainId is a more specific version of +// IndexFieldNameReserved. It provides a more actionable error +// message. +pub fn fields_contain_id() -> ErrorMetadata { + ErrorMetadata::bad_request( + "IndexFieldsContainId", + "`_id` is not a valid index field. To load documents by ID, use `db.get(id)`.", + ) +} +// IndexFieldsContainCreationTime is a more specific version of +// IndexFieldNameReserved. It provides a more actionable error message. +pub fn fields_contain_creation_time() -> ErrorMetadata { + ErrorMetadata::bad_request("IndexFieldsContainCreationTime", + "`_creationTime` is automatically added to the end of each index. It should not \ + be added explicitly in the index definition. See https://docs.convex.dev/using/indexes \ + for more details." + ) +} +pub fn field_name_reserved() -> ErrorMetadata { + ErrorMetadata::bad_request( + "IndexFieldNameReserved", + "Reserved fields (starting with `_`) are not allowed in indexes.", + ) +} +pub fn search_field_not_unique( + table_name: &TableName, + index1: &IndexDescriptor, + index2: &IndexDescriptor, +) -> ErrorMetadata { + ErrorMetadata::bad_request( + "SearchIndexFieldNotUnique", + format!( + "In table \"{table_name}\" search index \"{index1}\" and search index \"{index2}\" \ + have the same `searchField`. Search index fields must be unique within a table. You \ + should combine the + indexes with the same `searchField` into one index containing all `filterField`s and \ + then use different subsets of the `filterField`s at query time." + ), + ) +} +pub fn vector_field_not_unique( + table_name: &TableName, + index1: &IndexDescriptor, + index2: &IndexDescriptor, +) -> ErrorMetadata { + ErrorMetadata::bad_request( + "VectorIndexFieldNotUnique", + format!( + "In table \"{table_name}\" vector index \"{index1}\" and vector index \"{index2}\" \ + have the same `vectorField`. Vector index fields must be unique within a table. 
You \ + should combine the + indexes with the same `vectorField` into one index containing all `filterField`s and \ + then use different subsets of the `filterField`s at query time." + ), + ) +} +pub fn name_reserved(table_name: &T, name: &IndexDescriptor) -> ErrorMetadata { + ErrorMetadata::bad_request( + "IndexNameReserved", + format!( + "In table \"{table_name}\" cannot name an index \"{name}\" because the name is \ + reserved. Indexes may not start with an underscore or be named \"by_id\" or \ + \"by_creation_time\"." + ), + ) +} +pub fn names_not_unique(table_name: &TableName, index: &IndexDescriptor) -> ErrorMetadata { + ErrorMetadata::bad_request( + "IndexNamesNotUnique", + format!("Table \"{table_name}\" has two or more definitions of index \"{index}\"."), + ) +} +pub fn invalid_index_name(descriptor: &str) -> ErrorMetadata { + ErrorMetadata::bad_request( + "InvalidIndexName", + format!( + "Invalid index name: \"{descriptor}\". Identifiers must be 64 characters or less, \ + start with a letter, and only contain letters, digits, underscores." + ), + ) +} +pub fn invalid_index_field(descriptor: &IndexDescriptor, field: &str) -> ErrorMetadata { + ErrorMetadata::bad_request( + "InvalidIndexField", + format!("In index \"{descriptor}\": Invalid index field: \"{field}\""), + ) +} + +// TODO - move elsewhere (near table names) - it's not indexing related +pub fn invalid_table_name(table_name: &str) -> ErrorMetadata { + ErrorMetadata::bad_request( + "InvalidTableName", + format!( + "Invalid table name: \"{table_name}\". Identifiers must start with a letter and can \ + only contain letters, digits, and underscores." 
+ ), + ) +} +pub fn not_enough_name_components(index_name: &str) -> ErrorMetadata { + ErrorMetadata::bad_request( + "IndexNotEnoughNameComponents", + format!("Insufficient components in index name {index_name}"), + ) +} +pub fn too_many_fields(num_fields: usize) -> ErrorMetadata { + ErrorMetadata::bad_request( + "IndexTooManyFields", + format!("Indexes may have up to {num_fields} fields."), + ) +} +pub fn too_many_filter_fields(num_fields: usize) -> ErrorMetadata { + ErrorMetadata::bad_request( + "IndexTooManyFilterFields", + format!("Search indexes may have up to {num_fields} filter fields."), + ) +} +pub fn too_many_indexes(table_name: &TableName, num_indexes: usize) -> ErrorMetadata { + ErrorMetadata::bad_request( + "TooManyIndexes", + format!("Table \"{table_name}\" cannot have more than {num_indexes} indexes."), + ) +} +pub fn too_many_search_indexes(table_name: &TableName, num_indexes: usize) -> ErrorMetadata { + ErrorMetadata::bad_request( + "TooManySearchIndexes", + format!("Table \"{table_name}\" cannot have more than {num_indexes} search indexes."), + ) +} +pub fn too_many_vector_indexes(table_name: &TableName, num_indexes: usize) -> ErrorMetadata { + ErrorMetadata::bad_request( + "TooManyVectorIndexes", + format!("Table \"{table_name}\" cannot have more than {num_indexes} vector indexes."), + ) +} +pub fn too_many_name_components(index_name: &str) -> ErrorMetadata { + ErrorMetadata::bad_request( + "IndexTooManyNameComponents", + format!("Too many components in index name {index_name}"), + ) +} + +// TODO move elsewhere (near table names) - it's not indexing related +pub fn table_name_reserved(table_name: &TableName) -> ErrorMetadata { + ErrorMetadata::bad_request( + "TableNameReserved", + format!("{table_name} is a reserved table name."), + ) +} +pub fn too_many_total_user_indexes(num_total_indexes: usize) -> ErrorMetadata { + ErrorMetadata::bad_request( + "TooManyTotalIndexes", + format!("Number of total indexes cannot exceed {num_total_indexes}."), + ) 
+} + +// TODO move elsewhere - it's not indexing related +pub fn too_many_tables(num_tables: usize) -> ErrorMetadata { + ErrorMetadata::bad_request( + "TooManyTables", + format!("Number of tables cannot exceed {num_tables}."), + ) +} diff --git a/crates/common/src/bootstrap_model/index/mod.rs b/crates/common/src/bootstrap_model/index/mod.rs index 21f01a71..8e9ccd51 100644 --- a/crates/common/src/bootstrap_model/index/mod.rs +++ b/crates/common/src/bootstrap_model/index/mod.rs @@ -1,60 +1,29 @@ -//! Index metadata. pub mod database_index; +mod developer_index_config; +mod index_config; +mod index_metadata; +pub mod index_validation_error; pub mod search_index; pub mod vector_index; -use std::{ - collections::{ - BTreeMap, - BTreeSet, - }, - convert::{ - TryFrom, - TryInto, - }, - fmt::Debug, - sync::LazyLock, -}; +use std::sync::LazyLock; -use value::{ - ConvexObject, - ConvexValue, - FieldName, - IdentifierFieldName, - TableId, - TableIdAndTableNumber, - TableIdentifier, -}; +use value::IdentifierFieldName; -use self::{ - database_index::{ - DatabaseIndexState, - DeveloperDatabaseIndexConfig, - }, - search_index::{ - DeveloperSearchIndexConfig, - SearchIndexState, - }, - vector_index::{ - DeveloperVectorIndexConfig, - VectorDimensions, - VectorIndexSnapshotData, - VectorIndexState, +pub use self::{ + developer_index_config::DeveloperIndexConfig, + index_config::IndexConfig, + index_metadata::{ + index_metadata_serialize_table_id, + DeveloperIndexMetadata, + IndexMetadata, + ResolvedIndexMetadata, + TabletIndexMetadata, }, }; use crate::{ - bootstrap_model::index::vector_index::VectorIndexBackfillState, - document::{ - ParsedDocument, - ResolvedDocument, - }, - obj, paths::FieldPath, - types::{ - GenericIndexName, - IndexDescriptor, - TableName, - }, + types::TableName, }; /// Table name for Index data. 
@@ -69,891 +38,3 @@ pub static TABLE_ID_FIELD_PATH: LazyLock = pub const MAX_INDEX_FIELDS_SIZE: usize = 16; pub const MAX_SEARCH_INDEX_FILTER_FIELDS_SIZE: usize = 16; pub const MAX_VECTOR_INDEX_FILTER_FIELDS_SIZE: usize = 16; - -pub mod index_validation_error { - use errors::ErrorMetadata; - use value::{ - TableIdentifier, - TableName, - }; - - use crate::{ - paths::FieldPath, - schemas::IndexSchema, - types::IndexDescriptor, - }; - - pub fn empty_index(table_name: &TableName, index: &IndexSchema) -> ErrorMetadata { - ErrorMetadata::bad_request( - "EmptyIndex", - format!("In table \"{table_name}\" index \"{index}\" must have at least one field."), - ) - } - pub fn fields_not_unique_within_index(field: &FieldPath) -> ErrorMetadata { - ErrorMetadata::bad_request( - "FieldsNotUniqueWithinIndex", - format!("Duplicate field {field}. Index fields must be unique within an index."), - ) - } - pub fn index_not_unique( - table_name: &TableName, - index1: &IndexDescriptor, - index2: &IndexDescriptor, - ) -> ErrorMetadata { - ErrorMetadata::bad_request( - "IndexNotUnique", - format!( - "In table \"{table_name}\" index \"{index1}\" and index \"{index2}\" have the \ - same fields. Indexes must be unique within a table." - ), - ) - } - // IndexFieldsContainId is a more specific version of - // IndexFieldNameReserved. It provides a more actionable error - // message. - pub fn fields_contain_id() -> ErrorMetadata { - ErrorMetadata::bad_request( - "IndexFieldsContainId", - "`_id` is not a valid index field. To load documents by ID, use `db.get(id)`.", - ) - } - // IndexFieldsContainCreationTime is a more specific version of - // IndexFieldNameReserved. It provides a more actionable error message. - pub fn fields_contain_creation_time() -> ErrorMetadata { - ErrorMetadata::bad_request("IndexFieldsContainCreationTime", - "`_creationTime` is automatically added to the end of each index. It should not \ - be added explicitly in the index definition. 
See https://docs.convex.dev/using/indexes \ - for more details." - ) - } - pub fn field_name_reserved() -> ErrorMetadata { - ErrorMetadata::bad_request( - "IndexFieldNameReserved", - "Reserved fields (starting with `_`) are not allowed in indexes.", - ) - } - pub fn search_field_not_unique( - table_name: &TableName, - index1: &IndexDescriptor, - index2: &IndexDescriptor, - ) -> ErrorMetadata { - ErrorMetadata::bad_request( - "SearchIndexFieldNotUnique", - format!( - "In table \"{table_name}\" search index \"{index1}\" and search index \ - \"{index2}\" have the same `searchField`. Search index fields must be unique \ - within a table. You should combine the - indexes with the same `searchField` into one index containing all `filterField`s and \ - then use different subsets of the `filterField`s at query time." - ), - ) - } - pub fn vector_field_not_unique( - table_name: &TableName, - index1: &IndexDescriptor, - index2: &IndexDescriptor, - ) -> ErrorMetadata { - ErrorMetadata::bad_request( - "VectorIndexFieldNotUnique", - format!( - "In table \"{table_name}\" vector index \"{index1}\" and vector index \ - \"{index2}\" have the same `vectorField`. Vector index fields must be unique \ - within a table. You should combine the - indexes with the same `vectorField` into one index containing all `filterField`s and \ - then use different subsets of the `filterField`s at query time." - ), - ) - } - pub fn name_reserved( - table_name: &T, - name: &IndexDescriptor, - ) -> ErrorMetadata { - ErrorMetadata::bad_request( - "IndexNameReserved", - format!( - "In table \"{table_name}\" cannot name an index \"{name}\" because the name is \ - reserved. Indexes may not start with an underscore or be named \"by_id\" or \ - \"by_creation_time\"." 
- ), - ) - } - pub fn names_not_unique(table_name: &TableName, index: &IndexDescriptor) -> ErrorMetadata { - ErrorMetadata::bad_request( - "IndexNamesNotUnique", - format!("Table \"{table_name}\" has two or more definitions of index \"{index}\"."), - ) - } - pub fn invalid_index_name(descriptor: &str) -> ErrorMetadata { - ErrorMetadata::bad_request( - "InvalidIndexName", - format!( - "Invalid index name: \"{descriptor}\". Identifiers must be 64 characters or less, \ - start with a letter, and only contain letters, digits, underscores." - ), - ) - } - pub fn invalid_index_field(descriptor: &IndexDescriptor, field: &str) -> ErrorMetadata { - ErrorMetadata::bad_request( - "InvalidIndexField", - format!("In index \"{descriptor}\": Invalid index field: \"{field}\""), - ) - } - - // TODO - move elsewhere (near table names) - it's not indexing related - pub fn invalid_table_name(table_name: &str) -> ErrorMetadata { - ErrorMetadata::bad_request( - "InvalidTableName", - format!( - "Invalid table name: \"{table_name}\". Identifiers must start with a letter and \ - can only contain letters, digits, and underscores." 
- ), - ) - } - pub fn not_enough_name_components(index_name: &str) -> ErrorMetadata { - ErrorMetadata::bad_request( - "IndexNotEnoughNameComponents", - format!("Insufficient components in index name {index_name}"), - ) - } - pub fn too_many_fields(num_fields: usize) -> ErrorMetadata { - ErrorMetadata::bad_request( - "IndexTooManyFields", - format!("Indexes may have up to {num_fields} fields."), - ) - } - pub fn too_many_filter_fields(num_fields: usize) -> ErrorMetadata { - ErrorMetadata::bad_request( - "IndexTooManyFilterFields", - format!("Search indexes may have up to {num_fields} filter fields."), - ) - } - pub fn too_many_indexes(table_name: &TableName, num_indexes: usize) -> ErrorMetadata { - ErrorMetadata::bad_request( - "TooManyIndexes", - format!("Table \"{table_name}\" cannot have more than {num_indexes} indexes."), - ) - } - pub fn too_many_search_indexes(table_name: &TableName, num_indexes: usize) -> ErrorMetadata { - ErrorMetadata::bad_request( - "TooManySearchIndexes", - format!("Table \"{table_name}\" cannot have more than {num_indexes} search indexes."), - ) - } - pub fn too_many_vector_indexes(table_name: &TableName, num_indexes: usize) -> ErrorMetadata { - ErrorMetadata::bad_request( - "TooManyVectorIndexes", - format!("Table \"{table_name}\" cannot have more than {num_indexes} vector indexes."), - ) - } - pub fn too_many_name_components(index_name: &str) -> ErrorMetadata { - ErrorMetadata::bad_request( - "IndexTooManyNameComponents", - format!("Too many components in index name {index_name}"), - ) - } - - // TODO move elsewhere (near table names) - it's not indexing related - pub fn table_name_reserved(table_name: &TableName) -> ErrorMetadata { - ErrorMetadata::bad_request( - "TableNameReserved", - format!("{table_name} is a reserved table name."), - ) - } - pub fn too_many_total_user_indexes(num_total_indexes: usize) -> ErrorMetadata { - ErrorMetadata::bad_request( - "TooManyTotalIndexes", - format!("Number of total indexes cannot exceed 
{num_total_indexes}."), - ) - } - - // TODO move elsewhere - it's not indexing related - pub fn too_many_tables(num_tables: usize) -> ErrorMetadata { - ErrorMetadata::bad_request( - "TooManyTables", - format!("Number of tables cannot exceed {num_tables}."), - ) - } -} - -// -------------------------------------------------------------------------------- - -//////////////////////////////////////////////////////////////////////////////// - -/// Configuration that depends on the type of index. -#[derive(Debug, Clone, PartialEq, Eq)] -#[cfg_attr(any(test, feature = "testing"), derive(proptest_derive::Arbitrary))] -pub enum IndexConfig { - /// Standard database index. - Database { - developer_config: DeveloperDatabaseIndexConfig, - - /// Whether the index is fully backfilled or not on disk. - on_disk_state: DatabaseIndexState, - }, - - /// Full text search index. - Search { - developer_config: DeveloperSearchIndexConfig, - - /// Whether the index is fully backfilled or not on disk. - on_disk_state: SearchIndexState, - }, - - Vector { - developer_config: DeveloperVectorIndexConfig, - on_disk_state: VectorIndexState, - }, -} - -impl IndexConfig { - pub fn is_enabled(&self) -> bool { - match self { - IndexConfig::Database { on_disk_state, .. } => { - matches!(on_disk_state, DatabaseIndexState::Enabled) - }, - IndexConfig::Search { on_disk_state, .. } => { - matches!(on_disk_state, SearchIndexState::SnapshottedAt(_)) - }, - IndexConfig::Vector { on_disk_state, .. } => { - matches!(on_disk_state, VectorIndexState::SnapshottedAt(_)) - }, - } - } - - pub fn is_backfilling(&self) -> bool { - match self { - IndexConfig::Database { on_disk_state, .. } => { - matches!(on_disk_state, DatabaseIndexState::Backfilling(_)) - }, - IndexConfig::Search { on_disk_state, .. } => { - matches!(on_disk_state, SearchIndexState::Backfilling) - }, - IndexConfig::Vector { on_disk_state, .. 
} => { - matches!(on_disk_state, VectorIndexState::Backfilling(_)) - }, - } - } - - pub fn same_config(&self, config: &IndexConfig) -> bool { - match (self, config) { - ( - IndexConfig::Database { - developer_config, .. - }, - IndexConfig::Database { - developer_config: config_to_compare, - .. - }, - ) => developer_config == config_to_compare, - ( - IndexConfig::Search { - developer_config, .. - }, - IndexConfig::Search { - developer_config: config_to_compare, - .. - }, - ) => developer_config == config_to_compare, - ( - IndexConfig::Vector { - developer_config, .. - }, - IndexConfig::Vector { - developer_config: config_to_compare, - .. - }, - ) => developer_config == config_to_compare, - (..) => false, - } - } - - /// Returns the estimated size of the index in bytes in a manner suitable - /// for usage and pricing. - /// - /// The estimate here may not accurately reflect the actual number of - /// stored bytes and may not be appropriate for estimate resource usage. For - /// example, small dimension vector indexes may have 20% overhead from - /// HNSW indexes that won't be reflected here, but would require - /// additional RAM or disk space to serve. - /// - /// This is only implemented for vector indexes for now. Calling this method - /// on other index types will panic. - pub fn estimate_pricing_size_bytes(&self) -> anyhow::Result { - match self { - IndexConfig::Database { .. } | IndexConfig::Search { .. } => { - // TODO(sam): We should support this for all index types in the future. Right - // now search indexes are free and we estimate the size of - // database indexes. Both of those could instead track usage in their metadata, - // similar to vector indexes. 
- anyhow::bail!("Only supported for vector indexes!") - }, - IndexConfig::Vector { - on_disk_state, - developer_config, - } => match on_disk_state { - VectorIndexState::Backfilling(_) | VectorIndexState::Backfilled(_) => Ok(0), - VectorIndexState::SnapshottedAt(snapshot) => match &snapshot.data { - VectorIndexSnapshotData::Unknown(_) => Ok(0), - VectorIndexSnapshotData::MultiSegment(segments) => segments - .iter() - .map(|segment| segment.non_deleted_size_bytes(developer_config.dimensions)) - .sum::>(), - }, - }, - } - } -} - -impl TryFrom for ConvexObject { - type Error = anyhow::Error; - - fn try_from(index_config: IndexConfig) -> anyhow::Result { - match index_config { - IndexConfig::Database { - developer_config, - on_disk_state, - } => { - let object: ConvexObject = obj!( - "type" => "database", - "onDiskState" => ConvexValue::Object(on_disk_state.try_into()?), - )?; - // TODO: Using merge here is very sketchy. Seems like DeveloperIndexConfig also - // adds "type" but it happens to match the value from above. - object.shallow_merge(ConvexObject::try_from(DeveloperIndexConfig::Database( - developer_config, - ))?) - }, - IndexConfig::Search { - developer_config, - on_disk_state, - } => { - let object = obj!( - "type" => "search", - "onDiskState" => ConvexValue::Object(on_disk_state.try_into()?) - )?; - // TODO: Using merge here is very sketchy. Seems like DeveloperIndexConfig also - // adds "type" but it happens to match the value from above. - object.shallow_merge(ConvexObject::try_from(DeveloperIndexConfig::Search( - developer_config, - ))?) - }, - IndexConfig::Vector { - developer_config, - on_disk_state, - } => { - let object = obj!( - "type" => "vector", - "onDiskState" => ConvexValue::Object(on_disk_state.try_into()?) - )?; - // TODO: Using merge here is very sketchy. Seems like DeveloperIndexConfig also - // adds "type" but it happens to match the value from above. 
- object.shallow_merge(ConvexObject::try_from(DeveloperIndexConfig::Vector( - developer_config, - ))?) - }, - } - } -} - -impl TryFrom for IndexConfig { - type Error = anyhow::Error; - - fn try_from(object: ConvexObject) -> anyhow::Result { - let mut object_fields: BTreeMap<_, _> = object.into(); - let config_type = match object_fields.remove("type") { - Some(ConvexValue::String(s)) => s, - _ => anyhow::bail!("Missing `type` field for IndexConfig: {:?}", object_fields), - }; - - Ok(match config_type.to_string().as_str() { - "database" => { - let fields = match object_fields.remove("fields") { - Some(v) => Vec::::try_from(v)?.try_into()?, - _ => anyhow::bail!( - "Missing `fields` field for IndexConfig: {:?}", - object_fields - ), - }; - let on_disk_state = match object_fields.remove("onDiskState") { - Some(ConvexValue::Object(o)) => o.try_into()?, - _ => anyhow::bail!( - "Invalid or missing`onDiskState` field for IndexConfig: {:?}", - object_fields - ), - }; - IndexConfig::Database { - developer_config: DeveloperDatabaseIndexConfig { fields }, - on_disk_state, - } - }, - "search" => { - let search_field = match object_fields.remove("searchField") { - Some(v) => v.try_into()?, - _ => anyhow::bail!( - "Missing `searchField` field for IndexConfig: {:?}", - object_fields - ), - }; - let filter_fields = match object_fields.remove("filterFields") { - Some(ConvexValue::Array(arr)) => arr - .into_iter() - .map(FieldPath::try_from) - .collect::>>()?, - _ => anyhow::bail!( - "Missing `filterFields` field for IndexConfig: {:?}", - object_fields - ), - }; - let on_disk_state = match object_fields.remove("onDiskState") { - Some(ConvexValue::Object(o)) => o.try_into()?, - _ => anyhow::bail!( - "Invalid or missing`onDiskState` field for IndexConfig: {:?}", - object_fields - ), - }; - IndexConfig::Search { - developer_config: DeveloperSearchIndexConfig { - search_field, - filter_fields, - }, - on_disk_state, - } - }, - "vector" => { - let dimensions = match 
object_fields.remove("dimensions") { - Some(ConvexValue::Int64(dimensions)) => { - VectorDimensions::try_from(u32::try_from(dimensions)?)? - }, - // Support legacy alpha users with the old dimension field. - None => match object_fields.remove("dimension") { - Some(ConvexValue::Int64(dimension)) => { - VectorDimensions::try_from(u32::try_from(dimension)?)? - }, - _ => anyhow::bail!( - "Invalid or missing `dimension` field for IndexConfig: {:?}", - object_fields - ), - }, - _ => anyhow::bail!( - "Invalid or missing `dimensions` field for IndexConfig: {:?}", - object_fields - ), - }; - let vector_field = match object_fields.remove("vectorField") { - Some(v) => v.try_into()?, - _ => anyhow::bail!( - "Missing `vectorField` field for IndexConfig: {:?}", - object_fields - ), - }; - let filter_fields = match object_fields.remove("filterFields") { - Some(ConvexValue::Array(arr)) => arr - .into_iter() - .map(FieldPath::try_from) - .collect::>>()?, - _ => anyhow::bail!( - "Missing `filterFields` field for IndexConfig: {:?}", - object_fields - ), - }; - let on_disk_state = match object_fields.remove("onDiskState") { - Some(ConvexValue::Object(o)) => o.try_into()?, - _ => anyhow::bail!( - "Invalid or missing`onDiskState` field for IndexConfig: {:?}", - object_fields - ), - }; - IndexConfig::Vector { - developer_config: DeveloperVectorIndexConfig { - dimensions, - vector_field, - filter_fields, - }, - on_disk_state, - } - }, - _ => anyhow::bail!("Invalid `type` field for IndexConfig: {:?}", object_fields), - }) - } -} - -// Index config that's specified by the developer -#[derive(Debug, Clone, PartialEq, Eq)] -#[cfg_attr(any(test, feature = "testing"), derive(proptest_derive::Arbitrary))] -pub enum DeveloperIndexConfig { - /// Standard database index. - Database(DeveloperDatabaseIndexConfig), - - /// Full text search index. 
- Search(DeveloperSearchIndexConfig), - - Vector(DeveloperVectorIndexConfig), -} - -impl From for DeveloperIndexConfig { - fn from(value: IndexConfig) -> Self { - match value { - IndexConfig::Database { - developer_config, .. - } => DeveloperIndexConfig::Database(developer_config), - IndexConfig::Search { - developer_config, .. - } => DeveloperIndexConfig::Search(developer_config), - IndexConfig::Vector { - developer_config, .. - } => DeveloperIndexConfig::Vector(developer_config), - } - } -} - -impl TryFrom for ConvexObject { - type Error = anyhow::Error; - - fn try_from(index_config: DeveloperIndexConfig) -> anyhow::Result { - match index_config { - DeveloperIndexConfig::Database(config) => { - obj!( - "type" => "database", - "fields" => config.fields, - ) - }, - DeveloperIndexConfig::Search(config) => { - let filter_fields = config - .filter_fields - .into_iter() - .map(ConvexValue::try_from) - .collect::>>()?; - obj!( - "type" => "search", - "searchField" => config.search_field, - "filterFields" => filter_fields, - ) - }, - DeveloperIndexConfig::Vector(config) => { - let filter_fields = config - .filter_fields - .into_iter() - .map(ConvexValue::try_from) - .collect::>>()?; - obj!( - "type" => "vector", - "dimensions" => (u32::from(config.dimensions) as i64), - "vectorField" => config.vector_field, - "filterFields" => filter_fields, - ) - }, - } - } -} - -impl TryFrom for DeveloperIndexConfig { - type Error = anyhow::Error; - - fn try_from(obj: ConvexObject) -> anyhow::Result { - let mut fields: BTreeMap = obj.into(); - let type_string = match fields.remove("type") { - Some(ConvexValue::String(s)) => s.to_string(), - Some(_) => anyhow::bail!("Invalid value for `type`"), - None => anyhow::bail!("Missing field `type`"), - }; - if type_string == "database" { - let indexed_fields = match fields.remove("fields") { - Some(fields_value) => database_index::IndexedFields::try_from(fields_value)?, - None => anyhow::bail!("Missing field `fields`"), - }; - - 
Ok(DeveloperIndexConfig::Database( - DeveloperDatabaseIndexConfig { - fields: indexed_fields, - }, - )) - } else if type_string == "search" { - let filter_fields = match fields.remove("filterFields") { - Some(ConvexValue::Array(filter_fields_arr)) => filter_fields_arr - .into_iter() - .map(FieldPath::try_from) - .collect::>>()?, - Some(_) => anyhow::bail!("Invalid value for filterFields"), - None => anyhow::bail!("Missing field filterFields"), - }; - let search_field = match fields.remove("searchField") { - Some(val) => FieldPath::try_from(val)?, - None => anyhow::bail!("Missing field searchField"), - }; - return Ok(DeveloperIndexConfig::Search(DeveloperSearchIndexConfig { - search_field, - filter_fields, - })); - } else if type_string == "vector" { - let dimensions = match fields.remove("dimensions") { - Some(ConvexValue::Int64(dimensions)) => { - VectorDimensions::try_from(u32::try_from(dimensions)?)? - }, - // Support legacy alpha users with the old dimension field. - None => match fields.remove("dimension") { - Some(ConvexValue::Int64(dimension)) => { - VectorDimensions::try_from(u32::try_from(dimension)?)? 
- }, - _ => anyhow::bail!("Invalid value for dimension"), - }, - _ => anyhow::bail!("Invalid value for dimensions"), - }; - let filter_fields = match fields.remove("filterFields") { - Some(ConvexValue::Array(filter_fields)) => filter_fields - .into_iter() - .map(FieldPath::try_from) - .collect::>>()?, - _ => anyhow::bail!("Invalid value for filterFields"), - }; - let vector_field = match fields.remove("vectorField") { - Some(val) => FieldPath::try_from(val)?, - None => anyhow::bail!("Missing field vectorField"), - }; - return Ok(DeveloperIndexConfig::Vector(DeveloperVectorIndexConfig { - dimensions, - vector_field, - filter_fields, - })); - } else { - anyhow::bail!("Unknown type {type_string}") - } - } -} - -impl TryFrom for DeveloperIndexConfig { - type Error = anyhow::Error; - - fn try_from(value: ConvexValue) -> Result { - if let ConvexValue::Object(obj) = value { - obj.try_into() - } else { - anyhow::bail!("Invalid value for DeveloperIndexConfig") - } - } -} - -impl TryFrom for ConvexValue { - type Error = anyhow::Error; - - fn try_from(value: DeveloperIndexConfig) -> Result { - Ok(ConvexObject::try_from(value)?.into()) - } -} - -pub type ResolvedIndexMetadata = IndexMetadata; -pub type TabletIndexMetadata = IndexMetadata; -pub type DeveloperIndexMetadata = IndexMetadata; - -impl From for IndexMetadata { - fn from(value: ResolvedIndexMetadata) -> Self { - Self { - name: value.name.into(), - config: value.config, - } - } -} - -impl ResolvedIndexMetadata { - pub fn from_document( - f: impl Fn(TableId) -> anyhow::Result, - document: ResolvedDocument, - ) -> anyhow::Result> { - let index_metadata_: ParsedDocument = document.try_into()?; - let index_metadata: ParsedDocument = index_metadata_.map(|d| d.map_table(&f))?; - Ok(index_metadata) - } -} - -impl TabletIndexMetadata { - pub fn from_document(document: ResolvedDocument) -> anyhow::Result> { - document.try_into() - } -} -/// In-memory representation of an index's metadata. 
-#[derive(Debug, Clone, PartialEq, Eq)] -#[cfg_attr(any(test, feature = "testing"), derive(proptest_derive::Arbitrary))] -pub struct IndexMetadata { - /// Unique name for the index. - pub name: GenericIndexName, - - /// Configuration that depends on the type of index. - pub config: IndexConfig, -} - -impl IndexMetadata { - pub fn new_backfilling( - name: GenericIndexName, - fields: database_index::IndexedFields, - ) -> Self { - Self { - name, - config: IndexConfig::Database { - developer_config: DeveloperDatabaseIndexConfig { fields }, - on_disk_state: DatabaseIndexState::Backfilling( - database_index::DatabaseIndexBackfillState {}, - ), - }, - } - } - - pub fn new_backfilling_search_index( - name: GenericIndexName, - search_field: FieldPath, - filter_fields: BTreeSet, - ) -> Self { - Self::new_search_index( - name, - DeveloperSearchIndexConfig { - search_field, - filter_fields, - }, - SearchIndexState::Backfilling, - ) - } - - pub fn new_backfilling_vector_index( - name: GenericIndexName, - vector_field: FieldPath, - dimensions: VectorDimensions, - filter_fields: BTreeSet, - ) -> Self { - Self { - name, - config: IndexConfig::Vector { - developer_config: DeveloperVectorIndexConfig { - dimensions, - vector_field, - filter_fields, - }, - on_disk_state: VectorIndexState::Backfilling(VectorIndexBackfillState { - segments: vec![], - cursor: None, - backfill_snapshot_ts: None, - }), - }, - } - } - - pub fn new_search_index( - name: GenericIndexName, - developer_config: DeveloperSearchIndexConfig, - on_disk_state: SearchIndexState, - ) -> Self { - Self { - name, - config: IndexConfig::Search { - developer_config, - on_disk_state, - }, - } - } - - pub fn new_enabled(name: GenericIndexName, fields: database_index::IndexedFields) -> Self { - Self { - name, - config: IndexConfig::Database { - developer_config: DeveloperDatabaseIndexConfig { fields }, - on_disk_state: DatabaseIndexState::Enabled, - }, - } - } - - pub fn is_database_index(&self) -> bool { - 
matches!(self.config, IndexConfig::Database { .. }) - } - - pub fn is_search_index(&self) -> bool { - matches!(self.config, IndexConfig::Search { .. }) - } - - pub fn is_vector_index(&self) -> bool { - matches!(self.config, IndexConfig::Vector { .. }) - } - - pub fn map_table( - self, - f: &impl Fn(T) -> anyhow::Result, - ) -> anyhow::Result> { - Ok(IndexMetadata { - name: self.name.map_table(f)?, - config: self.config, - }) - } -} - -pub fn index_metadata_serialize_table_id(table_id: &TableId) -> anyhow::Result { - ConvexValue::try_from(table_id.to_string()) -} - -impl TryFrom for ConvexObject { - type Error = anyhow::Error; - - fn try_from(m: TabletIndexMetadata) -> anyhow::Result { - let name = m.name; - obj!( - // New format: write table_id(v5) + descriptor. - *TABLE_ID_FIELD_NAME => index_metadata_serialize_table_id(name.table())?, - "descriptor" => name.descriptor().to_string(), - "config" => ConvexObject::try_from(m.config)? - ) - } -} - -impl TryFrom for IndexMetadata { - type Error = anyhow::Error; - - fn try_from(o: ConvexObject) -> Result { - let mut object_fields: BTreeMap<_, _> = o.into(); - let table_id: TableId = match object_fields.remove("table_id") { - Some(ConvexValue::String(s)) => s.parse()?, - _ => anyhow::bail!( - "Missing or invalid `table_id` field for IndexMetadata: {:?}", - object_fields - ), - }; - let descriptor: IndexDescriptor = match object_fields.remove("descriptor") { - Some(ConvexValue::String(d)) => d.parse()?, - _ => anyhow::bail!( - "Missing or invalid `table_id` field for IndexMetadata: {:?}", - object_fields - ), - }; - let name = if descriptor.is_reserved() { - GenericIndexName::new_reserved(table_id, descriptor) - } else { - GenericIndexName::new(table_id, descriptor) - }?; - let config = match object_fields.remove("config") { - Some(ConvexValue::Object(config)) => IndexConfig::try_from(config)?, - _ => anyhow::bail!( - "Missing or invalid `config` field for IndexMetadata: {:?}", - object_fields - ), - }; - - Ok(Self { 
name, config }) - } -} - -#[cfg(test)] -mod tests { - use cmd_util::env::env_config; - use proptest::prelude::*; - use value::ConvexObject; - - use super::*; - use crate::testing::assert_roundtrips; - - proptest! { - #![proptest_config(ProptestConfig { cases: 64 * env_config("CONVEX_PROPTEST_MULTIPLIER", 1), failure_persistence: None, .. ProptestConfig::default() })] - #[test] - fn test_indexed_config_roundtrips(config in any::()) { - assert_roundtrips::(config); - } - - #[test] - fn test_developer_index_config_roundtrips(config in any::()) { - assert_roundtrips::(config); - } - } -} diff --git a/crates/common/src/bootstrap_model/index/search_index.rs b/crates/common/src/bootstrap_model/index/search_index.rs deleted file mode 100644 index d5925e43..00000000 --- a/crates/common/src/bootstrap_model/index/search_index.rs +++ /dev/null @@ -1,232 +0,0 @@ -use std::{ - collections::{ - BTreeMap, - BTreeSet, - }, - convert::TryFrom, -}; - -use value::{ - obj, - ConvexObject, - ConvexValue, - FieldName, -}; - -use crate::{ - paths::FieldPath, - types::{ - ObjectKey, - PersistenceVersion, - Timestamp, - }, -}; - -#[derive(Debug, Clone, PartialEq, Eq)] -#[cfg_attr(any(test, feature = "testing"), derive(proptest_derive::Arbitrary))] -pub struct DeveloperSearchIndexConfig { - /// The field to index for full text search. - pub search_field: FieldPath, - - /// Other fields to index for equality filtering. - pub filter_fields: BTreeSet, -} - -/// The state of a search index. -/// Search indexes begin in `Backfilling`. -/// Once the backfill completes, we'll have a snapshot at a timestamp which -/// continually moves forward. 
-#[derive(Debug, Clone, PartialEq, Eq)] -#[cfg_attr(any(test, feature = "testing"), derive(proptest_derive::Arbitrary))] -pub enum SearchIndexState { - Backfilling, - Backfilled(SearchIndexSnapshot), - SnapshottedAt(SearchIndexSnapshot), -} - -#[derive(Debug, Clone, PartialEq, Eq)] -#[cfg_attr(any(test, feature = "testing"), derive(proptest_derive::Arbitrary))] -pub struct SearchIndexSnapshot { - pub index: ObjectKey, - pub ts: Timestamp, - pub version: SearchSnapshotVersion, -} - -#[derive(Copy, Clone, Eq, PartialEq, Debug)] -#[cfg_attr(any(test, feature = "testing"), derive(proptest_derive::Arbitrary))] -pub enum SearchSnapshotVersion { - /// V0 is the original version for search snapshots. - /// In particular, it interprets missing fields as null. - V0, - /// V1 interprets missing fields as undefined. - V1MissingAsUndefined, - /// V2 uses string IDs - V2UseStringIds, -} - -impl SearchSnapshotVersion { - pub fn new(persistence_version: PersistenceVersion) -> Self { - // Add a new SearchSnapshotVersion if the index key format changes between - // different persistence versions. 
- match persistence_version { - PersistenceVersion::V5 => Self::V2UseStringIds, - } - } - - pub fn to_code(&self) -> i64 { - match self { - Self::V0 => 0, - Self::V1MissingAsUndefined => 1, - Self::V2UseStringIds => 2, - } - } - - pub fn from_code(code: i64) -> anyhow::Result { - match code { - 0 => Ok(Self::V0), - 1 => Ok(Self::V1MissingAsUndefined), - 2 => Ok(Self::V2UseStringIds), - _ => anyhow::bail!("unrecognized search snapshot version {code:?}"), - } - } -} - -impl TryFrom for ConvexObject { - type Error = anyhow::Error; - - fn try_from(state: SearchIndexState) -> Result { - match state { - SearchIndexState::Backfilling => obj!( - "state" => "backfilling", - ), - SearchIndexState::Backfilled(snapshot) => snapshot_to_object("backfilled", &snapshot), - SearchIndexState::SnapshottedAt(snapshot) => { - snapshot_to_object("snapshotted", &snapshot) - }, - } - } -} - -pub(crate) fn snapshot_to_object( - state: &str, - snapshot: &SearchIndexSnapshot, -) -> anyhow::Result { - // This structure is intentionally flat for backwards compatibility. 
- obj!( - "state" => state, - "index" => snapshot.index.to_string(), - "ts" => ConvexValue::Int64(snapshot.ts.into()), - "version" => snapshot.version.to_code(), - ) -} - -pub(crate) fn snapshot_from_object( - mut object_fields: BTreeMap, -) -> anyhow::Result { - let index: ObjectKey = match object_fields.remove("index") { - Some(ConvexValue::String(s)) => String::from(s).try_into()?, - _ => anyhow::bail!( - "Invalid or missing `index` field for SearchIndexState: {:?}", - object_fields - ), - }; - let ts: Timestamp = match object_fields.remove("ts") { - Some(ConvexValue::Int64(i)) => i.try_into()?, - _ => anyhow::bail!( - "Invalid or missing `ts` field for SearchIndexState: {:?}", - object_fields - ), - }; - let version = match object_fields.remove("version") { - Some(ConvexValue::Int64(i)) => SearchSnapshotVersion::from_code(i)?, - _ => anyhow::bail!( - "Invalid or missing `version` field for SearchIndexState: {:?}", - object_fields - ), - }; - Ok(SearchIndexSnapshot { index, ts, version }) -} - -impl TryFrom for SearchIndexState { - type Error = anyhow::Error; - - fn try_from(object: ConvexObject) -> Result { - let mut object_fields: BTreeMap<_, _> = object.into(); - let state = match object_fields.remove("state") { - Some(ConvexValue::String(s)) => s, - _ => anyhow::bail!( - "Missing `state` field for SearchIndexState: {:?}", - object_fields - ), - }; - Ok(match state.to_string().as_str() { - "backfilling" => SearchIndexState::Backfilling, - "backfilled" => { - let snapshot = snapshot_from_object(object_fields)?; - SearchIndexState::Backfilled(snapshot) - }, - "snapshotted" => { - let snapshot = snapshot_from_object(object_fields)?; - SearchIndexState::SnapshottedAt(snapshot) - }, - _ => anyhow::bail!( - "Invalid `state` field for SearchIndexState: {:?}", - object_fields - ), - }) - } -} - -impl TryFrom for DeveloperSearchIndexConfig { - type Error = anyhow::Error; - - fn try_from(proto: pb::searchlight::SearchIndexConfig) -> anyhow::Result { - 
Ok(DeveloperSearchIndexConfig { - search_field: proto - .search_field_path - .ok_or_else(|| anyhow::format_err!("Missing search_field_path"))? - .try_into()?, - filter_fields: proto - .filter_fields - .into_iter() - .map(|i| i.try_into()) - .collect::, _>>()? - .into_iter() - .collect(), - }) - } -} - -impl From for pb::searchlight::SearchIndexConfig { - fn from(config: DeveloperSearchIndexConfig) -> Self { - pb::searchlight::SearchIndexConfig { - search_field_path: Some(config.search_field.into()), - filter_fields: config - .filter_fields - .into_iter() - .map(|f| f.into()) - .collect::>(), - } - } -} - -#[cfg(test)] -mod tests { - use cmd_util::env::env_config; - use proptest::prelude::*; - use sync_types::testing::assert_roundtrips; - - use super::*; - - proptest! { - #![proptest_config(ProptestConfig { cases: 64 * env_config("CONVEX_PROPTEST_MULTIPLIER", 1), failure_persistence: None, .. ProptestConfig::default() })] - - #[test] - fn test_developer_search_index_config_roundtrips(v in any::()) { - assert_roundtrips::< - DeveloperSearchIndexConfig, - pb::searchlight::SearchIndexConfig - >(v); - } - } -} diff --git a/crates/common/src/bootstrap_model/index/search_index/index_config.rs b/crates/common/src/bootstrap_model/index/search_index/index_config.rs new file mode 100644 index 00000000..fcb01434 --- /dev/null +++ b/crates/common/src/bootstrap_model/index/search_index/index_config.rs @@ -0,0 +1,93 @@ +use std::{ + collections::BTreeSet, + convert::TryFrom, +}; + +use serde::{ + Deserialize, + Serialize, +}; +use value::codegen_convex_serialization; + +use crate::paths::FieldPath; + +#[derive(Debug, Clone, PartialEq, Eq)] +#[cfg_attr(any(test, feature = "testing"), derive(proptest_derive::Arbitrary))] +pub struct DeveloperSearchIndexConfig { + /// The field to index for full text search. + pub search_field: FieldPath, + + /// Other fields to index for equality filtering. 
+ pub filter_fields: BTreeSet, +} + +#[derive(Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct SerializedDeveloperSearchIndexConfig { + search_field: String, + filter_fields: Vec, +} + +impl TryFrom for SerializedDeveloperSearchIndexConfig { + type Error = anyhow::Error; + + fn try_from(config: DeveloperSearchIndexConfig) -> anyhow::Result { + Ok(Self { + search_field: config.search_field.into(), + filter_fields: config.filter_fields.into_iter().map(String::from).collect(), + }) + } +} + +impl TryFrom for DeveloperSearchIndexConfig { + type Error = anyhow::Error; + + fn try_from(config: SerializedDeveloperSearchIndexConfig) -> anyhow::Result { + Ok(Self { + search_field: config.search_field.parse()?, + filter_fields: config + .filter_fields + .into_iter() + .map(|p| p.parse()) + .collect::>>()?, + }) + } +} + +codegen_convex_serialization!( + DeveloperSearchIndexConfig, + SerializedDeveloperSearchIndexConfig +); + +impl TryFrom for DeveloperSearchIndexConfig { + type Error = anyhow::Error; + + fn try_from(proto: pb::searchlight::SearchIndexConfig) -> anyhow::Result { + Ok(DeveloperSearchIndexConfig { + search_field: proto + .search_field_path + .ok_or_else(|| anyhow::format_err!("Missing search_field_path"))? + .try_into()?, + filter_fields: proto + .filter_fields + .into_iter() + .map(|i| i.try_into()) + .collect::, _>>()? 
+ .into_iter() + .collect(), + }) + } +} + +impl From for pb::searchlight::SearchIndexConfig { + fn from(config: DeveloperSearchIndexConfig) -> Self { + pb::searchlight::SearchIndexConfig { + search_field_path: Some(config.search_field.into()), + filter_fields: config + .filter_fields + .into_iter() + .map(|f| f.into()) + .collect::>(), + } + } +} diff --git a/crates/common/src/bootstrap_model/index/search_index/index_snapshot.rs b/crates/common/src/bootstrap_model/index/search_index/index_snapshot.rs new file mode 100644 index 00000000..5bd35641 --- /dev/null +++ b/crates/common/src/bootstrap_model/index/search_index/index_snapshot.rs @@ -0,0 +1,91 @@ +use std::convert::TryFrom; + +use serde::{ + Deserialize, + Serialize, +}; + +use crate::types::{ + ObjectKey, + PersistenceVersion, + Timestamp, +}; + +#[derive(Debug, Clone, PartialEq, Eq)] +#[cfg_attr(any(test, feature = "testing"), derive(proptest_derive::Arbitrary))] +pub struct SearchIndexSnapshot { + pub index: ObjectKey, + pub ts: Timestamp, + pub version: SearchSnapshotVersion, +} + +#[derive(Copy, Clone, Eq, PartialEq, Debug)] +#[cfg_attr(any(test, feature = "testing"), derive(proptest_derive::Arbitrary))] +pub enum SearchSnapshotVersion { + /// V0 is the original version for search snapshots. + /// In particular, it interprets missing fields as null. + V0, + /// V1 interprets missing fields as undefined. + V1MissingAsUndefined, + /// V2 uses string IDs + V2UseStringIds, +} + +impl SearchSnapshotVersion { + pub fn new(persistence_version: PersistenceVersion) -> Self { + // Add a new SearchSnapshotVersion if the index key format changes between + // different persistence versions. 
+ match persistence_version { + PersistenceVersion::V5 => Self::V2UseStringIds, + } + } + + pub fn to_code(&self) -> i64 { + match self { + Self::V0 => 0, + Self::V1MissingAsUndefined => 1, + Self::V2UseStringIds => 2, + } + } + + pub fn from_code(code: i64) -> anyhow::Result { + match code { + 0 => Ok(Self::V0), + 1 => Ok(Self::V1MissingAsUndefined), + 2 => Ok(Self::V2UseStringIds), + _ => anyhow::bail!("unrecognized search snapshot version {code:?}"), + } + } +} + +#[derive(Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct SerializedSearchIndexSnapshot { + index: String, + ts: i64, + version: i64, +} + +impl TryFrom for SerializedSearchIndexSnapshot { + type Error = anyhow::Error; + + fn try_from(snapshot: SearchIndexSnapshot) -> Result { + Ok(Self { + index: snapshot.index.to_string(), + ts: snapshot.ts.into(), + version: snapshot.version.to_code(), + }) + } +} + +impl TryFrom for SearchIndexSnapshot { + type Error = anyhow::Error; + + fn try_from(serialized: SerializedSearchIndexSnapshot) -> Result { + Ok(Self { + index: serialized.index.try_into()?, + ts: serialized.ts.try_into()?, + version: SearchSnapshotVersion::from_code(serialized.version)?, + }) + } +} diff --git a/crates/common/src/bootstrap_model/index/search_index/index_state.rs b/crates/common/src/bootstrap_model/index/search_index/index_state.rs new file mode 100644 index 00000000..7900695a --- /dev/null +++ b/crates/common/src/bootstrap_model/index/search_index/index_state.rs @@ -0,0 +1,72 @@ +use std::convert::TryFrom; + +use serde::{ + Deserialize, + Serialize, +}; +use value::codegen_convex_serialization; + +use super::{ + index_snapshot::SerializedSearchIndexSnapshot, + SearchIndexSnapshot, +}; + +/// The state of a search index. +/// Search indexes begin in `Backfilling`. +/// Once the backfill completes, we'll have a snapshot at a timestamp which +/// continually moves forward. 
+#[derive(Debug, Clone, PartialEq, Eq)] +#[cfg_attr(any(test, feature = "testing"), derive(proptest_derive::Arbitrary))] +pub enum SearchIndexState { + Backfilling, + Backfilled(SearchIndexSnapshot), + SnapshottedAt(SearchIndexSnapshot), +} + +#[derive(Serialize, Deserialize)] +#[serde(tag = "state", rename_all = "camelCase")] +pub enum SerializedSearchIndexState { + Backfilling, + Backfilled { + #[serde(flatten)] + snapshot: SerializedSearchIndexSnapshot, + }, + Snapshotted { + #[serde(flatten)] + snapshot: SerializedSearchIndexSnapshot, + }, +} + +impl TryFrom for SerializedSearchIndexState { + type Error = anyhow::Error; + + fn try_from(state: SearchIndexState) -> Result { + Ok(match state { + SearchIndexState::Backfilling => SerializedSearchIndexState::Backfilling, + SearchIndexState::Backfilled(snapshot) => SerializedSearchIndexState::Backfilled { + snapshot: snapshot.try_into()?, + }, + SearchIndexState::SnapshottedAt(snapshot) => SerializedSearchIndexState::Snapshotted { + snapshot: snapshot.try_into()?, + }, + }) + } +} + +impl TryFrom for SearchIndexState { + type Error = anyhow::Error; + + fn try_from(serialized: SerializedSearchIndexState) -> Result { + Ok(match serialized { + SerializedSearchIndexState::Backfilling => SearchIndexState::Backfilling, + SerializedSearchIndexState::Backfilled { snapshot } => { + SearchIndexState::Backfilled(snapshot.try_into()?) + }, + SerializedSearchIndexState::Snapshotted { snapshot } => { + SearchIndexState::SnapshottedAt(snapshot.try_into()?) 
+ }, + }) + } +} + +codegen_convex_serialization!(SearchIndexState, SerializedSearchIndexState); diff --git a/crates/common/src/bootstrap_model/index/search_index/mod.rs b/crates/common/src/bootstrap_model/index/search_index/mod.rs new file mode 100644 index 00000000..0ebc1644 --- /dev/null +++ b/crates/common/src/bootstrap_model/index/search_index/mod.rs @@ -0,0 +1,39 @@ +mod index_config; +mod index_snapshot; +mod index_state; + +pub use self::{ + index_config::{ + DeveloperSearchIndexConfig, + SerializedDeveloperSearchIndexConfig, + }, + index_snapshot::{ + SearchIndexSnapshot, + SearchSnapshotVersion, + }, + index_state::{ + SearchIndexState, + SerializedSearchIndexState, + }, +}; + +#[cfg(test)] +mod tests { + use cmd_util::env::env_config; + use proptest::prelude::*; + use sync_types::testing::assert_roundtrips; + + use super::*; + + proptest! { + #![proptest_config(ProptestConfig { cases: 64 * env_config("CONVEX_PROPTEST_MULTIPLIER", 1), failure_persistence: None, .. ProptestConfig::default() })] + + #[test] + fn test_developer_search_index_config_roundtrips(v in any::()) { + assert_roundtrips::< + DeveloperSearchIndexConfig, + pb::searchlight::SearchIndexConfig + >(v); + } + } +} diff --git a/crates/common/src/bootstrap_model/index/vector_index.rs b/crates/common/src/bootstrap_model/index/vector_index.rs deleted file mode 100644 index ad6641fb..00000000 --- a/crates/common/src/bootstrap_model/index/vector_index.rs +++ /dev/null @@ -1,668 +0,0 @@ -use std::{ - collections::{ - BTreeMap, - BTreeSet, - }, - ops::Deref, - str::FromStr, -}; - -use anyhow::Context; -use errors::ErrorMetadata; -use sync_types::Timestamp; -use value::{ - obj, - ConvexObject, - ConvexValue, - FieldName, - FieldPath, - InternalId, -}; - -use crate::types::ObjectKey; - -pub const MIN_VECTOR_DIMENSIONS: u32 = 2; -pub const MAX_VECTOR_DIMENSIONS: u32 = 4096; - -#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] -#[cfg_attr(any(test, feature = "testing"), 
derive(proptest_derive::Arbitrary))] -pub struct VectorDimensions( - #[cfg_attr( - any(test, feature = "testing"), - proptest(strategy = "MIN_VECTOR_DIMENSIONS..=MAX_VECTOR_DIMENSIONS") - )] - u32, -); - -impl From for usize { - fn from(value: VectorDimensions) -> Self { - value.0 as usize - } -} - -impl From for u32 { - fn from(value: VectorDimensions) -> Self { - value.0 - } -} - -impl Deref for VectorDimensions { - type Target = u32; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl TryFrom for VectorDimensions { - type Error = anyhow::Error; - - fn try_from(value: u32) -> Result { - anyhow::ensure!( - (MIN_VECTOR_DIMENSIONS..=MAX_VECTOR_DIMENSIONS).contains(&value), - ErrorMetadata::bad_request( - "InvalidVectorDimensionError", - format!( - "Dimensions {} must be between {} and {}.", - value, MIN_VECTOR_DIMENSIONS, MAX_VECTOR_DIMENSIONS - ) - ) - ); - Ok(Self(value)) - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -#[cfg_attr(any(test, feature = "testing"), derive(proptest_derive::Arbitrary))] -pub struct DeveloperVectorIndexConfig { - // Dimensions of the vectors - pub dimensions: VectorDimensions, - - /// The field to index for vector search. - pub vector_field: FieldPath, - - /// Other fields to index for equality filtering. 
- pub filter_fields: BTreeSet, -} - -#[derive(Debug, Clone, PartialEq, Eq)] -#[cfg_attr(any(test, feature = "testing"), derive(proptest_derive::Arbitrary))] -pub enum VectorIndexState { - Backfilling(VectorIndexBackfillState), - Backfilled(VectorIndexSnapshot), - SnapshottedAt(VectorIndexSnapshot), -} - -impl VectorIndexState { - pub fn segments(&self) -> anyhow::Result<&Vec> { - match self { - VectorIndexState::Backfilling(backfill_state) => Ok(&backfill_state.segments), - VectorIndexState::Backfilled(snapshot) | VectorIndexState::SnapshottedAt(snapshot) => { - match snapshot.data { - VectorIndexSnapshotData::Unknown(_) => anyhow::bail!("Unknown snapshot data!"), - VectorIndexSnapshotData::MultiSegment(ref segments) => Ok(segments), - } - }, - } - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -#[cfg_attr(any(test, feature = "testing"), derive(proptest_derive::Arbitrary))] -pub struct VectorIndexBackfillState { - pub segments: Vec, - // Both of these variables will be None at the start of backfill. - // They will be set after the first backfill iteration. - pub cursor: Option, - pub backfill_snapshot_ts: Option, -} - -#[derive(Debug, Clone, PartialEq, Eq)] -#[cfg_attr(any(test, feature = "testing"), derive(proptest_derive::Arbitrary))] -pub struct VectorIndexSnapshot { - pub data: VectorIndexSnapshotData, - pub ts: Timestamp, -} - -#[derive(Debug, Clone, PartialEq, Eq)] -#[cfg_attr(any(test, feature = "testing"), derive(proptest_derive::Arbitrary))] -pub enum VectorIndexSnapshotData { - // Some future or previous incompatible version. The contained object is the - // unmodified data that can safely be serialized again without dropping - // unrecognized fields. Because we expect all data to be rollback - // compatible, we have to be robust to future formats that might only be - // recognized by versions ahead of ours. 
- Unknown(ConvexObject), - MultiSegment(Vec), -} - -/// A qdrant Segment that's split into three separate parts, the qdrant Segment -/// which depends on an IdTracker implementation, which depends on a deleted -/// bitset. -/// -/// Each file is stored independently, but they're composed to form a queryable -/// segment. The deleted bitset can be written to independently. The id tracker -/// can be queried independently. Using the segment requires all three files. -#[cfg_attr(any(test, feature = "testing"), derive(proptest_derive::Arbitrary))] -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct FragmentedVectorSegment { - pub segment_key: ObjectKey, - pub id_tracker_key: ObjectKey, - pub deleted_bitset_key: ObjectKey, - pub num_vectors: u32, - pub num_deleted: u32, - // A random UUID that can be used to identify a segment to determine if the - // segment has changed during non-transactional index changes (compaction). - pub id: String, -} - -impl FragmentedVectorSegment { - fn extract_key( - object_fields: &mut BTreeMap, - serialized_field_name: &str, - ) -> anyhow::Result { - match object_fields.remove(serialized_field_name) { - Some(ConvexValue::String(s)) => s.try_into(), - _ => anyhow::bail!( - "Invalid or missing `{serialized_field_name}` field for VectorMultiPartData", - ), - } - } - - pub fn non_deleted_vectors(&self) -> anyhow::Result { - let total_vectors = if self.num_vectors < self.num_deleted { - // Some early segments have been created with num_vectors sent to the initially - // available point count, which excluded deletes. If sufficient vectors are - // deleted, that can result in num_deleted exceeding the initial num_vectors. - // That doesn't strictly mean the segment is empty, but it should be close - // enough and a backfill to fix these segments is complex. 
- Ok(0) - } else { - self.num_vectors - .checked_sub(self.num_deleted) - .ok_or_else(|| { - anyhow::anyhow!( - "Failed to subtract {} from {}", - self.num_deleted, - self.num_vectors - ) - }) - }; - total_vectors.map(|value| value as u64) - } - - /// The estimated size bytes based only on the non-deleted vectors in the - /// segment. - /// - /// The actual size of the segment in s3 will be bigger due both to deleted - /// vectors excluded from this size estimation and also overhead from the - /// HNSW index (if present). Index overhead is larger as a percentage for - /// small dimensional vectors than large dimensional vectors. - pub fn non_deleted_size_bytes(&self, dimensions: VectorDimensions) -> anyhow::Result { - Self::size_bytes(self.non_deleted_vectors()?, dimensions) - } - - /// The estimated size bytes based on both deleted and non-deleted vectors - /// in the segment. - /// - /// The actual size of the segment in s3 will be bigger due to the overhead - /// from the HNSW index (if present). Index overhead is larger as a - /// percentage for small dimensional vectors than large dimensional - /// vectors. - pub fn total_size_bytes(&self, dimensions: VectorDimensions) -> anyhow::Result { - Self::size_bytes(self.num_vectors as u64, dimensions) - } - - fn size_bytes(estimated_vectors: u64, dimensions: VectorDimensions) -> anyhow::Result { - // A little extra paranoia since all of these numbers are not originally u64 and - // can overflow u32. 
- (estimated_vectors) - .checked_mul(dimensions.0 as u64) - .and_then(|value| value.checked_mul(4_u64)) - .context("Overflowed size calculation!") - } - - pub fn to_paths_proto(self) -> anyhow::Result { - Ok(pb::searchlight::FragmentedVectorSegmentPaths { - segment: Some(pb::searchlight::StorageKey { - storage_key: self.segment_key.into(), - }), - id_tracker: Some(pb::searchlight::StorageKey { - storage_key: self.id_tracker_key.into(), - }), - deleted_bitset: Some(pb::searchlight::StorageKey { - storage_key: self.deleted_bitset_key.into(), - }), - }) - } -} - -impl TryFrom for pb::searchlight::FragmentedVectorSegment { - type Error = anyhow::Error; - - fn try_from(value: FragmentedVectorSegment) -> Result { - Ok(Self { - segment_key: value.segment_key.try_into()?, - id_tracker_key: value.id_tracker_key.try_into()?, - deleted_bitset_key: value.deleted_bitset_key.try_into()?, - num_vectors: value.num_vectors, - num_deleted: value.num_deleted, - id: value.id, - }) - } -} - -impl TryFrom for FragmentedVectorSegment { - type Error = anyhow::Error; - - fn try_from(value: pb::searchlight::FragmentedVectorSegment) -> Result { - Ok(Self { - segment_key: value.segment_key.try_into()?, - id_tracker_key: value.id_tracker_key.try_into()?, - deleted_bitset_key: value.deleted_bitset_key.try_into()?, - num_vectors: value.num_vectors, - num_deleted: value.num_deleted, - id: value.id, - }) - } -} - -impl TryFrom for ConvexObject { - type Error = anyhow::Error; - - fn try_from(value: FragmentedVectorSegment) -> Result { - obj!( - "segment_key" => value.segment_key.to_string(), - "id_tracker_key" => value.id_tracker_key.to_string(), - "deleted_bitset_key" => value.deleted_bitset_key.to_string(), - "id" => value.id, - "num_vectors" => (value.num_vectors as i64), - "num_deleted" => (value.num_deleted as i64), - ) - } -} - -impl TryFrom for FragmentedVectorSegment { - type Error = anyhow::Error; - - fn try_from(value: ConvexObject) -> Result { - let mut object_fields: BTreeMap<_, _> = 
value.into(); - let segment_key = Self::extract_key(&mut object_fields, "segment_key")?; - let id_tracker_key = Self::extract_key(&mut object_fields, "id_tracker_key")?; - let deleted_bitset_key = Self::extract_key(&mut object_fields, "deleted_bitset_key")?; - let id = match object_fields.remove("id") { - Some(ConvexValue::String(s)) => String::from(s), - _ => anyhow::bail!( - "Invalid or missing `id` field fo FragmentedVectorSegment: {:?}", - object_fields - ), - }; - let num_vectors = match object_fields.remove("num_vectors") { - Some(ConvexValue::Int64(i)) => i as u32, - _ => anyhow::bail!( - "Invalid or missing `num_vectors` field for FragmentedVectorSegment: {:?}", - object_fields - ), - }; - let num_deleted = match object_fields.remove("num_deleted") { - Some(ConvexValue::Int64(i)) => i as u32, - _ => anyhow::bail!( - "Invalid or missing `num_deleted` field for FragmentedVectorSegment: {:?}", - object_fields - ), - }; - - Ok(Self { - segment_key, - id_tracker_key, - deleted_bitset_key, - id, - num_vectors, - num_deleted, - }) - } -} - -impl VectorIndexSnapshotData { - pub fn is_version_current(&self) -> bool { - let result = matches!(self, VectorIndexSnapshotData::MultiSegment(_)); - if !result { - tracing::warn!( - "Vector version mismatch, stored: {:?}, current: MultiSegment", - self, - ); - } - result - } -} - -impl TryFrom<&ConvexObject> for VectorIndexSnapshotData { - type Error = anyhow::Error; - - fn try_from(value: &ConvexObject) -> Result { - let mut object_fields: BTreeMap<_, _> = value.clone().into(); - let data_type: String = match object_fields.remove("data_type") { - Some(ConvexValue::String(s)) => String::from(s), - _ => anyhow::bail!( - "Invalid or missing `data_type` field for VectorIndexSnapshotData: {:?}", - object_fields - ), - }; - if data_type == "MultiSegment" { - let parts = match object_fields.remove("segments") { - Some(ConvexValue::Array(values)) => values - .into_iter() - .map(|value| ConvexObject::try_from(value)?.try_into()) - 
.try_collect::>()?, - _ => anyhow::bail!( - "Invalid or missing `parts` field for VectorIndexSnapshotData::MultiSegment: \ - {:?}", - object_fields - ), - }; - return Ok(VectorIndexSnapshotData::MultiSegment(parts)); - } - anyhow::bail!( - "Unrecognized vector index snapshot data: {:?}", - object_fields - ); - } -} - -impl TryFrom for ConvexObject { - type Error = anyhow::Error; - - fn try_from(value: VectorIndexSnapshotData) -> anyhow::Result { - match value { - VectorIndexSnapshotData::MultiSegment(parts) => obj!( - "data_type" => "MultiSegment", - "segments" => ConvexValue::Array( - parts.into_iter().map(|value| value.try_into().map(ConvexValue::Object)) - .try_collect::>()? - .try_into()? - ), - ), - // If we're written back, restore whatever data we originally read. - VectorIndexSnapshotData::Unknown(obj) => Ok(obj), - } - } -} - -impl From for VectorIndexSnapshotData { - fn from(value: ConvexObject) -> Self { - match Self::try_from(&value) { - Ok(result) => result, - Err(e) => { - // Fallback to an unknown value that will trigger a rebuild and that can - // pass through the unknown data without modifying it. - tracing::error!("Unrecognized vector index snapshot data: {:?}", e); - VectorIndexSnapshotData::Unknown(value) - }, - } - } -} - -impl TryFrom for ConvexObject { - type Error = anyhow::Error; - - fn try_from(state: VectorIndexState) -> Result { - match state { - VectorIndexState::Backfilling(VectorIndexBackfillState { - segments, - cursor, - backfill_snapshot_ts, - }) => { - let backfill_snapshot_ts = backfill_snapshot_ts - .map(|ts| anyhow::Ok(ConvexValue::Int64(ts.try_into()?))) - .transpose()? - .unwrap_or(ConvexValue::Null); - let segments = ConvexValue::Array( - segments - .into_iter() - .map(|value| value.try_into().map(ConvexValue::Object)) - .try_collect::>()? 
- .try_into()?, - ); - obj!( - "state" => "backfilling", - "document_cursor" => cursor.map(|c| ConvexValue::try_from(c.to_string())).transpose()?.unwrap_or(ConvexValue::Null), - "backfill_snapshot_ts" => backfill_snapshot_ts, - "segments" => segments, - ) - }, - VectorIndexState::Backfilled(snapshot) => snapshot_to_object("backfilled", snapshot), - VectorIndexState::SnapshottedAt(snapshot) => { - snapshot_to_object("snapshotted", snapshot) - }, - } - } -} - -pub fn snapshot_to_object( - state: &str, - snapshot: VectorIndexSnapshot, -) -> anyhow::Result { - match snapshot.data { - VectorIndexSnapshotData::MultiSegment(_) => obj!( - "state" => state, - "ts" => ConvexValue::Int64(snapshot.ts.into()), - "data" => ConvexValue::Object(snapshot.data.try_into()?), - ), - VectorIndexSnapshotData::Unknown(obj) => obj!( - "state" => state, - "ts" => ConvexValue::Int64(snapshot.ts.into()), - "data" => ConvexValue::Object(obj), - ), - } -} - -pub(crate) fn snapshot_from_object( - mut object_fields: BTreeMap, -) -> anyhow::Result { - let data = match object_fields.remove("data") { - Some(ConvexValue::Object(obj)) => obj.into(), - _ => anyhow::bail!( - "Invalid or missing `data` field for VectorIndexSnapshot: {:?}", - object_fields - ), - }; - let ts: Timestamp = match object_fields.remove("ts") { - Some(ConvexValue::Int64(i)) => i.try_into()?, - _ => anyhow::bail!( - "Invalid or missing `ts` field for VectorIndexSnapshot: {:?}", - object_fields - ), - }; - Ok(VectorIndexSnapshot { data, ts }) -} - -impl TryFrom for VectorIndexState { - type Error = anyhow::Error; - - fn try_from(object: ConvexObject) -> Result { - let mut object_fields: BTreeMap<_, _> = object.into(); - let state = match object_fields.remove("state") { - Some(ConvexValue::String(s)) => s, - _ => anyhow::bail!( - "Missing `state` field for VectorIndexState: {:?}", - object_fields - ), - }; - Ok(match state.to_string().as_str() { - "backfilling" => { - // The fields cursor, backfill_snapshot_ts, and segments are 
not present in old - // indexes in Backfilling state. Thus, these all support being deserialized when - // missing using empty defaults (None or vec![]). This allows backfilling to be - // backwards-compatible - let cursor: Option = match object_fields.remove("document_cursor") { - None | Some(ConvexValue::Null) => None, - Some(ConvexValue::String(v)) => Some(InternalId::from_str(&v)?), - Some(_) => anyhow::bail!("expected document_cursor to be string"), - }; - let segments = match object_fields.remove("segments") { - Some(ConvexValue::Array(values)) => values - .into_iter() - .map(|value| ConvexObject::try_from(value)?.try_into()) - .try_collect::>()?, - None => vec![], - v => anyhow::bail!("Invalid `segments` field for VectorIndexState: {:?}", v), - }; - let backfill_snapshot_ts = match object_fields.remove("backfill_snapshot_ts") { - Some(ConvexValue::Int64(ts)) => Some(Timestamp::try_from(ts)?), - None | Some(ConvexValue::Null) => None, - v => anyhow::bail!( - "Invalid `backfill_snapshot_ts` field for VectorIndexState: {:?}", - v - ), - }; - - VectorIndexState::Backfilling(VectorIndexBackfillState { - cursor, - segments, - backfill_snapshot_ts, - }) - }, - "backfilled" => { - let snapshot = snapshot_from_object(object_fields)?; - VectorIndexState::Backfilled(snapshot) - }, - "snapshotted" => { - let snapshot = snapshot_from_object(object_fields)?; - VectorIndexState::SnapshottedAt(snapshot) - }, - _ => anyhow::bail!( - "Invalid `state` field for VectorIndexState: {:?}", - object_fields - ), - }) - } -} - -impl TryFrom for DeveloperVectorIndexConfig { - type Error = anyhow::Error; - - fn try_from(proto: pb::searchlight::VectorIndexConfig) -> anyhow::Result { - Ok(DeveloperVectorIndexConfig { - dimensions: VectorDimensions::try_from(proto.dimension)?, - vector_field: proto - .vector_field_path - .ok_or_else(|| anyhow::format_err!("Missing vector_field_path"))? 
- .try_into()?, - filter_fields: proto - .filter_fields - .into_iter() - .map(|i| i.try_into()) - .collect::, _>>()? - .into_iter() - .collect(), - }) - } -} - -impl From for pb::searchlight::VectorIndexConfig { - fn from(config: DeveloperVectorIndexConfig) -> Self { - pb::searchlight::VectorIndexConfig { - dimension: u32::from(config.dimensions), - vector_field_path: Some(config.vector_field.into()), - filter_fields: config - .filter_fields - .into_iter() - .map(|f| f.into()) - .collect::>(), - } - } -} - -#[cfg(test)] -mod tests { - use cmd_util::env::env_config; - use must_let::must_let; - use proptest::prelude::*; - use sync_types::testing::assert_roundtrips; - use value::assert_obj; - - use super::*; - - fn serialized_index_state_name_having_data() -> impl Strategy { - prop::string::string_regex("backfilled|snapshotted").unwrap() - } - - proptest! { - #![proptest_config(ProptestConfig { cases: 64 * env_config("CONVEX_PROPTEST_MULTIPLIER", 1), failure_persistence: None, .. ProptestConfig::default() })] - - #[test] - fn test_developer_vector_index_config_roundtrips(v in any::()) { - assert_roundtrips::< - DeveloperVectorIndexConfig, - pb::searchlight::VectorIndexConfig - >(v); - } - - #[test] - fn vector_index_state_roundtrips(v in any::()) { - assert_roundtrips::(v) - } - - #[test] - fn from_legacy_resolved_object_fails( - key in any::(), - ts in any::(), - serialized_index_state_name in serialized_index_state_name_having_data(), - ) { - let legacy_object = assert_obj!( - "state" => serialized_index_state_name.as_str(), - "index" => key.to_string(), - "ts" => ConvexValue::Int64(ts.into()), - "version" => 0, - ); - // We don't have an unknown field at the state level, only for data, so we have to let - // this error. 
- assert!(VectorIndexState::try_from(legacy_object).is_err()); - } - - #[test] - fn missing_data_type_defaults_to_unknown( - ts in any::(), - serialized_index_state_name in serialized_index_state_name_having_data(), - ) { - let legacy_object = assert_obj!( - "state" => serialized_index_state_name.as_str(), - "data" => {"something" => "invalid"}, - "ts" => ConvexValue::Int64(ts.into()), - ); - let state: VectorIndexState = legacy_object.try_into().unwrap(); - let snapshot = extract_snapshot(serialized_index_state_name, state); - - must_let!(let VectorIndexSnapshotData::Unknown(_) = snapshot.data); - } - - #[test] - fn unrecognized_data_type_defaults_to_unknown( - ts in any::(), - serialized_index_state_name in serialized_index_state_name_having_data(), - ) { - let legacy_object = assert_obj!( - "state" => serialized_index_state_name.as_str(), - "data" => {"data_type" => "invalid"}, - "ts" => ConvexValue::Int64(ts.into()), - ); - let state: VectorIndexState = legacy_object.try_into().unwrap(); - let snapshot = extract_snapshot(serialized_index_state_name, state); - - must_let!(let VectorIndexSnapshotData::Unknown(_) = snapshot.data); - } - } - - fn extract_snapshot( - expected_index_state: String, - state: VectorIndexState, - ) -> VectorIndexSnapshot { - if expected_index_state == "backfilled" { - must_let!(let VectorIndexState::Backfilled(snapshot) = state); - snapshot - } else { - must_let!(let VectorIndexState::SnapshottedAt(snapshot) = state); - snapshot - } - } -} diff --git a/crates/common/src/bootstrap_model/index/vector_index/backfill_state.rs b/crates/common/src/bootstrap_model/index/vector_index/backfill_state.rs new file mode 100644 index 00000000..fe881f3b --- /dev/null +++ b/crates/common/src/bootstrap_model/index/vector_index/backfill_state.rs @@ -0,0 +1,83 @@ +use std::str::FromStr; + +use serde::{ + Deserialize, + Serialize, +}; +use sync_types::Timestamp; +use value::{ + codegen_convex_serialization, + InternalId, +}; + +use super::segment::{ + 
FragmentedVectorSegment, + SerializedFragmentedVectorSegment, +}; + +#[derive(Debug, Clone, PartialEq, Eq)] +#[cfg_attr(any(test, feature = "testing"), derive(proptest_derive::Arbitrary))] +pub struct VectorIndexBackfillState { + pub segments: Vec, + // Both of these variables will be None at the start of backfill. + // They will be set after the first backfill iteration. + pub cursor: Option, + pub backfill_snapshot_ts: Option, +} + +#[derive(Serialize, Deserialize)] +pub struct SerializedVectorIndexBackfillState { + segments: Option>, + document_cursor: Option, + backfill_snapshot_ts: Option, +} + +impl TryFrom for SerializedVectorIndexBackfillState { + type Error = anyhow::Error; + + fn try_from(backfill_state: VectorIndexBackfillState) -> Result { + Ok(SerializedVectorIndexBackfillState { + segments: Some( + backfill_state + .segments + .into_iter() + .map(|s| s.try_into()) + .collect::>>()?, + ), + document_cursor: backfill_state.cursor.map(|id| id.to_string()), + backfill_snapshot_ts: backfill_state + .backfill_snapshot_ts + .map(|ts| ts.try_into()) + .transpose()?, + }) + } +} + +impl TryFrom for VectorIndexBackfillState { + type Error = anyhow::Error; + + fn try_from(serialized: SerializedVectorIndexBackfillState) -> Result { + // The fields cursor, backfill_snapshot_ts, and segments are not present in old + // indexes in Backfilling state. Thus, these all support being deserialized when + // missing using empty defaults (None or vec![]). This allows backfilling to be + // backwards-compatible. 
+ Ok(VectorIndexBackfillState { + segments: serialized + .segments + .unwrap_or_default() + .into_iter() + .map(|s| s.try_into()) + .collect::>>()?, + cursor: serialized + .document_cursor + .map(|id| InternalId::from_str(&id)) + .transpose()?, + backfill_snapshot_ts: serialized + .backfill_snapshot_ts + .map(Timestamp::try_from) + .transpose()?, + }) + } +} + +codegen_convex_serialization!(VectorIndexBackfillState, SerializedVectorIndexBackfillState); diff --git a/crates/common/src/bootstrap_model/index/vector_index/dimensions.rs b/crates/common/src/bootstrap_model/index/vector_index/dimensions.rs new file mode 100644 index 00000000..00df76b8 --- /dev/null +++ b/crates/common/src/bootstrap_model/index/vector_index/dimensions.rs @@ -0,0 +1,54 @@ +use std::ops::Deref; + +use errors::ErrorMetadata; + +pub const MIN_VECTOR_DIMENSIONS: u32 = 2; +pub const MAX_VECTOR_DIMENSIONS: u32 = 4096; + +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(any(test, feature = "testing"), derive(proptest_derive::Arbitrary))] +pub struct VectorDimensions( + #[cfg_attr( + any(test, feature = "testing"), + proptest(strategy = "MIN_VECTOR_DIMENSIONS..=MAX_VECTOR_DIMENSIONS") + )] + u32, +); + +impl From for usize { + fn from(value: VectorDimensions) -> Self { + value.0 as usize + } +} + +impl From for u32 { + fn from(value: VectorDimensions) -> Self { + value.0 + } +} + +impl Deref for VectorDimensions { + type Target = u32; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl TryFrom for VectorDimensions { + type Error = anyhow::Error; + + fn try_from(value: u32) -> Result { + anyhow::ensure!( + (MIN_VECTOR_DIMENSIONS..=MAX_VECTOR_DIMENSIONS).contains(&value), + ErrorMetadata::bad_request( + "InvalidVectorDimensionError", + format!( + "Dimensions {} must be between {} and {}.", + value, MIN_VECTOR_DIMENSIONS, MAX_VECTOR_DIMENSIONS + ) + ) + ); + Ok(Self(value)) + } +} diff --git a/crates/common/src/bootstrap_model/index/vector_index/index_config.rs 
b/crates/common/src/bootstrap_model/index/vector_index/index_config.rs new file mode 100644 index 00000000..5ede53c7 --- /dev/null +++ b/crates/common/src/bootstrap_model/index/vector_index/index_config.rs @@ -0,0 +1,103 @@ +use std::collections::BTreeSet; + +use serde::{ + Deserialize, + Serialize, +}; +use value::{ + codegen_convex_serialization, + FieldPath, +}; + +use super::VectorDimensions; + +#[derive(Debug, Clone, PartialEq, Eq)] +#[cfg_attr(any(test, feature = "testing"), derive(proptest_derive::Arbitrary))] +pub struct DeveloperVectorIndexConfig { + // Dimensions of the vectors + pub dimensions: VectorDimensions, + + /// The field to index for vector search. + pub vector_field: FieldPath, + + /// Other fields to index for equality filtering. + pub filter_fields: BTreeSet, +} + +#[derive(Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct SerializedDeveloperVectorIndexConfig { + // Support legacy alpha users with the old dimension field. + #[serde(alias = "dimension")] + dimensions: i64, + vector_field: String, + filter_fields: Vec, +} + +impl TryFrom for SerializedDeveloperVectorIndexConfig { + type Error = anyhow::Error; + + fn try_from(config: DeveloperVectorIndexConfig) -> anyhow::Result { + Ok(Self { + dimensions: u32::from(config.dimensions) as i64, + vector_field: config.vector_field.into(), + filter_fields: config.filter_fields.into_iter().map(String::from).collect(), + }) + } +} + +impl TryFrom for DeveloperVectorIndexConfig { + type Error = anyhow::Error; + + fn try_from(config: SerializedDeveloperVectorIndexConfig) -> anyhow::Result { + Ok(Self { + dimensions: VectorDimensions::try_from(u32::try_from(config.dimensions)?)?, + vector_field: config.vector_field.parse()?, + filter_fields: config + .filter_fields + .into_iter() + .map(|p| p.parse()) + .collect::>>()?, + }) + } +} + +codegen_convex_serialization!( + DeveloperVectorIndexConfig, + SerializedDeveloperVectorIndexConfig +); + +impl TryFrom for 
DeveloperVectorIndexConfig { + type Error = anyhow::Error; + + fn try_from(proto: pb::searchlight::VectorIndexConfig) -> anyhow::Result { + Ok(DeveloperVectorIndexConfig { + dimensions: VectorDimensions::try_from(proto.dimension)?, + vector_field: proto + .vector_field_path + .ok_or_else(|| anyhow::format_err!("Missing vector_field_path"))? + .try_into()?, + filter_fields: proto + .filter_fields + .into_iter() + .map(|i| i.try_into()) + .collect::, _>>()? + .into_iter() + .collect(), + }) + } +} + +impl From for pb::searchlight::VectorIndexConfig { + fn from(config: DeveloperVectorIndexConfig) -> Self { + pb::searchlight::VectorIndexConfig { + dimension: u32::from(config.dimensions), + vector_field_path: Some(config.vector_field.into()), + filter_fields: config + .filter_fields + .into_iter() + .map(|f| f.into()) + .collect::>(), + } + } +} diff --git a/crates/common/src/bootstrap_model/index/vector_index/index_snapshot.rs b/crates/common/src/bootstrap_model/index/vector_index/index_snapshot.rs new file mode 100644 index 00000000..ed5814a4 --- /dev/null +++ b/crates/common/src/bootstrap_model/index/vector_index/index_snapshot.rs @@ -0,0 +1,153 @@ +use serde::{ + Deserialize, + Serialize, +}; +use sync_types::Timestamp; +use value::{ + serde::WithUnknown, + ConvexObject, +}; + +use super::segment::{ + FragmentedVectorSegment, + SerializedFragmentedVectorSegment, +}; + +#[derive(Debug, Clone, PartialEq, Eq)] +#[cfg_attr(any(test, feature = "testing"), derive(proptest_derive::Arbitrary))] +pub struct VectorIndexSnapshot { + pub data: VectorIndexSnapshotData, + pub ts: Timestamp, +} + +#[derive(Serialize, Deserialize)] +pub struct SerializedVectorIndexSnapshot { + data: WithUnknown, + ts: i64, +} + +impl TryFrom for SerializedVectorIndexSnapshot { + type Error = anyhow::Error; + + fn try_from(value: VectorIndexSnapshot) -> Result { + Ok(SerializedVectorIndexSnapshot { + ts: value.ts.into(), + data: WithUnknown::::try_from(value.data)?, + }) + } +} + +impl TryFrom for 
VectorIndexSnapshot { + type Error = anyhow::Error; + + fn try_from(value: SerializedVectorIndexSnapshot) -> Result { + Ok(VectorIndexSnapshot { + ts: value.ts.try_into()?, + data: value.data.try_into()?, + }) + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum VectorIndexSnapshotData { + MultiSegment(Vec), + Unknown(ConvexObject), +} + +#[cfg(any(test, feature = "testing"))] +mod proptest { + use proptest::{ + prelude::*, + sample::size_range, + }; + use value::{ + ConvexObject, + ExcludeSetsAndMaps, + FieldType, + }; + + use super::VectorIndexSnapshotData; + use crate::bootstrap_model::index::vector_index::FragmentedVectorSegment; + + impl Arbitrary for VectorIndexSnapshotData { + type Parameters = (); + type Strategy = BoxedStrategy; + + fn arbitrary_with(_args: Self::Parameters) -> Self::Strategy { + prop_oneof![ + any::>() + .prop_map(VectorIndexSnapshotData::MultiSegment), + any_with::(( + size_range(0..=4), + FieldType::User, + ExcludeSetsAndMaps(true) + )) + .prop_map(VectorIndexSnapshotData::Unknown), + ] + .boxed() + } + } +} + +impl VectorIndexSnapshotData { + pub fn is_version_current(&self) -> bool { + let result = matches!(self, VectorIndexSnapshotData::MultiSegment(_)); + if !result { + tracing::warn!( + "Vector version mismatch, stored: {:?}, current: MultiSegment", + self, + ); + } + result + } +} + +#[derive(Serialize, Deserialize)] +#[serde(tag = "data_type", rename_all = "PascalCase")] +enum SerializedVectorIndexSnapshotData { + MultiSegment { + segments: Vec, + }, +} + +impl TryFrom for WithUnknown { + type Error = anyhow::Error; + + fn try_from(value: VectorIndexSnapshotData) -> Result { + match value { + VectorIndexSnapshotData::MultiSegment(segments) => { + let serialized_segments: Vec = segments + .into_iter() + .map(SerializedFragmentedVectorSegment::try_from) + .collect::>>()?; + Ok(WithUnknown::Known( + SerializedVectorIndexSnapshotData::MultiSegment { + segments: serialized_segments, + }, + )) + }, + 
VectorIndexSnapshotData::Unknown(unknown) => Ok(WithUnknown::Unknown(unknown)), + } + } +} + +impl TryFrom> for VectorIndexSnapshotData { + type Error = anyhow::Error; + + fn try_from( + value: WithUnknown, + ) -> Result { + match value { + WithUnknown::Known(SerializedVectorIndexSnapshotData::MultiSegment { + segments: serialized_segments, + }) => { + let segments: Vec = serialized_segments + .into_iter() + .map(FragmentedVectorSegment::try_from) + .collect::>>()?; + Ok(VectorIndexSnapshotData::MultiSegment(segments)) + }, + WithUnknown::Unknown(unknown) => Ok(VectorIndexSnapshotData::Unknown(unknown)), + } + } +} diff --git a/crates/common/src/bootstrap_model/index/vector_index/index_state.rs b/crates/common/src/bootstrap_model/index/vector_index/index_state.rs new file mode 100644 index 00000000..90e2b1f9 --- /dev/null +++ b/crates/common/src/bootstrap_model/index/vector_index/index_state.rs @@ -0,0 +1,101 @@ +use serde::{ + Deserialize, + Serialize, +}; +use value::codegen_convex_serialization; + +use super::{ + backfill_state::{ + SerializedVectorIndexBackfillState, + VectorIndexBackfillState, + }, + index_snapshot::{ + SerializedVectorIndexSnapshot, + VectorIndexSnapshot, + VectorIndexSnapshotData, + }, + segment::FragmentedVectorSegment, +}; + +#[derive(Debug, Clone, PartialEq, Eq)] +#[cfg_attr(any(test, feature = "testing"), derive(proptest_derive::Arbitrary))] +pub enum VectorIndexState { + Backfilling(VectorIndexBackfillState), + Backfilled(VectorIndexSnapshot), + SnapshottedAt(VectorIndexSnapshot), +} + +impl VectorIndexState { + pub fn segments(&self) -> anyhow::Result<&Vec> { + match self { + VectorIndexState::Backfilling(backfill_state) => Ok(&backfill_state.segments), + VectorIndexState::Backfilled(snapshot) | VectorIndexState::SnapshottedAt(snapshot) => { + match snapshot.data { + VectorIndexSnapshotData::MultiSegment(ref segments) => Ok(segments), + VectorIndexSnapshotData::Unknown(_) => anyhow::bail!("Unknown snapshot data!"), + } + }, + } + } +} 
+ +#[derive(Serialize, Deserialize)] +#[serde(tag = "state", rename_all = "camelCase")] +pub enum SerializedVectorIndexState { + Backfilling { + #[serde(flatten)] + backfill_state: SerializedVectorIndexBackfillState, + }, + Backfilled { + #[serde(flatten)] + snapshot: SerializedVectorIndexSnapshot, + }, + Snapshotted { + #[serde(flatten)] + snapshot: SerializedVectorIndexSnapshot, + }, +} + +impl TryFrom for SerializedVectorIndexState { + type Error = anyhow::Error; + + fn try_from(state: VectorIndexState) -> Result { + Ok(match state { + VectorIndexState::Backfilling(backfill_state) => { + SerializedVectorIndexState::Backfilling { + backfill_state: backfill_state.try_into()?, + } + }, + VectorIndexState::Backfilled(snapshot) => SerializedVectorIndexState::Backfilled { + snapshot: snapshot.try_into()?, + }, + VectorIndexState::SnapshottedAt(snapshot) => SerializedVectorIndexState::Snapshotted { + snapshot: snapshot.try_into()?, + }, + }) + } +} + +impl TryFrom for VectorIndexState { + type Error = anyhow::Error; + + fn try_from(serialized: SerializedVectorIndexState) -> Result { + Ok(match serialized { + SerializedVectorIndexState::Backfilling { backfill_state } => { + VectorIndexState::Backfilling(backfill_state.try_into()?) + }, + SerializedVectorIndexState::Backfilled { snapshot } => { + VectorIndexState::Backfilled(snapshot.try_into()?) + }, + SerializedVectorIndexState::Snapshotted { snapshot } => { + VectorIndexState::SnapshottedAt(snapshot.try_into()?) 
+ }, + }) + } +} + +codegen_convex_serialization!( + VectorIndexState, + SerializedVectorIndexState, + test_cases = 64 +); diff --git a/crates/common/src/bootstrap_model/index/vector_index/mod.rs b/crates/common/src/bootstrap_model/index/vector_index/mod.rs new file mode 100644 index 00000000..1354cb21 --- /dev/null +++ b/crates/common/src/bootstrap_model/index/vector_index/mod.rs @@ -0,0 +1,124 @@ +mod backfill_state; +mod dimensions; +mod index_config; +mod index_snapshot; +mod index_state; +mod segment; + +pub use self::{ + backfill_state::VectorIndexBackfillState, + dimensions::{ + VectorDimensions, + MAX_VECTOR_DIMENSIONS, + MIN_VECTOR_DIMENSIONS, + }, + index_config::{ + DeveloperVectorIndexConfig, + SerializedDeveloperVectorIndexConfig, + }, + index_snapshot::{ + VectorIndexSnapshot, + VectorIndexSnapshotData, + }, + index_state::{ + SerializedVectorIndexState, + VectorIndexState, + }, + segment::FragmentedVectorSegment, +}; + +#[cfg(test)] +mod tests { + use cmd_util::env::env_config; + use must_let::must_let; + use proptest::prelude::*; + use sync_types::{ + testing::assert_roundtrips, + Timestamp, + }; + use value::{ + assert_obj, + ConvexValue, + }; + + use super::*; + use crate::types::ObjectKey; + + fn serialized_index_state_name_having_data() -> impl Strategy { + prop::string::string_regex("backfilled|snapshotted").unwrap() + } + + proptest! { + #![proptest_config(ProptestConfig { cases: 64 * env_config("CONVEX_PROPTEST_MULTIPLIER", 1), failure_persistence: None, .. 
ProptestConfig::default() })] + + #[test] + fn test_developer_vector_index_config_roundtrips(v in any::()) { + assert_roundtrips::< + DeveloperVectorIndexConfig, + pb::searchlight::VectorIndexConfig + >(v); + } + + #[test] + fn from_legacy_resolved_object_fails( + key in any::(), + ts in any::(), + serialized_index_state_name in serialized_index_state_name_having_data(), + ) { + let legacy_object = assert_obj!( + "state" => serialized_index_state_name.as_str(), + "index" => key.to_string(), + "ts" => ConvexValue::Int64(ts.into()), + "version" => 0, + ); + // We don't have an unknown field at the state level, only for data, so we have to let + // this error. + assert!(VectorIndexState::try_from(legacy_object).is_err()); + } + + #[test] + fn missing_data_type_defaults_to_unknown( + ts in any::(), + serialized_index_state_name in serialized_index_state_name_having_data(), + ) { + let legacy_object = assert_obj!( + "state" => serialized_index_state_name.as_str(), + "data" => {"something" => "invalid"}, + "ts" => ConvexValue::Int64(ts.into()), + ); + let state: VectorIndexState = legacy_object.try_into().unwrap(); + let snapshot = extract_snapshot(serialized_index_state_name, state); + + must_let!(let VectorIndexSnapshotData::Unknown(_) = snapshot.data); + } + + #[test] + fn unrecognized_data_type_defaults_to_unknown( + ts in any::(), + serialized_index_state_name in serialized_index_state_name_having_data(), + ) { + let legacy_object = assert_obj!( + "state" => serialized_index_state_name.as_str(), + "data" => {"data_type" => "invalid"}, + "ts" => ConvexValue::Int64(ts.into()), + ); + let state: VectorIndexState = legacy_object.try_into().unwrap(); + let snapshot = extract_snapshot(serialized_index_state_name, state); + + must_let!(let VectorIndexSnapshotData::Unknown(_) = snapshot.data); + } + } + + fn extract_snapshot( + expected_index_state: String, + state: VectorIndexState, + ) -> VectorIndexSnapshot { + if expected_index_state == "backfilled" { + must_let!(let 
VectorIndexState::Backfilled(snapshot) = state); + snapshot + } else { + must_let!(let VectorIndexState::SnapshottedAt(snapshot) = state); + snapshot + } + } +} diff --git a/crates/common/src/bootstrap_model/index/vector_index/segment.rs b/crates/common/src/bootstrap_model/index/vector_index/segment.rs new file mode 100644 index 00000000..3f058680 --- /dev/null +++ b/crates/common/src/bootstrap_model/index/vector_index/segment.rs @@ -0,0 +1,185 @@ +use std::collections::BTreeMap; + +use anyhow::Context; +use serde::{ + Deserialize, + Serialize, +}; +use value::{ + ConvexValue, + FieldName, +}; + +use super::VectorDimensions; +use crate::types::ObjectKey; + +/// A qdrant Segment that's split into three separate parts, the qdrant Segment +/// which depends on an IdTracker implementation, which depends on a deleted +/// bitset. +/// +/// Each file is stored independently, but they're composed to form a queryable +/// segment. The deleted bitset can be written to independently. The id tracker +/// can be queried independently. Using the segment requires all three files. +#[cfg_attr(any(test, feature = "testing"), derive(proptest_derive::Arbitrary))] +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct FragmentedVectorSegment { + pub segment_key: ObjectKey, + pub id_tracker_key: ObjectKey, + pub deleted_bitset_key: ObjectKey, + pub num_vectors: u32, + pub num_deleted: u32, + // A random UUID that can be used to identify a segment to determine if the + // segment has changed during non-transactional index changes (compaction). 
+ pub id: String,
+}
+
+impl FragmentedVectorSegment {
+ pub fn extract_key(
+ object_fields: &mut BTreeMap,
+ serialized_field_name: &str,
+ ) -> anyhow::Result {
+ match object_fields.remove(serialized_field_name) {
+ Some(ConvexValue::String(s)) => s.try_into(),
+ _ => anyhow::bail!(
+ "Invalid or missing `{serialized_field_name}` field for VectorMultiPartData",
+ ),
+ }
+ }
+
+ pub fn non_deleted_vectors(&self) -> anyhow::Result {
+ let total_vectors = if self.num_vectors < self.num_deleted {
+ // Some early segments have been created with num_vectors set to the initially
+ // available point count, which excluded deletes. If sufficient vectors are
+ // deleted, that can result in num_deleted exceeding the initial num_vectors.
+ // That doesn't strictly mean the segment is empty, but it should be close
+ // enough and a backfill to fix these segments is complex.
+ Ok(0)
+ } else {
+ self.num_vectors
+ .checked_sub(self.num_deleted)
+ .ok_or_else(|| {
+ anyhow::anyhow!(
+ "Failed to subtract {} from {}",
+ self.num_deleted,
+ self.num_vectors
+ )
+ })
+ };
+ total_vectors.map(|value| value as u64)
+ }
+
+ /// The estimated size bytes based only on the non-deleted vectors in the
+ /// segment.
+ ///
+ /// The actual size of the segment in s3 will be bigger due both to deleted
+ /// vectors excluded from this size estimation and also overhead from the
+ /// HNSW index (if present). Index overhead is larger as a percentage for
+ /// small dimensional vectors than large dimensional vectors.
+ pub fn non_deleted_size_bytes(&self, dimensions: VectorDimensions) -> anyhow::Result {
+ Self::size_bytes(self.non_deleted_vectors()?, dimensions)
+ }
+
+ /// The estimated size bytes based on both deleted and non-deleted vectors
+ /// in the segment.
+ ///
+ /// The actual size of the segment in s3 will be bigger due to the overhead
+ /// from the HNSW index (if present). 
Index overhead is larger as a + /// percentage for small dimensional vectors than large dimensional + /// vectors. + pub fn total_size_bytes(&self, dimensions: VectorDimensions) -> anyhow::Result { + Self::size_bytes(self.num_vectors as u64, dimensions) + } + + fn size_bytes(estimated_vectors: u64, dimensions: VectorDimensions) -> anyhow::Result { + // A little extra paranoia since all of these numbers are not originally u64 and + // can overflow u32. + (estimated_vectors) + .checked_mul(u32::from(dimensions) as u64) + .and_then(|value| value.checked_mul(4_u64)) + .context("Overflowed size calculation!") + } + + pub fn to_paths_proto(self) -> anyhow::Result { + Ok(pb::searchlight::FragmentedVectorSegmentPaths { + segment: Some(pb::searchlight::StorageKey { + storage_key: self.segment_key.into(), + }), + id_tracker: Some(pb::searchlight::StorageKey { + storage_key: self.id_tracker_key.into(), + }), + deleted_bitset: Some(pb::searchlight::StorageKey { + storage_key: self.deleted_bitset_key.into(), + }), + }) + } +} + +impl TryFrom for pb::searchlight::FragmentedVectorSegment { + type Error = anyhow::Error; + + fn try_from(value: FragmentedVectorSegment) -> Result { + Ok(Self { + segment_key: value.segment_key.try_into()?, + id_tracker_key: value.id_tracker_key.try_into()?, + deleted_bitset_key: value.deleted_bitset_key.try_into()?, + num_vectors: value.num_vectors, + num_deleted: value.num_deleted, + id: value.id, + }) + } +} + +impl TryFrom for FragmentedVectorSegment { + type Error = anyhow::Error; + + fn try_from(value: pb::searchlight::FragmentedVectorSegment) -> Result { + Ok(Self { + segment_key: value.segment_key.try_into()?, + id_tracker_key: value.id_tracker_key.try_into()?, + deleted_bitset_key: value.deleted_bitset_key.try_into()?, + num_vectors: value.num_vectors, + num_deleted: value.num_deleted, + id: value.id, + }) + } +} + +#[derive(Serialize, Deserialize)] +pub struct SerializedFragmentedVectorSegment { + pub segment_key: String, + pub 
id_tracker_key: String, + pub deleted_bitset_key: String, + pub num_vectors: i64, + pub num_deleted: i64, + pub id: String, +} + +impl TryFrom for SerializedFragmentedVectorSegment { + type Error = anyhow::Error; + + fn try_from(value: FragmentedVectorSegment) -> Result { + Ok(Self { + segment_key: value.segment_key.to_string(), + id_tracker_key: value.id_tracker_key.to_string(), + deleted_bitset_key: value.deleted_bitset_key.to_string(), + num_vectors: value.num_vectors as i64, + num_deleted: value.num_deleted as i64, + id: value.id, + }) + } +} + +impl TryFrom for FragmentedVectorSegment { + type Error = anyhow::Error; + + fn try_from(value: SerializedFragmentedVectorSegment) -> Result { + Ok(Self { + segment_key: value.segment_key.try_into()?, + id_tracker_key: value.id_tracker_key.try_into()?, + deleted_bitset_key: value.deleted_bitset_key.try_into()?, + num_vectors: value.num_vectors.try_into()?, + num_deleted: value.num_deleted.try_into()?, + id: value.id, + }) + } +} diff --git a/crates/common/src/bootstrap_model/mod.rs b/crates/common/src/bootstrap_model/mod.rs index ec4735b9..b38e7a0e 100644 --- a/crates/common/src/bootstrap_model/mod.rs +++ b/crates/common/src/bootstrap_model/mod.rs @@ -11,4 +11,6 @@ //! colocated in database crate. 
pub mod index; pub mod schema; +mod schema_metadata; +mod schema_state; pub mod tables; diff --git a/crates/common/src/bootstrap_model/schema.rs b/crates/common/src/bootstrap_model/schema.rs index 0a758127..d338b7a3 100644 --- a/crates/common/src/bootstrap_model/schema.rs +++ b/crates/common/src/bootstrap_model/schema.rs @@ -1,160 +1,18 @@ -use std::{ - collections::BTreeMap, - str::FromStr, -}; +use std::str::FromStr; use errors::ErrorMetadata; -use serde_json::Value as JsonValue; use value::{ id_v6::DocumentIdV6, - obj, - val, - ConvexObject, - ConvexValue, GenericDocumentId, ResolvedDocumentId, TableId, TableMapping, }; -use crate::schemas::DatabaseSchema; - -#[derive(Debug, Clone, PartialEq)] -#[cfg_attr(any(test, feature = "testing"), derive(proptest_derive::Arbitrary))] -pub struct SchemaMetadata { - pub state: SchemaState, - pub schema: DatabaseSchema, -} - -/// SchemaState state machine: -/// ```text -/// +----------+-----------------| -/// | Pending |-+ | -/// +---+------+ | +--------+ | -/// | +->| Failed | | -/// v +--------+ | -/// +----------+ ^ | -/// |Validated |---------+ | -/// +---+------+ | | -/// | | | -/// v v v -/// +------+ +-----------+ -/// |Active|---------->|Overwritten| -/// +------+ +-----------+ -/// ``` -/// Invariants: -/// 1. At most one schema can be in the `Pending` or `Validated` state at a -/// time. -/// -/// 2. At most one schema can be in the `Active` state at a time. 
-#[derive(Debug, Clone, PartialEq)] -#[cfg_attr(any(test, feature = "testing"), derive(proptest_derive::Arbitrary))] -pub enum SchemaState { - Pending, - Validated, - Active, - Failed { - error: String, - table_name: Option, - }, - Overwritten, -} - -impl TryFrom for ConvexValue { - type Error = anyhow::Error; - - fn try_from(s: SchemaState) -> anyhow::Result { - let object = match s { - SchemaState::Pending => obj!("state" => "pending"), - SchemaState::Validated => obj!("state" => "validated"), - SchemaState::Active => obj!("state" => "active"), - SchemaState::Failed { error, table_name } => { - obj!( - "state" => "failed", - "error" => error.as_str(), - "table_name" => if let Some(table_name) = table_name { - val!(table_name) - } else { - val!(null) - } - ) - }, - SchemaState::Overwritten => obj!("state" => "overwritten"), - }?; - Ok(ConvexValue::Object(object)) - } -} - -impl TryFrom for SchemaState { - type Error = anyhow::Error; - - fn try_from(v: ConvexValue) -> anyhow::Result { - let o = if let ConvexValue::Object(o) = v { - Ok(o) - } else { - Err(anyhow::anyhow!("Schema state must be an object")) - }?; - let mut fields: BTreeMap<_, _> = o.into(); - match fields.remove("state") { - Some(ConvexValue::String(s)) => match s.to_string().as_str() { - "pending" => Ok(SchemaState::Pending), - "validated" => Ok(SchemaState::Validated), - "active" => Ok(SchemaState::Active), - "failed" => { - let table_name = fields.remove("table_name").and_then(|table_name| { - if let ConvexValue::String(s) = table_name { - Some(s.into()) - } else { - None - } - }); - match fields.remove("error") { - Some(ConvexValue::String(e)) => Ok(SchemaState::Failed { - error: e.to_string(), - table_name, - }), - _ => Err(anyhow::anyhow!("Failed schema is missing error")), - } - }, - "overwritten" => Ok(SchemaState::Overwritten), - _ => Err(anyhow::anyhow!("Invalid schema state: {s}")), - }, - _ => Err(anyhow::anyhow!( - "Schema state object is missing state field." 
- )), - } - } -} - -impl TryFrom for SchemaMetadata { - type Error = anyhow::Error; - - fn try_from(o: ConvexObject) -> anyhow::Result { - let mut fields: BTreeMap<_, _> = o.into(); - let state = fields - .remove("state") - .map(SchemaState::try_from) - .ok_or_else(|| anyhow::anyhow!("Schema is missing state field."))??; - let schema = match fields.remove("schema") { - Some(ConvexValue::String(s)) => { - let deserialized_value: JsonValue = serde_json::from_str(&s)?; - DatabaseSchema::try_from(deserialized_value) - }, - None => Err(anyhow::anyhow!("Schema is missing schema field.")), - _ => Err(anyhow::anyhow!("Schema is not serialized as a string")), - }?; - Ok(SchemaMetadata { state, schema }) - } -} - -impl TryFrom for ConvexObject { - type Error = anyhow::Error; - - fn try_from(SchemaMetadata { state, schema }: SchemaMetadata) -> anyhow::Result { - let serialized_schema = serde_json::to_string(&JsonValue::try_from(schema)?)?; - obj!("state" => state, "schema" => serialized_schema) - } -} +pub use super::{ + schema_metadata::SchemaMetadata, + schema_state::SchemaState, +}; pub fn parse_schema_id( schema_id: &str, @@ -177,23 +35,3 @@ pub fn invalid_schema_id(schema_id: &str) -> ErrorMetadata { format!("Invalid schema id: {}", schema_id), ) } - -#[cfg(test)] -mod tests { - use cmd_util::env::env_config; - use proptest::prelude::*; - use value::{ - testing::assert_roundtrips, - ConvexObject, - }; - - use crate::bootstrap_model::schema::SchemaMetadata; - - proptest! { - #![proptest_config(ProptestConfig { cases: 16 * env_config("CONVEX_PROPTEST_MULTIPLIER", 1), failure_persistence: None, .. 
ProptestConfig::default() })] - #[test] - fn test_schema_roundtrip(v in any::()) { - assert_roundtrips::(v); - } - } -} diff --git a/crates/common/src/bootstrap_model/schema_metadata.rs b/crates/common/src/bootstrap_model/schema_metadata.rs new file mode 100644 index 00000000..e911542f --- /dev/null +++ b/crates/common/src/bootstrap_model/schema_metadata.rs @@ -0,0 +1,50 @@ +use serde::{ + Deserialize, + Serialize, +}; +use serde_json::Value as JsonValue; +use value::codegen_convex_serialization; + +use super::schema_state::{ + SchemaState, + SerializedSchemaState, +}; +use crate::schemas::DatabaseSchema; + +#[derive(Debug, Clone, PartialEq)] +#[cfg_attr(any(test, feature = "testing"), derive(proptest_derive::Arbitrary))] +pub struct SchemaMetadata { + pub state: SchemaState, + pub schema: DatabaseSchema, +} + +#[derive(Serialize, Deserialize)] +pub struct SerializedSchemaMetadata { + state: SerializedSchemaState, + schema: String, +} + +impl TryFrom for SerializedSchemaMetadata { + type Error = anyhow::Error; + + fn try_from(s: SchemaMetadata) -> anyhow::Result { + Ok(Self { + state: s.state.try_into()?, + schema: serde_json::to_string(&JsonValue::try_from(s.schema)?)?, + }) + } +} + +impl TryFrom for SchemaMetadata { + type Error = anyhow::Error; + + fn try_from(s: SerializedSchemaMetadata) -> anyhow::Result { + let deserialized_value: JsonValue = serde_json::from_str(&s.schema)?; + Ok(Self { + state: s.state.try_into()?, + schema: DatabaseSchema::try_from(deserialized_value)?, + }) + } +} + +codegen_convex_serialization!(SchemaMetadata, SerializedSchemaMetadata); diff --git a/crates/common/src/bootstrap_model/schema_state.rs b/crates/common/src/bootstrap_model/schema_state.rs new file mode 100644 index 00000000..c9490bcf --- /dev/null +++ b/crates/common/src/bootstrap_model/schema_state.rs @@ -0,0 +1,112 @@ +use serde::{ + Deserialize, + Serialize, +}; +use value::codegen_convex_serialization; + +/// SchemaState state machine: +/// ```text +/// 
+----------+------------------| +/// | Pending |-+ | +/// +---+------+ | +--------+ | +/// | +->| Failed | | +/// v +--------+ | +/// +-----------+ ^ | +/// | Validated |---------+ | +/// +---+-------+ | | +/// | | | +/// v v v +/// +------+ +-----------+ +/// |Active|----------->|Overwritten| +/// +------+ +-----------+ +/// ``` +/// Invariants: +/// 1. At most one schema can be in the `Pending` or `Validated` state at a +/// time. +/// +/// 2. At most one schema can be in the `Active` state at a time. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[cfg_attr(any(test, feature = "testing"), derive(proptest_derive::Arbitrary))] +pub enum SchemaState { + Pending, + Validated, + Active, + Failed { + error: String, + table_name: Option, + }, + Overwritten, +} + +#[derive(Serialize, Deserialize)] +#[serde(tag = "state", rename_all = "camelCase")] +pub enum SerializedSchemaState { + Pending, + Validated, + Active, + Failed { + error: String, + table_name: Option, + }, + Overwritten, +} + +impl TryFrom for SerializedSchemaState { + type Error = anyhow::Error; + + fn try_from(s: SchemaState) -> anyhow::Result { + Ok(match s { + SchemaState::Pending => Self::Pending, + SchemaState::Validated => Self::Validated, + SchemaState::Active => Self::Active, + SchemaState::Failed { error, table_name } => Self::Failed { error, table_name }, + SchemaState::Overwritten => Self::Overwritten, + }) + } +} + +impl TryFrom for SchemaState { + type Error = anyhow::Error; + + fn try_from(s: SerializedSchemaState) -> anyhow::Result { + Ok(match s { + SerializedSchemaState::Pending => Self::Pending, + SerializedSchemaState::Validated => Self::Validated, + SerializedSchemaState::Active => Self::Active, + SerializedSchemaState::Failed { error, table_name } => { + Self::Failed { error, table_name } + }, + SerializedSchemaState::Overwritten => Self::Overwritten, + }) + } +} + +codegen_convex_serialization!(SchemaState, SerializedSchemaState); + +#[cfg(test)] +mod tests { + use 
value::{ + obj, + ConvexValue, + }; + + use crate::bootstrap_model::schema::SchemaState; + + #[test] + fn test_backwards_compatibility() -> anyhow::Result<()> { + let serialized = obj!( + "state" => "failed", + "error" => "dis failed", + "table_name" => ConvexValue::Null, + )?; + let deserialized: SchemaState = serialized.try_into().unwrap(); + assert_eq!( + deserialized, + SchemaState::Failed { + error: "dis failed".to_string(), + table_name: None + } + ); + Ok(()) + } +} diff --git a/crates/common/src/bootstrap_model/tables.rs b/crates/common/src/bootstrap_model/tables.rs index 0e4e657c..422beaaa 100644 --- a/crates/common/src/bootstrap_model/tables.rs +++ b/crates/common/src/bootstrap_model/tables.rs @@ -1,20 +1,17 @@ -use std::{ - collections::BTreeMap, - sync::LazyLock, -}; +use std::sync::LazyLock; +use serde::{ + Deserialize, + Serialize, +}; use value::{ - ConvexObject, - ConvexValue, + codegen_convex_serialization, TableNumber, }; -use crate::{ - obj, - types::{ - FieldName, - TableName, - }, +use crate::types::{ + FieldName, + TableName, }; pub static TABLES_TABLE: LazyLock = @@ -77,80 +74,71 @@ impl TableMetadata { } } -impl TryFrom for ConvexObject { +#[derive(Serialize, Deserialize)] +struct SerializedTableMetadata { + name: String, + number: i64, + state: String, +} + +impl TryFrom for SerializedTableMetadata { type Error = anyhow::Error; - fn try_from(value: TableMetadata) -> Result { - obj!( - "name" => String::from(value.name), - "state" => String::from(match value.state { - TableState::Active => "active", - TableState::Deleting => "deleting", - TableState::Hidden => "hidden", - }), - "number" => ConvexValue::Int64(u32::from(value.number).into()), - ) + fn try_from(m: TableMetadata) -> anyhow::Result { + Ok(Self { + name: m.name.into(), + number: u32::from(m.number) as i64, + state: match m.state { + TableState::Active => "active".to_owned(), + TableState::Deleting => "deleting".to_owned(), + TableState::Hidden => "hidden".to_owned(), + }, + }) } } 
-impl TryFrom for TableMetadata { +impl TryFrom for TableMetadata { type Error = anyhow::Error; - fn try_from(object: ConvexObject) -> Result { - let mut fields: BTreeMap<_, _> = object.into(); - let name = match fields.remove(&*NAME_FIELD) { - Some(ConvexValue::String(s)) => s.parse()?, - v => anyhow::bail!("Invalid name field for TableMetadata: {:?}", v), - }; - - let number = match fields.remove("number") { - Some(ConvexValue::Int64(v)) => u32::try_from(v)?.try_into()?, - v => anyhow::bail!("Invalid number field for TableMetadata: {:?}", v), - }; - let state = match fields.remove("state") { - Some(ConvexValue::String(s)) => match &s[..] { + fn try_from(m: SerializedTableMetadata) -> anyhow::Result { + Ok(Self { + name: m.name.parse()?, + number: u32::try_from(m.number)?.try_into()?, + state: match &m.state[..] { "active" => TableState::Active, "deleting" => TableState::Deleting, "hidden" => TableState::Hidden, - _ => anyhow::bail!("invalid table state {s}"), + s => anyhow::bail!("invalid table state {s}"), }, - None => TableState::Active, - _ => anyhow::bail!("invalid table state {fields:?}"), - }; - Ok(Self { - name, - number, - state, }) } } -impl TryFrom for TableMetadata { - type Error = anyhow::Error; - - fn try_from(value: ConvexValue) -> Result { - match value { - ConvexValue::Object(o) => o.try_into(), - _ => anyhow::bail!("Invalid table metadata value"), - } - } -} +codegen_convex_serialization!(TableMetadata, SerializedTableMetadata); #[cfg(test)] mod tests { - use proptest::prelude::*; - use value::ConvexObject; + use value::obj; use super::TableMetadata; - use crate::testing::assert_roundtrips; - - proptest! 
{ - #![proptest_config( - ProptestConfig { failure_persistence: None, ..ProptestConfig::default() } - )] - #[test] - fn test_table_roundtrips(v in any::()) { - assert_roundtrips::(v); - } + use crate::bootstrap_model::tables::TableState; + + #[test] + fn test_backwards_compatibility() -> anyhow::Result<()> { + let serialized = obj!( + "name" => "foo", + "state" => "hidden", + "number" => 1017, + )?; + let deserialized: TableMetadata = serialized.try_into().unwrap(); + assert_eq!( + deserialized, + TableMetadata { + name: "foo".parse()?, + number: 1017.try_into()?, + state: TableState::Hidden + } + ); + Ok(()) } } diff --git a/crates/common/src/types/actions.rs b/crates/common/src/types/actions.rs index bc6dd449..c45d2761 100644 --- a/crates/common/src/types/actions.rs +++ b/crates/common/src/types/actions.rs @@ -1,5 +1,4 @@ use std::{ - collections::BTreeMap, fmt::{ self, Debug, @@ -8,15 +7,15 @@ use std::{ str::FromStr, }; +use serde::{ + Deserialize, + Serialize, +}; use serde_json::{ json, Value as JsonValue, }; -use value::{ - obj, - ConvexObject, - ConvexValue, -}; +use value::codegen_convex_serialization; use crate::heap_size::HeapSize; @@ -32,55 +31,35 @@ pub struct NodeDependency { pub version: String, } -impl TryFrom for NodeDependency { - type Error = anyhow::Error; - - fn try_from(obj: ConvexObject) -> Result { - let mut fields = BTreeMap::from(obj); - - let package: String = match fields.remove("package") { - Some(ConvexValue::String(s)) => s.into(), - _ => anyhow::bail!("Invalid or missing 'package' in NodeDependency: {fields:?}"), - }; - let version: String = match fields.remove("version") { - Some(ConvexValue::String(s)) => s.into(), - _ => anyhow::bail!("Invalid or missing 'version' in NodeDependency: {fields:?}"), - }; - Ok(Self { package, version }) - } +#[derive(Serialize, Deserialize)] +struct SerializedNodeDependency { + package: String, + version: String, } -impl TryFrom for ConvexObject { +impl TryFrom for SerializedNodeDependency { type 
Error = anyhow::Error; - fn try_from(value: NodeDependency) -> Result { - obj!( - "package" => value.package, - "version" => value.version - ) + fn try_from(dep: NodeDependency) -> Result { + Ok(Self { + package: dep.package, + version: dep.version, + }) } } -impl TryFrom for NodeDependency { +impl TryFrom for NodeDependency { type Error = anyhow::Error; - fn try_from(value: ConvexValue) -> Result { - if let ConvexValue::Object(o) = value { - o.try_into() - } else { - anyhow::bail!("NodeDependency expected an Object, got {value:?}") - } + fn try_from(dep: SerializedNodeDependency) -> Result { + Ok(Self { + package: dep.package, + version: dep.version, + }) } } -impl TryFrom for ConvexValue { - type Error = anyhow::Error; - - fn try_from(value: NodeDependency) -> Result { - let obj: ConvexObject = value.try_into()?; - Ok(ConvexValue::Object(obj)) - } -} +codegen_convex_serialization!(NodeDependency, SerializedNodeDependency); impl From for JsonValue { fn from(dep: NodeDependency) -> Self { @@ -163,3 +142,26 @@ impl FromStr for HttpActionRoute { Ok(Self { method, path }) } } + +#[cfg(test)] +mod tests { + use value::assert_obj; + + use super::NodeDependency; + + #[test] + fn test_backwards_compatibility() { + let serialized = assert_obj!( + "package" => "foo", + "version" => "1.0.0", + ); + let deserialized: NodeDependency = serialized.try_into().unwrap(); + assert_eq!( + deserialized, + NodeDependency { + package: "foo".to_string(), + version: "1.0.0".to_string(), + } + ); + } +} diff --git a/crates/convex_macro/Cargo.toml b/crates/convex_macro/Cargo.toml index 21d8c861..8891b836 100644 --- a/crates/convex_macro/Cargo.toml +++ b/crates/convex_macro/Cargo.toml @@ -13,3 +13,6 @@ proc-macro = true proc-macro2 = { workspace = true } quote = { workspace = true } syn = { workspace = true } + +[dev-dependencies] +anyhow = { workspace = true } diff --git a/crates/value/src/lib.rs b/crates/value/src/lib.rs index d12087ce..fb1809c4 100644 --- a/crates/value/src/lib.rs +++ 
b/crates/value/src/lib.rs @@ -23,6 +23,7 @@ mod map; mod metrics; pub mod numeric; mod object; +pub mod serde; mod set; pub mod sha256; mod size; diff --git a/crates/value/src/serde/de.rs b/crates/value/src/serde/de.rs new file mode 100644 index 00000000..f6b95f59 --- /dev/null +++ b/crates/value/src/serde/de.rs @@ -0,0 +1,777 @@ +use std::{ + collections::BTreeMap, + fmt::{ + self, + Display, + }, + num::TryFromIntError, +}; + +use serde::de::{ + DeserializeOwned, + DeserializeSeed, + Error as SerdeError, + MapAccess, + SeqAccess, + Visitor, +}; + +use crate::{ + ConvexArray, + ConvexObject, + ConvexValue, + FieldName, +}; + +#[derive(thiserror::Error)] +pub enum Error { + #[error("Invalid type: received {received}, expected {expected}")] + InvalidType { + expected: &'static str, + received: &'static str, + }, + + #[error("ConvexValue::Int64 was out of range: {0:?}.")] + IntegerOutofRange(#[from] TryFromIntError), + + #[error("f32s aren't supported, use an f64 instead.")] + Float32Unsupported, + + #[error("chars aren't supported, use a string instead.")] + CharUnsupported, + + #[error("Tuple structs aren't supported.")] + TupleStructsUnsupported, + + #[error("Unit structs aren't supported.")] + UnitStructUnsupported, + + #[error("Newtype structs aren't supported.")] + NewtypeStructUnsupported, + + #[error("Deserializing object field into invalid type {field_type}")] + InvalidField { field_type: &'static str }, + + #[error("Ignored any unsupported.")] + IgnoredAnyUnsupported, + + #[error("Direct enum unsupported, use #[serde(tag = \"type\")] instead.")] + EnumUnsupported, + + #[error(transparent)] + Anyhow(#[from] anyhow::Error), + + #[error("{0}")] + Custom(String), +} + +impl fmt::Debug for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{self}") + } +} + +impl SerdeError for Error { + fn custom(msg: T) -> Self { + Error::Custom(msg.to_string()) + } +} + +impl<'de> serde::Deserializer<'de> for ConvexValue { + type Error = Error; + + 
#[inline] + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + match self { + ConvexValue::Null => visitor.visit_unit(), + ConvexValue::Int64(n) => visitor.visit_i64(n), + ConvexValue::Float64(n) => visitor.visit_f64(n), + ConvexValue::Boolean(b) => visitor.visit_bool(b), + ConvexValue::String(s) => visitor.visit_string(s.into()), + ConvexValue::Bytes(b) => visitor.visit_byte_buf(b.into()), + ConvexValue::Array(v) => visit_array(v, visitor), + ConvexValue::Object(v) => visit_object(v, visitor), + v => Err(anyhow::anyhow!("Unsupported value: {v}").into()), + } + } + + fn deserialize_i8(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + match self { + ConvexValue::Int64(n) => visitor.visit_i8(n.try_into()?), + v => Err(Error::InvalidType { + expected: "Int64", + received: v.type_name(), + }), + } + } + + fn deserialize_i16(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + match self { + ConvexValue::Int64(n) => visitor.visit_i16(n.try_into()?), + v => Err(Error::InvalidType { + expected: "Int64", + received: v.type_name(), + }), + } + } + + fn deserialize_i32(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + match self { + ConvexValue::Int64(n) => visitor.visit_i32(n.try_into()?), + v => Err(Error::InvalidType { + expected: "Int64", + received: v.type_name(), + }), + } + } + + fn deserialize_i64(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + match self { + ConvexValue::Int64(n) => visitor.visit_i64(n), + v => Err(Error::InvalidType { + expected: "Int64", + received: v.type_name(), + }), + } + } + + fn deserialize_u8(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + match self { + ConvexValue::Int64(n) => visitor.visit_u8(n.try_into()?), + v => Err(Error::InvalidType { + expected: "Int64", + received: v.type_name(), + }), + } + } + + fn deserialize_u16(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + match self { + ConvexValue::Int64(n) => 
visitor.visit_u16(n.try_into()?), + v => Err(Error::InvalidType { + expected: "Int64", + received: v.type_name(), + }), + } + } + + fn deserialize_u32(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + match self { + ConvexValue::Int64(n) => visitor.visit_u32(n.try_into()?), + v => Err(Error::InvalidType { + expected: "Int64", + received: v.type_name(), + }), + } + } + + fn deserialize_u64(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + match self { + ConvexValue::Int64(n) => visitor.visit_u64(n.try_into()?), + v => Err(Error::InvalidType { + expected: "Int64", + received: v.type_name(), + }), + } + } + + fn deserialize_f32(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::Float32Unsupported) + } + + fn deserialize_f64(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + match self { + ConvexValue::Float64(n) => visitor.visit_f64(n), + v => Err(Error::InvalidType { + expected: "Float", + received: v.type_name(), + }), + } + } + + #[inline] + fn deserialize_option(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + match self { + ConvexValue::Null => visitor.visit_none(), + _ => visitor.visit_some(self), + } + } + + #[inline] + fn deserialize_enum( + self, + _name: &str, + _variants: &'static [&'static str], + _visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + Err(Error::EnumUnsupported) + } + + #[inline] + fn deserialize_newtype_struct( + self, + _name: &'static str, + _visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + Err(Error::NewtypeStructUnsupported) + } + + fn deserialize_bool(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + match self { + ConvexValue::Boolean(b) => visitor.visit_bool(b), + v => Err(Error::InvalidType { + expected: "Boolean", + received: v.type_name(), + }), + } + } + + fn deserialize_char(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::CharUnsupported) + } + + fn deserialize_str(self, visitor: V) -> Result + where + V: 
Visitor<'de>, + { + self.deserialize_string(visitor) + } + + fn deserialize_string(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + match self { + ConvexValue::String(s) => visitor.visit_string(s.into()), + v => Err(Error::InvalidType { + expected: "String", + received: v.type_name(), + }), + } + } + + fn deserialize_bytes(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_byte_buf(visitor) + } + + fn deserialize_byte_buf(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + match self { + ConvexValue::Bytes(b) => visitor.visit_byte_buf(b.into()), + v => Err(Error::InvalidType { + expected: "Bytes", + received: v.type_name(), + }), + } + } + + fn deserialize_unit(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + match self { + ConvexValue::Null => visitor.visit_unit(), + v => Err(Error::InvalidType { + expected: "Null", + received: v.type_name(), + }), + } + } + + fn deserialize_unit_struct(self, _name: &'static str, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::UnitStructUnsupported) + } + + fn deserialize_seq(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + match self { + ConvexValue::Array(v) => visit_array(v, visitor), + v => Err(Error::InvalidType { + expected: "Array", + received: v.type_name(), + }), + } + } + + fn deserialize_tuple(self, _len: usize, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_seq(visitor) + } + + fn deserialize_tuple_struct( + self, + _name: &'static str, + _len: usize, + _visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + Err(Error::TupleStructsUnsupported) + } + + fn deserialize_map(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + match self { + ConvexValue::Object(v) => visit_object(v, visitor), + v => Err(Error::InvalidType { + expected: "Object", + received: v.type_name(), + }), + } + } + + fn deserialize_struct( + self, + _name: &'static str, + _fields: &'static [&'static str], + visitor: V, + ) -> 
Result + where + V: Visitor<'de>, + { + match self { + ConvexValue::Object(v) => visit_object(v, visitor), + v => Err(Error::InvalidType { + expected: "Object", + received: v.type_name(), + }), + } + } + + fn deserialize_identifier(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_string(visitor) + } + + fn deserialize_ignored_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_any(visitor) + } +} + +fn visit_array<'de, V>(array: ConvexArray, visitor: V) -> Result +where + V: Visitor<'de>, +{ + let _len = array.len(); + let mut deserializer = SeqDeserializer { + iter: Vec::from(array).into_iter(), + }; + let seq = visitor.visit_seq(&mut deserializer)?; + let remaining = deserializer.iter.len(); + if remaining != 0 { + return Err(anyhow::anyhow!("Items remaining after deserialization").into()); + } + Ok(seq) +} + +struct SeqDeserializer { + iter: std::vec::IntoIter, +} + +impl<'de> SeqAccess<'de> for SeqDeserializer { + type Error = Error; + + fn next_element_seed(&mut self, seed: T) -> Result, Error> + where + T: DeserializeSeed<'de>, + { + match self.iter.next() { + Some(value) => seed.deserialize(value).map(Some), + None => Ok(None), + } + } + + fn size_hint(&self) -> Option { + match self.iter.size_hint() { + (lower, Some(upper)) if lower == upper => Some(upper), + _ => None, + } + } +} + +fn visit_object<'de, V>(object: ConvexObject, visitor: V) -> Result +where + V: Visitor<'de>, +{ + let _len = object.len(); + let mut deserializer = MapDeserializer { + iter: BTreeMap::from(object).into_iter(), + value: None, + }; + let map = visitor.visit_map(&mut deserializer)?; + let remaining = deserializer.iter.len(); + if remaining != 0 { + return Err(anyhow::anyhow!("Items remaining after deserialization").into()); + } + Ok(map) +} + +struct MapDeserializer { + iter: as IntoIterator>::IntoIter, + value: Option, +} + +impl<'de> MapAccess<'de> for MapDeserializer { + type Error = Error; + + fn next_key_seed(&mut 
self, seed: T) -> Result, Error> + where + T: DeserializeSeed<'de>, + { + match self.iter.next() { + Some((key, value)) => { + self.value = Some(value); + let key_de = MapKeyDeserializer { key }; + Ok(Some(seed.deserialize(key_de)?)) + }, + None => Ok(None), + } + } + + fn next_value_seed(&mut self, seed: T) -> Result + where + T: DeserializeSeed<'de>, + { + match self.value.take() { + Some(value) => seed.deserialize(value), + None => Err(anyhow::anyhow!("value is missing").into()), + } + } + + fn size_hint(&self) -> Option { + match self.iter.size_hint() { + (lower, Some(upper)) if lower == upper => Some(upper), + _ => None, + } + } +} + +struct MapKeyDeserializer { + key: FieldName, +} + +impl<'de> serde::Deserializer<'de> for MapKeyDeserializer { + type Error = Error; + + #[inline] + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_string(visitor) + } + + fn deserialize_i8(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::InvalidField { field_type: "i8" }) + } + + fn deserialize_i16(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::InvalidField { field_type: "i16" }) + } + + fn deserialize_i32(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::InvalidField { field_type: "i32" }) + } + + fn deserialize_i64(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::InvalidField { field_type: "i64" }) + } + + fn deserialize_u8(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::InvalidField { field_type: "u8" }) + } + + fn deserialize_u16(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::InvalidField { field_type: "u16" }) + } + + fn deserialize_u32(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::InvalidField { field_type: "u32" }) + } + + fn deserialize_u64(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::InvalidField { field_type: "u64" }) + } 
+ + fn deserialize_f32(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::InvalidField { field_type: "f32" }) + } + + fn deserialize_f64(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::InvalidField { field_type: "f64" }) + } + + #[inline] + fn deserialize_option(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::InvalidField { + field_type: "Option", + }) + } + + #[inline] + fn deserialize_enum( + self, + _name: &str, + _variants: &'static [&'static str], + _visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + Err(Error::InvalidField { field_type: "enum" }) + } + + #[inline] + fn deserialize_newtype_struct( + self, + _name: &'static str, + _visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + Err(Error::InvalidField { + field_type: "newtype struct", + }) + } + + fn deserialize_bool(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::InvalidField { field_type: "bool" }) + } + + fn deserialize_char(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::InvalidField { field_type: "char" }) + } + + fn deserialize_str(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_string(visitor) + } + + fn deserialize_string(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_string(self.key.into()) + } + + fn deserialize_bytes(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::InvalidField { + field_type: "bytes", + }) + } + + fn deserialize_byte_buf(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::InvalidField { + field_type: "bytes", + }) + } + + fn deserialize_unit(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::InvalidField { field_type: "unit" }) + } + + fn deserialize_unit_struct(self, _name: &'static str, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::InvalidField { + field_type: "unit struct", + }) + } + + fn 
deserialize_seq(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::InvalidField { + field_type: "sequence", + }) + } + + fn deserialize_tuple(self, _len: usize, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::InvalidField { + field_type: "tuple", + }) + } + + fn deserialize_tuple_struct( + self, + _name: &'static str, + _len: usize, + _visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + Err(Error::InvalidField { + field_type: "tuple struct", + }) + } + + fn deserialize_map(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::InvalidField { field_type: "map" }) + } + + fn deserialize_struct( + self, + _name: &'static str, + _fields: &'static [&'static str], + _visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + Err(Error::InvalidField { + field_type: "struct", + }) + } + + fn deserialize_identifier(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_string(visitor) + } + + fn deserialize_ignored_any(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::InvalidField { + field_type: "ignored any", + }) + } +} + +pub fn from_value(value: ConvexValue) -> Result { + T::deserialize(value) +} + +pub fn from_object(value: ConvexObject) -> Result { + T::deserialize(ConvexValue::Object(value)) +} diff --git a/crates/value/src/serde/mod.rs b/crates/value/src/serde/mod.rs new file mode 100644 index 00000000..305477e9 --- /dev/null +++ b/crates/value/src/serde/mod.rs @@ -0,0 +1,118 @@ +mod de; +mod ser; +mod value; + +pub use de::{ + from_object, + from_value, +}; +pub use ser::{ + to_object, + to_value, +}; +use serde::{ + Deserialize, + Serialize, +}; + +use crate::ConvexObject; + +#[macro_export] +macro_rules! 
codegen_convex_serialization { + ($struct:ident, $serialized_struct:ident) => { + codegen_convex_serialization!($struct, $serialized_struct, test_cases = 256); + }; + ($struct:ident, $serialized_struct:ident, test_cases = $test_cases:expr) => { + impl TryFrom<$struct> for value::ConvexObject { + type Error = anyhow::Error; + + fn try_from(s: $struct) -> anyhow::Result { + Ok(value::serde::to_object($serialized_struct::try_from(s)?)?) + } + } + + impl TryFrom<$struct> for value::ConvexValue { + type Error = anyhow::Error; + + fn try_from(s: $struct) -> anyhow::Result { + Ok(value::ConvexObject::try_from(s)?.into()) + } + } + + impl TryFrom for $struct { + type Error = anyhow::Error; + + fn try_from(s: value::ConvexObject) -> anyhow::Result<$struct> { + value::serde::from_object::<$serialized_struct>(s)?.try_into() + } + } + + impl TryFrom for $struct { + type Error = anyhow::Error; + + fn try_from(s: value::ConvexValue) -> anyhow::Result<$struct> { + value::ConvexObject::try_from(s)?.try_into() + } + } + + #[cfg(test)] + mod roundtrip_test { + use cmd_util::env::env_config; + use proptest::prelude::*; + + use super::$struct; + + // TODO: For some reason, `proptest!` isn't usable from within this macro. + #[test] + #[allow(non_snake_case)] + fn $struct() { + let mut config = ProptestConfig { + cases: $test_cases * env_config("CONVEX_PROPTEST_MULTIPLIER", 1), + failure_persistence: None, + ..ProptestConfig::default() + }; + config.test_name = Some(concat!(module_path!(), "::test_roundtrips")); + proptest::test_runner::TestRunner::new(config) + .run(&any::<$struct>(), |left| { + let right = + $struct::try_from(value::ConvexObject::try_from(left.clone()).unwrap()) + .unwrap(); + prop_assert_eq!(left, right); + Ok(()) + }) + .unwrap(); + } + } + }; +} + +/// For forwards compatibility on enums, it's often useful to preserve an +/// unknown variant as a raw `ConvexObject`. 
To do so, wrap your enum in this +/// struct: +/// ```ignore,rust +/// #[derive(Serialize, Deserialize)] +/// struct SerializedStruct { +/// state: WithUnknown, +/// another_field: String, +/// } +/// +/// #[derive(Serialize, Deserialize)] +/// #[serde(tag = "type")] +/// enum SerializedEnum { +/// Variant1 { +/// field: i32, +/// }, +/// Variant2 { +/// another_field: String, +/// }, +/// } +/// ``` +/// With this setup, `state` will be `WithUnknown::Unknown` when +/// `SerializedEnum` fails to deserialize, so we can preserve an unknown variant +/// for forwards compatibility. +#[derive(Serialize, Deserialize)] +#[serde(untagged)] +pub enum WithUnknown { + Known(T), + Unknown(ConvexObject), +} diff --git a/crates/value/src/serde/ser.rs b/crates/value/src/serde/ser.rs new file mode 100644 index 00000000..9f60b0ae --- /dev/null +++ b/crates/value/src/serde/ser.rs @@ -0,0 +1,677 @@ +use std::{ + collections::BTreeMap, + fmt::{ + self, + Display, + }, + num::TryFromIntError, +}; + +use serde::{ + ser::{ + Error as SerdeError, + Impossible, + }, + Serialize, +}; + +use crate::{ + ConvexObject, + ConvexValue, + FieldName, +}; + +#[derive(thiserror::Error)] +pub enum Error { + #[error("Integer isn't in range for ConvexValue::Int64: {0:?}.")] + IntegerOutofRange(#[from] TryFromIntError), + + #[error("f32s aren't supported, use an f64 instead.")] + Float32Unsupported, + + #[error("chars aren't supported, use a string instead.")] + CharUnsupported, + + #[error("Tuple structs aren't supported.")] + TupleStructsUnsupported, + + #[error("Unit structs aren't supported.")] + UnitStructUnsupported, + + #[error("Newtype structs aren't supported.")] + NewtypeStructUnsupported, + + #[error("Invalid field {field} for Convex object: {err}")] + InvalidField { field: String, err: String }, + + #[error( + "Struct enum variants unsupported. Set #[serde(tag = \"type\")] to serialize as a regular \ + object." + )] + StructVariantsUnsupported, + + #[error( + "Unit enum variants unsupported. 
Set #[serde(tag = \"type\")] to serialize as a regular \ + object." + )] + EnumVariantsUnsupported, + + #[error( + "Newtype enum variants unsupported. Set #[serde(tag = \"type\")] to serialize as a \ + regular object." + )] + NewtypeVariantsUnsupported, + + #[error( + "Tuple enum variants unsupported. Set #[serde(tag = \"type\")] to serialize as a regular \ + object." + )] + TupleVariantsUnsupported, + + #[error("{0}")] + Custom(String), + + #[error(transparent)] + Anyhow(#[from] anyhow::Error), +} + +impl fmt::Debug for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{self}") + } +} + +impl SerdeError for Error { + fn custom(msg: T) -> Self { + Error::Custom(msg.to_string()) + } +} + +type Result = std::result::Result; + +struct Serializer; + +impl serde::Serializer for Serializer { + type Error = Error; + type Ok = ConvexValue; + type SerializeMap = SerializeObject; + type SerializeSeq = SerializeVec; + type SerializeStruct = SerializeObject; + type SerializeStructVariant = Impossible; + type SerializeTuple = SerializeVec; + type SerializeTupleStruct = Impossible; + type SerializeTupleVariant = Impossible; + + #[inline] + fn serialize_bool(self, value: bool) -> Result { + Ok(ConvexValue::Boolean(value)) + } + + #[inline] + fn serialize_i8(self, value: i8) -> Result { + Ok(ConvexValue::Int64(value as i64)) + } + + #[inline] + fn serialize_i16(self, value: i16) -> Result { + Ok(ConvexValue::Int64(value as i64)) + } + + #[inline] + fn serialize_i32(self, value: i32) -> Result { + Ok(ConvexValue::Int64(value as i64)) + } + + fn serialize_i64(self, value: i64) -> Result { + Ok(ConvexValue::Int64(value)) + } + + fn serialize_i128(self, value: i128) -> Result { + Ok(ConvexValue::Int64(value.try_into()?)) + } + + #[inline] + fn serialize_u8(self, value: u8) -> Result { + Ok(ConvexValue::Int64(value as i64)) + } + + #[inline] + fn serialize_u16(self, value: u16) -> Result { + Ok(ConvexValue::Int64(value as i64)) + } + + #[inline] + fn 
serialize_u32(self, value: u32) -> Result { + Ok(ConvexValue::Int64(value as i64)) + } + + #[inline] + fn serialize_u64(self, value: u64) -> Result { + Ok(ConvexValue::Int64(value.try_into()?)) + } + + fn serialize_u128(self, value: u128) -> Result { + Ok(ConvexValue::Int64(value.try_into()?)) + } + + #[inline] + fn serialize_f32(self, _float: f32) -> Result { + // We don't serialize `f32` so we don't have to worry about roundtripping from + // f32 to f64 to f32. + Err(Error::Float32Unsupported) + } + + #[inline] + fn serialize_f64(self, float: f64) -> Result { + Ok(ConvexValue::Float64(float)) + } + + #[inline] + fn serialize_char(self, _value: char) -> Result { + Err(Error::CharUnsupported) + } + + #[inline] + fn serialize_str(self, value: &str) -> Result { + Ok(ConvexValue::String(value.try_into()?)) + } + + fn serialize_bytes(self, value: &[u8]) -> Result { + Ok(ConvexValue::Bytes(value.to_vec().try_into()?)) + } + + #[inline] + fn serialize_unit(self) -> Result { + Ok(ConvexValue::Null) + } + + #[inline] + fn serialize_unit_struct(self, _name: &'static str) -> Result { + Err(Error::UnitStructUnsupported) + } + + #[inline] + fn serialize_unit_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + ) -> Result { + Err(Error::EnumVariantsUnsupported) + } + + #[inline] + fn serialize_newtype_struct(self, _name: &'static str, _value: &T) -> Result + where + T: ?Sized + Serialize, + { + Err(Error::NewtypeStructUnsupported) + } + + fn serialize_newtype_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _value: &T, + ) -> Result + where + T: ?Sized + Serialize, + { + Err(Error::NewtypeVariantsUnsupported) + } + + #[inline] + fn serialize_none(self) -> Result { + Ok(ConvexValue::Null) + } + + #[inline] + fn serialize_some(self, value: &T) -> Result + where + T: ?Sized + Serialize, + { + value.serialize(self) + } + + fn serialize_seq(self, len: Option) -> Result { + Ok(SerializeVec { + vec: 
Vec::with_capacity(len.unwrap_or(0)), + }) + } + + fn serialize_tuple(self, len: usize) -> Result { + Ok(SerializeVec { + vec: Vec::with_capacity(len), + }) + } + + fn serialize_tuple_struct( + self, + _name: &'static str, + _len: usize, + ) -> Result { + Err(Error::TupleStructsUnsupported) + } + + fn serialize_tuple_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _len: usize, + ) -> Result { + Err(Error::TupleVariantsUnsupported) + } + + fn serialize_map(self, _len: Option) -> Result { + Ok(SerializeObject { + fields: BTreeMap::new(), + next_key: None, + }) + } + + fn serialize_struct(self, _name: &'static str, _len: usize) -> Result { + Ok(SerializeObject { + fields: BTreeMap::new(), + next_key: None, + }) + } + + fn serialize_struct_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _len: usize, + ) -> Result { + Err(Error::StructVariantsUnsupported) + } + + fn collect_str(self, value: &T) -> Result + where + T: ?Sized + Display, + { + Ok(ConvexValue::String(value.to_string().try_into()?)) + } +} + +struct SerializeVec { + vec: Vec, +} + +impl serde::ser::SerializeSeq for SerializeVec { + type Error = Error; + type Ok = ConvexValue; + + fn serialize_element(&mut self, value: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + self.vec.push(to_value(value)?); + Ok(()) + } + + fn end(self) -> Result { + Ok(ConvexValue::Array(self.vec.try_into()?)) + } +} + +impl serde::ser::SerializeTuple for SerializeVec { + type Error = Error; + type Ok = ConvexValue; + + fn serialize_element(&mut self, value: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + self.vec.push(to_value(value)?); + Ok(()) + } + + fn end(self) -> Result { + Ok(ConvexValue::Array(self.vec.try_into()?)) + } +} + +struct SerializeObject { + fields: BTreeMap, + next_key: Option, +} + +impl serde::ser::SerializeMap for SerializeObject { + type Error = Error; + type Ok = ConvexValue; + + fn serialize_key(&mut 
self, key: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + assert!( + self.next_key.is_none(), + "serialize_key called twice without serialize_value" + ); + self.next_key = Some(key.serialize(FieldSerializer)?); + Ok(()) + } + + fn serialize_value(&mut self, value: &T) -> Result<()> + where + T: ?Sized + Serialize, + { + let key = self + .next_key + .take() + .expect("serialize_value called without preceding serialize_key"); + self.fields.insert(key, to_value(value)?); + Ok(()) + } + + fn end(self) -> Result { + Ok(ConvexValue::Object(self.fields.try_into()?)) + } +} + +struct FieldSerializer; + +impl serde::Serializer for FieldSerializer { + type Error = Error; + type Ok = FieldName; + type SerializeMap = Impossible; + type SerializeSeq = Impossible; + type SerializeStruct = Impossible; + type SerializeStructVariant = Impossible; + type SerializeTuple = Impossible; + type SerializeTupleStruct = Impossible; + type SerializeTupleVariant = Impossible; + + #[inline] + fn serialize_str(self, value: &str) -> Result { + value.parse::().map_err(|e| Error::InvalidField { + field: value.to_string(), + err: e.to_string(), + }) + } + + #[inline] + fn serialize_bool(self, value: bool) -> Result { + Err(Error::InvalidField { + field: value.to_string(), + err: "fields must be strings".to_string(), + }) + } + + #[inline] + fn serialize_i8(self, value: i8) -> Result { + Err(Error::InvalidField { + field: value.to_string(), + err: "fields must be strings".to_string(), + }) + } + + #[inline] + fn serialize_i16(self, value: i16) -> Result { + Err(Error::InvalidField { + field: value.to_string(), + err: "fields must be strings".to_string(), + }) + } + + #[inline] + fn serialize_i32(self, value: i32) -> Result { + Err(Error::InvalidField { + field: value.to_string(), + err: "fields must be strings".to_string(), + }) + } + + fn serialize_i64(self, value: i64) -> Result { + Err(Error::InvalidField { + field: value.to_string(), + err: "fields must be strings".to_string(), + }) + 
} + + fn serialize_i128(self, value: i128) -> Result { + Err(Error::InvalidField { + field: value.to_string(), + err: "fields must be strings".to_string(), + }) + } + + #[inline] + fn serialize_u8(self, value: u8) -> Result { + Err(Error::InvalidField { + field: value.to_string(), + err: "fields must be strings".to_string(), + }) + } + + #[inline] + fn serialize_u16(self, value: u16) -> Result { + Err(Error::InvalidField { + field: value.to_string(), + err: "fields must be strings".to_string(), + }) + } + + #[inline] + fn serialize_u32(self, value: u32) -> Result { + Err(Error::InvalidField { + field: value.to_string(), + err: "fields must be strings".to_string(), + }) + } + + #[inline] + fn serialize_u64(self, value: u64) -> Result { + Err(Error::InvalidField { + field: value.to_string(), + err: "fields must be strings".to_string(), + }) + } + + fn serialize_u128(self, value: u128) -> Result { + Err(Error::InvalidField { + field: value.to_string(), + err: "fields must be strings".to_string(), + }) + } + + #[inline] + fn serialize_f32(self, value: f32) -> Result { + Err(Error::InvalidField { + field: value.to_string(), + err: "fields must be strings".to_string(), + }) + } + + #[inline] + fn serialize_f64(self, value: f64) -> Result { + Err(Error::InvalidField { + field: value.to_string(), + err: "fields must be strings".to_string(), + }) + } + + #[inline] + fn serialize_char(self, value: char) -> Result { + Err(Error::InvalidField { + field: value.to_string(), + err: "fields must be strings".to_string(), + }) + } + + fn serialize_bytes(self, _value: &[u8]) -> Result { + Err(Error::InvalidField { + field: "bytes".to_string(), + err: "fields must be strings".to_string(), + }) + } + + #[inline] + fn serialize_unit(self) -> Result { + Err(Error::InvalidField { + field: "unit".to_string(), + err: "fields must be strings".to_string(), + }) + } + + #[inline] + fn serialize_unit_struct(self, _name: &'static str) -> Result { + Err(Error::InvalidField { + field: "unit 
struct".to_string(), + err: "fields must be strings".to_string(), + }) + } + + #[inline] + fn serialize_unit_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + ) -> Result { + Err(Error::InvalidField { + field: "unit variant".to_string(), + err: "fields must be strings".to_string(), + }) + } + + #[inline] + fn serialize_newtype_struct(self, _name: &'static str, _value: &T) -> Result + where + T: ?Sized + Serialize, + { + Err(Error::InvalidField { + field: "newtype struct".to_string(), + err: "fields must be strings".to_string(), + }) + } + + fn serialize_newtype_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _value: &T, + ) -> Result + where + T: ?Sized + Serialize, + { + Err(Error::InvalidField { + field: "newtype variant".to_string(), + err: "fields must be strings".to_string(), + }) + } + + #[inline] + fn serialize_none(self) -> Result { + Err(Error::InvalidField { + field: "None".to_string(), + err: "fields must be strings".to_string(), + }) + } + + #[inline] + fn serialize_some(self, _value: &T) -> Result + where + T: ?Sized + Serialize, + { + Err(Error::InvalidField { + field: "Some".to_string(), + err: "fields must be strings".to_string(), + }) + } + + fn serialize_seq(self, _len: Option) -> Result { + Err(Error::InvalidField { + field: "seq".to_string(), + err: "fields must be strings".to_string(), + }) + } + + fn serialize_tuple(self, _len: usize) -> Result { + Err(Error::InvalidField { + field: "tuple".to_string(), + err: "fields must be strings".to_string(), + }) + } + + fn serialize_tuple_struct( + self, + _name: &'static str, + _len: usize, + ) -> Result { + Err(Error::InvalidField { + field: "tuple struct".to_string(), + err: "fields must be strings".to_string(), + }) + } + + fn serialize_tuple_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _len: usize, + ) -> Result { + Err(Error::InvalidField { + field: "tuple 
variant".to_string(), + err: "fields must be strings".to_string(), + }) + } + + fn serialize_map(self, _len: Option) -> Result { + Err(Error::InvalidField { + field: "map".to_string(), + err: "fields must be strings".to_string(), + }) + } + + fn serialize_struct(self, _name: &'static str, _len: usize) -> Result { + Err(Error::InvalidField { + field: "struct".to_string(), + err: "fields must be strings".to_string(), + }) + } + + fn serialize_struct_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _len: usize, + ) -> Result { + Err(Error::InvalidField { + field: "struct variant".to_string(), + err: "fields must be strings".to_string(), + }) + } +} + +impl serde::ser::SerializeStruct for SerializeObject { + type Error = Error; + type Ok = ConvexValue; + + fn serialize_field( + &mut self, + field: &'static str, + value: &T, + ) -> Result<()> { + self.fields.insert(field.parse()?, to_value(value)?); + Ok(()) + } + + fn end(self) -> Result { + Ok(ConvexValue::Object(self.fields.try_into()?)) + } +} + +pub fn to_value(value: T) -> Result { + value.serialize(Serializer) +} + +pub fn to_object(value: T) -> Result { + Ok(to_value(value)?.try_into()?) 
+} diff --git a/crates/value/src/serde/value.rs b/crates/value/src/serde/value.rs new file mode 100644 index 00000000..5eb62ac3 --- /dev/null +++ b/crates/value/src/serde/value.rs @@ -0,0 +1,228 @@ +use std::collections::BTreeMap; + +use serde::{ + de::Error as DeError, + ser::{ + Error as SerError, + SerializeMap, + SerializeSeq, + }, + Deserialize, + Serialize, +}; + +use crate::{ + ConvexArray, + ConvexObject, + ConvexValue, + FieldName, +}; + +impl Serialize for ConvexValue { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + match self { + ConvexValue::Null => serializer.serialize_unit(), + ConvexValue::Int64(n) => serializer.serialize_i64(*n), + ConvexValue::Float64(n) => serializer.serialize_f64(*n), + ConvexValue::Boolean(b) => serializer.serialize_bool(*b), + ConvexValue::String(s) => serializer.serialize_str(s), + ConvexValue::Bytes(b) => serializer.serialize_bytes(b), + ConvexValue::Array(a) => a.serialize(serializer), + ConvexValue::Set(_) => Err(S::Error::custom("Set serialization not supported")), + ConvexValue::Map(_) => Err(S::Error::custom("Map serialization not supported")), + ConvexValue::Object(o) => o.serialize(serializer), + } + } +} + +impl Serialize for ConvexObject { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + let mut serializer = serializer.serialize_map(Some(self.len()))?; + for (key, value) in self.iter() { + serializer.serialize_entry(key, value)?; + } + serializer.end() + } +} + +impl Serialize for ConvexArray { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + let mut serializer = serializer.serialize_seq(Some(self.len()))?; + for element in self { + serializer.serialize_element(element)?; + } + serializer.end() + } +} + +impl Serialize for FieldName { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + self[..].serialize(serializer) + } +} + +impl<'de> Deserialize<'de> for ConvexValue { + fn 
deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + struct ConvexValueVisitor; + + impl<'de> serde::de::Visitor<'de> for ConvexValueVisitor { + type Value = ConvexValue; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("a ConvexValue") + } + + fn visit_unit(self) -> Result + where + E: serde::de::Error, + { + Ok(ConvexValue::Null) + } + + fn visit_i64(self, v: i64) -> Result + where + E: serde::de::Error, + { + Ok(ConvexValue::Int64(v)) + } + + fn visit_f64(self, v: f64) -> Result + where + E: serde::de::Error, + { + Ok(ConvexValue::Float64(v)) + } + + fn visit_bool(self, v: bool) -> Result + where + E: serde::de::Error, + { + Ok(ConvexValue::Boolean(v)) + } + + fn visit_str(self, v: &str) -> Result + where + E: serde::de::Error, + { + Ok(ConvexValue::String(v.try_into().map_err(E::custom)?)) + } + + fn visit_bytes(self, v: &[u8]) -> Result + where + E: serde::de::Error, + { + Ok(ConvexValue::Bytes( + v.to_vec().try_into().map_err(E::custom)?, + )) + } + + fn visit_seq(self, mut seq: A) -> Result + where + A: serde::de::SeqAccess<'de>, + { + let mut vec = Vec::new(); + while let Some(value) = seq.next_element()? { + vec.push(value); + } + Ok(ConvexValue::Array( + vec.try_into().map_err(A::Error::custom)?, + )) + } + + fn visit_map(self, mut map: A) -> Result + where + A: serde::de::MapAccess<'de>, + { + let mut m = BTreeMap::::new(); + while let Some((key, value)) = map.next_entry()? 
{ + m.insert(key, value); + } + Ok(ConvexValue::Object(m.try_into().map_err(A::Error::custom)?)) + } + } + + deserializer.deserialize_any(ConvexValueVisitor) + } +} + +impl<'de> Deserialize<'de> for ConvexObject { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + let m: BTreeMap = Deserialize::deserialize(deserializer)?; + m.try_into().map_err(D::Error::custom) + } +} + +impl<'de> Deserialize<'de> for ConvexArray { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + let v: Vec = Deserialize::deserialize(deserializer)?; + v.try_into().map_err(D::Error::custom) + } +} + +impl<'de> Deserialize<'de> for FieldName { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + let s = String::deserialize(deserializer)?; + s.parse::().map_err(D::Error::custom) + } +} + +#[cfg(test)] +mod tests { + use proptest::prelude::*; + + use crate::{ + serde::{ + from_value, + to_value, + }, + ConvexValue, + ExcludeSetsAndMaps, + FieldType, + }; + + proptest! { + #![proptest_config( + ProptestConfig { failure_persistence: None, ..ProptestConfig::default() } + )] + + #[test] + fn test_serde_value_roundtrips( + start in any_with::((FieldType::User, ExcludeSetsAndMaps(true))) + ) { + // This is a bit of a funky test. We're going to start with a `ConvexValue`, feed it through Serde's + // data model (with `ConvexValue`'s implementation of `Serialize`) and then serialize that Serde + // representation back into a `ConvexValue` (using our implementation of `ser::Serializer`). Then, + // we'll run the process in reverse: deserialize the `ConvexValue` back into a Serde representation + // and then deserialize that Serde representation back into a `ConvexValue`. + let serialized = to_value(start.clone()).unwrap(); + assert_eq!(start, serialized); + + let deserialized: ConvexValue = from_value(serialized).unwrap(); + assert_eq!(start, deserialized); + } + } +}