chroma_types/
collection_schema.rs

1use chroma_error::{ChromaError, ErrorCodes};
2use regex::Regex;
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use std::sync::{Arc, LazyLock};
6use thiserror::Error;
7use validator::Validate;
8
9use crate::chroma_proto;
10use crate::collection_configuration::{
11    EmbeddingFunctionConfiguration, InternalCollectionConfiguration,
12    UpdateVectorIndexConfiguration, VectorIndexConfiguration,
13};
14use crate::hnsw_configuration::Space;
15use crate::metadata::{MetadataComparison, MetadataValueType, Where};
16use crate::operator::QueryVector;
17use crate::{
18    default_batch_size, default_construction_ef, default_construction_ef_spann,
19    default_initial_lambda, default_m, default_m_spann, default_merge_threshold,
20    default_nreplica_count, default_num_centers_to_merge_to, default_num_samples_kmeans,
21    default_num_threads, default_reassign_neighbor_count, default_resize_factor, default_search_ef,
22    default_search_ef_spann, default_search_nprobe, default_search_rng_epsilon,
23    default_search_rng_factor, default_space, default_split_threshold, default_sync_threshold,
24    default_write_nprobe, default_write_rng_epsilon, default_write_rng_factor, ConversionError,
25    HnswParametersFromSegmentError, InternalHnswConfiguration, InternalSpannConfiguration,
26    InternalUpdateCollectionConfiguration, KnnIndex, Segment, CHROMA_KEY,
27};
28
29impl ChromaError for SchemaError {
30    fn code(&self) -> ErrorCodes {
31        match self {
32            // Internal errors (500)
33            // These indicate system/internal issues during schema operations
34            SchemaError::MissingIndexConfiguration { .. } => ErrorCodes::Internal,
35            SchemaError::InvalidSchema { .. } => ErrorCodes::Internal,
36            // DefaultsMismatch and ConfigurationConflict only occur during schema merge()
37            // which happens internally during compaction, not from user input
38            SchemaError::DefaultsMismatch => ErrorCodes::Internal,
39            SchemaError::ConfigurationConflict { .. } => ErrorCodes::Internal,
40
41            // User/External errors (400)
42            // These indicate user-provided invalid input
43            SchemaError::InvalidUserInput { .. } => ErrorCodes::InvalidArgument,
44            SchemaError::ConfigAndSchemaConflict => ErrorCodes::InvalidArgument,
45            SchemaError::InvalidHnswConfig(_) => ErrorCodes::InvalidArgument,
46            SchemaError::InvalidSpannConfig(_) => ErrorCodes::InvalidArgument,
47            SchemaError::Builder(e) => e.code(),
48        }
49    }
50}
51
52#[derive(Debug, Error)]
53pub enum SchemaError {
54    #[error("Schema is malformed: missing index configuration for metadata key '{key}' with type '{value_type}'")]
55    MissingIndexConfiguration { key: String, value_type: String },
56    #[error("Schema reconciliation failed: {reason}")]
57    InvalidSchema { reason: String },
58    #[error("Cannot set both collection config and schema simultaneously")]
59    ConfigAndSchemaConflict,
60    #[error("Cannot merge schemas with differing defaults")]
61    DefaultsMismatch,
62    #[error("Conflicting configuration for {context}")]
63    ConfigurationConflict { context: String },
64    #[error("Invalid HNSW configuration: {0}")]
65    InvalidHnswConfig(validator::ValidationErrors),
66    #[error("Invalid SPANN configuration: {0}")]
67    InvalidSpannConfig(validator::ValidationErrors),
68    #[error("Invalid schema input: {reason}")]
69    InvalidUserInput { reason: String },
70    #[error(transparent)]
71    Builder(#[from] SchemaBuilderError),
72}
73
74#[derive(Debug, Error)]
75pub enum SchemaBuilderError {
76    #[error("Vector index must be configured globally using create_index(None, config), not on specific key '{key}'")]
77    VectorIndexMustBeGlobal { key: String },
78    #[error("FTS index must be configured globally using create_index(None, config), not on specific key '{key}'")]
79    FtsIndexMustBeGlobal { key: String },
80    #[error("Cannot modify special key '{key}' - it is managed automatically by the system. To customize vector search, modify the global vector config instead.")]
81    SpecialKeyModificationNotAllowed { key: String },
82    #[error("Sparse vector index requires a specific key. Use create_index(Some(\"key_name\"), config) instead of create_index(None, config)")]
83    SparseVectorRequiresKey,
84    #[error("Only one sparse vector index allowed per collection. Key '{existing_key}' already has a sparse vector index. Remove it first or use that key.")]
85    MultipleSparseVectorIndexes { existing_key: String },
86    #[error("Vector index deletion not supported. The vector index is always enabled on #embedding. To disable vector search, disable the collection instead.")]
87    VectorIndexDeletionNotSupported,
88    #[error("FTS index deletion not supported. The FTS index is always enabled on #document. To disable full-text search, use a different collection without FTS.")]
89    FtsIndexDeletionNotSupported,
90    #[error("Sparse vector index deletion not supported yet. Sparse vector indexes cannot be removed once created.")]
91    SparseVectorIndexDeletionNotSupported,
92}
93
94#[derive(Debug, Error)]
95pub enum FilterValidationError {
96    #[error(
97        "Cannot filter using metadata key '{key}' with type '{value_type:?}' because indexing is disabled"
98    )]
99    IndexingDisabled {
100        key: String,
101        value_type: MetadataValueType,
102    },
103    #[error(transparent)]
104    Schema(#[from] SchemaError),
105}
106
107impl ChromaError for SchemaBuilderError {
108    fn code(&self) -> ErrorCodes {
109        ErrorCodes::InvalidArgument
110    }
111}
112
113impl ChromaError for FilterValidationError {
114    fn code(&self) -> ErrorCodes {
115        match self {
116            FilterValidationError::IndexingDisabled { .. } => ErrorCodes::InvalidArgument,
117            FilterValidationError::Schema(_) => ErrorCodes::Internal,
118        }
119    }
120}
121
// ============================================================================
// SCHEMA CONSTANTS
// ============================================================================
// These constants must match the Python constants in chromadb/api/types.py

// ---- Value type names ----

/// Name of the string value type.
pub const STRING_VALUE_NAME: &str = "string";
/// Name of the integer value type.
pub const INT_VALUE_NAME: &str = "int";
/// Name of the boolean value type.
pub const BOOL_VALUE_NAME: &str = "bool";
/// Name of the float value type.
pub const FLOAT_VALUE_NAME: &str = "float";
/// Name of the float-list value type.
pub const FLOAT_LIST_VALUE_NAME: &str = "float_list";
/// Name of the sparse-vector value type.
pub const SPARSE_VECTOR_VALUE_NAME: &str = "sparse_vector";

// ---- Index type names ----

/// Name of the full-text search index.
pub const FTS_INDEX_NAME: &str = "fts_index";
/// Name of the vector index.
pub const VECTOR_INDEX_NAME: &str = "vector_index";
/// Name of the sparse vector index.
pub const SPARSE_VECTOR_INDEX_NAME: &str = "sparse_vector_index";
/// Name of the string inverted index.
pub const STRING_INVERTED_INDEX_NAME: &str = "string_inverted_index";
/// Name of the integer inverted index.
pub const INT_INVERTED_INDEX_NAME: &str = "int_inverted_index";
/// Name of the float inverted index.
pub const FLOAT_INVERTED_INDEX_NAME: &str = "float_inverted_index";
/// Name of the boolean inverted index.
pub const BOOL_INVERTED_INDEX_NAME: &str = "bool_inverted_index";

// ---- Special metadata keys (must match Python constants in chromadb/api/types.py) ----

/// Special key under which the document's index overrides are stored.
pub const DOCUMENT_KEY: &str = "#document";
/// Special key under which the embedding's index overrides are stored.
pub const EMBEDDING_KEY: &str = "#embedding";
147
148// Static regex pattern to validate CMEK for GCP
149static CMEK_GCP_RE: LazyLock<Regex> = LazyLock::new(|| {
150    Regex::new(r"^projects/.+/locations/.+/keyRings/.+/cryptoKeys/.+$")
151        .expect("The CMEK pattern for GCP should be valid")
152});
153
154/// Customer-managed encryption key for storage encryption.
155///
156/// CMEK allows you to use your own encryption keys managed by cloud providers'
157/// key management services (KMS) instead of default provider-managed keys.
158#[derive(Clone, Debug, Eq, PartialEq, serde::Deserialize, serde::Serialize)]
159#[serde(rename_all = "snake_case")]
160pub enum Cmek {
161    /// Google Cloud Platform KMS key resource name.
162    ///
163    /// Format: `projects/{project}/locations/{location}/keyRings/{keyRing}/cryptoKeys/{cryptoKey}`
164    Gcp(Arc<String>),
165}
166
167impl Cmek {
168    /// Create a GCP CMEK from a KMS resource name
169    ///
170    /// # Example
171    /// ```
172    /// use chroma_types::Cmek;
173    /// let cmek = Cmek::gcp(
174    ///     "projects/my-project/locations/us-central1/keyRings/my-ring/cryptoKeys/my-key".to_string()
175    /// );
176    /// ```
177    pub fn gcp(resource: String) -> Self {
178        Cmek::Gcp(Arc::new(resource))
179    }
180
181    /// Validates that the CMEK resource name matches the expected pattern.
182    ///
183    /// Returns `true` if the resource name is well-formed according to the
184    /// provider's format requirements. Does not verify that the key exists
185    /// or is accessible.
186    pub fn validate_pattern(&self) -> bool {
187        match self {
188            Cmek::Gcp(resource) => CMEK_GCP_RE.is_match(resource),
189        }
190    }
191}
192
193impl TryFrom<chroma_proto::Cmek> for Cmek {
194    type Error = ConversionError;
195
196    fn try_from(proto: chroma_proto::Cmek) -> Result<Self, Self::Error> {
197        match proto.provider {
198            Some(chroma_proto::cmek::Provider::Gcp(resource)) => Ok(Cmek::gcp(resource)),
199            None => Err(ConversionError::DecodeError),
200        }
201    }
202}
203
204impl From<Cmek> for chroma_proto::Cmek {
205    fn from(cmek: Cmek) -> Self {
206        match cmek {
207            Cmek::Gcp(resource) => chroma_proto::Cmek {
208                provider: Some(chroma_proto::cmek::Provider::Gcp((*resource).clone())),
209            },
210        }
211    }
212}
213
214// ============================================================================
215// SCHEMA STRUCTURES
216// ============================================================================
217
218/// Schema representation for collection index configurations
219///
220/// This represents the server-side schema structure used for index management
221
222#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
223#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
224pub struct Schema {
225    /// Default index configurations for each value type
226    pub defaults: ValueTypes,
227    /// Key-specific index overrides
228    /// TODO(Sanket): Needed for backwards compatibility. Should remove after deploy.
229    #[serde(rename = "keys", alias = "key_overrides")]
230    pub keys: HashMap<String, ValueTypes>,
231    /// Customer-managed encryption key for collection data
232    #[serde(skip_serializing_if = "Option::is_none")]
233    #[cfg_attr(feature = "utoipa", schema(value_type = Option<Object>))]
234    pub cmek: Option<Cmek>,
235    /// ID of the attached function that created this output collection (if applicable)
236    #[serde(skip_serializing_if = "Option::is_none")]
237    pub source_attached_function_id: Option<String>,
238}
239
240impl Schema {
241    pub fn update(&mut self, configuration: &InternalUpdateCollectionConfiguration) {
242        if let Some(vector_update) = &configuration.vector_index {
243            if let Some(default_vector_index) = self.defaults_vector_index_mut() {
244                Self::apply_vector_index_update(default_vector_index, vector_update);
245            }
246            if let Some(embedding_vector_index) = self.embedding_vector_index_mut() {
247                Self::apply_vector_index_update(embedding_vector_index, vector_update);
248            }
249        }
250
251        if let Some(embedding_function) = configuration.embedding_function.as_ref() {
252            if let Some(default_vector_index) = self.defaults_vector_index_mut() {
253                default_vector_index.config.embedding_function = Some(embedding_function.clone());
254            }
255            if let Some(embedding_vector_index) = self.embedding_vector_index_mut() {
256                embedding_vector_index.config.embedding_function = Some(embedding_function.clone());
257            }
258        }
259    }
260
261    fn defaults_vector_index_mut(&mut self) -> Option<&mut VectorIndexType> {
262        self.defaults
263            .float_list
264            .as_mut()
265            .and_then(|float_list| float_list.vector_index.as_mut())
266    }
267
268    fn embedding_vector_index_mut(&mut self) -> Option<&mut VectorIndexType> {
269        self.keys
270            .get_mut(EMBEDDING_KEY)
271            .and_then(|value_types| value_types.float_list.as_mut())
272            .and_then(|float_list| float_list.vector_index.as_mut())
273    }
274
275    fn apply_vector_index_update(
276        vector_index: &mut VectorIndexType,
277        update: &UpdateVectorIndexConfiguration,
278    ) {
279        match update {
280            UpdateVectorIndexConfiguration::Hnsw(Some(hnsw_update)) => {
281                if let Some(hnsw_config) = vector_index.config.hnsw.as_mut() {
282                    if let Some(ef_search) = hnsw_update.ef_search {
283                        hnsw_config.ef_search = Some(ef_search);
284                    }
285                    if let Some(max_neighbors) = hnsw_update.max_neighbors {
286                        hnsw_config.max_neighbors = Some(max_neighbors);
287                    }
288                    if let Some(num_threads) = hnsw_update.num_threads {
289                        hnsw_config.num_threads = Some(num_threads);
290                    }
291                    if let Some(resize_factor) = hnsw_update.resize_factor {
292                        hnsw_config.resize_factor = Some(resize_factor);
293                    }
294                    if let Some(sync_threshold) = hnsw_update.sync_threshold {
295                        hnsw_config.sync_threshold = Some(sync_threshold);
296                    }
297                    if let Some(batch_size) = hnsw_update.batch_size {
298                        hnsw_config.batch_size = Some(batch_size);
299                    }
300                }
301            }
302            UpdateVectorIndexConfiguration::Hnsw(None) => {}
303            UpdateVectorIndexConfiguration::Spann(Some(spann_update)) => {
304                if let Some(spann_config) = vector_index.config.spann.as_mut() {
305                    if let Some(search_nprobe) = spann_update.search_nprobe {
306                        spann_config.search_nprobe = Some(search_nprobe);
307                    }
308                    if let Some(ef_search) = spann_update.ef_search {
309                        spann_config.ef_search = Some(ef_search);
310                    }
311                }
312            }
313            UpdateVectorIndexConfiguration::Spann(None) => {}
314        }
315    }
316
317    pub fn is_sparse_index_enabled(&self) -> bool {
318        let defaults_enabled = self
319            .defaults
320            .sparse_vector
321            .as_ref()
322            .and_then(|sv| sv.sparse_vector_index.as_ref())
323            .is_some_and(|idx| idx.enabled);
324        let key_enabled = self.keys.values().any(|value_types| {
325            value_types
326                .sparse_vector
327                .as_ref()
328                .and_then(|sv| sv.sparse_vector_index.as_ref())
329                .is_some_and(|idx| idx.enabled)
330        });
331        defaults_enabled || key_enabled
332    }
333}
334
335impl Default for Schema {
336    /// Create a default Schema that matches Python's behavior exactly.
337    ///
338    /// Python creates a Schema with:
339    /// - All inverted indexes enabled by default (string, int, float, bool)
340    /// - Vector and FTS indexes disabled in defaults
341    /// - Special keys configured: #document (FTS enabled) and #embedding (vector enabled)
342    /// - Vector config has space=None, hnsw=None, spann=None (deferred to backend)
343    ///
344    /// # Examples
345    /// ```
346    /// use chroma_types::Schema;
347    ///
348    /// let schema = Schema::default();
349    /// assert!(schema.keys.contains_key("#document"));
350    /// assert!(schema.keys.contains_key("#embedding"));
351    /// ```
352    fn default() -> Self {
353        // Initialize defaults - match Python's _initialize_defaults()
354        let defaults = ValueTypes {
355            string: Some(StringValueType {
356                fts_index: Some(FtsIndexType {
357                    enabled: false,
358                    config: FtsIndexConfig {},
359                }),
360                string_inverted_index: Some(StringInvertedIndexType {
361                    enabled: true,
362                    config: StringInvertedIndexConfig {},
363                }),
364            }),
365            float_list: Some(FloatListValueType {
366                vector_index: Some(VectorIndexType {
367                    enabled: false,
368                    config: VectorIndexConfig {
369                        space: None, // Python leaves as None (resolved on serialization)
370                        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
371                        source_key: None,
372                        hnsw: None,  // Python doesn't specify
373                        spann: None, // Python doesn't specify
374                    },
375                }),
376            }),
377            sparse_vector: Some(SparseVectorValueType {
378                sparse_vector_index: Some(SparseVectorIndexType {
379                    enabled: false,
380                    config: SparseVectorIndexConfig {
381                        embedding_function: None,
382                        source_key: None,
383                        bm25: None,
384                    },
385                }),
386            }),
387            int: Some(IntValueType {
388                int_inverted_index: Some(IntInvertedIndexType {
389                    enabled: true,
390                    config: IntInvertedIndexConfig {},
391                }),
392            }),
393            float: Some(FloatValueType {
394                float_inverted_index: Some(FloatInvertedIndexType {
395                    enabled: true,
396                    config: FloatInvertedIndexConfig {},
397                }),
398            }),
399            boolean: Some(BoolValueType {
400                bool_inverted_index: Some(BoolInvertedIndexType {
401                    enabled: true,
402                    config: BoolInvertedIndexConfig {},
403                }),
404            }),
405        };
406
407        // Initialize key-specific overrides - match Python's _initialize_keys()
408        let mut keys = HashMap::new();
409
410        // #document: FTS enabled, string inverted disabled
411        keys.insert(
412            DOCUMENT_KEY.to_string(),
413            ValueTypes {
414                string: Some(StringValueType {
415                    fts_index: Some(FtsIndexType {
416                        enabled: true,
417                        config: FtsIndexConfig {},
418                    }),
419                    string_inverted_index: Some(StringInvertedIndexType {
420                        enabled: false,
421                        config: StringInvertedIndexConfig {},
422                    }),
423                }),
424                ..Default::default()
425            },
426        );
427
428        // #embedding: Vector index enabled with source_key=#document
429        keys.insert(
430            EMBEDDING_KEY.to_string(),
431            ValueTypes {
432                float_list: Some(FloatListValueType {
433                    vector_index: Some(VectorIndexType {
434                        enabled: true,
435                        config: VectorIndexConfig {
436                            space: None, // Python leaves as None (resolved on serialization)
437                            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
438                            source_key: Some(DOCUMENT_KEY.to_string()),
439                            hnsw: None,  // Python doesn't specify
440                            spann: None, // Python doesn't specify
441                        },
442                    }),
443                }),
444                ..Default::default()
445            },
446        );
447
448        Schema {
449            defaults,
450            keys,
451            cmek: None,
452            source_attached_function_id: None,
453        }
454    }
455}
456
457pub fn is_embedding_function_default(
458    embedding_function: &Option<EmbeddingFunctionConfiguration>,
459) -> bool {
460    match embedding_function {
461        None => true,
462        Some(embedding_function) => embedding_function.is_default(),
463    }
464}
465
466/// Check if space is default (None means default, or if present, should be default space)
467pub fn is_space_default(space: &Option<Space>) -> bool {
468    match space {
469        None => true,                     // None means default
470        Some(s) => *s == default_space(), // If present, check if it's the default space
471    }
472}
473
474/// Check if HNSW config is default
475pub fn is_hnsw_config_default(hnsw_config: &HnswIndexConfig) -> bool {
476    hnsw_config.ef_construction == Some(default_construction_ef())
477        && hnsw_config.ef_search == Some(default_search_ef())
478        && hnsw_config.max_neighbors == Some(default_m())
479        && hnsw_config.num_threads == Some(default_num_threads())
480        && hnsw_config.batch_size == Some(default_batch_size())
481        && hnsw_config.sync_threshold == Some(default_sync_threshold())
482        && hnsw_config.resize_factor == Some(default_resize_factor())
483}
484
485// ============================================================================
486// NEW STRONGLY-TYPED SCHEMA STRUCTURES
487// ============================================================================
488
489/// Strongly-typed value type configurations
490/// Contains optional configurations for each supported value type
491#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Default)]
492#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
493pub struct ValueTypes {
494    #[serde(
495        rename = "string",
496        alias = "#string",
497        skip_serializing_if = "Option::is_none"
498    )] // STRING_VALUE_NAME
499    pub string: Option<StringValueType>,
500
501    #[serde(
502        rename = "float_list",
503        alias = "#float_list",
504        skip_serializing_if = "Option::is_none"
505    )]
506    // FLOAT_LIST_VALUE_NAME
507    pub float_list: Option<FloatListValueType>,
508
509    #[serde(
510        rename = "sparse_vector",
511        alias = "#sparse_vector",
512        skip_serializing_if = "Option::is_none"
513    )]
514    // SPARSE_VECTOR_VALUE_NAME
515    pub sparse_vector: Option<SparseVectorValueType>,
516
517    #[serde(
518        rename = "int",
519        alias = "#int",
520        skip_serializing_if = "Option::is_none"
521    )] // INT_VALUE_NAME
522    pub int: Option<IntValueType>,
523
524    #[serde(
525        rename = "float",
526        alias = "#float",
527        skip_serializing_if = "Option::is_none"
528    )] // FLOAT_VALUE_NAME
529    pub float: Option<FloatValueType>,
530
531    #[serde(
532        rename = "bool",
533        alias = "#bool",
534        skip_serializing_if = "Option::is_none"
535    )] // BOOL_VALUE_NAME
536    pub boolean: Option<BoolValueType>,
537}
538
539/// String value type index configurations
540#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
541#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
542pub struct StringValueType {
543    #[serde(
544        rename = "fts_index",
545        alias = "$fts_index",
546        skip_serializing_if = "Option::is_none"
547    )] // FTS_INDEX_NAME
548    pub fts_index: Option<FtsIndexType>,
549
550    #[serde(
551        rename = "string_inverted_index", // STRING_INVERTED_INDEX_NAME
552        alias = "$string_inverted_index",
553        skip_serializing_if = "Option::is_none"
554    )]
555    pub string_inverted_index: Option<StringInvertedIndexType>,
556}
557
558/// Float list value type index configurations (for vectors)
559#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
560#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
561pub struct FloatListValueType {
562    #[serde(
563        rename = "vector_index",
564        alias = "$vector_index",
565        skip_serializing_if = "Option::is_none"
566    )] // VECTOR_INDEX_NAME
567    pub vector_index: Option<VectorIndexType>,
568}
569
570/// Sparse vector value type index configurations
571#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
572#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
573pub struct SparseVectorValueType {
574    #[serde(
575        rename = "sparse_vector_index", // SPARSE_VECTOR_INDEX_NAME
576        alias = "$sparse_vector_index",
577        skip_serializing_if = "Option::is_none"
578    )]
579    pub sparse_vector_index: Option<SparseVectorIndexType>,
580}
581
582/// Integer value type index configurations
583#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
584#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
585pub struct IntValueType {
586    #[serde(
587        rename = "int_inverted_index",
588        alias = "$int_inverted_index",
589        skip_serializing_if = "Option::is_none"
590    )]
591    // INT_INVERTED_INDEX_NAME
592    pub int_inverted_index: Option<IntInvertedIndexType>,
593}
594
595/// Float value type index configurations
596#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
597#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
598pub struct FloatValueType {
599    #[serde(
600        rename = "float_inverted_index", // FLOAT_INVERTED_INDEX_NAME
601        alias = "$float_inverted_index",
602        skip_serializing_if = "Option::is_none"
603    )]
604    pub float_inverted_index: Option<FloatInvertedIndexType>,
605}
606
607/// Boolean value type index configurations
608#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
609#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
610pub struct BoolValueType {
611    #[serde(
612        rename = "bool_inverted_index", // BOOL_INVERTED_INDEX_NAME
613        alias = "$bool_inverted_index",
614        skip_serializing_if = "Option::is_none"
615    )]
616    pub bool_inverted_index: Option<BoolInvertedIndexType>,
617}
618
619// Individual index type structs with enabled status and config
620#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
621#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
622pub struct FtsIndexType {
623    pub enabled: bool,
624    pub config: FtsIndexConfig,
625}
626
627#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
628#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
629pub struct VectorIndexType {
630    pub enabled: bool,
631    pub config: VectorIndexConfig,
632}
633
634#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
635#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
636pub struct SparseVectorIndexType {
637    pub enabled: bool,
638    pub config: SparseVectorIndexConfig,
639}
640
641#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
642#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
643pub struct StringInvertedIndexType {
644    pub enabled: bool,
645    pub config: StringInvertedIndexConfig,
646}
647
648#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
649#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
650pub struct IntInvertedIndexType {
651    pub enabled: bool,
652    pub config: IntInvertedIndexConfig,
653}
654
655#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
656#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
657pub struct FloatInvertedIndexType {
658    pub enabled: bool,
659    pub config: FloatInvertedIndexConfig,
660}
661
662#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
663#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
664pub struct BoolInvertedIndexType {
665    pub enabled: bool,
666    pub config: BoolInvertedIndexConfig,
667}
668
669impl Schema {
670    /// Create a new Schema with strongly-typed default configurations
671    pub fn new_default(default_knn_index: KnnIndex) -> Self {
672        // Vector index disabled on all keys except #embedding.
673        let vector_config = VectorIndexType {
674            enabled: false,
675            config: VectorIndexConfig {
676                space: Some(default_space()),
677                embedding_function: None,
678                source_key: None,
679                hnsw: match default_knn_index {
680                    KnnIndex::Hnsw => Some(HnswIndexConfig {
681                        ef_construction: Some(default_construction_ef()),
682                        max_neighbors: Some(default_m()),
683                        ef_search: Some(default_search_ef()),
684                        num_threads: Some(default_num_threads()),
685                        batch_size: Some(default_batch_size()),
686                        sync_threshold: Some(default_sync_threshold()),
687                        resize_factor: Some(default_resize_factor()),
688                    }),
689                    KnnIndex::Spann => None,
690                },
691                spann: match default_knn_index {
692                    KnnIndex::Hnsw => None,
693                    KnnIndex::Spann => Some(SpannIndexConfig {
694                        search_nprobe: Some(default_search_nprobe()),
695                        search_rng_factor: Some(default_search_rng_factor()),
696                        search_rng_epsilon: Some(default_search_rng_epsilon()),
697                        nreplica_count: Some(default_nreplica_count()),
698                        write_rng_factor: Some(default_write_rng_factor()),
699                        write_rng_epsilon: Some(default_write_rng_epsilon()),
700                        split_threshold: Some(default_split_threshold()),
701                        num_samples_kmeans: Some(default_num_samples_kmeans()),
702                        initial_lambda: Some(default_initial_lambda()),
703                        reassign_neighbor_count: Some(default_reassign_neighbor_count()),
704                        merge_threshold: Some(default_merge_threshold()),
705                        num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
706                        write_nprobe: Some(default_write_nprobe()),
707                        ef_construction: Some(default_construction_ef_spann()),
708                        ef_search: Some(default_search_ef_spann()),
709                        max_neighbors: Some(default_m_spann()),
710                    }),
711                },
712            },
713        };
714
715        // Initialize defaults struct directly instead of using Default::default() + field assignments
716        let defaults = ValueTypes {
717            string: Some(StringValueType {
718                string_inverted_index: Some(StringInvertedIndexType {
719                    enabled: true,
720                    config: StringInvertedIndexConfig {},
721                }),
722                fts_index: Some(FtsIndexType {
723                    enabled: false,
724                    config: FtsIndexConfig {},
725                }),
726            }),
727            float: Some(FloatValueType {
728                float_inverted_index: Some(FloatInvertedIndexType {
729                    enabled: true,
730                    config: FloatInvertedIndexConfig {},
731                }),
732            }),
733            int: Some(IntValueType {
734                int_inverted_index: Some(IntInvertedIndexType {
735                    enabled: true,
736                    config: IntInvertedIndexConfig {},
737                }),
738            }),
739            boolean: Some(BoolValueType {
740                bool_inverted_index: Some(BoolInvertedIndexType {
741                    enabled: true,
742                    config: BoolInvertedIndexConfig {},
743                }),
744            }),
745            float_list: Some(FloatListValueType {
746                vector_index: Some(vector_config),
747            }),
748            sparse_vector: Some(SparseVectorValueType {
749                sparse_vector_index: Some(SparseVectorIndexType {
750                    enabled: false,
751                    config: SparseVectorIndexConfig {
752                        embedding_function: Some(EmbeddingFunctionConfiguration::Unknown),
753                        source_key: None,
754                        bm25: Some(false),
755                    },
756                }),
757            }),
758        };
759
760        // Set up key overrides
761        let mut keys = HashMap::new();
762
763        // Enable vector index for #embedding.
764        let embedding_defaults = ValueTypes {
765            float_list: Some(FloatListValueType {
766                vector_index: Some(VectorIndexType {
767                    enabled: true,
768                    config: VectorIndexConfig {
769                        space: Some(default_space()),
770                        embedding_function: None,
771                        source_key: Some(DOCUMENT_KEY.to_string()),
772                        hnsw: match default_knn_index {
773                            KnnIndex::Hnsw => Some(HnswIndexConfig {
774                                ef_construction: Some(default_construction_ef()),
775                                max_neighbors: Some(default_m()),
776                                ef_search: Some(default_search_ef()),
777                                num_threads: Some(default_num_threads()),
778                                batch_size: Some(default_batch_size()),
779                                sync_threshold: Some(default_sync_threshold()),
780                                resize_factor: Some(default_resize_factor()),
781                            }),
782                            KnnIndex::Spann => None,
783                        },
784                        spann: match default_knn_index {
785                            KnnIndex::Hnsw => None,
786                            KnnIndex::Spann => Some(SpannIndexConfig {
787                                search_nprobe: Some(default_search_nprobe()),
788                                search_rng_factor: Some(default_search_rng_factor()),
789                                search_rng_epsilon: Some(default_search_rng_epsilon()),
790                                nreplica_count: Some(default_nreplica_count()),
791                                write_rng_factor: Some(default_write_rng_factor()),
792                                write_rng_epsilon: Some(default_write_rng_epsilon()),
793                                split_threshold: Some(default_split_threshold()),
794                                num_samples_kmeans: Some(default_num_samples_kmeans()),
795                                initial_lambda: Some(default_initial_lambda()),
796                                reassign_neighbor_count: Some(default_reassign_neighbor_count()),
797                                merge_threshold: Some(default_merge_threshold()),
798                                num_centers_to_merge_to: Some(default_num_centers_to_merge_to()),
799                                write_nprobe: Some(default_write_nprobe()),
800                                ef_construction: Some(default_construction_ef_spann()),
801                                ef_search: Some(default_search_ef_spann()),
802                                max_neighbors: Some(default_m_spann()),
803                            }),
804                        },
805                    },
806                }),
807            }),
808            ..Default::default()
809        };
810        keys.insert(EMBEDDING_KEY.to_string(), embedding_defaults);
811
812        // Document defaults - initialize directly instead of Default::default() + field assignment
813        let document_defaults = ValueTypes {
814            string: Some(StringValueType {
815                fts_index: Some(FtsIndexType {
816                    enabled: true,
817                    config: FtsIndexConfig {},
818                }),
819                string_inverted_index: Some(StringInvertedIndexType {
820                    enabled: false,
821                    config: StringInvertedIndexConfig {},
822                }),
823            }),
824            ..Default::default()
825        };
826        keys.insert(DOCUMENT_KEY.to_string(), document_defaults);
827
828        Schema {
829            defaults,
830            keys,
831            cmek: None,
832            source_attached_function_id: None,
833        }
834    }
835
836    pub fn get_internal_spann_config(&self) -> Option<InternalSpannConfiguration> {
837        let to_internal = |vector_index: &VectorIndexType| {
838            let space = vector_index.config.space.clone();
839            vector_index
840                .config
841                .spann
842                .clone()
843                .map(|config| (space.as_ref(), &config).into())
844        };
845
846        self.keys
847            .get(EMBEDDING_KEY)
848            .and_then(|value_types| value_types.float_list.as_ref())
849            .and_then(|float_list| float_list.vector_index.as_ref())
850            .and_then(to_internal)
851            .or_else(|| {
852                self.defaults
853                    .float_list
854                    .as_ref()
855                    .and_then(|float_list| float_list.vector_index.as_ref())
856                    .and_then(to_internal)
857            })
858    }
859
860    pub fn get_internal_hnsw_config(&self) -> Option<InternalHnswConfiguration> {
861        let to_internal = |vector_index: &VectorIndexType| {
862            if vector_index.config.spann.is_some() {
863                return None;
864            }
865            let space = vector_index.config.space.as_ref();
866            let hnsw_config = vector_index.config.hnsw.as_ref();
867            Some((space, hnsw_config).into())
868        };
869
870        self.keys
871            .get(EMBEDDING_KEY)
872            .and_then(|value_types| value_types.float_list.as_ref())
873            .and_then(|float_list| float_list.vector_index.as_ref())
874            .and_then(to_internal)
875            .or_else(|| {
876                self.defaults
877                    .float_list
878                    .as_ref()
879                    .and_then(|float_list| float_list.vector_index.as_ref())
880                    .and_then(to_internal)
881            })
882    }
883
884    pub fn get_internal_hnsw_config_with_legacy_fallback(
885        &self,
886        segment: &Segment,
887    ) -> Result<Option<InternalHnswConfiguration>, HnswParametersFromSegmentError> {
888        if let Some(config) = self.get_internal_hnsw_config() {
889            let config_from_metadata =
890                InternalHnswConfiguration::from_legacy_segment_metadata(&segment.metadata)?;
891
892            if config == InternalHnswConfiguration::default() && config != config_from_metadata {
893                return Ok(Some(config_from_metadata));
894            }
895
896            return Ok(Some(config));
897        }
898
899        Ok(None)
900    }
901
902    /// Reconcile user-provided schema with system defaults
903    ///
904    /// This method merges user configurations with system defaults, ensuring that:
905    /// - User overrides take precedence over defaults
906    /// - Missing user configurations fall back to system defaults
907    /// - Field-level merging for complex configurations (Vector, HNSW, SPANN, etc.)
908    pub fn reconcile_with_defaults(
909        user_schema: Option<&Schema>,
910        knn_index: KnnIndex,
911    ) -> Result<Self, SchemaError> {
912        let default_schema = Schema::new_default(knn_index);
913
914        match user_schema {
915            Some(user) => {
916                // Merge defaults with user overrides
917                let merged_defaults =
918                    Self::merge_value_types(&default_schema.defaults, &user.defaults, knn_index)?;
919
920                // Merge key overrides
921                let mut merged_keys = default_schema.keys.clone();
922                for (key, user_value_types) in &user.keys {
923                    if let Some(default_value_types) = merged_keys.get(key) {
924                        // Merge with existing default key override
925                        let merged_value_types = Self::merge_value_types(
926                            default_value_types,
927                            user_value_types,
928                            knn_index,
929                        )?;
930                        merged_keys.insert(key.clone(), merged_value_types);
931                    } else {
932                        // New key override from user
933                        merged_keys.insert(key.clone(), user_value_types.clone());
934                    }
935                }
936
937                Ok(Schema {
938                    defaults: merged_defaults,
939                    keys: merged_keys,
940                    cmek: user.cmek.clone().or(default_schema.cmek.clone()),
941                    source_attached_function_id: user
942                        .source_attached_function_id
943                        .clone()
944                        .or(default_schema.source_attached_function_id.clone()),
945                })
946            }
947            None => Ok(default_schema),
948        }
949    }
950
951    /// Merge two schemas together, combining key overrides when possible.
952    pub fn merge(&self, other: &Schema) -> Result<Schema, SchemaError> {
953        if self.defaults != other.defaults {
954            return Err(SchemaError::DefaultsMismatch);
955        }
956
957        let mut keys = self.keys.clone();
958
959        for (key, other_value_types) in &other.keys {
960            if let Some(existing) = keys.get(key).cloned() {
961                let merged = Self::merge_override_value_types(key, &existing, other_value_types)?;
962                keys.insert(key.clone(), merged);
963            } else {
964                keys.insert(key.clone(), other_value_types.clone());
965            }
966        }
967
968        Ok(Schema {
969            defaults: self.defaults.clone(),
970            keys,
971            cmek: other.cmek.clone().or(self.cmek.clone()),
972            source_attached_function_id: other
973                .source_attached_function_id
974                .clone()
975                .or(self.source_attached_function_id.clone()),
976        })
977    }
978
979    fn merge_override_value_types(
980        key: &str,
981        left: &ValueTypes,
982        right: &ValueTypes,
983    ) -> Result<ValueTypes, SchemaError> {
984        Ok(ValueTypes {
985            string: Self::merge_string_override(key, left.string.as_ref(), right.string.as_ref())?,
986            float: Self::merge_float_override(key, left.float.as_ref(), right.float.as_ref())?,
987            int: Self::merge_int_override(key, left.int.as_ref(), right.int.as_ref())?,
988            boolean: Self::merge_bool_override(key, left.boolean.as_ref(), right.boolean.as_ref())?,
989            float_list: Self::merge_float_list_override(
990                key,
991                left.float_list.as_ref(),
992                right.float_list.as_ref(),
993            )?,
994            sparse_vector: Self::merge_sparse_vector_override(
995                key,
996                left.sparse_vector.as_ref(),
997                right.sparse_vector.as_ref(),
998            )?,
999        })
1000    }
1001
1002    fn merge_string_override(
1003        key: &str,
1004        left: Option<&StringValueType>,
1005        right: Option<&StringValueType>,
1006    ) -> Result<Option<StringValueType>, SchemaError> {
1007        match (left, right) {
1008            (Some(l), Some(r)) => Ok(Some(StringValueType {
1009                string_inverted_index: Self::merge_index_or_error(
1010                    l.string_inverted_index.as_ref(),
1011                    r.string_inverted_index.as_ref(),
1012                    &format!("key '{key}' string.string_inverted_index"),
1013                )?,
1014                fts_index: Self::merge_index_or_error(
1015                    l.fts_index.as_ref(),
1016                    r.fts_index.as_ref(),
1017                    &format!("key '{key}' string.fts_index"),
1018                )?,
1019            })),
1020            (Some(l), None) => Ok(Some(l.clone())),
1021            (None, Some(r)) => Ok(Some(r.clone())),
1022            (None, None) => Ok(None),
1023        }
1024    }
1025
1026    fn merge_float_override(
1027        key: &str,
1028        left: Option<&FloatValueType>,
1029        right: Option<&FloatValueType>,
1030    ) -> Result<Option<FloatValueType>, SchemaError> {
1031        match (left, right) {
1032            (Some(l), Some(r)) => Ok(Some(FloatValueType {
1033                float_inverted_index: Self::merge_index_or_error(
1034                    l.float_inverted_index.as_ref(),
1035                    r.float_inverted_index.as_ref(),
1036                    &format!("key '{key}' float.float_inverted_index"),
1037                )?,
1038            })),
1039            (Some(l), None) => Ok(Some(l.clone())),
1040            (None, Some(r)) => Ok(Some(r.clone())),
1041            (None, None) => Ok(None),
1042        }
1043    }
1044
1045    fn merge_int_override(
1046        key: &str,
1047        left: Option<&IntValueType>,
1048        right: Option<&IntValueType>,
1049    ) -> Result<Option<IntValueType>, SchemaError> {
1050        match (left, right) {
1051            (Some(l), Some(r)) => Ok(Some(IntValueType {
1052                int_inverted_index: Self::merge_index_or_error(
1053                    l.int_inverted_index.as_ref(),
1054                    r.int_inverted_index.as_ref(),
1055                    &format!("key '{key}' int.int_inverted_index"),
1056                )?,
1057            })),
1058            (Some(l), None) => Ok(Some(l.clone())),
1059            (None, Some(r)) => Ok(Some(r.clone())),
1060            (None, None) => Ok(None),
1061        }
1062    }
1063
1064    fn merge_bool_override(
1065        key: &str,
1066        left: Option<&BoolValueType>,
1067        right: Option<&BoolValueType>,
1068    ) -> Result<Option<BoolValueType>, SchemaError> {
1069        match (left, right) {
1070            (Some(l), Some(r)) => Ok(Some(BoolValueType {
1071                bool_inverted_index: Self::merge_index_or_error(
1072                    l.bool_inverted_index.as_ref(),
1073                    r.bool_inverted_index.as_ref(),
1074                    &format!("key '{key}' bool.bool_inverted_index"),
1075                )?,
1076            })),
1077            (Some(l), None) => Ok(Some(l.clone())),
1078            (None, Some(r)) => Ok(Some(r.clone())),
1079            (None, None) => Ok(None),
1080        }
1081    }
1082
1083    fn merge_float_list_override(
1084        key: &str,
1085        left: Option<&FloatListValueType>,
1086        right: Option<&FloatListValueType>,
1087    ) -> Result<Option<FloatListValueType>, SchemaError> {
1088        match (left, right) {
1089            (Some(l), Some(r)) => Ok(Some(FloatListValueType {
1090                vector_index: Self::merge_index_or_error(
1091                    l.vector_index.as_ref(),
1092                    r.vector_index.as_ref(),
1093                    &format!("key '{key}' float_list.vector_index"),
1094                )?,
1095            })),
1096            (Some(l), None) => Ok(Some(l.clone())),
1097            (None, Some(r)) => Ok(Some(r.clone())),
1098            (None, None) => Ok(None),
1099        }
1100    }
1101
1102    fn merge_sparse_vector_override(
1103        key: &str,
1104        left: Option<&SparseVectorValueType>,
1105        right: Option<&SparseVectorValueType>,
1106    ) -> Result<Option<SparseVectorValueType>, SchemaError> {
1107        match (left, right) {
1108            (Some(l), Some(r)) => Ok(Some(SparseVectorValueType {
1109                sparse_vector_index: Self::merge_index_or_error(
1110                    l.sparse_vector_index.as_ref(),
1111                    r.sparse_vector_index.as_ref(),
1112                    &format!("key '{key}' sparse_vector.sparse_vector_index"),
1113                )?,
1114            })),
1115            (Some(l), None) => Ok(Some(l.clone())),
1116            (None, Some(r)) => Ok(Some(r.clone())),
1117            (None, None) => Ok(None),
1118        }
1119    }
1120
1121    fn merge_index_or_error<T: Clone + PartialEq>(
1122        left: Option<&T>,
1123        right: Option<&T>,
1124        context: &str,
1125    ) -> Result<Option<T>, SchemaError> {
1126        match (left, right) {
1127            (Some(l), Some(r)) => {
1128                if l == r {
1129                    Ok(Some(l.clone()))
1130                } else {
1131                    Err(SchemaError::ConfigurationConflict {
1132                        context: context.to_string(),
1133                    })
1134                }
1135            }
1136            (Some(l), None) => Ok(Some(l.clone())),
1137            (None, Some(r)) => Ok(Some(r.clone())),
1138            (None, None) => Ok(None),
1139        }
1140    }
1141
1142    /// Merge two ValueTypes with field-level merging
1143    /// User values take precedence over default values
1144    fn merge_value_types(
1145        default: &ValueTypes,
1146        user: &ValueTypes,
1147        knn_index: KnnIndex,
1148    ) -> Result<ValueTypes, SchemaError> {
1149        // Merge float_list first
1150        let float_list = Self::merge_float_list_type(
1151            default.float_list.as_ref(),
1152            user.float_list.as_ref(),
1153            knn_index,
1154        );
1155
1156        // Validate the merged float_list (covers all merge cases)
1157        if let Some(ref fl) = float_list {
1158            Self::validate_float_list_value_type(fl)?;
1159        }
1160
1161        Ok(ValueTypes {
1162            string: Self::merge_string_type(default.string.as_ref(), user.string.as_ref())?,
1163            float: Self::merge_float_type(default.float.as_ref(), user.float.as_ref())?,
1164            int: Self::merge_int_type(default.int.as_ref(), user.int.as_ref())?,
1165            boolean: Self::merge_bool_type(default.boolean.as_ref(), user.boolean.as_ref())?,
1166            float_list,
1167            sparse_vector: Self::merge_sparse_vector_type(
1168                default.sparse_vector.as_ref(),
1169                user.sparse_vector.as_ref(),
1170            )?,
1171        })
1172    }
1173
1174    /// Merge StringValueType configurations
1175    fn merge_string_type(
1176        default: Option<&StringValueType>,
1177        user: Option<&StringValueType>,
1178    ) -> Result<Option<StringValueType>, SchemaError> {
1179        match (default, user) {
1180            (Some(default), Some(user)) => Ok(Some(StringValueType {
1181                string_inverted_index: Self::merge_string_inverted_index_type(
1182                    default.string_inverted_index.as_ref(),
1183                    user.string_inverted_index.as_ref(),
1184                )?,
1185                fts_index: Self::merge_fts_index_type(
1186                    default.fts_index.as_ref(),
1187                    user.fts_index.as_ref(),
1188                )?,
1189            })),
1190            (Some(default), None) => Ok(Some(default.clone())),
1191            (None, Some(user)) => Ok(Some(user.clone())),
1192            (None, None) => Ok(None),
1193        }
1194    }
1195
1196    /// Merge FloatValueType configurations
1197    fn merge_float_type(
1198        default: Option<&FloatValueType>,
1199        user: Option<&FloatValueType>,
1200    ) -> Result<Option<FloatValueType>, SchemaError> {
1201        match (default, user) {
1202            (Some(default), Some(user)) => Ok(Some(FloatValueType {
1203                float_inverted_index: Self::merge_float_inverted_index_type(
1204                    default.float_inverted_index.as_ref(),
1205                    user.float_inverted_index.as_ref(),
1206                )?,
1207            })),
1208            (Some(default), None) => Ok(Some(default.clone())),
1209            (None, Some(user)) => Ok(Some(user.clone())),
1210            (None, None) => Ok(None),
1211        }
1212    }
1213
1214    /// Merge IntValueType configurations
1215    fn merge_int_type(
1216        default: Option<&IntValueType>,
1217        user: Option<&IntValueType>,
1218    ) -> Result<Option<IntValueType>, SchemaError> {
1219        match (default, user) {
1220            (Some(default), Some(user)) => Ok(Some(IntValueType {
1221                int_inverted_index: Self::merge_int_inverted_index_type(
1222                    default.int_inverted_index.as_ref(),
1223                    user.int_inverted_index.as_ref(),
1224                )?,
1225            })),
1226            (Some(default), None) => Ok(Some(default.clone())),
1227            (None, Some(user)) => Ok(Some(user.clone())),
1228            (None, None) => Ok(None),
1229        }
1230    }
1231
1232    /// Merge BoolValueType configurations
1233    fn merge_bool_type(
1234        default: Option<&BoolValueType>,
1235        user: Option<&BoolValueType>,
1236    ) -> Result<Option<BoolValueType>, SchemaError> {
1237        match (default, user) {
1238            (Some(default), Some(user)) => Ok(Some(BoolValueType {
1239                bool_inverted_index: Self::merge_bool_inverted_index_type(
1240                    default.bool_inverted_index.as_ref(),
1241                    user.bool_inverted_index.as_ref(),
1242                )?,
1243            })),
1244            (Some(default), None) => Ok(Some(default.clone())),
1245            (None, Some(user)) => Ok(Some(user.clone())),
1246            (None, None) => Ok(None),
1247        }
1248    }
1249
1250    /// Merge FloatListValueType configurations
1251    fn merge_float_list_type(
1252        default: Option<&FloatListValueType>,
1253        user: Option<&FloatListValueType>,
1254        knn_index: KnnIndex,
1255    ) -> Option<FloatListValueType> {
1256        match (default, user) {
1257            (Some(default), Some(user)) => Some(FloatListValueType {
1258                vector_index: Self::merge_vector_index_type(
1259                    default.vector_index.as_ref(),
1260                    user.vector_index.as_ref(),
1261                    knn_index,
1262                ),
1263            }),
1264            (Some(default), None) => Some(default.clone()),
1265            (None, Some(user)) => Some(user.clone()),
1266            (None, None) => None,
1267        }
1268    }
1269
1270    /// Merge SparseVectorValueType configurations
1271    fn merge_sparse_vector_type(
1272        default: Option<&SparseVectorValueType>,
1273        user: Option<&SparseVectorValueType>,
1274    ) -> Result<Option<SparseVectorValueType>, SchemaError> {
1275        match (default, user) {
1276            (Some(default), Some(user)) => Ok(Some(SparseVectorValueType {
1277                sparse_vector_index: Self::merge_sparse_vector_index_type(
1278                    default.sparse_vector_index.as_ref(),
1279                    user.sparse_vector_index.as_ref(),
1280                )?,
1281            })),
1282            (Some(default), None) => Ok(Some(default.clone())),
1283            (None, Some(user)) => Ok(Some(user.clone())),
1284            (None, None) => Ok(None),
1285        }
1286    }
1287
1288    /// Merge individual index type configurations
1289    fn merge_string_inverted_index_type(
1290        default: Option<&StringInvertedIndexType>,
1291        user: Option<&StringInvertedIndexType>,
1292    ) -> Result<Option<StringInvertedIndexType>, SchemaError> {
1293        match (default, user) {
1294            (Some(_default), Some(user)) => {
1295                Ok(Some(StringInvertedIndexType {
1296                    enabled: user.enabled,       // User enabled state takes precedence
1297                    config: user.config.clone(), // User config takes precedence
1298                }))
1299            }
1300            (Some(default), None) => Ok(Some(default.clone())),
1301            (None, Some(user)) => Ok(Some(user.clone())),
1302            (None, None) => Ok(None),
1303        }
1304    }
1305
1306    fn merge_fts_index_type(
1307        default: Option<&FtsIndexType>,
1308        user: Option<&FtsIndexType>,
1309    ) -> Result<Option<FtsIndexType>, SchemaError> {
1310        match (default, user) {
1311            (Some(_default), Some(user)) => Ok(Some(FtsIndexType {
1312                enabled: user.enabled,
1313                config: user.config.clone(),
1314            })),
1315            (Some(default), None) => Ok(Some(default.clone())),
1316            (None, Some(user)) => Ok(Some(user.clone())),
1317            (None, None) => Ok(None),
1318        }
1319    }
1320
1321    fn merge_float_inverted_index_type(
1322        default: Option<&FloatInvertedIndexType>,
1323        user: Option<&FloatInvertedIndexType>,
1324    ) -> Result<Option<FloatInvertedIndexType>, SchemaError> {
1325        match (default, user) {
1326            (Some(_default), Some(user)) => Ok(Some(FloatInvertedIndexType {
1327                enabled: user.enabled,
1328                config: user.config.clone(),
1329            })),
1330            (Some(default), None) => Ok(Some(default.clone())),
1331            (None, Some(user)) => Ok(Some(user.clone())),
1332            (None, None) => Ok(None),
1333        }
1334    }
1335
1336    fn merge_int_inverted_index_type(
1337        default: Option<&IntInvertedIndexType>,
1338        user: Option<&IntInvertedIndexType>,
1339    ) -> Result<Option<IntInvertedIndexType>, SchemaError> {
1340        match (default, user) {
1341            (Some(_default), Some(user)) => Ok(Some(IntInvertedIndexType {
1342                enabled: user.enabled,
1343                config: user.config.clone(),
1344            })),
1345            (Some(default), None) => Ok(Some(default.clone())),
1346            (None, Some(user)) => Ok(Some(user.clone())),
1347            (None, None) => Ok(None),
1348        }
1349    }
1350
1351    fn merge_bool_inverted_index_type(
1352        default: Option<&BoolInvertedIndexType>,
1353        user: Option<&BoolInvertedIndexType>,
1354    ) -> Result<Option<BoolInvertedIndexType>, SchemaError> {
1355        match (default, user) {
1356            (Some(_default), Some(user)) => Ok(Some(BoolInvertedIndexType {
1357                enabled: user.enabled,
1358                config: user.config.clone(),
1359            })),
1360            (Some(default), None) => Ok(Some(default.clone())),
1361            (None, Some(user)) => Ok(Some(user.clone())),
1362            (None, None) => Ok(None),
1363        }
1364    }
1365
1366    fn merge_vector_index_type(
1367        default: Option<&VectorIndexType>,
1368        user: Option<&VectorIndexType>,
1369        knn_index: KnnIndex,
1370    ) -> Option<VectorIndexType> {
1371        match (default, user) {
1372            (Some(default), Some(user)) => Some(VectorIndexType {
1373                enabled: user.enabled,
1374                config: Self::merge_vector_index_config(&default.config, &user.config, knn_index),
1375            }),
1376            (Some(default), None) => Some(default.clone()),
1377            (None, Some(user)) => Some(user.clone()),
1378            (None, None) => None,
1379        }
1380    }
1381
1382    fn merge_sparse_vector_index_type(
1383        default: Option<&SparseVectorIndexType>,
1384        user: Option<&SparseVectorIndexType>,
1385    ) -> Result<Option<SparseVectorIndexType>, SchemaError> {
1386        match (default, user) {
1387            (Some(default), Some(user)) => Ok(Some(SparseVectorIndexType {
1388                enabled: user.enabled,
1389                config: Self::merge_sparse_vector_index_config(&default.config, &user.config),
1390            })),
1391            (Some(default), None) => Ok(Some(default.clone())),
1392            (None, Some(user)) => Ok(Some(user.clone())),
1393            (None, None) => Ok(None),
1394        }
1395    }
1396
1397    /// Validate FloatListValueType vector index configurations
1398    /// This validates HNSW and SPANN configs within the merged float_list
1399    fn validate_float_list_value_type(float_list: &FloatListValueType) -> Result<(), SchemaError> {
1400        if let Some(vector_index) = &float_list.vector_index {
1401            if let Some(hnsw) = &vector_index.config.hnsw {
1402                hnsw.validate().map_err(SchemaError::InvalidHnswConfig)?;
1403            }
1404            if let Some(spann) = &vector_index.config.spann {
1405                spann.validate().map_err(SchemaError::InvalidSpannConfig)?;
1406            }
1407        }
1408        Ok(())
1409    }
1410
1411    /// Merge VectorIndexConfig with field-level merging
1412    fn merge_vector_index_config(
1413        default: &VectorIndexConfig,
1414        user: &VectorIndexConfig,
1415        knn_index: KnnIndex,
1416    ) -> VectorIndexConfig {
1417        match knn_index {
1418            KnnIndex::Hnsw => VectorIndexConfig {
1419                space: user.space.clone().or(default.space.clone()),
1420                embedding_function: user
1421                    .embedding_function
1422                    .clone()
1423                    .or(default.embedding_function.clone()),
1424                source_key: user.source_key.clone().or(default.source_key.clone()),
1425                hnsw: Self::merge_hnsw_configs(default.hnsw.as_ref(), user.hnsw.as_ref()),
1426                spann: None,
1427            },
1428            KnnIndex::Spann => VectorIndexConfig {
1429                space: user.space.clone().or(default.space.clone()),
1430                embedding_function: user
1431                    .embedding_function
1432                    .clone()
1433                    .or(default.embedding_function.clone()),
1434                source_key: user.source_key.clone().or(default.source_key.clone()),
1435                hnsw: None,
1436                spann: Self::merge_spann_configs(default.spann.as_ref(), user.spann.as_ref()),
1437            },
1438        }
1439    }
1440
1441    /// Merge SparseVectorIndexConfig with field-level merging
1442    fn merge_sparse_vector_index_config(
1443        default: &SparseVectorIndexConfig,
1444        user: &SparseVectorIndexConfig,
1445    ) -> SparseVectorIndexConfig {
1446        SparseVectorIndexConfig {
1447            embedding_function: user
1448                .embedding_function
1449                .clone()
1450                .or(default.embedding_function.clone()),
1451            source_key: user.source_key.clone().or(default.source_key.clone()),
1452            bm25: user.bm25.or(default.bm25),
1453        }
1454    }
1455
1456    /// Merge HNSW configurations with field-level merging
1457    fn merge_hnsw_configs(
1458        default_hnsw: Option<&HnswIndexConfig>,
1459        user_hnsw: Option<&HnswIndexConfig>,
1460    ) -> Option<HnswIndexConfig> {
1461        match (default_hnsw, user_hnsw) {
1462            (Some(default), Some(user)) => Some(HnswIndexConfig {
1463                ef_construction: user.ef_construction.or(default.ef_construction),
1464                max_neighbors: user.max_neighbors.or(default.max_neighbors),
1465                ef_search: user.ef_search.or(default.ef_search),
1466                num_threads: user.num_threads.or(default.num_threads),
1467                batch_size: user.batch_size.or(default.batch_size),
1468                sync_threshold: user.sync_threshold.or(default.sync_threshold),
1469                resize_factor: user.resize_factor.or(default.resize_factor),
1470            }),
1471            (Some(default), None) => Some(default.clone()),
1472            (None, Some(user)) => Some(user.clone()),
1473            (None, None) => None,
1474        }
1475    }
1476
1477    /// Merge SPANN configurations with field-level merging
1478    fn merge_spann_configs(
1479        default_spann: Option<&SpannIndexConfig>,
1480        user_spann: Option<&SpannIndexConfig>,
1481    ) -> Option<SpannIndexConfig> {
1482        match (default_spann, user_spann) {
1483            (Some(default), Some(user)) => Some(SpannIndexConfig {
1484                search_nprobe: user.search_nprobe.or(default.search_nprobe),
1485                search_rng_factor: user.search_rng_factor.or(default.search_rng_factor),
1486                search_rng_epsilon: user.search_rng_epsilon.or(default.search_rng_epsilon),
1487                nreplica_count: user.nreplica_count.or(default.nreplica_count),
1488                write_rng_factor: user.write_rng_factor.or(default.write_rng_factor),
1489                write_rng_epsilon: user.write_rng_epsilon.or(default.write_rng_epsilon),
1490                split_threshold: user.split_threshold.or(default.split_threshold),
1491                num_samples_kmeans: user.num_samples_kmeans.or(default.num_samples_kmeans),
1492                initial_lambda: user.initial_lambda.or(default.initial_lambda),
1493                reassign_neighbor_count: user
1494                    .reassign_neighbor_count
1495                    .or(default.reassign_neighbor_count),
1496                merge_threshold: user.merge_threshold.or(default.merge_threshold),
1497                num_centers_to_merge_to: user
1498                    .num_centers_to_merge_to
1499                    .or(default.num_centers_to_merge_to),
1500                write_nprobe: user.write_nprobe.or(default.write_nprobe),
1501                ef_construction: user.ef_construction.or(default.ef_construction),
1502                ef_search: user.ef_search.or(default.ef_search),
1503                max_neighbors: user.max_neighbors.or(default.max_neighbors),
1504            }),
1505            (Some(default), None) => Some(default.clone()),
1506            (None, Some(user)) => Some(user.clone()),
1507            (None, None) => None,
1508        }
1509    }
1510
1511    /// Reconcile Schema with InternalCollectionConfiguration
1512    ///
1513    /// Simple reconciliation logic:
1514    /// 1. If collection config is default → return schema (schema is source of truth)
1515    /// 2. If collection config is non-default and schema is default → override schema with collection config
1516    ///
1517    /// Note: The case where both are non-default is validated earlier in reconcile_schema_and_config
1518    pub fn reconcile_with_collection_config(
1519        schema: &Schema,
1520        collection_config: &InternalCollectionConfiguration,
1521        default_knn_index: KnnIndex,
1522    ) -> Result<Schema, SchemaError> {
1523        // 1. Check if collection config is default
1524        if collection_config.is_default() {
1525            if schema.is_default() {
1526                // if both are default, use the schema, and apply the ef from config if available
1527                // for both defaults and #embedding key
1528                let mut new_schema = Schema::new_default(default_knn_index);
1529
1530                if collection_config.embedding_function.is_some() {
1531                    if let Some(float_list) = &mut new_schema.defaults.float_list {
1532                        if let Some(vector_index) = &mut float_list.vector_index {
1533                            vector_index.config.embedding_function =
1534                                collection_config.embedding_function.clone();
1535                        }
1536                    }
1537                    if let Some(embedding_types) = new_schema.keys.get_mut(EMBEDDING_KEY) {
1538                        if let Some(float_list) = &mut embedding_types.float_list {
1539                            if let Some(vector_index) = &mut float_list.vector_index {
1540                                vector_index.config.embedding_function =
1541                                    collection_config.embedding_function.clone();
1542                            }
1543                        }
1544                    }
1545                }
1546                return Ok(new_schema);
1547            } else {
1548                // Collection config is default and schema is non-default → schema is source of truth
1549                return Ok(schema.clone());
1550            }
1551        }
1552
1553        // 2. Collection config is non-default, schema must be default (already validated earlier)
1554        // Convert collection config to schema
1555        Self::try_from(collection_config)
1556    }
1557
1558    pub fn reconcile_schema_and_config(
1559        schema: Option<&Schema>,
1560        configuration: Option<&InternalCollectionConfiguration>,
1561        knn_index: KnnIndex,
1562    ) -> Result<Schema, SchemaError> {
1563        // Early validation: check if both user-provided schema and config are non-default
1564        if let (Some(user_schema), Some(config)) = (schema, configuration) {
1565            if !user_schema.is_default() && !config.is_default() {
1566                return Err(SchemaError::ConfigAndSchemaConflict);
1567            }
1568        }
1569
1570        let reconciled_schema = Self::reconcile_with_defaults(schema, knn_index)?;
1571        if let Some(config) = configuration {
1572            Self::reconcile_with_collection_config(&reconciled_schema, config, knn_index)
1573        } else {
1574            Ok(reconciled_schema)
1575        }
1576    }
1577
1578    pub fn default_with_embedding_function(
1579        embedding_function: EmbeddingFunctionConfiguration,
1580    ) -> Schema {
1581        let mut schema = Schema::new_default(KnnIndex::Spann);
1582        if let Some(float_list) = &mut schema.defaults.float_list {
1583            if let Some(vector_index) = &mut float_list.vector_index {
1584                vector_index.config.embedding_function = Some(embedding_function.clone());
1585            }
1586        }
1587        if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
1588            if let Some(float_list) = &mut embedding_types.float_list {
1589                if let Some(vector_index) = &mut float_list.vector_index {
1590                    vector_index.config.embedding_function = Some(embedding_function);
1591                }
1592            }
1593        }
1594        schema
1595    }
1596
1597    /// Check if schema is default by checking each field individually
1598    pub fn is_default(&self) -> bool {
1599        // Check if defaults are default (field by field)
1600        if !Self::is_value_types_default(&self.defaults) {
1601            return false;
1602        }
1603
1604        for key in self.keys.keys() {
1605            if key != EMBEDDING_KEY && key != DOCUMENT_KEY {
1606                return false;
1607            }
1608        }
1609
1610        // Check #embedding key
1611        if let Some(embedding_value) = self.keys.get(EMBEDDING_KEY) {
1612            if !Self::is_embedding_value_types_default(embedding_value) {
1613                return false;
1614            }
1615        }
1616
1617        // Check #document key
1618        if let Some(document_value) = self.keys.get(DOCUMENT_KEY) {
1619            if !Self::is_document_value_types_default(document_value) {
1620                return false;
1621            }
1622        }
1623
1624        // Check CMEK is None (default)
1625        if self.cmek.is_some() {
1626            return false;
1627        }
1628
1629        true
1630    }
1631
1632    /// Check if ValueTypes (defaults) are in default state
1633    fn is_value_types_default(value_types: &ValueTypes) -> bool {
1634        // Check string field
1635        if let Some(string) = &value_types.string {
1636            if let Some(string_inverted) = &string.string_inverted_index {
1637                if !string_inverted.enabled {
1638                    return false;
1639                }
1640                // Config is an empty struct, so no need to check it
1641            }
1642            if let Some(fts) = &string.fts_index {
1643                if fts.enabled {
1644                    return false;
1645                }
1646                // Config is an empty struct, so no need to check it
1647            }
1648        }
1649
1650        // Check float field
1651        if let Some(float) = &value_types.float {
1652            if let Some(float_inverted) = &float.float_inverted_index {
1653                if !float_inverted.enabled {
1654                    return false;
1655                }
1656                // Config is an empty struct, so no need to check it
1657            }
1658        }
1659
1660        // Check int field
1661        if let Some(int) = &value_types.int {
1662            if let Some(int_inverted) = &int.int_inverted_index {
1663                if !int_inverted.enabled {
1664                    return false;
1665                }
1666                // Config is an empty struct, so no need to check it
1667            }
1668        }
1669
1670        // Check boolean field
1671        if let Some(boolean) = &value_types.boolean {
1672            if let Some(bool_inverted) = &boolean.bool_inverted_index {
1673                if !bool_inverted.enabled {
1674                    return false;
1675                }
1676                // Config is an empty struct, so no need to check it
1677            }
1678        }
1679
1680        // Check float_list field (vector index should be disabled)
1681        if let Some(float_list) = &value_types.float_list {
1682            if let Some(vector_index) = &float_list.vector_index {
1683                if vector_index.enabled {
1684                    return false;
1685                }
1686                if !is_embedding_function_default(&vector_index.config.embedding_function) {
1687                    return false;
1688                }
1689                if !is_space_default(&vector_index.config.space) {
1690                    return false;
1691                }
1692                // Check that the config has default structure
1693                if vector_index.config.source_key.is_some() {
1694                    return false;
1695                }
1696                // Check that either hnsw or spann config is present (not both, not neither)
1697                // and that the config values are default
1698                match (&vector_index.config.hnsw, &vector_index.config.spann) {
1699                    (Some(hnsw_config), None) => {
1700                        if !hnsw_config.is_default() {
1701                            return false;
1702                        }
1703                    }
1704                    (None, Some(spann_config)) => {
1705                        if !spann_config.is_default() {
1706                            return false;
1707                        }
1708                    }
1709                    (Some(_), Some(_)) => return false, // Both present
1710                    (None, None) => {}
1711                }
1712            }
1713        }
1714
1715        // Check sparse_vector field (should be disabled)
1716        if let Some(sparse_vector) = &value_types.sparse_vector {
1717            if let Some(sparse_index) = &sparse_vector.sparse_vector_index {
1718                if sparse_index.enabled {
1719                    return false;
1720                }
1721                // Check config structure
1722                if !is_embedding_function_default(&sparse_index.config.embedding_function) {
1723                    return false;
1724                }
1725                if sparse_index.config.source_key.is_some() {
1726                    return false;
1727                }
1728                if let Some(bm25) = &sparse_index.config.bm25 {
1729                    if bm25 != &false {
1730                        return false;
1731                    }
1732                }
1733            }
1734        }
1735
1736        true
1737    }
1738
1739    /// Check if ValueTypes for #embedding key are in default state
1740    fn is_embedding_value_types_default(value_types: &ValueTypes) -> bool {
1741        // For #embedding, only float_list should be set
1742        if value_types.string.is_some()
1743            || value_types.float.is_some()
1744            || value_types.int.is_some()
1745            || value_types.boolean.is_some()
1746            || value_types.sparse_vector.is_some()
1747        {
1748            return false;
1749        }
1750
1751        // Check float_list field (vector index should be enabled)
1752        if let Some(float_list) = &value_types.float_list {
1753            if let Some(vector_index) = &float_list.vector_index {
1754                if !vector_index.enabled {
1755                    return false;
1756                }
1757                if !is_space_default(&vector_index.config.space) {
1758                    return false;
1759                }
1760                // Check that embedding_function is default
1761                if !is_embedding_function_default(&vector_index.config.embedding_function) {
1762                    return false;
1763                }
1764                // Check that source_key is #document
1765                if vector_index.config.source_key.as_deref() != Some(DOCUMENT_KEY) {
1766                    return false;
1767                }
1768                // Check that either hnsw or spann config is present (not both, not neither)
1769                // and that the config values are default
1770                match (&vector_index.config.hnsw, &vector_index.config.spann) {
1771                    (Some(hnsw_config), None) => {
1772                        if !hnsw_config.is_default() {
1773                            return false;
1774                        }
1775                    }
1776                    (None, Some(spann_config)) => {
1777                        if !spann_config.is_default() {
1778                            return false;
1779                        }
1780                    }
1781                    (Some(_), Some(_)) => return false, // Both present
1782                    (None, None) => {}
1783                }
1784            }
1785        }
1786
1787        true
1788    }
1789
1790    /// Check if ValueTypes for #document key are in default state
1791    fn is_document_value_types_default(value_types: &ValueTypes) -> bool {
1792        // For #document, only string should be set
1793        if value_types.float_list.is_some()
1794            || value_types.float.is_some()
1795            || value_types.int.is_some()
1796            || value_types.boolean.is_some()
1797            || value_types.sparse_vector.is_some()
1798        {
1799            return false;
1800        }
1801
1802        // Check string field
1803        if let Some(string) = &value_types.string {
1804            if let Some(fts) = &string.fts_index {
1805                if !fts.enabled {
1806                    return false;
1807                }
1808                // Config is an empty struct, so no need to check it
1809            }
1810            if let Some(string_inverted) = &string.string_inverted_index {
1811                if string_inverted.enabled {
1812                    return false;
1813                }
1814                // Config is an empty struct, so no need to check it
1815            }
1816        }
1817
1818        true
1819    }
1820
1821    /// Check if a specific metadata key-value should be indexed based on schema configuration
1822    pub fn is_metadata_type_index_enabled(
1823        &self,
1824        key: &str,
1825        value_type: MetadataValueType,
1826    ) -> Result<bool, SchemaError> {
1827        let v_type = self.keys.get(key).unwrap_or(&self.defaults);
1828
1829        match value_type {
1830            MetadataValueType::Bool => match &v_type.boolean {
1831                Some(bool_type) => match &bool_type.bool_inverted_index {
1832                    Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
1833                    None => Err(SchemaError::MissingIndexConfiguration {
1834                        key: key.to_string(),
1835                        value_type: "bool".to_string(),
1836                    }),
1837                },
1838                None => match &self.defaults.boolean {
1839                    Some(bool_type) => match &bool_type.bool_inverted_index {
1840                        Some(bool_inverted_index) => Ok(bool_inverted_index.enabled),
1841                        None => Err(SchemaError::MissingIndexConfiguration {
1842                            key: key.to_string(),
1843                            value_type: "bool".to_string(),
1844                        }),
1845                    },
1846                    None => Err(SchemaError::MissingIndexConfiguration {
1847                        key: key.to_string(),
1848                        value_type: "bool".to_string(),
1849                    }),
1850                },
1851            },
1852            MetadataValueType::Int => match &v_type.int {
1853                Some(int_type) => match &int_type.int_inverted_index {
1854                    Some(int_inverted_index) => Ok(int_inverted_index.enabled),
1855                    None => Err(SchemaError::MissingIndexConfiguration {
1856                        key: key.to_string(),
1857                        value_type: "int".to_string(),
1858                    }),
1859                },
1860                None => match &self.defaults.int {
1861                    Some(int_type) => match &int_type.int_inverted_index {
1862                        Some(int_inverted_index) => Ok(int_inverted_index.enabled),
1863                        None => Err(SchemaError::MissingIndexConfiguration {
1864                            key: key.to_string(),
1865                            value_type: "int".to_string(),
1866                        }),
1867                    },
1868                    None => Err(SchemaError::MissingIndexConfiguration {
1869                        key: key.to_string(),
1870                        value_type: "int".to_string(),
1871                    }),
1872                },
1873            },
1874            MetadataValueType::Float => match &v_type.float {
1875                Some(float_type) => match &float_type.float_inverted_index {
1876                    Some(float_inverted_index) => Ok(float_inverted_index.enabled),
1877                    None => Err(SchemaError::MissingIndexConfiguration {
1878                        key: key.to_string(),
1879                        value_type: "float".to_string(),
1880                    }),
1881                },
1882                None => match &self.defaults.float {
1883                    Some(float_type) => match &float_type.float_inverted_index {
1884                        Some(float_inverted_index) => Ok(float_inverted_index.enabled),
1885                        None => Err(SchemaError::MissingIndexConfiguration {
1886                            key: key.to_string(),
1887                            value_type: "float".to_string(),
1888                        }),
1889                    },
1890                    None => Err(SchemaError::MissingIndexConfiguration {
1891                        key: key.to_string(),
1892                        value_type: "float".to_string(),
1893                    }),
1894                },
1895            },
1896            MetadataValueType::Str => match &v_type.string {
1897                Some(string_type) => match &string_type.string_inverted_index {
1898                    Some(string_inverted_index) => Ok(string_inverted_index.enabled),
1899                    None => Err(SchemaError::MissingIndexConfiguration {
1900                        key: key.to_string(),
1901                        value_type: "string".to_string(),
1902                    }),
1903                },
1904                None => match &self.defaults.string {
1905                    Some(string_type) => match &string_type.string_inverted_index {
1906                        Some(string_inverted_index) => Ok(string_inverted_index.enabled),
1907                        None => Err(SchemaError::MissingIndexConfiguration {
1908                            key: key.to_string(),
1909                            value_type: "string".to_string(),
1910                        }),
1911                    },
1912                    None => Err(SchemaError::MissingIndexConfiguration {
1913                        key: key.to_string(),
1914                        value_type: "string".to_string(),
1915                    }),
1916                },
1917            },
1918            MetadataValueType::SparseVector => match &v_type.sparse_vector {
1919                Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
1920                    Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
1921                    None => Err(SchemaError::MissingIndexConfiguration {
1922                        key: key.to_string(),
1923                        value_type: "sparse_vector".to_string(),
1924                    }),
1925                },
1926                None => match &self.defaults.sparse_vector {
1927                    Some(sparse_vector_type) => match &sparse_vector_type.sparse_vector_index {
1928                        Some(sparse_vector_index) => Ok(sparse_vector_index.enabled),
1929                        None => Err(SchemaError::MissingIndexConfiguration {
1930                            key: key.to_string(),
1931                            value_type: "sparse_vector".to_string(),
1932                        }),
1933                    },
1934                    None => Err(SchemaError::MissingIndexConfiguration {
1935                        key: key.to_string(),
1936                        value_type: "sparse_vector".to_string(),
1937                    }),
1938                },
1939            },
1940        }
1941    }
1942
1943    pub fn is_metadata_where_indexing_enabled(
1944        &self,
1945        where_clause: &Where,
1946    ) -> Result<(), FilterValidationError> {
1947        match where_clause {
1948            Where::Composite(composite) => {
1949                for child in &composite.children {
1950                    self.is_metadata_where_indexing_enabled(child)?;
1951                }
1952                Ok(())
1953            }
1954            Where::Document(_) => Ok(()),
1955            Where::Metadata(expression) => {
1956                let value_type = match &expression.comparison {
1957                    MetadataComparison::Primitive(_, value) => value.value_type(),
1958                    MetadataComparison::Set(_, set_value) => set_value.value_type(),
1959                };
1960                let is_enabled = self
1961                    .is_metadata_type_index_enabled(expression.key.as_str(), value_type)
1962                    .map_err(FilterValidationError::Schema)?;
1963                if !is_enabled {
1964                    return Err(FilterValidationError::IndexingDisabled {
1965                        key: expression.key.clone(),
1966                        value_type,
1967                    });
1968                }
1969                Ok(())
1970            }
1971        }
1972    }
1973
1974    pub fn is_knn_key_indexing_enabled(
1975        &self,
1976        key: &str,
1977        query: &QueryVector,
1978    ) -> Result<(), FilterValidationError> {
1979        match query {
1980            QueryVector::Sparse(_) => {
1981                let is_enabled = self
1982                    .is_metadata_type_index_enabled(key, MetadataValueType::SparseVector)
1983                    .map_err(FilterValidationError::Schema)?;
1984                if !is_enabled {
1985                    return Err(FilterValidationError::IndexingDisabled {
1986                        key: key.to_string(),
1987                        value_type: MetadataValueType::SparseVector,
1988                    });
1989                }
1990                Ok(())
1991            }
1992            QueryVector::Dense(_) => {
1993                // TODO: once we allow turning off dense vector indexing, we need to check if the key is enabled
1994                // Dense vectors are always indexed
1995                Ok(())
1996            }
1997        }
1998    }
1999
2000    pub fn ensure_key_from_metadata(&mut self, key: &str, value_type: MetadataValueType) -> bool {
2001        if key.starts_with(CHROMA_KEY) {
2002            return false;
2003        }
2004        let value_types = self.keys.entry(key.to_string()).or_default();
2005        match value_type {
2006            MetadataValueType::Bool => {
2007                if value_types.boolean.is_none() {
2008                    value_types.boolean = self.defaults.boolean.clone();
2009                    return true;
2010                }
2011            }
2012            MetadataValueType::Int => {
2013                if value_types.int.is_none() {
2014                    value_types.int = self.defaults.int.clone();
2015                    return true;
2016                }
2017            }
2018            MetadataValueType::Float => {
2019                if value_types.float.is_none() {
2020                    value_types.float = self.defaults.float.clone();
2021                    return true;
2022                }
2023            }
2024            MetadataValueType::Str => {
2025                if value_types.string.is_none() {
2026                    value_types.string = self.defaults.string.clone();
2027                    return true;
2028                }
2029            }
2030            MetadataValueType::SparseVector => {
2031                if value_types.sparse_vector.is_none() {
2032                    value_types.sparse_vector = self.defaults.sparse_vector.clone();
2033                    return true;
2034                }
2035            }
2036        }
2037        false
2038    }
2039
2040    // ========================================================================
2041    // BUILDER PATTERN METHODS
2042    // ========================================================================
2043
2044    /// Create an index configuration (builder pattern)
2045    ///
2046    /// This method allows fluent, chainable configuration of indexes on a schema.
2047    /// It matches the Python API's `.create_index()` method.
2048    ///
2049    /// # Arguments
2050    /// * `key` - Optional key name for per-key index. `None` applies to defaults/special keys
2051    /// * `config` - Index configuration to create
2052    ///
2053    /// # Returns
2054    /// `Self` for method chaining
2055    ///
2056    /// # Errors
2057    /// Returns error if:
2058    /// - Attempting to create index on special keys (`#document`, `#embedding`)
2059    /// - Invalid configuration (e.g., vector index on non-embedding key)
2060    /// - Conflicting with existing indexes (e.g., multiple sparse vector indexes)
2061    ///
2062    /// # Examples
2063    /// ```
2064    /// use chroma_types::{Schema, VectorIndexConfig, StringInvertedIndexConfig, Space, SchemaBuilderError};
2065    ///
2066    /// # fn main() -> Result<(), SchemaBuilderError> {
2067    /// let schema = Schema::default()
2068    ///     .create_index(None, VectorIndexConfig {
2069    ///         space: Some(Space::Cosine),
2070    ///         embedding_function: None,
2071    ///         source_key: None,
2072    ///         hnsw: None,
2073    ///         spann: None,
2074    ///     }.into())?
2075    ///     .create_index(Some("category"), StringInvertedIndexConfig {}.into())?;
2076    /// # Ok(())
2077    /// # }
2078    /// ```
2079    pub fn create_index(
2080        mut self,
2081        key: Option<&str>,
2082        config: IndexConfig,
2083    ) -> Result<Self, SchemaBuilderError> {
2084        // Handle special cases: Vector and FTS (global configs only)
2085        match (&key, &config) {
2086            (None, IndexConfig::Vector(cfg)) => {
2087                self._set_vector_index_config_builder(cfg.clone());
2088                return Ok(self);
2089            }
2090            (None, IndexConfig::Fts(cfg)) => {
2091                self._set_fts_index_config_builder(cfg.clone());
2092                return Ok(self);
2093            }
2094            (Some(k), IndexConfig::Vector(_)) => {
2095                return Err(SchemaBuilderError::VectorIndexMustBeGlobal { key: k.to_string() });
2096            }
2097            (Some(k), IndexConfig::Fts(_)) => {
2098                return Err(SchemaBuilderError::FtsIndexMustBeGlobal { key: k.to_string() });
2099            }
2100            _ => {}
2101        }
2102
2103        // Validate special keys
2104        if let Some(k) = key {
2105            if k == DOCUMENT_KEY || k == EMBEDDING_KEY {
2106                return Err(SchemaBuilderError::SpecialKeyModificationNotAllowed {
2107                    key: k.to_string(),
2108                });
2109            }
2110        }
2111
2112        // Validate sparse vector requires key
2113        if key.is_none() && matches!(config, IndexConfig::SparseVector(_)) {
2114            return Err(SchemaBuilderError::SparseVectorRequiresKey);
2115        }
2116
2117        // Dispatch to appropriate helper
2118        match key {
2119            Some(k) => self._set_index_for_key_builder(k, config, true)?,
2120            None => self._set_index_in_defaults_builder(config, true)?,
2121        }
2122
2123        Ok(self)
2124    }
2125
2126    /// Delete/disable an index configuration (builder pattern)
2127    ///
2128    /// This method allows disabling indexes on a schema.
2129    /// It matches the Python API's `.delete_index()` method.
2130    ///
2131    /// # Arguments
2132    /// * `key` - Optional key name for per-key index. `None` applies to defaults
2133    /// * `config` - Index configuration to disable
2134    ///
2135    /// # Returns
2136    /// `Self` for method chaining
2137    ///
2138    /// # Errors
2139    /// Returns error if:
2140    /// - Attempting to delete index on special keys (`#document`, `#embedding`)
2141    /// - Attempting to delete vector, FTS, or sparse vector indexes (not currently supported)
2142    ///
2143    /// # Examples
2144    /// ```
2145    /// use chroma_types::{Schema, StringInvertedIndexConfig, SchemaBuilderError};
2146    ///
2147    /// # fn main() -> Result<(), SchemaBuilderError> {
2148    /// let schema = Schema::default()
2149    ///     .delete_index(Some("category"), StringInvertedIndexConfig {}.into())?;
2150    /// # Ok(())
2151    /// # }
2152    /// ```
2153    pub fn delete_index(
2154        mut self,
2155        key: Option<&str>,
2156        config: IndexConfig,
2157    ) -> Result<Self, SchemaBuilderError> {
2158        // Validate special keys
2159        if let Some(k) = key {
2160            if k == DOCUMENT_KEY || k == EMBEDDING_KEY {
2161                return Err(SchemaBuilderError::SpecialKeyModificationNotAllowed {
2162                    key: k.to_string(),
2163                });
2164            }
2165        }
2166
2167        // Disallow deleting vector, FTS, and sparse vector indexes (match Python restrictions)
2168        match &config {
2169            IndexConfig::Vector(_) => {
2170                return Err(SchemaBuilderError::VectorIndexDeletionNotSupported);
2171            }
2172            IndexConfig::Fts(_) => {
2173                return Err(SchemaBuilderError::FtsIndexDeletionNotSupported);
2174            }
2175            IndexConfig::SparseVector(_) => {
2176                return Err(SchemaBuilderError::SparseVectorIndexDeletionNotSupported);
2177            }
2178            _ => {}
2179        }
2180
2181        // Dispatch to appropriate helper (enabled=false)
2182        match key {
2183            Some(k) => self._set_index_for_key_builder(k, config, false)?,
2184            None => self._set_index_in_defaults_builder(config, false)?,
2185        }
2186
2187        Ok(self)
2188    }
2189
2190    /// Set customer-managed encryption key for the collection (builder pattern)
2191    ///
2192    /// This method allows setting CMEK on a schema for fluent, chainable configuration.
2193    ///
2194    /// # Arguments
2195    /// * `cmek` - Customer-managed encryption key configuration
2196    ///
2197    /// # Returns
2198    /// `Self` for method chaining
2199    ///
2200    /// # Examples
2201    /// ```
2202    /// use chroma_types::{Schema, Cmek};
2203    ///
2204    /// let schema = Schema::default()
2205    ///     .with_cmek(Cmek::gcp("projects/my-project/locations/us/keyRings/my-ring/cryptoKeys/my-key".to_string()));
2206    /// ```
2207    pub fn with_cmek(mut self, cmek: Cmek) -> Self {
2208        self.cmek = Some(cmek);
2209        self
2210    }
2211
2212    /// Set vector index config globally (applies to #embedding)
2213    fn _set_vector_index_config_builder(&mut self, config: VectorIndexConfig) {
2214        // Update defaults (disabled, just config update)
2215        if let Some(float_list) = &mut self.defaults.float_list {
2216            if let Some(vector_index) = &mut float_list.vector_index {
2217                vector_index.config = config.clone();
2218            }
2219        }
2220
2221        // Update #embedding key (enabled, config update, preserve source_key=#document)
2222        if let Some(embedding_types) = self.keys.get_mut(EMBEDDING_KEY) {
2223            if let Some(float_list) = &mut embedding_types.float_list {
2224                if let Some(vector_index) = &mut float_list.vector_index {
2225                    let mut updated_config = config;
2226                    // Preserve source_key as #document
2227                    updated_config.source_key = Some(DOCUMENT_KEY.to_string());
2228                    vector_index.config = updated_config;
2229                }
2230            }
2231        }
2232    }
2233
2234    /// Set FTS index config globally (applies to #document)
2235    fn _set_fts_index_config_builder(&mut self, config: FtsIndexConfig) {
2236        // Update defaults (disabled, just config update)
2237        if let Some(string) = &mut self.defaults.string {
2238            if let Some(fts_index) = &mut string.fts_index {
2239                fts_index.config = config.clone();
2240            }
2241        }
2242
2243        // Update #document key (enabled, config update)
2244        if let Some(document_types) = self.keys.get_mut(DOCUMENT_KEY) {
2245            if let Some(string) = &mut document_types.string {
2246                if let Some(fts_index) = &mut string.fts_index {
2247                    fts_index.config = config;
2248                }
2249            }
2250        }
2251    }
2252
2253    /// Set index configuration for a specific key
2254    fn _set_index_for_key_builder(
2255        &mut self,
2256        key: &str,
2257        config: IndexConfig,
2258        enabled: bool,
2259    ) -> Result<(), SchemaBuilderError> {
2260        // Check for multiple sparse vector indexes BEFORE getting mutable reference
2261        if enabled && matches!(config, IndexConfig::SparseVector(_)) {
2262            // Find existing sparse vector index
2263            let existing_key = self
2264                .keys
2265                .iter()
2266                .find(|(k, v)| {
2267                    k.as_str() != key
2268                        && v.sparse_vector
2269                            .as_ref()
2270                            .and_then(|sv| sv.sparse_vector_index.as_ref())
2271                            .map(|idx| idx.enabled)
2272                            .unwrap_or(false)
2273                })
2274                .map(|(k, _)| k.clone());
2275
2276            if let Some(existing_key) = existing_key {
2277                return Err(SchemaBuilderError::MultipleSparseVectorIndexes { existing_key });
2278            }
2279        }
2280
2281        // Get or create ValueTypes for this key
2282        let value_types = self.keys.entry(key.to_string()).or_default();
2283
2284        // Set the appropriate index based on config type
2285        match config {
2286            IndexConfig::Vector(_) => {
2287                return Err(SchemaBuilderError::VectorIndexMustBeGlobal {
2288                    key: key.to_string(),
2289                });
2290            }
2291            IndexConfig::Fts(_) => {
2292                return Err(SchemaBuilderError::FtsIndexMustBeGlobal {
2293                    key: key.to_string(),
2294                });
2295            }
2296            IndexConfig::SparseVector(cfg) => {
2297                value_types.sparse_vector = Some(SparseVectorValueType {
2298                    sparse_vector_index: Some(SparseVectorIndexType {
2299                        enabled,
2300                        config: cfg,
2301                    }),
2302                });
2303            }
2304            IndexConfig::StringInverted(cfg) => {
2305                if value_types.string.is_none() {
2306                    value_types.string = Some(StringValueType {
2307                        fts_index: None,
2308                        string_inverted_index: None,
2309                    });
2310                }
2311                if let Some(string) = &mut value_types.string {
2312                    string.string_inverted_index = Some(StringInvertedIndexType {
2313                        enabled,
2314                        config: cfg,
2315                    });
2316                }
2317            }
2318            IndexConfig::IntInverted(cfg) => {
2319                value_types.int = Some(IntValueType {
2320                    int_inverted_index: Some(IntInvertedIndexType {
2321                        enabled,
2322                        config: cfg,
2323                    }),
2324                });
2325            }
2326            IndexConfig::FloatInverted(cfg) => {
2327                value_types.float = Some(FloatValueType {
2328                    float_inverted_index: Some(FloatInvertedIndexType {
2329                        enabled,
2330                        config: cfg,
2331                    }),
2332                });
2333            }
2334            IndexConfig::BoolInverted(cfg) => {
2335                value_types.boolean = Some(BoolValueType {
2336                    bool_inverted_index: Some(BoolInvertedIndexType {
2337                        enabled,
2338                        config: cfg,
2339                    }),
2340                });
2341            }
2342        }
2343
2344        Ok(())
2345    }
2346
2347    /// Set index configuration in defaults
2348    fn _set_index_in_defaults_builder(
2349        &mut self,
2350        config: IndexConfig,
2351        enabled: bool,
2352    ) -> Result<(), SchemaBuilderError> {
2353        match config {
2354            IndexConfig::Vector(_) => {
2355                return Err(SchemaBuilderError::VectorIndexMustBeGlobal {
2356                    key: "defaults".to_string(),
2357                });
2358            }
2359            IndexConfig::Fts(_) => {
2360                return Err(SchemaBuilderError::FtsIndexMustBeGlobal {
2361                    key: "defaults".to_string(),
2362                });
2363            }
2364            IndexConfig::SparseVector(cfg) => {
2365                self.defaults.sparse_vector = Some(SparseVectorValueType {
2366                    sparse_vector_index: Some(SparseVectorIndexType {
2367                        enabled,
2368                        config: cfg,
2369                    }),
2370                });
2371            }
2372            IndexConfig::StringInverted(cfg) => {
2373                if self.defaults.string.is_none() {
2374                    self.defaults.string = Some(StringValueType {
2375                        fts_index: None,
2376                        string_inverted_index: None,
2377                    });
2378                }
2379                if let Some(string) = &mut self.defaults.string {
2380                    string.string_inverted_index = Some(StringInvertedIndexType {
2381                        enabled,
2382                        config: cfg,
2383                    });
2384                }
2385            }
2386            IndexConfig::IntInverted(cfg) => {
2387                self.defaults.int = Some(IntValueType {
2388                    int_inverted_index: Some(IntInvertedIndexType {
2389                        enabled,
2390                        config: cfg,
2391                    }),
2392                });
2393            }
2394            IndexConfig::FloatInverted(cfg) => {
2395                self.defaults.float = Some(FloatValueType {
2396                    float_inverted_index: Some(FloatInvertedIndexType {
2397                        enabled,
2398                        config: cfg,
2399                    }),
2400                });
2401            }
2402            IndexConfig::BoolInverted(cfg) => {
2403                self.defaults.boolean = Some(BoolValueType {
2404                    bool_inverted_index: Some(BoolInvertedIndexType {
2405                        enabled,
2406                        config: cfg,
2407                    }),
2408                });
2409            }
2410        }
2411
2412        Ok(())
2413    }
2414}
2415
2416// ============================================================================
2417// INDEX CONFIGURATION STRUCTURES
2418// ============================================================================
2419
/// Configuration for a vector index.
///
/// Every field is optional. Fields that are `None` are omitted when the value
/// is serialized, and unknown fields are rejected when it is deserialized.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct VectorIndexConfig {
    /// Vector space for similarity calculation (cosine, l2, ip)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub space: Option<Space>,
    /// Embedding function configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub embedding_function: Option<EmbeddingFunctionConfiguration>,
    /// Key to source the vector from
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_key: Option<String>,
    /// HNSW algorithm configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub hnsw: Option<HnswIndexConfig>,
    /// SPANN algorithm configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub spann: Option<SpannIndexConfig>,
}
2440
/// Configuration for HNSW vector index algorithm parameters
///
/// Every parameter is optional. Fields that are `None` are omitted when the
/// value is serialized, and unknown fields are rejected when it is
/// deserialized. `batch_size` and `sync_threshold` declare a
/// `#[validate(range(min = 2))]` constraint; the remaining fields declare no
/// range constraint here.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct HnswIndexConfig {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ef_construction: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_neighbors: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ef_search: Option<usize>,
    // Ignored by `HnswIndexConfig::is_default`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub num_threads: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 2))]
    pub batch_size: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 2))]
    pub sync_threshold: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub resize_factor: Option<f64>,
}
2463
2464impl HnswIndexConfig {
2465    /// Check if this config has default values
2466    /// None values are considered default (not set by user)
2467    /// Note: We skip num_threads as it's variable based on available_parallelism
2468    pub fn is_default(&self) -> bool {
2469        if let Some(ef_construction) = self.ef_construction {
2470            if ef_construction != default_construction_ef() {
2471                return false;
2472            }
2473        }
2474        if let Some(max_neighbors) = self.max_neighbors {
2475            if max_neighbors != default_m() {
2476                return false;
2477            }
2478        }
2479        if let Some(ef_search) = self.ef_search {
2480            if ef_search != default_search_ef() {
2481                return false;
2482            }
2483        }
2484        if let Some(batch_size) = self.batch_size {
2485            if batch_size != default_batch_size() {
2486                return false;
2487            }
2488        }
2489        if let Some(sync_threshold) = self.sync_threshold {
2490            if sync_threshold != default_sync_threshold() {
2491                return false;
2492            }
2493        }
2494        if let Some(resize_factor) = self.resize_factor {
2495            if resize_factor != default_resize_factor() {
2496                return false;
2497            }
2498        }
2499        // Skip num_threads check as it's system-dependent
2500        true
2501    }
2502}
2503
/// Configuration for SPANN vector index algorithm parameters
///
/// Every parameter is optional. Fields that are `None` are omitted when the
/// value is serialized, and unknown fields are rejected when it is
/// deserialized. Each field declares its accepted range through a
/// `#[validate(range(...))]` attribute; a range whose `min` and `max` are equal
/// (for example `search_rng_factor`) admits exactly one value.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, Validate, Default)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct SpannIndexConfig {
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 128))]
    pub search_nprobe: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1.0, max = 1.0))]
    pub search_rng_factor: Option<f32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 5.0, max = 10.0))]
    pub search_rng_epsilon: Option<f32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 8))]
    pub nreplica_count: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 1.0, max = 1.0))]
    pub write_rng_factor: Option<f32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 5.0, max = 10.0))]
    pub write_rng_epsilon: Option<f32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 50, max = 200))]
    pub split_threshold: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 1000))]
    pub num_samples_kmeans: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 100.0, max = 100.0))]
    pub initial_lambda: Option<f32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub reassign_neighbor_count: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(min = 25, max = 100))]
    pub merge_threshold: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 8))]
    pub num_centers_to_merge_to: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub write_nprobe: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 200))]
    pub ef_construction: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 200))]
    pub ef_search: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[validate(range(max = 64))]
    pub max_neighbors: Option<usize>,
}
2558
2559impl SpannIndexConfig {
2560    /// Check if this config has default values
2561    /// None values are considered default (not set by user)
2562    pub fn is_default(&self) -> bool {
2563        if let Some(search_nprobe) = self.search_nprobe {
2564            if search_nprobe != default_search_nprobe() {
2565                return false;
2566            }
2567        }
2568        if let Some(search_rng_factor) = self.search_rng_factor {
2569            if search_rng_factor != default_search_rng_factor() {
2570                return false;
2571            }
2572        }
2573        if let Some(search_rng_epsilon) = self.search_rng_epsilon {
2574            if search_rng_epsilon != default_search_rng_epsilon() {
2575                return false;
2576            }
2577        }
2578        if let Some(nreplica_count) = self.nreplica_count {
2579            if nreplica_count != default_nreplica_count() {
2580                return false;
2581            }
2582        }
2583        if let Some(write_rng_factor) = self.write_rng_factor {
2584            if write_rng_factor != default_write_rng_factor() {
2585                return false;
2586            }
2587        }
2588        if let Some(write_rng_epsilon) = self.write_rng_epsilon {
2589            if write_rng_epsilon != default_write_rng_epsilon() {
2590                return false;
2591            }
2592        }
2593        if let Some(split_threshold) = self.split_threshold {
2594            if split_threshold != default_split_threshold() {
2595                return false;
2596            }
2597        }
2598        if let Some(num_samples_kmeans) = self.num_samples_kmeans {
2599            if num_samples_kmeans != default_num_samples_kmeans() {
2600                return false;
2601            }
2602        }
2603        if let Some(initial_lambda) = self.initial_lambda {
2604            if initial_lambda != default_initial_lambda() {
2605                return false;
2606            }
2607        }
2608        if let Some(reassign_neighbor_count) = self.reassign_neighbor_count {
2609            if reassign_neighbor_count != default_reassign_neighbor_count() {
2610                return false;
2611            }
2612        }
2613        if let Some(merge_threshold) = self.merge_threshold {
2614            if merge_threshold != default_merge_threshold() {
2615                return false;
2616            }
2617        }
2618        if let Some(num_centers_to_merge_to) = self.num_centers_to_merge_to {
2619            if num_centers_to_merge_to != default_num_centers_to_merge_to() {
2620                return false;
2621            }
2622        }
2623        if let Some(write_nprobe) = self.write_nprobe {
2624            if write_nprobe != default_write_nprobe() {
2625                return false;
2626            }
2627        }
2628        if let Some(ef_construction) = self.ef_construction {
2629            if ef_construction != default_construction_ef_spann() {
2630                return false;
2631            }
2632        }
2633        if let Some(ef_search) = self.ef_search {
2634            if ef_search != default_search_ef_spann() {
2635                return false;
2636            }
2637        }
2638        if let Some(max_neighbors) = self.max_neighbors {
2639            if max_neighbors != default_m_spann() {
2640                return false;
2641            }
2642        }
2643        true
2644    }
2645}
2646
/// Configuration for a sparse vector index.
///
/// Every field is optional. Fields that are `None` are omitted when the value
/// is serialized, and unknown fields are rejected when it is deserialized.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct SparseVectorIndexConfig {
    /// Embedding function configuration
    #[serde(skip_serializing_if = "Option::is_none")]
    pub embedding_function: Option<EmbeddingFunctionConfiguration>,
    /// Key to source the sparse vector from
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_key: Option<String>,
    /// Whether this embedding is BM25
    #[serde(skip_serializing_if = "Option::is_none")]
    pub bm25: Option<bool>,
}
2661
/// Configuration for the full-text search (FTS) index.
///
/// The struct currently has no fields, and deserialization rejects any field
/// that is supplied.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct FtsIndexConfig {
    // FTS index typically has no additional parameters
}
2668
/// Configuration for the string inverted index.
///
/// The struct currently has no fields, and deserialization rejects any field
/// that is supplied.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct StringInvertedIndexConfig {
    // String inverted index typically has no additional parameters
}
2675
/// Configuration for the integer inverted index.
///
/// The struct currently has no fields, and deserialization rejects any field
/// that is supplied.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct IntInvertedIndexConfig {
    // Integer inverted index typically has no additional parameters
}
2682
/// Configuration for the float inverted index.
///
/// The struct currently has no fields, and deserialization rejects any field
/// that is supplied.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct FloatInvertedIndexConfig {
    // Float inverted index typically has no additional parameters
}
2689
/// Configuration for the boolean inverted index.
///
/// The struct currently has no fields, and deserialization rejects any field
/// that is supplied.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[cfg_attr(feature = "utoipa", derive(utoipa::ToSchema))]
#[serde(deny_unknown_fields)]
pub struct BoolInvertedIndexConfig {
    // Boolean inverted index typically has no additional parameters
}
2696
2697// ============================================================================
2698// BUILDER PATTERN SUPPORT
2699// ============================================================================
2700
/// Union type for all index configurations (used by builder pattern)
///
/// Each variant wraps the configuration struct of one index kind. A `From`
/// impl exists for every wrapped type, so a concrete config can be turned into
/// an `IndexConfig` with `.into()`.
#[derive(Clone, Debug)]
pub enum IndexConfig {
    /// Vector index configuration.
    Vector(VectorIndexConfig),
    /// Sparse vector index configuration.
    SparseVector(SparseVectorIndexConfig),
    /// Full-text search index configuration.
    Fts(FtsIndexConfig),
    /// String inverted index configuration.
    StringInverted(StringInvertedIndexConfig),
    /// Integer inverted index configuration.
    IntInverted(IntInvertedIndexConfig),
    /// Float inverted index configuration.
    FloatInverted(FloatInvertedIndexConfig),
    /// Boolean inverted index configuration.
    BoolInverted(BoolInvertedIndexConfig),
}
2712
2713// Convenience From implementations for ergonomic usage
2714impl From<VectorIndexConfig> for IndexConfig {
2715    fn from(config: VectorIndexConfig) -> Self {
2716        IndexConfig::Vector(config)
2717    }
2718}
2719
2720impl From<SparseVectorIndexConfig> for IndexConfig {
2721    fn from(config: SparseVectorIndexConfig) -> Self {
2722        IndexConfig::SparseVector(config)
2723    }
2724}
2725
2726impl From<FtsIndexConfig> for IndexConfig {
2727    fn from(config: FtsIndexConfig) -> Self {
2728        IndexConfig::Fts(config)
2729    }
2730}
2731
2732impl From<StringInvertedIndexConfig> for IndexConfig {
2733    fn from(config: StringInvertedIndexConfig) -> Self {
2734        IndexConfig::StringInverted(config)
2735    }
2736}
2737
2738impl From<IntInvertedIndexConfig> for IndexConfig {
2739    fn from(config: IntInvertedIndexConfig) -> Self {
2740        IndexConfig::IntInverted(config)
2741    }
2742}
2743
2744impl From<FloatInvertedIndexConfig> for IndexConfig {
2745    fn from(config: FloatInvertedIndexConfig) -> Self {
2746        IndexConfig::FloatInverted(config)
2747    }
2748}
2749
2750impl From<BoolInvertedIndexConfig> for IndexConfig {
2751    fn from(config: BoolInvertedIndexConfig) -> Self {
2752        IndexConfig::BoolInverted(config)
2753    }
2754}
2755
2756impl TryFrom<&InternalCollectionConfiguration> for Schema {
2757    type Error = SchemaError;
2758
2759    fn try_from(config: &InternalCollectionConfiguration) -> Result<Self, Self::Error> {
2760        // Start with a default schema structure
2761        let mut schema = match &config.vector_index {
2762            VectorIndexConfiguration::Hnsw(_) => Schema::new_default(KnnIndex::Hnsw),
2763            VectorIndexConfiguration::Spann(_) => Schema::new_default(KnnIndex::Spann),
2764        };
2765        // Convert vector index configuration
2766        let vector_config = match &config.vector_index {
2767            VectorIndexConfiguration::Hnsw(hnsw_config) => VectorIndexConfig {
2768                space: Some(hnsw_config.space.clone()),
2769                embedding_function: config.embedding_function.clone(),
2770                source_key: None,
2771                hnsw: Some(HnswIndexConfig {
2772                    ef_construction: Some(hnsw_config.ef_construction),
2773                    max_neighbors: Some(hnsw_config.max_neighbors),
2774                    ef_search: Some(hnsw_config.ef_search),
2775                    num_threads: Some(hnsw_config.num_threads),
2776                    batch_size: Some(hnsw_config.batch_size),
2777                    sync_threshold: Some(hnsw_config.sync_threshold),
2778                    resize_factor: Some(hnsw_config.resize_factor),
2779                }),
2780                spann: None,
2781            },
2782            VectorIndexConfiguration::Spann(spann_config) => VectorIndexConfig {
2783                space: Some(spann_config.space.clone()),
2784                embedding_function: config.embedding_function.clone(),
2785                source_key: None,
2786                hnsw: None,
2787                spann: Some(SpannIndexConfig {
2788                    search_nprobe: Some(spann_config.search_nprobe),
2789                    search_rng_factor: Some(spann_config.search_rng_factor),
2790                    search_rng_epsilon: Some(spann_config.search_rng_epsilon),
2791                    nreplica_count: Some(spann_config.nreplica_count),
2792                    write_rng_factor: Some(spann_config.write_rng_factor),
2793                    write_rng_epsilon: Some(spann_config.write_rng_epsilon),
2794                    split_threshold: Some(spann_config.split_threshold),
2795                    num_samples_kmeans: Some(spann_config.num_samples_kmeans),
2796                    initial_lambda: Some(spann_config.initial_lambda),
2797                    reassign_neighbor_count: Some(spann_config.reassign_neighbor_count),
2798                    merge_threshold: Some(spann_config.merge_threshold),
2799                    num_centers_to_merge_to: Some(spann_config.num_centers_to_merge_to),
2800                    write_nprobe: Some(spann_config.write_nprobe),
2801                    ef_construction: Some(spann_config.ef_construction),
2802                    ef_search: Some(spann_config.ef_search),
2803                    max_neighbors: Some(spann_config.max_neighbors),
2804                }),
2805            },
2806        };
2807
2808        // Update defaults (keep enabled=false, just update the config)
2809        // This serves as the template for any new float_list fields
2810        if let Some(float_list) = &mut schema.defaults.float_list {
2811            if let Some(vector_index) = &mut float_list.vector_index {
2812                vector_index.config = vector_config.clone();
2813            }
2814        }
2815
2816        // Update the vector_index in the existing #embedding key override
2817        // Keep enabled=true (already set by new_default) and update the config
2818        // Set source_key to DOCUMENT_KEY for the embedding key
2819        if let Some(embedding_types) = schema.keys.get_mut(EMBEDDING_KEY) {
2820            if let Some(float_list) = &mut embedding_types.float_list {
2821                if let Some(vector_index) = &mut float_list.vector_index {
2822                    let mut vector_config = vector_config;
2823                    vector_config.source_key = Some(DOCUMENT_KEY.to_string());
2824                    vector_index.config = vector_config;
2825                }
2826            }
2827        }
2828
2829        Ok(schema)
2830    }
2831}
2832
2833#[cfg(test)]
2834mod tests {
2835    use super::*;
2836    use crate::hnsw_configuration::Space;
2837    use crate::metadata::SparseVector;
2838    use crate::{
2839        EmbeddingFunctionNewConfiguration, InternalHnswConfiguration, InternalSpannConfiguration,
2840    };
2841    use serde_json::json;
2842
2843    #[test]
2844    fn test_reconcile_with_defaults_none_user_schema() {
2845        // Test that when no user schema is provided, we get the default schema
2846        let result = Schema::reconcile_with_defaults(None, KnnIndex::Spann).unwrap();
2847        let expected = Schema::new_default(KnnIndex::Spann);
2848        assert_eq!(result, expected);
2849    }
2850
2851    #[test]
2852    fn test_reconcile_with_defaults_empty_user_schema() {
2853        // Test merging with an empty user schema
2854        let user_schema = Schema {
2855            defaults: ValueTypes::default(),
2856            keys: HashMap::new(),
2857            cmek: None,
2858            source_attached_function_id: None,
2859        };
2860
2861        let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();
2862        let expected = Schema::new_default(KnnIndex::Spann);
2863        assert_eq!(result, expected);
2864    }
2865
2866    #[test]
2867    fn test_reconcile_with_defaults_user_overrides_string_enabled() {
2868        // Test that user can override string inverted index enabled state
2869        let mut user_schema = Schema {
2870            defaults: ValueTypes::default(),
2871            keys: HashMap::new(),
2872            cmek: None,
2873            source_attached_function_id: None,
2874        };
2875
2876        user_schema.defaults.string = Some(StringValueType {
2877            string_inverted_index: Some(StringInvertedIndexType {
2878                enabled: false, // Override default (true) to false
2879                config: StringInvertedIndexConfig {},
2880            }),
2881            fts_index: None,
2882        });
2883
2884        let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();
2885
2886        // Check that the user override took precedence
2887        assert!(
2888            !result
2889                .defaults
2890                .string
2891                .as_ref()
2892                .unwrap()
2893                .string_inverted_index
2894                .as_ref()
2895                .unwrap()
2896                .enabled
2897        );
2898        // Check that other defaults are still present
2899        assert!(result.defaults.float.is_some());
2900        assert!(result.defaults.int.is_some());
2901    }
2902
2903    #[test]
2904    fn test_reconcile_with_defaults_user_overrides_vector_config() {
2905        // Test field-level merging for vector configurations
2906        let mut user_schema = Schema {
2907            defaults: ValueTypes::default(),
2908            keys: HashMap::new(),
2909            cmek: None,
2910            source_attached_function_id: None,
2911        };
2912
2913        user_schema.defaults.float_list = Some(FloatListValueType {
2914            vector_index: Some(VectorIndexType {
2915                enabled: true, // Enable vector index (default is false)
2916                config: VectorIndexConfig {
2917                    space: Some(Space::L2),                     // Override default space
2918                    embedding_function: None,                   // Will use default
2919                    source_key: Some("custom_key".to_string()), // Override default
2920                    hnsw: Some(HnswIndexConfig {
2921                        ef_construction: Some(500), // Override default
2922                        max_neighbors: None,        // Will use default
2923                        ef_search: None,            // Will use default
2924                        num_threads: None,
2925                        batch_size: None,
2926                        sync_threshold: None,
2927                        resize_factor: None,
2928                    }),
2929                    spann: None,
2930                },
2931            }),
2932        });
2933
2934        // Use HNSW defaults for this test so we have HNSW config to merge with
2935        let result = {
2936            let default_schema = Schema::new_default(KnnIndex::Hnsw);
2937            let merged_defaults = Schema::merge_value_types(
2938                &default_schema.defaults,
2939                &user_schema.defaults,
2940                KnnIndex::Hnsw,
2941            )
2942            .unwrap();
2943            let mut merged_keys = default_schema.keys.clone();
2944            for (key, user_value_types) in user_schema.keys {
2945                if let Some(default_value_types) = merged_keys.get(&key) {
2946                    let merged_value_types = Schema::merge_value_types(
2947                        default_value_types,
2948                        &user_value_types,
2949                        KnnIndex::Hnsw,
2950                    )
2951                    .unwrap();
2952                    merged_keys.insert(key, merged_value_types);
2953                } else {
2954                    merged_keys.insert(key, user_value_types);
2955                }
2956            }
2957            Schema {
2958                defaults: merged_defaults,
2959                keys: merged_keys,
2960                cmek: None,
2961                source_attached_function_id: None,
2962            }
2963        };
2964
2965        let vector_config = &result
2966            .defaults
2967            .float_list
2968            .as_ref()
2969            .unwrap()
2970            .vector_index
2971            .as_ref()
2972            .unwrap()
2973            .config;
2974
2975        // Check user overrides took precedence
2976        assert_eq!(vector_config.space, Some(Space::L2));
2977        assert_eq!(vector_config.source_key, Some("custom_key".to_string()));
2978        assert_eq!(
2979            vector_config.hnsw.as_ref().unwrap().ef_construction,
2980            Some(500)
2981        );
2982
2983        // Check defaults were preserved for unspecified fields
2984        assert_eq!(vector_config.embedding_function, None);
2985        // Since user provided HNSW config, the default max_neighbors should be merged in
2986        assert_eq!(
2987            vector_config.hnsw.as_ref().unwrap().max_neighbors,
2988            Some(default_m())
2989        );
2990    }
2991
2992    #[test]
2993    fn test_reconcile_with_defaults_keys() {
2994        // Test that key overrides are properly merged
2995        let mut user_schema = Schema {
2996            defaults: ValueTypes::default(),
2997            keys: HashMap::new(),
2998            cmek: None,
2999            source_attached_function_id: None,
3000        };
3001
3002        // Add a custom key override
3003        let custom_key_types = ValueTypes {
3004            string: Some(StringValueType {
3005                fts_index: Some(FtsIndexType {
3006                    enabled: true,
3007                    config: FtsIndexConfig {},
3008                }),
3009                string_inverted_index: Some(StringInvertedIndexType {
3010                    enabled: false,
3011                    config: StringInvertedIndexConfig {},
3012                }),
3013            }),
3014            ..Default::default()
3015        };
3016        user_schema
3017            .keys
3018            .insert("custom_key".to_string(), custom_key_types);
3019
3020        let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();
3021
3022        // Check that default key overrides are preserved
3023        assert!(result.keys.contains_key(EMBEDDING_KEY));
3024        assert!(result.keys.contains_key(DOCUMENT_KEY));
3025
3026        // Check that user key override was added
3027        assert!(result.keys.contains_key("custom_key"));
3028        let custom_override = result.keys.get("custom_key").unwrap();
3029        assert!(
3030            custom_override
3031                .string
3032                .as_ref()
3033                .unwrap()
3034                .fts_index
3035                .as_ref()
3036                .unwrap()
3037                .enabled
3038        );
3039    }
3040
3041    #[test]
3042    fn test_reconcile_with_defaults_override_existing_key() {
3043        // Test overriding an existing key override (like #embedding)
3044        let mut user_schema = Schema {
3045            defaults: ValueTypes::default(),
3046            keys: HashMap::new(),
3047            cmek: None,
3048            source_attached_function_id: None,
3049        };
3050
3051        // Override the #embedding key with custom settings
3052        let embedding_override = ValueTypes {
3053            float_list: Some(FloatListValueType {
3054                vector_index: Some(VectorIndexType {
3055                    enabled: false, // Override default enabled=true to false
3056                    config: VectorIndexConfig {
3057                        space: Some(Space::Ip), // Override default space
3058                        embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3059                        source_key: Some("custom_embedding_key".to_string()),
3060                        hnsw: None,
3061                        spann: None,
3062                    },
3063                }),
3064            }),
3065            ..Default::default()
3066        };
3067        user_schema
3068            .keys
3069            .insert(EMBEDDING_KEY.to_string(), embedding_override);
3070
3071        let result = Schema::reconcile_with_defaults(Some(&user_schema), KnnIndex::Spann).unwrap();
3072
3073        let embedding_config = result.keys.get(EMBEDDING_KEY).unwrap();
3074        let vector_config = &embedding_config
3075            .float_list
3076            .as_ref()
3077            .unwrap()
3078            .vector_index
3079            .as_ref()
3080            .unwrap();
3081
3082        // Check user overrides took precedence
3083        assert!(!vector_config.enabled);
3084        assert_eq!(vector_config.config.space, Some(Space::Ip));
3085        assert_eq!(
3086            vector_config.config.source_key,
3087            Some("custom_embedding_key".to_string())
3088        );
3089    }
3090
3091    #[test]
3092    fn test_convert_schema_to_collection_config_hnsw_roundtrip() {
3093        let collection_config = InternalCollectionConfiguration {
3094            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
3095                space: Space::Cosine,
3096                ef_construction: 128,
3097                ef_search: 96,
3098                max_neighbors: 42,
3099                num_threads: 8,
3100                resize_factor: 1.5,
3101                sync_threshold: 2_000,
3102                batch_size: 256,
3103            }),
3104            embedding_function: Some(EmbeddingFunctionConfiguration::Known(
3105                EmbeddingFunctionNewConfiguration {
3106                    name: "custom".to_string(),
3107                    config: json!({"alpha": 1}),
3108                },
3109            )),
3110        };
3111
3112        let schema = Schema::try_from(&collection_config).unwrap();
3113        let reconstructed = InternalCollectionConfiguration::try_from(&schema).unwrap();
3114
3115        assert_eq!(reconstructed, collection_config);
3116    }
3117
3118    #[test]
3119    fn test_convert_schema_to_collection_config_spann_roundtrip() {
3120        let spann_config = InternalSpannConfiguration {
3121            space: Space::Cosine,
3122            search_nprobe: 11,
3123            search_rng_factor: 1.7,
3124            write_nprobe: 5,
3125            nreplica_count: 3,
3126            split_threshold: 150,
3127            merge_threshold: 80,
3128            ef_construction: 120,
3129            ef_search: 90,
3130            max_neighbors: 40,
3131            ..Default::default()
3132        };
3133
3134        let collection_config = InternalCollectionConfiguration {
3135            vector_index: VectorIndexConfiguration::Spann(spann_config.clone()),
3136            embedding_function: Some(EmbeddingFunctionConfiguration::Known(
3137                EmbeddingFunctionNewConfiguration {
3138                    name: "custom".to_string(),
3139                    config: json!({"beta": true}),
3140                },
3141            )),
3142        };
3143
3144        let schema = Schema::try_from(&collection_config).unwrap();
3145        let reconstructed = InternalCollectionConfiguration::try_from(&schema).unwrap();
3146
3147        assert_eq!(reconstructed, collection_config);
3148    }
3149
3150    #[test]
3151    fn test_convert_schema_to_collection_config_rejects_mixed_index() {
3152        let mut schema = Schema::new_default(KnnIndex::Hnsw);
3153        if let Some(embedding) = schema.keys.get_mut(EMBEDDING_KEY) {
3154            if let Some(float_list) = &mut embedding.float_list {
3155                if let Some(vector_index) = &mut float_list.vector_index {
3156                    vector_index.config.spann = Some(SpannIndexConfig {
3157                        search_nprobe: Some(1),
3158                        search_rng_factor: Some(1.0),
3159                        search_rng_epsilon: Some(0.1),
3160                        nreplica_count: Some(1),
3161                        write_rng_factor: Some(1.0),
3162                        write_rng_epsilon: Some(0.1),
3163                        split_threshold: Some(100),
3164                        num_samples_kmeans: Some(10),
3165                        initial_lambda: Some(0.5),
3166                        reassign_neighbor_count: Some(10),
3167                        merge_threshold: Some(50),
3168                        num_centers_to_merge_to: Some(3),
3169                        write_nprobe: Some(1),
3170                        ef_construction: Some(50),
3171                        ef_search: Some(40),
3172                        max_neighbors: Some(20),
3173                    });
3174                }
3175            }
3176        }
3177
3178        let result = InternalCollectionConfiguration::try_from(&schema);
3179        assert!(result.is_err());
3180    }
3181
3182    #[test]
3183    fn test_ensure_key_from_metadata_no_changes_for_existing_key() {
3184        let mut schema = Schema::new_default(KnnIndex::Hnsw);
3185        let before = schema.clone();
3186        let modified = schema.ensure_key_from_metadata(DOCUMENT_KEY, MetadataValueType::Str);
3187        assert!(!modified);
3188        assert_eq!(schema, before);
3189    }
3190
3191    #[test]
3192    fn test_ensure_key_from_metadata_populates_new_key_with_default_value_type() {
3193        let mut schema = Schema::new_default(KnnIndex::Hnsw);
3194        assert!(!schema.keys.contains_key("custom_field"));
3195
3196        let modified = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
3197
3198        assert!(modified);
3199        let entry = schema
3200            .keys
3201            .get("custom_field")
3202            .expect("expected new key override to be inserted");
3203        assert_eq!(entry.boolean, schema.defaults.boolean);
3204        assert!(entry.string.is_none());
3205        assert!(entry.int.is_none());
3206        assert!(entry.float.is_none());
3207        assert!(entry.float_list.is_none());
3208        assert!(entry.sparse_vector.is_none());
3209    }
3210
3211    #[test]
3212    fn test_ensure_key_from_metadata_adds_missing_value_type_to_existing_key() {
3213        let mut schema = Schema::new_default(KnnIndex::Hnsw);
3214        let initial_len = schema.keys.len();
3215        schema.keys.insert(
3216            "custom_field".to_string(),
3217            ValueTypes {
3218                string: schema.defaults.string.clone(),
3219                ..Default::default()
3220            },
3221        );
3222
3223        let modified = schema.ensure_key_from_metadata("custom_field", MetadataValueType::Bool);
3224
3225        assert!(modified);
3226        assert_eq!(schema.keys.len(), initial_len + 1);
3227        let entry = schema
3228            .keys
3229            .get("custom_field")
3230            .expect("expected key override to exist after ensure call");
3231        assert!(entry.string.is_some());
3232        assert_eq!(entry.boolean, schema.defaults.boolean);
3233    }
3234
3235    #[test]
3236    fn test_is_knn_key_indexing_enabled_sparse_disabled_errors() {
3237        let schema = Schema::new_default(KnnIndex::Spann);
3238        let result = schema.is_knn_key_indexing_enabled(
3239            "custom_sparse",
3240            &QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32]).unwrap()),
3241        );
3242
3243        let err = result.expect_err("expected indexing disabled error");
3244        match err {
3245            FilterValidationError::IndexingDisabled { key, value_type } => {
3246                assert_eq!(key, "custom_sparse");
3247                assert_eq!(value_type, crate::metadata::MetadataValueType::SparseVector);
3248            }
3249            other => panic!("unexpected error variant: {other:?}"),
3250        }
3251    }
3252
3253    #[test]
3254    fn test_is_knn_key_indexing_enabled_sparse_enabled_succeeds() {
3255        let mut schema = Schema::new_default(KnnIndex::Spann);
3256        schema.keys.insert(
3257            "sparse_enabled".to_string(),
3258            ValueTypes {
3259                sparse_vector: Some(SparseVectorValueType {
3260                    sparse_vector_index: Some(SparseVectorIndexType {
3261                        enabled: true,
3262                        config: SparseVectorIndexConfig {
3263                            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3264                            source_key: None,
3265                            bm25: None,
3266                        },
3267                    }),
3268                }),
3269                ..Default::default()
3270            },
3271        );
3272
3273        let result = schema.is_knn_key_indexing_enabled(
3274            "sparse_enabled",
3275            &QueryVector::Sparse(SparseVector::new(vec![0_u32], vec![1.0_f32]).unwrap()),
3276        );
3277
3278        assert!(result.is_ok());
3279    }
3280
3281    #[test]
3282    fn test_is_knn_key_indexing_enabled_dense_succeeds() {
3283        let schema = Schema::new_default(KnnIndex::Spann);
3284        let result = schema.is_knn_key_indexing_enabled(
3285            EMBEDDING_KEY,
3286            &QueryVector::Dense(vec![0.1_f32, 0.2_f32]),
3287        );
3288
3289        assert!(result.is_ok());
3290    }
3291
3292    #[test]
3293    fn test_merge_hnsw_configs_field_level() {
3294        // Test field-level merging for HNSW configurations
3295        let default_hnsw = HnswIndexConfig {
3296            ef_construction: Some(200),
3297            max_neighbors: Some(16),
3298            ef_search: Some(10),
3299            num_threads: Some(4),
3300            batch_size: Some(100),
3301            sync_threshold: Some(1000),
3302            resize_factor: Some(1.2),
3303        };
3304
3305        let user_hnsw = HnswIndexConfig {
3306            ef_construction: Some(300), // Override
3307            max_neighbors: None,        // Will use default
3308            ef_search: Some(20),        // Override
3309            num_threads: None,          // Will use default
3310            batch_size: None,           // Will use default
3311            sync_threshold: Some(2000), // Override
3312            resize_factor: None,        // Will use default
3313        };
3314
3315        let result = Schema::merge_hnsw_configs(Some(&default_hnsw), Some(&user_hnsw)).unwrap();
3316
3317        // Check user overrides
3318        assert_eq!(result.ef_construction, Some(300));
3319        assert_eq!(result.ef_search, Some(20));
3320        assert_eq!(result.sync_threshold, Some(2000));
3321
3322        // Check defaults preserved
3323        assert_eq!(result.max_neighbors, Some(16));
3324        assert_eq!(result.num_threads, Some(4));
3325        assert_eq!(result.batch_size, Some(100));
3326        assert_eq!(result.resize_factor, Some(1.2));
3327    }
3328
3329    #[test]
3330    fn test_merge_spann_configs_field_level() {
3331        // Test field-level merging for SPANN configurations
3332        let default_spann = SpannIndexConfig {
3333            search_nprobe: Some(10),
3334            search_rng_factor: Some(1.0),  // Must be exactly 1.0
3335            search_rng_epsilon: Some(7.0), // Must be 5.0-10.0
3336            nreplica_count: Some(3),
3337            write_rng_factor: Some(1.0),  // Must be exactly 1.0
3338            write_rng_epsilon: Some(6.0), // Must be 5.0-10.0
3339            split_threshold: Some(100),   // Must be 50-200
3340            num_samples_kmeans: Some(100),
3341            initial_lambda: Some(100.0), // Must be exactly 100.0
3342            reassign_neighbor_count: Some(50),
3343            merge_threshold: Some(50),        // Must be 25-100
3344            num_centers_to_merge_to: Some(4), // Max is 8
3345            write_nprobe: Some(5),
3346            ef_construction: Some(100),
3347            ef_search: Some(10),
3348            max_neighbors: Some(16),
3349        };
3350
3351        let user_spann = SpannIndexConfig {
3352            search_nprobe: Some(20),       // Override
3353            search_rng_factor: None,       // Will use default
3354            search_rng_epsilon: Some(8.0), // Override (valid: 5.0-10.0)
3355            nreplica_count: None,          // Will use default
3356            write_rng_factor: None,
3357            write_rng_epsilon: None,
3358            split_threshold: Some(150), // Override (valid: 50-200)
3359            num_samples_kmeans: None,
3360            initial_lambda: None,
3361            reassign_neighbor_count: None,
3362            merge_threshold: None,
3363            num_centers_to_merge_to: None,
3364            write_nprobe: None,
3365            ef_construction: None,
3366            ef_search: None,
3367            max_neighbors: None,
3368        };
3369
3370        let result = Schema::merge_spann_configs(Some(&default_spann), Some(&user_spann)).unwrap();
3371
3372        // Check user overrides
3373        assert_eq!(result.search_nprobe, Some(20));
3374        assert_eq!(result.search_rng_epsilon, Some(8.0));
3375        assert_eq!(result.split_threshold, Some(150));
3376
3377        // Check defaults preserved
3378        assert_eq!(result.search_rng_factor, Some(1.0));
3379        assert_eq!(result.nreplica_count, Some(3));
3380        assert_eq!(result.initial_lambda, Some(100.0));
3381    }
3382
3383    #[test]
3384    fn test_spann_index_config_into_internal_configuration() {
3385        let config = SpannIndexConfig {
3386            search_nprobe: Some(33),
3387            search_rng_factor: Some(1.2),
3388            search_rng_epsilon: None,
3389            nreplica_count: None,
3390            write_rng_factor: Some(1.5),
3391            write_rng_epsilon: None,
3392            split_threshold: Some(75),
3393            num_samples_kmeans: None,
3394            initial_lambda: Some(0.9),
3395            reassign_neighbor_count: Some(40),
3396            merge_threshold: None,
3397            num_centers_to_merge_to: Some(4),
3398            write_nprobe: Some(60),
3399            ef_construction: Some(180),
3400            ef_search: Some(170),
3401            max_neighbors: Some(32),
3402        };
3403
3404        let with_space: InternalSpannConfiguration = (Some(&Space::Cosine), &config).into();
3405        assert_eq!(with_space.space, Space::Cosine);
3406        assert_eq!(with_space.search_nprobe, 33);
3407        assert_eq!(with_space.search_rng_factor, 1.2);
3408        assert_eq!(with_space.search_rng_epsilon, default_search_rng_epsilon());
3409        assert_eq!(with_space.write_rng_factor, 1.5);
3410        assert_eq!(with_space.write_nprobe, 60);
3411        assert_eq!(with_space.ef_construction, 180);
3412        assert_eq!(with_space.ef_search, 170);
3413        assert_eq!(with_space.max_neighbors, 32);
3414        assert_eq!(with_space.merge_threshold, default_merge_threshold());
3415
3416        let default_space_config: InternalSpannConfiguration = (None, &config).into();
3417        assert_eq!(default_space_config.space, default_space());
3418    }
3419
3420    #[test]
3421    fn test_merge_string_type_combinations() {
3422        // Test all combinations of default and user StringValueType
3423
3424        // Both Some - should merge
3425        let default = StringValueType {
3426            string_inverted_index: Some(StringInvertedIndexType {
3427                enabled: true,
3428                config: StringInvertedIndexConfig {},
3429            }),
3430            fts_index: Some(FtsIndexType {
3431                enabled: false,
3432                config: FtsIndexConfig {},
3433            }),
3434        };
3435
3436        let user = StringValueType {
3437            string_inverted_index: Some(StringInvertedIndexType {
3438                enabled: false, // Override
3439                config: StringInvertedIndexConfig {},
3440            }),
3441            fts_index: None, // Will use default
3442        };
3443
3444        let result = Schema::merge_string_type(Some(&default), Some(&user))
3445            .unwrap()
3446            .unwrap();
3447        assert!(!result.string_inverted_index.as_ref().unwrap().enabled); // User override
3448        assert!(!result.fts_index.as_ref().unwrap().enabled); // Default preserved
3449
3450        // Default Some, User None - should return default
3451        let result = Schema::merge_string_type(Some(&default), None)
3452            .unwrap()
3453            .unwrap();
3454        assert!(result.string_inverted_index.as_ref().unwrap().enabled);
3455
3456        // Default None, User Some - should return user
3457        let result = Schema::merge_string_type(None, Some(&user))
3458            .unwrap()
3459            .unwrap();
3460        assert!(!result.string_inverted_index.as_ref().unwrap().enabled);
3461
3462        // Both None - should return None
3463        let result = Schema::merge_string_type(None, None).unwrap();
3464        assert!(result.is_none());
3465    }
3466
3467    #[test]
3468    fn test_merge_vector_index_config_comprehensive() {
3469        // Test comprehensive vector index config merging
3470        let default_config = VectorIndexConfig {
3471            space: Some(Space::Cosine),
3472            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3473            source_key: Some("default_key".to_string()),
3474            hnsw: Some(HnswIndexConfig {
3475                ef_construction: Some(200),
3476                max_neighbors: Some(16),
3477                ef_search: Some(10),
3478                num_threads: Some(4),
3479                batch_size: Some(100),
3480                sync_threshold: Some(1000),
3481                resize_factor: Some(1.2),
3482            }),
3483            spann: None,
3484        };
3485
3486        let user_config = VectorIndexConfig {
3487            space: Some(Space::L2),                   // Override
3488            embedding_function: None,                 // Will use default
3489            source_key: Some("user_key".to_string()), // Override
3490            hnsw: Some(HnswIndexConfig {
3491                ef_construction: Some(300), // Override
3492                max_neighbors: None,        // Will use default
3493                ef_search: None,            // Will use default
3494                num_threads: None,
3495                batch_size: None,
3496                sync_threshold: None,
3497                resize_factor: None,
3498            }),
3499            spann: Some(SpannIndexConfig {
3500                search_nprobe: Some(15),
3501                search_rng_factor: None,
3502                search_rng_epsilon: None,
3503                nreplica_count: None,
3504                write_rng_factor: None,
3505                write_rng_epsilon: None,
3506                split_threshold: None,
3507                num_samples_kmeans: None,
3508                initial_lambda: None,
3509                reassign_neighbor_count: None,
3510                merge_threshold: None,
3511                num_centers_to_merge_to: None,
3512                write_nprobe: None,
3513                ef_construction: None,
3514                ef_search: None,
3515                max_neighbors: None,
3516            }), // Add SPANN config
3517        };
3518
3519        let result =
3520            Schema::merge_vector_index_config(&default_config, &user_config, KnnIndex::Hnsw);
3521
3522        // Check field-level merging
3523        assert_eq!(result.space, Some(Space::L2)); // User override
3524        assert_eq!(
3525            result.embedding_function,
3526            Some(EmbeddingFunctionConfiguration::Legacy)
3527        ); // Default preserved
3528        assert_eq!(result.source_key, Some("user_key".to_string())); // User override
3529
3530        // Check HNSW merging
3531        assert_eq!(result.hnsw.as_ref().unwrap().ef_construction, Some(300)); // User override
3532        assert_eq!(result.hnsw.as_ref().unwrap().max_neighbors, Some(16)); // Default preserved
3533
3534        // Check SPANN is not present, since merging in the context of HNSW
3535        assert!(result.spann.is_none());
3536    }
3537
3538    #[test]
3539    fn test_merge_sparse_vector_index_config() {
3540        // Test sparse vector index config merging
3541        let default_config = SparseVectorIndexConfig {
3542            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
3543            source_key: Some("default_sparse_key".to_string()),
3544            bm25: None,
3545        };
3546
3547        let user_config = SparseVectorIndexConfig {
3548            embedding_function: None,                        // Will use default
3549            source_key: Some("user_sparse_key".to_string()), // Override
3550            bm25: None,
3551        };
3552
3553        let result = Schema::merge_sparse_vector_index_config(&default_config, &user_config);
3554
3555        // Check user override
3556        assert_eq!(result.source_key, Some("user_sparse_key".to_string()));
3557        // Check default preserved
3558        assert_eq!(
3559            result.embedding_function,
3560            Some(EmbeddingFunctionConfiguration::Legacy)
3561        );
3562    }
3563
3564    #[test]
3565    fn test_complex_nested_merging_scenario() {
3566        // Test a complex scenario with multiple levels of merging
3567        let mut user_schema = Schema {
3568            defaults: ValueTypes::default(),
3569            keys: HashMap::new(),
3570            cmek: None,
3571            source_attached_function_id: None,
3572        };
3573
3574        // Set up complex user defaults
3575        user_schema.defaults.string = Some(StringValueType {
3576            string_inverted_index: Some(StringInvertedIndexType {
3577                enabled: false,
3578                config: StringInvertedIndexConfig {},
3579            }),
3580            fts_index: Some(FtsIndexType {
3581                enabled: true,
3582                config: FtsIndexConfig {},
3583            }),
3584        });
3585
3586        user_schema.defaults.float_list = Some(FloatListValueType {
3587            vector_index: Some(VectorIndexType {
3588                enabled: true,
3589                config: VectorIndexConfig {
3590                    space: Some(Space::Ip),
3591                    embedding_function: None, // Will use default
3592                    source_key: Some("custom_vector_key".to_string()),
3593                    hnsw: Some(HnswIndexConfig {
3594                        ef_construction: Some(400),
3595                        max_neighbors: Some(32),
3596                        ef_search: None, // Will use default
3597                        num_threads: None,
3598                        batch_size: None,
3599                        sync_threshold: None,
3600                        resize_factor: None,
3601                    }),
3602                    spann: None,
3603                },
3604            }),
3605        });
3606
3607        // Set up key overrides
3608        let custom_key_override = ValueTypes {
3609            string: Some(StringValueType {
3610                fts_index: Some(FtsIndexType {
3611                    enabled: true,
3612                    config: FtsIndexConfig {},
3613                }),
3614                string_inverted_index: None,
3615            }),
3616            ..Default::default()
3617        };
3618        user_schema
3619            .keys
3620            .insert("custom_field".to_string(), custom_key_override);
3621
3622        // Use HNSW defaults for this test so we have HNSW config to merge with
3623        let result = {
3624            let default_schema = Schema::new_default(KnnIndex::Hnsw);
3625            let merged_defaults = Schema::merge_value_types(
3626                &default_schema.defaults,
3627                &user_schema.defaults,
3628                KnnIndex::Hnsw,
3629            )
3630            .unwrap();
3631            let mut merged_keys = default_schema.keys.clone();
3632            for (key, user_value_types) in user_schema.keys {
3633                if let Some(default_value_types) = merged_keys.get(&key) {
3634                    let merged_value_types = Schema::merge_value_types(
3635                        default_value_types,
3636                        &user_value_types,
3637                        KnnIndex::Hnsw,
3638                    )
3639                    .unwrap();
3640                    merged_keys.insert(key, merged_value_types);
3641                } else {
3642                    merged_keys.insert(key, user_value_types);
3643                }
3644            }
3645            Schema {
3646                defaults: merged_defaults,
3647                keys: merged_keys,
3648                cmek: None,
3649                source_attached_function_id: None,
3650            }
3651        };
3652
3653        // Verify complex merging worked correctly
3654
3655        // Check defaults merging
3656        assert!(
3657            !result
3658                .defaults
3659                .string
3660                .as_ref()
3661                .unwrap()
3662                .string_inverted_index
3663                .as_ref()
3664                .unwrap()
3665                .enabled
3666        );
3667        assert!(
3668            result
3669                .defaults
3670                .string
3671                .as_ref()
3672                .unwrap()
3673                .fts_index
3674                .as_ref()
3675                .unwrap()
3676                .enabled
3677        );
3678
3679        let vector_config = &result
3680            .defaults
3681            .float_list
3682            .as_ref()
3683            .unwrap()
3684            .vector_index
3685            .as_ref()
3686            .unwrap()
3687            .config;
3688        assert_eq!(vector_config.space, Some(Space::Ip));
3689        assert_eq!(vector_config.embedding_function, None); // Default preserved
3690        assert_eq!(
3691            vector_config.source_key,
3692            Some("custom_vector_key".to_string())
3693        );
3694        assert_eq!(
3695            vector_config.hnsw.as_ref().unwrap().ef_construction,
3696            Some(400)
3697        );
3698        assert_eq!(vector_config.hnsw.as_ref().unwrap().max_neighbors, Some(32));
3699        assert_eq!(
3700            vector_config.hnsw.as_ref().unwrap().ef_search,
3701            Some(default_search_ef())
3702        ); // Default preserved
3703
3704        // Check key overrides
3705        assert!(result.keys.contains_key(EMBEDDING_KEY)); // Default preserved
3706        assert!(result.keys.contains_key(DOCUMENT_KEY)); // Default preserved
3707        assert!(result.keys.contains_key("custom_field")); // User added
3708
3709        let custom_override = result.keys.get("custom_field").unwrap();
3710        assert!(
3711            custom_override
3712                .string
3713                .as_ref()
3714                .unwrap()
3715                .fts_index
3716                .as_ref()
3717                .unwrap()
3718                .enabled
3719        );
3720        assert!(custom_override
3721            .string
3722            .as_ref()
3723            .unwrap()
3724            .string_inverted_index
3725            .is_none());
3726    }
3727
3728    #[test]
3729    fn test_reconcile_with_collection_config_default_config() {
3730        // Test that when collection config is default, schema is returned as-is
3731        let collection_config = InternalCollectionConfiguration::default_hnsw();
3732        let schema = Schema::try_from(&collection_config).unwrap();
3733
3734        let result =
3735            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
3736                .unwrap();
3737        assert_eq!(result, schema);
3738    }
3739
3740    // Test all 8 cases of double default scenarios
3741    #[test]
3742    fn test_reconcile_double_default_hnsw_config_hnsw_schema_default_knn_hnsw() {
3743        let collection_config = InternalCollectionConfiguration::default_hnsw();
3744        let schema = Schema::new_default(KnnIndex::Hnsw);
3745        let result =
3746            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
3747                .unwrap();
3748
3749        // Should create new schema with default_knn_index (Hnsw)
3750        assert!(result.defaults.float_list.is_some());
3751        assert!(result
3752            .defaults
3753            .float_list
3754            .as_ref()
3755            .unwrap()
3756            .vector_index
3757            .as_ref()
3758            .unwrap()
3759            .config
3760            .hnsw
3761            .is_some());
3762        assert!(result
3763            .defaults
3764            .float_list
3765            .as_ref()
3766            .unwrap()
3767            .vector_index
3768            .as_ref()
3769            .unwrap()
3770            .config
3771            .spann
3772            .is_none());
3773    }
3774
3775    #[test]
3776    fn test_reconcile_double_default_hnsw_config_hnsw_schema_default_knn_spann() {
3777        let collection_config = InternalCollectionConfiguration::default_hnsw();
3778        let schema = Schema::new_default(KnnIndex::Hnsw);
3779        let result =
3780            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
3781                .unwrap();
3782
3783        // Should create new schema with default_knn_index (Spann)
3784        assert!(result.defaults.float_list.is_some());
3785        assert!(result
3786            .defaults
3787            .float_list
3788            .as_ref()
3789            .unwrap()
3790            .vector_index
3791            .as_ref()
3792            .unwrap()
3793            .config
3794            .spann
3795            .is_some());
3796        assert!(result
3797            .defaults
3798            .float_list
3799            .as_ref()
3800            .unwrap()
3801            .vector_index
3802            .as_ref()
3803            .unwrap()
3804            .config
3805            .hnsw
3806            .is_none());
3807    }
3808
3809    #[test]
3810    fn test_reconcile_double_default_hnsw_config_spann_schema_default_knn_hnsw() {
3811        let collection_config = InternalCollectionConfiguration::default_hnsw();
3812        let schema = Schema::new_default(KnnIndex::Spann);
3813        let result =
3814            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
3815                .unwrap();
3816
3817        // Should create new schema with default_knn_index (Hnsw)
3818        assert!(result.defaults.float_list.is_some());
3819        assert!(result
3820            .defaults
3821            .float_list
3822            .as_ref()
3823            .unwrap()
3824            .vector_index
3825            .as_ref()
3826            .unwrap()
3827            .config
3828            .hnsw
3829            .is_some());
3830        assert!(result
3831            .defaults
3832            .float_list
3833            .as_ref()
3834            .unwrap()
3835            .vector_index
3836            .as_ref()
3837            .unwrap()
3838            .config
3839            .spann
3840            .is_none());
3841    }
3842
3843    #[test]
3844    fn test_reconcile_double_default_hnsw_config_spann_schema_default_knn_spann() {
3845        let collection_config = InternalCollectionConfiguration::default_hnsw();
3846        let schema = Schema::new_default(KnnIndex::Spann);
3847        let result =
3848            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
3849                .unwrap();
3850
3851        // Should create new schema with default_knn_index (Spann)
3852        assert!(result.defaults.float_list.is_some());
3853        assert!(result
3854            .defaults
3855            .float_list
3856            .as_ref()
3857            .unwrap()
3858            .vector_index
3859            .as_ref()
3860            .unwrap()
3861            .config
3862            .spann
3863            .is_some());
3864        assert!(result
3865            .defaults
3866            .float_list
3867            .as_ref()
3868            .unwrap()
3869            .vector_index
3870            .as_ref()
3871            .unwrap()
3872            .config
3873            .hnsw
3874            .is_none());
3875    }
3876
3877    #[test]
3878    fn test_reconcile_double_default_spann_config_spann_schema_default_knn_hnsw() {
3879        let collection_config = InternalCollectionConfiguration::default_spann();
3880        let schema = Schema::new_default(KnnIndex::Spann);
3881        let result =
3882            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
3883                .unwrap();
3884
3885        // Should create new schema with default_knn_index (Hnsw)
3886        assert!(result.defaults.float_list.is_some());
3887        assert!(result
3888            .defaults
3889            .float_list
3890            .as_ref()
3891            .unwrap()
3892            .vector_index
3893            .as_ref()
3894            .unwrap()
3895            .config
3896            .hnsw
3897            .is_some());
3898        assert!(result
3899            .defaults
3900            .float_list
3901            .as_ref()
3902            .unwrap()
3903            .vector_index
3904            .as_ref()
3905            .unwrap()
3906            .config
3907            .spann
3908            .is_none());
3909    }
3910
3911    #[test]
3912    fn test_reconcile_double_default_spann_config_spann_schema_default_knn_spann() {
3913        let collection_config = InternalCollectionConfiguration::default_spann();
3914        let schema = Schema::new_default(KnnIndex::Spann);
3915        let result =
3916            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
3917                .unwrap();
3918
3919        // Should create new schema with default_knn_index (Spann)
3920        assert!(result.defaults.float_list.is_some());
3921        assert!(result
3922            .defaults
3923            .float_list
3924            .as_ref()
3925            .unwrap()
3926            .vector_index
3927            .as_ref()
3928            .unwrap()
3929            .config
3930            .spann
3931            .is_some());
3932        assert!(result
3933            .defaults
3934            .float_list
3935            .as_ref()
3936            .unwrap()
3937            .vector_index
3938            .as_ref()
3939            .unwrap()
3940            .config
3941            .hnsw
3942            .is_none());
3943        // Defaults should have source_key=None
3944        assert_eq!(
3945            result
3946                .defaults
3947                .float_list
3948                .as_ref()
3949                .unwrap()
3950                .vector_index
3951                .as_ref()
3952                .unwrap()
3953                .config
3954                .source_key,
3955            None
3956        );
3957    }
3958
3959    #[test]
3960    fn test_reconcile_double_default_spann_config_hnsw_schema_default_knn_hnsw() {
3961        let collection_config = InternalCollectionConfiguration::default_spann();
3962        let schema = Schema::new_default(KnnIndex::Hnsw);
3963        let result =
3964            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
3965                .unwrap();
3966
3967        // Should create new schema with default_knn_index (Hnsw)
3968        assert!(result.defaults.float_list.is_some());
3969        assert!(result
3970            .defaults
3971            .float_list
3972            .as_ref()
3973            .unwrap()
3974            .vector_index
3975            .as_ref()
3976            .unwrap()
3977            .config
3978            .hnsw
3979            .is_some());
3980        assert!(result
3981            .defaults
3982            .float_list
3983            .as_ref()
3984            .unwrap()
3985            .vector_index
3986            .as_ref()
3987            .unwrap()
3988            .config
3989            .spann
3990            .is_none());
3991    }
3992
3993    #[test]
3994    fn test_reconcile_double_default_spann_config_hnsw_schema_default_knn_spann() {
3995        let collection_config = InternalCollectionConfiguration::default_spann();
3996        let schema = Schema::new_default(KnnIndex::Hnsw);
3997        let result =
3998            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
3999                .unwrap();
4000
4001        // Should create new schema with default_knn_index (Spann)
4002        assert!(result.defaults.float_list.is_some());
4003        assert!(result
4004            .defaults
4005            .float_list
4006            .as_ref()
4007            .unwrap()
4008            .vector_index
4009            .as_ref()
4010            .unwrap()
4011            .config
4012            .spann
4013            .is_some());
4014        assert!(result
4015            .defaults
4016            .float_list
4017            .as_ref()
4018            .unwrap()
4019            .vector_index
4020            .as_ref()
4021            .unwrap()
4022            .config
4023            .hnsw
4024            .is_none());
4025    }
4026
4027    #[test]
4028    fn test_defaults_source_key_not_document() {
4029        // Test that defaults.float_list.vector_index.config.source_key is None, not DOCUMENT_KEY
4030        let schema_hnsw = Schema::new_default(KnnIndex::Hnsw);
4031        let schema_spann = Schema::new_default(KnnIndex::Spann);
4032
4033        // Check HNSW default schema
4034        let defaults_hnsw = schema_hnsw
4035            .defaults
4036            .float_list
4037            .as_ref()
4038            .unwrap()
4039            .vector_index
4040            .as_ref()
4041            .unwrap();
4042        assert_eq!(defaults_hnsw.config.source_key, None);
4043
4044        // Check Spann default schema
4045        let defaults_spann = schema_spann
4046            .defaults
4047            .float_list
4048            .as_ref()
4049            .unwrap()
4050            .vector_index
4051            .as_ref()
4052            .unwrap();
4053        assert_eq!(defaults_spann.config.source_key, None);
4054
4055        // Test after reconcile with NON-default collection config
4056        // This path calls try_from where our fix is
4057        let collection_config_hnsw = InternalCollectionConfiguration {
4058            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
4059                ef_construction: 300,
4060                max_neighbors: 32,
4061                ef_search: 50,
4062                num_threads: 8,
4063                batch_size: 200,
4064                sync_threshold: 2000,
4065                resize_factor: 1.5,
4066                space: Space::L2,
4067            }),
4068            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
4069        };
4070        let result_hnsw = Schema::reconcile_with_collection_config(
4071            &schema_hnsw,
4072            &collection_config_hnsw,
4073            KnnIndex::Hnsw,
4074        )
4075        .unwrap();
4076        let reconciled_defaults_hnsw = result_hnsw
4077            .defaults
4078            .float_list
4079            .as_ref()
4080            .unwrap()
4081            .vector_index
4082            .as_ref()
4083            .unwrap();
4084        assert_eq!(reconciled_defaults_hnsw.config.source_key, None);
4085
4086        let collection_config_spann = InternalCollectionConfiguration {
4087            vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
4088                search_nprobe: 20,
4089                search_rng_factor: 3.0,
4090                search_rng_epsilon: 0.2,
4091                nreplica_count: 5,
4092                write_rng_factor: 2.0,
4093                write_rng_epsilon: 0.1,
4094                split_threshold: 2000,
4095                num_samples_kmeans: 200,
4096                initial_lambda: 0.8,
4097                reassign_neighbor_count: 100,
4098                merge_threshold: 800,
4099                num_centers_to_merge_to: 20,
4100                write_nprobe: 10,
4101                ef_construction: 400,
4102                ef_search: 60,
4103                max_neighbors: 24,
4104                space: Space::Cosine,
4105            }),
4106            embedding_function: None,
4107        };
4108        let result_spann = Schema::reconcile_with_collection_config(
4109            &schema_spann,
4110            &collection_config_spann,
4111            KnnIndex::Spann,
4112        )
4113        .unwrap();
4114        let reconciled_defaults_spann = result_spann
4115            .defaults
4116            .float_list
4117            .as_ref()
4118            .unwrap()
4119            .vector_index
4120            .as_ref()
4121            .unwrap();
4122        assert_eq!(reconciled_defaults_spann.config.source_key, None);
4123
4124        // Verify that #embedding key DOES have source_key set to DOCUMENT_KEY
4125        let embedding_hnsw = result_hnsw.keys.get(EMBEDDING_KEY).unwrap();
4126        let embedding_vector_index_hnsw = embedding_hnsw
4127            .float_list
4128            .as_ref()
4129            .unwrap()
4130            .vector_index
4131            .as_ref()
4132            .unwrap();
4133        assert_eq!(
4134            embedding_vector_index_hnsw.config.source_key,
4135            Some(DOCUMENT_KEY.to_string())
4136        );
4137
4138        let embedding_spann = result_spann.keys.get(EMBEDDING_KEY).unwrap();
4139        let embedding_vector_index_spann = embedding_spann
4140            .float_list
4141            .as_ref()
4142            .unwrap()
4143            .vector_index
4144            .as_ref()
4145            .unwrap();
4146        assert_eq!(
4147            embedding_vector_index_spann.config.source_key,
4148            Some(DOCUMENT_KEY.to_string())
4149        );
4150    }
4151
4152    #[test]
4153    fn test_try_from_source_key() {
4154        // Direct test of try_from to verify source_key behavior
4155        // Defaults should have source_key=None, #embedding should have source_key=DOCUMENT_KEY
4156
4157        // Test with HNSW config
4158        let collection_config_hnsw = InternalCollectionConfiguration {
4159            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
4160                ef_construction: 300,
4161                max_neighbors: 32,
4162                ef_search: 50,
4163                num_threads: 8,
4164                batch_size: 200,
4165                sync_threshold: 2000,
4166                resize_factor: 1.5,
4167                space: Space::L2,
4168            }),
4169            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
4170        };
4171        let schema_hnsw = Schema::try_from(&collection_config_hnsw).unwrap();
4172
4173        // Check defaults have source_key=None
4174        let defaults_hnsw = schema_hnsw
4175            .defaults
4176            .float_list
4177            .as_ref()
4178            .unwrap()
4179            .vector_index
4180            .as_ref()
4181            .unwrap();
4182        assert_eq!(defaults_hnsw.config.source_key, None);
4183
4184        // Check #embedding has source_key=DOCUMENT_KEY
4185        let embedding_hnsw = schema_hnsw.keys.get(EMBEDDING_KEY).unwrap();
4186        let embedding_vector_index_hnsw = embedding_hnsw
4187            .float_list
4188            .as_ref()
4189            .unwrap()
4190            .vector_index
4191            .as_ref()
4192            .unwrap();
4193        assert_eq!(
4194            embedding_vector_index_hnsw.config.source_key,
4195            Some(DOCUMENT_KEY.to_string())
4196        );
4197
4198        // Test with Spann config
4199        let collection_config_spann = InternalCollectionConfiguration {
4200            vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
4201                search_nprobe: 20,
4202                search_rng_factor: 3.0,
4203                search_rng_epsilon: 0.2,
4204                nreplica_count: 5,
4205                write_rng_factor: 2.0,
4206                write_rng_epsilon: 0.1,
4207                split_threshold: 2000,
4208                num_samples_kmeans: 200,
4209                initial_lambda: 0.8,
4210                reassign_neighbor_count: 100,
4211                merge_threshold: 800,
4212                num_centers_to_merge_to: 20,
4213                write_nprobe: 10,
4214                ef_construction: 400,
4215                ef_search: 60,
4216                max_neighbors: 24,
4217                space: Space::Cosine,
4218            }),
4219            embedding_function: None,
4220        };
4221        let schema_spann = Schema::try_from(&collection_config_spann).unwrap();
4222
4223        // Check defaults have source_key=None
4224        let defaults_spann = schema_spann
4225            .defaults
4226            .float_list
4227            .as_ref()
4228            .unwrap()
4229            .vector_index
4230            .as_ref()
4231            .unwrap();
4232        assert_eq!(defaults_spann.config.source_key, None);
4233
4234        // Check #embedding has source_key=DOCUMENT_KEY
4235        let embedding_spann = schema_spann.keys.get(EMBEDDING_KEY).unwrap();
4236        let embedding_vector_index_spann = embedding_spann
4237            .float_list
4238            .as_ref()
4239            .unwrap()
4240            .vector_index
4241            .as_ref()
4242            .unwrap();
4243        assert_eq!(
4244            embedding_vector_index_spann.config.source_key,
4245            Some(DOCUMENT_KEY.to_string())
4246        );
4247    }
4248
4249    #[test]
4250    fn test_default_hnsw_with_default_embedding_function() {
4251        // Test that when InternalCollectionConfiguration is default HNSW but has
4252        // an embedding function with name "default" and config as {}, it still
4253        // goes through the double default path and preserves source_key behavior
4254        use crate::collection_configuration::EmbeddingFunctionNewConfiguration;
4255
4256        let collection_config = InternalCollectionConfiguration {
4257            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration::default()),
4258            embedding_function: Some(EmbeddingFunctionConfiguration::Known(
4259                EmbeddingFunctionNewConfiguration {
4260                    name: "default".to_string(),
4261                    config: serde_json::json!({}),
4262                },
4263            )),
4264        };
4265
4266        // Verify it's still considered default
4267        assert!(collection_config.is_default());
4268
4269        let schema = Schema::new_default(KnnIndex::Hnsw);
4270        let result =
4271            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
4272                .unwrap();
4273
4274        // Check that defaults have source_key=None
4275        let defaults = result
4276            .defaults
4277            .float_list
4278            .as_ref()
4279            .unwrap()
4280            .vector_index
4281            .as_ref()
4282            .unwrap();
4283        assert_eq!(defaults.config.source_key, None);
4284
4285        // Check that #embedding has source_key=DOCUMENT_KEY
4286        let embedding = result.keys.get(EMBEDDING_KEY).unwrap();
4287        let embedding_vector_index = embedding
4288            .float_list
4289            .as_ref()
4290            .unwrap()
4291            .vector_index
4292            .as_ref()
4293            .unwrap();
4294        assert_eq!(
4295            embedding_vector_index.config.source_key,
4296            Some(DOCUMENT_KEY.to_string())
4297        );
4298
4299        // verify vector index config is set to spann
4300        let vector_index_config = defaults.config.clone();
4301        assert!(vector_index_config.spann.is_some());
4302        assert!(vector_index_config.hnsw.is_none());
4303
4304        // Verify embedding function was set correctly
4305        assert_eq!(
4306            embedding_vector_index.config.embedding_function,
4307            Some(EmbeddingFunctionConfiguration::Known(
4308                EmbeddingFunctionNewConfiguration {
4309                    name: "default".to_string(),
4310                    config: serde_json::json!({}),
4311                },
4312            ))
4313        );
4314        assert_eq!(
4315            defaults.config.embedding_function,
4316            Some(EmbeddingFunctionConfiguration::Known(
4317                EmbeddingFunctionNewConfiguration {
4318                    name: "default".to_string(),
4319                    config: serde_json::json!({}),
4320                },
4321            ))
4322        );
4323    }
4324
4325    #[test]
4326    fn test_reconcile_with_collection_config_both_non_default() {
4327        // Test that when both schema and collection config are non-default, it returns an error
4328        let mut schema = Schema::new_default(KnnIndex::Hnsw);
4329        schema.defaults.string = Some(StringValueType {
4330            fts_index: Some(FtsIndexType {
4331                enabled: true,
4332                config: FtsIndexConfig {},
4333            }),
4334            string_inverted_index: None,
4335        });
4336
4337        let mut collection_config = InternalCollectionConfiguration::default_hnsw();
4338        // Make collection config non-default by changing a parameter
4339        if let VectorIndexConfiguration::Hnsw(ref mut hnsw_config) = collection_config.vector_index
4340        {
4341            hnsw_config.ef_construction = 500; // Non-default value
4342        }
4343
4344        // Use reconcile_schema_and_config which has the early validation
4345        let result = Schema::reconcile_schema_and_config(
4346            Some(&schema),
4347            Some(&collection_config),
4348            KnnIndex::Spann,
4349        );
4350        assert!(result.is_err());
4351        assert!(matches!(
4352            result.unwrap_err(),
4353            SchemaError::ConfigAndSchemaConflict
4354        ));
4355    }
4356
4357    #[test]
4358    fn test_reconcile_with_collection_config_hnsw_override() {
4359        // Test that non-default HNSW collection config overrides default schema
4360        let schema = Schema::new_default(KnnIndex::Hnsw); // Use actual default schema
4361
4362        let collection_config = InternalCollectionConfiguration {
4363            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
4364                ef_construction: 300,
4365                max_neighbors: 32,
4366                ef_search: 50,
4367                num_threads: 8,
4368                batch_size: 200,
4369                sync_threshold: 2000,
4370                resize_factor: 1.5,
4371                space: Space::L2,
4372            }),
4373            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
4374        };
4375
4376        let result =
4377            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
4378                .unwrap();
4379
4380        // Check that #embedding key override was created with the collection config settings
4381        let embedding_override = result.keys.get(EMBEDDING_KEY).unwrap();
4382        let vector_index = embedding_override
4383            .float_list
4384            .as_ref()
4385            .unwrap()
4386            .vector_index
4387            .as_ref()
4388            .unwrap();
4389
4390        assert!(vector_index.enabled);
4391        assert_eq!(vector_index.config.space, Some(Space::L2));
4392        assert_eq!(
4393            vector_index.config.embedding_function,
4394            Some(EmbeddingFunctionConfiguration::Legacy)
4395        );
4396        assert_eq!(
4397            vector_index.config.source_key,
4398            Some(DOCUMENT_KEY.to_string())
4399        );
4400
4401        let hnsw_config = vector_index.config.hnsw.as_ref().unwrap();
4402        assert_eq!(hnsw_config.ef_construction, Some(300));
4403        assert_eq!(hnsw_config.max_neighbors, Some(32));
4404        assert_eq!(hnsw_config.ef_search, Some(50));
4405        assert_eq!(hnsw_config.num_threads, Some(8));
4406        assert_eq!(hnsw_config.batch_size, Some(200));
4407        assert_eq!(hnsw_config.sync_threshold, Some(2000));
4408        assert_eq!(hnsw_config.resize_factor, Some(1.5));
4409
4410        assert!(vector_index.config.spann.is_none());
4411    }
4412
4413    #[test]
4414    fn test_reconcile_with_collection_config_spann_override() {
4415        // Test that non-default SPANN collection config overrides default schema
4416        let schema = Schema::new_default(KnnIndex::Spann); // Use actual default schema
4417
4418        let collection_config = InternalCollectionConfiguration {
4419            vector_index: VectorIndexConfiguration::Spann(InternalSpannConfiguration {
4420                search_nprobe: 20,
4421                search_rng_factor: 3.0,
4422                search_rng_epsilon: 0.2,
4423                nreplica_count: 5,
4424                write_rng_factor: 2.0,
4425                write_rng_epsilon: 0.1,
4426                split_threshold: 2000,
4427                num_samples_kmeans: 200,
4428                initial_lambda: 0.8,
4429                reassign_neighbor_count: 100,
4430                merge_threshold: 800,
4431                num_centers_to_merge_to: 20,
4432                write_nprobe: 10,
4433                ef_construction: 400,
4434                ef_search: 60,
4435                max_neighbors: 24,
4436                space: Space::Cosine,
4437            }),
4438            embedding_function: None,
4439        };
4440
4441        let result =
4442            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Spann)
4443                .unwrap();
4444
4445        // Check that #embedding key override was created with the collection config settings
4446        let embedding_override = result.keys.get(EMBEDDING_KEY).unwrap();
4447        let vector_index = embedding_override
4448            .float_list
4449            .as_ref()
4450            .unwrap()
4451            .vector_index
4452            .as_ref()
4453            .unwrap();
4454
4455        assert!(vector_index.enabled);
4456        assert_eq!(vector_index.config.space, Some(Space::Cosine));
4457        assert_eq!(vector_index.config.embedding_function, None);
4458        assert_eq!(
4459            vector_index.config.source_key,
4460            Some(DOCUMENT_KEY.to_string())
4461        );
4462
4463        assert!(vector_index.config.hnsw.is_none());
4464
4465        let spann_config = vector_index.config.spann.as_ref().unwrap();
4466        assert_eq!(spann_config.search_nprobe, Some(20));
4467        assert_eq!(spann_config.search_rng_factor, Some(3.0));
4468        assert_eq!(spann_config.search_rng_epsilon, Some(0.2));
4469        assert_eq!(spann_config.nreplica_count, Some(5));
4470        assert_eq!(spann_config.write_rng_factor, Some(2.0));
4471        assert_eq!(spann_config.write_rng_epsilon, Some(0.1));
4472        assert_eq!(spann_config.split_threshold, Some(2000));
4473        assert_eq!(spann_config.num_samples_kmeans, Some(200));
4474        assert_eq!(spann_config.initial_lambda, Some(0.8));
4475        assert_eq!(spann_config.reassign_neighbor_count, Some(100));
4476        assert_eq!(spann_config.merge_threshold, Some(800));
4477        assert_eq!(spann_config.num_centers_to_merge_to, Some(20));
4478        assert_eq!(spann_config.write_nprobe, Some(10));
4479        assert_eq!(spann_config.ef_construction, Some(400));
4480        assert_eq!(spann_config.ef_search, Some(60));
4481        assert_eq!(spann_config.max_neighbors, Some(24));
4482    }
4483
4484    #[test]
4485    fn test_reconcile_with_collection_config_updates_both_defaults_and_embedding() {
4486        // Test that collection config updates BOTH defaults.float_list.vector_index
4487        // AND keys["embedding"].float_list.vector_index
4488        let schema = Schema::new_default(KnnIndex::Hnsw);
4489
4490        let collection_config = InternalCollectionConfiguration {
4491            vector_index: VectorIndexConfiguration::Hnsw(InternalHnswConfiguration {
4492                ef_construction: 300,
4493                max_neighbors: 32,
4494                ef_search: 50,
4495                num_threads: 8,
4496                batch_size: 200,
4497                sync_threshold: 2000,
4498                resize_factor: 1.5,
4499                space: Space::L2,
4500            }),
4501            embedding_function: Some(EmbeddingFunctionConfiguration::Legacy),
4502        };
4503
4504        let result =
4505            Schema::reconcile_with_collection_config(&schema, &collection_config, KnnIndex::Hnsw)
4506                .unwrap();
4507
4508        // Check that defaults.float_list.vector_index was updated
4509        let defaults_vector_index = result
4510            .defaults
4511            .float_list
4512            .as_ref()
4513            .unwrap()
4514            .vector_index
4515            .as_ref()
4516            .unwrap();
4517
4518        // Should be disabled in defaults (template for new keys)
4519        assert!(!defaults_vector_index.enabled);
4520        // But config should be updated
4521        assert_eq!(defaults_vector_index.config.space, Some(Space::L2));
4522        assert_eq!(
4523            defaults_vector_index.config.embedding_function,
4524            Some(EmbeddingFunctionConfiguration::Legacy)
4525        );
4526        assert_eq!(defaults_vector_index.config.source_key, None);
4527        let defaults_hnsw = defaults_vector_index.config.hnsw.as_ref().unwrap();
4528        assert_eq!(defaults_hnsw.ef_construction, Some(300));
4529        assert_eq!(defaults_hnsw.max_neighbors, Some(32));
4530
4531        // Check that #embedding key override was also updated
4532        let embedding_override = result.keys.get(EMBEDDING_KEY).unwrap();
4533        let embedding_vector_index = embedding_override
4534            .float_list
4535            .as_ref()
4536            .unwrap()
4537            .vector_index
4538            .as_ref()
4539            .unwrap();
4540
4541        // Should be enabled on #embedding
4542        assert!(embedding_vector_index.enabled);
4543        // Config should match defaults
4544        assert_eq!(embedding_vector_index.config.space, Some(Space::L2));
4545        assert_eq!(
4546            embedding_vector_index.config.embedding_function,
4547            Some(EmbeddingFunctionConfiguration::Legacy)
4548        );
4549        assert_eq!(
4550            embedding_vector_index.config.source_key,
4551            Some(DOCUMENT_KEY.to_string())
4552        );
4553        let embedding_hnsw = embedding_vector_index.config.hnsw.as_ref().unwrap();
4554        assert_eq!(embedding_hnsw.ef_construction, Some(300));
4555        assert_eq!(embedding_hnsw.max_neighbors, Some(32));
4556    }
4557
4558    #[test]
4559    fn test_is_schema_default() {
4560        // Test that actual default schemas are correctly identified
4561        let default_hnsw_schema = Schema::new_default(KnnIndex::Hnsw);
4562        assert!(default_hnsw_schema.is_default());
4563
4564        let default_spann_schema = Schema::new_default(KnnIndex::Spann);
4565        assert!(default_spann_schema.is_default());
4566
4567        // Test that a modified default schema is not considered default
4568        let mut modified_schema = Schema::new_default(KnnIndex::Hnsw);
4569        // Make a clear modification - change the string inverted index enabled state
4570        if let Some(ref mut string_type) = modified_schema.defaults.string {
4571            if let Some(ref mut string_inverted) = string_type.string_inverted_index {
4572                string_inverted.enabled = false; // Default is true, so this should make it non-default
4573            }
4574        }
4575        assert!(!modified_schema.is_default());
4576
4577        // Test that schema with additional key overrides is not default
4578        let mut schema_with_extra_overrides = Schema::new_default(KnnIndex::Hnsw);
4579        schema_with_extra_overrides
4580            .keys
4581            .insert("custom_key".to_string(), ValueTypes::default());
4582        assert!(!schema_with_extra_overrides.is_default());
4583    }
4584
4585    #[test]
4586    fn test_is_schema_default_with_space() {
4587        let schema = Schema::new_default(KnnIndex::Hnsw);
4588        assert!(schema.is_default());
4589
4590        let mut schema_with_space = Schema::new_default(KnnIndex::Hnsw);
4591        if let Some(ref mut float_list) = schema_with_space.defaults.float_list {
4592            if let Some(ref mut vector_index) = float_list.vector_index {
4593                vector_index.config.space = Some(Space::Cosine);
4594            }
4595        }
4596        assert!(!schema_with_space.is_default());
4597
4598        let mut schema_with_space_in_embedding_key = Schema::new_default(KnnIndex::Spann);
4599        if let Some(ref mut embedding_key) = schema_with_space_in_embedding_key
4600            .keys
4601            .get_mut(EMBEDDING_KEY)
4602        {
4603            if let Some(ref mut float_list) = embedding_key.float_list {
4604                if let Some(ref mut vector_index) = float_list.vector_index {
4605                    vector_index.config.space = Some(Space::Cosine);
4606                }
4607            }
4608        }
4609        assert!(!schema_with_space_in_embedding_key.is_default());
4610    }
4611
4612    #[test]
4613    fn test_is_schema_default_with_embedding_function() {
4614        let schema = Schema::new_default(KnnIndex::Hnsw);
4615        assert!(schema.is_default());
4616
4617        let mut schema_with_embedding_function = Schema::new_default(KnnIndex::Hnsw);
4618        if let Some(ref mut float_list) = schema_with_embedding_function.defaults.float_list {
4619            if let Some(ref mut vector_index) = float_list.vector_index {
4620                vector_index.config.embedding_function =
4621                    Some(EmbeddingFunctionConfiguration::Legacy);
4622            }
4623        }
4624        assert!(!schema_with_embedding_function.is_default());
4625
4626        let mut schema_with_embedding_function_in_embedding_key =
4627            Schema::new_default(KnnIndex::Spann);
4628        if let Some(ref mut embedding_key) = schema_with_embedding_function_in_embedding_key
4629            .keys
4630            .get_mut(EMBEDDING_KEY)
4631        {
4632            if let Some(ref mut float_list) = embedding_key.float_list {
4633                if let Some(ref mut vector_index) = float_list.vector_index {
4634                    vector_index.config.embedding_function =
4635                        Some(EmbeddingFunctionConfiguration::Legacy);
4636                }
4637            }
4638        }
4639        assert!(!schema_with_embedding_function_in_embedding_key.is_default());
4640    }
4641
4642    #[test]
4643    fn test_add_merges_keys_by_value_type() {
4644        let mut schema_a = Schema::new_default(KnnIndex::Hnsw);
4645        let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
4646
4647        let string_override = ValueTypes {
4648            string: Some(StringValueType {
4649                string_inverted_index: Some(StringInvertedIndexType {
4650                    enabled: true,
4651                    config: StringInvertedIndexConfig {},
4652                }),
4653                fts_index: None,
4654            }),
4655            ..Default::default()
4656        };
4657        schema_a
4658            .keys
4659            .insert("custom_field".to_string(), string_override);
4660
4661        let float_override = ValueTypes {
4662            float: Some(FloatValueType {
4663                float_inverted_index: Some(FloatInvertedIndexType {
4664                    enabled: true,
4665                    config: FloatInvertedIndexConfig {},
4666                }),
4667            }),
4668            ..Default::default()
4669        };
4670        schema_b
4671            .keys
4672            .insert("custom_field".to_string(), float_override);
4673
4674        let merged = schema_a.merge(&schema_b).unwrap();
4675        let merged_override = merged.keys.get("custom_field").unwrap();
4676
4677        assert!(merged_override.string.is_some());
4678        assert!(merged_override.float.is_some());
4679        assert!(
4680            merged_override
4681                .string
4682                .as_ref()
4683                .unwrap()
4684                .string_inverted_index
4685                .as_ref()
4686                .unwrap()
4687                .enabled
4688        );
4689        assert!(
4690            merged_override
4691                .float
4692                .as_ref()
4693                .unwrap()
4694                .float_inverted_index
4695                .as_ref()
4696                .unwrap()
4697                .enabled
4698        );
4699    }
4700
4701    #[test]
4702    fn test_add_rejects_different_defaults() {
4703        let schema_a = Schema::new_default(KnnIndex::Hnsw);
4704        let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
4705
4706        if let Some(string_type) = schema_b.defaults.string.as_mut() {
4707            if let Some(string_index) = string_type.string_inverted_index.as_mut() {
4708                string_index.enabled = false;
4709            }
4710        }
4711
4712        let err = schema_a.merge(&schema_b).unwrap_err();
4713        assert!(matches!(err, SchemaError::DefaultsMismatch));
4714    }
4715
4716    #[test]
4717    fn test_add_detects_conflicting_value_type_configuration() {
4718        let mut schema_a = Schema::new_default(KnnIndex::Hnsw);
4719        let mut schema_b = Schema::new_default(KnnIndex::Hnsw);
4720
4721        let string_override_enabled = ValueTypes {
4722            string: Some(StringValueType {
4723                string_inverted_index: Some(StringInvertedIndexType {
4724                    enabled: true,
4725                    config: StringInvertedIndexConfig {},
4726                }),
4727                fts_index: None,
4728            }),
4729            ..Default::default()
4730        };
4731        schema_a
4732            .keys
4733            .insert("custom_field".to_string(), string_override_enabled);
4734
4735        let string_override_disabled = ValueTypes {
4736            string: Some(StringValueType {
4737                string_inverted_index: Some(StringInvertedIndexType {
4738                    enabled: false,
4739                    config: StringInvertedIndexConfig {},
4740                }),
4741                fts_index: None,
4742            }),
4743            ..Default::default()
4744        };
4745        schema_b
4746            .keys
4747            .insert("custom_field".to_string(), string_override_disabled);
4748
4749        let err = schema_a.merge(&schema_b).unwrap_err();
4750        assert!(matches!(err, SchemaError::ConfigurationConflict { .. }));
4751    }
4752
4753    // TODO(Sanket): Remove this test once deployed
4754    #[test]
4755    fn test_backward_compatibility_aliases() {
4756        // Test that old format with # and $ prefixes and key_overrides can be deserialized
4757        let old_format_json = r###"{
4758            "defaults": {
4759                "#string": {
4760                    "$fts_index": {
4761                        "enabled": true,
4762                        "config": {}
4763                    }
4764                },
4765                "#int": {
4766                    "$int_inverted_index": {
4767                        "enabled": true,
4768                        "config": {}
4769                    }
4770                },
4771                "#float_list": {
4772                    "$vector_index": {
4773                        "enabled": true,
4774                        "config": {
4775                            "spann": {
4776                                "search_nprobe": 10
4777                            }
4778                        }
4779                    }
4780                }
4781            },
4782            "key_overrides": {
4783                "#document": {
4784                    "#string": {
4785                        "$fts_index": {
4786                            "enabled": false,
4787                            "config": {}
4788                        }
4789                    }
4790                }
4791            }
4792        }"###;
4793
4794        let schema_from_old: Schema = serde_json::from_str(old_format_json).unwrap();
4795
4796        // Test that new format without prefixes and keys can be deserialized
4797        let new_format_json = r###"{
4798            "defaults": {
4799                "string": {
4800                    "fts_index": {
4801                        "enabled": true,
4802                        "config": {}
4803                    }
4804                },
4805                "int": {
4806                    "int_inverted_index": {
4807                        "enabled": true,
4808                        "config": {}
4809                    }
4810                },
4811                "float_list": {
4812                    "vector_index": {
4813                        "enabled": true,
4814                        "config": {
4815                            "spann": {
4816                                "search_nprobe": 10
4817                            }
4818                        }
4819                    }
4820                }
4821            },
4822            "keys": {
4823                "#document": {
4824                    "string": {
4825                        "fts_index": {
4826                            "enabled": false,
4827                            "config": {}
4828                        }
4829                    }
4830                }
4831            }
4832        }"###;
4833
4834        let schema_from_new: Schema = serde_json::from_str(new_format_json).unwrap();
4835
4836        // Both should deserialize to the same structure
4837        assert_eq!(schema_from_old, schema_from_new);
4838
4839        // Verify the deserialized content is correct
4840        assert!(schema_from_old.defaults.string.is_some());
4841        assert!(schema_from_old
4842            .defaults
4843            .string
4844            .as_ref()
4845            .unwrap()
4846            .fts_index
4847            .is_some());
4848        assert!(
4849            schema_from_old
4850                .defaults
4851                .string
4852                .as_ref()
4853                .unwrap()
4854                .fts_index
4855                .as_ref()
4856                .unwrap()
4857                .enabled
4858        );
4859
4860        assert!(schema_from_old.defaults.int.is_some());
4861        assert!(schema_from_old
4862            .defaults
4863            .int
4864            .as_ref()
4865            .unwrap()
4866            .int_inverted_index
4867            .is_some());
4868
4869        assert!(schema_from_old.defaults.float_list.is_some());
4870        assert!(schema_from_old
4871            .defaults
4872            .float_list
4873            .as_ref()
4874            .unwrap()
4875            .vector_index
4876            .is_some());
4877
4878        assert!(schema_from_old.keys.contains_key(DOCUMENT_KEY));
4879        let doc_override = schema_from_old.keys.get(DOCUMENT_KEY).unwrap();
4880        assert!(doc_override.string.is_some());
4881        assert!(
4882            !doc_override
4883                .string
4884                .as_ref()
4885                .unwrap()
4886                .fts_index
4887                .as_ref()
4888                .unwrap()
4889                .enabled
4890        );
4891
4892        // Test that serialization always outputs the new format (without prefixes)
4893        let serialized = serde_json::to_string(&schema_from_old).unwrap();
4894
4895        // Should contain new format keys
4896        assert!(serialized.contains(r#""keys":"#));
4897        assert!(serialized.contains(r#""string":"#));
4898        assert!(serialized.contains(r#""fts_index":"#));
4899        assert!(serialized.contains(r#""int_inverted_index":"#));
4900        assert!(serialized.contains(r#""vector_index":"#));
4901
4902        // Should NOT contain old format keys
4903        assert!(!serialized.contains(r#""key_overrides":"#));
4904        assert!(!serialized.contains(r###""#string":"###));
4905        assert!(!serialized.contains(r###""$fts_index":"###));
4906        assert!(!serialized.contains(r###""$int_inverted_index":"###));
4907        assert!(!serialized.contains(r###""$vector_index":"###));
4908    }
4909
4910    #[test]
4911    fn test_hnsw_index_config_validation() {
4912        use validator::Validate;
4913
4914        // Valid configuration - should pass
4915        let valid_config = HnswIndexConfig {
4916            batch_size: Some(10),
4917            sync_threshold: Some(100),
4918            ef_construction: Some(100),
4919            max_neighbors: Some(16),
4920            ..Default::default()
4921        };
4922        assert!(valid_config.validate().is_ok());
4923
4924        // Invalid: batch_size too small (min 2)
4925        let invalid_batch_size = HnswIndexConfig {
4926            batch_size: Some(1),
4927            ..Default::default()
4928        };
4929        assert!(invalid_batch_size.validate().is_err());
4930
4931        // Invalid: sync_threshold too small (min 2)
4932        let invalid_sync_threshold = HnswIndexConfig {
4933            sync_threshold: Some(1),
4934            ..Default::default()
4935        };
4936        assert!(invalid_sync_threshold.validate().is_err());
4937
4938        // Valid: boundary values (exactly 2) should pass
4939        let boundary_config = HnswIndexConfig {
4940            batch_size: Some(2),
4941            sync_threshold: Some(2),
4942            ..Default::default()
4943        };
4944        assert!(boundary_config.validate().is_ok());
4945
4946        // Valid: None values should pass validation
4947        let all_none_config = HnswIndexConfig {
4948            ..Default::default()
4949        };
4950        assert!(all_none_config.validate().is_ok());
4951
4952        // Valid: fields without validation can be any value
4953        let other_fields_config = HnswIndexConfig {
4954            ef_construction: Some(1),
4955            max_neighbors: Some(1),
4956            ef_search: Some(1),
4957            num_threads: Some(1),
4958            resize_factor: Some(0.1),
4959            ..Default::default()
4960        };
4961        assert!(other_fields_config.validate().is_ok());
4962    }
4963
4964    #[test]
4965    fn test_spann_index_config_validation() {
4966        use validator::Validate;
4967
4968        // Valid configuration - should pass
4969        let valid_config = SpannIndexConfig {
4970            write_nprobe: Some(32),
4971            nreplica_count: Some(4),
4972            split_threshold: Some(100),
4973            merge_threshold: Some(50),
4974            reassign_neighbor_count: Some(32),
4975            num_centers_to_merge_to: Some(4),
4976            ef_construction: Some(100),
4977            ef_search: Some(100),
4978            max_neighbors: Some(32),
4979            search_rng_factor: Some(1.0),
4980            write_rng_factor: Some(1.0),
4981            search_rng_epsilon: Some(7.5),
4982            write_rng_epsilon: Some(7.5),
4983            ..Default::default()
4984        };
4985        assert!(valid_config.validate().is_ok());
4986
4987        // Invalid: write_nprobe too large (max 64)
4988        let invalid_write_nprobe = SpannIndexConfig {
4989            write_nprobe: Some(200),
4990            ..Default::default()
4991        };
4992        assert!(invalid_write_nprobe.validate().is_err());
4993
4994        // Invalid: split_threshold too small (min 50)
4995        let invalid_split_threshold = SpannIndexConfig {
4996            split_threshold: Some(10),
4997            ..Default::default()
4998        };
4999        assert!(invalid_split_threshold.validate().is_err());
5000
5001        // Invalid: split_threshold too large (max 200)
5002        let invalid_split_threshold_high = SpannIndexConfig {
5003            split_threshold: Some(250),
5004            ..Default::default()
5005        };
5006        assert!(invalid_split_threshold_high.validate().is_err());
5007
5008        // Invalid: nreplica_count too large (max 8)
5009        let invalid_nreplica = SpannIndexConfig {
5010            nreplica_count: Some(10),
5011            ..Default::default()
5012        };
5013        assert!(invalid_nreplica.validate().is_err());
5014
5015        // Invalid: reassign_neighbor_count too large (max 64)
5016        let invalid_reassign = SpannIndexConfig {
5017            reassign_neighbor_count: Some(100),
5018            ..Default::default()
5019        };
5020        assert!(invalid_reassign.validate().is_err());
5021
5022        // Invalid: merge_threshold out of range (min 25, max 100)
5023        let invalid_merge_threshold_low = SpannIndexConfig {
5024            merge_threshold: Some(5),
5025            ..Default::default()
5026        };
5027        assert!(invalid_merge_threshold_low.validate().is_err());
5028
5029        let invalid_merge_threshold_high = SpannIndexConfig {
5030            merge_threshold: Some(150),
5031            ..Default::default()
5032        };
5033        assert!(invalid_merge_threshold_high.validate().is_err());
5034
5035        // Invalid: num_centers_to_merge_to too large (max 8)
5036        let invalid_num_centers = SpannIndexConfig {
5037            num_centers_to_merge_to: Some(10),
5038            ..Default::default()
5039        };
5040        assert!(invalid_num_centers.validate().is_err());
5041
5042        // Invalid: ef_construction too large (max 200)
5043        let invalid_ef_construction = SpannIndexConfig {
5044            ef_construction: Some(300),
5045            ..Default::default()
5046        };
5047        assert!(invalid_ef_construction.validate().is_err());
5048
5049        // Invalid: ef_search too large (max 200)
5050        let invalid_ef_search = SpannIndexConfig {
5051            ef_search: Some(300),
5052            ..Default::default()
5053        };
5054        assert!(invalid_ef_search.validate().is_err());
5055
5056        // Invalid: max_neighbors too large (max 64)
5057        let invalid_max_neighbors = SpannIndexConfig {
5058            max_neighbors: Some(100),
5059            ..Default::default()
5060        };
5061        assert!(invalid_max_neighbors.validate().is_err());
5062
5063        // Invalid: search_nprobe too large (max 128)
5064        let invalid_search_nprobe = SpannIndexConfig {
5065            search_nprobe: Some(200),
5066            ..Default::default()
5067        };
5068        assert!(invalid_search_nprobe.validate().is_err());
5069
5070        // Invalid: search_rng_factor not exactly 1.0 (min 1.0, max 1.0)
5071        let invalid_search_rng_factor_low = SpannIndexConfig {
5072            search_rng_factor: Some(0.9),
5073            ..Default::default()
5074        };
5075        assert!(invalid_search_rng_factor_low.validate().is_err());
5076
5077        let invalid_search_rng_factor_high = SpannIndexConfig {
5078            search_rng_factor: Some(1.1),
5079            ..Default::default()
5080        };
5081        assert!(invalid_search_rng_factor_high.validate().is_err());
5082
5083        // Valid: search_rng_factor exactly 1.0
5084        let valid_search_rng_factor = SpannIndexConfig {
5085            search_rng_factor: Some(1.0),
5086            ..Default::default()
5087        };
5088        assert!(valid_search_rng_factor.validate().is_ok());
5089
5090        // Invalid: search_rng_epsilon out of range (min 5.0, max 10.0)
5091        let invalid_search_rng_epsilon_low = SpannIndexConfig {
5092            search_rng_epsilon: Some(4.0),
5093            ..Default::default()
5094        };
5095        assert!(invalid_search_rng_epsilon_low.validate().is_err());
5096
5097        let invalid_search_rng_epsilon_high = SpannIndexConfig {
5098            search_rng_epsilon: Some(11.0),
5099            ..Default::default()
5100        };
5101        assert!(invalid_search_rng_epsilon_high.validate().is_err());
5102
5103        // Valid: search_rng_epsilon within range
5104        let valid_search_rng_epsilon = SpannIndexConfig {
5105            search_rng_epsilon: Some(7.5),
5106            ..Default::default()
5107        };
5108        assert!(valid_search_rng_epsilon.validate().is_ok());
5109
5110        // Invalid: write_rng_factor not exactly 1.0 (min 1.0, max 1.0)
5111        let invalid_write_rng_factor_low = SpannIndexConfig {
5112            write_rng_factor: Some(0.9),
5113            ..Default::default()
5114        };
5115        assert!(invalid_write_rng_factor_low.validate().is_err());
5116
5117        let invalid_write_rng_factor_high = SpannIndexConfig {
5118            write_rng_factor: Some(1.1),
5119            ..Default::default()
5120        };
5121        assert!(invalid_write_rng_factor_high.validate().is_err());
5122
5123        // Valid: write_rng_factor exactly 1.0
5124        let valid_write_rng_factor = SpannIndexConfig {
5125            write_rng_factor: Some(1.0),
5126            ..Default::default()
5127        };
5128        assert!(valid_write_rng_factor.validate().is_ok());
5129
5130        // Invalid: write_rng_epsilon out of range (min 5.0, max 10.0)
5131        let invalid_write_rng_epsilon_low = SpannIndexConfig {
5132            write_rng_epsilon: Some(4.0),
5133            ..Default::default()
5134        };
5135        assert!(invalid_write_rng_epsilon_low.validate().is_err());
5136
5137        let invalid_write_rng_epsilon_high = SpannIndexConfig {
5138            write_rng_epsilon: Some(11.0),
5139            ..Default::default()
5140        };
5141        assert!(invalid_write_rng_epsilon_high.validate().is_err());
5142
5143        // Valid: write_rng_epsilon within range
5144        let valid_write_rng_epsilon = SpannIndexConfig {
5145            write_rng_epsilon: Some(7.5),
5146            ..Default::default()
5147        };
5148        assert!(valid_write_rng_epsilon.validate().is_ok());
5149
5150        // Invalid: num_samples_kmeans too large (max 1000)
5151        let invalid_num_samples_kmeans = SpannIndexConfig {
5152            num_samples_kmeans: Some(1500),
5153            ..Default::default()
5154        };
5155        assert!(invalid_num_samples_kmeans.validate().is_err());
5156
5157        // Valid: num_samples_kmeans within range
5158        let valid_num_samples_kmeans = SpannIndexConfig {
5159            num_samples_kmeans: Some(500),
5160            ..Default::default()
5161        };
5162        assert!(valid_num_samples_kmeans.validate().is_ok());
5163
5164        // Invalid: initial_lambda not exactly 100.0 (min 100.0, max 100.0)
5165        let invalid_initial_lambda_high = SpannIndexConfig {
5166            initial_lambda: Some(150.0),
5167            ..Default::default()
5168        };
5169        assert!(invalid_initial_lambda_high.validate().is_err());
5170
5171        let invalid_initial_lambda_low = SpannIndexConfig {
5172            initial_lambda: Some(50.0),
5173            ..Default::default()
5174        };
5175        assert!(invalid_initial_lambda_low.validate().is_err());
5176
5177        // Valid: initial_lambda exactly 100.0
5178        let valid_initial_lambda = SpannIndexConfig {
5179            initial_lambda: Some(100.0),
5180            ..Default::default()
5181        };
5182        assert!(valid_initial_lambda.validate().is_ok());
5183
5184        // Valid: None values should pass validation
5185        let all_none_config = SpannIndexConfig {
5186            ..Default::default()
5187        };
5188        assert!(all_none_config.validate().is_ok());
5189    }
5190
5191    #[test]
5192    fn test_builder_pattern_crud_workflow() {
5193        // Test comprehensive CRUD workflow using the builder pattern
5194
5195        // CREATE: Build a schema with multiple indexes
5196        let schema = Schema::new_default(KnnIndex::Hnsw)
5197            .create_index(
5198                None,
5199                IndexConfig::Vector(VectorIndexConfig {
5200                    space: Some(Space::Cosine),
5201                    embedding_function: None,
5202                    source_key: None,
5203                    hnsw: Some(HnswIndexConfig {
5204                        ef_construction: Some(200),
5205                        max_neighbors: Some(32),
5206                        ef_search: Some(50),
5207                        num_threads: None,
5208                        batch_size: None,
5209                        sync_threshold: None,
5210                        resize_factor: None,
5211                    }),
5212                    spann: None,
5213                }),
5214            )
5215            .expect("vector config should succeed")
5216            .create_index(
5217                Some("category"),
5218                IndexConfig::StringInverted(StringInvertedIndexConfig {}),
5219            )
5220            .expect("string inverted on key should succeed")
5221            .create_index(
5222                Some("year"),
5223                IndexConfig::IntInverted(IntInvertedIndexConfig {}),
5224            )
5225            .expect("int inverted on key should succeed")
5226            .create_index(
5227                Some("rating"),
5228                IndexConfig::FloatInverted(FloatInvertedIndexConfig {}),
5229            )
5230            .expect("float inverted on key should succeed")
5231            .create_index(
5232                Some("is_active"),
5233                IndexConfig::BoolInverted(BoolInvertedIndexConfig {}),
5234            )
5235            .expect("bool inverted on key should succeed");
5236
5237        // READ: Verify the schema was built correctly
5238        // Check vector config
5239        assert!(schema.keys.contains_key(EMBEDDING_KEY));
5240        let embedding = schema.keys.get(EMBEDDING_KEY).unwrap();
5241        assert!(embedding.float_list.is_some());
5242        let vector_index = embedding
5243            .float_list
5244            .as_ref()
5245            .unwrap()
5246            .vector_index
5247            .as_ref()
5248            .unwrap();
5249        assert!(vector_index.enabled);
5250        assert_eq!(vector_index.config.space, Some(Space::Cosine));
5251        assert_eq!(
5252            vector_index.config.hnsw.as_ref().unwrap().ef_construction,
5253            Some(200)
5254        );
5255
5256        // Check per-key indexes
5257        assert!(schema.keys.contains_key("category"));
5258        assert!(schema.keys.contains_key("year"));
5259        assert!(schema.keys.contains_key("rating"));
5260        assert!(schema.keys.contains_key("is_active"));
5261
5262        // Verify category string inverted index
5263        let category = schema.keys.get("category").unwrap();
5264        assert!(category.string.is_some());
5265        let string_idx = category
5266            .string
5267            .as_ref()
5268            .unwrap()
5269            .string_inverted_index
5270            .as_ref()
5271            .unwrap();
5272        assert!(string_idx.enabled);
5273
5274        // Verify year int inverted index
5275        let year = schema.keys.get("year").unwrap();
5276        assert!(year.int.is_some());
5277        let int_idx = year
5278            .int
5279            .as_ref()
5280            .unwrap()
5281            .int_inverted_index
5282            .as_ref()
5283            .unwrap();
5284        assert!(int_idx.enabled);
5285
5286        // UPDATE/DELETE: Disable some indexes
5287        let schema = schema
5288            .delete_index(
5289                Some("category"),
5290                IndexConfig::StringInverted(StringInvertedIndexConfig {}),
5291            )
5292            .expect("delete string inverted should succeed")
5293            .delete_index(
5294                Some("year"),
5295                IndexConfig::IntInverted(IntInvertedIndexConfig {}),
5296            )
5297            .expect("delete int inverted should succeed");
5298
5299        // VERIFY DELETE: Check that indexes were disabled
5300        let category = schema.keys.get("category").unwrap();
5301        let string_idx = category
5302            .string
5303            .as_ref()
5304            .unwrap()
5305            .string_inverted_index
5306            .as_ref()
5307            .unwrap();
5308        assert!(!string_idx.enabled); // Should be disabled now
5309
5310        let year = schema.keys.get("year").unwrap();
5311        let int_idx = year
5312            .int
5313            .as_ref()
5314            .unwrap()
5315            .int_inverted_index
5316            .as_ref()
5317            .unwrap();
5318        assert!(!int_idx.enabled); // Should be disabled now
5319
5320        // Verify other indexes still enabled
5321        let rating = schema.keys.get("rating").unwrap();
5322        let float_idx = rating
5323            .float
5324            .as_ref()
5325            .unwrap()
5326            .float_inverted_index
5327            .as_ref()
5328            .unwrap();
5329        assert!(float_idx.enabled); // Should still be enabled
5330
5331        let is_active = schema.keys.get("is_active").unwrap();
5332        let bool_idx = is_active
5333            .boolean
5334            .as_ref()
5335            .unwrap()
5336            .bool_inverted_index
5337            .as_ref()
5338            .unwrap();
5339        assert!(bool_idx.enabled); // Should still be enabled
5340    }
5341
5342    #[test]
5343    fn test_builder_create_index_validation_errors() {
5344        // Test all validation errors for create_index() as documented in the docstring:
5345        // - Attempting to create index on special keys (#document, #embedding)
5346        // - Invalid configuration (e.g., vector index on non-embedding key)
5347        // - Conflicting with existing indexes (e.g., multiple sparse vector indexes)
5348
5349        // Error: Vector index on specific key (must be global)
5350        let result = Schema::new_default(KnnIndex::Hnsw).create_index(
5351            Some("my_vectors"),
5352            IndexConfig::Vector(VectorIndexConfig {
5353                space: Some(Space::L2),
5354                embedding_function: None,
5355                source_key: None,
5356                hnsw: None,
5357                spann: None,
5358            }),
5359        );
5360        assert!(result.is_err());
5361        assert!(matches!(
5362            result.unwrap_err(),
5363            SchemaBuilderError::VectorIndexMustBeGlobal { key } if key == "my_vectors"
5364        ));
5365
5366        // Error: FTS index on specific key (must be global)
5367        let result = Schema::new_default(KnnIndex::Hnsw)
5368            .create_index(Some("my_text"), IndexConfig::Fts(FtsIndexConfig {}));
5369        assert!(result.is_err());
5370        assert!(matches!(
5371            result.unwrap_err(),
5372            SchemaBuilderError::FtsIndexMustBeGlobal { key } if key == "my_text"
5373        ));
5374
5375        // Error: Cannot create index on special key #document
5376        let result = Schema::new_default(KnnIndex::Hnsw).create_index(
5377            Some(DOCUMENT_KEY),
5378            IndexConfig::StringInverted(StringInvertedIndexConfig {}),
5379        );
5380        assert!(result.is_err());
5381        assert!(matches!(
5382            result.unwrap_err(),
5383            SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
5384        ));
5385
5386        // Error: Cannot create index on special key #embedding
5387        let result = Schema::new_default(KnnIndex::Hnsw).create_index(
5388            Some(EMBEDDING_KEY),
5389            IndexConfig::IntInverted(IntInvertedIndexConfig {}),
5390        );
5391        assert!(result.is_err());
5392        assert!(matches!(
5393            result.unwrap_err(),
5394            SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
5395        ));
5396
5397        // Error: Sparse vector without key (must specify key)
5398        let result = Schema::new_default(KnnIndex::Hnsw).create_index(
5399            None,
5400            IndexConfig::SparseVector(SparseVectorIndexConfig {
5401                embedding_function: None,
5402                source_key: None,
5403                bm25: None,
5404            }),
5405        );
5406        assert!(result.is_err());
5407        assert!(matches!(
5408            result.unwrap_err(),
5409            SchemaBuilderError::SparseVectorRequiresKey
5410        ));
5411
5412        // Error: Multiple sparse vector indexes (only one allowed per collection)
5413        let result = Schema::new_default(KnnIndex::Hnsw)
5414            .create_index(
5415                Some("sparse1"),
5416                IndexConfig::SparseVector(SparseVectorIndexConfig {
5417                    embedding_function: None,
5418                    source_key: None,
5419                    bm25: None,
5420                }),
5421            )
5422            .expect("first sparse should succeed")
5423            .create_index(
5424                Some("sparse2"),
5425                IndexConfig::SparseVector(SparseVectorIndexConfig {
5426                    embedding_function: None,
5427                    source_key: None,
5428                    bm25: None,
5429                }),
5430            );
5431        assert!(result.is_err());
5432        assert!(matches!(
5433            result.unwrap_err(),
5434            SchemaBuilderError::MultipleSparseVectorIndexes { existing_key } if existing_key == "sparse1"
5435        ));
5436    }
5437
5438    #[test]
5439    fn test_builder_delete_index_validation_errors() {
5440        // Test all validation errors for delete_index() as documented in the docstring:
5441        // - Attempting to delete index on special keys (#document, #embedding)
5442        // - Attempting to delete vector, FTS, or sparse vector indexes (not currently supported)
5443
5444        // Error: Delete on special key #embedding
5445        let result = Schema::new_default(KnnIndex::Hnsw).delete_index(
5446            Some(EMBEDDING_KEY),
5447            IndexConfig::StringInverted(StringInvertedIndexConfig {}),
5448        );
5449        assert!(result.is_err());
5450        assert!(matches!(
5451            result.unwrap_err(),
5452            SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
5453        ));
5454
5455        // Error: Delete on special key #document
5456        let result = Schema::new_default(KnnIndex::Hnsw).delete_index(
5457            Some(DOCUMENT_KEY),
5458            IndexConfig::IntInverted(IntInvertedIndexConfig {}),
5459        );
5460        assert!(result.is_err());
5461        assert!(matches!(
5462            result.unwrap_err(),
5463            SchemaBuilderError::SpecialKeyModificationNotAllowed { .. }
5464        ));
5465
5466        // Error: Delete vector index (not currently supported)
5467        let result = Schema::new_default(KnnIndex::Hnsw).delete_index(
5468            None,
5469            IndexConfig::Vector(VectorIndexConfig {
5470                space: None,
5471                embedding_function: None,
5472                source_key: None,
5473                hnsw: None,
5474                spann: None,
5475            }),
5476        );
5477        assert!(result.is_err());
5478        assert!(matches!(
5479            result.unwrap_err(),
5480            SchemaBuilderError::VectorIndexDeletionNotSupported
5481        ));
5482
5483        // Error: Delete FTS index (not currently supported)
5484        let result = Schema::new_default(KnnIndex::Hnsw)
5485            .delete_index(None, IndexConfig::Fts(FtsIndexConfig {}));
5486        assert!(result.is_err());
5487        assert!(matches!(
5488            result.unwrap_err(),
5489            SchemaBuilderError::FtsIndexDeletionNotSupported
5490        ));
5491
5492        // Error: Delete sparse vector index (not currently supported)
5493        let result = Schema::new_default(KnnIndex::Hnsw)
5494            .create_index(
5495                Some("sparse"),
5496                IndexConfig::SparseVector(SparseVectorIndexConfig {
5497                    embedding_function: None,
5498                    source_key: None,
5499                    bm25: None,
5500                }),
5501            )
5502            .expect("create should succeed")
5503            .delete_index(
5504                Some("sparse"),
5505                IndexConfig::SparseVector(SparseVectorIndexConfig {
5506                    embedding_function: None,
5507                    source_key: None,
5508                    bm25: None,
5509                }),
5510            );
5511        assert!(result.is_err());
5512        assert!(matches!(
5513            result.unwrap_err(),
5514            SchemaBuilderError::SparseVectorIndexDeletionNotSupported
5515        ));
5516    }
5517
5518    #[test]
5519    fn test_builder_pattern_chaining() {
5520        // Test complex chaining scenario
5521        let schema = Schema::new_default(KnnIndex::Hnsw)
5522            .create_index(Some("tag1"), StringInvertedIndexConfig {}.into())
5523            .unwrap()
5524            .create_index(Some("tag2"), StringInvertedIndexConfig {}.into())
5525            .unwrap()
5526            .create_index(Some("tag3"), StringInvertedIndexConfig {}.into())
5527            .unwrap()
5528            .create_index(Some("count"), IntInvertedIndexConfig {}.into())
5529            .unwrap()
5530            .delete_index(Some("tag2"), StringInvertedIndexConfig {}.into())
5531            .unwrap()
5532            .create_index(Some("score"), FloatInvertedIndexConfig {}.into())
5533            .unwrap();
5534
5535        // Verify tag1 is enabled
5536        assert!(
5537            schema
5538                .keys
5539                .get("tag1")
5540                .unwrap()
5541                .string
5542                .as_ref()
5543                .unwrap()
5544                .string_inverted_index
5545                .as_ref()
5546                .unwrap()
5547                .enabled
5548        );
5549
5550        // Verify tag2 is disabled
5551        assert!(
5552            !schema
5553                .keys
5554                .get("tag2")
5555                .unwrap()
5556                .string
5557                .as_ref()
5558                .unwrap()
5559                .string_inverted_index
5560                .as_ref()
5561                .unwrap()
5562                .enabled
5563        );
5564
5565        // Verify tag3 is enabled
5566        assert!(
5567            schema
5568                .keys
5569                .get("tag3")
5570                .unwrap()
5571                .string
5572                .as_ref()
5573                .unwrap()
5574                .string_inverted_index
5575                .as_ref()
5576                .unwrap()
5577                .enabled
5578        );
5579
5580        // Verify count is enabled
5581        assert!(
5582            schema
5583                .keys
5584                .get("count")
5585                .unwrap()
5586                .int
5587                .as_ref()
5588                .unwrap()
5589                .int_inverted_index
5590                .as_ref()
5591                .unwrap()
5592                .enabled
5593        );
5594
5595        // Verify score is enabled
5596        assert!(
5597            schema
5598                .keys
5599                .get("score")
5600                .unwrap()
5601                .float
5602                .as_ref()
5603                .unwrap()
5604                .float_inverted_index
5605                .as_ref()
5606                .unwrap()
5607                .enabled
5608        );
5609    }
5610
5611    #[test]
5612    fn test_schema_default_matches_python() {
5613        // Test that Schema::default() matches Python's Schema() behavior exactly
5614        let schema = Schema::default();
5615
5616        // ============================================================================
5617        // VERIFY DEFAULTS (match Python's _initialize_defaults)
5618        // ============================================================================
5619
5620        // String defaults: FTS disabled, string inverted enabled
5621        assert!(schema.defaults.string.is_some());
5622        let string = schema.defaults.string.as_ref().unwrap();
5623        assert!(!string.fts_index.as_ref().unwrap().enabled);
5624        assert!(string.string_inverted_index.as_ref().unwrap().enabled);
5625
5626        // Float list defaults: vector index disabled
5627        assert!(schema.defaults.float_list.is_some());
5628        let float_list = schema.defaults.float_list.as_ref().unwrap();
5629        assert!(!float_list.vector_index.as_ref().unwrap().enabled);
5630        let vector_config = &float_list.vector_index.as_ref().unwrap().config;
5631        assert_eq!(vector_config.space, None); // Python leaves as None
5632        assert_eq!(vector_config.hnsw, None); // Python doesn't specify
5633        assert_eq!(vector_config.spann, None); // Python doesn't specify
5634        assert_eq!(vector_config.source_key, None);
5635
5636        // Sparse vector defaults: disabled
5637        assert!(schema.defaults.sparse_vector.is_some());
5638        let sparse = schema.defaults.sparse_vector.as_ref().unwrap();
5639        assert!(!sparse.sparse_vector_index.as_ref().unwrap().enabled);
5640
5641        // Int defaults: inverted index enabled
5642        assert!(schema.defaults.int.is_some());
5643        assert!(
5644            schema
5645                .defaults
5646                .int
5647                .as_ref()
5648                .unwrap()
5649                .int_inverted_index
5650                .as_ref()
5651                .unwrap()
5652                .enabled
5653        );
5654
5655        // Float defaults: inverted index enabled
5656        assert!(schema.defaults.float.is_some());
5657        assert!(
5658            schema
5659                .defaults
5660                .float
5661                .as_ref()
5662                .unwrap()
5663                .float_inverted_index
5664                .as_ref()
5665                .unwrap()
5666                .enabled
5667        );
5668
5669        // Bool defaults: inverted index enabled
5670        assert!(schema.defaults.boolean.is_some());
5671        assert!(
5672            schema
5673                .defaults
5674                .boolean
5675                .as_ref()
5676                .unwrap()
5677                .bool_inverted_index
5678                .as_ref()
5679                .unwrap()
5680                .enabled
5681        );
5682
5683        // ============================================================================
5684        // VERIFY SPECIAL KEYS (match Python's _initialize_keys)
5685        // ============================================================================
5686
5687        // #document: FTS enabled, string inverted disabled
5688        assert!(schema.keys.contains_key(DOCUMENT_KEY));
5689        let doc = schema.keys.get(DOCUMENT_KEY).unwrap();
5690        assert!(doc.string.is_some());
5691        assert!(
5692            doc.string
5693                .as_ref()
5694                .unwrap()
5695                .fts_index
5696                .as_ref()
5697                .unwrap()
5698                .enabled
5699        );
5700        assert!(
5701            !doc.string
5702                .as_ref()
5703                .unwrap()
5704                .string_inverted_index
5705                .as_ref()
5706                .unwrap()
5707                .enabled
5708        );
5709
5710        // #embedding: vector index enabled with source_key=#document
5711        assert!(schema.keys.contains_key(EMBEDDING_KEY));
5712        let embedding = schema.keys.get(EMBEDDING_KEY).unwrap();
5713        assert!(embedding.float_list.is_some());
5714        let vec_idx = embedding
5715            .float_list
5716            .as_ref()
5717            .unwrap()
5718            .vector_index
5719            .as_ref()
5720            .unwrap();
5721        assert!(vec_idx.enabled);
5722        assert_eq!(vec_idx.config.source_key, Some(DOCUMENT_KEY.to_string()));
5723        assert_eq!(vec_idx.config.space, None); // Python leaves as None
5724        assert_eq!(vec_idx.config.hnsw, None); // Python doesn't specify
5725        assert_eq!(vec_idx.config.spann, None); // Python doesn't specify
5726
5727        // Verify only these two special keys exist
5728        assert_eq!(schema.keys.len(), 2);
5729    }
5730
5731    #[test]
5732    fn test_schema_default_works_with_builder() {
5733        // Test that Schema::default() can be used with builder pattern
5734        let schema = Schema::default()
5735            .create_index(Some("category"), StringInvertedIndexConfig {}.into())
5736            .expect("should succeed");
5737
5738        // Verify the new index was added
5739        assert!(schema.keys.contains_key("category"));
5740        assert!(schema.keys.contains_key(DOCUMENT_KEY));
5741        assert!(schema.keys.contains_key(EMBEDDING_KEY));
5742        assert_eq!(schema.keys.len(), 3);
5743    }
5744
5745    #[cfg(feature = "testing")]
5746    mod proptests {
5747        use super::*;
5748        use crate::strategies::{
5749            embedding_function_strategy, internal_collection_configuration_strategy,
5750            internal_hnsw_configuration_strategy, internal_spann_configuration_strategy,
5751            knn_index_strategy, space_strategy, TEST_NAME_PATTERN,
5752        };
5753        use crate::{
5754            HnswIndexConfig, SpannIndexConfig, VectorIndexConfig, DOCUMENT_KEY, EMBEDDING_KEY,
5755        };
5756        use proptest::prelude::*;
5757        use proptest::strategy::BoxedStrategy;
5758        use proptest::string::string_regex;
5759        use serde_json::json;
5760
5761        fn default_embedding_function_strategy(
5762        ) -> impl Strategy<Value = Option<EmbeddingFunctionConfiguration>> {
5763            proptest::option::of(prop_oneof![
5764                Just(EmbeddingFunctionConfiguration::Unknown),
5765                Just(EmbeddingFunctionConfiguration::Known(
5766                    EmbeddingFunctionNewConfiguration {
5767                        name: "default".to_string(),
5768                        config: json!({ "alpha": 1 }),
5769                    }
5770                )),
5771            ])
5772        }
5773
5774        fn sparse_embedding_function_strategy(
5775        ) -> impl Strategy<Value = Option<EmbeddingFunctionConfiguration>> {
5776            let known_strategy = string_regex(TEST_NAME_PATTERN).unwrap().prop_map(|name| {
5777                EmbeddingFunctionConfiguration::Known(EmbeddingFunctionNewConfiguration {
5778                    name,
5779                    config: json!({ "alpha": 1 }),
5780                })
5781            });
5782
5783            proptest::option::of(prop_oneof![
5784                Just(EmbeddingFunctionConfiguration::Unknown),
5785                known_strategy,
5786            ])
5787        }
5788
5789        fn non_default_internal_collection_configuration_strategy(
5790        ) -> impl Strategy<Value = InternalCollectionConfiguration> {
5791            internal_collection_configuration_strategy()
5792                .prop_filter("non-default configuration", |config| !config.is_default())
5793        }
5794
5795        fn partial_hnsw_index_config_strategy() -> impl Strategy<Value = HnswIndexConfig> {
5796            (
5797                proptest::option::of(1usize..=512),
5798                proptest::option::of(1usize..=128),
5799                proptest::option::of(1usize..=512),
5800                proptest::option::of(1usize..=64),
5801                proptest::option::of(2usize..=4096),
5802                proptest::option::of(2usize..=4096),
5803                proptest::option::of(prop_oneof![
5804                    Just(0.5f64),
5805                    Just(1.0f64),
5806                    Just(1.5f64),
5807                    Just(2.0f64)
5808                ]),
5809            )
5810                .prop_map(
5811                    |(
5812                        ef_construction,
5813                        max_neighbors,
5814                        ef_search,
5815                        num_threads,
5816                        batch_size,
5817                        sync_threshold,
5818                        resize_factor,
5819                    )| HnswIndexConfig {
5820                        ef_construction,
5821                        max_neighbors,
5822                        ef_search,
5823                        num_threads,
5824                        batch_size,
5825                        sync_threshold,
5826                        resize_factor,
5827                    },
5828                )
5829        }
5830
5831        fn partial_spann_index_config_strategy() -> impl Strategy<Value = SpannIndexConfig> {
5832            let epsilon_strategy = prop_oneof![Just(5.0f32), Just(7.5f32), Just(10.0f32)];
5833            (
5834                (
5835                    proptest::option::of(1u32..=128),               // search_nprobe
5836                    proptest::option::of(Just(1.0f32)), // search_rng_factor (must be 1.0)
5837                    proptest::option::of(epsilon_strategy.clone()), // search_rng_epsilon
5838                    proptest::option::of(1u32..=8),     // nreplica_count
5839                    proptest::option::of(Just(1.0f32)), // write_rng_factor (must be 1.0)
5840                    proptest::option::of(epsilon_strategy), // write_rng_epsilon
5841                    proptest::option::of(50u32..=200),  // split_threshold
5842                    proptest::option::of(1usize..=1000), // num_samples_kmeans
5843                ),
5844                (
5845                    proptest::option::of(Just(100.0f32)), // initial_lambda (must be 100.0)
5846                    proptest::option::of(1u32..=64),      // reassign_neighbor_count
5847                    proptest::option::of(25u32..=100),    // merge_threshold
5848                    proptest::option::of(1u32..=8),       // num_centers_to_merge_to
5849                    proptest::option::of(1u32..=64),      // write_nprobe
5850                    proptest::option::of(1usize..=200),   // ef_construction
5851                    proptest::option::of(1usize..=200),   // ef_search
5852                    proptest::option::of(1usize..=64),    // max_neighbors
5853                ),
5854            )
5855                .prop_map(
5856                    |(
5857                        (
5858                            search_nprobe,
5859                            search_rng_factor,
5860                            search_rng_epsilon,
5861                            nreplica_count,
5862                            write_rng_factor,
5863                            write_rng_epsilon,
5864                            split_threshold,
5865                            num_samples_kmeans,
5866                        ),
5867                        (
5868                            initial_lambda,
5869                            reassign_neighbor_count,
5870                            merge_threshold,
5871                            num_centers_to_merge_to,
5872                            write_nprobe,
5873                            ef_construction,
5874                            ef_search,
5875                            max_neighbors,
5876                        ),
5877                    )| SpannIndexConfig {
5878                        search_nprobe,
5879                        search_rng_factor,
5880                        search_rng_epsilon,
5881                        nreplica_count,
5882                        write_rng_factor,
5883                        write_rng_epsilon,
5884                        split_threshold,
5885                        num_samples_kmeans,
5886                        initial_lambda,
5887                        reassign_neighbor_count,
5888                        merge_threshold,
5889                        num_centers_to_merge_to,
5890                        write_nprobe,
5891                        ef_construction,
5892                        ef_search,
5893                        max_neighbors,
5894                    },
5895                )
5896        }
5897
5898        proptest! {
5899            #[test]
5900            fn merge_hnsw_configs_preserves_user_overrides(
5901                base in partial_hnsw_index_config_strategy(),
5902                user in partial_hnsw_index_config_strategy(),
5903            ) {
5904                let merged = Schema::merge_hnsw_configs(Some(&base), Some(&user))
5905                    .expect("merge should return Some when both are Some");
5906
5907                // Property: user values always take precedence when Some
5908                if user.ef_construction.is_some() {
5909                    prop_assert_eq!(merged.ef_construction, user.ef_construction);
5910                }
5911                if user.max_neighbors.is_some() {
5912                    prop_assert_eq!(merged.max_neighbors, user.max_neighbors);
5913                }
5914                if user.ef_search.is_some() {
5915                    prop_assert_eq!(merged.ef_search, user.ef_search);
5916                }
5917                if user.num_threads.is_some() {
5918                    prop_assert_eq!(merged.num_threads, user.num_threads);
5919                }
5920                if user.batch_size.is_some() {
5921                    prop_assert_eq!(merged.batch_size, user.batch_size);
5922                }
5923                if user.sync_threshold.is_some() {
5924                    prop_assert_eq!(merged.sync_threshold, user.sync_threshold);
5925                }
5926                if user.resize_factor.is_some() {
5927                    prop_assert_eq!(merged.resize_factor, user.resize_factor);
5928                }
5929            }
5930
5931            #[test]
5932            fn merge_hnsw_configs_falls_back_to_base_when_user_is_none(
5933                base in partial_hnsw_index_config_strategy(),
5934            ) {
5935                let merged = Schema::merge_hnsw_configs(Some(&base), None)
5936                    .expect("merge should return Some when base is Some");
5937
5938                // Property: when user is None, base values are preserved
5939                prop_assert_eq!(merged, base);
5940            }
5941
5942            #[test]
5943            fn merge_hnsw_configs_returns_user_when_base_is_none(
5944                user in partial_hnsw_index_config_strategy(),
5945            ) {
5946                let merged = Schema::merge_hnsw_configs(None, Some(&user))
5947                    .expect("merge should return Some when user is Some");
5948
5949                // Property: when base is None, user values are preserved
5950                prop_assert_eq!(merged, user);
5951            }
5952
5953            #[test]
5954            fn merge_spann_configs_preserves_user_overrides(
5955                base in partial_spann_index_config_strategy(),
5956                user in partial_spann_index_config_strategy(),
5957            ) {
5958                let merged = Schema::merge_spann_configs(Some(&base), Some(&user))
5959                    .expect("merge should return Some when both are Some");
5960
5961                // Property: user values always take precedence when Some
5962                if user.search_nprobe.is_some() {
5963                    prop_assert_eq!(merged.search_nprobe, user.search_nprobe);
5964                }
5965                if user.search_rng_epsilon.is_some() {
5966                    prop_assert_eq!(merged.search_rng_epsilon, user.search_rng_epsilon);
5967                }
5968                if user.split_threshold.is_some() {
5969                    prop_assert_eq!(merged.split_threshold, user.split_threshold);
5970                }
5971                if user.ef_construction.is_some() {
5972                    prop_assert_eq!(merged.ef_construction, user.ef_construction);
5973                }
5974                if user.ef_search.is_some() {
5975                    prop_assert_eq!(merged.ef_search, user.ef_search);
5976                }
5977                if user.max_neighbors.is_some() {
5978                    prop_assert_eq!(merged.max_neighbors, user.max_neighbors);
5979                }
5980            }
5981
5982            #[test]
5983            fn merge_spann_configs_falls_back_to_base_when_user_is_none(
5984                base in partial_spann_index_config_strategy(),
5985            ) {
5986                let merged = Schema::merge_spann_configs(Some(&base), None)
5987                    .expect("merge should return Some when base is Some");
5988
5989                // Property: when user is None, base values are preserved
5990                prop_assert_eq!(merged, base);
5991            }
5992
5993            #[test]
5994            fn merge_vector_index_config_preserves_user_overrides(
5995                base in vector_index_config_strategy(),
5996                user in vector_index_config_strategy(),
5997                knn in knn_index_strategy(),
5998            ) {
5999                let merged = Schema::merge_vector_index_config(&base, &user, knn);
6000
6001                // Property: user values take precedence for top-level fields
6002                if user.space.is_some() {
6003                    prop_assert_eq!(merged.space, user.space);
6004                }
6005                if user.embedding_function.is_some() {
6006                    prop_assert_eq!(merged.embedding_function, user.embedding_function);
6007                }
6008                if user.source_key.is_some() {
6009                    prop_assert_eq!(merged.source_key, user.source_key);
6010                }
6011
6012                // Property: nested configs are merged according to merge rules
6013                match knn {
6014                    KnnIndex::Hnsw => {
6015                        if let (Some(_base_hnsw), Some(user_hnsw)) = (&base.hnsw, &user.hnsw) {
6016                            let merged_hnsw = merged.hnsw.as_ref().expect("hnsw should be Some");
6017                            if user_hnsw.ef_construction.is_some() {
6018                                prop_assert_eq!(merged_hnsw.ef_construction, user_hnsw.ef_construction);
6019                            }
6020                        }
6021                    }
6022                    KnnIndex::Spann => {
6023                        if let (Some(_base_spann), Some(user_spann)) = (&base.spann, &user.spann) {
6024                            let merged_spann = merged.spann.as_ref().expect("spann should be Some");
6025                            if user_spann.search_nprobe.is_some() {
6026                                prop_assert_eq!(merged_spann.search_nprobe, user_spann.search_nprobe);
6027                            }
6028                        }
6029                    }
6030                }
6031            }
6032        }
6033
6034        fn expected_vector_index_config(
6035            config: &InternalCollectionConfiguration,
6036        ) -> VectorIndexConfig {
6037            match &config.vector_index {
6038                VectorIndexConfiguration::Hnsw(hnsw_config) => VectorIndexConfig {
6039                    space: Some(hnsw_config.space.clone()),
6040                    embedding_function: config.embedding_function.clone(),
6041                    source_key: None,
6042                    hnsw: Some(HnswIndexConfig {
6043                        ef_construction: Some(hnsw_config.ef_construction),
6044                        max_neighbors: Some(hnsw_config.max_neighbors),
6045                        ef_search: Some(hnsw_config.ef_search),
6046                        num_threads: Some(hnsw_config.num_threads),
6047                        batch_size: Some(hnsw_config.batch_size),
6048                        sync_threshold: Some(hnsw_config.sync_threshold),
6049                        resize_factor: Some(hnsw_config.resize_factor),
6050                    }),
6051                    spann: None,
6052                },
6053                VectorIndexConfiguration::Spann(spann_config) => VectorIndexConfig {
6054                    space: Some(spann_config.space.clone()),
6055                    embedding_function: config.embedding_function.clone(),
6056                    source_key: None,
6057                    hnsw: None,
6058                    spann: Some(SpannIndexConfig {
6059                        search_nprobe: Some(spann_config.search_nprobe),
6060                        search_rng_factor: Some(spann_config.search_rng_factor),
6061                        search_rng_epsilon: Some(spann_config.search_rng_epsilon),
6062                        nreplica_count: Some(spann_config.nreplica_count),
6063                        write_rng_factor: Some(spann_config.write_rng_factor),
6064                        write_rng_epsilon: Some(spann_config.write_rng_epsilon),
6065                        split_threshold: Some(spann_config.split_threshold),
6066                        num_samples_kmeans: Some(spann_config.num_samples_kmeans),
6067                        initial_lambda: Some(spann_config.initial_lambda),
6068                        reassign_neighbor_count: Some(spann_config.reassign_neighbor_count),
6069                        merge_threshold: Some(spann_config.merge_threshold),
6070                        num_centers_to_merge_to: Some(spann_config.num_centers_to_merge_to),
6071                        write_nprobe: Some(spann_config.write_nprobe),
6072                        ef_construction: Some(spann_config.ef_construction),
6073                        ef_search: Some(spann_config.ef_search),
6074                        max_neighbors: Some(spann_config.max_neighbors),
6075                    }),
6076                },
6077            }
6078        }
6079
6080        fn non_special_key_strategy() -> BoxedStrategy<String> {
6081            string_regex(TEST_NAME_PATTERN)
6082                .unwrap()
6083                .prop_filter("exclude special keys", |key| {
6084                    key != DOCUMENT_KEY && key != EMBEDDING_KEY
6085                })
6086                .boxed()
6087        }
6088
6089        fn source_key_strategy() -> BoxedStrategy<Option<String>> {
6090            proptest::option::of(prop_oneof![
6091                Just(DOCUMENT_KEY.to_string()),
6092                string_regex(TEST_NAME_PATTERN).unwrap(),
6093            ])
6094            .boxed()
6095        }
6096
6097        fn fts_index_type_strategy() -> impl Strategy<Value = FtsIndexType> {
6098            any::<bool>().prop_map(|enabled| FtsIndexType {
6099                enabled,
6100                config: FtsIndexConfig {},
6101            })
6102        }
6103
6104        fn string_inverted_index_type_strategy() -> impl Strategy<Value = StringInvertedIndexType> {
6105            any::<bool>().prop_map(|enabled| StringInvertedIndexType {
6106                enabled,
6107                config: StringInvertedIndexConfig {},
6108            })
6109        }
6110
6111        fn string_value_type_strategy() -> BoxedStrategy<Option<StringValueType>> {
6112            proptest::option::of(
6113                (
6114                    proptest::option::of(string_inverted_index_type_strategy()),
6115                    proptest::option::of(fts_index_type_strategy()),
6116                )
6117                    .prop_map(|(string_inverted_index, fts_index)| {
6118                        StringValueType {
6119                            string_inverted_index,
6120                            fts_index,
6121                        }
6122                    }),
6123            )
6124            .boxed()
6125        }
6126
6127        fn float_inverted_index_type_strategy() -> impl Strategy<Value = FloatInvertedIndexType> {
6128            any::<bool>().prop_map(|enabled| FloatInvertedIndexType {
6129                enabled,
6130                config: FloatInvertedIndexConfig {},
6131            })
6132        }
6133
6134        fn float_value_type_strategy() -> BoxedStrategy<Option<FloatValueType>> {
6135            proptest::option::of(
6136                proptest::option::of(float_inverted_index_type_strategy()).prop_map(
6137                    |float_inverted_index| FloatValueType {
6138                        float_inverted_index,
6139                    },
6140                ),
6141            )
6142            .boxed()
6143        }
6144
6145        fn int_inverted_index_type_strategy() -> impl Strategy<Value = IntInvertedIndexType> {
6146            any::<bool>().prop_map(|enabled| IntInvertedIndexType {
6147                enabled,
6148                config: IntInvertedIndexConfig {},
6149            })
6150        }
6151
6152        fn int_value_type_strategy() -> BoxedStrategy<Option<IntValueType>> {
6153            proptest::option::of(
6154                proptest::option::of(int_inverted_index_type_strategy())
6155                    .prop_map(|int_inverted_index| IntValueType { int_inverted_index }),
6156            )
6157            .boxed()
6158        }
6159
6160        fn bool_inverted_index_type_strategy() -> impl Strategy<Value = BoolInvertedIndexType> {
6161            any::<bool>().prop_map(|enabled| BoolInvertedIndexType {
6162                enabled,
6163                config: BoolInvertedIndexConfig {},
6164            })
6165        }
6166
6167        fn bool_value_type_strategy() -> BoxedStrategy<Option<BoolValueType>> {
6168            proptest::option::of(
6169                proptest::option::of(bool_inverted_index_type_strategy()).prop_map(
6170                    |bool_inverted_index| BoolValueType {
6171                        bool_inverted_index,
6172                    },
6173                ),
6174            )
6175            .boxed()
6176        }
6177
6178        fn sparse_vector_index_config_strategy() -> impl Strategy<Value = SparseVectorIndexConfig> {
6179            (
6180                sparse_embedding_function_strategy(),
6181                source_key_strategy(),
6182                proptest::option::of(any::<bool>()),
6183            )
6184                .prop_map(|(embedding_function, source_key, bm25)| {
6185                    SparseVectorIndexConfig {
6186                        embedding_function,
6187                        source_key,
6188                        bm25,
6189                    }
6190                })
6191        }
6192
6193        fn sparse_vector_value_type_strategy() -> BoxedStrategy<Option<SparseVectorValueType>> {
6194            proptest::option::of(
6195                (
6196                    any::<bool>(),
6197                    proptest::option::of(sparse_vector_index_config_strategy()),
6198                )
6199                    .prop_map(|(enabled, config)| SparseVectorValueType {
6200                        sparse_vector_index: config.map(|cfg| SparseVectorIndexType {
6201                            enabled,
6202                            config: cfg,
6203                        }),
6204                    }),
6205            )
6206            .boxed()
6207        }
6208
6209        fn hnsw_index_config_strategy() -> impl Strategy<Value = HnswIndexConfig> {
6210            internal_hnsw_configuration_strategy().prop_map(|config| HnswIndexConfig {
6211                ef_construction: Some(config.ef_construction),
6212                max_neighbors: Some(config.max_neighbors),
6213                ef_search: Some(config.ef_search),
6214                num_threads: Some(config.num_threads),
6215                batch_size: Some(config.batch_size),
6216                sync_threshold: Some(config.sync_threshold),
6217                resize_factor: Some(config.resize_factor),
6218            })
6219        }
6220
6221        fn spann_index_config_strategy() -> impl Strategy<Value = SpannIndexConfig> {
6222            internal_spann_configuration_strategy().prop_map(|config| SpannIndexConfig {
6223                search_nprobe: Some(config.search_nprobe),
6224                search_rng_factor: Some(config.search_rng_factor),
6225                search_rng_epsilon: Some(config.search_rng_epsilon),
6226                nreplica_count: Some(config.nreplica_count),
6227                write_rng_factor: Some(config.write_rng_factor),
6228                write_rng_epsilon: Some(config.write_rng_epsilon),
6229                split_threshold: Some(config.split_threshold),
6230                num_samples_kmeans: Some(config.num_samples_kmeans),
6231                initial_lambda: Some(config.initial_lambda),
6232                reassign_neighbor_count: Some(config.reassign_neighbor_count),
6233                merge_threshold: Some(config.merge_threshold),
6234                num_centers_to_merge_to: Some(config.num_centers_to_merge_to),
6235                write_nprobe: Some(config.write_nprobe),
6236                ef_construction: Some(config.ef_construction),
6237                ef_search: Some(config.ef_search),
6238                max_neighbors: Some(config.max_neighbors),
6239            })
6240        }
6241
6242        fn vector_index_config_strategy() -> impl Strategy<Value = VectorIndexConfig> {
6243            (
6244                proptest::option::of(space_strategy()),
6245                embedding_function_strategy(),
6246                source_key_strategy(),
6247                proptest::option::of(hnsw_index_config_strategy()),
6248                proptest::option::of(spann_index_config_strategy()),
6249            )
6250                .prop_map(|(space, embedding_function, source_key, hnsw, spann)| {
6251                    VectorIndexConfig {
6252                        space,
6253                        embedding_function,
6254                        source_key,
6255                        hnsw,
6256                        spann,
6257                    }
6258                })
6259        }
6260
6261        fn vector_index_type_strategy() -> impl Strategy<Value = VectorIndexType> {
6262            (any::<bool>(), vector_index_config_strategy())
6263                .prop_map(|(enabled, config)| VectorIndexType { enabled, config })
6264        }
6265
6266        fn float_list_value_type_strategy() -> BoxedStrategy<Option<FloatListValueType>> {
6267            proptest::option::of(
6268                proptest::option::of(vector_index_type_strategy())
6269                    .prop_map(|vector_index| FloatListValueType { vector_index }),
6270            )
6271            .boxed()
6272        }
6273
6274        fn value_types_strategy() -> BoxedStrategy<ValueTypes> {
6275            (
6276                string_value_type_strategy(),
6277                float_list_value_type_strategy(),
6278                sparse_vector_value_type_strategy(),
6279                int_value_type_strategy(),
6280                float_value_type_strategy(),
6281                bool_value_type_strategy(),
6282            )
6283                .prop_map(
6284                    |(string, float_list, sparse_vector, int, float, boolean)| ValueTypes {
6285                        string,
6286                        float_list,
6287                        sparse_vector,
6288                        int,
6289                        float,
6290                        boolean,
6291                    },
6292                )
6293                .boxed()
6294        }
6295
6296        fn schema_strategy() -> BoxedStrategy<Schema> {
6297            (
6298                value_types_strategy(),
6299                proptest::collection::hash_map(
6300                    non_special_key_strategy(),
6301                    value_types_strategy(),
6302                    0..=3,
6303                ),
6304                proptest::option::of(value_types_strategy()),
6305                proptest::option::of(value_types_strategy()),
6306            )
6307                .prop_map(
6308                    |(defaults, mut extra_keys, document_override, embedding_override)| {
6309                        if let Some(doc) = document_override {
6310                            extra_keys.insert(DOCUMENT_KEY.to_string(), doc);
6311                        }
6312                        if let Some(embed) = embedding_override {
6313                            extra_keys.insert(EMBEDDING_KEY.to_string(), embed);
6314                        }
6315                        Schema {
6316                            defaults,
6317                            keys: extra_keys,
6318                            cmek: None,
6319                            source_attached_function_id: None,
6320                        }
6321                    },
6322                )
6323                .boxed()
6324        }
6325
6326        fn force_non_default_schema(mut schema: Schema) -> Schema {
6327            if schema.is_default() {
6328                if let Some(string_value) = schema
6329                    .defaults
6330                    .string
6331                    .as_mut()
6332                    .and_then(|string_value| string_value.string_inverted_index.as_mut())
6333                {
6334                    string_value.enabled = !string_value.enabled;
6335                } else {
6336                    schema.defaults.string = Some(StringValueType {
6337                        string_inverted_index: Some(StringInvertedIndexType {
6338                            enabled: false,
6339                            config: StringInvertedIndexConfig {},
6340                        }),
6341                        fts_index: None,
6342                    });
6343                }
6344            }
6345            schema
6346        }
6347
6348        fn non_default_schema_strategy() -> BoxedStrategy<Schema> {
6349            schema_strategy().prop_map(force_non_default_schema).boxed()
6350        }
6351
6352        fn extract_vector_configs(schema: &Schema) -> (VectorIndexConfig, VectorIndexConfig) {
6353            let defaults = schema
6354                .defaults
6355                .float_list
6356                .as_ref()
6357                .and_then(|fl| fl.vector_index.as_ref())
6358                .map(|vi| vi.config.clone())
6359                .expect("defaults vector index missing");
6360
6361            let embedding = schema
6362                .keys
6363                .get(EMBEDDING_KEY)
6364                .and_then(|value_types| value_types.float_list.as_ref())
6365                .and_then(|fl| fl.vector_index.as_ref())
6366                .map(|vi| vi.config.clone())
6367                .expect("#embedding vector index missing");
6368
6369            (defaults, embedding)
6370        }
6371
6372        proptest! {
6373            #[test]
6374            fn reconcile_schema_and_config_matches_convert_for_config_only(
6375                config in internal_collection_configuration_strategy(),
6376                knn in knn_index_strategy(),
6377            ) {
6378                let result = Schema::reconcile_schema_and_config(None, Some(&config), knn)
6379                    .expect("reconciliation should succeed");
6380
6381                let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
6382                let expected_config = expected_vector_index_config(&config);
6383
6384                prop_assert_eq!(defaults_vi, expected_config.clone());
6385
6386                let mut expected_embedding_config = expected_config;
6387                expected_embedding_config.source_key = Some(DOCUMENT_KEY.to_string());
6388                prop_assert_eq!(embedding_vi, expected_embedding_config);
6389
6390                prop_assert_eq!(result.keys.len(), 2);
6391            }
6392        }
6393
6394        proptest! {
6395            #[test]
6396            fn reconcile_schema_and_config_errors_when_both_non_default(
6397                config in non_default_internal_collection_configuration_strategy(),
6398                knn in knn_index_strategy(),
6399            ) {
6400                let schema = Schema::try_from(&config)
6401                    .expect("conversion should succeed");
6402                prop_assume!(!schema.is_default());
6403
6404                let result = Schema::reconcile_schema_and_config(Some(&schema), Some(&config), knn);
6405
6406                prop_assert!(matches!(result, Err(SchemaError::ConfigAndSchemaConflict)));
6407            }
6408        }
6409
6410        proptest! {
6411            #[test]
6412            fn reconcile_schema_and_config_matches_schema_only_path(
6413                schema in schema_strategy(),
6414                knn in knn_index_strategy(),
6415            ) {
6416                let result = Schema::reconcile_schema_and_config(Some(&schema), None, knn)
6417                    .expect("reconciliation should succeed");
6418
6419                let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
6420
6421                // Property: schema defaults.float_list vector_index config should be merged into defaults
6422                if let Some(schema_float_list) = schema.defaults.float_list.as_ref() {
6423                    if let Some(schema_vi) = schema_float_list.vector_index.as_ref() {
6424                        // Property: schema values take precedence over defaults
6425                        if let Some(schema_space) = &schema_vi.config.space {
6426                            prop_assert_eq!(defaults_vi.space, Some(schema_space.clone()));
6427                        }
6428                        if let Some(schema_ef) = &schema_vi.config.embedding_function {
6429                            prop_assert_eq!(defaults_vi.embedding_function, Some(schema_ef.clone()));
6430                        }
6431                        // Test nested config merging properties
6432                        match knn {
6433                            KnnIndex::Hnsw => {
6434                                if let Some(schema_hnsw) = &schema_vi.config.hnsw {
6435                                    if let Some(merged_hnsw) = &defaults_vi.hnsw {
6436                                        if let Some(schema_ef_construction) = schema_hnsw.ef_construction {
6437                                            prop_assert_eq!(merged_hnsw.ef_construction, Some(schema_ef_construction));
6438                                        }
6439                                    }
6440                                }
6441                            }
6442                            KnnIndex::Spann => {
6443                                if let Some(schema_spann) = &schema_vi.config.spann {
6444                                    if let Some(merged_spann) = &defaults_vi.spann {
6445                                        if let Some(schema_search_nprobe) = schema_spann.search_nprobe {
6446                                            prop_assert_eq!(merged_spann.search_nprobe, Some(schema_search_nprobe));
6447                                        }
6448                                    }
6449                                }
6450                            }
6451                        }
6452                    }
6453                }
6454
6455                // Property: schema #embedding float_list vector_index config should be merged into embedding
6456                if let Some(embedding_values) = schema.keys.get(EMBEDDING_KEY) {
6457                    if let Some(embedding_float_list) = embedding_values.float_list.as_ref() {
6458                        if let Some(embedding_vi_type) = embedding_float_list.vector_index.as_ref() {
6459                            if let Some(schema_space) = &embedding_vi_type.config.space {
6460                                prop_assert_eq!(embedding_vi.space, Some(schema_space.clone()));
6461                            }
6462                        }
6463                    }
6464                }
6465            }
6466        }
6467
6468        proptest! {
6469            #[test]
6470            fn reconcile_schema_and_config_with_default_schema_and_default_config_applies_embedding_function(
6471                embedding_function in default_embedding_function_strategy(),
6472                knn in knn_index_strategy(),
6473            ) {
6474                let schema = Schema::new_default(knn);
6475                let mut config = match knn {
6476                    KnnIndex::Hnsw => InternalCollectionConfiguration::default_hnsw(),
6477                    KnnIndex::Spann => InternalCollectionConfiguration::default_spann(),
6478                };
6479                config.embedding_function = embedding_function.clone();
6480
6481                let result = Schema::reconcile_schema_and_config(
6482                    Some(&schema),
6483                    Some(&config),
6484                    knn,
6485                )
6486                .expect("reconciliation should succeed");
6487
6488                let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
6489
6490                // Property: embedding function from config should be applied to both defaults and embedding
6491                if let Some(ef) = embedding_function {
6492                    prop_assert_eq!(defaults_vi.embedding_function, Some(ef.clone()));
6493                    prop_assert_eq!(embedding_vi.embedding_function, Some(ef));
6494                } else {
6495                    // Property: when embedding function is None, it should remain None
6496                    prop_assert_eq!(defaults_vi.embedding_function, None);
6497                    prop_assert_eq!(embedding_vi.embedding_function, None);
6498                }
6499            }
6500        }
6501
6502        proptest! {
6503            #[test]
6504            fn reconcile_schema_and_config_with_default_config_keeps_non_default_schema(
6505                schema in non_default_schema_strategy(),
6506                knn in knn_index_strategy(),
6507            ) {
6508                let default_config = match knn {
6509                    KnnIndex::Hnsw => InternalCollectionConfiguration::default_hnsw(),
6510                    KnnIndex::Spann => InternalCollectionConfiguration::default_spann(),
6511                };
6512
6513                let result = Schema::reconcile_schema_and_config(
6514                    Some(&schema),
6515                    Some(&default_config),
6516                    knn,
6517                )
6518                .expect("reconciliation should succeed");
6519
6520                let (defaults_vi, embedding_vi) = extract_vector_configs(&result);
6521
6522                // Property: when config is default, schema values should be preserved
6523                // Test that schema defaults.float_list vector_index config is applied
6524                if let Some(schema_float_list) = schema.defaults.float_list.as_ref() {
6525                    if let Some(schema_vi) = schema_float_list.vector_index.as_ref() {
6526                        if let Some(schema_space) = &schema_vi.config.space {
6527                            prop_assert_eq!(defaults_vi.space, Some(schema_space.clone()));
6528                        }
6529                        if let Some(schema_ef) = &schema_vi.config.embedding_function {
6530                            prop_assert_eq!(defaults_vi.embedding_function, Some(schema_ef.clone()));
6531                        }
6532                    }
6533                }
6534
6535                // Property: schema #embedding float_list vector_index config should be applied
6536                if let Some(embedding_values) = schema.keys.get(EMBEDDING_KEY) {
6537                    if let Some(embedding_float_list) = embedding_values.float_list.as_ref() {
6538                        if let Some(embedding_vi_type) = embedding_float_list.vector_index.as_ref() {
6539                            if let Some(schema_space) = &embedding_vi_type.config.space {
6540                                prop_assert_eq!(embedding_vi.space, Some(schema_space.clone()));
6541                            }
6542                        }
6543                    }
6544                }
6545            }
6546        }
6547    }
6548}